diff --git a/benchmark/BDN.benchmark/Filter/FilterExpressionBenchmarks.cs b/benchmark/BDN.benchmark/Filter/FilterExpressionBenchmarks.cs new file mode 100644 index 00000000000..265e79a6b03 --- /dev/null +++ b/benchmark/BDN.benchmark/Filter/FilterExpressionBenchmarks.cs @@ -0,0 +1,326 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +using System.Text; +using BenchmarkDotNet.Attributes; +using Garnet.server.Vector.Filter; + +namespace BDN.benchmark.Filter +{ + // ════════════════════════════════════════════════════════════════════════ + // 1. COMPILATION (one-time cost per VSIM query) + // ════════════════════════════════════════════════════════════════════════ + + /// Compile filter string → postfix program. Always allocates (List, Stack, ExprToken[]). + [MemoryDiagnoser] + public class FilterCompileBenchmarks + { + private byte[] _comparison; + private byte[] _logicalAnd; + private byte[] _stringEq; + private byte[] _arithmetic; + private byte[] _containment; + private byte[] _combined; + + [GlobalSetup] + public void Setup() + { + _comparison = ".year > 1950"u8.ToArray(); + _logicalAnd = ".year > 1950 and .rating >= 4.0"u8.ToArray(); + _stringEq = ".genre == \"action\""u8.ToArray(); + _arithmetic = "(.year - 2000) ** 2 < 100"u8.ToArray(); + _containment = "\"classic\" in .tags"u8.ToArray(); + _combined = ".rating * 2 > 8 and (.year >= 1980 or \"modern\" in .tags) and .genre == \"action\""u8.ToArray(); + } + + [Benchmark(Description = "Comparison (.year > N)")] + public void Comparison() => ExprCompiler.TryCompile(_comparison, out _); + + [Benchmark(Description = "Logical AND (2 clauses)")] + public void LogicalAnd() => ExprCompiler.TryCompile(_logicalAnd, out _); + + [Benchmark(Description = "String equality")] + public void StringEq() => ExprCompiler.TryCompile(_stringEq, out _); + + [Benchmark(Description = "Arithmetic + power")] + public void Arithmetic() => ExprCompiler.TryCompile(_arithmetic, out _); + + 
[Benchmark(Description = "Containment (in)")] + public void Containment() => ExprCompiler.TryCompile(_containment, out _); + + [Benchmark(Description = "Combined (all ops)")] + public void Combined() => ExprCompiler.TryCompile(_combined, out _); + } + + // ════════════════════════════════════════════════════════════════════════ + // 2. FIELD EXTRACTION (per candidate, per selector) + // ════════════════════════════════════════════════════════════════════════ + + /// + /// Extract a single field from raw JSON bytes. + /// Parameterized by JSON size: Small (2 fields), Medium (5), Large (12 + nested obj). + /// + [MemoryDiagnoser] + public class FilterExtractBenchmarks + { + // Small: {"year":1980,"rating":4.5} + // Medium: {"year":1980,"rating":4.5,"genre":"action","director":"Spielberg","tags":["classic","popular"]} + // Large: 12 fields including nested object and 3-element array + private byte[] _small; + private byte[] _medium; + private byte[] _large; + + [GlobalSetup] + public void Setup() + { + _small = Encoding.UTF8.GetBytes("{\"year\":1980,\"rating\":4.5}"); + _medium = Encoding.UTF8.GetBytes("{\"year\":1980,\"rating\":4.5,\"genre\":\"action\",\"director\":\"Spielberg\",\"tags\":[\"classic\",\"popular\"]}"); + _large = Encoding.UTF8.GetBytes("{\"id\":12345,\"title\":\"Test Movie\",\"year\":1980,\"rating\":4.5,\"genre\":\"action\",\"director\":\"Spielberg\",\"studio\":\"Universal\",\"budget\":50000000,\"tags\":[\"classic\",\"popular\",\"award-winning\"],\"metadata\":{\"source\":\"imdb\",\"verified\":true},\"active\":true}"); + } + + // --- Number fields (zero-alloc) --- + [Benchmark(Description = "Number · Small JSON (1st field)")] + public void Num_Small() => AttributeExtractor.ExtractField(_small, "year"); + + [Benchmark(Description = "Number · Medium JSON (2nd field)")] + public void Num_Medium() => AttributeExtractor.ExtractField(_medium, "rating"); + + [Benchmark(Description = "Number · Large JSON (skip 8 fields)")] + public void Num_Large() => 
AttributeExtractor.ExtractField(_large, "budget"); + + // --- String fields (zero-alloc for non-escaped) --- + [Benchmark(Description = "String · Medium JSON")] + public void Str_Medium() => AttributeExtractor.ExtractField(_medium, "genre"); + + [Benchmark(Description = "String · Large JSON (skip 5)")] + public void Str_Large() => AttributeExtractor.ExtractField(_large, "director"); + + // --- Array fields (ALLOCATES ExprToken[count]) --- + [Benchmark(Description = "Array[2] · Medium JSON → alloc")] + public void Arr_Medium() => AttributeExtractor.ExtractField(_medium, "tags"); + + [Benchmark(Description = "Array[3] · Large JSON → alloc")] + public void Arr_Large() => AttributeExtractor.ExtractField(_large, "tags"); + + // --- Boolean (zero-alloc) --- + [Benchmark(Description = "Boolean · Large JSON (skip nested obj)")] + public void Bool_Large() => AttributeExtractor.ExtractField(_large, "active"); + + // --- Missing field (zero-alloc) --- + [Benchmark(Description = "Missing · Small JSON")] + public void Miss_Small() => AttributeExtractor.ExtractField(_small, "missing"); + + [Benchmark(Description = "Missing · Medium JSON")] + public void Miss_Medium() => AttributeExtractor.ExtractField(_medium, "missing"); + + [Benchmark(Description = "Missing · Large JSON")] + public void Miss_Large() => AttributeExtractor.ExtractField(_large, "missing"); + } + + // ════════════════════════════════════════════════════════════════════════ + // 3. EXECUTION BY EXPRESSION TYPE (compile-once, run per candidate) + // Fixed JSON: Medium (5 fields, includes array) + // Ordered: most frequent → least frequent real-world query patterns + // ════════════════════════════════════════════════════════════════════════ + + /// + /// Run pre-compiled filters against medium JSON. + /// Ordered from most common to least common real-world usage patterns. 
+ /// + [MemoryDiagnoser] + public class FilterRunByExprBenchmarks + { + // --- Common: range / categorical filters --- + private ExprProgram _comparison; // .year > 1950 + private ExprProgram _logicalAnd; // .year > 1950 and .rating >= 4.0 + private ExprProgram _stringEq; // .genre == "action" + private ExprProgram _containsArray; // "classic" in .tags + + // --- Moderate: logical combinations --- + private ExprProgram _logicalOr; // .year < 1960 or .rating > 4.0 + private ExprProgram _not; // not (.genre == "drama") + private ExprProgram _stringNeq; // .genre != "drama" + + // --- Less common: computed / advanced --- + private ExprProgram _arithmetic; // .rating * 2 > 8 + private ExprProgram _power; // (.year - 2000) ** 2 < 100 + private ExprProgram _containsString; // "act" in .genre (substring) + + // --- Realistic combined --- + private ExprProgram _combined; // all ops together + + private byte[] _json; + private Stack _stack; + + [GlobalSetup] + public void Setup() + { + _comparison = ExprCompiler.TryCompile(".year > 1950"u8, out _); + _logicalAnd = ExprCompiler.TryCompile(".year > 1950 and .rating >= 4.0"u8, out _); + _stringEq = ExprCompiler.TryCompile(".genre == \"action\""u8, out _); + _containsArray = ExprCompiler.TryCompile("\"classic\" in .tags"u8, out _); + _logicalOr = ExprCompiler.TryCompile(".year < 1960 or .rating > 4.0"u8, out _); + _not = ExprCompiler.TryCompile("not (.genre == \"drama\")"u8, out _); + _stringNeq = ExprCompiler.TryCompile(".genre != \"drama\""u8, out _); + _arithmetic = ExprCompiler.TryCompile(".rating * 2 > 8"u8, out _); + _power = ExprCompiler.TryCompile("(.year - 2000) ** 2 < 100"u8, out _); + _containsString = ExprCompiler.TryCompile("\"act\" in .genre"u8, out _); + _combined = ExprCompiler.TryCompile(".rating * 2 > 8 and (.year >= 1980 or \"modern\" in .tags) and .genre == \"action\""u8, out _); + + _json = 
Encoding.UTF8.GetBytes("{\"year\":1980,\"rating\":4.5,\"genre\":\"action\",\"director\":\"Spielberg\",\"tags\":[\"classic\",\"popular\"]}"); + _stack = ExprRunner.CreateStack(); + } + + // ── Common: range / categorical ────────────────────────────────── + + [Benchmark(Description = "1. .year > N (range)")] + public bool Comparison() => ExprRunner.Run(_comparison, _json, _stack); + + [Benchmark(Description = "2. .year > N and .rating >= M (multi-range)")] + public bool LogicalAnd() => ExprRunner.Run(_logicalAnd, _json, _stack); + + [Benchmark(Description = "3. .genre == \"action\" (category)")] + public bool StringEq() => ExprRunner.Run(_stringEq, _json, _stack); + + [Benchmark(Description = "4. \"x\" in .tags (tag search) → ALLOC")] + public bool InArray() => ExprRunner.Run(_containsArray, _json, _stack); + + // ── Moderate: logical combinations ─────────────────────────────── + + [Benchmark(Description = "5. A or B (logical OR)")] + public bool LogicalOr() => ExprRunner.Run(_logicalOr, _json, _stack); + + [Benchmark(Description = "6. not (A) (exclusion)")] + public bool Not() => ExprRunner.Run(_not, _json, _stack); + + [Benchmark(Description = "7. .genre != \"drama\" (not-equal)")] + public bool StringNeq() => ExprRunner.Run(_stringNeq, _json, _stack); + + // ── Less common: computed / advanced ───────────────────────────── + + [Benchmark(Description = "8. .rating * 2 > 8 (arithmetic)")] + public bool Arithmetic() => ExprRunner.Run(_arithmetic, _json, _stack); + + [Benchmark(Description = "9. (.year-2000)**2 < 100 (power)")] + public bool Power() => ExprRunner.Run(_power, _json, _stack); + + [Benchmark(Description = "10. \"act\" in .genre (substring)")] + public bool InString() => ExprRunner.Run(_containsString, _json, _stack); + + // ── Realistic combined ─────────────────────────────────────────── + + [Benchmark(Description = "11. 
Combined (all ops) → ALLOC")] + public bool Combined() => ExprRunner.Run(_combined, _json, _stack); + } + + // ════════════════════════════════════════════════════════════════════════ + // 4. EXECUTION BY JSON COMPLEXITY (fixed filter, varying JSON) + // ════════════════════════════════════════════════════════════════════════ + + /// + /// Same filter run against small / medium / large JSON. + /// Shows how JSON size affects extraction + evaluation time. + /// + [MemoryDiagnoser] + public class FilterRunByJsonBenchmarks + { + private ExprProgram _numericFilter; + private ExprProgram _arrayFilter; + + private byte[] _small; // 2 fields, no array + private byte[] _medium; // 5 fields, 2-element array + private byte[] _large; // 12 fields, 3-element array, nested object + + private Stack _stack; + + [GlobalSetup] + public void Setup() + { + _numericFilter = ExprCompiler.TryCompile(".year > 1950 and .rating >= 4.0"u8, out _); + _arrayFilter = ExprCompiler.TryCompile("\"classic\" in .tags"u8, out _); + + _small = Encoding.UTF8.GetBytes("{\"year\":1980,\"rating\":4.5}"); + _medium = Encoding.UTF8.GetBytes("{\"year\":1980,\"rating\":4.5,\"genre\":\"action\",\"director\":\"Spielberg\",\"tags\":[\"classic\",\"popular\"]}"); + _large = Encoding.UTF8.GetBytes("{\"id\":12345,\"title\":\"Test Movie\",\"year\":1980,\"rating\":4.5,\"genre\":\"action\",\"director\":\"Spielberg\",\"studio\":\"Universal\",\"budget\":50000000,\"tags\":[\"classic\",\"popular\",\"award-winning\"],\"metadata\":{\"source\":\"imdb\",\"verified\":true},\"active\":true}"); + _stack = ExprRunner.CreateStack(); + } + + // --- Numeric filter (zero-alloc regardless of JSON size) --- + [Benchmark(Description = "Numeric AND · Small JSON")] + public bool Numeric_Small() => ExprRunner.Run(_numericFilter, _small, _stack); + + [Benchmark(Description = "Numeric AND · Medium JSON")] + public bool Numeric_Medium() => ExprRunner.Run(_numericFilter, _medium, _stack); + + [Benchmark(Description = "Numeric AND · Large 
JSON")] + public bool Numeric_Large() => ExprRunner.Run(_numericFilter, _large, _stack); + + // --- Array filter (allocates when array is found) --- + [Benchmark(Description = "in .tags · Small JSON (no tags → false)")] + public bool Array_Small() => ExprRunner.Run(_arrayFilter, _small, _stack); + + [Benchmark(Description = "in .tags · Medium JSON (2 elem) → alloc")] + public bool Array_Medium() => ExprRunner.Run(_arrayFilter, _medium, _stack); + + [Benchmark(Description = "in .tags · Large JSON (3 elem) → alloc")] + public bool Array_Large() => ExprRunner.Run(_arrayFilter, _large, _stack); + } + + // ════════════════════════════════════════════════════════════════════════ + // 5. BATCH (compile once, run N candidates) + // ════════════════════════════════════════════════════════════════════════ + + /// + /// Simulate real VSIM post-filtering: compile once, evaluate N candidates. + /// Shows total allocation and throughput at scale. + /// + [MemoryDiagnoser] + public class FilterBatchBenchmarks + { + private ExprProgram _numericAnd; + private ExprProgram _combined; + private byte[] _small; + private byte[] _medium; + private Stack _stack; + + [GlobalSetup] + public void Setup() + { + _numericAnd = ExprCompiler.TryCompile(".year > 1950 and .rating >= 4.0"u8, out _); + _combined = ExprCompiler.TryCompile(".rating * 2 > 8 and (.year >= 1980 or \"modern\" in .tags) and .genre == \"action\""u8, out _); + _small = Encoding.UTF8.GetBytes("{\"year\":1980,\"rating\":4.5}"); + _medium = Encoding.UTF8.GetBytes("{\"year\":1980,\"rating\":4.5,\"genre\":\"action\",\"director\":\"Spielberg\",\"tags\":[\"classic\",\"popular\"]}"); + _stack = ExprRunner.CreateStack(); + } + + [Benchmark(Description = "Numeric AND · N candidates (zero-alloc)")] + [Arguments(10)] + [Arguments(100)] + [Arguments(1000)] + public int NumericAnd(int N) + { + var matched = 0; + for (var i = 0; i < N; i++) + { + var json = (i % 3 == 0) ? 
_small : _medium; + if (ExprRunner.Run(_numericAnd, json, _stack)) matched++; + } + return matched; + } + + [Benchmark(Description = "Combined + array · N candidates (allocs)")] + [Arguments(10)] + [Arguments(100)] + [Arguments(1000)] + public int Combined(int N) + { + var matched = 0; + for (var i = 0; i < N; i++) + { + var json = (i % 3 == 0) ? _small : _medium; + if (ExprRunner.Run(_combined, json, _stack)) matched++; + } + return matched; + } + } +} \ No newline at end of file diff --git a/libs/server/API/GarnetApi.cs b/libs/server/API/GarnetApi.cs index 85163bd5acf..435212cc982 100644 --- a/libs/server/API/GarnetApi.cs +++ b/libs/server/API/GarnetApi.cs @@ -520,12 +520,12 @@ public unsafe GarnetStatus VectorSetRemove(ArgSlice key, ArgSlice element) => storageSession.VectorSetRemove(SpanByte.FromPinnedPointer(key.ptr, key.length), SpanByte.FromPinnedPointer(element.ptr, element.length)); /// - public unsafe GarnetStatus VectorSetValueSimilarity(ArgSlice key, VectorValueType valueType, ArgSlice values, int count, float delta, int searchExplorationFactor, ArgSlice filter, int maxFilteringEffort, bool includeAttributes, ref SpanByteAndMemory outputIds, out VectorIdFormat outputIdFormat, ref SpanByteAndMemory outputDistances, ref SpanByteAndMemory outputAttributes, out VectorManagerResult result) - => storageSession.VectorSetValueSimilarity(SpanByte.FromPinnedPointer(key.ptr, key.length), valueType, values, count, delta, searchExplorationFactor, filter.ReadOnlySpan, maxFilteringEffort, includeAttributes, ref outputIds, out outputIdFormat, ref outputDistances, ref outputAttributes, out result); + public unsafe GarnetStatus VectorSetValueSimilarity(ArgSlice key, VectorValueType valueType, ArgSlice values, int count, float delta, int searchExplorationFactor, ArgSlice filter, int maxFilteringEffort, bool includeAttributes, ref SpanByteAndMemory outputIds, out VectorIdFormat outputIdFormat, ref SpanByteAndMemory outputDistances, ref SpanByteAndMemory outputAttributes, 
out VectorManagerResult result, ref SpanByteAndMemory filterBitmap) + => storageSession.VectorSetValueSimilarity(SpanByte.FromPinnedPointer(key.ptr, key.length), valueType, values, count, delta, searchExplorationFactor, filter.ReadOnlySpan, maxFilteringEffort, includeAttributes, ref outputIds, out outputIdFormat, ref outputDistances, ref outputAttributes, out result, ref filterBitmap); /// - public unsafe GarnetStatus VectorSetElementSimilarity(ArgSlice key, ArgSlice element, int count, float delta, int searchExplorationFactor, ArgSlice filter, int maxFilteringEffort, bool includeAttributes, ref SpanByteAndMemory outputIds, out VectorIdFormat outputIdFormat, ref SpanByteAndMemory outputDistances, ref SpanByteAndMemory outputAttributes, out VectorManagerResult result) - => storageSession.VectorSetElementSimilarity(SpanByte.FromPinnedPointer(key.ptr, key.length), element.ReadOnlySpan, count, delta, searchExplorationFactor, filter.ReadOnlySpan, maxFilteringEffort, includeAttributes, ref outputIds, out outputIdFormat, ref outputDistances, ref outputAttributes, out result); + public unsafe GarnetStatus VectorSetElementSimilarity(ArgSlice key, ArgSlice element, int count, float delta, int searchExplorationFactor, ArgSlice filter, int maxFilteringEffort, bool includeAttributes, ref SpanByteAndMemory outputIds, out VectorIdFormat outputIdFormat, ref SpanByteAndMemory outputDistances, ref SpanByteAndMemory outputAttributes, out VectorManagerResult result, ref SpanByteAndMemory filterBitmap) + => storageSession.VectorSetElementSimilarity(SpanByte.FromPinnedPointer(key.ptr, key.length), element.ReadOnlySpan, count, delta, searchExplorationFactor, filter.ReadOnlySpan, maxFilteringEffort, includeAttributes, ref outputIds, out outputIdFormat, ref outputDistances, ref outputAttributes, out result, ref filterBitmap); /// public unsafe GarnetStatus VectorSetEmbedding(ArgSlice key, ArgSlice element, ref SpanByteAndMemory outputDistances) diff --git 
a/libs/server/API/GarnetWatchApi.cs b/libs/server/API/GarnetWatchApi.cs index 2cae35fdafe..d60d8546f65 100644 --- a/libs/server/API/GarnetWatchApi.cs +++ b/libs/server/API/GarnetWatchApi.cs @@ -650,17 +650,17 @@ public bool ResetScratchBuffer(int offset) #region Vector Sets /// - public GarnetStatus VectorSetValueSimilarity(ArgSlice key, VectorValueType valueType, ArgSlice value, int count, float delta, int searchExplorationFactor, ArgSlice filter, int maxFilteringEffort, bool includeAttributes, ref SpanByteAndMemory outputIds, out VectorIdFormat outputIdFormat, ref SpanByteAndMemory outputDistances, ref SpanByteAndMemory outputAttributes, out VectorManagerResult result) + public GarnetStatus VectorSetValueSimilarity(ArgSlice key, VectorValueType valueType, ArgSlice value, int count, float delta, int searchExplorationFactor, ArgSlice filter, int maxFilteringEffort, bool includeAttributes, ref SpanByteAndMemory outputIds, out VectorIdFormat outputIdFormat, ref SpanByteAndMemory outputDistances, ref SpanByteAndMemory outputAttributes, out VectorManagerResult result, ref SpanByteAndMemory filterBitmap) { garnetApi.WATCH(key, StoreType.Main); - return garnetApi.VectorSetValueSimilarity(key, valueType, value, count, delta, searchExplorationFactor, filter, maxFilteringEffort, includeAttributes, ref outputIds, out outputIdFormat, ref outputDistances, ref outputAttributes, out result); + return garnetApi.VectorSetValueSimilarity(key, valueType, value, count, delta, searchExplorationFactor, filter, maxFilteringEffort, includeAttributes, ref outputIds, out outputIdFormat, ref outputDistances, ref outputAttributes, out result, ref filterBitmap); } /// - public GarnetStatus VectorSetElementSimilarity(ArgSlice key, ArgSlice element, int count, float delta, int searchExplorationFactor, ArgSlice filter, int maxFilteringEffort, bool includeAttributes, ref SpanByteAndMemory outputIds, out VectorIdFormat outputIdFormat, ref SpanByteAndMemory outputDistances, ref SpanByteAndMemory 
outputAttributes, out VectorManagerResult result) + public GarnetStatus VectorSetElementSimilarity(ArgSlice key, ArgSlice element, int count, float delta, int searchExplorationFactor, ArgSlice filter, int maxFilteringEffort, bool includeAttributes, ref SpanByteAndMemory outputIds, out VectorIdFormat outputIdFormat, ref SpanByteAndMemory outputDistances, ref SpanByteAndMemory outputAttributes, out VectorManagerResult result, ref SpanByteAndMemory filterBitmap) { garnetApi.WATCH(key, StoreType.Main); - return garnetApi.VectorSetElementSimilarity(key, element, count, delta, searchExplorationFactor, filter, maxFilteringEffort, includeAttributes, ref outputIds, out outputIdFormat, ref outputDistances, ref outputAttributes, out result); + return garnetApi.VectorSetElementSimilarity(key, element, count, delta, searchExplorationFactor, filter, maxFilteringEffort, includeAttributes, ref outputIds, out outputIdFormat, ref outputDistances, ref outputAttributes, out result, ref filterBitmap); } /// diff --git a/libs/server/API/IGarnetApi.cs b/libs/server/API/IGarnetApi.cs index 6acf3b3e303..ac4e370a762 100644 --- a/libs/server/API/IGarnetApi.cs +++ b/libs/server/API/IGarnetApi.cs @@ -2041,7 +2041,7 @@ public bool IterateObjectStore(ref TScanFunctions scanFunctions, /// Ids are encoded in as length prefixed blobs of bytes. /// Attributes are encoded in as length prefixed blobs of bytes. 
/// - GarnetStatus VectorSetValueSimilarity(ArgSlice key, VectorValueType valueType, ArgSlice value, int count, float delta, int searchExplorationFactor, ArgSlice filter, int maxFilteringEffort, bool includeAttributes, ref SpanByteAndMemory outputIds, out VectorIdFormat outputIdFormat, ref SpanByteAndMemory outputDistances, ref SpanByteAndMemory outputAttributes, out VectorManagerResult result); + GarnetStatus VectorSetValueSimilarity(ArgSlice key, VectorValueType valueType, ArgSlice value, int count, float delta, int searchExplorationFactor, ArgSlice filter, int maxFilteringEffort, bool includeAttributes, ref SpanByteAndMemory outputIds, out VectorIdFormat outputIdFormat, ref SpanByteAndMemory outputDistances, ref SpanByteAndMemory outputAttributes, out VectorManagerResult result, ref SpanByteAndMemory filterBitmap); /// /// Perform a similarity search given an element already in the vector set and these parameters. @@ -2049,7 +2049,7 @@ public bool IterateObjectStore(ref TScanFunctions scanFunctions, /// Ids are encoded in as length prefixed blobs of bytes. /// Attributes are encoded in as length prefixed blobs of bytes. /// - GarnetStatus VectorSetElementSimilarity(ArgSlice key, ArgSlice element, int count, float delta, int searchExplorationFactor, ArgSlice filter, int maxFilteringEffort, bool includeAttributes, ref SpanByteAndMemory outputIds, out VectorIdFormat outputIdFormat, ref SpanByteAndMemory outputDistances, ref SpanByteAndMemory outputAttributes, out VectorManagerResult result); + GarnetStatus VectorSetElementSimilarity(ArgSlice key, ArgSlice element, int count, float delta, int searchExplorationFactor, ArgSlice filter, int maxFilteringEffort, bool includeAttributes, ref SpanByteAndMemory outputIds, out VectorIdFormat outputIdFormat, ref SpanByteAndMemory outputDistances, ref SpanByteAndMemory outputAttributes, out VectorManagerResult result, ref SpanByteAndMemory filterBitmap); /// /// Fetch the embedding of a given element in a Vector set. 
diff --git a/libs/server/Resp/Vector/Filter/AttributeExtractor.cs b/libs/server/Resp/Vector/Filter/AttributeExtractor.cs new file mode 100644 index 00000000000..2230080c73e --- /dev/null +++ b/libs/server/Resp/Vector/Filter/AttributeExtractor.cs @@ -0,0 +1,604 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +using System; +using System.Buffers; +using System.Buffers.Binary; +using System.Buffers.Text; + +namespace Garnet.server.Vector.Filter +{ + /// + /// Ultra-lightweight top-level JSON field extractor. + /// Returns fields directly as values. + /// + /// 1. Zero heap allocations while seeking the requested key. + /// 2. A single parse (and allocation) when the key matches. + /// 3. Supports: strings (with \n \r \t \\ \" escapes), numbers, booleans, null, + /// and flat arrays of these primitives. Nested objects return null. + /// 4. Operates on raw UTF-8 bytes (ReadOnlySpan<byte>) — no JsonDocument DOM. + /// + internal static class AttributeExtractor + { + /// + /// Stride (in ints) per document in the field index: 1 (count) + 2 per field (offset, length). + /// + internal static int FieldIndexStride(int numFields) => 1 + 2 * numFields; + + /// + /// Build a field offset index for ALL documents in the contiguous attributes span. + /// + /// The attributes span is a series of length-prefixed JSON blobs: + /// [len0][json0][len1][json1]... + /// + /// For each document, the index records: + /// [fieldCount, field0_offset, field0_length, field1_offset, field1_length, ...] + /// + /// Offsets are relative to the start of that document's JSON (after the length prefix). + /// A field offset of -1 means that field was not found in that document. + /// A fieldCount of -1 means malformed JSON. + /// + /// must contain at least + /// numDocs * FieldIndexStride(fieldNames.Length) ints. 
+ /// + public static void BuildFieldIndex( + ReadOnlySpan attributesSpan, + int numDocs, + string[] fieldNames, + Span indexBuffer) + { + var numFields = fieldNames.Length; + var stride = FieldIndexStride(numFields); + var remaining = attributesSpan; + + for (var doc = 0; doc < numDocs; doc++) + { + var docIndex = indexBuffer.Slice(doc * stride, stride); + + // Read length prefix + var attrLen = BinaryPrimitives.ReadInt32LittleEndian(remaining); + var json = remaining.Slice(sizeof(int), attrLen); + + // Initialize: count = 0, all offsets = -1 + docIndex[0] = 0; + for (var f = 0; f < numFields; f++) + { + docIndex[1 + 2 * f] = -1; + docIndex[1 + 2 * f + 1] = 0; + } + + // Scan this document for requested field positions + ScanFieldPositions(json, fieldNames, docIndex); + + remaining = remaining[(sizeof(int) + attrLen)..]; + } + } + + /// + /// Create an ExprToken from a value at an indexed position in JSON bytes. + /// Uses the offset and length recorded by . + /// + public static ExprToken ParseValueAt(ReadOnlySpan json, int offset, int length) + { + if (offset < 0 || length <= 0 || offset + length > json.Length) + return default; + + var c = json[offset]; + + // String: content is between quotes + if (c == (byte)'"') + { + // offset points to opening quote, length includes both quotes + var contentStart = offset + 1; + var contentLen = length - 2; + // Check for escapes (scan for backslash) + var content = json.Slice(contentStart, contentLen); + if (content.IndexOf((byte)'\\') < 0) + { + // Zero-alloc: store byte offset+length into source JSON + return ExprToken.NewJsonStr(contentStart, contentLen); + } + else + { + // Escaped: materialize + return ExprToken.NewStr(UnescapeJsonString(content)); + } + } + + // Number + if (IsDigit(c) || c == (byte)'-' || c == (byte)'+') + { + var numSpan = json.Slice(offset, length); + if (Utf8Parser.TryParse(numSpan, out double value, out var consumed) && consumed == numSpan.Length) + return ExprToken.NewNum(value); + return 
default; + } + + // Boolean / null + if (c == (byte)'t' && length == 4) return ExprToken.NewNum(1); + if (c == (byte)'f' && length == 5) return ExprToken.NewNum(0); + if (c == (byte)'n' && length == 4) return ExprToken.NewNull(); + + // Array: parse via existing method + if (c == (byte)'[') + { + var s = json[offset..]; + return ParseArrayToken(json, ref s); + } + + return default; + } + + /// + /// Scan a single JSON object and record the byte positions of requested fields. + /// + private static void ScanFieldPositions(ReadOnlySpan json, string[] fieldNames, Span docIndex) + { + var numFields = fieldNames.Length; + var s = TrimWhiteSpace(json); + if (s.IsEmpty || s[0] != (byte)'{') + { + docIndex[0] = -1; // malformed + return; + } + s = s[1..]; // Skip '{' + + var found = 0; + + while (true) + { + s = TrimWhiteSpace(s); + if (s.IsEmpty) { if (found == 0) docIndex[0] = -1; return; } + if (s[0] == (byte)'}') return; + + // Expect key string + if (s[0] != (byte)'"') { if (found == 0) docIndex[0] = -1; return; } + + var afterOpenQuote = s[1..]; + if (!SkipString(ref s)) { docIndex[0] = -1; return; } + var keyContent = afterOpenQuote[..(afterOpenQuote.Length - s.Length - 1)]; + + // Match against requested fields + var matchIndex = -1; + for (var i = 0; i < numFields; i++) + { + if (docIndex[1 + 2 * i] < 0 && MatchKey(keyContent, fieldNames[i])) + { + matchIndex = i; + break; + } + } + + // Expect ':' + s = TrimWhiteSpace(s); + if (s.IsEmpty || s[0] != (byte)':') { docIndex[0] = -1; return; } + s = s[1..]; + s = TrimWhiteSpace(s); + if (s.IsEmpty) { docIndex[0] = -1; return; } + + // Record value position (offset relative to json start) + var valueStart = json.Length - s.Length; + var beforeSkip = s; + + if (!SkipValue(ref s)) { docIndex[0] = -1; return; } + + var valueLen = beforeSkip.Length - s.Length; + + if (matchIndex >= 0) + { + docIndex[1 + 2 * matchIndex] = valueStart; + docIndex[1 + 2 * matchIndex + 1] = valueLen; + found++; + docIndex[0] = found; + if 
(found == numFields) return; // All fields found — early exit + } + + // Look for ',' or '}' + s = TrimWhiteSpace(s); + if (s.IsEmpty) return; + if (s[0] == (byte)',') { s = s[1..]; continue; } + if (s[0] == (byte)'}') return; + docIndex[0] = -1; // Malformed + return; + } + } + + /// + /// Extract multiple top-level fields from a JSON object in a single pass. + /// lists the fields to extract. + /// must be at least .Length long. + /// Entries for fields not found are set to default (IsNone). + /// Returns the number of fields successfully extracted. + /// + public static int ExtractFields(ReadOnlySpan json, string[] fieldNames, ExprToken[] results) + { + // Clear results + for (var i = 0; i < fieldNames.Length; i++) + results[i] = default; + + var s = TrimWhiteSpace(json); + if (s.IsEmpty || s[0] != (byte)'{') return 0; + s = s[1..]; // Skip '{' + + var found = 0; + var needed = fieldNames.Length; + + while (true) + { + s = TrimWhiteSpace(s); + if (s.IsEmpty) return found; + if (s[0] == (byte)'}') return found; + + // Expect a key string + if (s[0] != (byte)'"') return found; + + var afterOpenQuote = s[1..]; + if (!SkipString(ref s)) return found; + var keyContent = afterOpenQuote[..(afterOpenQuote.Length - s.Length - 1)]; + + // Check against all requested field names + var matchIndex = -1; + for (var i = 0; i < fieldNames.Length; i++) + { + if (results[i].IsNone && MatchKey(keyContent, fieldNames[i])) + { + matchIndex = i; + break; + } + } + + // Expect ':' + s = TrimWhiteSpace(s); + if (s.IsEmpty || s[0] != (byte)':') return found; + s = s[1..]; + + s = TrimWhiteSpace(s); + if (s.IsEmpty) return found; + + if (matchIndex >= 0) + { + results[matchIndex] = ParseValueToken(json, ref s); + found++; + if (found == needed) return found; // All fields found — early exit + } + else + { + if (!SkipValue(ref s)) return found; + } + + s = TrimWhiteSpace(s); + if (s.IsEmpty) return found; + if (s[0] == (byte)',') { s = s[1..]; continue; } + if (s[0] == (byte)'}') return 
found; + return found; // Malformed JSON + } + } + + /// + /// Extract a top-level field from a JSON object and return it as an ExprToken. + /// Returns default (IsNone) if the field is not found or the JSON is malformed. + /// + public static ExprToken ExtractField(ReadOnlySpan json, string fieldName) + { + var s = TrimWhiteSpace(json); + if (s.IsEmpty || s[0] != (byte)'{') return default; + s = s[1..]; // Skip '{' + + while (true) + { + s = TrimWhiteSpace(s); + if (s.IsEmpty) return default; + if (s[0] == (byte)'}') return default; // End of object, field not found + + // Expect a key string + if (s[0] != (byte)'"') return default; + + // Extract key content (between quotes) + var afterOpenQuote = s[1..]; + if (!SkipString(ref s)) return default; + // Key content is between afterOpenQuote and s (minus the closing quote byte) + var keyContent = afterOpenQuote[..(afterOpenQuote.Length - s.Length - 1)]; + + var match = MatchKey(keyContent, fieldName); + + // Expect ':' + s = TrimWhiteSpace(s); + if (s.IsEmpty || s[0] != (byte)':') return default; + s = s[1..]; // Skip ':' + + s = TrimWhiteSpace(s); + if (s.IsEmpty) return default; + + if (match) + { + // Found the field — parse the value into a token + return ParseValueToken(json, ref s); + } + else + { + // Skip the value + if (!SkipValue(ref s)) return default; + } + + // Look for ',' or '}' + s = TrimWhiteSpace(s); + if (s.IsEmpty) return default; + if (s[0] == (byte)',') { s = s[1..]; continue; } + if (s[0] == (byte)'}') return default; // End of object, not found + return default; // Malformed JSON + } + } + + // ======================== Value parsing (allocating) ======================== + + private static ExprToken ParseValueToken(ReadOnlySpan json, ref ReadOnlySpan s) + { + s = TrimWhiteSpace(s); + if (s.IsEmpty) return default; + + var c = s[0]; + if (c == (byte)'"') return ParseStringToken(json, ref s); + if (c == (byte)'[') return ParseArrayToken(json, ref s); + if (c == (byte)'{') return default; // 
Nested objects not supported + if (c == (byte)'t') return ParseLiteralToken(ref s, "true"u8, ExprTokenType.Num, 1); + if (c == (byte)'f') return ParseLiteralToken(ref s, "false"u8, ExprTokenType.Num, 0); + if (c == (byte)'n') return ParseLiteralToken(ref s, "null"u8, ExprTokenType.Null, 0); + if (IsDigit(c) || c == (byte)'-' || c == (byte)'+') + return ParseNumberToken(ref s); + + return default; + } + + private static ExprToken ParseStringToken(ReadOnlySpan json, ref ReadOnlySpan s) + { + if (s.IsEmpty || s[0] != (byte)'"') return default; + s = s[1..]; // Skip opening quote + var body = s; + var hasEscape = false; + + while (!s.IsEmpty) + { + if (s[0] == (byte)'\\') + { + hasEscape = true; + s = s[2..]; // Skip escape sequence + continue; + } + if (s[0] == (byte)'"') + { + var content = body[..(body.Length - s.Length)]; + if (!hasEscape) + { + // Zero-allocation: store byte offset+length into the source JSON + var absoluteStart = json.Length - body.Length; + s = s[1..]; // Skip closing quote + return ExprToken.NewJsonStr(absoluteStart, content.Length); + } + else + { + // Escaped strings must be materialized (rare path) + var value = UnescapeJsonString(content); + s = s[1..]; // Skip closing quote + return ExprToken.NewStr(value); + } + } + s = s[1..]; + } + return default; // Unterminated string + } + + private static ExprToken ParseNumberToken(ref ReadOnlySpan s) + { + var original = s; + while (!s.IsEmpty && IsNumberChar(s[0])) s = s[1..]; + + var numSpan = original[..(original.Length - s.Length)]; + if (numSpan.IsEmpty) return default; + + if (!Utf8Parser.TryParse(numSpan, out double value, out var bytesConsumed) || bytesConsumed != numSpan.Length) + { + s = original; + return default; + } + return ExprToken.NewNum(value); + } + + private static ExprToken ParseLiteralToken(ref ReadOnlySpan s, + ReadOnlySpan literal, ExprTokenType type, double num) + { + if (s.Length < literal.Length) return default; + if (!s[..literal.Length].SequenceEqual(literal)) return 
default; + + // Verify delimiter follows (space, comma, bracket, brace, or end) + if (s.Length > literal.Length) + { + var next = (char)s[literal.Length]; + if (!char.IsWhiteSpace(next) && next != ',' && next != ']' && next != '}') + return default; + } + + s = s[literal.Length..]; + return type == ExprTokenType.Null ? ExprToken.NewNull() : ExprToken.NewNum(num); + } + + /// Max array elements before rejecting. + private const int MaxArrayElements = 64; + + private static ExprToken ParseArrayToken(ReadOnlySpan json, ref ReadOnlySpan s) + { + if (s.IsEmpty || s[0] != (byte)'[') return default; + s = s[1..]; // Skip '[' + s = TrimWhiteSpace(s); + + // Handle empty array + if (!s.IsEmpty && s[0] == (byte)']') + { + s = s[1..]; + return ExprToken.NewTuple([], 0); + } + + // Rent from pool instead of allocating a new scratch array every call + var elements = ArrayPool.Shared.Rent(MaxArrayElements); + var count = 0; + + try + { + while (true) + { + s = TrimWhiteSpace(s); + if (s.IsEmpty || count >= MaxArrayElements) return default; + + var ele = ParseValueToken(json, ref s); + if (ele.IsNone) return default; + elements[count++] = ele; + + s = TrimWhiteSpace(s); + if (s.IsEmpty) return default; + if (s[0] == (byte)',') { s = s[1..]; continue; } + if (s[0] == (byte)']') { s = s[1..]; break; } + return default; // Malformed + } + + var result = new ExprToken[count]; + Array.Copy(elements, result, count); + return ExprToken.NewTuple(result, count); + } + finally + { + ArrayPool.Shared.Return(elements, clearArray: true); + } + } + + // ======================== Fast skipping (non-allocating) ======================== + + private static bool SkipValue(ref ReadOnlySpan s) + { + s = TrimWhiteSpace(s); + if (s.IsEmpty) return false; + + return (char)s[0] switch + { + '"' => SkipString(ref s), + '{' => SkipBracketed(ref s, (byte)'{', (byte)'}'), + '[' => SkipBracketed(ref s, (byte)'[', (byte)']'), + 't' => SkipLiteral(ref s, "true"u8), + 'f' => SkipLiteral(ref s, "false"u8), + 'n' 
=> SkipLiteral(ref s, "null"u8), + _ => SkipNumber(ref s), + }; + } + + private static bool SkipString(ref ReadOnlySpan s) + { + if (s.IsEmpty || s[0] != (byte)'"') return false; + s = s[1..]; // Skip opening quote + while (!s.IsEmpty) + { + if (s[0] == (byte)'\\') { s = s[2..]; continue; } + if (s[0] == (byte)'"') { s = s[1..]; return true; } + s = s[1..]; + } + return false; // Unterminated + } + + private static bool SkipBracketed(ref ReadOnlySpan s, byte opener, byte closer) + { + var depth = 1; + s = s[1..]; // Skip opener + while (!s.IsEmpty && depth > 0) + { + if (s[0] == (byte)'"') + { + if (!SkipString(ref s)) return false; + continue; + } + if (s[0] == opener) depth++; + else if (s[0] == closer) depth--; + s = s[1..]; + } + return depth == 0; + } + + private static bool SkipLiteral(ref ReadOnlySpan s, ReadOnlySpan literal) + { + if (s.Length < literal.Length) return false; + if (!s[..literal.Length].SequenceEqual(literal)) return false; + s = s[literal.Length..]; + return true; + } + + private static bool SkipNumber(ref ReadOnlySpan s) + { + var original = s; + while (!s.IsEmpty && IsNumberChar(s[0])) s = s[1..]; + return s.Length < original.Length; + } + + // ======================== Shared byte-level helpers ======================== + // These are used by both AttributeExtractor and ExprCompiler. + + internal static bool IsDigit(byte b) => b >= (byte)'0' && b <= (byte)'9'; + + internal static bool IsLetter(byte b) => (b >= (byte)'a' && b <= (byte)'z') || (b >= (byte)'A' && b <= (byte)'Z'); + + internal static bool IsLetterOrDigit(byte b) => IsLetter(b) || IsDigit(b); + + internal static bool IsWhiteSpace(byte b) => b == (byte)' ' || b == (byte)'\t' || b == (byte)'\n' || b == (byte)'\r'; + + /// + /// Returns the span with leading whitespace removed. 
+ /// + internal static ReadOnlySpan TrimWhiteSpace(ReadOnlySpan s) + { + var i = 0; + while (i < s.Length && IsWhiteSpace(s[i])) i++; + return s[i..]; + } + + private static bool IsNumberChar(byte b) => + IsDigit(b) || b == (byte)'-' || b == (byte)'+' || + b == (byte)'.' || b == (byte)'e' || b == (byte)'E'; + + private static bool MatchKey(ReadOnlySpan key, string fieldName) + { + if (key.Length != fieldName.Length) return false; + for (var i = 0; i < key.Length; i++) + { + if (key[i] != (byte)fieldName[i]) return false; + } + return true; + } + + private static string UnescapeJsonString(ReadOnlySpan content) + { + // Worst case: each byte is a character + var chars = new char[content.Length]; + var len = 0; + var i = 0; + while (i < content.Length) + { + if (content[i] == (byte)'\\' && i + 1 < content.Length) + { + i++; + chars[len++] = (char)content[i] switch + { + 'n' => '\n', + 'r' => '\r', + 't' => '\t', + '\\' => '\\', + '"' => '"', + '/' => '/', + _ => (char)content[i], + }; + i++; + } + else + { + chars[len++] = (char)content[i]; + i++; + } + } + return new string(chars, 0, len); + } + } +} \ No newline at end of file diff --git a/libs/server/Resp/Vector/Filter/ExprCompiler.cs b/libs/server/Resp/Vector/Filter/ExprCompiler.cs new file mode 100644 index 00000000000..b74f6a0bc70 --- /dev/null +++ b/libs/server/Resp/Vector/Filter/ExprCompiler.cs @@ -0,0 +1,450 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +using System; +using System.Buffers.Text; +using System.Collections.Generic; +using System.Text; + +namespace Garnet.server.Vector.Filter +{ + /// + /// Shunting-Yard compiler that tokenizes and compiles a filter expression string + /// into a flat postfix . + /// + /// Single-pass tokenize-and-compile approach modeled after Redis expr.c. + /// + /// The compiled program is a flat array of instructions + /// (values + operators in postfix order) that can be executed by . 
+ /// + /// Example: + /// Input expression: + /// .price < 100 and .category == "books" + /// Compiled postfix order: + /// .price 100 < .category "books" == and + /// + /// + internal static class ExprCompiler + { + private const int DefaultCapacity = 16; + + /// + /// Compile a filter expression (as UTF-8 bytes) into a flat postfix program. + /// Returns null on syntax error; optionally reports the error position. + /// + public static ExprProgram TryCompile(ReadOnlySpan expr, out int errpos) + { + errpos = -1; + if (expr.IsEmpty) + return null; + + // Phase 1: Tokenize into a flat list + var tokens = new List(DefaultCapacity); + var remaining = expr; + + while (!remaining.IsEmpty) + { + remaining = AttributeExtractor.TrimWhiteSpace(remaining); + if (remaining.IsEmpty) + break; + + // Determine if '-' should be a negative number sign or a subtraction operator + var minusIsNumber = false; + if (remaining[0] == (byte)'-' && remaining.Length > 1 && (AttributeExtractor.IsDigit(remaining[1]) || remaining[1] == (byte)'.')) + { + if (tokens.Count == 0) + { + minusIsNumber = true; + } + else + { + var prev = tokens[tokens.Count - 1]; + if (prev.TokenType == ExprTokenType.Op && prev.OpCode != OpCode.CParen) + minusIsNumber = true; + } + } + + // Number + if (AttributeExtractor.IsDigit(remaining[0]) || (minusIsNumber && remaining[0] == (byte)'-')) + { + var t = ParseNumber(ref remaining); + if (t.IsNone) { errpos = expr.Length - remaining.Length; return null; } + tokens.Add(t); + continue; + } + + // String literal + if (remaining[0] == (byte)'"' || remaining[0] == (byte)'\'') + { + var t = ParseString(ref remaining); + if (t.IsNone) { errpos = expr.Length - remaining.Length; return null; } + tokens.Add(t); + continue; + } + + // Selector (field access starting with '.') + if (remaining[0] == (byte)'.' 
&& remaining.Length > 1 && IsSelectorChar(remaining[1])) + { + var t = ParseSelector(ref remaining); + tokens.Add(t); + continue; + } + + // Tuple literal [1, "foo", 42] + if (remaining[0] == (byte)'[') + { + var t = ParseTuple(ref remaining); + if (t.IsNone) { errpos = expr.Length - remaining.Length; return null; } + tokens.Add(t); + continue; + } + + // Operator or literal keyword (null, true, false, not, and, or, in) + if (AttributeExtractor.IsLetter(remaining[0]) || IsOperatorSpecialChar(remaining[0])) + { + var t = ParseOperatorOrLiteral(ref remaining); + if (t.IsNone) { errpos = expr.Length - remaining.Length; return null; } + tokens.Add(t); + continue; + } + + errpos = expr.Length - remaining.Length; + return null; + } + + // Phase 2: Shunting-yard compilation to postfix + var program = new List(DefaultCapacity); + var opsStack = new Stack(DefaultCapacity); + var stackItems = 0; // track what would be on the values stack at runtime + + for (var i = 0; i < tokens.Count; i++) + { + var token = tokens[i]; + + // Values go directly to program + if (token.TokenType == ExprTokenType.Num || + token.TokenType == ExprTokenType.Str || + token.TokenType == ExprTokenType.Tuple || + token.TokenType == ExprTokenType.Selector || + token.TokenType == ExprTokenType.Null) + { + program.Add(token); + stackItems++; + continue; + } + + // Operators + if (token.TokenType == ExprTokenType.Op) + { + if (!ProcessOperator(token, program, opsStack, ref stackItems, out errpos)) + return null; + continue; + } + } + + // Flush remaining operators from the stack + while (opsStack.Count > 0) + { + var op = opsStack.Pop(); + if (op.OpCode == OpCode.OParen) + { + errpos = 0; + return null; // Unmatched '(' + } + + var arity = OpTable.GetArity(op.OpCode); + if (stackItems < arity) { errpos = 0; return null; } + program.Add(op); + stackItems = stackItems - arity + 1; + } + + // After compilation, exactly one value should remain on the stack + if (stackItems != 1) { errpos = 0; return null; } + 
+ return new ExprProgram { Instructions = program.ToArray(), Length = program.Count }; + } + + /// + /// Process an operator during shunting-yard compilation. + /// Handles parentheses, precedence, and right-associativity of **. + /// + private static bool ProcessOperator( + ExprToken op, + List program, + Stack opsStack, + ref int stackItems, + out int errpos) + { + errpos = -1; + + if (op.OpCode == OpCode.OParen) + { + opsStack.Push(op); + return true; + } + + if (op.OpCode == OpCode.CParen) + { + // Pop operators until matching '(' + while (true) + { + if (opsStack.Count == 0) { errpos = 0; return false; } // Unmatched ')' + var topOp = opsStack.Pop(); + if (topOp.OpCode == OpCode.OParen) + return true; + + var arity = OpTable.GetArity(topOp.OpCode); + if (stackItems < arity) { errpos = 0; return false; } + program.Add(topOp); + stackItems = stackItems - arity + 1; + } + } + + var curPrec = OpTable.GetPrecedence(op.OpCode); + + // Pop operators with higher or equal precedence + while (opsStack.Count > 0) + { + var topOp = opsStack.Peek(); + if (topOp.OpCode == OpCode.OParen) break; + + var topPrec = OpTable.GetPrecedence(topOp.OpCode); + if (topPrec < curPrec) break; + + // Right-associative: ** only pops if strictly higher + if (op.OpCode == OpCode.Pow && topPrec <= curPrec) break; + + opsStack.Pop(); + var arity = OpTable.GetArity(topOp.OpCode); + if (stackItems < arity) { errpos = 0; return false; } + program.Add(topOp); + stackItems = stackItems - arity + 1; + } + + opsStack.Push(op); + return true; + } + + // ======================== Tokenization helpers ======================== + // Shared helpers (IsDigit, IsLetter, IsLetterOrDigit, IsWhiteSpace, TrimWhiteSpace) + // live in AttributeExtractor and are reused here. + + private static bool IsOperatorSpecialChar(byte b) + { + return b == (byte)'+' || b == (byte)'-' || b == (byte)'*' || b == (byte)'%' || + b == (byte)'/' || b == (byte)'!' 
|| b == (byte)'(' || b == (byte)')' || + b == (byte)'<' || b == (byte)'>' || b == (byte)'=' || b == (byte)'|' || + b == (byte)'&'; + } + + private static bool IsSelectorChar(byte c) + { + return AttributeExtractor.IsLetterOrDigit(c) || c == (byte)'_' || c == (byte)'-'; + } + + private static ExprToken ParseNumber(ref ReadOnlySpan s) + { + var original = s; + if (s[0] == (byte)'-') s = s[1..]; + + while (!s.IsEmpty && (AttributeExtractor.IsDigit(s[0]) || s[0] == (byte)'.' || s[0] == (byte)'e' || s[0] == (byte)'E')) + s = s[1..]; + + var numSpan = original[..(original.Length - s.Length)]; + if (!Utf8Parser.TryParse(numSpan, out double value, out var bytesConsumed) || bytesConsumed != numSpan.Length) + { + s = original; + return default; + } + return ExprToken.NewNum(value); + } + + private static ExprToken ParseString(ref ReadOnlySpan s) + { + var quote = s[0]; + s = s[1..]; // Skip opening quote + var body = s; + var hasEscape = false; + + while (!s.IsEmpty) + { + if (s[0] == (byte)'\\' && s.Length > 1) + { + hasEscape = true; + s = s[2..]; // Skip escaped char + continue; + } + if (s[0] == quote) + { + var content = body[..(body.Length - s.Length)]; + string value; + if (!hasEscape) + { + value = Encoding.UTF8.GetString(content); + } + else + { + // Process escape sequences (matching Redis fastjson.c behavior) + var bytes = new byte[content.Length]; + var len = 0; + for (var i = 0; i < content.Length; i++) + { + if (content[i] == (byte)'\\' && i + 1 < content.Length) + { + i++; + bytes[len++] = content[i] switch + { + (byte)'n' => (byte)'\n', + (byte)'r' => (byte)'\r', + (byte)'t' => (byte)'\t', + (byte)'\\' => (byte)'\\', + (byte)'"' => (byte)'"', + (byte)'\'' => (byte)'\'', + _ => content[i], // Unknown escape — copy verbatim + }; + } + else + { + bytes[len++] = content[i]; + } + } + value = Encoding.UTF8.GetString(bytes, 0, len); + } + s = s[1..]; // Skip closing quote + return ExprToken.NewStr(value); + } + s = s[1..]; + } + return default; // Unterminated 
string + } + + private static ExprToken ParseSelector(ref ReadOnlySpan s) + { + s = s[1..]; // Skip the leading dot + var start = s; + while (!s.IsEmpty && IsSelectorChar(s[0])) s = s[1..]; + var name = Encoding.UTF8.GetString(start[..(start.Length - s.Length)]); + return ExprToken.NewSelector(name); + } + + private static ExprToken ParseTuple(ref ReadOnlySpan s) + { + s = s[1..]; // Skip '[' + var elements = new ExprToken[64]; // max 64 elements + var count = 0; + + s = AttributeExtractor.TrimWhiteSpace(s); + + // Handle empty tuple [] + if (!s.IsEmpty && s[0] == (byte)']') + { + s = s[1..]; + return ExprToken.NewTuple([], 0); + } + + while (true) + { + s = AttributeExtractor.TrimWhiteSpace(s); + if (s.IsEmpty) return default; + if (count >= elements.Length) return default; + + // Parse element: number or string + ExprToken ele; + if (AttributeExtractor.IsDigit(s[0]) || s[0] == (byte)'-') + { + ele = ParseNumber(ref s); + } + else if (s[0] == (byte)'"' || s[0] == (byte)'\'') + { + ele = ParseString(ref s); + } + else + { + return default; + } + if (ele.IsNone) return default; + + elements[count++] = ele; + + s = AttributeExtractor.TrimWhiteSpace(s); + if (s.IsEmpty) return default; + + if (s[0] == (byte)']') { s = s[1..]; break; } + if (s[0] != (byte)',') return default; + s = s[1..]; // Skip comma + } + + var result = new ExprToken[count]; + Array.Copy(elements, result, count); + return ExprToken.NewTuple(result, count); + } + + private static ExprToken ParseOperatorOrLiteral(ref ReadOnlySpan s) + { + var start = s; + + // Consume alphabetic or operator-special characters + while (!s.IsEmpty && (AttributeExtractor.IsLetter(s[0]) || IsOperatorSpecialChar(s[0]))) + s = s[1..]; + + var consumed = start[..(start.Length - s.Length)]; + if (consumed.IsEmpty) return default; + + // Check for literals + if (consumed.Length == 4 && consumed.SequenceEqual("null"u8)) + return ExprToken.NewNull(); + + if (consumed.Length == 4 && consumed.SequenceEqual("true"u8)) + return 
ExprToken.NewNum(1); + + if (consumed.Length == 5 && consumed.SequenceEqual("false"u8)) + return ExprToken.NewNum(0); + + // Find best matching operator (longest match) + OpCode bestCode = default; + var bestLen = 0; + TryMatchOp(consumed, "||"u8, OpCode.Or, ref bestCode, ref bestLen); + TryMatchOp(consumed, "or"u8, OpCode.Or, ref bestCode, ref bestLen); + TryMatchOp(consumed, "&&"u8, OpCode.And, ref bestCode, ref bestLen); + TryMatchOp(consumed, "and"u8, OpCode.And, ref bestCode, ref bestLen); + TryMatchOp(consumed, "**"u8, OpCode.Pow, ref bestCode, ref bestLen); + TryMatchOp(consumed, ">="u8, OpCode.Gte, ref bestCode, ref bestLen); + TryMatchOp(consumed, "<="u8, OpCode.Lte, ref bestCode, ref bestLen); + TryMatchOp(consumed, "=="u8, OpCode.Eq, ref bestCode, ref bestLen); + TryMatchOp(consumed, "!="u8, OpCode.Neq, ref bestCode, ref bestLen); + TryMatchOp(consumed, "not"u8, OpCode.Not, ref bestCode, ref bestLen); + TryMatchOp(consumed, "in"u8, OpCode.In, ref bestCode, ref bestLen); + TryMatchOp(consumed, "("u8, OpCode.OParen, ref bestCode, ref bestLen); + TryMatchOp(consumed, ")"u8, OpCode.CParen, ref bestCode, ref bestLen); + TryMatchOp(consumed, "+"u8, OpCode.Add, ref bestCode, ref bestLen); + TryMatchOp(consumed, "-"u8, OpCode.Sub, ref bestCode, ref bestLen); + TryMatchOp(consumed, "*"u8, OpCode.Mul, ref bestCode, ref bestLen); + TryMatchOp(consumed, "/"u8, OpCode.Div, ref bestCode, ref bestLen); + TryMatchOp(consumed, "%"u8, OpCode.Mod, ref bestCode, ref bestLen); + TryMatchOp(consumed, ">"u8, OpCode.Gt, ref bestCode, ref bestLen); + TryMatchOp(consumed, "<"u8, OpCode.Lt, ref bestCode, ref bestLen); + TryMatchOp(consumed, "!"u8, OpCode.Not, ref bestCode, ref bestLen); + + if (bestLen == 0) + { + s = start; + return default; + } + + // Rewind — only consume the matched operator length + s = start[bestLen..]; + return ExprToken.NewOp(bestCode); + } + + private static void TryMatchOp(ReadOnlySpan consumed, ReadOnlySpan opName, OpCode opCode, ref OpCode bestCode, 
ref int bestLen) + { + if (opName.Length > consumed.Length) return; + if (!consumed[..opName.Length].SequenceEqual(opName)) return; + if (opName.Length > bestLen) + { + bestCode = opCode; + bestLen = opName.Length; + } + } + } +} \ No newline at end of file diff --git a/libs/server/Resp/Vector/Filter/ExprRunner.cs b/libs/server/Resp/Vector/Filter/ExprRunner.cs new file mode 100644 index 00000000000..947b76cbe72 --- /dev/null +++ b/libs/server/Resp/Vector/Filter/ExprRunner.cs @@ -0,0 +1,444 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +using System; +using System.Buffers.Text; +using System.Collections.Generic; +using System.Globalization; +using System.Text; +namespace Garnet.server.Vector.Filter +{ + /// + /// Stack-based VM that executes a compiled against + /// raw JSON attribute bytes. + /// + /// Modeled after Redis expr.c exprRun() — walks the flat postfix program, + /// pushes values, and pops operands for operators. Selectors trigger + /// on-demand JSON field extraction via . + /// + /// Key design properties (matching Redis): + /// - No DOM allocation: JSON fields are extracted directly from the raw bytes. + /// - Compile once, run many: the program is reused across all candidate elements. + /// - Exact numeric equality (no epsilon) to match Redis behavior. + /// - Substring support for the IN operator when both sides are strings. + /// - null is a first-class token type. + /// + internal static class ExprRunner + { + private const int DefaultStackCapacity = 16; + + /// + /// Create a reusable evaluation stack with default capacity (16). + /// The caller owns the stack and can pass it to Run across multiple calls. + /// The stack is cleared at the start of each Run call, so the caller does not need to clear it. + /// + public static Stack CreateStack() => new Stack(DefaultStackCapacity); + + /// + /// Execute the compiled program against JSON attribute data. 
+ /// Returns true if the expression evaluates to a truthy value, false otherwise. + /// Returns false if the JSON is malformed or a selector cannot be resolved. + /// + /// The compiled postfix program. + /// Raw JSON attribute bytes to evaluate against. + /// A reusable evaluation stack obtained from . + public static bool Run(ExprProgram program, ReadOnlySpan json, Stack stack) + { + stack.Clear(); + + for (var i = 0; i < program.Length; i++) + { + var inst = program.Instructions[i]; + + // Selectors — extract field from JSON + if (inst.TokenType == ExprTokenType.Selector) + { + var extracted = AttributeExtractor.ExtractField(json, inst.Str); + if (extracted.IsNone) + { + stack.Clear(); + return false; // Selector not found → expression is false (matches Redis) + } + + stack.Push(extracted); + continue; + } + + if (!ExecuteInstruction(inst, json, stack)) + return false; + } + + var returnValue = false; + if (stack.Count > 0) + returnValue = ToBool(stack.Peek()) != 0; + + // Clear to release string references for GC + stack.Clear(); + return returnValue; + } + + /// + /// Execute the compiled program using pre-extracted field values (single-pass extraction). + /// Selectors are resolved from instead of re-scanning JSON. + /// + /// The compiled postfix program. + /// Raw JSON attribute bytes (needed for JsonRef string comparisons). + /// Selector names matching indices in . + /// Pre-extracted field values (one per selector name). + /// A reusable evaluation stack obtained from . 
+ public static bool Run(ExprProgram program, ReadOnlySpan json, + string[] selectorNames, ExprToken[] extractedFields, Stack stack) + { + stack.Clear(); + + for (var i = 0; i < program.Length; i++) + { + var inst = program.Instructions[i]; + + // Selectors — look up from pre-extracted fields + if (inst.TokenType == ExprTokenType.Selector) + { + var found = false; + for (var j = 0; j < selectorNames.Length; j++) + { + if (string.Equals(inst.Str, selectorNames[j], System.StringComparison.Ordinal)) + { + if (extractedFields[j].IsNone) + { + stack.Clear(); + return false; // Selector not found → expression is false + } + stack.Push(extractedFields[j]); + found = true; + break; + } + } + if (!found) + { + stack.Clear(); + return false; + } + continue; + } + + if (!ExecuteInstruction(inst, json, stack)) + return false; + } + + var returnValue = false; + if (stack.Count > 0) + returnValue = ToBool(stack.Peek()) != 0; + + stack.Clear(); + return returnValue; + } + + /// + /// Execute a single non-selector instruction (value push or operator evaluation). + /// Returns false if the stack is in an invalid state. + /// + private static bool ExecuteInstruction(ExprToken inst, ReadOnlySpan json, Stack stack) + { + // Non-operator values — push directly + if (inst.TokenType != ExprTokenType.Op) + { + stack.Push(inst); + return true; + } + + // Operators — pop operands, compute, push result + var arity = OpTable.GetArity(inst.OpCode); + if (stack.Count < arity) + { + stack.Clear(); + return false; + } + + ExprToken b = stack.Count > 0 ? stack.Pop() : default; + ExprToken a = arity == 2 && stack.Count > 0 ? stack.Pop() : default; + + var result = ExprToken.NewNum(0); + + switch (inst.OpCode) + { + case OpCode.Not: + result.Num = ToBool(b) == 0 ? 
1 : 0; + break; + case OpCode.Pow: + result.Num = Math.Pow(ToNum(a, json), ToNum(b, json)); + break; + case OpCode.Mul: + result.Num = ToNum(a, json) * ToNum(b, json); + break; + case OpCode.Div: + result.Num = ToNum(a, json) / ToNum(b, json); + break; + case OpCode.Mod: + result.Num = ToNum(a, json) % ToNum(b, json); + break; + case OpCode.Add: + result.Num = ToNum(a, json) + ToNum(b, json); + break; + case OpCode.Sub: + result.Num = ToNum(a, json) - ToNum(b, json); + break; + case OpCode.Gt: + result.Num = ToNum(a, json) > ToNum(b, json) ? 1 : 0; + break; + case OpCode.Gte: + result.Num = ToNum(a, json) >= ToNum(b, json) ? 1 : 0; + break; + case OpCode.Lt: + result.Num = ToNum(a, json) < ToNum(b, json) ? 1 : 0; + break; + case OpCode.Lte: + result.Num = ToNum(a, json) <= ToNum(b, json) ? 1 : 0; + break; + case OpCode.Eq: + result.Num = AreEqual(a, b, json) ? 1 : 0; + break; + case OpCode.Neq: + result.Num = !AreEqual(a, b, json) ? 1 : 0; + break; + case OpCode.In: + result.Num = EvalIn(a, b, json) ? 1 : 0; + break; + case OpCode.And: + result.Num = ToBool(a) != 0 && ToBool(b) != 0 ? 1 : 0; + break; + case OpCode.Or: + result.Num = ToBool(a) != 0 || ToBool(b) != 0 ? 1 : 0; + break; + } + + stack.Push(result); + return true; + } + + // ======================== Type conversion helpers ======================== + + /// + /// Convert a token to its numeric value. + /// Strings are parsed as numbers; unparseable strings return 0. + /// Matches Redis exprTokenToNum(). + /// + private static double ToNum(ExprToken t, ReadOnlySpan json) + { + if (t.IsNone) return 0; + if (t.TokenType == ExprTokenType.Num) return t.Num; + if (t.TokenType == ExprTokenType.Str) + { + if (t.IsJsonRef) + { + var slice = json.Slice(t.Utf8Start, t.Utf8Length); + return Utf8Parser.TryParse(slice, out double result, out var consumed) && consumed == slice.Length ? 
result : 0; + } + + if (t.Str != null) + { + return double.TryParse(t.Str, NumberStyles.Float | NumberStyles.AllowLeadingSign, + CultureInfo.InvariantCulture, out var result) ? result : 0; + } + } + return 0; + } + + /// + /// Convert a token to boolean (0 or 1). + /// Matches Redis exprTokenToBool(): null=0, num!=0=1, empty string=0, else=1. + /// + private static double ToBool(ExprToken t) + { + if (t.IsNone) return 0; + if (t.TokenType == ExprTokenType.Num) return t.Num != 0 ? 1 : 0; + if (t.TokenType == ExprTokenType.Str) + { + if (t.IsJsonRef) return t.Utf8Length == 0 ? 0 : 1; + return (t.Str == null || t.Str.Length == 0) ? 0 : 1; + } + if (t.TokenType == ExprTokenType.Null) return 0; + return 1; // Non-empty strings, tuples, etc. are truthy + } + + /// + /// Compare two tokens for equality. + /// Matches Redis exprTokensEqual(): + /// - Both strings → exact string comparison (handles JSON refs) + /// - Both numbers → exact numeric equality (no epsilon) + /// - One/both null → equal only if both null + /// - Mixed types → coerce to numbers and compare + /// + private static bool AreEqual(ExprToken a, ExprToken b, ReadOnlySpan json) + { + if (a.IsNone || b.IsNone) return a.IsNone && b.IsNone; + + // Both strings — handle 4 combinations of string/JsonRef + if (a.TokenType == ExprTokenType.Str && b.TokenType == ExprTokenType.Str) + { + if (!a.IsJsonRef && !b.IsJsonRef) + return string.Equals(a.Str, b.Str, StringComparison.Ordinal); + + if (a.IsJsonRef && b.IsJsonRef) + return json.Slice(a.Utf8Start, a.Utf8Length).SequenceEqual(json.Slice(b.Utf8Start, b.Utf8Length)); + + // One is a compiled string, one is a JSON ref + var str = a.IsJsonRef ? b.Str : a.Str; + var jsonRef = a.IsJsonRef ? 
a : b; + return Utf8Equals(str, json.Slice(jsonRef.Utf8Start, jsonRef.Utf8Length)); + } + + // Both numbers + if (a.TokenType == ExprTokenType.Num && b.TokenType == ExprTokenType.Num) + return a.Num == b.Num; // Exact comparison, matching Redis + + // One/both null + if (a.TokenType == ExprTokenType.Null || b.TokenType == ExprTokenType.Null) + return a.TokenType == b.TokenType; + + // Mixed types — coerce to number + return ToNum(a, json) == ToNum(b, json); + } + + /// + /// Evaluate the IN operator. + /// Matches Redis expr.c behavior: + /// 1. If b is a Tuple, check membership (element-wise AreEqual) + /// 2. If both a and b are strings, check substring containment + /// 3. Otherwise, false + /// + private static bool EvalIn(ExprToken a, ExprToken b, ReadOnlySpan json) + { + if (b.IsNone) return false; + + // Tuple membership (works for both expression tuples [1,2,3] and JSON array tuples) + if (b.TokenType == ExprTokenType.Tuple) + { + for (var i = 0; i < b.TupleLength; i++) + { + if (AreEqual(a, b.TupleElements[i], json)) + return true; + } + return false; + } + + // String substring check (matching Redis exprTokensStringIn) + if (!a.IsNone && a.TokenType == ExprTokenType.Str && b.TokenType == ExprTokenType.Str) + { + // Both compiled strings + if (!a.IsJsonRef && !b.IsJsonRef) + { + if (a.Str == null || b.Str == null) return false; + if (a.Str.Length > b.Str.Length) return false; + return b.Str.IndexOf(a.Str, StringComparison.Ordinal) >= 0; + } + + // Needle is compiled string, haystack is JSON ref (most common filter case) + if (!a.IsJsonRef && b.IsJsonRef) + { + if (a.Str == null) return false; + return Utf8Contains(json.Slice(b.Utf8Start, b.Utf8Length), a.Str); + } + + // Needle is JSON ref, haystack is compiled string + if (a.IsJsonRef && !b.IsJsonRef) + { + if (b.Str == null) return false; + return Utf8ContainsReverse(b.Str, json.Slice(a.Utf8Start, a.Utf8Length)); + } + + // Both JSON refs + var needleSlice = json.Slice(a.Utf8Start, a.Utf8Length); + var 
haystackSlice = json.Slice(b.Utf8Start, b.Utf8Length); + return haystackSlice.IndexOf(needleSlice) >= 0; + } + + return false; + } + + // ======================== UTF-8 byte comparison helpers ======================== + + /// + /// Compare a .NET string to raw UTF-8 bytes for equality without allocating. + /// Uses ASCII fast path; falls back to encoding for non-ASCII. + /// + private static bool Utf8Equals(string str, ReadOnlySpan utf8) + { + // ASCII fast path: for single-byte chars, string length == byte length + if (str.Length == utf8.Length) + { + for (var i = 0; i < utf8.Length; i++) + { + if (str[i] > 127) goto slowPath; + if (utf8[i] != (byte)str[i]) return false; + } + return true; + } + + slowPath: + // Slow path for multi-byte UTF-8 characters (rare in filter expressions) + var maxBytes = Encoding.UTF8.GetMaxByteCount(str.Length); + Span buf = maxBytes <= 512 ? stackalloc byte[maxBytes] : new byte[maxBytes]; + var written = Encoding.UTF8.GetBytes(str.AsSpan(), buf); + return utf8.SequenceEqual(buf[..written]); + } + + /// + /// Check if a UTF-8 byte span contains a .NET string as a substring. + /// ASCII fast path; falls back to encoding for non-ASCII. + /// + private static bool Utf8Contains(ReadOnlySpan haystack, string needle) + { + if (needle.Length == 0) return true; + if (needle.Length > haystack.Length) return false; + + // ASCII fast path + for (var i = 0; i <= haystack.Length - needle.Length; i++) + { + if (haystack[i] == (byte)needle[0]) + { + var match = true; + for (var j = 1; j < needle.Length; j++) + { + if (needle[j] > 127) goto slowPath; + if (haystack[i + j] != (byte)needle[j]) { match = false; break; } + } + if (match) return true; + } + } + return false; + + slowPath: + var haystackStr = Encoding.UTF8.GetString(haystack); + return haystackStr.IndexOf(needle, StringComparison.Ordinal) >= 0; + } + + /// + /// Check if a .NET string contains a UTF-8 byte span as a substring. 
+ /// + private static bool Utf8ContainsReverse(string haystack, ReadOnlySpan needle) + { + if (needle.Length == 0) return true; + if (needle.Length > haystack.Length) return false; + + // ASCII fast path + for (var i = 0; i <= haystack.Length - needle.Length; i++) + { + if ((byte)haystack[i] == needle[0]) + { + var match = true; + for (var j = 1; j < needle.Length; j++) + { + if (haystack[i + j] > 127) goto slowPath; + if ((byte)haystack[i + j] != needle[j]) { match = false; break; } + } + if (match) return true; + } + } + return false; + + slowPath: + var needleStr = Encoding.UTF8.GetString(needle); + return haystack.IndexOf(needleStr, StringComparison.Ordinal) >= 0; + } + } +} \ No newline at end of file diff --git a/libs/server/Resp/Vector/Filter/VectorFilterExpression.cs b/libs/server/Resp/Vector/Filter/VectorFilterExpression.cs new file mode 100644 index 00000000000..b07f2e745e6 --- /dev/null +++ b/libs/server/Resp/Vector/Filter/VectorFilterExpression.cs @@ -0,0 +1,281 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +namespace Garnet.server.Vector.Filter +{ + /// + /// Token types for the filter expression virtual machine. + /// + /// The filter engine uses a stack-based postfix VM (modeled after Redis expr.c). + /// A filter string like .year >= 2000 and .rating > 7 is compiled into a flat + /// array of instructions in postfix (reverse-Polish) order: + /// + /// + /// [SEL:year] [NUM:2000] [OP:Gte] [SEL:rating] [NUM:7] [OP:Gt] [OP:And] + /// + /// + /// At execution time, walks this array left-to-right: + /// + /// Value tokens (, , , + /// ) are pushed onto the evaluation stack. + /// tokens trigger on-demand JSON field extraction + /// via ; the extracted value is pushed. + /// tokens pop 1 or 2 operands, compute the result, + /// and push it back. + /// + /// + /// After processing all instructions the top-of-stack value is tested for truthiness + /// to produce the final bool filter result. 
+ /// + internal enum ExprTokenType : byte + { + None = 0, + Num = 1, + Str = 2, + Tuple = 3, + Selector = 4, + Op = 5, + Null = 6, + } + + /// + /// Operator opcodes used by the filter expression VM. + /// + /// Each opcode has a fixed precedence and arity defined in . + /// During compilation, uses the shunting-yard algorithm + /// to reorder operators from infix to postfix based on these precedence values. + /// During execution, pops the required number of operands + /// (arity), applies the operation, and pushes the result. + /// + /// Precedence and semantics match the Redis expr.c ExprOptable[]. + /// + internal enum OpCode : byte + { + // Precedence 0 + Or = 0, + + // Precedence 1 + And = 1, + + // Precedence 2 + Gt = 2, + Gte = 3, + Lt = 4, + Lte = 5, + Eq = 6, + Neq = 7, + In = 8, + + // Precedence 3 + Add = 9, + Sub = 10, + + // Precedence 4 + Mul = 11, + Div = 12, + Mod = 13, + + // Precedence 5 + Pow = 14, + + // Precedence 6 + Not = 15, + + // Precedence 7 (markers, not real operators) + OParen = 16, + CParen = 17, + } + + /// + /// A token in the compiled filter program or on the evaluation stack. + /// + /// Designed after Redis expr.c exprtoken — a single type that can represent any + /// value the VM needs: + /// + /// + /// TokenTypePayload used + /// + /// double (booleans are 1/0). + /// + /// — an interned or extracted string. + /// + /// — the JSON field name (e.g. "year" from .year). + /// + /// + — for the + /// in operator or JSON array values. + /// + /// — the operator to execute. + /// + /// No payload — represents JSON null or the null keyword. + /// + /// + /// Lifetime: Tokens inside the compiled are + /// allocated once and reused across all candidate evaluations. Tokens created during + /// execution (e.g. from JSON field extraction) are + /// transient and discarded after each ExprRunner.Run call. + /// + internal struct ExprToken + { + public ExprTokenType TokenType; + + /// Numeric value. Also used for bool: true=1, false=0. 
+ public double Num; + + /// String value — for Str and Selector types. + public string Str; + + /// Operator opcode — for Op type. + public OpCode OpCode; + + /// Tuple elements for IN operator. + public ExprToken[] TupleElements; + + /// Number of elements in the tuple. + public int TupleLength; + + /// Start byte-offset of a string value in the source JSON (for zero-allocation extraction). + public int Utf8Start; + + /// Byte-length of the string value in the source JSON (for zero-allocation extraction). + public int Utf8Length; + + /// True when this token is the default (uninitialized) value, replacing null checks. + public readonly bool IsNone => TokenType == ExprTokenType.None; + + /// True when this is a Str token that references raw JSON bytes instead of an allocated string. + public readonly bool IsJsonRef => TokenType == ExprTokenType.Str && Str == null; + + public static ExprToken NewNum(double value) + { + return new ExprToken { TokenType = ExprTokenType.Num, Num = value }; + } + + public static ExprToken NewStr(string value) + { + return new ExprToken { TokenType = ExprTokenType.Str, Str = value }; + } + + public static ExprToken NewSelector(string fieldName) + { + return new ExprToken { TokenType = ExprTokenType.Selector, Str = fieldName }; + } + + public static ExprToken NewOp(OpCode opCode) + { + return new ExprToken { TokenType = ExprTokenType.Op, OpCode = opCode }; + } + + public static ExprToken NewNull() + { + return new ExprToken { TokenType = ExprTokenType.Null }; + } + + public static ExprToken NewTuple(ExprToken[] elements, int length) + { + return new ExprToken { TokenType = ExprTokenType.Tuple, TupleElements = elements, TupleLength = length }; + } + + /// + /// Create a string token that references raw UTF-8 bytes in the source JSON — zero allocation. + /// The offset and length define the string content (excluding quotes) within the JSON span. 
+ /// + public static ExprToken NewJsonStr(int utf8Start, int utf8Length) + { + return new ExprToken { TokenType = ExprTokenType.Str, Utf8Start = utf8Start, Utf8Length = utf8Length }; + } + } + + /// + /// Operator metadata table, mirroring Redis ExprOptable. + /// Provides precedence and arity lookup for shunting-yard compilation. + /// + internal static class OpTable + { + // Indexed by OpCode for O(1) lookup. + // Entries: (Precedence, Arity). OpCode enum values are consecutive 0..17. + private static readonly (int Precedence, int Arity)[] Table; + + static OpTable() + { + Table = new (int, int)[18]; + Table[(int)OpCode.Or] = (0, 2); + Table[(int)OpCode.And] = (1, 2); + Table[(int)OpCode.Gt] = (2, 2); + Table[(int)OpCode.Gte] = (2, 2); + Table[(int)OpCode.Lt] = (2, 2); + Table[(int)OpCode.Lte] = (2, 2); + Table[(int)OpCode.Eq] = (2, 2); + Table[(int)OpCode.Neq] = (2, 2); + Table[(int)OpCode.In] = (2, 2); + Table[(int)OpCode.Add] = (3, 2); + Table[(int)OpCode.Sub] = (3, 2); + Table[(int)OpCode.Mul] = (4, 2); + Table[(int)OpCode.Div] = (4, 2); + Table[(int)OpCode.Mod] = (4, 2); + Table[(int)OpCode.Pow] = (5, 2); + Table[(int)OpCode.Not] = (6, 1); + Table[(int)OpCode.OParen] = (7, 0); + Table[(int)OpCode.CParen] = (7, 0); + } + + public static int GetPrecedence(OpCode code) => Table[(int)code].Precedence; + + public static int GetArity(OpCode code) => Table[(int)code].Arity; + } + + /// + /// Compiled filter expression program — the output of + /// and the input to ExprRunner.Run. + /// + /// Contains a flat postfix (reverse-Polish notation) instruction sequence where every + /// element is an : + /// + /// + /// Source: .year >= 2000 and .rating > 7 + /// Program: [SEL:year] [NUM:2000] [OP:>=] [SEL:rating] [NUM:7] [OP:>] [OP:and] + /// + /// + /// Compile-once, run-many: The program is compiled once per query, then + /// executed against every candidate element's raw JSON bytes. 
The program itself is + /// read-only during execution — all mutable state lives in the per-call evaluation + /// stack inside . + /// + /// This is the C# equivalent of the exprstate.program[] array in + /// Redis expr.c. The evaluation stack (values_stack in Redis) is + /// not stored here — it is allocated per-call in ExprRunner.Run. + /// + internal sealed class ExprProgram + { + /// The compiled postfix instruction sequence. + public ExprToken[] Instructions; + + /// Number of instructions in the program. + public int Length; + + /// Cached unique selector names (field names) used in this program. + private string[] selectorNames; + + /// + /// Get the unique selector (field) names referenced by this program. + /// Cached after first call — safe to call repeatedly. + /// + public string[] GetSelectors() + { + if (selectorNames != null) + return selectorNames; + + // Count unique selectors + var seen = new System.Collections.Generic.HashSet(System.StringComparer.Ordinal); + for (var i = 0; i < Length; i++) + { + if (Instructions[i].TokenType == ExprTokenType.Selector) + seen.Add(Instructions[i].Str); + } + + var names = new string[seen.Count]; + seen.CopyTo(names); + selectorNames = names; + return selectorNames; + } + } +} \ No newline at end of file diff --git a/libs/server/Resp/Vector/RespServerSessionVectors.cs b/libs/server/Resp/Vector/RespServerSessionVectors.cs index 6725785254e..3f34463356d 100644 --- a/libs/server/Resp/Vector/RespServerSessionVectors.cs +++ b/libs/server/Resp/Vector/RespServerSessionVectors.cs @@ -759,11 +759,16 @@ private bool NetworkVSIM(ref TGarnetApi storageApi) // TODO: these stackallocs are dangerous, need logic to avoid stack overflow Span idSpace = stackalloc byte[(DefaultResultSetSize * DefaultIdSize) + (DefaultResultSetSize * sizeof(int))]; Span distanceSpace = stackalloc float[DefaultResultSetSize]; - Span attributeSpace = withAttributes.Value ? 
stackalloc byte[(DefaultResultSetSize * DefaultAttributeSize) + (DefaultResultSetSize * sizeof(int))] : default; + var needFilter = filter.Value.Length > 0; + var needAttributes = withAttributes.Value || needFilter; + Span attributeSpace = needAttributes ? stackalloc byte[(DefaultResultSetSize * DefaultAttributeSize) + (DefaultResultSetSize * sizeof(int))] : default; var idResult = SpanByteAndMemory.FromPinnedSpan(idSpace); var distanceResult = SpanByteAndMemory.FromPinnedSpan(MemoryMarshal.Cast(distanceSpace)); var attributeResult = SpanByteAndMemory.FromPinnedSpan(attributeSpace); + // Bitmap: 1 bit per result. DefaultResultSetSize results = 8 bytes on stack. + Span bitmapSpace = needFilter ? stackalloc byte[(DefaultResultSetSize + 7) >> 3] : default; + var filterBitmapResult = SpanByteAndMemory.FromPinnedSpan(bitmapSpace); try { @@ -772,11 +777,11 @@ private bool NetworkVSIM(ref TGarnetApi storageApi) VectorIdFormat idFormat; if (!element.HasValue) { - res = storageApi.VectorSetValueSimilarity(key, valueType, ArgSlice.FromPinnedSpan(values), count.Value, delta.Value, searchExplorationFactor.Value, filter.Value, maxFilteringEffort.Value, withAttributes.Value, ref idResult, out idFormat, ref distanceResult, ref attributeResult, out vectorRes); + res = storageApi.VectorSetValueSimilarity(key, valueType, ArgSlice.FromPinnedSpan(values), count.Value, delta.Value, searchExplorationFactor.Value, filter.Value, maxFilteringEffort.Value, withAttributes.Value, ref idResult, out idFormat, ref distanceResult, ref attributeResult, out vectorRes, ref filterBitmapResult); } else { - res = storageApi.VectorSetElementSimilarity(key, element.Value, count.Value, delta.Value, searchExplorationFactor.Value, filter.Value, maxFilteringEffort.Value, withAttributes.Value, ref idResult, out idFormat, ref distanceResult, ref attributeResult, out vectorRes); + res = storageApi.VectorSetElementSimilarity(key, element.Value, count.Value, delta.Value, searchExplorationFactor.Value, 
filter.Value, maxFilteringEffort.Value, withAttributes.Value, ref idResult, out idFormat, ref distanceResult, ref attributeResult, out vectorRes, ref filterBitmapResult); } if (res == GarnetStatus.NOTFOUND) @@ -804,22 +809,39 @@ private bool NetworkVSIM(ref TGarnetApi storageApi) { var remainingIds = idResult.AsReadOnlySpan(); var distancesSpan = MemoryMarshal.Cast(distanceResult.AsReadOnlySpan()); - var remaininingAttributes = withAttributes.Value ? attributeResult.AsReadOnlySpan() : default; + var hasFilter = filterBitmapResult.Length > 0; + var filterBitmap = hasFilter ? filterBitmapResult.AsReadOnlySpan() : default; + var remaininingAttributes = (withAttributes.Value || hasFilter) ? attributeResult.AsReadOnlySpan() : default; - var arrayItemCount = distancesSpan.Length; + var totalFound = distancesSpan.Length; + + // Compute output count: if bitmap is present, popcount it; otherwise all results + int outputCount; + if (hasFilter) + { + outputCount = 0; + for (var b = 0; b < filterBitmap.Length; b++) + outputCount += System.Numerics.BitOperations.PopCount(filterBitmap[b]); + } + else + { + outputCount = totalFound; + } + + var arrayItemCount = outputCount; if (withScores.Value) { - arrayItemCount += distancesSpan.Length; + arrayItemCount += outputCount; } if (withAttributes.Value) { - arrayItemCount += distancesSpan.Length; + arrayItemCount += outputCount; } while (!RespWriteUtils.TryWriteArrayLength(arrayItemCount, ref dcurr, dend)) SendAndReset(); - for (var resultIndex = 0; resultIndex < distancesSpan.Length; resultIndex++) + for (var resultIndex = 0; resultIndex < totalFound; resultIndex++) { ReadOnlySpan elementData; @@ -855,6 +877,18 @@ private bool NetworkVSIM(ref TGarnetApi storageApi) throw new GarnetException($"Unexpected id format: {idFormat}"); } + // Check filter bitmap — skip results that didn't pass the filter + if (hasFilter && (filterBitmap[resultIndex >> 3] & (1 << (resultIndex & 7))) == 0) + { + // Advance attribute reader for skipped results 
(attributes are always present when bitmap exists) + if (!remaininingAttributes.IsEmpty) + { + var skipAttrLen = BinaryPrimitives.ReadInt32LittleEndian(remaininingAttributes); + remaininingAttributes = remaininingAttributes[(sizeof(int) + skipAttrLen)..]; + } + continue; + } + while (!RespWriteUtils.TryWriteBulkString(elementData, ref dcurr, dend)) SendAndReset(); @@ -880,6 +914,12 @@ private bool NetworkVSIM(ref TGarnetApi storageApi) while (!RespWriteUtils.TryWriteBulkString(attr, ref dcurr, dend)) SendAndReset(); } + else if (!remaininingAttributes.IsEmpty) + { + // Attributes fetched for filtering but not requested — advance reader + var attrLen = BinaryPrimitives.ReadInt32LittleEndian(remaininingAttributes); + remaininingAttributes = remaininingAttributes[(sizeof(int) + attrLen)..]; + } } } } @@ -908,6 +948,7 @@ private bool NetworkVSIM(ref TGarnetApi storageApi) idResult.Memory?.Dispose(); distanceResult.Memory?.Dispose(); attributeResult.Memory?.Dispose(); + filterBitmapResult.Memory?.Dispose(); } } finally diff --git a/libs/server/Resp/Vector/VectorManager.cs b/libs/server/Resp/Vector/VectorManager.cs index 300cf7e8b05..22bc076f891 100644 --- a/libs/server/Resp/Vector/VectorManager.cs +++ b/libs/server/Resp/Vector/VectorManager.cs @@ -1,4 +1,4 @@ -// Copyright (c) Microsoft Corporation. +// Copyright (c) Microsoft Corporation. // Licensed under the MIT license. 
using System; @@ -11,6 +11,7 @@ using System.Threading.Tasks; using Garnet.common; using Garnet.networking; +using Garnet.server.Vector.Filter; using Microsoft.Extensions.Logging; using Tsavorite.core; @@ -490,7 +491,8 @@ internal VectorManagerResult ValueSimilarity( ref SpanByteAndMemory outputIds, out VectorIdFormat outputIdFormat, ref SpanByteAndMemory outputDistances, - ref SpanByteAndMemory outputAttributes + ref SpanByteAndMemory outputAttributes, + ref SpanByteAndMemory filterBitmap ) { AssertHaveStorageSession(); @@ -559,11 +561,17 @@ out var continuation return VectorManagerResult.BadParams; } - if (includeAttributes) + if (includeAttributes || !filter.IsEmpty) { FetchVectorElementAttributes(context, found, outputIds, ref outputAttributes); } + // Apply post-filtering if filter is specified + if (!filter.IsEmpty) + { + ApplyPostFilter(filter, found, outputAttributes.AsReadOnlySpan(), filterBitmap.AsSpan()); + } + if (continuation != 0) { // TODO: paged results! @@ -600,7 +608,8 @@ internal VectorManagerResult ElementSimilarity( ref SpanByteAndMemory outputIds, out VectorIdFormat outputIdFormat, ref SpanByteAndMemory outputDistances, - ref SpanByteAndMemory outputAttributes + ref SpanByteAndMemory outputAttributes, + ref SpanByteAndMemory filterBitmap ) { AssertHaveStorageSession(); @@ -661,11 +670,17 @@ out var continuation return VectorManagerResult.BadParams; } - if (includeAttributes) + if (includeAttributes || !filter.IsEmpty) { FetchVectorElementAttributes(context, found, outputIds, ref outputAttributes); } + // Apply post-filtering if filter is specified + if (!filter.IsEmpty) + { + ApplyPostFilter(filter, found, outputAttributes.AsReadOnlySpan(), filterBitmap.AsSpan()); + } + if (continuation != 0) { // TODO: paged results! @@ -896,6 +911,99 @@ internal static uint CalculateValueDimensions(VectorValueType valueType, ReadOnl } } + /// + /// Apply post-filtering to vector search results using a compiled filter expression. 
+ /// + /// Two-phase approach: + /// 1. COMPILE: The filter string is compiled ONCE into a postfix program (ExprCompiler). + /// 2. INDEX: Build a field offset index for ALL candidates in one pass over the + /// contiguous attributes span. The index records (offset, length) for each + /// field the filter needs, per candidate. This is the "simdjson-style" structural + /// pass — future optimization can use SIMD to find delimiters. + /// 3. EVALUATE: For each candidate, create tokens lazily from indexed positions + /// and run the postfix program. Only touches attribute bytes for fields the + /// filter actually references. + /// + /// The is populated with one bit per result: + /// bit i = 1 means result i passed the filter. + /// + private static int ApplyPostFilter( + ReadOnlySpan filter, + int numResults, + ReadOnlySpan attributesSpan, + Span filterBitmap) + { + if (numResults == 0) + { + return 0; + } + + // Phase 1: Compile the filter expression into a postfix program. + var program = ExprCompiler.TryCompile(filter, out _); + if (program == null) + { + return 0; // Invalid filter → filter out all results (matches Redis behavior) + } + + filterBitmap.Clear(); + + var selectors = program.GetSelectors(); + var numSelectors = selectors.Length; + var stride = AttributeExtractor.FieldIndexStride(numSelectors); + + // Phase 2: Build field offset index for ALL candidates in one pass. + // Index layout per doc: [fieldCount, field0_offset, field0_len, ..., fieldN_offset, fieldN_len] + // Size is predictable: numResults * stride ints. + var indexSize = numResults * stride; + var fieldIndex = indexSize <= 256 + ? stackalloc int[indexSize] + : new int[indexSize]; + + AttributeExtractor.BuildFieldIndex(attributesSpan, numResults, selectors, fieldIndex); + + // Phase 3: Evaluate filter per candidate using indexed positions. 
+ var filteredCount = 0; + var stack = ExprRunner.CreateStack(); + var extractedFields = new ExprToken[numSelectors]; + var remaining = attributesSpan; + + for (var i = 0; i < numResults; i++) + { + var attrLen = BinaryPrimitives.ReadInt32LittleEndian(remaining); + var attrData = remaining.Slice(sizeof(int), attrLen); + + var docIndex = fieldIndex.Slice(i * stride, stride); + var fieldCount = docIndex[0]; + + if (fieldCount < 0) + { + // Malformed JSON — skip + remaining = remaining[(sizeof(int) + attrLen)..]; + continue; + } + + // Create tokens lazily from indexed positions — only parses values the filter touches + for (var f = 0; f < numSelectors; f++) + { + var offset = docIndex[1 + 2 * f]; + var len = docIndex[1 + 2 * f + 1]; + extractedFields[f] = offset >= 0 + ? AttributeExtractor.ParseValueAt(attrData, offset, len) + : default; + } + + if (ExprRunner.Run(program, attrData, selectors, extractedFields, stack)) + { + filterBitmap[i >> 3] |= (byte)(1 << (i & 7)); + filteredCount++; + } + + remaining = remaining[(sizeof(int) + attrLen)..]; + } + + return filteredCount; + } + [Conditional("DEBUG")] private static void AssertHaveStorageSession() { diff --git a/libs/server/Storage/Session/MainStore/VectorStoreOps.cs b/libs/server/Storage/Session/MainStore/VectorStoreOps.cs index 24e04921775..9073d73fa83 100644 --- a/libs/server/Storage/Session/MainStore/VectorStoreOps.cs +++ b/libs/server/Storage/Session/MainStore/VectorStoreOps.cs @@ -200,7 +200,7 @@ public unsafe GarnetStatus VectorSetRemove(SpanByte key, SpanByte element) /// Perform a similarity search on an existing Vector Set given a vector as a bunch of floats. 
/// [SkipLocalsInit] - public unsafe GarnetStatus VectorSetValueSimilarity(SpanByte key, VectorValueType valueType, ArgSlice values, int count, float delta, int searchExplorationFactor, ReadOnlySpan filter, int maxFilteringEffort, bool includeAttributes, ref SpanByteAndMemory outputIds, out VectorIdFormat outputIdFormat, ref SpanByteAndMemory outputDistances, ref SpanByteAndMemory outputAttributes, out VectorManagerResult result) + public unsafe GarnetStatus VectorSetValueSimilarity(SpanByte key, VectorValueType valueType, ArgSlice values, int count, float delta, int searchExplorationFactor, ReadOnlySpan filter, int maxFilteringEffort, bool includeAttributes, ref SpanByteAndMemory outputIds, out VectorIdFormat outputIdFormat, ref SpanByteAndMemory outputDistances, ref SpanByteAndMemory outputAttributes, out VectorManagerResult result, ref SpanByteAndMemory filterBitmap) { parseState.InitializeWithArgument(ArgSlice.FromPinnedSpan(key.AsReadOnlySpan())); @@ -218,7 +218,7 @@ public unsafe GarnetStatus VectorSetValueSimilarity(SpanByte key, VectorValueTyp return status; } - result = vectorManager.ValueSimilarity(indexSpan, valueType, values.ReadOnlySpan, count, delta, searchExplorationFactor, filter, maxFilteringEffort, includeAttributes, ref outputIds, out outputIdFormat, ref outputDistances, ref outputAttributes); + result = vectorManager.ValueSimilarity(indexSpan, valueType, values.ReadOnlySpan, count, delta, searchExplorationFactor, filter, maxFilteringEffort, includeAttributes, ref outputIds, out outputIdFormat, ref outputDistances, ref outputAttributes, ref filterBitmap); return GarnetStatus.OK; } @@ -228,7 +228,7 @@ public unsafe GarnetStatus VectorSetValueSimilarity(SpanByte key, VectorValueTyp /// Perform a similarity search on an existing Vector Set given an element that is already in the Vector Set. 
/// [SkipLocalsInit] - public unsafe GarnetStatus VectorSetElementSimilarity(SpanByte key, ReadOnlySpan element, int count, float delta, int searchExplorationFactor, ReadOnlySpan filter, int maxFilteringEffort, bool includeAttributes, ref SpanByteAndMemory outputIds, out VectorIdFormat outputIdFormat, ref SpanByteAndMemory outputDistances, ref SpanByteAndMemory outputAttributes, out VectorManagerResult result) + public unsafe GarnetStatus VectorSetElementSimilarity(SpanByte key, ReadOnlySpan element, int count, float delta, int searchExplorationFactor, ReadOnlySpan filter, int maxFilteringEffort, bool includeAttributes, ref SpanByteAndMemory outputIds, out VectorIdFormat outputIdFormat, ref SpanByteAndMemory outputDistances, ref SpanByteAndMemory outputAttributes, out VectorManagerResult result, ref SpanByteAndMemory filterBitmap) { parseState.InitializeWithArgument(ArgSlice.FromPinnedSpan(key.AsReadOnlySpan())); @@ -245,7 +245,7 @@ public unsafe GarnetStatus VectorSetElementSimilarity(SpanByte key, ReadOnlySpan return status; } - result = vectorManager.ElementSimilarity(indexSpan, element, count, delta, searchExplorationFactor, filter, maxFilteringEffort, includeAttributes, ref outputIds, out outputIdFormat, ref outputDistances, ref outputAttributes); + result = vectorManager.ElementSimilarity(indexSpan, element, count, delta, searchExplorationFactor, filter, maxFilteringEffort, includeAttributes, ref outputIds, out outputIdFormat, ref outputDistances, ref outputAttributes, ref filterBitmap); return GarnetStatus.OK; } } diff --git a/test/Garnet.test/Filter/AttributeExtractorTests.cs b/test/Garnet.test/Filter/AttributeExtractorTests.cs new file mode 100644 index 00000000000..14721249912 --- /dev/null +++ b/test/Garnet.test/Filter/AttributeExtractorTests.cs @@ -0,0 +1,504 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. 
+ +using System.Text; +using Allure.NUnit; +using Garnet.server.Vector.Filter; +using NUnit.Framework; +using NUnit.Framework.Legacy; + +namespace Garnet.test +{ + /// + /// Tests for — the raw-byte JSON field extractor + /// used by the filter expression VM to resolve selectors on demand. + /// + [AllureNUnit] + [TestFixture] + public class AttributeExtractorTests : AllureTestBase + { + private static ExprToken Extract(string json, string field) + => AttributeExtractor.ExtractField(Encoding.UTF8.GetBytes(json), field); + + /// + /// Get the string value from an ExprToken, handling both allocated strings and JSON byte refs. + /// + private static string GetStr(string json, ExprToken token) + { + if (token.Str != null) return token.Str; + if (token.IsJsonRef) + { + var bytes = Encoding.UTF8.GetBytes(json); + return Encoding.UTF8.GetString(bytes, token.Utf8Start, token.Utf8Length); + } + return null; + } + + // ======================== Number extraction ======================== + + [Test] + public void ExtractField_Integer() + { + var token = Extract("{\"year\":1980}", "year"); + ClassicAssert.AreEqual(ExprTokenType.Num, token.TokenType); + ClassicAssert.AreEqual(1980.0, token.Num); + } + + [Test] + public void ExtractField_NegativeInteger() + { + var token = Extract("{\"temp\":-42}", "temp"); + ClassicAssert.AreEqual(ExprTokenType.Num, token.TokenType); + ClassicAssert.AreEqual(-42.0, token.Num); + } + + [Test] + public void ExtractField_Decimal() + { + var token = Extract("{\"rating\":4.5}", "rating"); + ClassicAssert.AreEqual(ExprTokenType.Num, token.TokenType); + ClassicAssert.AreEqual(4.5, token.Num, 0.001); + } + + [Test] + public void ExtractField_ScientificNotation() + { + var token = Extract("{\"val\":1.5e3}", "val"); + ClassicAssert.AreEqual(ExprTokenType.Num, token.TokenType); + ClassicAssert.AreEqual(1500.0, token.Num); + } + + [Test] + public void ExtractField_Zero() + { + var token = Extract("{\"val\":0}", "val"); + 
ClassicAssert.AreEqual(ExprTokenType.Num, token.TokenType); + ClassicAssert.AreEqual(0.0, token.Num); + } + + // ======================== String extraction ======================== + + [Test] + public void ExtractField_SimpleString() + { + var json = "{\"genre\":\"action\"}"; + var token = Extract(json, "genre"); + ClassicAssert.AreEqual(ExprTokenType.Str, token.TokenType); + ClassicAssert.IsTrue(token.IsJsonRef, "Non-escaped strings should be JSON byte refs"); + ClassicAssert.AreEqual("action", GetStr(json, token)); + } + + [Test] + public void ExtractField_EmptyString() + { + var json = "{\"name\":\"\"}"; + var token = Extract(json, "name"); + ClassicAssert.AreEqual(ExprTokenType.Str, token.TokenType); + ClassicAssert.IsTrue(token.IsJsonRef); + ClassicAssert.AreEqual("", GetStr(json, token)); + } + + [Test] + public void ExtractField_StringWithEscapedQuote() + { + var token = Extract("{\"name\":\"hello\\\"world\"}", "name"); + ClassicAssert.AreEqual(ExprTokenType.Str, token.TokenType); + ClassicAssert.IsFalse(token.IsJsonRef, "Escaped strings should be materialized"); + ClassicAssert.AreEqual("hello\"world", token.Str); + } + + [Test] + public void ExtractField_StringWithEscapedBackslash() + { + var token = Extract("{\"path\":\"c:\\\\temp\"}", "path"); + ClassicAssert.AreEqual(ExprTokenType.Str, token.TokenType); + ClassicAssert.IsFalse(token.IsJsonRef); + ClassicAssert.AreEqual("c:\\temp", token.Str); + } + + [Test] + public void ExtractField_StringWithEscapedNewline() + { + var token = Extract("{\"text\":\"line1\\nline2\"}", "text"); + ClassicAssert.AreEqual(ExprTokenType.Str, token.TokenType); + ClassicAssert.IsFalse(token.IsJsonRef); + ClassicAssert.AreEqual("line1\nline2", token.Str); + } + + [Test] + public void ExtractField_StringWithEscapedTab() + { + var token = Extract("{\"text\":\"col1\\tcol2\"}", "text"); + ClassicAssert.AreEqual(ExprTokenType.Str, token.TokenType); + ClassicAssert.IsFalse(token.IsJsonRef); + ClassicAssert.AreEqual("col1\tcol2", 
token.Str); + } + + [Test] + public void ExtractField_StringWithSlashEscape() + { + var token = Extract("{\"url\":\"http:\\/\\/example.com\"}", "url"); + ClassicAssert.AreEqual(ExprTokenType.Str, token.TokenType); + ClassicAssert.IsFalse(token.IsJsonRef); + ClassicAssert.AreEqual("http://example.com", token.Str); + } + + // ======================== Boolean extraction ======================== + + [Test] + public void ExtractField_True() + { + var token = Extract("{\"active\":true}", "active"); + ClassicAssert.AreEqual(ExprTokenType.Num, token.TokenType); + ClassicAssert.AreEqual(1.0, token.Num); + } + + [Test] + public void ExtractField_False() + { + var token = Extract("{\"deleted\":false}", "deleted"); + ClassicAssert.AreEqual(ExprTokenType.Num, token.TokenType); + ClassicAssert.AreEqual(0.0, token.Num); + } + + // ======================== Null extraction ======================== + + [Test] + public void ExtractField_Null() + { + var token = Extract("{\"value\":null}", "value"); + ClassicAssert.AreEqual(ExprTokenType.Null, token.TokenType); + } + + // ======================== Array extraction ======================== + + [Test] + public void ExtractField_StringArray() + { + var json = "{\"tags\":[\"classic\",\"popular\"]}"; + var token = Extract(json, "tags"); + ClassicAssert.AreEqual(ExprTokenType.Tuple, token.TokenType); + ClassicAssert.AreEqual(2, token.TupleLength); + ClassicAssert.AreEqual(ExprTokenType.Str, token.TupleElements[0].TokenType); + ClassicAssert.AreEqual("classic", GetStr(json, token.TupleElements[0])); + ClassicAssert.AreEqual("popular", GetStr(json, token.TupleElements[1])); + } + + [Test] + public void ExtractField_NumericArray() + { + var token = Extract("{\"scores\":[1,2,3]}", "scores"); + ClassicAssert.AreEqual(ExprTokenType.Tuple, token.TokenType); + ClassicAssert.AreEqual(3, token.TupleLength); + ClassicAssert.AreEqual(1.0, token.TupleElements[0].Num); + ClassicAssert.AreEqual(2.0, token.TupleElements[1].Num); + 
ClassicAssert.AreEqual(3.0, token.TupleElements[2].Num); + } + + [Test] + public void ExtractField_MixedArray() + { + var json = "{\"data\":[1,\"two\",true,null]}"; + var token = Extract(json, "data"); + ClassicAssert.AreEqual(ExprTokenType.Tuple, token.TokenType); + ClassicAssert.AreEqual(4, token.TupleLength); + ClassicAssert.AreEqual(ExprTokenType.Num, token.TupleElements[0].TokenType); + ClassicAssert.AreEqual(1.0, token.TupleElements[0].Num); + ClassicAssert.AreEqual(ExprTokenType.Str, token.TupleElements[1].TokenType); + ClassicAssert.AreEqual("two", GetStr(json, token.TupleElements[1])); + ClassicAssert.AreEqual(ExprTokenType.Num, token.TupleElements[2].TokenType); + ClassicAssert.AreEqual(1.0, token.TupleElements[2].Num); // true → 1 + ClassicAssert.AreEqual(ExprTokenType.Null, token.TupleElements[3].TokenType); + } + + [Test] + public void ExtractField_EmptyArray() + { + var token = Extract("{\"items\":[]}", "items"); + ClassicAssert.AreEqual(ExprTokenType.Tuple, token.TokenType); + ClassicAssert.AreEqual(0, token.TupleLength); + } + + // ======================== Multiple fields ======================== + + [Test] + public void ExtractField_FirstField() + { + var token = Extract("{\"a\":1,\"b\":2,\"c\":3}", "a"); + ClassicAssert.AreEqual(ExprTokenType.Num, token.TokenType); + ClassicAssert.AreEqual(1.0, token.Num); + } + + [Test] + public void ExtractField_MiddleField() + { + var token = Extract("{\"a\":1,\"b\":2,\"c\":3}", "b"); + ClassicAssert.AreEqual(ExprTokenType.Num, token.TokenType); + ClassicAssert.AreEqual(2.0, token.Num); + } + + [Test] + public void ExtractField_LastField() + { + var token = Extract("{\"a\":1,\"b\":2,\"c\":3}", "c"); + ClassicAssert.AreEqual(ExprTokenType.Num, token.TokenType); + ClassicAssert.AreEqual(3.0, token.Num); + } + + [Test] + public void ExtractField_SkipsValuesOfDifferentTypes() + { + // Ensure the extractor correctly skips strings, arrays, objects, booleans, nulls, and numbers + // when seeking a later field + var 
json = "{\"s\":\"hello\",\"a\":[1,2],\"o\":{\"nested\":true},\"b\":false,\"n\":null,\"target\":42}"; + var token = Extract(json, "target"); + ClassicAssert.AreEqual(ExprTokenType.Num, token.TokenType); + ClassicAssert.AreEqual(42.0, token.Num); + } + + // ======================== Missing / not found ======================== + + [Test] + public void ExtractField_MissingField_ReturnsNone() + { + var token = Extract("{\"year\":1980}", "rating"); + ClassicAssert.IsTrue(token.IsNone); + } + + [Test] + public void ExtractField_EmptyObject_ReturnsNone() + { + var token = Extract("{}", "anything"); + ClassicAssert.IsTrue(token.IsNone); + } + + // ======================== Whitespace handling ======================== + + [Test] + public void ExtractField_WithWhitespace() + { + var token = Extract(" { \"year\" : 1980 } ", "year"); + ClassicAssert.AreEqual(ExprTokenType.Num, token.TokenType); + ClassicAssert.AreEqual(1980.0, token.Num); + } + + [Test] + public void ExtractField_WithNewlines() + { + var json = "{\n \"year\": 1980,\n \"rating\": 4.5\n}"; + var token = Extract(json, "rating"); + ClassicAssert.AreEqual(ExprTokenType.Num, token.TokenType); + ClassicAssert.AreEqual(4.5, token.Num, 0.001); + } + + // ======================== Nested objects (skipped) ======================== + + [Test] + public void ExtractField_NestedObject_ReturnsNone() + { + // Nested objects are not supported as values — should return IsNone + var token = Extract("{\"meta\":{\"key\":\"val\"}}", "meta"); + ClassicAssert.IsTrue(token.IsNone); + } + + [Test] + public void ExtractField_SkipsNestedObjectToFindLaterField() + { + var json = "{\"meta\":{\"key\":\"val\"},\"year\":2020}"; + var token = Extract(json, "year"); + ClassicAssert.AreEqual(ExprTokenType.Num, token.TokenType); + ClassicAssert.AreEqual(2020.0, token.Num); + } + + [Test] + public void ExtractField_SkipsDeeplyNestedObject() + { + var json = "{\"deep\":{\"a\":{\"b\":{\"c\":1}}},\"target\":99}"; + var token = Extract(json, "target"); + 
ClassicAssert.AreEqual(ExprTokenType.Num, token.TokenType); + ClassicAssert.AreEqual(99.0, token.Num); + } + + // ======================== Malformed / non-JSON input ======================== + + [Test] + public void ExtractField_NotJson_ReturnsNone() + { + var token = Extract("this is not json", "year"); + ClassicAssert.IsTrue(token.IsNone); + } + + [Test] + public void ExtractField_EmptyInput_ReturnsNone() + { + var token = AttributeExtractor.ExtractField([], "year"); + ClassicAssert.IsTrue(token.IsNone); + } + + [Test] + public void ExtractField_ArrayAtRoot_ReturnsNone() + { + var token = Extract("[1,2,3]", "year"); + ClassicAssert.IsTrue(token.IsNone); + } + + [Test] + public void ExtractField_TruncatedJson_ReturnsNone() + { + var token = Extract("{\"year\":", "year"); + ClassicAssert.IsTrue(token.IsNone); + } + + [Test] + public void ExtractField_MissingColon_ReturnsNone() + { + var token = Extract("{\"year\" 1980}", "year"); + ClassicAssert.IsTrue(token.IsNone); + } + + [Test] + public void ExtractField_UnterminatedString_ReturnsNone() + { + var token = Extract("{\"name\":\"hello}", "name"); + ClassicAssert.IsTrue(token.IsNone); + } + + [Test] + public void ExtractField_UnterminatedKey_ReturnsNone() + { + var token = Extract("{\"name:\"hello\"}", "name"); + // The key "name will match to :, parsing should fail gracefully + ClassicAssert.IsTrue(token.IsNone); + } + + // ======================== Edge cases ======================== + + [Test] + public void ExtractField_StringValueContainingBraces() + { + var json = "{\"data\":\"{not an object}\"}"; + var token = Extract(json, "data"); + ClassicAssert.AreEqual(ExprTokenType.Str, token.TokenType); + ClassicAssert.AreEqual("{not an object}", GetStr(json, token)); + } + + [Test] + public void ExtractField_StringValueContainingBrackets() + { + var json = "{\"data\":\"[not an array]\"}"; + var token = Extract(json, "data"); + ClassicAssert.AreEqual(ExprTokenType.Str, token.TokenType); + ClassicAssert.AreEqual("[not an 
array]", GetStr(json, token)); + } + + [Test] + public void ExtractField_StringValueContainingComma() + { + var json = "{\"msg\":\"hello, world\"}"; + var token = Extract(json, "msg"); + ClassicAssert.AreEqual(ExprTokenType.Str, token.TokenType); + ClassicAssert.AreEqual("hello, world", GetStr(json, token)); + } + + [Test] + public void ExtractField_FieldNameCaseSensitive() + { + var token = Extract("{\"Year\":2020}", "year"); + ClassicAssert.IsTrue(token.IsNone); // Case mismatch + + var token2 = Extract("{\"Year\":2020}", "Year"); + ClassicAssert.AreEqual(ExprTokenType.Num, token2.TokenType); + ClassicAssert.AreEqual(2020.0, token2.Num); + } + + [Test] + public void ExtractField_FieldWithHyphen() + { + // Hyphens in JSON keys are valid + var token = Extract("{\"my-field\":42}", "my-field"); + ClassicAssert.AreEqual(ExprTokenType.Num, token.TokenType); + ClassicAssert.AreEqual(42.0, token.Num); + } + + [Test] + public void ExtractField_FieldWithUnderscore() + { + var token = Extract("{\"my_field\":42}", "my_field"); + ClassicAssert.AreEqual(ExprTokenType.Num, token.TokenType); + ClassicAssert.AreEqual(42.0, token.Num); + } + + [Test] + public void ExtractField_FieldWithDigits() + { + var token = Extract("{\"field123\":99}", "field123"); + ClassicAssert.AreEqual(ExprTokenType.Num, token.TokenType); + ClassicAssert.AreEqual(99.0, token.Num); + } + + [Test] + public void ExtractField_BooleanLiteralNotFollowedByDelimiter_ReturnsNone() + { + // "trueish" should not match as true + var token = Extract("{\"val\":trueish}", "val"); + ClassicAssert.IsTrue(token.IsNone); + } + + [Test] + public void ExtractField_NullLiteralNotFollowedByDelimiter_ReturnsNone() + { + var token = Extract("{\"val\":nullify}", "val"); + ClassicAssert.IsTrue(token.IsNone); + } + + [Test] + public void ExtractField_ArrayWithNestedArrays() + { + // ParseArrayToken calls ParseValueToken which handles nested arrays recursively + var token = Extract("{\"matrix\":[[1,2],[3,4]]}", "matrix"); + 
ClassicAssert.AreEqual(ExprTokenType.Tuple, token.TokenType); + ClassicAssert.AreEqual(2, token.TupleLength); + // Each inner element is itself a Tuple + ClassicAssert.AreEqual(ExprTokenType.Tuple, token.TupleElements[0].TokenType); + ClassicAssert.AreEqual(2, token.TupleElements[0].TupleLength); + } + + [Test] + public void ExtractField_LargeNumberOfFields() + { + // Ensure we can skip many fields to find the target + var sb = new StringBuilder("{"); + for (var i = 0; i < 100; i++) + { + if (i > 0) sb.Append(','); + sb.Append($"\"field{i}\":{i}"); + } + sb.Append('}'); + + var token = Extract(sb.ToString(), "field99"); + ClassicAssert.AreEqual(ExprTokenType.Num, token.TokenType); + ClassicAssert.AreEqual(99.0, token.Num); + } + + [Test] + public void ExtractField_SkipsArrayWithStringsContainingQuotes() + { + // Ensure the array skipper handles escaped quotes inside string elements + var json = "{\"arr\":[\"he\\\"llo\",\"world\"],\"target\":1}"; + var token = Extract(json, "target"); + ClassicAssert.AreEqual(ExprTokenType.Num, token.TokenType); + ClassicAssert.AreEqual(1.0, token.Num); + } + + [Test] + public void ExtractField_SkipsStringWithEscapedBackslashBeforeClosingQuote() + { + // The value is the string: ends_with_backslash\ (the JSON encodes \\ at the end) + // This tests that \\\" is parsed as \\ + " (close quote), not \ + \" + var json = "{\"a\":\"ends_with_backslash\\\\\",\"b\":2}"; + var token = Extract(json, "b"); + ClassicAssert.AreEqual(ExprTokenType.Num, token.TokenType); + ClassicAssert.AreEqual(2.0, token.Num); + } + } +} \ No newline at end of file diff --git a/test/Garnet.test/Filter/ExprCompilerTests.cs b/test/Garnet.test/Filter/ExprCompilerTests.cs new file mode 100644 index 00000000000..dcbdcf1fa73 --- /dev/null +++ b/test/Garnet.test/Filter/ExprCompilerTests.cs @@ -0,0 +1,359 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. 
using System.Text;
using Allure.NUnit;
using Garnet.server.Vector.Filter;
using NUnit.Framework;
using NUnit.Framework.Legacy;

namespace Garnet.test
{
    /// <summary>
    /// Tests for the ExprCompiler (shunting-yard tokenizer + compiler).
    /// Verifies tokenization and compilation to flat postfix programs.
    /// </summary>
    [AllureNUnit]
    [TestFixture]
    public class ExprCompilerTests : AllureTestBase
    {
        /// <summary>
        /// Compiles a filter expression from a C# string, discarding the error position.
        /// Returns null when compilation fails (mirrors ExprCompiler.TryCompile).
        /// </summary>
        private static ExprProgram Compile(string expression)
            => ExprCompiler.TryCompile(Encoding.UTF8.GetBytes(expression), out _);

        /// <summary>
        /// Asserts that a binary expression compiles successfully and that its final
        /// (third) postfix instruction carries the expected opcode.
        /// </summary>
        private static void AssertBinaryOp(string expression, OpCode expected)
        {
            var program = Compile(expression);
            ClassicAssert.IsNotNull(program);
            ClassicAssert.AreEqual(expected, program.Instructions[2].OpCode);
        }

        [Test]
        public void Compiler_IntegerNumbers()
        {
            var program = Compile("42");
            ClassicAssert.IsNotNull(program);
            ClassicAssert.AreEqual(1, program.Length);
            ClassicAssert.AreEqual(ExprTokenType.Num, program.Instructions[0].TokenType);
            ClassicAssert.AreEqual(42.0, program.Instructions[0].Num);
        }

        [Test]
        public void Compiler_DecimalNumbers()
        {
            var program = Compile("3.14");
            ClassicAssert.IsNotNull(program);
            ClassicAssert.AreEqual(1, program.Length);
            ClassicAssert.AreEqual(ExprTokenType.Num, program.Instructions[0].TokenType);
            ClassicAssert.AreEqual(3.14, program.Instructions[0].Num, 0.001);
        }

        [Test]
        public void Compiler_NegativeNumbers()
        {
            var program = Compile("-5");
            ClassicAssert.IsNotNull(program);
            ClassicAssert.AreEqual(1, program.Length);
            ClassicAssert.AreEqual(ExprTokenType.Num, program.Instructions[0].TokenType);
            ClassicAssert.AreEqual(-5.0, program.Instructions[0].Num);
        }

        [Test]
        public void Compiler_StringLiterals()
        {
            // Double-quoted literal
            var program = Compile("\"hello\"");
            ClassicAssert.IsNotNull(program);
            ClassicAssert.AreEqual(1, program.Length);
            ClassicAssert.AreEqual(ExprTokenType.Str, program.Instructions[0].TokenType);
            ClassicAssert.AreEqual("hello", program.Instructions[0].Str);

            // Single-quoted literal
            program = Compile("'world'");
            ClassicAssert.IsNotNull(program);
            ClassicAssert.AreEqual(1, program.Length);
            ClassicAssert.AreEqual(ExprTokenType.Str, program.Instructions[0].TokenType);
            ClassicAssert.AreEqual("world", program.Instructions[0].Str);
        }

        [Test]
        public void Compiler_EscapedStringLiterals()
        {
            var program = Compile("\"hello\\\"world\"");
            ClassicAssert.IsNotNull(program);
            ClassicAssert.AreEqual(1, program.Length);
            ClassicAssert.AreEqual(ExprTokenType.Str, program.Instructions[0].TokenType);
            ClassicAssert.AreEqual("hello\"world", program.Instructions[0].Str);
        }

        [Test]
        public void Compiler_UnterminatedStringReturnsFalse()
        {
            var program = Compile("\"hello");
            ClassicAssert.IsNull(program);
        }

        [Test]
        public void Compiler_SubtractionNotConfusedWithNegative()
        {
            // ".a - 5" → postfix: [SEL:a] [NUM:5] [OP:Sub]
            var program = Compile(".a - 5");
            ClassicAssert.IsNotNull(program);
            ClassicAssert.AreEqual(3, program.Length);
            ClassicAssert.AreEqual(ExprTokenType.Selector, program.Instructions[0].TokenType);
            ClassicAssert.AreEqual(ExprTokenType.Num, program.Instructions[1].TokenType);
            ClassicAssert.AreEqual(5.0, program.Instructions[1].Num);
            ClassicAssert.AreEqual(ExprTokenType.Op, program.Instructions[2].TokenType);
            ClassicAssert.AreEqual(OpCode.Sub, program.Instructions[2].OpCode);
        }

        [Test]
        public void Compiler_Selectors()
        {
            var program = Compile(".year");
            ClassicAssert.IsNotNull(program);
            ClassicAssert.AreEqual(1, program.Length);
            ClassicAssert.AreEqual(ExprTokenType.Selector, program.Instructions[0].TokenType);
            ClassicAssert.AreEqual("year", program.Instructions[0].Str);
        }

        [Test]
        public void Compiler_Keywords()
        {
            // "true and false" → [NUM:1] [NUM:0] [OP:And]
            var program = Compile("true and false");
            ClassicAssert.IsNotNull(program);
            ClassicAssert.AreEqual(3, program.Length);
            ClassicAssert.AreEqual(ExprTokenType.Num, program.Instructions[0].TokenType);
            ClassicAssert.AreEqual(1.0, program.Instructions[0].Num);
            ClassicAssert.AreEqual(ExprTokenType.Num, program.Instructions[1].TokenType);
            ClassicAssert.AreEqual(0.0, program.Instructions[1].Num);
            ClassicAssert.AreEqual(ExprTokenType.Op, program.Instructions[2].TokenType);
            ClassicAssert.AreEqual(OpCode.And, program.Instructions[2].OpCode);
        }

        [Test]
        public void Compiler_Booleans()
        {
            // Booleans lower to numeric 1/0
            var program = Compile("true");
            ClassicAssert.IsNotNull(program);
            ClassicAssert.AreEqual(1, program.Length);
            ClassicAssert.AreEqual(ExprTokenType.Num, program.Instructions[0].TokenType);
            ClassicAssert.AreEqual(1.0, program.Instructions[0].Num);

            program = Compile("false");
            ClassicAssert.IsNotNull(program);
            ClassicAssert.AreEqual(1, program.Length);
            ClassicAssert.AreEqual(ExprTokenType.Num, program.Instructions[0].TokenType);
            ClassicAssert.AreEqual(0.0, program.Instructions[0].Num);
        }

        [Test]
        public void Compiler_TwoCharOperators()
        {
            AssertBinaryOp("1 == 2", OpCode.Eq);
            AssertBinaryOp("1 != 2", OpCode.Neq);
            AssertBinaryOp("1 >= 2", OpCode.Gte);
            AssertBinaryOp("1 <= 2", OpCode.Lte);
            AssertBinaryOp("true && false", OpCode.And);
            AssertBinaryOp("true || false", OpCode.Or);
            AssertBinaryOp("2 ** 3", OpCode.Pow);
        }

        [Test]
        public void Compiler_SingleCharOperators()
        {
            AssertBinaryOp("1 > 2", OpCode.Gt);
            AssertBinaryOp("1 < 2", OpCode.Lt);
            AssertBinaryOp("1 + 2", OpCode.Add);
            AssertBinaryOp("1 * 2", OpCode.Mul);
            AssertBinaryOp("1 / 2", OpCode.Div);
            AssertBinaryOp("1 % 2", OpCode.Mod);
        }

        [Test]
        public void Compiler_Parentheses()
        {
            var program = Compile("(.year > 10)");
            ClassicAssert.IsNotNull(program);
            // Postfix: [SEL:year] [NUM:10] [OP:Gt] — parentheses add no instructions
            ClassicAssert.AreEqual(3, program.Length);
        }

        [Test]
        public void Compiler_ComplexExpression()
        {
            var program = Compile(".year > 1950 and .rating >= 4.0");
            ClassicAssert.IsNotNull(program);
            // Postfix: [SEL:year] [NUM:1950] [OP:Gt] [SEL:rating] [NUM:4.0] [OP:Gte] [OP:And]
            ClassicAssert.AreEqual(7, program.Length);
        }

        [Test]
        public void Compiler_EmptyInput()
        {
            var program = Compile("");
            ClassicAssert.IsNull(program);

            program = Compile(" ");
            ClassicAssert.IsNull(program);
        }

        [Test]
        public void Compiler_UnexpectedCharacterReturnsFalse()
        {
            var program = Compile("@");
            ClassicAssert.IsNull(program);
        }

        [Test]
        public void Compiler_NullLiteral()
        {
            var program = Compile("null");
            ClassicAssert.IsNotNull(program);
            ClassicAssert.AreEqual(1, program.Length);
            ClassicAssert.AreEqual(ExprTokenType.Null, program.Instructions[0].TokenType);
        }

        [Test]
        public void Compiler_TupleLiteral()
        {
            var program = Compile("[1, \"foo\", 42]");
            ClassicAssert.IsNotNull(program);
            ClassicAssert.AreEqual(1, program.Length);
            ClassicAssert.AreEqual(ExprTokenType.Tuple, program.Instructions[0].TokenType);
            ClassicAssert.AreEqual(3, program.Instructions[0].TupleLength);
        }

        [Test]
        public void Compiler_HyphenInSelector()
        {
            var program = Compile(".my-field");
            ClassicAssert.IsNotNull(program);
            ClassicAssert.AreEqual(1, program.Length);
            ClassicAssert.AreEqual(ExprTokenType.Selector, program.Instructions[0].TokenType);
            ClassicAssert.AreEqual("my-field", program.Instructions[0].Str);
        }

        [Test]
        public void Compiler_PrecedenceMultiplicationBeforeAddition()
        {
            // "1 + 2 * 3" → [1] [2] [3] [*] [+]
            var program = Compile("1 + 2 * 3");
            ClassicAssert.IsNotNull(program);
            ClassicAssert.AreEqual(5, program.Length);
            ClassicAssert.AreEqual(OpCode.Mul, program.Instructions[3].OpCode);
            ClassicAssert.AreEqual(OpCode.Add, program.Instructions[4].OpCode);
        }

        [Test]
        public void Compiler_PrecedenceAndBeforeOr()
        {
            // "true or false and true" → [1] [0] [1] [and] [or]
            var program = Compile("true or false and true");
            ClassicAssert.IsNotNull(program);
            ClassicAssert.AreEqual(5, program.Length);
            ClassicAssert.AreEqual(OpCode.And, program.Instructions[3].OpCode);
            ClassicAssert.AreEqual(OpCode.Or, program.Instructions[4].OpCode);
        }

        [Test]
        public void Compiler_ParenthesesOverridePrecedence()
        {
            // "(1 + 2) * 3" → [1] [2] [+] [3] [*]
            var program = Compile("(1 + 2) * 3");
            ClassicAssert.IsNotNull(program);
            ClassicAssert.AreEqual(5, program.Length);
            ClassicAssert.AreEqual(OpCode.Add, program.Instructions[2].OpCode);
            ClassicAssert.AreEqual(OpCode.Mul, program.Instructions[4].OpCode);
        }

        [Test]
        public void Compiler_ContainmentOperator()
        {
            // '"action" in .tags' → [STR:action] [SEL:tags] [OP:In]
            var program = Compile("\"action\" in .tags");
            ClassicAssert.IsNotNull(program);
            ClassicAssert.AreEqual(3, program.Length);
            ClassicAssert.AreEqual(ExprTokenType.Str, program.Instructions[0].TokenType);
            ClassicAssert.AreEqual(ExprTokenType.Selector, program.Instructions[1].TokenType);
            ClassicAssert.AreEqual(OpCode.In, program.Instructions[2].OpCode);
        }

        [Test]
        public void Compiler_ExponentiationRightAssociative()
        {
            // "2 ** 3 ** 2" must parse as 2 ** (3 ** 2) = 512
            var program = Compile("2 ** 3 ** 2");
            ClassicAssert.IsNotNull(program);
            ClassicAssert.AreEqual(5, program.Length);
            ClassicAssert.AreEqual(OpCode.Pow, program.Instructions[3].OpCode);
            ClassicAssert.AreEqual(OpCode.Pow, program.Instructions[4].OpCode);

            var result = ExprTestHelpers.EvaluateFilter("2 ** 3 ** 2", "{}");
            ClassicAssert.AreEqual(512.0, result.Num);
        }

        [Test]
        public void Compiler_UnaryNot()
        {
            // "not true" → [NUM:1] [OP:Not]
            var program = Compile("not true");
            ClassicAssert.IsNotNull(program);
            ClassicAssert.AreEqual(2, program.Length);
            ClassicAssert.AreEqual(ExprTokenType.Num, program.Instructions[0].TokenType);
            ClassicAssert.AreEqual(ExprTokenType.Op, program.Instructions[1].TokenType);
            ClassicAssert.AreEqual(OpCode.Not, program.Instructions[1].OpCode);
        }

        [Test]
        public void Compiler_ErrorOnMissingClosingParen()
        {
            var program = Compile("(1 + 2");
            ClassicAssert.IsNull(program);
        }

        [Test]
        public void Compiler_ErrorOnUnexpectedToken()
        {
            var program = Compile(")");
            ClassicAssert.IsNull(program);
        }

        [Test]
        public void Compiler_InWithTupleLiteral()
        {
            var program = Compile(".director in [\"Spielberg\", \"Nolan\"]");
            ClassicAssert.IsNotNull(program);
            ClassicAssert.AreEqual(3, program.Length);
            ClassicAssert.AreEqual(ExprTokenType.Selector, program.Instructions[0].TokenType);
            ClassicAssert.AreEqual(ExprTokenType.Tuple, program.Instructions[1].TokenType);
            ClassicAssert.AreEqual(2, program.Instructions[1].TupleLength);
            ClassicAssert.AreEqual(OpCode.In, program.Instructions[2].OpCode);
        }
    }
}
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.

using System.Text;
using Allure.NUnit;
using Garnet.server.Vector.Filter;
using NUnit.Framework;
using NUnit.Framework.Legacy;

namespace Garnet.test
{
    /// <summary>
    /// Tests for ExprRunner (stack-based VM) + AttributeExtractor (raw byte JSON extractor).
    /// Verifies the compile-once-run-many evaluation pipeline.
    /// </summary>
    [AllureNUnit]
    [TestFixture]
    public class ExprRunnerTests : AllureTestBase
    {
        [Test]
        public void Runner_Arithmetic()
        {
            var json = "{}";
            ClassicAssert.AreEqual(5.0, ExprTestHelpers.EvaluateFilter("2 + 3", json).Num);
            ClassicAssert.AreEqual(1.0, ExprTestHelpers.EvaluateFilter("3 - 2", json).Num);
            ClassicAssert.AreEqual(6.0, ExprTestHelpers.EvaluateFilter("2 * 3", json).Num);
            ClassicAssert.AreEqual(2.5, ExprTestHelpers.EvaluateFilter("5 / 2", json).Num);
            ClassicAssert.AreEqual(1.0, ExprTestHelpers.EvaluateFilter("7 % 3", json).Num);
            ClassicAssert.AreEqual(8.0, ExprTestHelpers.EvaluateFilter("2 ** 3", json).Num);
        }

        [Test]
        public void Runner_SubtractionWithField()
        {
            var json = "{\"year\":1980}";
            ClassicAssert.AreEqual(1975.0, ExprTestHelpers.EvaluateFilter(".year - 5", json).Num);
            ClassicAssert.IsTrue(ExprTestHelpers.EvaluateFilterTruthy(".year - 5 > 0", json));
        }

        [Test]
        public void Runner_Comparison()
        {
            // Comparison results are numeric: 1 for true, 0 for false
            var json = "{}";
            ClassicAssert.AreEqual(1.0, ExprTestHelpers.EvaluateFilter("5 > 3", json).Num);
            ClassicAssert.AreEqual(0.0, ExprTestHelpers.EvaluateFilter("3 > 5", json).Num);
            ClassicAssert.AreEqual(1.0, ExprTestHelpers.EvaluateFilter("3 < 5", json).Num);
            ClassicAssert.AreEqual(0.0, ExprTestHelpers.EvaluateFilter("5 < 3", json).Num);
            ClassicAssert.AreEqual(1.0, ExprTestHelpers.EvaluateFilter("5 >= 5", json).Num);
            ClassicAssert.AreEqual(1.0, ExprTestHelpers.EvaluateFilter("5 <= 5", json).Num);
            ClassicAssert.AreEqual(1.0, ExprTestHelpers.EvaluateFilter("5 == 5", json).Num);
            ClassicAssert.AreEqual(1.0, ExprTestHelpers.EvaluateFilter("5 != 3", json).Num);
            ClassicAssert.AreEqual(0.0, ExprTestHelpers.EvaluateFilter("5 != 5", json).Num);
        }

        [Test]
        public void Runner_LogicalAnd()
        {
            var json = "{}";
            ClassicAssert.IsTrue(ExprTestHelpers.EvaluateFilterTruthy("true and true", json));
            ClassicAssert.IsFalse(ExprTestHelpers.EvaluateFilterTruthy("true and false", json));
            ClassicAssert.IsFalse(ExprTestHelpers.EvaluateFilterTruthy("false and true", json));
            ClassicAssert.IsTrue(ExprTestHelpers.EvaluateFilterTruthy("true && true", json));
        }

        [Test]
        public void Runner_LogicalOr()
        {
            var json = "{}";
            ClassicAssert.IsTrue(ExprTestHelpers.EvaluateFilterTruthy("true or false", json));
            ClassicAssert.IsTrue(ExprTestHelpers.EvaluateFilterTruthy("false or true", json));
            ClassicAssert.IsFalse(ExprTestHelpers.EvaluateFilterTruthy("false or false", json));
            ClassicAssert.IsTrue(ExprTestHelpers.EvaluateFilterTruthy("false || true", json));
        }

        [Test]
        public void Runner_LogicalNot()
        {
            var json = "{}";
            ClassicAssert.IsFalse(ExprTestHelpers.EvaluateFilterTruthy("not true", json));
            ClassicAssert.IsTrue(ExprTestHelpers.EvaluateFilterTruthy("not false", json));
        }

        [Test]
        public void Runner_StringEquality()
        {
            var json = "{\"genre\":\"action\"}";
            ClassicAssert.IsTrue(ExprTestHelpers.EvaluateFilterTruthy(".genre == \"action\"", json));
            ClassicAssert.IsFalse(ExprTestHelpers.EvaluateFilterTruthy(".genre == \"drama\"", json));
            ClassicAssert.IsTrue(ExprTestHelpers.EvaluateFilterTruthy(".genre != \"drama\"", json));
        }

        [Test]
        public void Runner_MemberAccess()
        {
            var json = "{\"year\":1980,\"rating\":4.5}";
            ClassicAssert.AreEqual(1980.0, ExprTestHelpers.EvaluateFilter(".year", json).Num);
            ClassicAssert.AreEqual(4.5, ExprTestHelpers.EvaluateFilter(".rating", json).Num);
        }

        [Test]
        public void Runner_MissingFieldReturnsFalse()
        {
            var json = "{\"year\":1980}";
            ClassicAssert.IsFalse(ExprTestHelpers.EvaluateFilterTruthy(".missing", json));
        }

        [Test]
        public void Runner_InOperatorWithJsonArray()
        {
            var json = "{\"tags\":[\"classic\",\"popular\"]}";
            ClassicAssert.IsTrue(ExprTestHelpers.EvaluateFilterTruthy("\"classic\" in .tags", json));
            ClassicAssert.IsTrue(ExprTestHelpers.EvaluateFilterTruthy("\"popular\" in .tags", json));
            ClassicAssert.IsFalse(ExprTestHelpers.EvaluateFilterTruthy("\"modern\" in .tags", json));
        }

        [Test]
        public void Runner_InOperatorWithNumericJsonArray()
        {
            var json = "{\"scores\":[1,2,3]}";
            ClassicAssert.IsTrue(ExprTestHelpers.EvaluateFilterTruthy("2 in .scores", json));
            ClassicAssert.IsFalse(ExprTestHelpers.EvaluateFilterTruthy("5 in .scores", json));
        }

        [Test]
        public void Runner_InOperatorWithTupleLiteral()
        {
            var json = "{\"director\":\"Nolan\"}";
            ClassicAssert.IsTrue(ExprTestHelpers.EvaluateFilterTruthy(".director in [\"Spielberg\", \"Nolan\"]", json));
            ClassicAssert.IsFalse(ExprTestHelpers.EvaluateFilterTruthy(".director in [\"Spielberg\", \"Kubrick\"]", json));
        }

        [Test]
        public void Runner_InOperatorSubstringCheck()
        {
            // "str in str" performs a substring containment check
            var json = "{\"name\":\"barfoobar\"}";
            ClassicAssert.IsTrue(ExprTestHelpers.EvaluateFilterTruthy("\"foo\" in .name", json));
            ClassicAssert.IsFalse(ExprTestHelpers.EvaluateFilterTruthy("\"xyz\" in .name", json));
        }

        [Test]
        public void Runner_ComplexExpression()
        {
            var json = "{\"year\":1980,\"rating\":4.5,\"genre\":\"action\",\"tags\":[\"classic\",\"popular\"]}";

            ClassicAssert.IsTrue(ExprTestHelpers.EvaluateFilterTruthy(
                ".rating * 2 > 8 and (.year >= 1980 or \"modern\" in .tags)", json));

            ClassicAssert.IsFalse(ExprTestHelpers.EvaluateFilterTruthy(
                "(.year > 2000 or .year < 1970) and .rating >= 4.0", json));

            ClassicAssert.IsTrue(ExprTestHelpers.EvaluateFilterTruthy("not (.genre == \"drama\")", json));
            ClassicAssert.IsTrue(ExprTestHelpers.EvaluateFilterTruthy(".year / 10 >= 198", json));
        }

        [Test]
        public void Runner_BooleanJsonValues()
        {
            var json = "{\"active\":true,\"deleted\":false}";
            ClassicAssert.IsTrue(ExprTestHelpers.EvaluateFilterTruthy(".active", json));
            ClassicAssert.IsFalse(ExprTestHelpers.EvaluateFilterTruthy(".deleted", json));
            ClassicAssert.IsTrue(ExprTestHelpers.EvaluateFilterTruthy(".active == true", json));
        }

        [Test]
        public void Runner_ArithmeticWithNonNumericString_CoercesToZero()
        {
            var json = "{\"genre\":\"action\"}";
            ClassicAssert.AreEqual(2.0, ExprTestHelpers.EvaluateFilter(".genre + 2", json).Num);
            ClassicAssert.AreEqual(-1.0, ExprTestHelpers.EvaluateFilter(".genre - 1", json).Num);
        }

        [Test]
        public void Runner_NullLiteral()
        {
            var json = "{\"year\":1980}";
            ClassicAssert.IsTrue(ExprTestHelpers.EvaluateFilterTruthy(".year != null", json));
        }

        [Test]
        public void Runner_NonJsonAttributesExcluded()
        {
            // A selector against non-JSON (or empty) attribute bytes must evaluate to false
            var program = ExprCompiler.TryCompile(Encoding.UTF8.GetBytes(".year > 1950"), out _);
            ClassicAssert.IsNotNull(program);

            var nonJson = Encoding.UTF8.GetBytes("this is not json");
            var stack = ExprRunner.CreateStack();
            ClassicAssert.IsFalse(ExprRunner.Run(program, nonJson, stack));

            var emptyJson = Encoding.UTF8.GetBytes("");
            ClassicAssert.IsFalse(ExprRunner.Run(program, emptyJson, stack));
        }

        [Test]
        public void Runner_ExactNumericEquality()
        {
            var json = "{}";
            ClassicAssert.IsTrue(ExprTestHelpers.EvaluateFilterTruthy("5 == 5", json));
            ClassicAssert.IsFalse(ExprTestHelpers.EvaluateFilterTruthy("5 == 5.0001", json));
        }

        [Test]
        public void Runner_HyphenatedField()
        {
            var json = "{\"my-field\":42}";
            ClassicAssert.AreEqual(42.0, ExprTestHelpers.EvaluateFilter(".my-field", json).Num);
        }

        [Test]
        public void Runner_JsonEscapeHandling()
        {
            var json = "{\"name\":\"hello\\\"world\"}";
            ClassicAssert.IsTrue(ExprTestHelpers.EvaluateFilterTruthy(".name == \"hello\\\"world\"", json));
        }
    }
}
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.

using System;
using System.Text;
using Garnet.server.Vector.Filter;

namespace Garnet.test
{
    /// <summary>
    /// Test helpers for the Redis-style filter pipeline.
    /// Compiles filter expressions and runs them against JSON attribute data.
    /// </summary>
    internal static class ExprTestHelpers
    {
        /// <summary>
        /// Compile and run a filter expression against JSON, returning the result as an ExprToken.
        /// This is useful for testing arithmetic/comparison results.
        /// </summary>
        /// <exception cref="InvalidOperationException">Thrown when the expression fails to compile.</exception>
        internal static ExprToken EvaluateFilter(string expression, string json)
        {
            var program = ExprCompiler.TryCompile(Encoding.UTF8.GetBytes(expression), out var errpos);
            if (program == null)
                throw new InvalidOperationException($"Compilation failed at position {errpos}");

            // For single-value expressions (no selectors), run returns bool.
            // To get the actual value, we use RunAndReturnTop.
            var jsonBytes = Encoding.UTF8.GetBytes(json);
            return RunAndReturnTop(program, jsonBytes);
        }

        /// <summary>
        /// Compile and run a filter expression against JSON, returning a boolean result.
        /// </summary>
        /// <exception cref="InvalidOperationException">Thrown when the expression fails to compile.</exception>
        internal static bool EvaluateFilterTruthy(string expression, string json)
        {
            var program = ExprCompiler.TryCompile(Encoding.UTF8.GetBytes(expression), out var errpos);
            if (program == null)
                throw new InvalidOperationException($"Compilation failed at position {errpos}");

            var jsonBytes = Encoding.UTF8.GetBytes(json);
            return ExprRunner.Run(program, jsonBytes, ExprRunner.CreateStack());
        }

        /// <summary>
        /// Try to compile a filter expression. Returns true on success.
        /// </summary>
        internal static bool TryCompile(string expression, out ExprProgram program)
        {
            program = ExprCompiler.TryCompile(Encoding.UTF8.GetBytes(expression), out _);
            return program != null;
        }

        /// <summary>
        /// Execute a compiled program and return the top-of-stack value (for testing).
        /// This is a test-only method that mirrors ExprRunner.Run but returns the raw result
        /// instead of a boolean, so tests can inspect numeric/string values.
        /// </summary>
        private static ExprToken RunAndReturnTop(ExprProgram program, ReadOnlySpan<byte> json)
        {
            // Grown on demand below; the original fixed 256-slot array would throw
            // IndexOutOfRangeException on very long postfix programs.
            var stack = new ExprToken[256];
            var stackLen = 0;

            for (var i = 0; i < program.Length; i++)
            {
                var inst = program.Instructions[i];

                if (inst.TokenType == ExprTokenType.Selector)
                {
                    var extracted = AttributeExtractor.ExtractField(json, inst.Str);
                    if (extracted.IsNone)
                        return ExprToken.NewNull();

                    // Materialize JSON refs to strings for test convenience (OK to allocate in tests)
                    if (extracted.IsJsonRef)
                    {
                        extracted = ExprToken.NewStr(Encoding.UTF8.GetString(json.Slice(extracted.Utf8Start, extracted.Utf8Length)));
                    }

                    if (stackLen == stack.Length)
                        Array.Resize(ref stack, stack.Length * 2);
                    stack[stackLen++] = extracted;
                    continue;
                }

                if (inst.TokenType != ExprTokenType.Op)
                {
                    // Literal (Num/Str/Null/Tuple): push as-is
                    if (stackLen == stack.Length)
                        Array.Resize(ref stack, stack.Length * 2);
                    stack[stackLen++] = inst;
                    continue;
                }

                // Operator: pop operands per arity (b is top-of-stack; a only for binary ops).
                // An underflowing pop yields default(ExprToken), mirroring the original's leniency.
                var arity = OpTable.GetArity(inst.OpCode);
                ExprToken b = stackLen > 0 ? stack[--stackLen] : default;
                ExprToken a = arity == 2 && stackLen > 0 ? stack[--stackLen] : default;

                var result = ExprToken.NewNum(0);

                switch (inst.OpCode)
                {
                    case OpCode.Not:
                        result.Num = TokenToBool(b) == 0 ? 1 : 0;
                        break;
                    case OpCode.Pow:
                        result.Num = Math.Pow(TokenToNum(a), TokenToNum(b));
                        break;
                    case OpCode.Mul:
                        result.Num = TokenToNum(a) * TokenToNum(b);
                        break;
                    case OpCode.Div:
                        // Double division: x/0 yields Infinity/NaN rather than throwing
                        result.Num = TokenToNum(a) / TokenToNum(b);
                        break;
                    case OpCode.Mod:
                        result.Num = TokenToNum(a) % TokenToNum(b);
                        break;
                    case OpCode.Add:
                        result.Num = TokenToNum(a) + TokenToNum(b);
                        break;
                    case OpCode.Sub:
                        result.Num = TokenToNum(a) - TokenToNum(b);
                        break;
                    case OpCode.Gt:
                        result.Num = TokenToNum(a) > TokenToNum(b) ? 1 : 0;
                        break;
                    case OpCode.Gte:
                        result.Num = TokenToNum(a) >= TokenToNum(b) ? 1 : 0;
                        break;
                    case OpCode.Lt:
                        result.Num = TokenToNum(a) < TokenToNum(b) ? 1 : 0;
                        break;
                    case OpCode.Lte:
                        result.Num = TokenToNum(a) <= TokenToNum(b) ? 1 : 0;
                        break;
                    case OpCode.Eq:
                        result.Num = TokensEqual(a, b) ? 1 : 0;
                        break;
                    case OpCode.Neq:
                        result.Num = !TokensEqual(a, b) ? 1 : 0;
                        break;
                    case OpCode.In:
                        result.Num = EvalIn(a, b) ? 1 : 0;
                        break;
                    case OpCode.And:
                        result.Num = TokenToBool(a) != 0 && TokenToBool(b) != 0 ? 1 : 0;
                        break;
                    case OpCode.Or:
                        result.Num = TokenToBool(a) != 0 || TokenToBool(b) != 0 ? 1 : 0;
                        break;
                }

                if (stackLen == stack.Length)
                    Array.Resize(ref stack, stack.Length * 2);
                stack[stackLen++] = result;
            }

            return stackLen > 0 ? stack[stackLen - 1] : ExprToken.NewNull();
        }

        /// <summary>
        /// Numeric coercion: None → 0; Num → its value; Str → parsed double or 0; anything else → 0.
        /// </summary>
        private static double TokenToNum(ExprToken t)
        {
            if (t.IsNone) return 0;
            if (t.TokenType == ExprTokenType.Num) return t.Num;
            if (t.TokenType == ExprTokenType.Str && t.Str != null)
            {
                return double.TryParse(t.Str, System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowLeadingSign,
                    System.Globalization.CultureInfo.InvariantCulture, out var result) ? result : 0;
            }
            return 0;
        }

        /// <summary>
        /// Truthiness as 0/1: None and Null are false; Num is truthy when non-zero;
        /// Str is truthy when non-empty; all other token types are truthy.
        /// </summary>
        private static double TokenToBool(ExprToken t)
        {
            if (t.IsNone) return 0;
            if (t.TokenType == ExprTokenType.Num) return t.Num != 0 ? 1 : 0;
            if (t.TokenType == ExprTokenType.Str)
            {
                if (t.IsJsonRef) return t.Utf8Length == 0 ? 0 : 1;
                return (t.Str == null || t.Str.Length == 0) ? 0 : 1;
            }
            if (t.TokenType == ExprTokenType.Null) return 0;
            return 1;
        }

        /// <summary>
        /// Equality: matching-type Str/Num compare directly; Null only equals Null;
        /// otherwise both sides are coerced to numbers.
        /// </summary>
        private static bool TokensEqual(ExprToken a, ExprToken b)
        {
            if (a.IsNone || b.IsNone) return a.IsNone && b.IsNone;
            if (a.TokenType == ExprTokenType.Str && b.TokenType == ExprTokenType.Str)
                return string.Equals(a.Str, b.Str, StringComparison.Ordinal);
            if (a.TokenType == ExprTokenType.Num && b.TokenType == ExprTokenType.Num)
                return a.Num == b.Num;
            if (a.TokenType == ExprTokenType.Null || b.TokenType == ExprTokenType.Null)
                return a.TokenType == b.TokenType;
            return TokenToNum(a) == TokenToNum(b);
        }

        /// <summary>
        /// "a in b": tuple membership via TokensEqual, or ordinal substring check
        /// when both sides are strings; anything else is false.
        /// </summary>
        private static bool EvalIn(ExprToken a, ExprToken b)
        {
            if (b.IsNone) return false;
            if (b.TokenType == ExprTokenType.Tuple)
            {
                for (var i = 0; i < b.TupleLength; i++)
                {
                    if (TokensEqual(a, b.TupleElements[i]))
                        return true;
                }
                return false;
            }
            if (!a.IsNone && a.TokenType == ExprTokenType.Str && b.TokenType == ExprTokenType.Str)
            {
                if (a.Str == null || b.Str == null) return false;
                return b.Str.IndexOf(a.Str, StringComparison.Ordinal) >= 0;
            }
            return false;
        }
    }
}
var res2 = db.Execute("VADD", ["foo", "VALUES", "3", "2.0", "3.0", "4.0", new byte[] { 0, 0, 0, 1 }, + "CAS", "NOQUANT", "EF", "16", "M", "32", "SETATTR", "{\"year\":1960}"]); + ClassicAssert.AreEqual(1, (int)res2); + + // Add third vector with year=1940 + var res3 = db.Execute("VADD", ["foo", "VALUES", "3", "1.5", "2.5", "3.5", new byte[] { 0, 0, 0, 2 }, + "CAS", "NOQUANT", "EF", "16", "M", "32", "SETATTR", "{\"year\":1940}"]); + ClassicAssert.AreEqual(1, (int)res3); + + + // Search with filter for year > 1950 - should return 2 results (years 1980 and 1960) + var res5 = (byte[][])db.Execute("VSIM", ["foo", "VALUES", "3", "0.0", "0.0", "0.0", + "FILTER", ".year > 1950", "COUNT", "3", "WITHATTRIBS"]); + + ClassicAssert.AreEqual(4, res5.Length, + "Should return 2 results (2 pairs of id+attribute) for year > 1950"); + + // Verify both results have year > 1950 + for (var i = 0; i < res5.Length; i += 2) + { + var attr = res5[i + 1]; + var attrStr = Encoding.UTF8.GetString(attr); + ClassicAssert.IsTrue(attrStr.Contains("\"year\":1980") || attrStr.Contains("\"year\":1960"), + $"Result should have year > 1950, got: {attrStr}"); + } + + // Search with filter for year > 1990 - should return NO results since all years are < 1990 + var res4 = (byte[][])db.Execute("VSIM", ["foo", "VALUES", "3", "0.0", "0.0", "0.0", + "FILTER", ".year > 1990", "COUNT", "3", "WITHATTRIBS"]); + + ClassicAssert.AreEqual(0, res4.Length, + "Should return 0 results since no vectors have year > 1990"); + } + + [Test] + public void VSIMWithFilterButWithoutWithAttribs() + { + using var redis = ConnectionMultiplexer.Connect(TestUtils.GetConfig()); + var db = redis.GetDatabase(0); + + _ = db.KeyDelete("foo"); + + // Add vectors with attributes + db.Execute("VADD", ["foo", "VALUES", "3", "1.0", "2.0", "3.0", new byte[] { 0, 0, 0, 0 }, + "CAS", "NOQUANT", "EF", "16", "M", "32", "SETATTR", "{\"year\":1980}"]); + db.Execute("VADD", ["foo", "VALUES", "3", "2.0", "3.0", "4.0", new byte[] { 0, 0, 0, 1 }, + "CAS", 
"NOQUANT", "EF", "16", "M", "32", "SETATTR", "{\"year\":1960}"]); + db.Execute("VADD", ["foo", "VALUES", "3", "1.5", "2.5", "3.5", new byte[] { 0, 0, 0, 2 }, + "CAS", "NOQUANT", "EF", "16", "M", "32", "SETATTR", "{\"year\":1940}"]); + + // FILTER without WITHATTRIBS should work: fetch attributes internally and apply filter + var res = (byte[][])db.Execute("VSIM", ["foo", "VALUES", "3", "0.0", "0.0", "0.0", + "FILTER", ".year > 1950", "COUNT", "3"]); + + // Should return only 2 element ids (no attributes since WITHATTRIBS not specified) + ClassicAssert.AreEqual(2, res.Length, + "Should return 2 element ids (year > 1950) without attributes"); + } + + [Test] + public void VSIMWithAdvancedFiltering() + { + using var redis = ConnectionMultiplexer.Connect(TestUtils.GetConfig()); + var db = redis.GetDatabase(0); + + _ = SeedMoviesForAdvancedFiltering(db); + + // Test logical AND + var res4 = (byte[][])db.Execute("VSIM", ["movies", "VALUES", "3", "0.0", "0.0", "0.0", + "FILTER", ".year > 1970 and .rating > 4.0", "COUNT", "3", "WITHATTRIBS"]); + ClassicAssert.AreEqual(4, res4.Length, "Logical AND: year > 1970 AND rating > 4.0"); + + // Test logical OR + var res5 = (byte[][])db.Execute("VSIM", ["movies", "VALUES", "3", "0.0", "0.0", "0.0", + "FILTER", ".year < 1970 or .year > 2000", "COUNT", "3", "WITHATTRIBS"]); + ClassicAssert.AreEqual(4, res5.Length, "Logical OR: year < 1970 OR year > 2000"); + + // Test string equality + var res6 = (byte[][])db.Execute("VSIM", ["movies", "VALUES", "3", "0.0", "0.0", "0.0", + "FILTER", ".genre == \"action\"", "COUNT", "3", "WITHATTRIBS"]); + ClassicAssert.AreEqual(4, res6.Length, "String equality: genre == 'action'"); + + // Test arithmetic expression + var res7 = (byte[][])db.Execute("VSIM", ["movies", "VALUES", "3", "0.0", "0.0", "0.0", + "FILTER", ".year / 10 >= 200", "COUNT", "3", "WITHATTRIBS"]); + ClassicAssert.AreEqual(2, res7.Length, "Arithmetic: year / 10 >= 200"); + + // Test parentheses grouping + var res8 = 
(byte[][])db.Execute("VSIM", ["movies", "VALUES", "3", "0.0", "0.0", "0.0", + "FILTER", "(.year > 2000 or .year < 1970) and .rating >= 4.0", "COUNT", "3", "WITHATTRIBS"]); + ClassicAssert.AreEqual(2, res8.Length, "Parentheses grouping"); + + // Test containment operator (in) + var res9 = (byte[][])db.Execute("VSIM", ["movies", "VALUES", "3", "0.0", "0.0", "0.0", + "FILTER", "\"classic\" in .tags", "COUNT", "3", "WITHATTRIBS"]); + ClassicAssert.AreEqual(4, res9.Length, "Containment: 'classic' in tags"); + + // Test NOT operator + var res10 = (byte[][])db.Execute("VSIM", ["movies", "VALUES", "3", "0.0", "0.0", "0.0", + "FILTER", "not (.genre == \"drama\")", "COUNT", "3", "WITHATTRIBS"]); + ClassicAssert.AreEqual(4, res10.Length, "NOT operator: not (genre == 'drama')"); + + // Test complex expression with multiple operators + var res11 = (byte[][])db.Execute("VSIM", ["movies", "VALUES", "3", "0.0", "0.0", "0.0", + "FILTER", ".rating * 2 > 8 and (.year >= 1980 or \"modern\" in .tags)", "COUNT", "3", "WITHATTRIBS"]); + ClassicAssert.AreEqual(4, res11.Length, "Complex: rating*2 > 8 AND (year>=1980 OR 'modern' in tags)"); + } + + [Test] + public void VSIMWithAdvancedFilteringELEWithAttribs() + { + using var redis = ConnectionMultiplexer.Connect(TestUtils.GetConfig()); + var db = redis.GetDatabase(0); + + var queryElementId = SeedMoviesForAdvancedFiltering(db); + + var res1 = (byte[][])db.Execute("VSIM", ["movies", "ELE", queryElementId, + "FILTER", ".genre == \"action\"", "COUNT", "3", "WITHATTRIBS"]); + ClassicAssert.AreEqual(4, res1.Length, "ELE + FILTER + WITHATTRIBS: genre == 'action'"); + + var res2 = (byte[][])db.Execute("VSIM", ["movies", "ELE", queryElementId, + "FILTER", "\"classic\" in .tags", "COUNT", "3", "WITHATTRIBS"]); + ClassicAssert.AreEqual(4, res2.Length, "ELE + FILTER + WITHATTRIBS: 'classic' in tags"); + + var res3 = (byte[][])db.Execute("VSIM", ["movies", "ELE", queryElementId, + "FILTER", ".rating / 2 > 2 and .year >= 1980", "COUNT", "3", 
"WITHATTRIBS"]); + ClassicAssert.AreEqual(4, res3.Length, "ELE + FILTER + WITHATTRIBS: arithmetic and comparison"); + } + + [Test] + public void VSIMWithAdvancedFilteringELEWithoutWithAttribs() + { + using var redis = ConnectionMultiplexer.Connect(TestUtils.GetConfig()); + var db = redis.GetDatabase(0); + + var queryElementId = SeedMoviesForAdvancedFiltering(db); + + var res1 = (byte[][])db.Execute("VSIM", ["movies", "ELE", queryElementId, + "FILTER", ".genre == \"action\"", "COUNT", "3"]); + ClassicAssert.AreEqual(2, res1.Length, "ELE + FILTER without WITHATTRIBS: genre == 'action'"); + + var res2 = (byte[][])db.Execute("VSIM", ["movies", "ELE", queryElementId, + "FILTER", "\"classic\" in .tags", "COUNT", "3"]); + ClassicAssert.AreEqual(2, res2.Length, "ELE + FILTER without WITHATTRIBS: 'classic' in tags"); + + var res3 = (byte[][])db.Execute("VSIM", ["movies", "ELE", queryElementId, + "FILTER", ".rating / 2 > 2 and .year >= 1980", "COUNT", "3"]); + ClassicAssert.AreEqual(2, res3.Length, "ELE + FILTER without WITHATTRIBS: arithmetic and comparison"); + } + + private static byte[] SeedMoviesForAdvancedFiltering(IDatabase db) + { + _ = db.KeyDelete("movies"); + + var queryElementId = new byte[] { 0, 0, 0, 0 }; + var res1 = db.Execute("VADD", ["movies", "VALUES", "3", "1.0", "2.0", "3.0", queryElementId, + "CAS", "NOQUANT", "EF", "16", "M", "32", "SETATTR", "{\"year\":1980,\"rating\":4.5,\"genre\":\"action\",\"tags\":[\"classic\",\"popular\"]}"]); + ClassicAssert.AreEqual(1, (int)res1); + + var res2 = db.Execute("VADD", ["movies", "VALUES", "3", "2.0", "3.0", "4.0", new byte[] { 0, 0, 0, 1 }, + "CAS", "NOQUANT", "EF", "16", "M", "32", "SETATTR", "{\"year\":1960,\"rating\":3.8,\"genre\":\"drama\",\"tags\":[\"classic\"]}"]); + ClassicAssert.AreEqual(1, (int)res2); + + var res3 = db.Execute("VADD", ["movies", "VALUES", "3", "1.5", "2.5", "3.5", new byte[] { 0, 0, 0, 2 }, + "CAS", "NOQUANT", "EF", "16", "M", "32", "SETATTR", 
"{\"year\":2010,\"rating\":4.2,\"genre\":\"action\",\"tags\":[\"modern\"]}"]); + ClassicAssert.AreEqual(1, (int)res3); + + return queryElementId; + } + [Test] public void DeleteVectorSet() { diff --git a/website/docs/dev/vector-sets.md b/website/docs/dev/vector-sets.md index 79fa84f09e1..c801c5b8d78 100644 --- a/website/docs/dev/vector-sets.md +++ b/website/docs/dev/vector-sets.md @@ -415,6 +415,63 @@ Garnet calls into the following DiskANN functions: - `context` is always the `Context` value created by Garnet and stored in [`Index`](#indexes) for a Vector Set, this implies it is always a non-0 multiple of 8 - `search_vector`, `search_element`, and `continue_search` all return the number of ids written into `output_ids`, and if there are more values to return they set the `nint` _pointed to by_ `continuation` or `new_continuation` +## Vector Filter Expressions (`VSIM ... FILTER`) + +`VSIM` supports `FILTER ` for attribute-based post filtering. + +### Expression syntax + +- Arithmetic: `+`, `-`, `*`, `/`, `%`, `**` +- Comparison: `==`, `!=`, `>`, `<`, `>=`, `<=` +- Logical: `and`, `or`, `not` (also `&&`, `||`, `!`) +- Containment: `in` +- Grouping: parentheses `()` + +Field access uses dot notation (for example, `.year`, `.rating`, `.genre`). + +### Supported values + +- Numbers +- Strings +- Booleans (`true` / `false`, evaluated as `1` / `0`) +- Arrays (for `in` when the right side is an attribute array) + +### Operator precedence (high to low) + +1. primary / parentheses +2. unary (`not`, `!`, unary `-`) +3. power (`**`, right-associative) +4. multiplicative (`*`, `/`, `%`) +5. additive (`+`, `-`) +6. containment (`in`) +7. comparison (`>`, `<`, `>=`, `<=`) +8. equality (`==`, `!=`) +9. logical and (`and`, `&&`) +10. 
logical or (`or`, `||`) + +### Notes + +- Keywords are lowercase (`and`, `or`, `not`, `in`, `true`, `false`) +- Missing attributes are treated as non-matching (null/falsy) +- Array literals inside expressions (for example, `.director in ["a","b"]`) are not currently supported +- `VSIM` query source can be either `ELE ` or `VALUES ... ` + +### Examples + +```text +VSIM movies ELE dune FILTER '.year >= 1980 and .rating > 7' +VSIM movies ELE dune FILTER '.genre == "action" && .rating > 8.0' +VSIM movies ELE dune FILTER '"classic" in .tags' +VSIM movies ELE dune FILTER '(.year - 2000) ** 2 < 100 and .rating / 2 > 4' +VSIM movies VALUES 3 0.12 0.34 0.56 FILTER '.year >= 1980 and .rating > 7' +``` + +### Reference + +- Redis `VSIM`: https://redis.io/docs/latest/commands/vsim/ +- Redis vector sets: https://redis.io/docs/latest/develop/data-types/vector-sets/ +- Redis filter expressions: https://redis.io/docs/latest/develop/data-types/vector-sets/filtered-search/ + > [!IMPORTANT] > These p/invoke definitions are all a little rough and should be cleaned up. >