From fbd5e0cb742d2c65a3204d474a4a0f0f2b9d0a8b Mon Sep 17 00:00:00 2001 From: Haiyang Xu Date: Thu, 19 Feb 2026 16:49:10 -0800 Subject: [PATCH 01/31] Add post-filter support for VSIM vector search results Implement JSON-path-based filter expressions that are evaluated against vector element attributes after similarity search. The filter engine includes a tokenizer, expression parser, and evaluator supporting comparison operators, logical operators (and/or/not), arithmetic, string equality, containment (in), and parenthesized grouping. Integrate post-filtering into VectorManager for both VSIM code paths, rejecting requests that specify a filter without WITHATTRIBS. --- .../Vector/Filter/VectorFilterEvaluator.cs | 147 +++++ .../Vector/Filter/VectorFilterExpression.cs | 45 ++ .../Resp/Vector/Filter/VectorFilterParser.cs | 215 +++++++ .../Vector/Filter/VectorFilterTokenizer.cs | 123 ++++ libs/server/Resp/Vector/VectorManager.cs | 129 +++++ test/Garnet.test/RespVectorSetTests.cs | 110 ++++ test/Garnet.test/VectorFilterTests.cs | 541 ++++++++++++++++++ 7 files changed, 1310 insertions(+) create mode 100644 libs/server/Resp/Vector/Filter/VectorFilterEvaluator.cs create mode 100644 libs/server/Resp/Vector/Filter/VectorFilterExpression.cs create mode 100644 libs/server/Resp/Vector/Filter/VectorFilterParser.cs create mode 100644 libs/server/Resp/Vector/Filter/VectorFilterTokenizer.cs create mode 100644 test/Garnet.test/VectorFilterTests.cs diff --git a/libs/server/Resp/Vector/Filter/VectorFilterEvaluator.cs b/libs/server/Resp/Vector/Filter/VectorFilterEvaluator.cs new file mode 100644 index 00000000000..754d2a3851b --- /dev/null +++ b/libs/server/Resp/Vector/Filter/VectorFilterEvaluator.cs @@ -0,0 +1,147 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +using System; +using System.Text.Json; + +namespace Garnet.server.Vector.Filter +{ + /// + /// Evaluator for vector filter expressions. 
+ /// Evaluates parsed expression trees against JSON attribute data. + /// + internal static class VectorFilterEvaluator + { + public static object EvaluateExpression(Expr expr, JsonElement root) + { + if (expr is LiteralExpr lit) + return lit.Value; + + if (expr is MemberExpr member) + { + if (root.TryGetProperty(member.Property, out var value)) + { + return value.ValueKind switch + { + JsonValueKind.Number => value.GetDouble(), + JsonValueKind.String => value.GetString(), + JsonValueKind.True => 1.0, + JsonValueKind.False => 0.0, + JsonValueKind.Array => value, + _ => null + }; + } + return null; + } + + if (expr is UnaryExpr unary) + { + var operand = EvaluateExpression(unary.Operand, root); + if (unary.Operator == "not" || unary.Operator == "!") + return IsTruthy(operand) ? 0.0 : 1.0; + if (unary.Operator == "-") + return -(ToNumber(operand)); + throw new InvalidOperationException($"Unknown unary operator: {unary.Operator}"); + } + + if (expr is BinaryExpr binary) + { + // Short-circuit logical operators + if (binary.Operator is "and" or "&&") + { + var left = EvaluateExpression(binary.Left, root); + if (!IsTruthy(left)) return 0.0; + var right = EvaluateExpression(binary.Right, root); + return IsTruthy(right) ? 1.0 : 0.0; + } + + if (binary.Operator is "or" or "||") + { + var left = EvaluateExpression(binary.Left, root); + if (IsTruthy(left)) return 1.0; + var right = EvaluateExpression(binary.Right, root); + return IsTruthy(right) ? 1.0 : 0.0; + } + + { + var left = EvaluateExpression(binary.Left, root); + var right = EvaluateExpression(binary.Right, root); + + return binary.Operator switch + { + "+" => ToNumber(left) + ToNumber(right), + "-" => ToNumber(left) - ToNumber(right), + "*" => ToNumber(left) * ToNumber(right), + "/" => ToNumber(left) / ToNumber(right), + "%" => ToNumber(left) % ToNumber(right), + "**" => Math.Pow(ToNumber(left), ToNumber(right)), + ">" => ToNumber(left) > ToNumber(right) ? 1.0 : 0.0, + "<" => ToNumber(left) < ToNumber(right) ? 
1.0 : 0.0, + ">=" => ToNumber(left) >= ToNumber(right) ? 1.0 : 0.0, + "<=" => ToNumber(left) <= ToNumber(right) ? 1.0 : 0.0, + "==" => AreEqual(left, right) ? 1.0 : 0.0, + "!=" => !AreEqual(left, right) ? 1.0 : 0.0, + "in" => IsIn(left, right) ? 1.0 : 0.0, + _ => throw new InvalidOperationException($"Unknown operator: {binary.Operator}") + }; + } + } + + return null; + } + + private static double ToNumber(object value) + { + if (value is double d) return d; + if (value is int i) return i; + if (value is string s && double.TryParse(s, out var result)) return result; + return 0; + } + + public static bool IsTruthy(object value) + { + if (value == null) return false; + if (value is double d) return d != 0; + if (value is int i) return i != 0; + if (value is string s) return !string.IsNullOrEmpty(s); + if (value is bool b) return b; + return true; + } + + private static bool AreEqual(object left, object right) + { + if (left == null && right == null) return true; + if (left == null || right == null) return false; + + if (left is double || right is double) + return Math.Abs(ToNumber(left) - ToNumber(right)) < 0.0001; + + if (left is string ls && right is string rs) + return ls == rs; + + return left.Equals(right); + } + + private static bool IsIn(object needle, object haystack) + { + if (haystack is JsonElement elem && elem.ValueKind == JsonValueKind.Array) + { + foreach (var item in elem.EnumerateArray()) + { + var itemValue = item.ValueKind switch + { + JsonValueKind.Number => (object)item.GetDouble(), + JsonValueKind.String => item.GetString(), + JsonValueKind.True => 1.0, + JsonValueKind.False => 0.0, + _ => null + }; + + if (AreEqual(needle, itemValue)) + return true; + } + } + return false; + } + } +} diff --git a/libs/server/Resp/Vector/Filter/VectorFilterExpression.cs b/libs/server/Resp/Vector/Filter/VectorFilterExpression.cs new file mode 100644 index 00000000000..976109780ac --- /dev/null +++ b/libs/server/Resp/Vector/Filter/VectorFilterExpression.cs @@ -0,0 
+1,45 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +namespace Garnet.server.Vector.Filter +{ + /// + /// Base class for filter expression tree nodes. + /// + internal abstract class Expr { } + + /// + /// Represents a literal value (number, string, boolean). + /// + internal class LiteralExpr : Expr + { + public object Value { get; init; } + } + + /// + /// Represents a member access expression (e.g., .year, .rating). + /// + internal class MemberExpr : Expr + { + public string Property { get; init; } + } + + /// + /// Represents a unary operation (e.g., not, -). + /// + internal class UnaryExpr : Expr + { + public string Operator { get; init; } + public Expr Operand { get; init; } + } + + /// + /// Represents a binary operation (e.g., +, -, ==, and). + /// + internal class BinaryExpr : Expr + { + public Expr Left { get; init; } + public string Operator { get; init; } + public Expr Right { get; init; } + } +} diff --git a/libs/server/Resp/Vector/Filter/VectorFilterParser.cs b/libs/server/Resp/Vector/Filter/VectorFilterParser.cs new file mode 100644 index 00000000000..b4b42efdaa6 --- /dev/null +++ b/libs/server/Resp/Vector/Filter/VectorFilterParser.cs @@ -0,0 +1,215 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +using System; +using System.Collections.Generic; +using System.Globalization; + +namespace Garnet.server.Vector.Filter +{ + /// + /// Recursive descent parser for vector filter expressions. + /// Supports arithmetic, comparison, logical operators, containment, and grouping. 
+ /// + internal static class VectorFilterParser + { + public static Expr ParseExpression(List tokens, int start, out int end) + { + return ParseLogicalOr(tokens, start, out end); + } + + private static Expr ParseLogicalOr(List tokens, int start, out int end) + { + var left = ParseLogicalAnd(tokens, start, out end); + + while (end < tokens.Count && + ((tokens[end].Type == TokenType.Keyword && tokens[end].Value == "or") || + (tokens[end].Type == TokenType.Operator && tokens[end].Value == "||"))) + { + end++; + var right = ParseLogicalAnd(tokens, end, out end); + left = new BinaryExpr { Left = left, Operator = "or", Right = right }; + } + + return left; + } + + private static Expr ParseLogicalAnd(List tokens, int start, out int end) + { + var left = ParseEquality(tokens, start, out end); + + while (end < tokens.Count && + ((tokens[end].Type == TokenType.Keyword && tokens[end].Value == "and") || + (tokens[end].Type == TokenType.Operator && tokens[end].Value == "&&"))) + { + end++; + var right = ParseEquality(tokens, end, out end); + left = new BinaryExpr { Left = left, Operator = "and", Right = right }; + } + + return left; + } + + private static Expr ParseEquality(List tokens, int start, out int end) + { + var left = ParseComparison(tokens, start, out end); + + while (end < tokens.Count && tokens[end].Type == TokenType.Operator && + (tokens[end].Value == "==" || tokens[end].Value == "!=")) + { + var op = tokens[end].Value; + end++; + var right = ParseComparison(tokens, end, out end); + left = new BinaryExpr { Left = left, Operator = op, Right = right }; + } + + return left; + } + + private static Expr ParseComparison(List tokens, int start, out int end) + { + var left = ParseContainment(tokens, start, out end); + + while (end < tokens.Count && tokens[end].Type == TokenType.Operator && + (tokens[end].Value == ">" || tokens[end].Value == "<" || + tokens[end].Value == ">=" || tokens[end].Value == "<=")) + { + var op = tokens[end].Value; + end++; + var right = 
ParseContainment(tokens, end, out end); + left = new BinaryExpr { Left = left, Operator = op, Right = right }; + } + + return left; + } + + private static Expr ParseContainment(List tokens, int start, out int end) + { + var left = ParseAdditive(tokens, start, out end); + + if (end < tokens.Count && tokens[end].Type == TokenType.Keyword && tokens[end].Value == "in") + { + end++; + var right = ParseAdditive(tokens, end, out end); + left = new BinaryExpr { Left = left, Operator = "in", Right = right }; + } + + return left; + } + + private static Expr ParseAdditive(List tokens, int start, out int end) + { + var left = ParseMultiplicative(tokens, start, out end); + + while (end < tokens.Count && tokens[end].Type == TokenType.Operator && + (tokens[end].Value == "+" || tokens[end].Value == "-")) + { + var op = tokens[end].Value; + end++; + var right = ParseMultiplicative(tokens, end, out end); + left = new BinaryExpr { Left = left, Operator = op, Right = right }; + } + + return left; + } + + private static Expr ParseMultiplicative(List tokens, int start, out int end) + { + var left = ParseExponentiation(tokens, start, out end); + + while (end < tokens.Count && tokens[end].Type == TokenType.Operator && + (tokens[end].Value == "*" || tokens[end].Value == "/" || tokens[end].Value == "%")) + { + var op = tokens[end].Value; + end++; + var right = ParseExponentiation(tokens, end, out end); + left = new BinaryExpr { Left = left, Operator = op, Right = right }; + } + + return left; + } + + private static Expr ParseExponentiation(List tokens, int start, out int end) + { + var left = ParseUnary(tokens, start, out end); + + if (end < tokens.Count && tokens[end].Type == TokenType.Operator && tokens[end].Value == "**") + { + end++; + var right = ParseExponentiation(tokens, end, out end); // Right associative + left = new BinaryExpr { Left = left, Operator = "**", Right = right }; + } + + return left; + } + + private static Expr ParseUnary(List tokens, int start, out int end) + { + if 
(start < tokens.Count) + { + if ((tokens[start].Type == TokenType.Keyword && tokens[start].Value == "not") || + (tokens[start].Type == TokenType.Operator && tokens[start].Value == "!")) + { + start++; + var operand = ParseUnary(tokens, start, out end); + return new UnaryExpr { Operator = "not", Operand = operand }; + } + + if (tokens[start].Type == TokenType.Operator && tokens[start].Value == "-") + { + start++; + var operand = ParseUnary(tokens, start, out end); + return new UnaryExpr { Operator = "-", Operand = operand }; + } + } + + return ParsePrimary(tokens, start, out end); + } + + private static Expr ParsePrimary(List tokens, int start, out int end) + { + if (start >= tokens.Count) + throw new InvalidOperationException("Unexpected end of expression"); + + var token = tokens[start]; + + // Parentheses + if (token.Type == TokenType.Delimiter && token.Value == "(") + { + var expr = ParseExpression(tokens, start + 1, out end); + if (end >= tokens.Count || tokens[end].Type != TokenType.Delimiter || tokens[end].Value != ")") + throw new InvalidOperationException("Missing closing parenthesis"); + end++; + return expr; + } + + // Literals + if (token.Type == TokenType.Number) + { + end = start + 1; + return new LiteralExpr { Value = double.Parse(token.Value, CultureInfo.InvariantCulture) }; + } + + if (token.Type == TokenType.String) + { + end = start + 1; + return new LiteralExpr { Value = token.Value }; + } + + if (token.Type == TokenType.Boolean) + { + end = start + 1; + return new LiteralExpr { Value = token.Value == "true" ? 
1.0 : 0.0 }; + } + + // Identifier (field access) + if (token.Type == TokenType.Identifier) + { + end = start + 1; + return new MemberExpr { Property = token.Value.TrimStart('.') }; + } + + throw new InvalidOperationException($"Unexpected token: {token.Value}"); + } + } +} diff --git a/libs/server/Resp/Vector/Filter/VectorFilterTokenizer.cs b/libs/server/Resp/Vector/Filter/VectorFilterTokenizer.cs new file mode 100644 index 00000000000..2b90584d9b0 --- /dev/null +++ b/libs/server/Resp/Vector/Filter/VectorFilterTokenizer.cs @@ -0,0 +1,123 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +using System; +using System.Collections.Generic; + +namespace Garnet.server.Vector.Filter +{ + /// + /// Token types for vector filter expressions. + /// + internal enum TokenType { Number, String, Boolean, Identifier, Operator, Keyword, Delimiter } + + /// + /// Represents a token in a filter expression. + /// + internal class Token + { + public TokenType Type { get; set; } + public string Value { get; set; } + } + + /// + /// Tokenizer for vector filter expressions. + /// Converts filter strings into tokens for parsing. 
+ /// + internal static class VectorFilterTokenizer + { + public static List Tokenize(string input) + { + var tokens = new List(); + var i = 0; + + while (i < input.Length) + { + // Skip whitespace + if (char.IsWhiteSpace(input[i])) + { + i++; + continue; + } + + // Numbers (treat '-' as negative sign only at start or after operator/keyword/open-paren) + var isNegativeNumber = input[i] == '-' + && i + 1 < input.Length && char.IsDigit(input[i + 1]) + && (tokens.Count == 0 + || tokens[^1].Type == TokenType.Operator + || tokens[^1].Type == TokenType.Keyword + || (tokens[^1].Type == TokenType.Delimiter && tokens[^1].Value == "(")); + + if (char.IsDigit(input[i]) || isNegativeNumber) + { + var start = i; + if (input[i] == '-') i++; + while (i < input.Length && (char.IsDigit(input[i]) || input[i] == '.')) + i++; + tokens.Add(new Token { Type = TokenType.Number, Value = input.Substring(start, i - start) }); + continue; + } + + // Identifiers and keywords (field names starting with .) + if (input[i] == '.' 
|| char.IsLetter(input[i]) || input[i] == '_') + { + var start = i; + while (i < input.Length && (char.IsLetterOrDigit(input[i]) || input[i] == '_' || input[i] == '.')) + i++; + var value = input.Substring(start, i - start); + + // Check for keywords + if (value == "and" || value == "or" || value == "not" || value == "in") + tokens.Add(new Token { Type = TokenType.Keyword, Value = value }); + else if (value == "true" || value == "false") + tokens.Add(new Token { Type = TokenType.Boolean, Value = value }); + else + tokens.Add(new Token { Type = TokenType.Identifier, Value = value }); + continue; + } + + // String literals + if (input[i] == '"' || input[i] == '\'') + { + var quote = input[i]; + var start = ++i; + while (i < input.Length && input[i] != quote) + { + if (input[i] == '\\' && i + 1 < input.Length) i++; // Skip escaped characters + i++; + } + if (i >= input.Length) + throw new InvalidOperationException($"Unterminated string literal starting at position {start - 1}"); + tokens.Add(new Token { Type = TokenType.String, Value = input.Substring(start, i - start) }); + i++; // Skip closing quote + continue; + } + + // Two-character operators + if (i + 1 < input.Length) + { + var twoChar = input.Substring(i, 2); + if (twoChar == "==" || twoChar == "!=" || twoChar == ">=" || twoChar == "<=" || + twoChar == "&&" || twoChar == "||" || twoChar == "**") + { + tokens.Add(new Token { Type = TokenType.Operator, Value = twoChar }); + i += 2; + continue; + } + } + + // Single-character operators and delimiters + if ("+-*/%> + /// Apply post-filtering to vector search results based on JSON path filter expression. 
+ /// + private int ApplyPostFilter( + ReadOnlySpan filter, + int numResults, + ref SpanByteAndMemory outputIds, + ref SpanByteAndMemory outputDistances, + ref SpanByteAndMemory outputAttributes) + { + if (numResults == 0) + { + return numResults; + } + + var filterStr = Encoding.UTF8.GetString(filter); + var filteredCount = 0; + + // Parse the filter expression once, then evaluate per result + var tokens = VectorFilterTokenizer.Tokenize(filterStr); + var filterExpr = VectorFilterParser.ParseExpression(tokens, 0, out _); + + var idsSpan = outputIds.AsSpan(); + var distancesSpan = MemoryMarshal.Cast(outputDistances.AsSpan()); + var attributesSpan = outputAttributes.AsSpan(); + + var idReadPos = 0; + var attrReadPos = 0; + var idWritePos = 0; + var distWritePos = 0; + var attrWritePos = 0; + + for (var i = 0; i < numResults; i++) + { + // Read ID + var idLen = BinaryPrimitives.ReadInt32LittleEndian(idsSpan[idReadPos..]); + var idTotalLen = sizeof(int) + idLen; + + // Read attribute + var attrLen = BinaryPrimitives.ReadInt32LittleEndian(attributesSpan[attrReadPos..]); + var attrData = attributesSpan.Slice(attrReadPos + sizeof(int), attrLen); + + // Evaluate filter + if (EvaluateFilter(filterExpr, attrData)) + { + // Copy ID if not already in place + if (idReadPos != idWritePos) + { + idsSpan.Slice(idReadPos, idTotalLen).CopyTo(idsSpan[idWritePos..]); + } + + // Copy distance if not already in place + if (i != distWritePos) + { + distancesSpan[distWritePos] = distancesSpan[i]; + } + + // Copy attribute if not already in place + if (attrReadPos != attrWritePos) + { + attributesSpan.Slice(attrReadPos, sizeof(int) + attrLen).CopyTo(attributesSpan[attrWritePos..]); + } + + idWritePos += idTotalLen; + distWritePos++; + attrWritePos += sizeof(int) + attrLen; + filteredCount++; + } + + idReadPos += idTotalLen; + attrReadPos += sizeof(int) + attrLen; + } + + // Update lengths + outputIds.Length = idWritePos; + outputDistances.Length = distWritePos * sizeof(float); + 
outputAttributes.Length = attrWritePos; + + return filteredCount; + } + + /// + /// Evaluate a pre-parsed filter expression against attribute data. + /// + private static bool EvaluateFilter(Expr filterExpr, ReadOnlySpan attributeJson) + { + try + { + using var jsonDoc = JsonDocument.Parse(attributeJson.ToArray()); + var root = jsonDoc.RootElement; + var result = VectorFilterEvaluator.EvaluateExpression(filterExpr, root); + + return VectorFilterEvaluator.IsTruthy(result); + } + catch (Exception ex) when (ex is JsonException or InvalidOperationException) + { + // If filter evaluation fails (malformed JSON or invalid expression), exclude the result + Trace.TraceWarning("Vector filter evaluation failed: {0}", ex); + return false; + } + } + [Conditional("DEBUG")] private static void AssertHaveStorageSession() { diff --git a/test/Garnet.test/RespVectorSetTests.cs b/test/Garnet.test/RespVectorSetTests.cs index e791721f119..5fbc7f59f28 100644 --- a/test/Garnet.test/RespVectorSetTests.cs +++ b/test/Garnet.test/RespVectorSetTests.cs @@ -621,6 +621,116 @@ public void VDIM() // TODO: Add WRONGTYPE behavior check once implemented } + [Test] + public void VSIMWithAttributeFiltering() + { + using var redis = ConnectionMultiplexer.Connect(TestUtils.GetConfig()); + var db = redis.GetDatabase(0); + + _ = db.KeyDelete("foo"); + + // Add first vector with year=1980 + var res1 = db.Execute("VADD", ["foo", "VALUES", "3", "1.0", "2.0", "3.0", new byte[] { 0, 0, 0, 0 }, + "CAS", "Q8", "EF", "16", "M", "32", "SETATTR", "{\"year\":1980}"]); + ClassicAssert.AreEqual(1, (int)res1); + + // Add second vector with year=1960 + var res2 = db.Execute("VADD", ["foo", "VALUES", "3", "2.0", "3.0", "4.0", new byte[] { 0, 0, 0, 1 }, + "CAS", "Q8", "EF", "16", "M", "32", "SETATTR", "{\"year\":1960}"]); + ClassicAssert.AreEqual(1, (int)res2); + + // Add third vector with year=1940 + var res3 = db.Execute("VADD", ["foo", "VALUES", "3", "1.5", "2.5", "3.5", new byte[] { 0, 0, 0, 2 }, + "CAS", "Q8", "EF", 
"16", "M", "32", "SETATTR", "{\"year\":1940}"]); + ClassicAssert.AreEqual(1, (int)res3); + + + // Search with filter for year > 1950 - should return 2 results (years 1980 and 1960) + var res5 = (byte[][])db.Execute("VSIM", ["foo", "VALUES", "3", "0.0", "0.0", "0.0", + "FILTER", ".year > 1950", "COUNT", "3", "WITHATTRIBS"]); + + ClassicAssert.AreEqual(4, res5.Length, + "Should return 2 results (2 pairs of id+attribute) for year > 1950"); + + // Verify both results have year > 1950 + for (var i = 0; i < res5.Length; i += 2) + { + var attr = res5[i + 1]; + var attrStr = Encoding.UTF8.GetString(attr); + ClassicAssert.IsTrue(attrStr.Contains("\"year\":1980") || attrStr.Contains("\"year\":1960"), + $"Result should have year > 1950, got: {attrStr}"); + } + + // Search with filter for year > 1990 - should return NO results since all years are < 1990 + var res4 = (byte[][])db.Execute("VSIM", ["foo", "VALUES", "3", "0.0", "0.0", "0.0", + "FILTER", ".year > 1990", "COUNT", "3", "WITHATTRIBS"]); + + ClassicAssert.AreEqual(0, res4.Length, + "Should return 0 results since no vectors have year > 1990"); + } + + [Test] + public void VSIMWithAdvancedFiltering() + { + using var redis = ConnectionMultiplexer.Connect(TestUtils.GetConfig()); + var db = redis.GetDatabase(0); + + _ = db.KeyDelete("movies"); + + // Add vectors with rich attributes to test advanced filtering + var res1 = db.Execute("VADD", ["movies", "VALUES", "3", "1.0", "2.0", "3.0", new byte[] { 0, 0, 0, 0 }, + "CAS", "Q8", "EF", "16", "M", "32", "SETATTR", "{\"year\":1980,\"rating\":4.5,\"genre\":\"action\",\"tags\":[\"classic\",\"popular\"]}"]); + ClassicAssert.AreEqual(1, (int)res1); + + var res2 = db.Execute("VADD", ["movies", "VALUES", "3", "2.0", "3.0", "4.0", new byte[] { 0, 0, 0, 1 }, + "CAS", "Q8", "EF", "16", "M", "32", "SETATTR", "{\"year\":1960,\"rating\":3.8,\"genre\":\"drama\",\"tags\":[\"classic\"]}"]); + ClassicAssert.AreEqual(1, (int)res2); + + var res3 = db.Execute("VADD", ["movies", "VALUES", "3", 
"1.5", "2.5", "3.5", new byte[] { 0, 0, 0, 2 }, + "CAS", "Q8", "EF", "16", "M", "32", "SETATTR", "{\"year\":2010,\"rating\":4.2,\"genre\":\"action\",\"tags\":[\"modern\"]}"]); + ClassicAssert.AreEqual(1, (int)res3); + + // Test logical AND + var res4 = (byte[][])db.Execute("VSIM", ["movies", "VALUES", "3", "0.0", "0.0", "0.0", + "FILTER", ".year > 1970 and .rating > 4.0", "COUNT", "3", "WITHATTRIBS"]); + ClassicAssert.AreEqual(4, res4.Length, "Logical AND: year > 1970 AND rating > 4.0"); + + // Test logical OR + var res5 = (byte[][])db.Execute("VSIM", ["movies", "VALUES", "3", "0.0", "0.0", "0.0", + "FILTER", ".year < 1970 or .year > 2000", "COUNT", "3", "WITHATTRIBS"]); + ClassicAssert.AreEqual(4, res5.Length, "Logical OR: year < 1970 OR year > 2000"); + + // Test string equality + var res6 = (byte[][])db.Execute("VSIM", ["movies", "VALUES", "3", "0.0", "0.0", "0.0", + "FILTER", ".genre == \"action\"", "COUNT", "3", "WITHATTRIBS"]); + ClassicAssert.AreEqual(4, res6.Length, "String equality: genre == 'action'"); + + // Test arithmetic expression + var res7 = (byte[][])db.Execute("VSIM", ["movies", "VALUES", "3", "0.0", "0.0", "0.0", + "FILTER", ".year / 10 >= 200", "COUNT", "3", "WITHATTRIBS"]); + ClassicAssert.AreEqual(2, res7.Length, "Arithmetic: year / 10 >= 200"); + + // Test parentheses grouping + var res8 = (byte[][])db.Execute("VSIM", ["movies", "VALUES", "3", "0.0", "0.0", "0.0", + "FILTER", "(.year > 2000 or .year < 1970) and .rating >= 4.0", "COUNT", "3", "WITHATTRIBS"]); + ClassicAssert.AreEqual(2, res8.Length, "Parentheses grouping"); + + // Test containment operator (in) + var res9 = (byte[][])db.Execute("VSIM", ["movies", "VALUES", "3", "0.0", "0.0", "0.0", + "FILTER", "\"classic\" in .tags", "COUNT", "3", "WITHATTRIBS"]); + ClassicAssert.AreEqual(4, res9.Length, "Containment: 'classic' in tags"); + + // Test NOT operator + var res10 = (byte[][])db.Execute("VSIM", ["movies", "VALUES", "3", "0.0", "0.0", "0.0", + "FILTER", "not (.genre == \"drama\")", 
"COUNT", "3", "WITHATTRIBS"]); + ClassicAssert.AreEqual(4, res10.Length, "NOT operator: not (genre == 'drama')"); + + // Test complex expression with multiple operators + var res11 = (byte[][])db.Execute("VSIM", ["movies", "VALUES", "3", "0.0", "0.0", "0.0", + "FILTER", ".rating * 2 > 8 and (.year >= 1980 or \"modern\" in .tags)", "COUNT", "3", "WITHATTRIBS"]); + ClassicAssert.AreEqual(4, res11.Length, "Complex: rating*2 > 8 AND (year>=1980 OR 'modern' in tags)"); + } + [Test] public void DeleteVectorSet() { diff --git a/test/Garnet.test/VectorFilterTests.cs b/test/Garnet.test/VectorFilterTests.cs new file mode 100644 index 00000000000..7de71666e1b --- /dev/null +++ b/test/Garnet.test/VectorFilterTests.cs @@ -0,0 +1,541 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text.Json; +using Allure.NUnit; +using Garnet.server.Vector.Filter; +using NUnit.Framework; +using NUnit.Framework.Legacy; + +namespace Garnet.test +{ + [AllureNUnit] + [TestFixture] + public class VectorFilterTests : AllureTestBase + { + #region Helper Methods + + /// + /// Helper to parse a JSON string into a JsonElement for evaluator tests. + /// + private static JsonElement ParseJson(string json) + { + return JsonDocument.Parse(json).RootElement; + } + + /// + /// Helper to tokenize, parse, and evaluate a filter expression against JSON. + /// + private static object EvaluateFilter(string expression, string json) + { + var tokens = VectorFilterTokenizer.Tokenize(expression); + var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); + using var doc = JsonDocument.Parse(json); + return VectorFilterEvaluator.EvaluateExpression(expr, doc.RootElement); + } + + /// + /// Helper to check if a filter expression is truthy against JSON. 
+ /// + private static bool EvaluateFilterTruthy(string expression, string json) + { + return VectorFilterEvaluator.IsTruthy(EvaluateFilter(expression, json)); + } + + #endregion + + #region Tokenizer Tests + + [Test] + public void Tokenizer_IntegerNumbers() + { + var tokens = VectorFilterTokenizer.Tokenize("42"); + ClassicAssert.AreEqual(1, tokens.Count); + ClassicAssert.AreEqual(TokenType.Number, tokens[0].Type); + ClassicAssert.AreEqual("42", tokens[0].Value); + } + + [Test] + public void Tokenizer_DecimalNumbers() + { + var tokens = VectorFilterTokenizer.Tokenize("3.14"); + ClassicAssert.AreEqual(1, tokens.Count); + ClassicAssert.AreEqual(TokenType.Number, tokens[0].Type); + ClassicAssert.AreEqual("3.14", tokens[0].Value); + } + + [Test] + public void Tokenizer_NegativeNumbers() + { + var tokens = VectorFilterTokenizer.Tokenize("-5"); + ClassicAssert.AreEqual(1, tokens.Count); + ClassicAssert.AreEqual(TokenType.Number, tokens[0].Type); + ClassicAssert.AreEqual("-5", tokens[0].Value); + } + + [Test] + public void Tokenizer_StringLiterals() + { + var tokens = VectorFilterTokenizer.Tokenize("\"hello\""); + ClassicAssert.AreEqual(1, tokens.Count); + ClassicAssert.AreEqual(TokenType.String, tokens[0].Type); + ClassicAssert.AreEqual("hello", tokens[0].Value); + + tokens = VectorFilterTokenizer.Tokenize("'world'"); + ClassicAssert.AreEqual(1, tokens.Count); + ClassicAssert.AreEqual(TokenType.String, tokens[0].Type); + ClassicAssert.AreEqual("world", tokens[0].Value); + } + + [Test] + public void Tokenizer_EscapedStringLiterals() + { + var tokens = VectorFilterTokenizer.Tokenize("\"hello\\\"world\""); + ClassicAssert.AreEqual(1, tokens.Count); + ClassicAssert.AreEqual(TokenType.String, tokens[0].Type); + ClassicAssert.AreEqual("hello\\\"world", tokens[0].Value); + } + + [Test] + public void Tokenizer_UnterminatedStringThrows() + { + ClassicAssert.Throws(() => + VectorFilterTokenizer.Tokenize("\"hello")); + } + + [Test] + public void 
Tokenizer_SubtractionNotConfusedWithNegative() + { + // ".a - 5" should tokenize as [.a, -, 5], not [.a, -5] + var tokens = VectorFilterTokenizer.Tokenize(".a - 5"); + ClassicAssert.AreEqual(3, tokens.Count); + ClassicAssert.AreEqual(TokenType.Identifier, tokens[0].Type); + ClassicAssert.AreEqual(TokenType.Operator, tokens[1].Type); + ClassicAssert.AreEqual("-", tokens[1].Value); + ClassicAssert.AreEqual(TokenType.Number, tokens[2].Type); + ClassicAssert.AreEqual("5", tokens[2].Value); + } + + [Test] + public void Tokenizer_Identifiers() + { + var tokens = VectorFilterTokenizer.Tokenize(".year"); + ClassicAssert.AreEqual(1, tokens.Count); + ClassicAssert.AreEqual(TokenType.Identifier, tokens[0].Type); + ClassicAssert.AreEqual(".year", tokens[0].Value); + + tokens = VectorFilterTokenizer.Tokenize("_field"); + ClassicAssert.AreEqual(1, tokens.Count); + ClassicAssert.AreEqual(TokenType.Identifier, tokens[0].Type); + ClassicAssert.AreEqual("_field", tokens[0].Value); + } + + [Test] + public void Tokenizer_Keywords() + { + var keywords = new[] { "and", "or", "not", "in" }; + foreach (var kw in keywords) + { + var tokens = VectorFilterTokenizer.Tokenize(kw); + ClassicAssert.AreEqual(1, tokens.Count); + ClassicAssert.AreEqual(TokenType.Keyword, tokens[0].Type); + ClassicAssert.AreEqual(kw, tokens[0].Value); + } + } + + [Test] + public void Tokenizer_Booleans() + { + var tokens = VectorFilterTokenizer.Tokenize("true"); + ClassicAssert.AreEqual(1, tokens.Count); + ClassicAssert.AreEqual(TokenType.Boolean, tokens[0].Type); + ClassicAssert.AreEqual("true", tokens[0].Value); + + tokens = VectorFilterTokenizer.Tokenize("false"); + ClassicAssert.AreEqual(1, tokens.Count); + ClassicAssert.AreEqual(TokenType.Boolean, tokens[0].Type); + ClassicAssert.AreEqual("false", tokens[0].Value); + } + + [Test] + public void Tokenizer_TwoCharOperators() + { + var ops = new[] { "==", "!=", ">=", "<=", "&&", "||", "**" }; + foreach (var op in ops) + { + var tokens = 
VectorFilterTokenizer.Tokenize($"1 {op} 2"); + var opToken = tokens.First(t => t.Type == TokenType.Operator); + ClassicAssert.AreEqual(op, opToken.Value); + } + } + + [Test] + public void Tokenizer_SingleCharOperators() + { + var ops = new[] { ">", "<", "+", "-", "*", "/", "%", "!" }; + foreach (var op in ops) + { + // Use identifiers to avoid ambiguity with negative numbers for "-" + var tokens = VectorFilterTokenizer.Tokenize($".a {op} .b"); + var opToken = tokens.First(t => t.Type == TokenType.Operator); + ClassicAssert.AreEqual(op, opToken.Value); + } + } + + [Test] + public void Tokenizer_Delimiters() + { + var tokens = VectorFilterTokenizer.Tokenize("(.year > 10)"); + ClassicAssert.AreEqual(TokenType.Delimiter, tokens[0].Type); + ClassicAssert.AreEqual("(", tokens[0].Value); + ClassicAssert.AreEqual(TokenType.Delimiter, tokens[4].Type); + ClassicAssert.AreEqual(")", tokens[4].Value); + } + + [Test] + public void Tokenizer_ComplexExpression() + { + var tokens = VectorFilterTokenizer.Tokenize(".year > 1950 and .rating >= 4.0"); + ClassicAssert.AreEqual(7, tokens.Count); + ClassicAssert.AreEqual(TokenType.Identifier, tokens[0].Type); // .year + ClassicAssert.AreEqual(TokenType.Operator, tokens[1].Type); // > + ClassicAssert.AreEqual(TokenType.Number, tokens[2].Type); // 1950 + ClassicAssert.AreEqual(TokenType.Keyword, tokens[3].Type); // and + ClassicAssert.AreEqual(TokenType.Identifier, tokens[4].Type); // .rating + ClassicAssert.AreEqual(TokenType.Operator, tokens[5].Type); // >= + ClassicAssert.AreEqual(TokenType.Number, tokens[6].Type); // 4.0 + } + + [Test] + public void Tokenizer_EmptyInput() + { + var tokens = VectorFilterTokenizer.Tokenize(""); + ClassicAssert.AreEqual(0, tokens.Count); + + tokens = VectorFilterTokenizer.Tokenize(" "); + ClassicAssert.AreEqual(0, tokens.Count); + } + + #endregion + + #region Parser Tests + + [Test] + public void Parser_NumberLiteral() + { + var tokens = VectorFilterTokenizer.Tokenize("42"); + var expr = 
VectorFilterParser.ParseExpression(tokens, 0, out var end); + ClassicAssert.AreEqual(1, end); + ClassicAssert.IsInstanceOf(expr); + ClassicAssert.AreEqual(42.0, ((LiteralExpr)expr).Value); + } + + [Test] + public void Parser_StringLiteral() + { + var tokens = VectorFilterTokenizer.Tokenize("\"hello\""); + var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); + ClassicAssert.IsInstanceOf(expr); + ClassicAssert.AreEqual("hello", ((LiteralExpr)expr).Value); + } + + [Test] + public void Parser_BooleanLiteral() + { + var tokens = VectorFilterTokenizer.Tokenize("true"); + var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); + ClassicAssert.IsInstanceOf(expr); + ClassicAssert.AreEqual(1.0, ((LiteralExpr)expr).Value); + + tokens = VectorFilterTokenizer.Tokenize("false"); + expr = VectorFilterParser.ParseExpression(tokens, 0, out _); + ClassicAssert.IsInstanceOf(expr); + ClassicAssert.AreEqual(0.0, ((LiteralExpr)expr).Value); + } + + [Test] + public void Parser_MemberAccess() + { + var tokens = VectorFilterTokenizer.Tokenize(".year"); + var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); + ClassicAssert.IsInstanceOf(expr); + ClassicAssert.AreEqual("year", ((MemberExpr)expr).Property); + } + + [Test] + public void Parser_UnaryNot() + { + var tokens = VectorFilterTokenizer.Tokenize("not true"); + var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); + ClassicAssert.IsInstanceOf(expr); + var unary = (UnaryExpr)expr; + ClassicAssert.AreEqual("not", unary.Operator); + ClassicAssert.IsInstanceOf(unary.Operand); + } + + [Test] + public void Parser_UnaryNegation() + { + var tokens = VectorFilterTokenizer.Tokenize(".a + (-.b)"); + var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); + ClassicAssert.IsInstanceOf(expr); + var binary = (BinaryExpr)expr; + ClassicAssert.AreEqual("+", binary.Operator); + ClassicAssert.IsInstanceOf(binary.Right); + ClassicAssert.AreEqual("-", ((UnaryExpr)binary.Right).Operator); + } + + [Test] + 
public void Parser_OperatorPrecedence_MultiplicationBeforeAddition() + { + // 1 + 2 * 3 should parse as 1 + (2 * 3) + var tokens = VectorFilterTokenizer.Tokenize("1 + 2 * 3"); + var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); + ClassicAssert.IsInstanceOf(expr); + var binary = (BinaryExpr)expr; + ClassicAssert.AreEqual("+", binary.Operator); + ClassicAssert.IsInstanceOf(binary.Left); + ClassicAssert.IsInstanceOf(binary.Right); + ClassicAssert.AreEqual("*", ((BinaryExpr)binary.Right).Operator); + } + + [Test] + public void Parser_OperatorPrecedence_AndBeforeOr() + { + // a or b and c should parse as a or (b and c) + var tokens = VectorFilterTokenizer.Tokenize("true or false and true"); + var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); + ClassicAssert.IsInstanceOf(expr); + var binary = (BinaryExpr)expr; + ClassicAssert.AreEqual("or", binary.Operator); + ClassicAssert.IsInstanceOf(binary.Left); + ClassicAssert.IsInstanceOf(binary.Right); + ClassicAssert.AreEqual("and", ((BinaryExpr)binary.Right).Operator); + } + + [Test] + public void Parser_ParenthesesOverridePrecedence() + { + // (1 + 2) * 3 should parse as (1 + 2) * 3 + var tokens = VectorFilterTokenizer.Tokenize("(1 + 2) * 3"); + var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); + ClassicAssert.IsInstanceOf(expr); + var binary = (BinaryExpr)expr; + ClassicAssert.AreEqual("*", binary.Operator); + ClassicAssert.IsInstanceOf(binary.Left); + ClassicAssert.AreEqual("+", ((BinaryExpr)binary.Left).Operator); + } + + [Test] + public void Parser_Containment() + { + var tokens = VectorFilterTokenizer.Tokenize("\"action\" in .tags"); + var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); + ClassicAssert.IsInstanceOf(expr); + var binary = (BinaryExpr)expr; + ClassicAssert.AreEqual("in", binary.Operator); + ClassicAssert.IsInstanceOf(binary.Left); + ClassicAssert.IsInstanceOf(binary.Right); + } + + [Test] + public void Parser_ExponentiationRightAssociative() + { + // 
2 ** 3 ** 2 should parse as 2 ** (3 ** 2) + var tokens = VectorFilterTokenizer.Tokenize("2 ** 3 ** 2"); + var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); + ClassicAssert.IsInstanceOf(expr); + var binary = (BinaryExpr)expr; + ClassicAssert.AreEqual("**", binary.Operator); + ClassicAssert.IsInstanceOf(binary.Left); + ClassicAssert.IsInstanceOf(binary.Right); + ClassicAssert.AreEqual("**", ((BinaryExpr)binary.Right).Operator); + } + + [Test] + public void Parser_ErrorOnUnexpectedEnd() + { + var tokens = new List(); + ClassicAssert.Throws(() => + VectorFilterParser.ParseExpression(tokens, 0, out _)); + } + + [Test] + public void Parser_ErrorOnMissingClosingParen() + { + var tokens = VectorFilterTokenizer.Tokenize("(1 + 2"); + ClassicAssert.Throws(() => + VectorFilterParser.ParseExpression(tokens, 0, out _)); + } + + #endregion + + #region Evaluator Tests + + [Test] + public void Evaluator_Arithmetic() + { + var json = "{}"; + ClassicAssert.AreEqual(5.0, EvaluateFilter("2 + 3", json)); + ClassicAssert.AreEqual(1.0, EvaluateFilter("3 - 2", json)); + ClassicAssert.AreEqual(6.0, EvaluateFilter("2 * 3", json)); + ClassicAssert.AreEqual(2.5, EvaluateFilter("5 / 2", json)); + ClassicAssert.AreEqual(1.0, EvaluateFilter("7 % 3", json)); + ClassicAssert.AreEqual(8.0, EvaluateFilter("2 ** 3", json)); + } + + [Test] + public void Evaluator_SubtractionWithField() + { + var json = "{\"year\":1980}"; + ClassicAssert.AreEqual(1975.0, EvaluateFilter(".year - 5", json)); + ClassicAssert.IsTrue(EvaluateFilterTruthy(".year - 5 > 0", json)); + } + + [Test] + public void Evaluator_Comparison() + { + var json = "{}"; + ClassicAssert.AreEqual(1.0, EvaluateFilter("5 > 3", json)); + ClassicAssert.AreEqual(0.0, EvaluateFilter("3 > 5", json)); + ClassicAssert.AreEqual(1.0, EvaluateFilter("3 < 5", json)); + ClassicAssert.AreEqual(0.0, EvaluateFilter("5 < 3", json)); + ClassicAssert.AreEqual(1.0, EvaluateFilter("5 >= 5", json)); + ClassicAssert.AreEqual(1.0, EvaluateFilter("5 <= 
5", json)); + ClassicAssert.AreEqual(1.0, EvaluateFilter("5 == 5", json)); + ClassicAssert.AreEqual(1.0, EvaluateFilter("5 != 3", json)); + ClassicAssert.AreEqual(0.0, EvaluateFilter("5 != 5", json)); + } + + [Test] + public void Evaluator_LogicalAnd() + { + var json = "{}"; + ClassicAssert.IsTrue(EvaluateFilterTruthy("true and true", json)); + ClassicAssert.IsFalse(EvaluateFilterTruthy("true and false", json)); + ClassicAssert.IsFalse(EvaluateFilterTruthy("false and true", json)); + // Also test && syntax + ClassicAssert.IsTrue(EvaluateFilterTruthy("true && true", json)); + } + + [Test] + public void Evaluator_LogicalOr() + { + var json = "{}"; + ClassicAssert.IsTrue(EvaluateFilterTruthy("true or false", json)); + ClassicAssert.IsTrue(EvaluateFilterTruthy("false or true", json)); + ClassicAssert.IsFalse(EvaluateFilterTruthy("false or false", json)); + // Also test || syntax + ClassicAssert.IsTrue(EvaluateFilterTruthy("false || true", json)); + } + + [Test] + public void Evaluator_LogicalNot() + { + var json = "{}"; + ClassicAssert.IsFalse(EvaluateFilterTruthy("not true", json)); + ClassicAssert.IsTrue(EvaluateFilterTruthy("not false", json)); + } + + [Test] + public void Evaluator_StringEquality() + { + var json = "{\"genre\":\"action\"}"; + ClassicAssert.IsTrue(EvaluateFilterTruthy(".genre == \"action\"", json)); + ClassicAssert.IsFalse(EvaluateFilterTruthy(".genre == \"drama\"", json)); + ClassicAssert.IsTrue(EvaluateFilterTruthy(".genre != \"drama\"", json)); + } + + [Test] + public void Evaluator_MemberAccess() + { + var json = "{\"year\":1980,\"rating\":4.5}"; + ClassicAssert.AreEqual(1980.0, EvaluateFilter(".year", json)); + ClassicAssert.AreEqual(4.5, EvaluateFilter(".rating", json)); + } + + [Test] + public void Evaluator_MissingFieldReturnsNull() + { + var json = "{\"year\":1980}"; + var result = EvaluateFilter(".missing", json); + ClassicAssert.IsNull(result); + ClassicAssert.IsFalse(EvaluateFilterTruthy(".missing", json)); + } + + [Test] + public void 
Evaluator_InOperatorWithArray() + { + var json = "{\"tags\":[\"classic\",\"popular\"]}"; + ClassicAssert.IsTrue(EvaluateFilterTruthy("\"classic\" in .tags", json)); + ClassicAssert.IsTrue(EvaluateFilterTruthy("\"popular\" in .tags", json)); + ClassicAssert.IsFalse(EvaluateFilterTruthy("\"modern\" in .tags", json)); + } + + [Test] + public void Evaluator_InOperatorWithNumericArray() + { + var json = "{\"scores\":[1,2,3]}"; + ClassicAssert.IsTrue(EvaluateFilterTruthy("2 in .scores", json)); + ClassicAssert.IsFalse(EvaluateFilterTruthy("5 in .scores", json)); + } + + [Test] + public void Evaluator_IsTruthy() + { + ClassicAssert.IsFalse(VectorFilterEvaluator.IsTruthy(null)); + ClassicAssert.IsFalse(VectorFilterEvaluator.IsTruthy(0.0)); + ClassicAssert.IsFalse(VectorFilterEvaluator.IsTruthy(0)); + ClassicAssert.IsFalse(VectorFilterEvaluator.IsTruthy("")); + ClassicAssert.IsFalse(VectorFilterEvaluator.IsTruthy(false)); + + ClassicAssert.IsTrue(VectorFilterEvaluator.IsTruthy(1.0)); + ClassicAssert.IsTrue(VectorFilterEvaluator.IsTruthy(-1.0)); + ClassicAssert.IsTrue(VectorFilterEvaluator.IsTruthy(42)); + ClassicAssert.IsTrue(VectorFilterEvaluator.IsTruthy("hello")); + ClassicAssert.IsTrue(VectorFilterEvaluator.IsTruthy(true)); + } + + [Test] + public void Evaluator_ComplexExpression() + { + var json = "{\"year\":1980,\"rating\":4.5,\"genre\":\"action\",\"tags\":[\"classic\",\"popular\"]}"; + + // .rating * 2 > 8 and (.year >= 1980 or "modern" in .tags) + ClassicAssert.IsTrue(EvaluateFilterTruthy( + ".rating * 2 > 8 and (.year >= 1980 or \"modern\" in .tags)", json)); + + // (.year > 2000 or .year < 1970) and .rating >= 4.0 + ClassicAssert.IsFalse(EvaluateFilterTruthy( + "(.year > 2000 or .year < 1970) and .rating >= 4.0", json)); + + // not (.genre == "drama") + ClassicAssert.IsTrue(EvaluateFilterTruthy("not (.genre == \"drama\")", json)); + + // .year / 10 >= 198 + ClassicAssert.IsTrue(EvaluateFilterTruthy(".year / 10 >= 198", json)); + } + + [Test] + public void 
Evaluator_ComparisonWithMissingField() + { + var json = "{\"year\":1980}"; + // Missing field compared to number: ToNumber(null) = 0, so 0 > 1950 is false + ClassicAssert.IsFalse(EvaluateFilterTruthy(".missing > 1950", json)); + } + + [Test] + public void Evaluator_BooleanJsonValues() + { + var json = "{\"active\":true,\"deleted\":false}"; + ClassicAssert.IsTrue(EvaluateFilterTruthy(".active", json)); + ClassicAssert.IsFalse(EvaluateFilterTruthy(".deleted", json)); + ClassicAssert.IsTrue(EvaluateFilterTruthy(".active == true", json)); + } + + #endregion + } +} From a57c6d5fbaeafe081b306abb65b9f772fbcdde70 Mon Sep 17 00:00:00 2001 From: Haiyang Xu Date: Thu, 19 Feb 2026 17:01:24 -0800 Subject: [PATCH 02/31] fix format --- libs/server/Resp/Vector/Filter/VectorFilterEvaluator.cs | 2 +- libs/server/Resp/Vector/Filter/VectorFilterExpression.cs | 2 +- libs/server/Resp/Vector/Filter/VectorFilterParser.cs | 2 +- libs/server/Resp/Vector/Filter/VectorFilterTokenizer.cs | 2 +- test/Garnet.test/VectorFilterTests.cs | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/libs/server/Resp/Vector/Filter/VectorFilterEvaluator.cs b/libs/server/Resp/Vector/Filter/VectorFilterEvaluator.cs index 754d2a3851b..91bbbee0212 100644 --- a/libs/server/Resp/Vector/Filter/VectorFilterEvaluator.cs +++ b/libs/server/Resp/Vector/Filter/VectorFilterEvaluator.cs @@ -144,4 +144,4 @@ private static bool IsIn(object needle, object haystack) return false; } } -} +} \ No newline at end of file diff --git a/libs/server/Resp/Vector/Filter/VectorFilterExpression.cs b/libs/server/Resp/Vector/Filter/VectorFilterExpression.cs index 976109780ac..b1832f28dc6 100644 --- a/libs/server/Resp/Vector/Filter/VectorFilterExpression.cs +++ b/libs/server/Resp/Vector/Filter/VectorFilterExpression.cs @@ -42,4 +42,4 @@ internal class BinaryExpr : Expr public string Operator { get; init; } public Expr Right { get; init; } } -} +} \ No newline at end of file diff --git 
a/libs/server/Resp/Vector/Filter/VectorFilterParser.cs b/libs/server/Resp/Vector/Filter/VectorFilterParser.cs index b4b42efdaa6..b4672f547d3 100644 --- a/libs/server/Resp/Vector/Filter/VectorFilterParser.cs +++ b/libs/server/Resp/Vector/Filter/VectorFilterParser.cs @@ -212,4 +212,4 @@ private static Expr ParsePrimary(List tokens, int start, out int end) throw new InvalidOperationException($"Unexpected token: {token.Value}"); } } -} +} \ No newline at end of file diff --git a/libs/server/Resp/Vector/Filter/VectorFilterTokenizer.cs b/libs/server/Resp/Vector/Filter/VectorFilterTokenizer.cs index 2b90584d9b0..0715e100978 100644 --- a/libs/server/Resp/Vector/Filter/VectorFilterTokenizer.cs +++ b/libs/server/Resp/Vector/Filter/VectorFilterTokenizer.cs @@ -120,4 +120,4 @@ public static List Tokenize(string input) return tokens; } } -} +} \ No newline at end of file diff --git a/test/Garnet.test/VectorFilterTests.cs b/test/Garnet.test/VectorFilterTests.cs index 7de71666e1b..85e2d8a4895 100644 --- a/test/Garnet.test/VectorFilterTests.cs +++ b/test/Garnet.test/VectorFilterTests.cs @@ -538,4 +538,4 @@ public void Evaluator_BooleanJsonValues() #endregion } -} +} \ No newline at end of file From 9c883f7cb67cf62b60c0b690ccbba764ccf8bb8c Mon Sep 17 00:00:00 2001 From: Haiyang <3389245+hailangx@users.noreply.github.com> Date: Thu, 19 Feb 2026 17:04:23 -0800 Subject: [PATCH 03/31] Update libs/server/Resp/Vector/VectorManager.cs Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- libs/server/Resp/Vector/VectorManager.cs | 110 +++++++++++++---------- 1 file changed, 62 insertions(+), 48 deletions(-) diff --git a/libs/server/Resp/Vector/VectorManager.cs b/libs/server/Resp/Vector/VectorManager.cs index 1bc7b4813dc..9532f109a24 100644 --- a/libs/server/Resp/Vector/VectorManager.cs +++ b/libs/server/Resp/Vector/VectorManager.cs @@ -926,69 +926,83 @@ private int ApplyPostFilter( } var filterStr = Encoding.UTF8.GetString(filter); - var filteredCount = 0; - // Parse the 
filter expression once, then evaluate per result - var tokens = VectorFilterTokenizer.Tokenize(filterStr); - var filterExpr = VectorFilterParser.ParseExpression(tokens, 0, out _); + try + { + var filteredCount = 0; - var idsSpan = outputIds.AsSpan(); - var distancesSpan = MemoryMarshal.Cast(outputDistances.AsSpan()); - var attributesSpan = outputAttributes.AsSpan(); + // Parse the filter expression once, then evaluate per result + var tokens = VectorFilterTokenizer.Tokenize(filterStr); + var filterExpr = VectorFilterParser.ParseExpression(tokens, 0, out var endIndex); - var idReadPos = 0; - var attrReadPos = 0; - var idWritePos = 0; - var distWritePos = 0; - var attrWritePos = 0; + // Ensure the entire token stream was consumed by the parser + if (endIndex != tokens.Count) + { + throw new ArgumentException("Invalid filter expression: unexpected tokens after end of expression.", nameof(filter)); + } - for (var i = 0; i < numResults; i++) - { - // Read ID - var idLen = BinaryPrimitives.ReadInt32LittleEndian(idsSpan[idReadPos..]); - var idTotalLen = sizeof(int) + idLen; + var idsSpan = outputIds.AsSpan(); + var distancesSpan = MemoryMarshal.Cast(outputDistances.AsSpan()); + var attributesSpan = outputAttributes.AsSpan(); - // Read attribute - var attrLen = BinaryPrimitives.ReadInt32LittleEndian(attributesSpan[attrReadPos..]); - var attrData = attributesSpan.Slice(attrReadPos + sizeof(int), attrLen); + var idReadPos = 0; + var attrReadPos = 0; + var idWritePos = 0; + var distWritePos = 0; + var attrWritePos = 0; - // Evaluate filter - if (EvaluateFilter(filterExpr, attrData)) + for (var i = 0; i < numResults; i++) { - // Copy ID if not already in place - if (idReadPos != idWritePos) - { - idsSpan.Slice(idReadPos, idTotalLen).CopyTo(idsSpan[idWritePos..]); - } + // Read ID + var idLen = BinaryPrimitives.ReadInt32LittleEndian(idsSpan[idReadPos..]); + var idTotalLen = sizeof(int) + idLen; - // Copy distance if not already in place - if (i != distWritePos) - { - 
distancesSpan[distWritePos] = distancesSpan[i]; - } + // Read attribute + var attrLen = BinaryPrimitives.ReadInt32LittleEndian(attributesSpan[attrReadPos..]); + var attrData = attributesSpan.Slice(attrReadPos + sizeof(int), attrLen); - // Copy attribute if not already in place - if (attrReadPos != attrWritePos) + // Evaluate filter + if (EvaluateFilter(filterExpr, attrData)) { - attributesSpan.Slice(attrReadPos, sizeof(int) + attrLen).CopyTo(attributesSpan[attrWritePos..]); + // Copy ID if not already in place + if (idReadPos != idWritePos) + { + idsSpan.Slice(idReadPos, idTotalLen).CopyTo(idsSpan[idWritePos..]); + } + + // Copy distance if not already in place + if (i != distWritePos) + { + distancesSpan[distWritePos] = distancesSpan[i]; + } + + // Copy attribute if not already in place + if (attrReadPos != attrWritePos) + { + attributesSpan.Slice(attrReadPos, sizeof(int) + attrLen).CopyTo(attributesSpan[attrWritePos..]); + } + + idWritePos += idTotalLen; + distWritePos++; + attrWritePos += sizeof(int) + attrLen; + filteredCount++; } - idWritePos += idTotalLen; - distWritePos++; - attrWritePos += sizeof(int) + attrLen; - filteredCount++; + idReadPos += idTotalLen; + attrReadPos += sizeof(int) + attrLen; } - idReadPos += idTotalLen; - attrReadPos += sizeof(int) + attrLen; - } - - // Update lengths - outputIds.Length = idWritePos; - outputDistances.Length = distWritePos * sizeof(float); - outputAttributes.Length = attrWritePos; + // Update lengths + outputIds.Length = idWritePos; + outputDistances.Length = distWritePos * sizeof(float); + outputAttributes.Length = attrWritePos; - return filteredCount; + return filteredCount; + } + catch (Exception ex) when (ex is ArgumentException || ex is FormatException || ex is InvalidOperationException) + { + throw new ArgumentException("Invalid filter expression.", nameof(filter), ex); + } } /// From faad1894134f80f38fec0819780b0105c1535477 Mon Sep 17 00:00:00 2001 From: Copilot <198982749+Copilot@users.noreply.github.com> Date: 
Thu, 19 Feb 2026 17:09:44 -0800 Subject: [PATCH 04/31] Avoid per-result byte array allocation in EvaluateFilter (#1571) * Initial plan * Avoid per-result allocation in EvaluateFilter by using Utf8JsonReader with ParseValue Co-authored-by: hailangx <3389245+hailangx@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: hailangx <3389245+hailangx@users.noreply.github.com> --- libs/server/Resp/Vector/VectorManager.cs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libs/server/Resp/Vector/VectorManager.cs b/libs/server/Resp/Vector/VectorManager.cs index 9532f109a24..7682453dcd9 100644 --- a/libs/server/Resp/Vector/VectorManager.cs +++ b/libs/server/Resp/Vector/VectorManager.cs @@ -1012,7 +1012,8 @@ private static bool EvaluateFilter(Expr filterExpr, ReadOnlySpan attribute { try { - using var jsonDoc = JsonDocument.Parse(attributeJson.ToArray()); + var reader = new Utf8JsonReader(attributeJson); + using var jsonDoc = JsonDocument.ParseValue(ref reader); var root = jsonDoc.RootElement; var result = VectorFilterEvaluator.EvaluateExpression(filterExpr, root); From c177265438e1b32b9c10db9499186fdb14cad901 Mon Sep 17 00:00:00 2001 From: Copilot <198982749+Copilot@users.noreply.github.com> Date: Thu, 19 Feb 2026 17:27:03 -0800 Subject: [PATCH 05/31] VSIM FILTER works without WITHATTRIBS by fetching attributes internally (#1572) * Initial plan * Fetch attributes internally for filtering when not returning them Co-authored-by: hailangx <3389245+hailangx@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: hailangx <3389245+hailangx@users.noreply.github.com> --- libs/server/Resp/Vector/VectorManager.cs | 45 ++++++++++++++++++------ test/Garnet.test/RespVectorSetTests.cs | 25 +++++++++++++ 2 files changed, 59 insertions(+), 11 deletions(-) diff --git a/libs/server/Resp/Vector/VectorManager.cs 
b/libs/server/Resp/Vector/VectorManager.cs index 7682453dcd9..5e2c323b4c0 100644 --- a/libs/server/Resp/Vector/VectorManager.cs +++ b/libs/server/Resp/Vector/VectorManager.cs @@ -569,14 +569,25 @@ out var continuation // Apply post-filtering if filter is specified if (!filter.IsEmpty) { - if (!includeAttributes) + if (includeAttributes) { - // Filters currently rely on attributes; reject inconsistent request instead of silently ignoring filter - logger?.LogWarning("Filter expression was provided, but includeAttributes is false. Post-filtering requires attributes."); - outputIdFormat = VectorIdFormat.Invalid; - return VectorManagerResult.BadParams; + found = ApplyPostFilter(filter, found, ref outputIds, ref outputDistances, ref outputAttributes); + } + else + { + // Fetch attributes internally for filtering even when not returning them. + // FetchVectorElementAttributes will resize the buffer dynamically if needed. + var tempAttributes = new SpanByteAndMemory(MemoryPool.Shared.Rent(found * 64), found * 64); + try + { + FetchVectorElementAttributes(context, found, outputIds, ref tempAttributes); + found = ApplyPostFilter(filter, found, ref outputIds, ref outputDistances, ref tempAttributes); + } + finally + { + tempAttributes.Memory?.Dispose(); + } } - found = ApplyPostFilter(filter, found, ref outputIds, ref outputDistances, ref outputAttributes); } if (continuation != 0) @@ -684,13 +695,25 @@ out var continuation // Apply post-filtering if filter is specified if (!filter.IsEmpty) { - if (!includeAttributes) + if (includeAttributes) { - logger?.LogWarning("Filter expression was provided, but includeAttributes is false. Post-filtering requires attributes."); - outputIdFormat = VectorIdFormat.Invalid; - return VectorManagerResult.BadParams; + found = ApplyPostFilter(filter, found, ref outputIds, ref outputDistances, ref outputAttributes); + } + else + { + // Fetch attributes internally for filtering even when not returning them. 
+ // FetchVectorElementAttributes will resize the buffer dynamically if needed. + var tempAttributes = new SpanByteAndMemory(MemoryPool.Shared.Rent(found * 64), found * 64); + try + { + FetchVectorElementAttributes(context, found, outputIds, ref tempAttributes); + found = ApplyPostFilter(filter, found, ref outputIds, ref outputDistances, ref tempAttributes); + } + finally + { + tempAttributes.Memory?.Dispose(); + } } - found = ApplyPostFilter(filter, found, ref outputIds, ref outputDistances, ref outputAttributes); } if (continuation != 0) diff --git a/test/Garnet.test/RespVectorSetTests.cs b/test/Garnet.test/RespVectorSetTests.cs index 5fbc7f59f28..c77938b6d63 100644 --- a/test/Garnet.test/RespVectorSetTests.cs +++ b/test/Garnet.test/RespVectorSetTests.cs @@ -669,6 +669,31 @@ public void VSIMWithAttributeFiltering() "Should return 0 results since no vectors have year > 1990"); } + [Test] + public void VSIMWithFilterButWithoutWithAttribs() + { + using var redis = ConnectionMultiplexer.Connect(TestUtils.GetConfig()); + var db = redis.GetDatabase(0); + + _ = db.KeyDelete("foo"); + + // Add vectors with attributes + db.Execute("VADD", ["foo", "VALUES", "3", "1.0", "2.0", "3.0", new byte[] { 0, 0, 0, 0 }, + "CAS", "Q8", "EF", "16", "M", "32", "SETATTR", "{\"year\":1980}"]); + db.Execute("VADD", ["foo", "VALUES", "3", "2.0", "3.0", "4.0", new byte[] { 0, 0, 0, 1 }, + "CAS", "Q8", "EF", "16", "M", "32", "SETATTR", "{\"year\":1960}"]); + db.Execute("VADD", ["foo", "VALUES", "3", "1.5", "2.5", "3.5", new byte[] { 0, 0, 0, 2 }, + "CAS", "Q8", "EF", "16", "M", "32", "SETATTR", "{\"year\":1940}"]); + + // FILTER without WITHATTRIBS should work: fetch attributes internally and apply filter + var res = (byte[][])db.Execute("VSIM", ["foo", "VALUES", "3", "0.0", "0.0", "0.0", + "FILTER", ".year > 1950", "COUNT", "3"]); + + // Should return only 2 element ids (no attributes since WITHATTRIBS not specified) + ClassicAssert.AreEqual(2, res.Length, + "Should return 2 element ids 
(year > 1950) without attributes"); + } + [Test] public void VSIMWithAdvancedFiltering() { From 97010eef77a3b51b77a15e65b37aaa63de3fa3f3 Mon Sep 17 00:00:00 2001 From: Haiyang Xu Date: Tue, 24 Feb 2026 11:30:27 -0800 Subject: [PATCH 06/31] optimize code --- .../Vector/Filter/VectorFilterEvaluator.cs | 216 +++++++++++------- .../Vector/Filter/VectorFilterExpression.cs | 144 +++++++++++- .../Resp/Vector/Filter/VectorFilterParser.cs | 41 ++-- .../Vector/Filter/VectorFilterTokenizer.cs | 108 +++++++-- 4 files changed, 387 insertions(+), 122 deletions(-) diff --git a/libs/server/Resp/Vector/Filter/VectorFilterEvaluator.cs b/libs/server/Resp/Vector/Filter/VectorFilterEvaluator.cs index 91bbbee0212..eda8053c9e5 100644 --- a/libs/server/Resp/Vector/Filter/VectorFilterEvaluator.cs +++ b/libs/server/Resp/Vector/Filter/VectorFilterEvaluator.cs @@ -2,6 +2,7 @@ // Licensed under the MIT license. using System; +using System.Runtime.CompilerServices; using System.Text.Json; namespace Garnet.server.Vector.Filter @@ -9,95 +10,133 @@ namespace Garnet.server.Vector.Filter /// /// Evaluator for vector filter expressions. /// Evaluates parsed expression trees against JSON attribute data. + /// Returns FilterValue (a struct) to avoid boxing allocations on every evaluation. /// internal static class VectorFilterEvaluator { - public static object EvaluateExpression(Expr expr, JsonElement root) + /// + /// Evaluate a filter expression against a JSON element. + /// Returns a FilterValue (struct) — no boxing occurs for numeric results. 
+ /// + public static FilterValue EvaluateExpression(Expr expr, JsonElement root) { if (expr is LiteralExpr lit) return lit.Value; if (expr is MemberExpr member) + return EvaluateMember(member, root); + + if (expr is UnaryExpr unary) + return EvaluateUnary(unary, root); + + if (expr is BinaryExpr binary) + return EvaluateBinary(binary, root); + + return FilterValue.Null; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static FilterValue EvaluateMember(MemberExpr member, JsonElement root) + { + if (root.TryGetProperty(member.Property, out var value)) { - if (root.TryGetProperty(member.Property, out var value)) + return value.ValueKind switch { - return value.ValueKind switch - { - JsonValueKind.Number => value.GetDouble(), - JsonValueKind.String => value.GetString(), - JsonValueKind.True => 1.0, - JsonValueKind.False => 0.0, - JsonValueKind.Array => value, - _ => null - }; - } - return null; + JsonValueKind.Number => FilterValue.FromNumber(value.GetDouble()), + JsonValueKind.String => FilterValue.FromString(value.GetString()), + JsonValueKind.True => FilterValue.True, + JsonValueKind.False => FilterValue.False, + JsonValueKind.Array => FilterValue.FromJsonElement(value), + _ => FilterValue.Null + }; } + return FilterValue.Null; + } - if (expr is UnaryExpr unary) + private static FilterValue EvaluateUnary(UnaryExpr unary, JsonElement root) + { + var operand = EvaluateExpression(unary.Operand, root); + return unary.Operator switch + { + OperatorKind.Not => IsTruthy(operand) ? 
FilterValue.False : FilterValue.True, + OperatorKind.Negate => FilterValue.FromNumber(-ToNumber(operand)), + _ => throw new InvalidOperationException($"Unknown unary operator: {unary.Operator}") + }; + } + + private static FilterValue EvaluateBinary(BinaryExpr binary, JsonElement root) + { + // Short-circuit logical operators + if (binary.Operator == OperatorKind.And) { - var operand = EvaluateExpression(unary.Operand, root); - if (unary.Operator == "not" || unary.Operator == "!") - return IsTruthy(operand) ? 0.0 : 1.0; - if (unary.Operator == "-") - return -(ToNumber(operand)); - throw new InvalidOperationException($"Unknown unary operator: {unary.Operator}"); + var left = EvaluateExpression(binary.Left, root); + if (!IsTruthy(left)) return FilterValue.False; + var right = EvaluateExpression(binary.Right, root); + return IsTruthy(right) ? FilterValue.True : FilterValue.False; } - if (expr is BinaryExpr binary) + if (binary.Operator == OperatorKind.Or) { - // Short-circuit logical operators - if (binary.Operator is "and" or "&&") - { - var left = EvaluateExpression(binary.Left, root); - if (!IsTruthy(left)) return 0.0; - var right = EvaluateExpression(binary.Right, root); - return IsTruthy(right) ? 1.0 : 0.0; - } + var left = EvaluateExpression(binary.Left, root); + if (IsTruthy(left)) return FilterValue.True; + var right = EvaluateExpression(binary.Right, root); + return IsTruthy(right) ? FilterValue.True : FilterValue.False; + } - if (binary.Operator is "or" or "||") - { - var left = EvaluateExpression(binary.Left, root); - if (IsTruthy(left)) return 1.0; - var right = EvaluateExpression(binary.Right, root); - return IsTruthy(right) ? 
1.0 : 0.0; - } + { + var left = EvaluateExpression(binary.Left, root); + var right = EvaluateExpression(binary.Right, root); + return binary.Operator switch { - var left = EvaluateExpression(binary.Left, root); - var right = EvaluateExpression(binary.Right, root); - - return binary.Operator switch - { - "+" => ToNumber(left) + ToNumber(right), - "-" => ToNumber(left) - ToNumber(right), - "*" => ToNumber(left) * ToNumber(right), - "/" => ToNumber(left) / ToNumber(right), - "%" => ToNumber(left) % ToNumber(right), - "**" => Math.Pow(ToNumber(left), ToNumber(right)), - ">" => ToNumber(left) > ToNumber(right) ? 1.0 : 0.0, - "<" => ToNumber(left) < ToNumber(right) ? 1.0 : 0.0, - ">=" => ToNumber(left) >= ToNumber(right) ? 1.0 : 0.0, - "<=" => ToNumber(left) <= ToNumber(right) ? 1.0 : 0.0, - "==" => AreEqual(left, right) ? 1.0 : 0.0, - "!=" => !AreEqual(left, right) ? 1.0 : 0.0, - "in" => IsIn(left, right) ? 1.0 : 0.0, - _ => throw new InvalidOperationException($"Unknown operator: {binary.Operator}") - }; - } + OperatorKind.Add => FilterValue.FromNumber(ToNumber(left) + ToNumber(right)), + OperatorKind.Subtract => FilterValue.FromNumber(ToNumber(left) - ToNumber(right)), + OperatorKind.Multiply => FilterValue.FromNumber(ToNumber(left) * ToNumber(right)), + OperatorKind.Divide => FilterValue.FromNumber(ToNumber(left) / ToNumber(right)), + OperatorKind.Modulo => FilterValue.FromNumber(ToNumber(left) % ToNumber(right)), + OperatorKind.Power => FilterValue.FromNumber(Math.Pow(ToNumber(left), ToNumber(right))), + OperatorKind.GreaterThan => FilterValue.FromBool(ToNumber(left) > ToNumber(right)), + OperatorKind.LessThan => FilterValue.FromBool(ToNumber(left) < ToNumber(right)), + OperatorKind.GreaterEqual => FilterValue.FromBool(ToNumber(left) >= ToNumber(right)), + OperatorKind.LessEqual => FilterValue.FromBool(ToNumber(left) <= ToNumber(right)), + OperatorKind.Equal => FilterValue.FromBool(AreEqual(left, right)), + OperatorKind.NotEqual => 
FilterValue.FromBool(!AreEqual(left, right)), + OperatorKind.In => FilterValue.FromBool(IsIn(left, right)), + _ => throw new InvalidOperationException($"Unknown operator: {binary.Operator}") + }; } + } - return null; + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static double ToNumber(FilterValue value) + { + return value.Kind switch + { + FilterValueKind.Number => value.AsNumber(), + FilterValueKind.String => double.TryParse(value.AsString(), out var result) ? result : 0, + _ => 0 + }; } - private static double ToNumber(object value) + /// + /// Determine if a FilterValue is truthy. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static bool IsTruthy(FilterValue value) { - if (value is double d) return d; - if (value is int i) return i; - if (value is string s && double.TryParse(s, out var result)) return result; - return 0; + return value.Kind switch + { + FilterValueKind.Number => value.AsNumber() != 0, + FilterValueKind.String => !string.IsNullOrEmpty(value.AsString()), + FilterValueKind.Null => false, + _ => true // JsonArray etc. are truthy + }; } + /// + /// Overload accepting object for backward compatibility with tests that pass + /// boxed int, bool, string, double, or null directly. 
+ /// public static bool IsTruthy(object value) { if (value == null) return false; @@ -105,40 +144,51 @@ public static bool IsTruthy(object value) if (value is int i) return i != 0; if (value is string s) return !string.IsNullOrEmpty(s); if (value is bool b) return b; + if (value is FilterValue fv) return IsTruthy(fv); return true; } - private static bool AreEqual(object left, object right) + private static bool AreEqual(FilterValue left, FilterValue right) { - if (left == null && right == null) return true; - if (left == null || right == null) return false; + if (left.IsNull && right.IsNull) return true; + if (left.IsNull || right.IsNull) return false; - if (left is double || right is double) + // Both are numbers — fast numeric comparison + if (left.Kind == FilterValueKind.Number && right.Kind == FilterValueKind.Number) + return Math.Abs(left.AsNumber() - right.AsNumber()) < 0.0001; + + // If either is a number and the other might be convertible + if (left.Kind == FilterValueKind.Number || right.Kind == FilterValueKind.Number) return Math.Abs(ToNumber(left) - ToNumber(right)) < 0.0001; - if (left is string ls && right is string rs) - return ls == rs; + // Both are strings + if (left.Kind == FilterValueKind.String && right.Kind == FilterValueKind.String) + return left.AsString() == right.AsString(); - return left.Equals(right); + return false; } - private static bool IsIn(object needle, object haystack) + private static bool IsIn(FilterValue needle, FilterValue haystack) { - if (haystack is JsonElement elem && elem.ValueKind == JsonValueKind.Array) + if (haystack.Kind == FilterValueKind.JsonArray) { - foreach (var item in elem.EnumerateArray()) + var elem = haystack.AsJsonElement(); + if (elem.ValueKind == JsonValueKind.Array) { - var itemValue = item.ValueKind switch + foreach (var item in elem.EnumerateArray()) { - JsonValueKind.Number => (object)item.GetDouble(), - JsonValueKind.String => item.GetString(), - JsonValueKind.True => 1.0, - JsonValueKind.False => 
0.0, - _ => null - }; - - if (AreEqual(needle, itemValue)) - return true; + var itemValue = item.ValueKind switch + { + JsonValueKind.Number => FilterValue.FromNumber(item.GetDouble()), + JsonValueKind.String => FilterValue.FromString(item.GetString()), + JsonValueKind.True => FilterValue.True, + JsonValueKind.False => FilterValue.False, + _ => FilterValue.Null + }; + + if (AreEqual(needle, itemValue)) + return true; + } } } return false; diff --git a/libs/server/Resp/Vector/Filter/VectorFilterExpression.cs b/libs/server/Resp/Vector/Filter/VectorFilterExpression.cs index b1832f28dc6..fe0d17c854e 100644 --- a/libs/server/Resp/Vector/Filter/VectorFilterExpression.cs +++ b/libs/server/Resp/Vector/Filter/VectorFilterExpression.cs @@ -1,8 +1,128 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT license. +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Text.Json; + namespace Garnet.server.Vector.Filter { + /// + /// Discriminated union value type to eliminate boxing of doubles/strings + /// throughout the filter evaluation pipeline. 
+ /// + [StructLayout(LayoutKind.Auto)] + internal readonly struct FilterValue + { + private readonly double _number; + private readonly string _string; + private readonly JsonElement _jsonElement; + private readonly FilterValueKind _kind; + + private FilterValue(double number) + { + _number = number; + _string = null; + _jsonElement = default; + _kind = FilterValueKind.Number; + } + + private FilterValue(string str) + { + _number = 0; + _string = str; + _jsonElement = default; + _kind = FilterValueKind.String; + } + + private FilterValue(JsonElement element) + { + _number = 0; + _string = null; + _jsonElement = element; + _kind = FilterValueKind.JsonArray; + } + + public FilterValueKind Kind + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get => _kind; + } + + public bool IsNull + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get => _kind == FilterValueKind.Null; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public double AsNumber() => _number; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public string AsString() => _string; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public JsonElement AsJsonElement() => _jsonElement; + + public static readonly FilterValue Null = default; + public static readonly FilterValue True = new(1.0); + public static readonly FilterValue False = new(0.0); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static FilterValue FromNumber(double value) => new(value); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static FilterValue FromString(string value) => new(value); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static FilterValue FromJsonElement(JsonElement value) => new(value); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static FilterValue FromBool(bool value) => value ? 
True : False; + } + + internal enum FilterValueKind : byte + { + Null = 0, + Number = 1, + String = 2, + JsonArray = 3, + } + + /// + /// Enum for operator types, replacing string-based operators + /// to enable integer comparison instead of string comparison on hot paths. + /// + internal enum OperatorKind : byte + { + // Arithmetic + Add, // + + Subtract, // - + Multiply, // * + Divide, // / + Modulo, // % + Power, // ** + + // Comparison + GreaterThan, // > + LessThan, // < + GreaterEqual, // >= + LessEqual, // <= + Equal, // == + NotEqual, // != + + // Logical + And, // and, && + Or, // or, || + Not, // not, ! + + // Containment + In, // in + + // Unary + Negate, // - (unary) + } + /// /// Base class for filter expression tree nodes. /// @@ -10,16 +130,26 @@ internal abstract class Expr { } /// /// Represents a literal value (number, string, boolean). + /// Uses FilterValue to avoid boxing. /// - internal class LiteralExpr : Expr + internal sealed class LiteralExpr : Expr { - public object Value { get; init; } + public FilterValue Value { get; init; } + + // Keep object-returning property for test compatibility + public object BoxedValue => Value.Kind switch + { + FilterValueKind.Number => Value.AsNumber(), + FilterValueKind.String => Value.AsString(), + FilterValueKind.Null => null, + _ => null + }; } /// /// Represents a member access expression (e.g., .year, .rating). /// - internal class MemberExpr : Expr + internal sealed class MemberExpr : Expr { public string Property { get; init; } } @@ -27,19 +157,19 @@ internal class MemberExpr : Expr /// /// Represents a unary operation (e.g., not, -). /// - internal class UnaryExpr : Expr + internal sealed class UnaryExpr : Expr { - public string Operator { get; init; } + public OperatorKind Operator { get; init; } public Expr Operand { get; init; } } /// /// Represents a binary operation (e.g., +, -, ==, and). 
/// - internal class BinaryExpr : Expr + internal sealed class BinaryExpr : Expr { public Expr Left { get; init; } - public string Operator { get; init; } + public OperatorKind Operator { get; init; } public Expr Right { get; init; } } } \ No newline at end of file diff --git a/libs/server/Resp/Vector/Filter/VectorFilterParser.cs b/libs/server/Resp/Vector/Filter/VectorFilterParser.cs index b4672f547d3..345048f4824 100644 --- a/libs/server/Resp/Vector/Filter/VectorFilterParser.cs +++ b/libs/server/Resp/Vector/Filter/VectorFilterParser.cs @@ -28,7 +28,7 @@ private static Expr ParseLogicalOr(List tokens, int start, out int end) { end++; var right = ParseLogicalAnd(tokens, end, out end); - left = new BinaryExpr { Left = left, Operator = "or", Right = right }; + left = new BinaryExpr { Left = left, Operator = OperatorKind.Or, Right = right }; } return left; @@ -44,7 +44,7 @@ private static Expr ParseLogicalAnd(List tokens, int start, out int end) { end++; var right = ParseEquality(tokens, end, out end); - left = new BinaryExpr { Left = left, Operator = "and", Right = right }; + left = new BinaryExpr { Left = left, Operator = OperatorKind.And, Right = right }; } return left; @@ -57,7 +57,7 @@ private static Expr ParseEquality(List tokens, int start, out int end) while (end < tokens.Count && tokens[end].Type == TokenType.Operator && (tokens[end].Value == "==" || tokens[end].Value == "!=")) { - var op = tokens[end].Value; + var op = tokens[end].Value == "==" ? 
OperatorKind.Equal : OperatorKind.NotEqual; end++; var right = ParseComparison(tokens, end, out end); left = new BinaryExpr { Left = left, Operator = op, Right = right }; @@ -74,7 +74,7 @@ private static Expr ParseComparison(List tokens, int start, out int end) (tokens[end].Value == ">" || tokens[end].Value == "<" || tokens[end].Value == ">=" || tokens[end].Value == "<=")) { - var op = tokens[end].Value; + var op = ParseComparisonOperator(tokens[end].Value); end++; var right = ParseContainment(tokens, end, out end); left = new BinaryExpr { Left = left, Operator = op, Right = right }; @@ -83,6 +83,14 @@ private static Expr ParseComparison(List tokens, int start, out int end) return left; } + private static OperatorKind ParseComparisonOperator(string value) + { + // Length check first for fast disambiguation + if (value.Length == 1) + return value[0] == '>' ? OperatorKind.GreaterThan : OperatorKind.LessThan; + return value[0] == '>' ? OperatorKind.GreaterEqual : OperatorKind.LessEqual; + } + private static Expr ParseContainment(List tokens, int start, out int end) { var left = ParseAdditive(tokens, start, out end); @@ -91,7 +99,7 @@ private static Expr ParseContainment(List tokens, int start, out int end) { end++; var right = ParseAdditive(tokens, end, out end); - left = new BinaryExpr { Left = left, Operator = "in", Right = right }; + left = new BinaryExpr { Left = left, Operator = OperatorKind.In, Right = right }; } return left; @@ -104,7 +112,7 @@ private static Expr ParseAdditive(List tokens, int start, out int end) while (end < tokens.Count && tokens[end].Type == TokenType.Operator && (tokens[end].Value == "+" || tokens[end].Value == "-")) { - var op = tokens[end].Value; + var op = tokens[end].Value == "+" ? 
OperatorKind.Add : OperatorKind.Subtract; end++; var right = ParseMultiplicative(tokens, end, out end); left = new BinaryExpr { Left = left, Operator = op, Right = right }; @@ -120,7 +128,12 @@ private static Expr ParseMultiplicative(List tokens, int start, out int e while (end < tokens.Count && tokens[end].Type == TokenType.Operator && (tokens[end].Value == "*" || tokens[end].Value == "/" || tokens[end].Value == "%")) { - var op = tokens[end].Value; + var op = tokens[end].Value[0] switch + { + '*' => OperatorKind.Multiply, + '/' => OperatorKind.Divide, + _ => OperatorKind.Modulo + }; end++; var right = ParseExponentiation(tokens, end, out end); left = new BinaryExpr { Left = left, Operator = op, Right = right }; @@ -137,7 +150,7 @@ private static Expr ParseExponentiation(List tokens, int start, out int e { end++; var right = ParseExponentiation(tokens, end, out end); // Right associative - left = new BinaryExpr { Left = left, Operator = "**", Right = right }; + left = new BinaryExpr { Left = left, Operator = OperatorKind.Power, Right = right }; } return left; @@ -152,14 +165,14 @@ private static Expr ParseUnary(List tokens, int start, out int end) { start++; var operand = ParseUnary(tokens, start, out end); - return new UnaryExpr { Operator = "not", Operand = operand }; + return new UnaryExpr { Operator = OperatorKind.Not, Operand = operand }; } if (tokens[start].Type == TokenType.Operator && tokens[start].Value == "-") { start++; var operand = ParseUnary(tokens, start, out end); - return new UnaryExpr { Operator = "-", Operand = operand }; + return new UnaryExpr { Operator = OperatorKind.Negate, Operand = operand }; } } @@ -183,23 +196,23 @@ private static Expr ParsePrimary(List tokens, int start, out int end) return expr; } - // Literals + // Literals — use FilterValue to avoid boxing doubles if (token.Type == TokenType.Number) { end = start + 1; - return new LiteralExpr { Value = double.Parse(token.Value, CultureInfo.InvariantCulture) }; + return new 
LiteralExpr { Value = FilterValue.FromNumber(double.Parse(token.Value, CultureInfo.InvariantCulture)) }; } if (token.Type == TokenType.String) { end = start + 1; - return new LiteralExpr { Value = token.Value }; + return new LiteralExpr { Value = FilterValue.FromString(token.Value) }; } if (token.Type == TokenType.Boolean) { end = start + 1; - return new LiteralExpr { Value = token.Value == "true" ? 1.0 : 0.0 }; + return new LiteralExpr { Value = token.Value == "true" ? FilterValue.True : FilterValue.False }; } // Identifier (field access) diff --git a/libs/server/Resp/Vector/Filter/VectorFilterTokenizer.cs b/libs/server/Resp/Vector/Filter/VectorFilterTokenizer.cs index 0715e100978..49644a7362c 100644 --- a/libs/server/Resp/Vector/Filter/VectorFilterTokenizer.cs +++ b/libs/server/Resp/Vector/Filter/VectorFilterTokenizer.cs @@ -3,21 +3,29 @@ using System; using System.Collections.Generic; +using System.Runtime.CompilerServices; namespace Garnet.server.Vector.Filter { /// /// Token types for vector filter expressions. /// - internal enum TokenType { Number, String, Boolean, Identifier, Operator, Keyword, Delimiter } + internal enum TokenType : byte { Number, String, Boolean, Identifier, Operator, Keyword, Delimiter } /// /// Represents a token in a filter expression. + /// Value type to avoid per-token heap allocations. 
/// - internal class Token + internal readonly struct Token { - public TokenType Type { get; set; } - public string Value { get; set; } + public Token(TokenType type, string value) + { + Type = type; + Value = value; + } + + public TokenType Type { get; } + public string Value { get; } } /// @@ -26,6 +34,26 @@ internal class Token /// internal static class VectorFilterTokenizer { + // Pre-cached operator strings to avoid per-token string allocations + private const string OpPlus = "+"; + private const string OpMinus = "-"; + private const string OpStar = "*"; + private const string OpSlash = "/"; + private const string OpPercent = "%"; + private const string OpGreater = ">"; + private const string OpLess = "<"; + private const string OpBang = "!"; + private const string OpOpenParen = "("; + private const string OpCloseParen = ")"; + + private const string OpEqualEqual = "=="; + private const string OpBangEqual = "!="; + private const string OpGreaterEqual = ">="; + private const string OpLessEqual = "<="; + private const string OpAmpAmp = "&&"; + private const string OpPipePipe = "||"; + private const string OpStarStar = "**"; + public static List Tokenize(string input) { var tokens = new List(); @@ -46,7 +74,7 @@ public static List Tokenize(string input) && (tokens.Count == 0 || tokens[^1].Type == TokenType.Operator || tokens[^1].Type == TokenType.Keyword - || (tokens[^1].Type == TokenType.Delimiter && tokens[^1].Value == "(")); + || (tokens[^1].Type == TokenType.Delimiter && tokens[^1].Value == OpOpenParen)); if (char.IsDigit(input[i]) || isNegativeNumber) { @@ -54,7 +82,7 @@ public static List Tokenize(string input) if (input[i] == '-') i++; while (i < input.Length && (char.IsDigit(input[i]) || input[i] == '.')) i++; - tokens.Add(new Token { Type = TokenType.Number, Value = input.Substring(start, i - start) }); + tokens.Add(new Token(TokenType.Number, input.Substring(start, i - start))); continue; } @@ -68,11 +96,11 @@ public static List Tokenize(string input) 
// Check for keywords if (value == "and" || value == "or" || value == "not" || value == "in") - tokens.Add(new Token { Type = TokenType.Keyword, Value = value }); + tokens.Add(new Token(TokenType.Keyword, value)); else if (value == "true" || value == "false") - tokens.Add(new Token { Type = TokenType.Boolean, Value = value }); + tokens.Add(new Token(TokenType.Boolean, value)); else - tokens.Add(new Token { Type = TokenType.Identifier, Value = value }); + tokens.Add(new Token(TokenType.Identifier, value)); continue; } @@ -88,28 +116,29 @@ public static List Tokenize(string input) } if (i >= input.Length) throw new InvalidOperationException($"Unterminated string literal starting at position {start - 1}"); - tokens.Add(new Token { Type = TokenType.String, Value = input.Substring(start, i - start) }); + tokens.Add(new Token(TokenType.String, input.Substring(start, i - start))); i++; // Skip closing quote continue; } - // Two-character operators + // Two-character operators — avoid Substring allocation by comparing chars directly if (i + 1 < input.Length) { - var twoChar = input.Substring(i, 2); - if (twoChar == "==" || twoChar == "!=" || twoChar == ">=" || twoChar == "<=" || - twoChar == "&&" || twoChar == "||" || twoChar == "**") + var twoCharOp = MatchTwoCharOperator(input[i], input[i + 1]); + if (twoCharOp != null) { - tokens.Add(new Token { Type = TokenType.Operator, Value = twoChar }); + tokens.Add(new Token(TokenType.Operator, twoCharOp)); i += 2; continue; } } - // Single-character operators and delimiters - if ("+-*/%> Tokenize(string input) return tokens; } + + /// + /// Match a two-character operator. Returns the cached string or null. + /// Avoids Substring(i, 2) allocation on every iteration. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static string MatchTwoCharOperator(char c1, char c2) + { + return c1 switch + { + '=' when c2 == '=' => OpEqualEqual, + '!' 
when c2 == '=' => OpBangEqual, + '>' when c2 == '=' => OpGreaterEqual, + '<' when c2 == '=' => OpLessEqual, + '&' when c2 == '&' => OpAmpAmp, + '|' when c2 == '|' => OpPipePipe, + '*' when c2 == '*' => OpStarStar, + _ => null + }; + } + + /// + /// Match a single-character operator or delimiter. Returns the cached string or null. + /// Avoids char.ToString() allocation. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static string MatchSingleChar(char c) + { + return c switch + { + '+' => OpPlus, + '-' => OpMinus, + '*' => OpStar, + '/' => OpSlash, + '%' => OpPercent, + '>' => OpGreater, + '<' => OpLess, + '!' => OpBang, + '(' => OpOpenParen, + ')' => OpCloseParen, + _ => null + }; + } } } \ No newline at end of file From 7aa8b132c8b94b0cb1bb8e83088d20306813afe8 Mon Sep 17 00:00:00 2001 From: Haiyang Xu Date: Tue, 24 Feb 2026 11:37:15 -0800 Subject: [PATCH 07/31] add Supported vector filter syntax --- test/Garnet.test/VectorFilterTests.cs | 89 +++++++++++++++++++++------ 1 file changed, 71 insertions(+), 18 deletions(-) diff --git a/test/Garnet.test/VectorFilterTests.cs b/test/Garnet.test/VectorFilterTests.cs index 85e2d8a4895..7f0caac68da 100644 --- a/test/Garnet.test/VectorFilterTests.cs +++ b/test/Garnet.test/VectorFilterTests.cs @@ -16,6 +16,21 @@ namespace Garnet.test [TestFixture] public class VectorFilterTests : AllureTestBase { + /// + /// Supported vector filter syntax includes: + /// - Literals: numbers (42, 3.14, -5), strings ("x", 'x'), booleans (true/false) + /// - Member access: .field, .nested.field, _identifier + /// - Arithmetic: +, -, *, /, %, ** (power) + /// - Comparison: >, <, >=, <=, ==, != + /// - Logical: and/or/not and aliases &&, ||, ! + /// - Containment: in (for example, "classic" in .tags) + /// - Grouping: parentheses ( ) + /// + /// Operator precedence (high to low): + /// primary/parentheses, unary, power, multiplicative, additive, in, + /// comparison, equality, and, or. 
+ /// + #region Helper Methods /// @@ -28,13 +43,23 @@ private static JsonElement ParseJson(string json) /// /// Helper to tokenize, parse, and evaluate a filter expression against JSON. + /// Returns object for test backward compatibility. /// private static object EvaluateFilter(string expression, string json) { var tokens = VectorFilterTokenizer.Tokenize(expression); var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); using var doc = JsonDocument.Parse(json); - return VectorFilterEvaluator.EvaluateExpression(expr, doc.RootElement); + var result = VectorFilterEvaluator.EvaluateExpression(expr, doc.RootElement); + + // Convert FilterValue back to object for test assertions + return result.Kind switch + { + FilterValueKind.Number => (object)result.AsNumber(), + FilterValueKind.String => result.AsString(), + FilterValueKind.Null => null, + _ => result.AsNumber() // fallback + }; } /// @@ -42,7 +67,11 @@ private static object EvaluateFilter(string expression, string json) /// private static bool EvaluateFilterTruthy(string expression, string json) { - return VectorFilterEvaluator.IsTruthy(EvaluateFilter(expression, json)); + var tokens = VectorFilterTokenizer.Tokenize(expression); + var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); + using var doc = JsonDocument.Parse(json); + var result = VectorFilterEvaluator.EvaluateExpression(expr, doc.RootElement); + return VectorFilterEvaluator.IsTruthy(result); } #endregion @@ -230,7 +259,9 @@ public void Parser_NumberLiteral() var expr = VectorFilterParser.ParseExpression(tokens, 0, out var end); ClassicAssert.AreEqual(1, end); ClassicAssert.IsInstanceOf(expr); - ClassicAssert.AreEqual(42.0, ((LiteralExpr)expr).Value); + var lit = (LiteralExpr)expr; + ClassicAssert.AreEqual(FilterValueKind.Number, lit.Value.Kind); + ClassicAssert.AreEqual(42.0, lit.Value.AsNumber()); } [Test] @@ -239,7 +270,9 @@ public void Parser_StringLiteral() var tokens = VectorFilterTokenizer.Tokenize("\"hello\""); var expr = 
VectorFilterParser.ParseExpression(tokens, 0, out _); ClassicAssert.IsInstanceOf(expr); - ClassicAssert.AreEqual("hello", ((LiteralExpr)expr).Value); + var lit = (LiteralExpr)expr; + ClassicAssert.AreEqual(FilterValueKind.String, lit.Value.Kind); + ClassicAssert.AreEqual("hello", lit.Value.AsString()); } [Test] @@ -248,12 +281,16 @@ public void Parser_BooleanLiteral() var tokens = VectorFilterTokenizer.Tokenize("true"); var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); ClassicAssert.IsInstanceOf(expr); - ClassicAssert.AreEqual(1.0, ((LiteralExpr)expr).Value); + var lit = (LiteralExpr)expr; + ClassicAssert.AreEqual(FilterValueKind.Number, lit.Value.Kind); + ClassicAssert.AreEqual(1.0, lit.Value.AsNumber()); tokens = VectorFilterTokenizer.Tokenize("false"); expr = VectorFilterParser.ParseExpression(tokens, 0, out _); ClassicAssert.IsInstanceOf(expr); - ClassicAssert.AreEqual(0.0, ((LiteralExpr)expr).Value); + lit = (LiteralExpr)expr; + ClassicAssert.AreEqual(FilterValueKind.Number, lit.Value.Kind); + ClassicAssert.AreEqual(0.0, lit.Value.AsNumber()); } [Test] @@ -272,7 +309,7 @@ public void Parser_UnaryNot() var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); ClassicAssert.IsInstanceOf(expr); var unary = (UnaryExpr)expr; - ClassicAssert.AreEqual("not", unary.Operator); + ClassicAssert.AreEqual(OperatorKind.Not, unary.Operator); ClassicAssert.IsInstanceOf(unary.Operand); } @@ -283,9 +320,9 @@ public void Parser_UnaryNegation() var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); ClassicAssert.IsInstanceOf(expr); var binary = (BinaryExpr)expr; - ClassicAssert.AreEqual("+", binary.Operator); + ClassicAssert.AreEqual(OperatorKind.Add, binary.Operator); ClassicAssert.IsInstanceOf(binary.Right); - ClassicAssert.AreEqual("-", ((UnaryExpr)binary.Right).Operator); + ClassicAssert.AreEqual(OperatorKind.Negate, ((UnaryExpr)binary.Right).Operator); } [Test] @@ -296,10 +333,10 @@ public void 
Parser_OperatorPrecedence_MultiplicationBeforeAddition() var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); ClassicAssert.IsInstanceOf(expr); var binary = (BinaryExpr)expr; - ClassicAssert.AreEqual("+", binary.Operator); + ClassicAssert.AreEqual(OperatorKind.Add, binary.Operator); ClassicAssert.IsInstanceOf(binary.Left); ClassicAssert.IsInstanceOf(binary.Right); - ClassicAssert.AreEqual("*", ((BinaryExpr)binary.Right).Operator); + ClassicAssert.AreEqual(OperatorKind.Multiply, ((BinaryExpr)binary.Right).Operator); } [Test] @@ -310,10 +347,10 @@ public void Parser_OperatorPrecedence_AndBeforeOr() var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); ClassicAssert.IsInstanceOf(expr); var binary = (BinaryExpr)expr; - ClassicAssert.AreEqual("or", binary.Operator); + ClassicAssert.AreEqual(OperatorKind.Or, binary.Operator); ClassicAssert.IsInstanceOf(binary.Left); ClassicAssert.IsInstanceOf(binary.Right); - ClassicAssert.AreEqual("and", ((BinaryExpr)binary.Right).Operator); + ClassicAssert.AreEqual(OperatorKind.And, ((BinaryExpr)binary.Right).Operator); } [Test] @@ -324,9 +361,9 @@ public void Parser_ParenthesesOverridePrecedence() var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); ClassicAssert.IsInstanceOf(expr); var binary = (BinaryExpr)expr; - ClassicAssert.AreEqual("*", binary.Operator); + ClassicAssert.AreEqual(OperatorKind.Multiply, binary.Operator); ClassicAssert.IsInstanceOf(binary.Left); - ClassicAssert.AreEqual("+", ((BinaryExpr)binary.Left).Operator); + ClassicAssert.AreEqual(OperatorKind.Add, ((BinaryExpr)binary.Left).Operator); } [Test] @@ -336,7 +373,7 @@ public void Parser_Containment() var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); ClassicAssert.IsInstanceOf(expr); var binary = (BinaryExpr)expr; - ClassicAssert.AreEqual("in", binary.Operator); + ClassicAssert.AreEqual(OperatorKind.In, binary.Operator); ClassicAssert.IsInstanceOf(binary.Left); ClassicAssert.IsInstanceOf(binary.Right); } @@ -349,10 
+386,10 @@ public void Parser_ExponentiationRightAssociative() var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); ClassicAssert.IsInstanceOf(expr); var binary = (BinaryExpr)expr; - ClassicAssert.AreEqual("**", binary.Operator); + ClassicAssert.AreEqual(OperatorKind.Power, binary.Operator); ClassicAssert.IsInstanceOf(binary.Left); ClassicAssert.IsInstanceOf(binary.Right); - ClassicAssert.AreEqual("**", ((BinaryExpr)binary.Right).Operator); + ClassicAssert.AreEqual(OperatorKind.Power, ((BinaryExpr)binary.Right).Operator); } [Test] @@ -486,6 +523,7 @@ public void Evaluator_InOperatorWithNumericArray() [Test] public void Evaluator_IsTruthy() { + // Test the object-accepting overload for backward compatibility ClassicAssert.IsFalse(VectorFilterEvaluator.IsTruthy(null)); ClassicAssert.IsFalse(VectorFilterEvaluator.IsTruthy(0.0)); ClassicAssert.IsFalse(VectorFilterEvaluator.IsTruthy(0)); @@ -499,6 +537,21 @@ public void Evaluator_IsTruthy() ClassicAssert.IsTrue(VectorFilterEvaluator.IsTruthy(true)); } + [Test] + public void Evaluator_IsTruthy_FilterValue() + { + // Test the FilterValue-accepting overload (the hot-path version) + ClassicAssert.IsFalse(VectorFilterEvaluator.IsTruthy(FilterValue.Null)); + ClassicAssert.IsFalse(VectorFilterEvaluator.IsTruthy(FilterValue.False)); + ClassicAssert.IsFalse(VectorFilterEvaluator.IsTruthy(FilterValue.FromNumber(0.0))); + ClassicAssert.IsFalse(VectorFilterEvaluator.IsTruthy(FilterValue.FromString(""))); + + ClassicAssert.IsTrue(VectorFilterEvaluator.IsTruthy(FilterValue.True)); + ClassicAssert.IsTrue(VectorFilterEvaluator.IsTruthy(FilterValue.FromNumber(1.0))); + ClassicAssert.IsTrue(VectorFilterEvaluator.IsTruthy(FilterValue.FromNumber(-1.0))); + ClassicAssert.IsTrue(VectorFilterEvaluator.IsTruthy(FilterValue.FromString("hello"))); + } + [Test] public void Evaluator_ComplexExpression() { From 95ba208abd18fe0139f166cbcfe9cd652a3f8d26 Mon Sep 17 00:00:00 2001 From: Haiyang Xu Date: Tue, 24 Feb 2026 11:43:52 -0800 
Subject: [PATCH 08/31] update doc with syntax --- website/docs/dev/vector-sets.md | 55 +++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/website/docs/dev/vector-sets.md b/website/docs/dev/vector-sets.md index 79fa84f09e1..fbd442deb21 100644 --- a/website/docs/dev/vector-sets.md +++ b/website/docs/dev/vector-sets.md @@ -415,6 +415,61 @@ Garnet calls into the following DiskANN functions: - `context` is always the `Context` value created by Garnet and stored in [`Index`](#indexes) for a Vector Set, this implies it is always a non-0 multiple of 8 - `search_vector`, `search_element`, and `continue_search` all return the number of ids written into `output_ids`, and if there are more values to return they set the `nint` _pointed to by_ `continuation` or `new_continuation` +## Vector Filter Expressions (`VSIM ... FILTER`) + +`VSIM` supports `FILTER <expression>` for attribute-based post filtering. + +### Expression syntax + +- Arithmetic: `+`, `-`, `*`, `/`, `%`, `**` +- Comparison: `==`, `!=`, `>`, `<`, `>=`, `<=` +- Logical: `and`, `or`, `not` (also `&&`, `||`, `!`) +- Containment: `in` +- Grouping: parentheses `()` + +Field access uses dot notation (for example, `.year`, `.rating`, `.genre`). + +### Supported values + +- Numbers +- Strings +- Booleans (`true` / `false`, evaluated as `1` / `0`) +- Arrays (for `in` when the right side is an attribute array) + +### Operator precedence (high to low) + +1. primary / parentheses +2. unary (`not`, `!`, unary `-`) +3. power (`**`, right-associative) +4. multiplicative (`*`, `/`, `%`) +5. additive (`+`, `-`) +6. containment (`in`) +7. comparison (`>`, `<`, `>=`, `<=`) +8. equality (`==`, `!=`) +9. logical and (`and`, `&&`) +10. 
logical or (`or`, `||`) + +### Notes + +- Keywords are lowercase (`and`, `or`, `not`, `in`, `true`, `false`) +- Missing attributes are treated as non-matching (null/falsy) +- Array literals inside expressions (for example, `.director in ["a","b"]`) are not currently supported + +### Examples + +```text +VSIM movies ELE dune FILTER '.year >= 1980 and .rating > 7' +VSIM movies ELE dune FILTER '.genre == "action" && .rating > 8.0' +VSIM movies ELE dune FILTER '"classic" in .tags' +VSIM movies ELE dune FILTER '(.year - 2000) ** 2 < 100 and .rating / 2 > 4' +``` + +### Reference + +- Redis `VSIM`: https://redis.io/docs/latest/commands/vsim/ +- Redis vector sets: https://redis.io/docs/latest/develop/data-types/vector-sets/ +- Redis filter expressions: https://redis.io/docs/latest/develop/data-types/vector-sets/filtered-search/ + > [!IMPORTANT] > These p/invoke definitions are all a little rough and should be cleaned up. > From 54064a0b161b7d9531c430b7ca4cb1956783943f Mon Sep 17 00:00:00 2001 From: Haiyang Xu Date: Tue, 24 Feb 2026 13:20:17 -0800 Subject: [PATCH 09/31] fix build --- test/Garnet.test/VectorFilterTests.cs | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/test/Garnet.test/VectorFilterTests.cs b/test/Garnet.test/VectorFilterTests.cs index 7f0caac68da..5331a1417f0 100644 --- a/test/Garnet.test/VectorFilterTests.cs +++ b/test/Garnet.test/VectorFilterTests.cs @@ -16,20 +16,18 @@ namespace Garnet.test [TestFixture] public class VectorFilterTests : AllureTestBase { - /// - /// Supported vector filter syntax includes: - /// - Literals: numbers (42, 3.14, -5), strings ("x", 'x'), booleans (true/false) - /// - Member access: .field, .nested.field, _identifier - /// - Arithmetic: +, -, *, /, %, ** (power) - /// - Comparison: >, <, >=, <=, ==, != - /// - Logical: and/or/not and aliases &&, ||, ! 
- /// - Containment: in (for example, "classic" in .tags) - /// - Grouping: parentheses ( ) - /// - /// Operator precedence (high to low): - /// primary/parentheses, unary, power, multiplicative, additive, in, - /// comparison, equality, and, or. - /// + // Supported vector filter syntax includes: + // - Literals: numbers (42, 3.14, -5), strings ("x", 'x'), booleans (true/false) + // - Member access: .field, .nested.field, _identifier + // - Arithmetic: +, -, *, /, %, ** (power) + // - Comparison: >, <, >=, <=, ==, != + // - Logical: and/or/not and aliases &&, ||, ! + // - Containment: in (for example, "classic" in .tags) + // - Grouping: parentheses ( ) + // + // Operator precedence (high to low): + // primary/parentheses, unary, power, multiplicative, additive, in, + // comparison, equality, and, or. #region Helper Methods From aa07eb032fefd7214f0513171c6e297ad27657d0 Mon Sep 17 00:00:00 2001 From: Haiyang Xu Date: Tue, 24 Feb 2026 14:58:58 -0800 Subject: [PATCH 10/31] update test with ELE style syntax --- test/Garnet.test/RespVectorSetTests.cs | 77 +++++++++++++++++++++----- website/docs/dev/vector-sets.md | 2 + 2 files changed, 65 insertions(+), 14 deletions(-) diff --git a/test/Garnet.test/RespVectorSetTests.cs b/test/Garnet.test/RespVectorSetTests.cs index c77938b6d63..af688f2270d 100644 --- a/test/Garnet.test/RespVectorSetTests.cs +++ b/test/Garnet.test/RespVectorSetTests.cs @@ -700,20 +700,7 @@ public void VSIMWithAdvancedFiltering() using var redis = ConnectionMultiplexer.Connect(TestUtils.GetConfig()); var db = redis.GetDatabase(0); - _ = db.KeyDelete("movies"); - - // Add vectors with rich attributes to test advanced filtering - var res1 = db.Execute("VADD", ["movies", "VALUES", "3", "1.0", "2.0", "3.0", new byte[] { 0, 0, 0, 0 }, - "CAS", "Q8", "EF", "16", "M", "32", "SETATTR", "{\"year\":1980,\"rating\":4.5,\"genre\":\"action\",\"tags\":[\"classic\",\"popular\"]}"]); - ClassicAssert.AreEqual(1, (int)res1); - - var res2 = db.Execute("VADD", ["movies", 
"VALUES", "3", "2.0", "3.0", "4.0", new byte[] { 0, 0, 0, 1 }, - "CAS", "Q8", "EF", "16", "M", "32", "SETATTR", "{\"year\":1960,\"rating\":3.8,\"genre\":\"drama\",\"tags\":[\"classic\"]}"]); - ClassicAssert.AreEqual(1, (int)res2); - - var res3 = db.Execute("VADD", ["movies", "VALUES", "3", "1.5", "2.5", "3.5", new byte[] { 0, 0, 0, 2 }, - "CAS", "Q8", "EF", "16", "M", "32", "SETATTR", "{\"year\":2010,\"rating\":4.2,\"genre\":\"action\",\"tags\":[\"modern\"]}"]); - ClassicAssert.AreEqual(1, (int)res3); + _ = SeedMoviesForAdvancedFiltering(db); // Test logical AND var res4 = (byte[][])db.Execute("VSIM", ["movies", "VALUES", "3", "0.0", "0.0", "0.0", @@ -756,6 +743,68 @@ public void VSIMWithAdvancedFiltering() ClassicAssert.AreEqual(4, res11.Length, "Complex: rating*2 > 8 AND (year>=1980 OR 'modern' in tags)"); } + [Test] + public void VSIMWithAdvancedFilteringELEWithAttribs() + { + using var redis = ConnectionMultiplexer.Connect(TestUtils.GetConfig()); + var db = redis.GetDatabase(0); + + var queryElementId = SeedMoviesForAdvancedFiltering(db); + + var res1 = (byte[][])db.Execute("VSIM", ["movies", "ELE", queryElementId, + "FILTER", ".genre == \"action\"", "COUNT", "3", "WITHATTRIBS"]); + ClassicAssert.AreEqual(4, res1.Length, "ELE + FILTER + WITHATTRIBS: genre == 'action'"); + + var res2 = (byte[][])db.Execute("VSIM", ["movies", "ELE", queryElementId, + "FILTER", "\"classic\" in .tags", "COUNT", "3", "WITHATTRIBS"]); + ClassicAssert.AreEqual(4, res2.Length, "ELE + FILTER + WITHATTRIBS: 'classic' in tags"); + + var res3 = (byte[][])db.Execute("VSIM", ["movies", "ELE", queryElementId, + "FILTER", ".rating / 2 > 2 and .year >= 1980", "COUNT", "3", "WITHATTRIBS"]); + ClassicAssert.AreEqual(4, res3.Length, "ELE + FILTER + WITHATTRIBS: arithmetic and comparison"); + } + + [Test] + public void VSIMWithAdvancedFilteringELEWithoutWithAttribs() + { + using var redis = ConnectionMultiplexer.Connect(TestUtils.GetConfig()); + var db = redis.GetDatabase(0); + + var queryElementId 
= SeedMoviesForAdvancedFiltering(db); + + var res1 = (byte[][])db.Execute("VSIM", ["movies", "ELE", queryElementId, + "FILTER", ".genre == \"action\"", "COUNT", "3"]); + ClassicAssert.AreEqual(2, res1.Length, "ELE + FILTER without WITHATTRIBS: genre == 'action'"); + + var res2 = (byte[][])db.Execute("VSIM", ["movies", "ELE", queryElementId, + "FILTER", "\"classic\" in .tags", "COUNT", "3"]); + ClassicAssert.AreEqual(2, res2.Length, "ELE + FILTER without WITHATTRIBS: 'classic' in tags"); + + var res3 = (byte[][])db.Execute("VSIM", ["movies", "ELE", queryElementId, + "FILTER", ".rating / 2 > 2 and .year >= 1980", "COUNT", "3"]); + ClassicAssert.AreEqual(2, res3.Length, "ELE + FILTER without WITHATTRIBS: arithmetic and comparison"); + } + + private static byte[] SeedMoviesForAdvancedFiltering(IDatabase db) + { + _ = db.KeyDelete("movies"); + + var queryElementId = new byte[] { 0, 0, 0, 0 }; + var res1 = db.Execute("VADD", ["movies", "VALUES", "3", "1.0", "2.0", "3.0", queryElementId, + "CAS", "Q8", "EF", "16", "M", "32", "SETATTR", "{\"year\":1980,\"rating\":4.5,\"genre\":\"action\",\"tags\":[\"classic\",\"popular\"]}"]); + ClassicAssert.AreEqual(1, (int)res1); + + var res2 = db.Execute("VADD", ["movies", "VALUES", "3", "2.0", "3.0", "4.0", new byte[] { 0, 0, 0, 1 }, + "CAS", "Q8", "EF", "16", "M", "32", "SETATTR", "{\"year\":1960,\"rating\":3.8,\"genre\":\"drama\",\"tags\":[\"classic\"]}"]); + ClassicAssert.AreEqual(1, (int)res2); + + var res3 = db.Execute("VADD", ["movies", "VALUES", "3", "1.5", "2.5", "3.5", new byte[] { 0, 0, 0, 2 }, + "CAS", "Q8", "EF", "16", "M", "32", "SETATTR", "{\"year\":2010,\"rating\":4.2,\"genre\":\"action\",\"tags\":[\"modern\"]}"]); + ClassicAssert.AreEqual(1, (int)res3); + + return queryElementId; + } + [Test] public void DeleteVectorSet() { diff --git a/website/docs/dev/vector-sets.md b/website/docs/dev/vector-sets.md index fbd442deb21..c801c5b8d78 100644 --- a/website/docs/dev/vector-sets.md +++ b/website/docs/dev/vector-sets.md @@ 
-454,6 +454,7 @@ Field access uses dot notation (for example, `.year`, `.rating`, `.genre`). - Keywords are lowercase (`and`, `or`, `not`, `in`, `true`, `false`) - Missing attributes are treated as non-matching (null/falsy) - Array literals inside expressions (for example, `.director in ["a","b"]`) are not currently supported +- `VSIM` query source can be either `ELE ` or `VALUES ... ` ### Examples @@ -462,6 +463,7 @@ VSIM movies ELE dune FILTER '.year >= 1980 and .rating > 7' VSIM movies ELE dune FILTER '.genre == "action" && .rating > 8.0' VSIM movies ELE dune FILTER '"classic" in .tags' VSIM movies ELE dune FILTER '(.year - 2000) ** 2 < 100 and .rating / 2 > 4' +VSIM movies VALUES 3 0.12 0.34 0.56 FILTER '.year >= 1980 and .rating > 7' ``` ### Reference From 1e5cd340a489bc3a369b1eea3826700a1b92efdd Mon Sep 17 00:00:00 2001 From: Haiyang Xu Date: Tue, 24 Feb 2026 15:26:39 -0800 Subject: [PATCH 11/31] split the filter engine tests --- .../Filter/VectorFilterEvaluatorTests.cs | 205 ++++++ .../Filter/VectorFilterParserTests.cs | 185 ++++++ .../Filter/VectorFilterTestHelpers.cs | 36 ++ .../Filter/VectorFilterTokenizerTests.cs | 185 ++++++ test/Garnet.test/VectorFilterTests.cs | 592 ------------------ 5 files changed, 611 insertions(+), 592 deletions(-) create mode 100644 test/Garnet.test/Filter/VectorFilterEvaluatorTests.cs create mode 100644 test/Garnet.test/Filter/VectorFilterParserTests.cs create mode 100644 test/Garnet.test/Filter/VectorFilterTestHelpers.cs create mode 100644 test/Garnet.test/Filter/VectorFilterTokenizerTests.cs delete mode 100644 test/Garnet.test/VectorFilterTests.cs diff --git a/test/Garnet.test/Filter/VectorFilterEvaluatorTests.cs b/test/Garnet.test/Filter/VectorFilterEvaluatorTests.cs new file mode 100644 index 00000000000..cabb4b7afa5 --- /dev/null +++ b/test/Garnet.test/Filter/VectorFilterEvaluatorTests.cs @@ -0,0 +1,205 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. 
+ +using Allure.NUnit; +using Garnet.server.Vector.Filter; +using NUnit.Framework; +using NUnit.Framework.Legacy; + +namespace Garnet.test +{ + [AllureNUnit] + [TestFixture] + public class VectorFilterEvaluatorTests : AllureTestBase + { + [Test] + public void Evaluator_Arithmetic() + { + var json = "{}"; + ClassicAssert.AreEqual(5.0, VectorFilterTestHelpers.EvaluateFilter("2 + 3", json)); + ClassicAssert.AreEqual(1.0, VectorFilterTestHelpers.EvaluateFilter("3 - 2", json)); + ClassicAssert.AreEqual(6.0, VectorFilterTestHelpers.EvaluateFilter("2 * 3", json)); + ClassicAssert.AreEqual(2.5, VectorFilterTestHelpers.EvaluateFilter("5 / 2", json)); + ClassicAssert.AreEqual(1.0, VectorFilterTestHelpers.EvaluateFilter("7 % 3", json)); + ClassicAssert.AreEqual(8.0, VectorFilterTestHelpers.EvaluateFilter("2 ** 3", json)); + } + + [Test] + public void Evaluator_SubtractionWithField() + { + var json = "{\"year\":1980}"; + ClassicAssert.AreEqual(1975.0, VectorFilterTestHelpers.EvaluateFilter(".year - 5", json)); + ClassicAssert.IsTrue(VectorFilterTestHelpers.EvaluateFilterTruthy(".year - 5 > 0", json)); + } + + [Test] + public void Evaluator_Comparison() + { + var json = "{}"; + ClassicAssert.AreEqual(1.0, VectorFilterTestHelpers.EvaluateFilter("5 > 3", json)); + ClassicAssert.AreEqual(0.0, VectorFilterTestHelpers.EvaluateFilter("3 > 5", json)); + ClassicAssert.AreEqual(1.0, VectorFilterTestHelpers.EvaluateFilter("3 < 5", json)); + ClassicAssert.AreEqual(0.0, VectorFilterTestHelpers.EvaluateFilter("5 < 3", json)); + ClassicAssert.AreEqual(1.0, VectorFilterTestHelpers.EvaluateFilter("5 >= 5", json)); + ClassicAssert.AreEqual(1.0, VectorFilterTestHelpers.EvaluateFilter("5 <= 5", json)); + ClassicAssert.AreEqual(1.0, VectorFilterTestHelpers.EvaluateFilter("5 == 5", json)); + ClassicAssert.AreEqual(1.0, VectorFilterTestHelpers.EvaluateFilter("5 != 3", json)); + ClassicAssert.AreEqual(0.0, VectorFilterTestHelpers.EvaluateFilter("5 != 5", json)); + } + + [Test] + public void 
Evaluator_LogicalAnd() + { + var json = "{}"; + ClassicAssert.IsTrue(VectorFilterTestHelpers.EvaluateFilterTruthy("true and true", json)); + ClassicAssert.IsFalse(VectorFilterTestHelpers.EvaluateFilterTruthy("true and false", json)); + ClassicAssert.IsFalse(VectorFilterTestHelpers.EvaluateFilterTruthy("false and true", json)); + ClassicAssert.IsTrue(VectorFilterTestHelpers.EvaluateFilterTruthy("true && true", json)); + } + + [Test] + public void Evaluator_LogicalOr() + { + var json = "{}"; + ClassicAssert.IsTrue(VectorFilterTestHelpers.EvaluateFilterTruthy("true or false", json)); + ClassicAssert.IsTrue(VectorFilterTestHelpers.EvaluateFilterTruthy("false or true", json)); + ClassicAssert.IsFalse(VectorFilterTestHelpers.EvaluateFilterTruthy("false or false", json)); + ClassicAssert.IsTrue(VectorFilterTestHelpers.EvaluateFilterTruthy("false || true", json)); + } + + [Test] + public void Evaluator_LogicalNot() + { + var json = "{}"; + ClassicAssert.IsFalse(VectorFilterTestHelpers.EvaluateFilterTruthy("not true", json)); + ClassicAssert.IsTrue(VectorFilterTestHelpers.EvaluateFilterTruthy("not false", json)); + } + + [Test] + public void Evaluator_StringEquality() + { + var json = "{\"genre\":\"action\"}"; + ClassicAssert.IsTrue(VectorFilterTestHelpers.EvaluateFilterTruthy(".genre == \"action\"", json)); + ClassicAssert.IsFalse(VectorFilterTestHelpers.EvaluateFilterTruthy(".genre == \"drama\"", json)); + ClassicAssert.IsTrue(VectorFilterTestHelpers.EvaluateFilterTruthy(".genre != \"drama\"", json)); + } + + [Test] + public void Evaluator_MemberAccess() + { + var json = "{\"year\":1980,\"rating\":4.5}"; + ClassicAssert.AreEqual(1980.0, VectorFilterTestHelpers.EvaluateFilter(".year", json)); + ClassicAssert.AreEqual(4.5, VectorFilterTestHelpers.EvaluateFilter(".rating", json)); + } + + [Test] + public void Evaluator_MissingFieldReturnsNull() + { + var json = "{\"year\":1980}"; + var result = VectorFilterTestHelpers.EvaluateFilter(".missing", json); + 
ClassicAssert.IsNull(result); + ClassicAssert.IsFalse(VectorFilterTestHelpers.EvaluateFilterTruthy(".missing", json)); + } + + [Test] + public void Evaluator_InOperatorWithArray() + { + var json = "{\"tags\":[\"classic\",\"popular\"]}"; + ClassicAssert.IsTrue(VectorFilterTestHelpers.EvaluateFilterTruthy("\"classic\" in .tags", json)); + ClassicAssert.IsTrue(VectorFilterTestHelpers.EvaluateFilterTruthy("\"popular\" in .tags", json)); + ClassicAssert.IsFalse(VectorFilterTestHelpers.EvaluateFilterTruthy("\"modern\" in .tags", json)); + } + + [Test] + public void Evaluator_InOperatorWithNumericArray() + { + var json = "{\"scores\":[1,2,3]}"; + ClassicAssert.IsTrue(VectorFilterTestHelpers.EvaluateFilterTruthy("2 in .scores", json)); + ClassicAssert.IsFalse(VectorFilterTestHelpers.EvaluateFilterTruthy("5 in .scores", json)); + } + + [Test] + public void Evaluator_IsTruthy() + { + ClassicAssert.IsFalse(VectorFilterEvaluator.IsTruthy(null)); + ClassicAssert.IsFalse(VectorFilterEvaluator.IsTruthy(0.0)); + ClassicAssert.IsFalse(VectorFilterEvaluator.IsTruthy(0)); + ClassicAssert.IsFalse(VectorFilterEvaluator.IsTruthy("")); + ClassicAssert.IsFalse(VectorFilterEvaluator.IsTruthy(false)); + + ClassicAssert.IsTrue(VectorFilterEvaluator.IsTruthy(1.0)); + ClassicAssert.IsTrue(VectorFilterEvaluator.IsTruthy(-1.0)); + ClassicAssert.IsTrue(VectorFilterEvaluator.IsTruthy(42)); + ClassicAssert.IsTrue(VectorFilterEvaluator.IsTruthy("hello")); + ClassicAssert.IsTrue(VectorFilterEvaluator.IsTruthy(true)); + } + + [Test] + public void Evaluator_IsTruthy_FilterValue() + { + ClassicAssert.IsFalse(VectorFilterEvaluator.IsTruthy(FilterValue.Null)); + ClassicAssert.IsFalse(VectorFilterEvaluator.IsTruthy(FilterValue.False)); + ClassicAssert.IsFalse(VectorFilterEvaluator.IsTruthy(FilterValue.FromNumber(0.0))); + ClassicAssert.IsFalse(VectorFilterEvaluator.IsTruthy(FilterValue.FromString(""))); + + ClassicAssert.IsTrue(VectorFilterEvaluator.IsTruthy(FilterValue.True)); + 
ClassicAssert.IsTrue(VectorFilterEvaluator.IsTruthy(FilterValue.FromNumber(1.0))); + ClassicAssert.IsTrue(VectorFilterEvaluator.IsTruthy(FilterValue.FromNumber(-1.0))); + ClassicAssert.IsTrue(VectorFilterEvaluator.IsTruthy(FilterValue.FromString("hello"))); + } + + [Test] + public void Evaluator_ComplexExpression() + { + var json = "{\"year\":1980,\"rating\":4.5,\"genre\":\"action\",\"tags\":[\"classic\",\"popular\"]}"; + + ClassicAssert.IsTrue(VectorFilterTestHelpers.EvaluateFilterTruthy( + ".rating * 2 > 8 and (.year >= 1980 or \"modern\" in .tags)", json)); + + ClassicAssert.IsFalse(VectorFilterTestHelpers.EvaluateFilterTruthy( + "(.year > 2000 or .year < 1970) and .rating >= 4.0", json)); + + ClassicAssert.IsTrue(VectorFilterTestHelpers.EvaluateFilterTruthy("not (.genre == \"drama\")", json)); + ClassicAssert.IsTrue(VectorFilterTestHelpers.EvaluateFilterTruthy(".year / 10 >= 198", json)); + } + + [Test] + public void Evaluator_ComparisonWithMissingField() + { + var json = "{\"year\":1980}"; + ClassicAssert.IsFalse(VectorFilterTestHelpers.EvaluateFilterTruthy(".missing > 1950", json)); + } + + [Test] + public void Evaluator_BooleanJsonValues() + { + var json = "{\"active\":true,\"deleted\":false}"; + ClassicAssert.IsTrue(VectorFilterTestHelpers.EvaluateFilterTruthy(".active", json)); + ClassicAssert.IsFalse(VectorFilterTestHelpers.EvaluateFilterTruthy(".deleted", json)); + ClassicAssert.IsTrue(VectorFilterTestHelpers.EvaluateFilterTruthy(".active == true", json)); + } + + [Test] + public void Evaluator_ArithmeticWithNonNumericString_CoercesToZero() + { + var json = "{\"genre\":\"action\"}"; + ClassicAssert.AreEqual(2.0, VectorFilterTestHelpers.EvaluateFilter(".genre + 2", json)); + ClassicAssert.AreEqual(-1.0, VectorFilterTestHelpers.EvaluateFilter(".genre - 1", json)); + } + + [Test] + public void Evaluator_InOperatorWithNonArrayHaystack_ReturnsFalse() + { + var json = "{\"genre\":\"action\"}"; + 
ClassicAssert.IsFalse(VectorFilterTestHelpers.EvaluateFilterTruthy("\"action\" in .genre", json)); + } + + [Test] + public void Evaluator_EqualityBetweenNumberAndNonNumericString_ReturnsFalse() + { + var json = "{\"genre\":\"action\"}"; + ClassicAssert.IsFalse(VectorFilterTestHelpers.EvaluateFilterTruthy(".genre == 1", json)); + ClassicAssert.IsTrue(VectorFilterTestHelpers.EvaluateFilterTruthy(".genre == 0", json)); + } + } +} diff --git a/test/Garnet.test/Filter/VectorFilterParserTests.cs b/test/Garnet.test/Filter/VectorFilterParserTests.cs new file mode 100644 index 00000000000..2fb614ecbf8 --- /dev/null +++ b/test/Garnet.test/Filter/VectorFilterParserTests.cs @@ -0,0 +1,185 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +using System; +using System.Collections.Generic; +using Allure.NUnit; +using Garnet.server.Vector.Filter; +using NUnit.Framework; +using NUnit.Framework.Legacy; + +namespace Garnet.test +{ + [AllureNUnit] + [TestFixture] + public class VectorFilterParserTests : AllureTestBase + { + [Test] + public void Parser_NumberLiteral() + { + var tokens = VectorFilterTokenizer.Tokenize("42"); + var expr = VectorFilterParser.ParseExpression(tokens, 0, out var end); + ClassicAssert.AreEqual(1, end); + ClassicAssert.IsInstanceOf(expr); + var lit = (LiteralExpr)expr; + ClassicAssert.AreEqual(FilterValueKind.Number, lit.Value.Kind); + ClassicAssert.AreEqual(42.0, lit.Value.AsNumber()); + } + + [Test] + public void Parser_StringLiteral() + { + var tokens = VectorFilterTokenizer.Tokenize("\"hello\""); + var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); + ClassicAssert.IsInstanceOf(expr); + var lit = (LiteralExpr)expr; + ClassicAssert.AreEqual(FilterValueKind.String, lit.Value.Kind); + ClassicAssert.AreEqual("hello", lit.Value.AsString()); + } + + [Test] + public void Parser_BooleanLiteral() + { + var tokens = VectorFilterTokenizer.Tokenize("true"); + var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); 
+ ClassicAssert.IsInstanceOf(expr); + var lit = (LiteralExpr)expr; + ClassicAssert.AreEqual(FilterValueKind.Number, lit.Value.Kind); + ClassicAssert.AreEqual(1.0, lit.Value.AsNumber()); + + tokens = VectorFilterTokenizer.Tokenize("false"); + expr = VectorFilterParser.ParseExpression(tokens, 0, out _); + ClassicAssert.IsInstanceOf(expr); + lit = (LiteralExpr)expr; + ClassicAssert.AreEqual(FilterValueKind.Number, lit.Value.Kind); + ClassicAssert.AreEqual(0.0, lit.Value.AsNumber()); + } + + [Test] + public void Parser_MemberAccess() + { + var tokens = VectorFilterTokenizer.Tokenize(".year"); + var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); + ClassicAssert.IsInstanceOf(expr); + ClassicAssert.AreEqual("year", ((MemberExpr)expr).Property); + } + + [Test] + public void Parser_UnaryNot() + { + var tokens = VectorFilterTokenizer.Tokenize("not true"); + var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); + ClassicAssert.IsInstanceOf(expr); + var unary = (UnaryExpr)expr; + ClassicAssert.AreEqual(OperatorKind.Not, unary.Operator); + ClassicAssert.IsInstanceOf(unary.Operand); + } + + [Test] + public void Parser_UnaryNegation() + { + var tokens = VectorFilterTokenizer.Tokenize(".a + (-.b)"); + var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); + ClassicAssert.IsInstanceOf(expr); + var binary = (BinaryExpr)expr; + ClassicAssert.AreEqual(OperatorKind.Add, binary.Operator); + ClassicAssert.IsInstanceOf(binary.Right); + ClassicAssert.AreEqual(OperatorKind.Negate, ((UnaryExpr)binary.Right).Operator); + } + + [Test] + public void Parser_OperatorPrecedence_MultiplicationBeforeAddition() + { + var tokens = VectorFilterTokenizer.Tokenize("1 + 2 * 3"); + var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); + ClassicAssert.IsInstanceOf(expr); + var binary = (BinaryExpr)expr; + ClassicAssert.AreEqual(OperatorKind.Add, binary.Operator); + ClassicAssert.IsInstanceOf(binary.Left); + ClassicAssert.IsInstanceOf(binary.Right); + 
ClassicAssert.AreEqual(OperatorKind.Multiply, ((BinaryExpr)binary.Right).Operator); + } + + [Test] + public void Parser_OperatorPrecedence_AndBeforeOr() + { + var tokens = VectorFilterTokenizer.Tokenize("true or false and true"); + var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); + ClassicAssert.IsInstanceOf(expr); + var binary = (BinaryExpr)expr; + ClassicAssert.AreEqual(OperatorKind.Or, binary.Operator); + ClassicAssert.IsInstanceOf(binary.Left); + ClassicAssert.IsInstanceOf(binary.Right); + ClassicAssert.AreEqual(OperatorKind.And, ((BinaryExpr)binary.Right).Operator); + } + + [Test] + public void Parser_ParenthesesOverridePrecedence() + { + var tokens = VectorFilterTokenizer.Tokenize("(1 + 2) * 3"); + var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); + ClassicAssert.IsInstanceOf(expr); + var binary = (BinaryExpr)expr; + ClassicAssert.AreEqual(OperatorKind.Multiply, binary.Operator); + ClassicAssert.IsInstanceOf(binary.Left); + ClassicAssert.AreEqual(OperatorKind.Add, ((BinaryExpr)binary.Left).Operator); + } + + [Test] + public void Parser_Containment() + { + var tokens = VectorFilterTokenizer.Tokenize("\"action\" in .tags"); + var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); + ClassicAssert.IsInstanceOf(expr); + var binary = (BinaryExpr)expr; + ClassicAssert.AreEqual(OperatorKind.In, binary.Operator); + ClassicAssert.IsInstanceOf(binary.Left); + ClassicAssert.IsInstanceOf(binary.Right); + } + + [Test] + public void Parser_ExponentiationRightAssociative() + { + var tokens = VectorFilterTokenizer.Tokenize("2 ** 3 ** 2"); + var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); + ClassicAssert.IsInstanceOf(expr); + var binary = (BinaryExpr)expr; + ClassicAssert.AreEqual(OperatorKind.Power, binary.Operator); + ClassicAssert.IsInstanceOf(binary.Left); + ClassicAssert.IsInstanceOf(binary.Right); + ClassicAssert.AreEqual(OperatorKind.Power, ((BinaryExpr)binary.Right).Operator); + } + + [Test] + public void 
Parser_ErrorOnUnexpectedEnd() + { + var tokens = new List(); + ClassicAssert.Throws(() => + VectorFilterParser.ParseExpression(tokens, 0, out _)); + } + + [Test] + public void Parser_ErrorOnMissingClosingParen() + { + var tokens = VectorFilterTokenizer.Tokenize("(1 + 2"); + ClassicAssert.Throws(() => + VectorFilterParser.ParseExpression(tokens, 0, out _)); + } + + [Test] + public void Parser_ErrorOnInvalidNumberLiteral_DoubleDot() + { + var tokens = VectorFilterTokenizer.Tokenize("1..023"); + ClassicAssert.Throws(() => + VectorFilterParser.ParseExpression(tokens, 0, out _)); + } + + [Test] + public void Parser_ErrorOnInvalidNumberLiteral_MultipleDots() + { + var tokens = VectorFilterTokenizer.Tokenize("1.2.3"); + ClassicAssert.Throws(() => + VectorFilterParser.ParseExpression(tokens, 0, out _)); + } + } +} diff --git a/test/Garnet.test/Filter/VectorFilterTestHelpers.cs b/test/Garnet.test/Filter/VectorFilterTestHelpers.cs new file mode 100644 index 00000000000..4fcaa3752f3 --- /dev/null +++ b/test/Garnet.test/Filter/VectorFilterTestHelpers.cs @@ -0,0 +1,36 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. 
+ +using System.Text.Json; +using Garnet.server.Vector.Filter; + +namespace Garnet.test +{ + internal static class VectorFilterTestHelpers + { + internal static object EvaluateFilter(string expression, string json) + { + var tokens = VectorFilterTokenizer.Tokenize(expression); + var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); + using var doc = JsonDocument.Parse(json); + var result = VectorFilterEvaluator.EvaluateExpression(expr, doc.RootElement); + + return result.Kind switch + { + FilterValueKind.Number => (object)result.AsNumber(), + FilterValueKind.String => result.AsString(), + FilterValueKind.Null => null, + _ => result.AsNumber() + }; + } + + internal static bool EvaluateFilterTruthy(string expression, string json) + { + var tokens = VectorFilterTokenizer.Tokenize(expression); + var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); + using var doc = JsonDocument.Parse(json); + var result = VectorFilterEvaluator.EvaluateExpression(expr, doc.RootElement); + return VectorFilterEvaluator.IsTruthy(result); + } + } +} diff --git a/test/Garnet.test/Filter/VectorFilterTokenizerTests.cs b/test/Garnet.test/Filter/VectorFilterTokenizerTests.cs new file mode 100644 index 00000000000..e55869cee8b --- /dev/null +++ b/test/Garnet.test/Filter/VectorFilterTokenizerTests.cs @@ -0,0 +1,185 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. 
+ +using System; +using System.Linq; +using Allure.NUnit; +using Garnet.server.Vector.Filter; +using NUnit.Framework; +using NUnit.Framework.Legacy; + +namespace Garnet.test +{ + [AllureNUnit] + [TestFixture] + public class VectorFilterTokenizerTests : AllureTestBase + { + [Test] + public void Tokenizer_IntegerNumbers() + { + var tokens = VectorFilterTokenizer.Tokenize("42"); + ClassicAssert.AreEqual(1, tokens.Count); + ClassicAssert.AreEqual(TokenType.Number, tokens[0].Type); + ClassicAssert.AreEqual("42", tokens[0].Value); + } + + [Test] + public void Tokenizer_DecimalNumbers() + { + var tokens = VectorFilterTokenizer.Tokenize("3.14"); + ClassicAssert.AreEqual(1, tokens.Count); + ClassicAssert.AreEqual(TokenType.Number, tokens[0].Type); + ClassicAssert.AreEqual("3.14", tokens[0].Value); + } + + [Test] + public void Tokenizer_NegativeNumbers() + { + var tokens = VectorFilterTokenizer.Tokenize("-5"); + ClassicAssert.AreEqual(1, tokens.Count); + ClassicAssert.AreEqual(TokenType.Number, tokens[0].Type); + ClassicAssert.AreEqual("-5", tokens[0].Value); + } + + [Test] + public void Tokenizer_StringLiterals() + { + var tokens = VectorFilterTokenizer.Tokenize("\"hello\""); + ClassicAssert.AreEqual(1, tokens.Count); + ClassicAssert.AreEqual(TokenType.String, tokens[0].Type); + ClassicAssert.AreEqual("hello", tokens[0].Value); + + tokens = VectorFilterTokenizer.Tokenize("'world'"); + ClassicAssert.AreEqual(1, tokens.Count); + ClassicAssert.AreEqual(TokenType.String, tokens[0].Type); + ClassicAssert.AreEqual("world", tokens[0].Value); + } + + [Test] + public void Tokenizer_EscapedStringLiterals() + { + var tokens = VectorFilterTokenizer.Tokenize("\"hello\\\"world\""); + ClassicAssert.AreEqual(1, tokens.Count); + ClassicAssert.AreEqual(TokenType.String, tokens[0].Type); + ClassicAssert.AreEqual("hello\\\"world", tokens[0].Value); + } + + [Test] + public void Tokenizer_UnterminatedStringThrows() + { + ClassicAssert.Throws(() => + VectorFilterTokenizer.Tokenize("\"hello")); + 
} + + [Test] + public void Tokenizer_SubtractionNotConfusedWithNegative() + { + var tokens = VectorFilterTokenizer.Tokenize(".a - 5"); + ClassicAssert.AreEqual(3, tokens.Count); + ClassicAssert.AreEqual(TokenType.Identifier, tokens[0].Type); + ClassicAssert.AreEqual(TokenType.Operator, tokens[1].Type); + ClassicAssert.AreEqual("-", tokens[1].Value); + ClassicAssert.AreEqual(TokenType.Number, tokens[2].Type); + ClassicAssert.AreEqual("5", tokens[2].Value); + } + + [Test] + public void Tokenizer_Identifiers() + { + var tokens = VectorFilterTokenizer.Tokenize(".year"); + ClassicAssert.AreEqual(1, tokens.Count); + ClassicAssert.AreEqual(TokenType.Identifier, tokens[0].Type); + ClassicAssert.AreEqual(".year", tokens[0].Value); + + tokens = VectorFilterTokenizer.Tokenize("_field"); + ClassicAssert.AreEqual(1, tokens.Count); + ClassicAssert.AreEqual(TokenType.Identifier, tokens[0].Type); + ClassicAssert.AreEqual("_field", tokens[0].Value); + } + + [Test] + public void Tokenizer_Keywords() + { + var keywords = new[] { "and", "or", "not", "in" }; + foreach (var kw in keywords) + { + var tokens = VectorFilterTokenizer.Tokenize(kw); + ClassicAssert.AreEqual(1, tokens.Count); + ClassicAssert.AreEqual(TokenType.Keyword, tokens[0].Type); + ClassicAssert.AreEqual(kw, tokens[0].Value); + } + } + + [Test] + public void Tokenizer_Booleans() + { + var tokens = VectorFilterTokenizer.Tokenize("true"); + ClassicAssert.AreEqual(1, tokens.Count); + ClassicAssert.AreEqual(TokenType.Boolean, tokens[0].Type); + ClassicAssert.AreEqual("true", tokens[0].Value); + + tokens = VectorFilterTokenizer.Tokenize("false"); + ClassicAssert.AreEqual(1, tokens.Count); + ClassicAssert.AreEqual(TokenType.Boolean, tokens[0].Type); + ClassicAssert.AreEqual("false", tokens[0].Value); + } + + [Test] + public void Tokenizer_TwoCharOperators() + { + var ops = new[] { "==", "!=", ">=", "<=", "&&", "||", "**" }; + foreach (var op in ops) + { + var tokens = VectorFilterTokenizer.Tokenize($"1 {op} 2"); + var opToken 
= tokens.First(t => t.Type == TokenType.Operator); + ClassicAssert.AreEqual(op, opToken.Value); + } + } + + [Test] + public void Tokenizer_SingleCharOperators() + { + var ops = new[] { ">", "<", "+", "-", "*", "/", "%", "!" }; + foreach (var op in ops) + { + var tokens = VectorFilterTokenizer.Tokenize($".a {op} .b"); + var opToken = tokens.First(t => t.Type == TokenType.Operator); + ClassicAssert.AreEqual(op, opToken.Value); + } + } + + [Test] + public void Tokenizer_Delimiters() + { + var tokens = VectorFilterTokenizer.Tokenize("(.year > 10)"); + ClassicAssert.AreEqual(TokenType.Delimiter, tokens[0].Type); + ClassicAssert.AreEqual("(", tokens[0].Value); + ClassicAssert.AreEqual(TokenType.Delimiter, tokens[4].Type); + ClassicAssert.AreEqual(")", tokens[4].Value); + } + + [Test] + public void Tokenizer_ComplexExpression() + { + var tokens = VectorFilterTokenizer.Tokenize(".year > 1950 and .rating >= 4.0"); + ClassicAssert.AreEqual(7, tokens.Count); + ClassicAssert.AreEqual(TokenType.Identifier, tokens[0].Type); + ClassicAssert.AreEqual(TokenType.Operator, tokens[1].Type); + ClassicAssert.AreEqual(TokenType.Number, tokens[2].Type); + ClassicAssert.AreEqual(TokenType.Keyword, tokens[3].Type); + ClassicAssert.AreEqual(TokenType.Identifier, tokens[4].Type); + ClassicAssert.AreEqual(TokenType.Operator, tokens[5].Type); + ClassicAssert.AreEqual(TokenType.Number, tokens[6].Type); + } + + [Test] + public void Tokenizer_EmptyInput() + { + var tokens = VectorFilterTokenizer.Tokenize(""); + ClassicAssert.AreEqual(0, tokens.Count); + + tokens = VectorFilterTokenizer.Tokenize(" "); + ClassicAssert.AreEqual(0, tokens.Count); + } + } +} diff --git a/test/Garnet.test/VectorFilterTests.cs b/test/Garnet.test/VectorFilterTests.cs deleted file mode 100644 index 5331a1417f0..00000000000 --- a/test/Garnet.test/VectorFilterTests.cs +++ /dev/null @@ -1,592 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT license. 
- -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text.Json; -using Allure.NUnit; -using Garnet.server.Vector.Filter; -using NUnit.Framework; -using NUnit.Framework.Legacy; - -namespace Garnet.test -{ - [AllureNUnit] - [TestFixture] - public class VectorFilterTests : AllureTestBase - { - // Supported vector filter syntax includes: - // - Literals: numbers (42, 3.14, -5), strings ("x", 'x'), booleans (true/false) - // - Member access: .field, .nested.field, _identifier - // - Arithmetic: +, -, *, /, %, ** (power) - // - Comparison: >, <, >=, <=, ==, != - // - Logical: and/or/not and aliases &&, ||, ! - // - Containment: in (for example, "classic" in .tags) - // - Grouping: parentheses ( ) - // - // Operator precedence (high to low): - // primary/parentheses, unary, power, multiplicative, additive, in, - // comparison, equality, and, or. - - #region Helper Methods - - /// - /// Helper to parse a JSON string into a JsonElement for evaluator tests. - /// - private static JsonElement ParseJson(string json) - { - return JsonDocument.Parse(json).RootElement; - } - - /// - /// Helper to tokenize, parse, and evaluate a filter expression against JSON. - /// Returns object for test backward compatibility. - /// - private static object EvaluateFilter(string expression, string json) - { - var tokens = VectorFilterTokenizer.Tokenize(expression); - var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); - using var doc = JsonDocument.Parse(json); - var result = VectorFilterEvaluator.EvaluateExpression(expr, doc.RootElement); - - // Convert FilterValue back to object for test assertions - return result.Kind switch - { - FilterValueKind.Number => (object)result.AsNumber(), - FilterValueKind.String => result.AsString(), - FilterValueKind.Null => null, - _ => result.AsNumber() // fallback - }; - } - - /// - /// Helper to check if a filter expression is truthy against JSON. 
- /// - private static bool EvaluateFilterTruthy(string expression, string json) - { - var tokens = VectorFilterTokenizer.Tokenize(expression); - var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); - using var doc = JsonDocument.Parse(json); - var result = VectorFilterEvaluator.EvaluateExpression(expr, doc.RootElement); - return VectorFilterEvaluator.IsTruthy(result); - } - - #endregion - - #region Tokenizer Tests - - [Test] - public void Tokenizer_IntegerNumbers() - { - var tokens = VectorFilterTokenizer.Tokenize("42"); - ClassicAssert.AreEqual(1, tokens.Count); - ClassicAssert.AreEqual(TokenType.Number, tokens[0].Type); - ClassicAssert.AreEqual("42", tokens[0].Value); - } - - [Test] - public void Tokenizer_DecimalNumbers() - { - var tokens = VectorFilterTokenizer.Tokenize("3.14"); - ClassicAssert.AreEqual(1, tokens.Count); - ClassicAssert.AreEqual(TokenType.Number, tokens[0].Type); - ClassicAssert.AreEqual("3.14", tokens[0].Value); - } - - [Test] - public void Tokenizer_NegativeNumbers() - { - var tokens = VectorFilterTokenizer.Tokenize("-5"); - ClassicAssert.AreEqual(1, tokens.Count); - ClassicAssert.AreEqual(TokenType.Number, tokens[0].Type); - ClassicAssert.AreEqual("-5", tokens[0].Value); - } - - [Test] - public void Tokenizer_StringLiterals() - { - var tokens = VectorFilterTokenizer.Tokenize("\"hello\""); - ClassicAssert.AreEqual(1, tokens.Count); - ClassicAssert.AreEqual(TokenType.String, tokens[0].Type); - ClassicAssert.AreEqual("hello", tokens[0].Value); - - tokens = VectorFilterTokenizer.Tokenize("'world'"); - ClassicAssert.AreEqual(1, tokens.Count); - ClassicAssert.AreEqual(TokenType.String, tokens[0].Type); - ClassicAssert.AreEqual("world", tokens[0].Value); - } - - [Test] - public void Tokenizer_EscapedStringLiterals() - { - var tokens = VectorFilterTokenizer.Tokenize("\"hello\\\"world\""); - ClassicAssert.AreEqual(1, tokens.Count); - ClassicAssert.AreEqual(TokenType.String, tokens[0].Type); - ClassicAssert.AreEqual("hello\\\"world", 
tokens[0].Value); - } - - [Test] - public void Tokenizer_UnterminatedStringThrows() - { - ClassicAssert.Throws(() => - VectorFilterTokenizer.Tokenize("\"hello")); - } - - [Test] - public void Tokenizer_SubtractionNotConfusedWithNegative() - { - // ".a - 5" should tokenize as [.a, -, 5], not [.a, -5] - var tokens = VectorFilterTokenizer.Tokenize(".a - 5"); - ClassicAssert.AreEqual(3, tokens.Count); - ClassicAssert.AreEqual(TokenType.Identifier, tokens[0].Type); - ClassicAssert.AreEqual(TokenType.Operator, tokens[1].Type); - ClassicAssert.AreEqual("-", tokens[1].Value); - ClassicAssert.AreEqual(TokenType.Number, tokens[2].Type); - ClassicAssert.AreEqual("5", tokens[2].Value); - } - - [Test] - public void Tokenizer_Identifiers() - { - var tokens = VectorFilterTokenizer.Tokenize(".year"); - ClassicAssert.AreEqual(1, tokens.Count); - ClassicAssert.AreEqual(TokenType.Identifier, tokens[0].Type); - ClassicAssert.AreEqual(".year", tokens[0].Value); - - tokens = VectorFilterTokenizer.Tokenize("_field"); - ClassicAssert.AreEqual(1, tokens.Count); - ClassicAssert.AreEqual(TokenType.Identifier, tokens[0].Type); - ClassicAssert.AreEqual("_field", tokens[0].Value); - } - - [Test] - public void Tokenizer_Keywords() - { - var keywords = new[] { "and", "or", "not", "in" }; - foreach (var kw in keywords) - { - var tokens = VectorFilterTokenizer.Tokenize(kw); - ClassicAssert.AreEqual(1, tokens.Count); - ClassicAssert.AreEqual(TokenType.Keyword, tokens[0].Type); - ClassicAssert.AreEqual(kw, tokens[0].Value); - } - } - - [Test] - public void Tokenizer_Booleans() - { - var tokens = VectorFilterTokenizer.Tokenize("true"); - ClassicAssert.AreEqual(1, tokens.Count); - ClassicAssert.AreEqual(TokenType.Boolean, tokens[0].Type); - ClassicAssert.AreEqual("true", tokens[0].Value); - - tokens = VectorFilterTokenizer.Tokenize("false"); - ClassicAssert.AreEqual(1, tokens.Count); - ClassicAssert.AreEqual(TokenType.Boolean, tokens[0].Type); - ClassicAssert.AreEqual("false", tokens[0].Value); - } - - 
[Test] - public void Tokenizer_TwoCharOperators() - { - var ops = new[] { "==", "!=", ">=", "<=", "&&", "||", "**" }; - foreach (var op in ops) - { - var tokens = VectorFilterTokenizer.Tokenize($"1 {op} 2"); - var opToken = tokens.First(t => t.Type == TokenType.Operator); - ClassicAssert.AreEqual(op, opToken.Value); - } - } - - [Test] - public void Tokenizer_SingleCharOperators() - { - var ops = new[] { ">", "<", "+", "-", "*", "/", "%", "!" }; - foreach (var op in ops) - { - // Use identifiers to avoid ambiguity with negative numbers for "-" - var tokens = VectorFilterTokenizer.Tokenize($".a {op} .b"); - var opToken = tokens.First(t => t.Type == TokenType.Operator); - ClassicAssert.AreEqual(op, opToken.Value); - } - } - - [Test] - public void Tokenizer_Delimiters() - { - var tokens = VectorFilterTokenizer.Tokenize("(.year > 10)"); - ClassicAssert.AreEqual(TokenType.Delimiter, tokens[0].Type); - ClassicAssert.AreEqual("(", tokens[0].Value); - ClassicAssert.AreEqual(TokenType.Delimiter, tokens[4].Type); - ClassicAssert.AreEqual(")", tokens[4].Value); - } - - [Test] - public void Tokenizer_ComplexExpression() - { - var tokens = VectorFilterTokenizer.Tokenize(".year > 1950 and .rating >= 4.0"); - ClassicAssert.AreEqual(7, tokens.Count); - ClassicAssert.AreEqual(TokenType.Identifier, tokens[0].Type); // .year - ClassicAssert.AreEqual(TokenType.Operator, tokens[1].Type); // > - ClassicAssert.AreEqual(TokenType.Number, tokens[2].Type); // 1950 - ClassicAssert.AreEqual(TokenType.Keyword, tokens[3].Type); // and - ClassicAssert.AreEqual(TokenType.Identifier, tokens[4].Type); // .rating - ClassicAssert.AreEqual(TokenType.Operator, tokens[5].Type); // >= - ClassicAssert.AreEqual(TokenType.Number, tokens[6].Type); // 4.0 - } - - [Test] - public void Tokenizer_EmptyInput() - { - var tokens = VectorFilterTokenizer.Tokenize(""); - ClassicAssert.AreEqual(0, tokens.Count); - - tokens = VectorFilterTokenizer.Tokenize(" "); - ClassicAssert.AreEqual(0, tokens.Count); - } - - 
#endregion - - #region Parser Tests - - [Test] - public void Parser_NumberLiteral() - { - var tokens = VectorFilterTokenizer.Tokenize("42"); - var expr = VectorFilterParser.ParseExpression(tokens, 0, out var end); - ClassicAssert.AreEqual(1, end); - ClassicAssert.IsInstanceOf(expr); - var lit = (LiteralExpr)expr; - ClassicAssert.AreEqual(FilterValueKind.Number, lit.Value.Kind); - ClassicAssert.AreEqual(42.0, lit.Value.AsNumber()); - } - - [Test] - public void Parser_StringLiteral() - { - var tokens = VectorFilterTokenizer.Tokenize("\"hello\""); - var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); - ClassicAssert.IsInstanceOf(expr); - var lit = (LiteralExpr)expr; - ClassicAssert.AreEqual(FilterValueKind.String, lit.Value.Kind); - ClassicAssert.AreEqual("hello", lit.Value.AsString()); - } - - [Test] - public void Parser_BooleanLiteral() - { - var tokens = VectorFilterTokenizer.Tokenize("true"); - var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); - ClassicAssert.IsInstanceOf(expr); - var lit = (LiteralExpr)expr; - ClassicAssert.AreEqual(FilterValueKind.Number, lit.Value.Kind); - ClassicAssert.AreEqual(1.0, lit.Value.AsNumber()); - - tokens = VectorFilterTokenizer.Tokenize("false"); - expr = VectorFilterParser.ParseExpression(tokens, 0, out _); - ClassicAssert.IsInstanceOf(expr); - lit = (LiteralExpr)expr; - ClassicAssert.AreEqual(FilterValueKind.Number, lit.Value.Kind); - ClassicAssert.AreEqual(0.0, lit.Value.AsNumber()); - } - - [Test] - public void Parser_MemberAccess() - { - var tokens = VectorFilterTokenizer.Tokenize(".year"); - var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); - ClassicAssert.IsInstanceOf(expr); - ClassicAssert.AreEqual("year", ((MemberExpr)expr).Property); - } - - [Test] - public void Parser_UnaryNot() - { - var tokens = VectorFilterTokenizer.Tokenize("not true"); - var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); - ClassicAssert.IsInstanceOf(expr); - var unary = (UnaryExpr)expr; - 
ClassicAssert.AreEqual(OperatorKind.Not, unary.Operator); - ClassicAssert.IsInstanceOf(unary.Operand); - } - - [Test] - public void Parser_UnaryNegation() - { - var tokens = VectorFilterTokenizer.Tokenize(".a + (-.b)"); - var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); - ClassicAssert.IsInstanceOf(expr); - var binary = (BinaryExpr)expr; - ClassicAssert.AreEqual(OperatorKind.Add, binary.Operator); - ClassicAssert.IsInstanceOf(binary.Right); - ClassicAssert.AreEqual(OperatorKind.Negate, ((UnaryExpr)binary.Right).Operator); - } - - [Test] - public void Parser_OperatorPrecedence_MultiplicationBeforeAddition() - { - // 1 + 2 * 3 should parse as 1 + (2 * 3) - var tokens = VectorFilterTokenizer.Tokenize("1 + 2 * 3"); - var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); - ClassicAssert.IsInstanceOf(expr); - var binary = (BinaryExpr)expr; - ClassicAssert.AreEqual(OperatorKind.Add, binary.Operator); - ClassicAssert.IsInstanceOf(binary.Left); - ClassicAssert.IsInstanceOf(binary.Right); - ClassicAssert.AreEqual(OperatorKind.Multiply, ((BinaryExpr)binary.Right).Operator); - } - - [Test] - public void Parser_OperatorPrecedence_AndBeforeOr() - { - // a or b and c should parse as a or (b and c) - var tokens = VectorFilterTokenizer.Tokenize("true or false and true"); - var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); - ClassicAssert.IsInstanceOf(expr); - var binary = (BinaryExpr)expr; - ClassicAssert.AreEqual(OperatorKind.Or, binary.Operator); - ClassicAssert.IsInstanceOf(binary.Left); - ClassicAssert.IsInstanceOf(binary.Right); - ClassicAssert.AreEqual(OperatorKind.And, ((BinaryExpr)binary.Right).Operator); - } - - [Test] - public void Parser_ParenthesesOverridePrecedence() - { - // (1 + 2) * 3 should parse as (1 + 2) * 3 - var tokens = VectorFilterTokenizer.Tokenize("(1 + 2) * 3"); - var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); - ClassicAssert.IsInstanceOf(expr); - var binary = (BinaryExpr)expr; - 
ClassicAssert.AreEqual(OperatorKind.Multiply, binary.Operator); - ClassicAssert.IsInstanceOf(binary.Left); - ClassicAssert.AreEqual(OperatorKind.Add, ((BinaryExpr)binary.Left).Operator); - } - - [Test] - public void Parser_Containment() - { - var tokens = VectorFilterTokenizer.Tokenize("\"action\" in .tags"); - var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); - ClassicAssert.IsInstanceOf(expr); - var binary = (BinaryExpr)expr; - ClassicAssert.AreEqual(OperatorKind.In, binary.Operator); - ClassicAssert.IsInstanceOf(binary.Left); - ClassicAssert.IsInstanceOf(binary.Right); - } - - [Test] - public void Parser_ExponentiationRightAssociative() - { - // 2 ** 3 ** 2 should parse as 2 ** (3 ** 2) - var tokens = VectorFilterTokenizer.Tokenize("2 ** 3 ** 2"); - var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); - ClassicAssert.IsInstanceOf(expr); - var binary = (BinaryExpr)expr; - ClassicAssert.AreEqual(OperatorKind.Power, binary.Operator); - ClassicAssert.IsInstanceOf(binary.Left); - ClassicAssert.IsInstanceOf(binary.Right); - ClassicAssert.AreEqual(OperatorKind.Power, ((BinaryExpr)binary.Right).Operator); - } - - [Test] - public void Parser_ErrorOnUnexpectedEnd() - { - var tokens = new List(); - ClassicAssert.Throws(() => - VectorFilterParser.ParseExpression(tokens, 0, out _)); - } - - [Test] - public void Parser_ErrorOnMissingClosingParen() - { - var tokens = VectorFilterTokenizer.Tokenize("(1 + 2"); - ClassicAssert.Throws(() => - VectorFilterParser.ParseExpression(tokens, 0, out _)); - } - - #endregion - - #region Evaluator Tests - - [Test] - public void Evaluator_Arithmetic() - { - var json = "{}"; - ClassicAssert.AreEqual(5.0, EvaluateFilter("2 + 3", json)); - ClassicAssert.AreEqual(1.0, EvaluateFilter("3 - 2", json)); - ClassicAssert.AreEqual(6.0, EvaluateFilter("2 * 3", json)); - ClassicAssert.AreEqual(2.5, EvaluateFilter("5 / 2", json)); - ClassicAssert.AreEqual(1.0, EvaluateFilter("7 % 3", json)); - ClassicAssert.AreEqual(8.0, 
EvaluateFilter("2 ** 3", json)); - } - - [Test] - public void Evaluator_SubtractionWithField() - { - var json = "{\"year\":1980}"; - ClassicAssert.AreEqual(1975.0, EvaluateFilter(".year - 5", json)); - ClassicAssert.IsTrue(EvaluateFilterTruthy(".year - 5 > 0", json)); - } - - [Test] - public void Evaluator_Comparison() - { - var json = "{}"; - ClassicAssert.AreEqual(1.0, EvaluateFilter("5 > 3", json)); - ClassicAssert.AreEqual(0.0, EvaluateFilter("3 > 5", json)); - ClassicAssert.AreEqual(1.0, EvaluateFilter("3 < 5", json)); - ClassicAssert.AreEqual(0.0, EvaluateFilter("5 < 3", json)); - ClassicAssert.AreEqual(1.0, EvaluateFilter("5 >= 5", json)); - ClassicAssert.AreEqual(1.0, EvaluateFilter("5 <= 5", json)); - ClassicAssert.AreEqual(1.0, EvaluateFilter("5 == 5", json)); - ClassicAssert.AreEqual(1.0, EvaluateFilter("5 != 3", json)); - ClassicAssert.AreEqual(0.0, EvaluateFilter("5 != 5", json)); - } - - [Test] - public void Evaluator_LogicalAnd() - { - var json = "{}"; - ClassicAssert.IsTrue(EvaluateFilterTruthy("true and true", json)); - ClassicAssert.IsFalse(EvaluateFilterTruthy("true and false", json)); - ClassicAssert.IsFalse(EvaluateFilterTruthy("false and true", json)); - // Also test && syntax - ClassicAssert.IsTrue(EvaluateFilterTruthy("true && true", json)); - } - - [Test] - public void Evaluator_LogicalOr() - { - var json = "{}"; - ClassicAssert.IsTrue(EvaluateFilterTruthy("true or false", json)); - ClassicAssert.IsTrue(EvaluateFilterTruthy("false or true", json)); - ClassicAssert.IsFalse(EvaluateFilterTruthy("false or false", json)); - // Also test || syntax - ClassicAssert.IsTrue(EvaluateFilterTruthy("false || true", json)); - } - - [Test] - public void Evaluator_LogicalNot() - { - var json = "{}"; - ClassicAssert.IsFalse(EvaluateFilterTruthy("not true", json)); - ClassicAssert.IsTrue(EvaluateFilterTruthy("not false", json)); - } - - [Test] - public void Evaluator_StringEquality() - { - var json = "{\"genre\":\"action\"}"; - 
ClassicAssert.IsTrue(EvaluateFilterTruthy(".genre == \"action\"", json)); - ClassicAssert.IsFalse(EvaluateFilterTruthy(".genre == \"drama\"", json)); - ClassicAssert.IsTrue(EvaluateFilterTruthy(".genre != \"drama\"", json)); - } - - [Test] - public void Evaluator_MemberAccess() - { - var json = "{\"year\":1980,\"rating\":4.5}"; - ClassicAssert.AreEqual(1980.0, EvaluateFilter(".year", json)); - ClassicAssert.AreEqual(4.5, EvaluateFilter(".rating", json)); - } - - [Test] - public void Evaluator_MissingFieldReturnsNull() - { - var json = "{\"year\":1980}"; - var result = EvaluateFilter(".missing", json); - ClassicAssert.IsNull(result); - ClassicAssert.IsFalse(EvaluateFilterTruthy(".missing", json)); - } - - [Test] - public void Evaluator_InOperatorWithArray() - { - var json = "{\"tags\":[\"classic\",\"popular\"]}"; - ClassicAssert.IsTrue(EvaluateFilterTruthy("\"classic\" in .tags", json)); - ClassicAssert.IsTrue(EvaluateFilterTruthy("\"popular\" in .tags", json)); - ClassicAssert.IsFalse(EvaluateFilterTruthy("\"modern\" in .tags", json)); - } - - [Test] - public void Evaluator_InOperatorWithNumericArray() - { - var json = "{\"scores\":[1,2,3]}"; - ClassicAssert.IsTrue(EvaluateFilterTruthy("2 in .scores", json)); - ClassicAssert.IsFalse(EvaluateFilterTruthy("5 in .scores", json)); - } - - [Test] - public void Evaluator_IsTruthy() - { - // Test the object-accepting overload for backward compatibility - ClassicAssert.IsFalse(VectorFilterEvaluator.IsTruthy(null)); - ClassicAssert.IsFalse(VectorFilterEvaluator.IsTruthy(0.0)); - ClassicAssert.IsFalse(VectorFilterEvaluator.IsTruthy(0)); - ClassicAssert.IsFalse(VectorFilterEvaluator.IsTruthy("")); - ClassicAssert.IsFalse(VectorFilterEvaluator.IsTruthy(false)); - - ClassicAssert.IsTrue(VectorFilterEvaluator.IsTruthy(1.0)); - ClassicAssert.IsTrue(VectorFilterEvaluator.IsTruthy(-1.0)); - ClassicAssert.IsTrue(VectorFilterEvaluator.IsTruthy(42)); - ClassicAssert.IsTrue(VectorFilterEvaluator.IsTruthy("hello")); - 
ClassicAssert.IsTrue(VectorFilterEvaluator.IsTruthy(true)); - } - - [Test] - public void Evaluator_IsTruthy_FilterValue() - { - // Test the FilterValue-accepting overload (the hot-path version) - ClassicAssert.IsFalse(VectorFilterEvaluator.IsTruthy(FilterValue.Null)); - ClassicAssert.IsFalse(VectorFilterEvaluator.IsTruthy(FilterValue.False)); - ClassicAssert.IsFalse(VectorFilterEvaluator.IsTruthy(FilterValue.FromNumber(0.0))); - ClassicAssert.IsFalse(VectorFilterEvaluator.IsTruthy(FilterValue.FromString(""))); - - ClassicAssert.IsTrue(VectorFilterEvaluator.IsTruthy(FilterValue.True)); - ClassicAssert.IsTrue(VectorFilterEvaluator.IsTruthy(FilterValue.FromNumber(1.0))); - ClassicAssert.IsTrue(VectorFilterEvaluator.IsTruthy(FilterValue.FromNumber(-1.0))); - ClassicAssert.IsTrue(VectorFilterEvaluator.IsTruthy(FilterValue.FromString("hello"))); - } - - [Test] - public void Evaluator_ComplexExpression() - { - var json = "{\"year\":1980,\"rating\":4.5,\"genre\":\"action\",\"tags\":[\"classic\",\"popular\"]}"; - - // .rating * 2 > 8 and (.year >= 1980 or "modern" in .tags) - ClassicAssert.IsTrue(EvaluateFilterTruthy( - ".rating * 2 > 8 and (.year >= 1980 or \"modern\" in .tags)", json)); - - // (.year > 2000 or .year < 1970) and .rating >= 4.0 - ClassicAssert.IsFalse(EvaluateFilterTruthy( - "(.year > 2000 or .year < 1970) and .rating >= 4.0", json)); - - // not (.genre == "drama") - ClassicAssert.IsTrue(EvaluateFilterTruthy("not (.genre == \"drama\")", json)); - - // .year / 10 >= 198 - ClassicAssert.IsTrue(EvaluateFilterTruthy(".year / 10 >= 198", json)); - } - - [Test] - public void Evaluator_ComparisonWithMissingField() - { - var json = "{\"year\":1980}"; - // Missing field compared to number: ToNumber(null) = 0, so 0 > 1950 is false - ClassicAssert.IsFalse(EvaluateFilterTruthy(".missing > 1950", json)); - } - - [Test] - public void Evaluator_BooleanJsonValues() - { - var json = "{\"active\":true,\"deleted\":false}"; - 
ClassicAssert.IsTrue(EvaluateFilterTruthy(".active", json)); - ClassicAssert.IsFalse(EvaluateFilterTruthy(".deleted", json)); - ClassicAssert.IsTrue(EvaluateFilterTruthy(".active == true", json)); - } - - #endregion - } -} \ No newline at end of file From c939609e8cda3d3fb85feb123fac435905b9ccf8 Mon Sep 17 00:00:00 2001 From: Haiyang Xu Date: Tue, 24 Feb 2026 15:40:32 -0800 Subject: [PATCH 12/31] remove object value type --- .../Resp/Vector/Filter/VectorFilterEvaluator.cs | 15 --------------- .../Filter/VectorFilterEvaluatorTests.cs | 16 ---------------- 2 files changed, 31 deletions(-) diff --git a/libs/server/Resp/Vector/Filter/VectorFilterEvaluator.cs b/libs/server/Resp/Vector/Filter/VectorFilterEvaluator.cs index eda8053c9e5..f148d6124b9 100644 --- a/libs/server/Resp/Vector/Filter/VectorFilterEvaluator.cs +++ b/libs/server/Resp/Vector/Filter/VectorFilterEvaluator.cs @@ -133,21 +133,6 @@ public static bool IsTruthy(FilterValue value) }; } - /// - /// Overload accepting object for backward compatibility with tests that pass - /// boxed int, bool, string, double, or null directly. 
- /// - public static bool IsTruthy(object value) - { - if (value == null) return false; - if (value is double d) return d != 0; - if (value is int i) return i != 0; - if (value is string s) return !string.IsNullOrEmpty(s); - if (value is bool b) return b; - if (value is FilterValue fv) return IsTruthy(fv); - return true; - } - private static bool AreEqual(FilterValue left, FilterValue right) { if (left.IsNull && right.IsNull) return true; diff --git a/test/Garnet.test/Filter/VectorFilterEvaluatorTests.cs b/test/Garnet.test/Filter/VectorFilterEvaluatorTests.cs index cabb4b7afa5..ea5f38a540d 100644 --- a/test/Garnet.test/Filter/VectorFilterEvaluatorTests.cs +++ b/test/Garnet.test/Filter/VectorFilterEvaluatorTests.cs @@ -118,22 +118,6 @@ public void Evaluator_InOperatorWithNumericArray() ClassicAssert.IsFalse(VectorFilterTestHelpers.EvaluateFilterTruthy("5 in .scores", json)); } - [Test] - public void Evaluator_IsTruthy() - { - ClassicAssert.IsFalse(VectorFilterEvaluator.IsTruthy(null)); - ClassicAssert.IsFalse(VectorFilterEvaluator.IsTruthy(0.0)); - ClassicAssert.IsFalse(VectorFilterEvaluator.IsTruthy(0)); - ClassicAssert.IsFalse(VectorFilterEvaluator.IsTruthy("")); - ClassicAssert.IsFalse(VectorFilterEvaluator.IsTruthy(false)); - - ClassicAssert.IsTrue(VectorFilterEvaluator.IsTruthy(1.0)); - ClassicAssert.IsTrue(VectorFilterEvaluator.IsTruthy(-1.0)); - ClassicAssert.IsTrue(VectorFilterEvaluator.IsTruthy(42)); - ClassicAssert.IsTrue(VectorFilterEvaluator.IsTruthy("hello")); - ClassicAssert.IsTrue(VectorFilterEvaluator.IsTruthy(true)); - } - [Test] public void Evaluator_IsTruthy_FilterValue() { From 54dfc424a9e4b774bcdd1239ed0f1e2fc1a6634b Mon Sep 17 00:00:00 2001 From: Haiyang Xu Date: Tue, 24 Feb 2026 15:46:01 -0800 Subject: [PATCH 13/31] remove object-returning property --- .../Vector/Filter/VectorFilterExpression.cs | 9 ---- .../Filter/VectorFilterEvaluatorTests.cs | 42 +++++++++---------- .../Filter/VectorFilterTestHelpers.cs | 12 +----- 3 files changed, 23 
insertions(+), 40 deletions(-) diff --git a/libs/server/Resp/Vector/Filter/VectorFilterExpression.cs b/libs/server/Resp/Vector/Filter/VectorFilterExpression.cs index fe0d17c854e..cb10eb05799 100644 --- a/libs/server/Resp/Vector/Filter/VectorFilterExpression.cs +++ b/libs/server/Resp/Vector/Filter/VectorFilterExpression.cs @@ -135,15 +135,6 @@ internal abstract class Expr { } internal sealed class LiteralExpr : Expr { public FilterValue Value { get; init; } - - // Keep object-returning property for test compatibility - public object BoxedValue => Value.Kind switch - { - FilterValueKind.Number => Value.AsNumber(), - FilterValueKind.String => Value.AsString(), - FilterValueKind.Null => null, - _ => null - }; } /// diff --git a/test/Garnet.test/Filter/VectorFilterEvaluatorTests.cs b/test/Garnet.test/Filter/VectorFilterEvaluatorTests.cs index ea5f38a540d..111e8048928 100644 --- a/test/Garnet.test/Filter/VectorFilterEvaluatorTests.cs +++ b/test/Garnet.test/Filter/VectorFilterEvaluatorTests.cs @@ -16,19 +16,19 @@ public class VectorFilterEvaluatorTests : AllureTestBase public void Evaluator_Arithmetic() { var json = "{}"; - ClassicAssert.AreEqual(5.0, VectorFilterTestHelpers.EvaluateFilter("2 + 3", json)); - ClassicAssert.AreEqual(1.0, VectorFilterTestHelpers.EvaluateFilter("3 - 2", json)); - ClassicAssert.AreEqual(6.0, VectorFilterTestHelpers.EvaluateFilter("2 * 3", json)); - ClassicAssert.AreEqual(2.5, VectorFilterTestHelpers.EvaluateFilter("5 / 2", json)); - ClassicAssert.AreEqual(1.0, VectorFilterTestHelpers.EvaluateFilter("7 % 3", json)); - ClassicAssert.AreEqual(8.0, VectorFilterTestHelpers.EvaluateFilter("2 ** 3", json)); + ClassicAssert.AreEqual(5.0, VectorFilterTestHelpers.EvaluateFilter("2 + 3", json).AsNumber()); + ClassicAssert.AreEqual(1.0, VectorFilterTestHelpers.EvaluateFilter("3 - 2", json).AsNumber()); + ClassicAssert.AreEqual(6.0, VectorFilterTestHelpers.EvaluateFilter("2 * 3", json).AsNumber()); + ClassicAssert.AreEqual(2.5, 
VectorFilterTestHelpers.EvaluateFilter("5 / 2", json).AsNumber()); + ClassicAssert.AreEqual(1.0, VectorFilterTestHelpers.EvaluateFilter("7 % 3", json).AsNumber()); + ClassicAssert.AreEqual(8.0, VectorFilterTestHelpers.EvaluateFilter("2 ** 3", json).AsNumber()); } [Test] public void Evaluator_SubtractionWithField() { var json = "{\"year\":1980}"; - ClassicAssert.AreEqual(1975.0, VectorFilterTestHelpers.EvaluateFilter(".year - 5", json)); + ClassicAssert.AreEqual(1975.0, VectorFilterTestHelpers.EvaluateFilter(".year - 5", json).AsNumber()); ClassicAssert.IsTrue(VectorFilterTestHelpers.EvaluateFilterTruthy(".year - 5 > 0", json)); } @@ -36,15 +36,15 @@ public void Evaluator_SubtractionWithField() public void Evaluator_Comparison() { var json = "{}"; - ClassicAssert.AreEqual(1.0, VectorFilterTestHelpers.EvaluateFilter("5 > 3", json)); - ClassicAssert.AreEqual(0.0, VectorFilterTestHelpers.EvaluateFilter("3 > 5", json)); - ClassicAssert.AreEqual(1.0, VectorFilterTestHelpers.EvaluateFilter("3 < 5", json)); - ClassicAssert.AreEqual(0.0, VectorFilterTestHelpers.EvaluateFilter("5 < 3", json)); - ClassicAssert.AreEqual(1.0, VectorFilterTestHelpers.EvaluateFilter("5 >= 5", json)); - ClassicAssert.AreEqual(1.0, VectorFilterTestHelpers.EvaluateFilter("5 <= 5", json)); - ClassicAssert.AreEqual(1.0, VectorFilterTestHelpers.EvaluateFilter("5 == 5", json)); - ClassicAssert.AreEqual(1.0, VectorFilterTestHelpers.EvaluateFilter("5 != 3", json)); - ClassicAssert.AreEqual(0.0, VectorFilterTestHelpers.EvaluateFilter("5 != 5", json)); + ClassicAssert.AreEqual(1.0, VectorFilterTestHelpers.EvaluateFilter("5 > 3", json).AsNumber()); + ClassicAssert.AreEqual(0.0, VectorFilterTestHelpers.EvaluateFilter("3 > 5", json).AsNumber()); + ClassicAssert.AreEqual(1.0, VectorFilterTestHelpers.EvaluateFilter("3 < 5", json).AsNumber()); + ClassicAssert.AreEqual(0.0, VectorFilterTestHelpers.EvaluateFilter("5 < 3", json).AsNumber()); + ClassicAssert.AreEqual(1.0, VectorFilterTestHelpers.EvaluateFilter("5 >= 
5", json).AsNumber()); + ClassicAssert.AreEqual(1.0, VectorFilterTestHelpers.EvaluateFilter("5 <= 5", json).AsNumber()); + ClassicAssert.AreEqual(1.0, VectorFilterTestHelpers.EvaluateFilter("5 == 5", json).AsNumber()); + ClassicAssert.AreEqual(1.0, VectorFilterTestHelpers.EvaluateFilter("5 != 3", json).AsNumber()); + ClassicAssert.AreEqual(0.0, VectorFilterTestHelpers.EvaluateFilter("5 != 5", json).AsNumber()); } [Test] @@ -88,8 +88,8 @@ public void Evaluator_StringEquality() public void Evaluator_MemberAccess() { var json = "{\"year\":1980,\"rating\":4.5}"; - ClassicAssert.AreEqual(1980.0, VectorFilterTestHelpers.EvaluateFilter(".year", json)); - ClassicAssert.AreEqual(4.5, VectorFilterTestHelpers.EvaluateFilter(".rating", json)); + ClassicAssert.AreEqual(1980.0, VectorFilterTestHelpers.EvaluateFilter(".year", json).AsNumber()); + ClassicAssert.AreEqual(4.5, VectorFilterTestHelpers.EvaluateFilter(".rating", json).AsNumber()); } [Test] @@ -97,7 +97,7 @@ public void Evaluator_MissingFieldReturnsNull() { var json = "{\"year\":1980}"; var result = VectorFilterTestHelpers.EvaluateFilter(".missing", json); - ClassicAssert.IsNull(result); + ClassicAssert.IsTrue(result.IsNull); ClassicAssert.IsFalse(VectorFilterTestHelpers.EvaluateFilterTruthy(".missing", json)); } @@ -167,8 +167,8 @@ public void Evaluator_BooleanJsonValues() public void Evaluator_ArithmeticWithNonNumericString_CoercesToZero() { var json = "{\"genre\":\"action\"}"; - ClassicAssert.AreEqual(2.0, VectorFilterTestHelpers.EvaluateFilter(".genre + 2", json)); - ClassicAssert.AreEqual(-1.0, VectorFilterTestHelpers.EvaluateFilter(".genre - 1", json)); + ClassicAssert.AreEqual(2.0, VectorFilterTestHelpers.EvaluateFilter(".genre + 2", json).AsNumber()); + ClassicAssert.AreEqual(-1.0, VectorFilterTestHelpers.EvaluateFilter(".genre - 1", json).AsNumber()); } [Test] diff --git a/test/Garnet.test/Filter/VectorFilterTestHelpers.cs b/test/Garnet.test/Filter/VectorFilterTestHelpers.cs index 4fcaa3752f3..00766fe1f23 
100644 --- a/test/Garnet.test/Filter/VectorFilterTestHelpers.cs +++ b/test/Garnet.test/Filter/VectorFilterTestHelpers.cs @@ -8,20 +8,12 @@ namespace Garnet.test { internal static class VectorFilterTestHelpers { - internal static object EvaluateFilter(string expression, string json) + internal static FilterValue EvaluateFilter(string expression, string json) { var tokens = VectorFilterTokenizer.Tokenize(expression); var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); using var doc = JsonDocument.Parse(json); - var result = VectorFilterEvaluator.EvaluateExpression(expr, doc.RootElement); - - return result.Kind switch - { - FilterValueKind.Number => (object)result.AsNumber(), - FilterValueKind.String => result.AsString(), - FilterValueKind.Null => null, - _ => result.AsNumber() - }; + return VectorFilterEvaluator.EvaluateExpression(expr, doc.RootElement); } internal static bool EvaluateFilterTruthy(string expression, string json) From b65dc7c97f7846651178465bc11732f3f1527809 Mon Sep 17 00:00:00 2001 From: Haiyang Xu Date: Tue, 24 Feb 2026 15:58:30 -0800 Subject: [PATCH 14/31] fix format error --- test/Garnet.test/Filter/VectorFilterEvaluatorTests.cs | 2 +- test/Garnet.test/Filter/VectorFilterParserTests.cs | 2 +- test/Garnet.test/Filter/VectorFilterTestHelpers.cs | 2 +- test/Garnet.test/Filter/VectorFilterTokenizerTests.cs | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/test/Garnet.test/Filter/VectorFilterEvaluatorTests.cs b/test/Garnet.test/Filter/VectorFilterEvaluatorTests.cs index 111e8048928..ced420b8d39 100644 --- a/test/Garnet.test/Filter/VectorFilterEvaluatorTests.cs +++ b/test/Garnet.test/Filter/VectorFilterEvaluatorTests.cs @@ -186,4 +186,4 @@ public void Evaluator_EqualityBetweenNumberAndNonNumericString_ReturnsFalse() ClassicAssert.IsTrue(VectorFilterTestHelpers.EvaluateFilterTruthy(".genre == 0", json)); } } -} +} \ No newline at end of file diff --git a/test/Garnet.test/Filter/VectorFilterParserTests.cs 
b/test/Garnet.test/Filter/VectorFilterParserTests.cs index 2fb614ecbf8..77841c5cdbe 100644 --- a/test/Garnet.test/Filter/VectorFilterParserTests.cs +++ b/test/Garnet.test/Filter/VectorFilterParserTests.cs @@ -182,4 +182,4 @@ public void Parser_ErrorOnInvalidNumberLiteral_MultipleDots() VectorFilterParser.ParseExpression(tokens, 0, out _)); } } -} +} \ No newline at end of file diff --git a/test/Garnet.test/Filter/VectorFilterTestHelpers.cs b/test/Garnet.test/Filter/VectorFilterTestHelpers.cs index 00766fe1f23..d3c2afbc485 100644 --- a/test/Garnet.test/Filter/VectorFilterTestHelpers.cs +++ b/test/Garnet.test/Filter/VectorFilterTestHelpers.cs @@ -25,4 +25,4 @@ internal static bool EvaluateFilterTruthy(string expression, string json) return VectorFilterEvaluator.IsTruthy(result); } } -} +} \ No newline at end of file diff --git a/test/Garnet.test/Filter/VectorFilterTokenizerTests.cs b/test/Garnet.test/Filter/VectorFilterTokenizerTests.cs index e55869cee8b..4130098a50a 100644 --- a/test/Garnet.test/Filter/VectorFilterTokenizerTests.cs +++ b/test/Garnet.test/Filter/VectorFilterTokenizerTests.cs @@ -182,4 +182,4 @@ public void Tokenizer_EmptyInput() ClassicAssert.AreEqual(0, tokens.Count); } } -} +} \ No newline at end of file From 513be500d33c14e190672a890fb24fb19cff9895 Mon Sep 17 00:00:00 2001 From: CI Fix Date: Thu, 26 Feb 2026 13:48:16 -0800 Subject: [PATCH 15/31] resove comments --- .../Vector/Filter/VectorFilterEvaluator.cs | 22 +- .../Vector/Filter/VectorFilterExpression.cs | 2 - .../Resp/Vector/Filter/VectorFilterParser.cs | 241 ++++++++++++++---- .../Vector/Filter/VectorFilterTokenizer.cs | 41 ++- libs/server/Resp/Vector/VectorManager.cs | 137 +++++----- .../Filter/VectorFilterParserTests.cs | 100 +++++--- .../Filter/VectorFilterTestHelpers.cs | 31 ++- .../Filter/VectorFilterTokenizerTests.cs | 83 ++++-- 8 files changed, 464 insertions(+), 193 deletions(-) diff --git a/libs/server/Resp/Vector/Filter/VectorFilterEvaluator.cs 
b/libs/server/Resp/Vector/Filter/VectorFilterEvaluator.cs index f148d6124b9..7b469926381 100644 --- a/libs/server/Resp/Vector/Filter/VectorFilterEvaluator.cs +++ b/libs/server/Resp/Vector/Filter/VectorFilterEvaluator.cs @@ -2,6 +2,7 @@ // Licensed under the MIT license. using System; +using System.Globalization; using System.Runtime.CompilerServices; using System.Text.Json; @@ -11,9 +12,24 @@ namespace Garnet.server.Vector.Filter /// Evaluator for vector filter expressions. /// Evaluates parsed expression trees against JSON attribute data. /// Returns FilterValue (a struct) to avoid boxing allocations on every evaluation. + /// + /// Note: This evaluator operates over top-level properties of the JSON document only. + /// Nested property access is not supported. A future optimization could replace the + /// JsonElement-based lookup with a raw span + (offset, length) pairs approach for + /// better performance, avoiding JsonDocument allocation entirely. /// internal static class VectorFilterEvaluator { + /// + /// Evaluate a filter expression against a JSON element and return a boolean result. + /// This is the primary public API for filter evaluation. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static bool EvaluateFilterBool(Expr expr, JsonElement root) + { + return IsTruthy(EvaluateExpression(expr, root)); + } + /// /// Evaluate a filter expression against a JSON element. /// Returns a FilterValue (struct) — no boxing occurs for numeric results. @@ -60,7 +76,7 @@ private static FilterValue EvaluateUnary(UnaryExpr unary, JsonElement root) { OperatorKind.Not => IsTruthy(operand) ? 
FilterValue.False : FilterValue.True, OperatorKind.Negate => FilterValue.FromNumber(-ToNumber(operand)), - _ => throw new InvalidOperationException($"Unknown unary operator: {unary.Operator}") + _ => FilterValue.Null }; } @@ -102,7 +118,7 @@ private static FilterValue EvaluateBinary(BinaryExpr binary, JsonElement root) OperatorKind.Equal => FilterValue.FromBool(AreEqual(left, right)), OperatorKind.NotEqual => FilterValue.FromBool(!AreEqual(left, right)), OperatorKind.In => FilterValue.FromBool(IsIn(left, right)), - _ => throw new InvalidOperationException($"Unknown operator: {binary.Operator}") + _ => FilterValue.Null }; } } @@ -113,7 +129,7 @@ private static double ToNumber(FilterValue value) return value.Kind switch { FilterValueKind.Number => value.AsNumber(), - FilterValueKind.String => double.TryParse(value.AsString(), out var result) ? result : 0, + FilterValueKind.String => double.TryParse(value.AsString(), NumberStyles.Float | NumberStyles.AllowLeadingSign, CultureInfo.InvariantCulture, out var result) ? result : 0, _ => 0 }; } diff --git a/libs/server/Resp/Vector/Filter/VectorFilterExpression.cs b/libs/server/Resp/Vector/Filter/VectorFilterExpression.cs index cb10eb05799..04c731d5b1b 100644 --- a/libs/server/Resp/Vector/Filter/VectorFilterExpression.cs +++ b/libs/server/Resp/Vector/Filter/VectorFilterExpression.cs @@ -2,7 +2,6 @@ // Licensed under the MIT license. using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; using System.Text.Json; namespace Garnet.server.Vector.Filter @@ -11,7 +10,6 @@ namespace Garnet.server.Vector.Filter /// Discriminated union value type to eliminate boxing of doubles/strings /// throughout the filter evaluation pipeline. 
/// - [StructLayout(LayoutKind.Auto)] internal readonly struct FilterValue { private readonly double _number; diff --git a/libs/server/Resp/Vector/Filter/VectorFilterParser.cs b/libs/server/Resp/Vector/Filter/VectorFilterParser.cs index 345048f4824..df6abdc6644 100644 --- a/libs/server/Resp/Vector/Filter/VectorFilterParser.cs +++ b/libs/server/Resp/Vector/Filter/VectorFilterParser.cs @@ -1,7 +1,6 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT license. -using System; using System.Collections.Generic; using System.Globalization; @@ -10,65 +9,115 @@ namespace Garnet.server.Vector.Filter /// /// Recursive descent parser for vector filter expressions. /// Supports arithmetic, comparison, logical operators, containment, and grouping. + /// Uses TryParse pattern to avoid exceptions in the hot path. + /// Includes a recursion depth guard to prevent stack overflow from deeply nested expressions. /// internal static class VectorFilterParser { - public static Expr ParseExpression(List tokens, int start, out int end) + /// + /// Maximum recursion depth allowed during parsing to prevent stack overflow. + /// This limit is intentionally conservative; typical filter expressions are shallow. + /// Windows and Linux may use different default stack sizes, so we keep this low. + /// + private const int MaxRecursionDepth = 64; + + /// + /// Attempt to parse a filter expression from the token list. + /// Returns false with an error message if parsing fails. + /// + /// The list of tokens to parse. + /// The starting token index. + /// The parsed expression tree, or null on failure. + /// The index past the last consumed token. + /// An error message describing the failure, or null on success. + /// True if parsing succeeded; false otherwise. 
+ public static bool TryParseExpression(List tokens, int start, out Expr result, out int end, out string error) { - return ParseLogicalOr(tokens, start, out end); + return TryParseLogicalOr(tokens, start, out result, out end, out error, depth: 0); } - private static Expr ParseLogicalOr(List tokens, int start, out int end) + private static bool TryParseLogicalOr(List tokens, int start, out Expr result, out int end, out string error, int depth) { - var left = ParseLogicalAnd(tokens, start, out end); + if (!TryParseLogicalAnd(tokens, start, out var left, out end, out error, depth)) + { + result = null; + return false; + } while (end < tokens.Count && ((tokens[end].Type == TokenType.Keyword && tokens[end].Value == "or") || (tokens[end].Type == TokenType.Operator && tokens[end].Value == "||"))) { end++; - var right = ParseLogicalAnd(tokens, end, out end); + if (!TryParseLogicalAnd(tokens, end, out var right, out end, out error, depth)) + { + result = null; + return false; + } left = new BinaryExpr { Left = left, Operator = OperatorKind.Or, Right = right }; } - return left; + result = left; + return true; } - private static Expr ParseLogicalAnd(List tokens, int start, out int end) + private static bool TryParseLogicalAnd(List tokens, int start, out Expr result, out int end, out string error, int depth) { - var left = ParseEquality(tokens, start, out end); + if (!TryParseEquality(tokens, start, out var left, out end, out error, depth)) + { + result = null; + return false; + } while (end < tokens.Count && ((tokens[end].Type == TokenType.Keyword && tokens[end].Value == "and") || (tokens[end].Type == TokenType.Operator && tokens[end].Value == "&&"))) { end++; - var right = ParseEquality(tokens, end, out end); + if (!TryParseEquality(tokens, end, out var right, out end, out error, depth)) + { + result = null; + return false; + } left = new BinaryExpr { Left = left, Operator = OperatorKind.And, Right = right }; } - return left; + result = left; + return true; } - private 
static Expr ParseEquality(List tokens, int start, out int end) + private static bool TryParseEquality(List tokens, int start, out Expr result, out int end, out string error, int depth) { - var left = ParseComparison(tokens, start, out end); + if (!TryParseComparison(tokens, start, out var left, out end, out error, depth)) + { + result = null; + return false; + } while (end < tokens.Count && tokens[end].Type == TokenType.Operator && (tokens[end].Value == "==" || tokens[end].Value == "!=")) { var op = tokens[end].Value == "==" ? OperatorKind.Equal : OperatorKind.NotEqual; end++; - var right = ParseComparison(tokens, end, out end); + if (!TryParseComparison(tokens, end, out var right, out end, out error, depth)) + { + result = null; + return false; + } left = new BinaryExpr { Left = left, Operator = op, Right = right }; } - return left; + result = left; + return true; } - private static Expr ParseComparison(List tokens, int start, out int end) + private static bool TryParseComparison(List tokens, int start, out Expr result, out int end, out string error, int depth) { - var left = ParseContainment(tokens, start, out end); + if (!TryParseContainment(tokens, start, out var left, out end, out error, depth)) + { + result = null; + return false; + } while (end < tokens.Count && tokens[end].Type == TokenType.Operator && (tokens[end].Value == ">" || tokens[end].Value == "<" || @@ -76,11 +125,16 @@ private static Expr ParseComparison(List tokens, int start, out int end) { var op = ParseComparisonOperator(tokens[end].Value); end++; - var right = ParseContainment(tokens, end, out end); + if (!TryParseContainment(tokens, end, out var right, out end, out error, depth)) + { + result = null; + return false; + } left = new BinaryExpr { Left = left, Operator = op, Right = right }; } - return left; + result = left; + return true; } private static OperatorKind ParseComparisonOperator(string value) @@ -91,39 +145,61 @@ private static OperatorKind ParseComparisonOperator(string value) 
return value[0] == '>' ? OperatorKind.GreaterEqual : OperatorKind.LessEqual; } - private static Expr ParseContainment(List tokens, int start, out int end) + private static bool TryParseContainment(List tokens, int start, out Expr result, out int end, out string error, int depth) { - var left = ParseAdditive(tokens, start, out end); + if (!TryParseAdditive(tokens, start, out var left, out end, out error, depth)) + { + result = null; + return false; + } if (end < tokens.Count && tokens[end].Type == TokenType.Keyword && tokens[end].Value == "in") { end++; - var right = ParseAdditive(tokens, end, out end); + if (!TryParseAdditive(tokens, end, out var right, out end, out error, depth)) + { + result = null; + return false; + } left = new BinaryExpr { Left = left, Operator = OperatorKind.In, Right = right }; } - return left; + result = left; + return true; } - private static Expr ParseAdditive(List tokens, int start, out int end) + private static bool TryParseAdditive(List tokens, int start, out Expr result, out int end, out string error, int depth) { - var left = ParseMultiplicative(tokens, start, out end); + if (!TryParseMultiplicative(tokens, start, out var left, out end, out error, depth)) + { + result = null; + return false; + } while (end < tokens.Count && tokens[end].Type == TokenType.Operator && (tokens[end].Value == "+" || tokens[end].Value == "-")) { var op = tokens[end].Value == "+" ? 
OperatorKind.Add : OperatorKind.Subtract; end++; - var right = ParseMultiplicative(tokens, end, out end); + if (!TryParseMultiplicative(tokens, end, out var right, out end, out error, depth)) + { + result = null; + return false; + } left = new BinaryExpr { Left = left, Operator = op, Right = right }; } - return left; + result = left; + return true; } - private static Expr ParseMultiplicative(List tokens, int start, out int end) + private static bool TryParseMultiplicative(List tokens, int start, out Expr result, out int end, out string error, int depth) { - var left = ParseExponentiation(tokens, start, out end); + if (!TryParseExponentiation(tokens, start, out var left, out end, out error, depth)) + { + result = null; + return false; + } while (end < tokens.Count && tokens[end].Type == TokenType.Operator && (tokens[end].Value == "*" || tokens[end].Value == "/" || tokens[end].Value == "%")) @@ -135,28 +211,43 @@ private static Expr ParseMultiplicative(List tokens, int start, out int e _ => OperatorKind.Modulo }; end++; - var right = ParseExponentiation(tokens, end, out end); + if (!TryParseExponentiation(tokens, end, out var right, out end, out error, depth)) + { + result = null; + return false; + } left = new BinaryExpr { Left = left, Operator = op, Right = right }; } - return left; + result = left; + return true; } - private static Expr ParseExponentiation(List tokens, int start, out int end) + private static bool TryParseExponentiation(List tokens, int start, out Expr result, out int end, out string error, int depth) { - var left = ParseUnary(tokens, start, out end); + if (!TryParseUnary(tokens, start, out var left, out end, out error, depth)) + { + result = null; + return false; + } if (end < tokens.Count && tokens[end].Type == TokenType.Operator && tokens[end].Value == "**") { end++; - var right = ParseExponentiation(tokens, end, out end); // Right associative + // Right associative — recurse into exponentiation + if (!TryParseExponentiation(tokens, end, out 
var right, out end, out error, depth)) + { + result = null; + return false; + } left = new BinaryExpr { Left = left, Operator = OperatorKind.Power, Right = right }; } - return left; + result = left; + return true; } - private static Expr ParseUnary(List tokens, int start, out int end) + private static bool TryParseUnary(List tokens, int start, out Expr result, out int end, out string error, int depth) { if (start < tokens.Count) { @@ -164,65 +255,111 @@ private static Expr ParseUnary(List tokens, int start, out int end) (tokens[start].Type == TokenType.Operator && tokens[start].Value == "!")) { start++; - var operand = ParseUnary(tokens, start, out end); - return new UnaryExpr { Operator = OperatorKind.Not, Operand = operand }; + if (!TryParseUnary(tokens, start, out var operand, out end, out error, depth)) + { + result = null; + return false; + } + result = new UnaryExpr { Operator = OperatorKind.Not, Operand = operand }; + return true; } if (tokens[start].Type == TokenType.Operator && tokens[start].Value == "-") { start++; - var operand = ParseUnary(tokens, start, out end); - return new UnaryExpr { Operator = OperatorKind.Negate, Operand = operand }; + if (!TryParseUnary(tokens, start, out var operand, out end, out error, depth)) + { + result = null; + return false; + } + result = new UnaryExpr { Operator = OperatorKind.Negate, Operand = operand }; + return true; } } - return ParsePrimary(tokens, start, out end); + return TryParsePrimary(tokens, start, out result, out end, out error, depth); } - private static Expr ParsePrimary(List tokens, int start, out int end) + private static bool TryParsePrimary(List tokens, int start, out Expr result, out int end, out string error, int depth) { + result = null; + if (start >= tokens.Count) - throw new InvalidOperationException("Unexpected end of expression"); + { + end = start; + error = "Unexpected end of expression"; + return false; + } var token = tokens[start]; - // Parentheses + // Parentheses — increase recursion 
depth if (token.Type == TokenType.Delimiter && token.Value == "(") { - var expr = ParseExpression(tokens, start + 1, out end); + var newDepth = depth + 1; + if (newDepth > MaxRecursionDepth) + { + end = start; + error = $"Filter expression exceeds maximum nesting depth of {MaxRecursionDepth}"; + return false; + } + + if (!TryParseLogicalOr(tokens, start + 1, out var expr, out end, out error, newDepth)) + return false; + if (end >= tokens.Count || tokens[end].Type != TokenType.Delimiter || tokens[end].Value != ")") - throw new InvalidOperationException("Missing closing parenthesis"); + { + error = "Missing closing parenthesis"; + return false; + } end++; - return expr; + result = expr; + return true; } // Literals — use FilterValue to avoid boxing doubles if (token.Type == TokenType.Number) { + if (!double.TryParse(token.Value, NumberStyles.Float | NumberStyles.AllowLeadingSign, CultureInfo.InvariantCulture, out var numValue)) + { + end = start; + error = $"Invalid number literal: {token.Value}"; + return false; + } end = start + 1; - return new LiteralExpr { Value = FilterValue.FromNumber(double.Parse(token.Value, CultureInfo.InvariantCulture)) }; + result = new LiteralExpr { Value = FilterValue.FromNumber(numValue) }; + error = null; + return true; } if (token.Type == TokenType.String) { end = start + 1; - return new LiteralExpr { Value = FilterValue.FromString(token.Value) }; + result = new LiteralExpr { Value = FilterValue.FromString(token.Value) }; + error = null; + return true; } if (token.Type == TokenType.Boolean) { end = start + 1; - return new LiteralExpr { Value = token.Value == "true" ? FilterValue.True : FilterValue.False }; + result = new LiteralExpr { Value = token.Value == "true" ? 
FilterValue.True : FilterValue.False }; + error = null; + return true; } // Identifier (field access) if (token.Type == TokenType.Identifier) { end = start + 1; - return new MemberExpr { Property = token.Value.TrimStart('.') }; + result = new MemberExpr { Property = token.Value.TrimStart('.') }; + error = null; + return true; } - throw new InvalidOperationException($"Unexpected token: {token.Value}"); + end = start; + error = $"Unexpected token: {token.Value}"; + return false; } } } \ No newline at end of file diff --git a/libs/server/Resp/Vector/Filter/VectorFilterTokenizer.cs b/libs/server/Resp/Vector/Filter/VectorFilterTokenizer.cs index 49644a7362c..1780d33c915 100644 --- a/libs/server/Resp/Vector/Filter/VectorFilterTokenizer.cs +++ b/libs/server/Resp/Vector/Filter/VectorFilterTokenizer.cs @@ -1,7 +1,6 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT license. -using System; using System.Collections.Generic; using System.Runtime.CompilerServices; @@ -31,6 +30,7 @@ public Token(TokenType type, string value) /// /// Tokenizer for vector filter expressions. /// Converts filter strings into tokens for parsing. + /// Uses TryTokenize pattern to avoid exceptions in the hot path. /// internal static class VectorFilterTokenizer { @@ -54,9 +54,18 @@ internal static class VectorFilterTokenizer private const string OpPipePipe = "||"; private const string OpStarStar = "**"; - public static List Tokenize(string input) + /// + /// Attempt to tokenize the input string into a list of tokens. + /// Returns false with an error message if the input is malformed. + /// + /// The filter expression string to tokenize. + /// The resulting list of tokens, or null on failure. + /// An error message describing the failure, or null on success. + /// True if tokenization succeeded; false otherwise. 
+ public static bool TryTokenize(string input, out List tokens, out string error) { - var tokens = new List(); + tokens = new List(); + error = null; var i = 0; while (i < input.Length) @@ -80,8 +89,22 @@ public static List Tokenize(string input) { var start = i; if (input[i] == '-') i++; + + var dotCount = 0; while (i < input.Length && (char.IsDigit(input[i]) || input[i] == '.')) + { + if (input[i] == '.') + { + dotCount++; + if (dotCount > 1) + { + error = $"Invalid number literal with multiple decimal points at position {start}"; + tokens = null; + return false; + } + } i++; + } tokens.Add(new Token(TokenType.Number, input.Substring(start, i - start))); continue; } @@ -115,7 +138,11 @@ public static List Tokenize(string input) i++; } if (i >= input.Length) - throw new InvalidOperationException($"Unterminated string literal starting at position {start - 1}"); + { + error = $"Unterminated string literal starting at position {start - 1}"; + tokens = null; + return false; + } tokens.Add(new Token(TokenType.String, input.Substring(start, i - start))); i++; // Skip closing quote continue; @@ -143,10 +170,12 @@ public static List Tokenize(string input) continue; } - throw new InvalidOperationException($"Unexpected character in filter expression: '{input[i]}' at position {i}"); + error = $"Unexpected character in filter expression: '{input[i]}' at position {i}"; + tokens = null; + return false; } - return tokens; + return true; } /// diff --git a/libs/server/Resp/Vector/VectorManager.cs b/libs/server/Resp/Vector/VectorManager.cs index 5e2c323b4c0..66d5c35420b 100644 --- a/libs/server/Resp/Vector/VectorManager.cs +++ b/libs/server/Resp/Vector/VectorManager.cs @@ -935,6 +935,11 @@ internal static uint CalculateValueDimensions(VectorValueType valueType, ReadOnl /// /// Apply post-filtering to vector search results based on JSON path filter expression. + /// Returns the number of results that passed the filter, or -1 if the filter expression is invalid. 
+ /// + /// TODO: A better approach would be to produce a bitmap of passing elements and let + /// NetworkVSIM handle skipping non-matching entries, avoiding the in-place compaction copies. + /// For now we compact in-place to minimize the scope of changes. /// private int ApplyPostFilter( ReadOnlySpan filter, @@ -948,88 +953,92 @@ private int ApplyPostFilter( return numResults; } + // Convert filter bytes to string for tokenization. + // NOTE: This allocation is required because the tokenizer operates on strings. + // A future optimization could make the tokenizer work directly on ReadOnlySpan. var filterStr = Encoding.UTF8.GetString(filter); - try + // Tokenize and parse the filter expression. If this fails, we return -1 to indicate a bad filter expression. + if (!VectorFilterTokenizer.TryTokenize(filterStr, out var tokens, out _)) { - var filteredCount = 0; + return -1; + } - // Parse the filter expression once, then evaluate per result - var tokens = VectorFilterTokenizer.Tokenize(filterStr); - var filterExpr = VectorFilterParser.ParseExpression(tokens, 0, out var endIndex); + if (!VectorFilterParser.TryParseExpression(tokens, 0, out var filterExpr, out var endIndex, out _)) + { + return -1; + } - // Ensure the entire token stream was consumed by the parser - if (endIndex != tokens.Count) - { - throw new ArgumentException("Invalid filter expression: unexpected tokens after end of expression.", nameof(filter)); - } + // Ensure the entire token stream was consumed by the parser + if (endIndex != tokens.Count) + { + return -1; + } - var idsSpan = outputIds.AsSpan(); - var distancesSpan = MemoryMarshal.Cast(outputDistances.AsSpan()); - var attributesSpan = outputAttributes.AsSpan(); + var filteredCount = 0; - var idReadPos = 0; - var attrReadPos = 0; - var idWritePos = 0; - var distWritePos = 0; - var attrWritePos = 0; + var idsSpan = outputIds.AsSpan(); + var distancesSpan = MemoryMarshal.Cast(outputDistances.AsSpan()); + var attributesSpan = 
outputAttributes.AsSpan(); - for (var i = 0; i < numResults; i++) - { - // Read ID - var idLen = BinaryPrimitives.ReadInt32LittleEndian(idsSpan[idReadPos..]); - var idTotalLen = sizeof(int) + idLen; + var idReadPos = 0; + var attrReadPos = 0; + var idWritePos = 0; + var distWritePos = 0; + var attrWritePos = 0; - // Read attribute - var attrLen = BinaryPrimitives.ReadInt32LittleEndian(attributesSpan[attrReadPos..]); - var attrData = attributesSpan.Slice(attrReadPos + sizeof(int), attrLen); + for (var i = 0; i < numResults; i++) + { + // Read ID + var idLen = BinaryPrimitives.ReadInt32LittleEndian(idsSpan[idReadPos..]); + var idTotalLen = sizeof(int) + idLen; - // Evaluate filter - if (EvaluateFilter(filterExpr, attrData)) - { - // Copy ID if not already in place - if (idReadPos != idWritePos) - { - idsSpan.Slice(idReadPos, idTotalLen).CopyTo(idsSpan[idWritePos..]); - } + // Read attribute + var attrLen = BinaryPrimitives.ReadInt32LittleEndian(attributesSpan[attrReadPos..]); + var attrData = attributesSpan.Slice(attrReadPos + sizeof(int), attrLen); - // Copy distance if not already in place - if (i != distWritePos) - { - distancesSpan[distWritePos] = distancesSpan[i]; - } + // Evaluate filter + if (EvaluateFilter(filterExpr, attrData)) + { + // Copy ID if not already in place + if (idReadPos != idWritePos) + { + idsSpan.Slice(idReadPos, idTotalLen).CopyTo(idsSpan[idWritePos..]); + } - // Copy attribute if not already in place - if (attrReadPos != attrWritePos) - { - attributesSpan.Slice(attrReadPos, sizeof(int) + attrLen).CopyTo(attributesSpan[attrWritePos..]); - } + // Copy distance if not already in place + if (i != distWritePos) + { + distancesSpan[distWritePos] = distancesSpan[i]; + } - idWritePos += idTotalLen; - distWritePos++; - attrWritePos += sizeof(int) + attrLen; - filteredCount++; + // Copy attribute if not already in place + if (attrReadPos != attrWritePos) + { + attributesSpan.Slice(attrReadPos, sizeof(int) + 
attrLen).CopyTo(attributesSpan[attrWritePos..]); } - idReadPos += idTotalLen; - attrReadPos += sizeof(int) + attrLen; + idWritePos += idTotalLen; + distWritePos++; + attrWritePos += sizeof(int) + attrLen; + filteredCount++; } - // Update lengths - outputIds.Length = idWritePos; - outputDistances.Length = distWritePos * sizeof(float); - outputAttributes.Length = attrWritePos; - - return filteredCount; - } - catch (Exception ex) when (ex is ArgumentException || ex is FormatException || ex is InvalidOperationException) - { - throw new ArgumentException("Invalid filter expression.", nameof(filter), ex); + idReadPos += idTotalLen; + attrReadPos += sizeof(int) + attrLen; } + + // Update lengths + outputIds.Length = idWritePos; + outputDistances.Length = distWritePos * sizeof(float); + outputAttributes.Length = attrWritePos; + + return filteredCount; } /// /// Evaluate a pre-parsed filter expression against attribute data. + /// Returns false if the JSON is malformed or the filter cannot be evaluated. 
/// private static bool EvaluateFilter(Expr filterExpr, ReadOnlySpan attributeJson) { @@ -1038,14 +1047,12 @@ private static bool EvaluateFilter(Expr filterExpr, ReadOnlySpan attribute var reader = new Utf8JsonReader(attributeJson); using var jsonDoc = JsonDocument.ParseValue(ref reader); var root = jsonDoc.RootElement; - var result = VectorFilterEvaluator.EvaluateExpression(filterExpr, root); - return VectorFilterEvaluator.IsTruthy(result); + return VectorFilterEvaluator.EvaluateFilterBool(filterExpr, root); } - catch (Exception ex) when (ex is JsonException or InvalidOperationException) + catch (JsonException) { - // If filter evaluation fails (malformed JSON or invalid expression), exclude the result - Trace.TraceWarning("Vector filter evaluation failed: {0}", ex); + // Malformed JSON in attribute data — exclude the result return false; } } diff --git a/test/Garnet.test/Filter/VectorFilterParserTests.cs b/test/Garnet.test/Filter/VectorFilterParserTests.cs index 77841c5cdbe..77bf17a2550 100644 --- a/test/Garnet.test/Filter/VectorFilterParserTests.cs +++ b/test/Garnet.test/Filter/VectorFilterParserTests.cs @@ -1,7 +1,6 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT license. 
-using System; using System.Collections.Generic; using Allure.NUnit; using Garnet.server.Vector.Filter; @@ -17,8 +16,8 @@ public class VectorFilterParserTests : AllureTestBase [Test] public void Parser_NumberLiteral() { - var tokens = VectorFilterTokenizer.Tokenize("42"); - var expr = VectorFilterParser.ParseExpression(tokens, 0, out var end); + ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize("42", out var tokens, out _)); + ClassicAssert.IsTrue(VectorFilterParser.TryParseExpression(tokens, 0, out var expr, out var end, out _)); ClassicAssert.AreEqual(1, end); ClassicAssert.IsInstanceOf(expr); var lit = (LiteralExpr)expr; @@ -29,8 +28,8 @@ public void Parser_NumberLiteral() [Test] public void Parser_StringLiteral() { - var tokens = VectorFilterTokenizer.Tokenize("\"hello\""); - var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); + ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize("\"hello\"", out var tokens, out _)); + ClassicAssert.IsTrue(VectorFilterParser.TryParseExpression(tokens, 0, out var expr, out _, out _)); ClassicAssert.IsInstanceOf(expr); var lit = (LiteralExpr)expr; ClassicAssert.AreEqual(FilterValueKind.String, lit.Value.Kind); @@ -40,15 +39,15 @@ public void Parser_StringLiteral() [Test] public void Parser_BooleanLiteral() { - var tokens = VectorFilterTokenizer.Tokenize("true"); - var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); + ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize("true", out var tokens, out _)); + ClassicAssert.IsTrue(VectorFilterParser.TryParseExpression(tokens, 0, out var expr, out _, out _)); ClassicAssert.IsInstanceOf(expr); var lit = (LiteralExpr)expr; ClassicAssert.AreEqual(FilterValueKind.Number, lit.Value.Kind); ClassicAssert.AreEqual(1.0, lit.Value.AsNumber()); - tokens = VectorFilterTokenizer.Tokenize("false"); - expr = VectorFilterParser.ParseExpression(tokens, 0, out _); + ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize("false", out tokens, out _)); + 
ClassicAssert.IsTrue(VectorFilterParser.TryParseExpression(tokens, 0, out expr, out _, out _)); ClassicAssert.IsInstanceOf(expr); lit = (LiteralExpr)expr; ClassicAssert.AreEqual(FilterValueKind.Number, lit.Value.Kind); @@ -58,8 +57,8 @@ public void Parser_BooleanLiteral() [Test] public void Parser_MemberAccess() { - var tokens = VectorFilterTokenizer.Tokenize(".year"); - var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); + ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize(".year", out var tokens, out _)); + ClassicAssert.IsTrue(VectorFilterParser.TryParseExpression(tokens, 0, out var expr, out _, out _)); ClassicAssert.IsInstanceOf(expr); ClassicAssert.AreEqual("year", ((MemberExpr)expr).Property); } @@ -67,8 +66,8 @@ public void Parser_MemberAccess() [Test] public void Parser_UnaryNot() { - var tokens = VectorFilterTokenizer.Tokenize("not true"); - var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); + ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize("not true", out var tokens, out _)); + ClassicAssert.IsTrue(VectorFilterParser.TryParseExpression(tokens, 0, out var expr, out _, out _)); ClassicAssert.IsInstanceOf(expr); var unary = (UnaryExpr)expr; ClassicAssert.AreEqual(OperatorKind.Not, unary.Operator); @@ -78,8 +77,8 @@ public void Parser_UnaryNot() [Test] public void Parser_UnaryNegation() { - var tokens = VectorFilterTokenizer.Tokenize(".a + (-.b)"); - var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); + ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize(".a + (-.b)", out var tokens, out _)); + ClassicAssert.IsTrue(VectorFilterParser.TryParseExpression(tokens, 0, out var expr, out _, out _)); ClassicAssert.IsInstanceOf(expr); var binary = (BinaryExpr)expr; ClassicAssert.AreEqual(OperatorKind.Add, binary.Operator); @@ -90,8 +89,8 @@ public void Parser_UnaryNegation() [Test] public void Parser_OperatorPrecedence_MultiplicationBeforeAddition() { - var tokens = VectorFilterTokenizer.Tokenize("1 + 2 * 3"); - 
var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); + ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize("1 + 2 * 3", out var tokens, out _)); + ClassicAssert.IsTrue(VectorFilterParser.TryParseExpression(tokens, 0, out var expr, out _, out _)); ClassicAssert.IsInstanceOf(expr); var binary = (BinaryExpr)expr; ClassicAssert.AreEqual(OperatorKind.Add, binary.Operator); @@ -103,8 +102,8 @@ public void Parser_OperatorPrecedence_MultiplicationBeforeAddition() [Test] public void Parser_OperatorPrecedence_AndBeforeOr() { - var tokens = VectorFilterTokenizer.Tokenize("true or false and true"); - var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); + ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize("true or false and true", out var tokens, out _)); + ClassicAssert.IsTrue(VectorFilterParser.TryParseExpression(tokens, 0, out var expr, out _, out _)); ClassicAssert.IsInstanceOf(expr); var binary = (BinaryExpr)expr; ClassicAssert.AreEqual(OperatorKind.Or, binary.Operator); @@ -116,8 +115,8 @@ public void Parser_OperatorPrecedence_AndBeforeOr() [Test] public void Parser_ParenthesesOverridePrecedence() { - var tokens = VectorFilterTokenizer.Tokenize("(1 + 2) * 3"); - var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); + ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize("(1 + 2) * 3", out var tokens, out _)); + ClassicAssert.IsTrue(VectorFilterParser.TryParseExpression(tokens, 0, out var expr, out _, out _)); ClassicAssert.IsInstanceOf(expr); var binary = (BinaryExpr)expr; ClassicAssert.AreEqual(OperatorKind.Multiply, binary.Operator); @@ -128,8 +127,8 @@ public void Parser_ParenthesesOverridePrecedence() [Test] public void Parser_Containment() { - var tokens = VectorFilterTokenizer.Tokenize("\"action\" in .tags"); - var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); + ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize("\"action\" in .tags", out var tokens, out _)); + 
ClassicAssert.IsTrue(VectorFilterParser.TryParseExpression(tokens, 0, out var expr, out _, out _)); ClassicAssert.IsInstanceOf(expr); var binary = (BinaryExpr)expr; ClassicAssert.AreEqual(OperatorKind.In, binary.Operator); @@ -140,8 +139,8 @@ public void Parser_Containment() [Test] public void Parser_ExponentiationRightAssociative() { - var tokens = VectorFilterTokenizer.Tokenize("2 ** 3 ** 2"); - var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); + ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize("2 ** 3 ** 2", out var tokens, out _)); + ClassicAssert.IsTrue(VectorFilterParser.TryParseExpression(tokens, 0, out var expr, out _, out _)); ClassicAssert.IsInstanceOf(expr); var binary = (BinaryExpr)expr; ClassicAssert.AreEqual(OperatorKind.Power, binary.Operator); @@ -154,32 +153,61 @@ public void Parser_ExponentiationRightAssociative() public void Parser_ErrorOnUnexpectedEnd() { var tokens = new List(); - ClassicAssert.Throws(() => - VectorFilterParser.ParseExpression(tokens, 0, out _)); + ClassicAssert.IsFalse(VectorFilterParser.TryParseExpression(tokens, 0, out var result, out _, out var error)); + ClassicAssert.IsNull(result); + ClassicAssert.IsNotNull(error); + ClassicAssert.IsTrue(error.Contains("Unexpected end")); } [Test] public void Parser_ErrorOnMissingClosingParen() { - var tokens = VectorFilterTokenizer.Tokenize("(1 + 2"); - ClassicAssert.Throws(() => - VectorFilterParser.ParseExpression(tokens, 0, out _)); + ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize("(1 + 2", out var tokens, out _)); + ClassicAssert.IsFalse(VectorFilterParser.TryParseExpression(tokens, 0, out var result, out _, out var error)); + ClassicAssert.IsNull(result); + ClassicAssert.IsNotNull(error); + ClassicAssert.IsTrue(error.Contains("Missing closing parenthesis")); } [Test] public void Parser_ErrorOnInvalidNumberLiteral_DoubleDot() { - var tokens = VectorFilterTokenizer.Tokenize("1..023"); - ClassicAssert.Throws(() => - 
VectorFilterParser.ParseExpression(tokens, 0, out _)); + // Now caught at tokenization time: "1..023" has multiple decimal points + ClassicAssert.IsFalse(VectorFilterTokenizer.TryTokenize("1..023", out _, out var error)); + ClassicAssert.IsNotNull(error); + ClassicAssert.IsTrue(error.Contains("multiple decimal points")); } [Test] public void Parser_ErrorOnInvalidNumberLiteral_MultipleDots() { - var tokens = VectorFilterTokenizer.Tokenize("1.2.3"); - ClassicAssert.Throws(() => - VectorFilterParser.ParseExpression(tokens, 0, out _)); + // Now caught at tokenization time: "1.2.3" has multiple decimal points + ClassicAssert.IsFalse(VectorFilterTokenizer.TryTokenize("1.2.3", out _, out var error)); + ClassicAssert.IsNotNull(error); + ClassicAssert.IsTrue(error.Contains("multiple decimal points")); + } + + [Test] + public void Parser_ErrorOnExcessiveRecursionDepth() + { + // Build a deeply nested expression: (((((...(1)...)))) + var depth = 100; + var expr = new string('(', depth) + "1" + new string(')', depth); + ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize(expr, out var tokens, out _)); + ClassicAssert.IsFalse(VectorFilterParser.TryParseExpression(tokens, 0, out var result, out _, out var error)); + ClassicAssert.IsNull(result); + ClassicAssert.IsNotNull(error); + ClassicAssert.IsTrue(error.Contains("maximum nesting depth")); + } + + [Test] + public void Parser_ErrorOnUnexpectedToken() + { + ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize(")", out var tokens, out _)); + ClassicAssert.IsFalse(VectorFilterParser.TryParseExpression(tokens, 0, out var result, out _, out var error)); + ClassicAssert.IsNull(result); + ClassicAssert.IsNotNull(error); + ClassicAssert.IsTrue(error.Contains("Unexpected token")); } } } \ No newline at end of file diff --git a/test/Garnet.test/Filter/VectorFilterTestHelpers.cs b/test/Garnet.test/Filter/VectorFilterTestHelpers.cs index d3c2afbc485..310732378f9 100644 --- a/test/Garnet.test/Filter/VectorFilterTestHelpers.cs +++ 
b/test/Garnet.test/Filter/VectorFilterTestHelpers.cs @@ -10,19 +10,38 @@ internal static class VectorFilterTestHelpers { internal static FilterValue EvaluateFilter(string expression, string json) { - var tokens = VectorFilterTokenizer.Tokenize(expression); - var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); + if (!VectorFilterTokenizer.TryTokenize(expression, out var tokens, out var error)) + throw new System.InvalidOperationException($"Tokenization failed: {error}"); + if (!VectorFilterParser.TryParseExpression(tokens, 0, out var expr, out _, out error)) + throw new System.InvalidOperationException($"Parse failed: {error}"); using var doc = JsonDocument.Parse(json); return VectorFilterEvaluator.EvaluateExpression(expr, doc.RootElement); } internal static bool EvaluateFilterTruthy(string expression, string json) { - var tokens = VectorFilterTokenizer.Tokenize(expression); - var expr = VectorFilterParser.ParseExpression(tokens, 0, out _); + if (!VectorFilterTokenizer.TryTokenize(expression, out var tokens, out var error)) + throw new System.InvalidOperationException($"Tokenization failed: {error}"); + if (!VectorFilterParser.TryParseExpression(tokens, 0, out var expr, out _, out error)) + throw new System.InvalidOperationException($"Parse failed: {error}"); using var doc = JsonDocument.Parse(json); - var result = VectorFilterEvaluator.EvaluateExpression(expr, doc.RootElement); - return VectorFilterEvaluator.IsTruthy(result); + return VectorFilterEvaluator.EvaluateFilterBool(expr, doc.RootElement); + } + + internal static bool TryTokenize(string expression, out System.Collections.Generic.List tokens, out string error) + { + return VectorFilterTokenizer.TryTokenize(expression, out tokens, out error); + } + + internal static bool TryParse(string expression, out Expr result, out int end, out string error) + { + if (!VectorFilterTokenizer.TryTokenize(expression, out var tokens, out error)) + { + result = null; + end = 0; + return false; + } + return 
VectorFilterParser.TryParseExpression(tokens, 0, out result, out end, out error); } } } \ No newline at end of file diff --git a/test/Garnet.test/Filter/VectorFilterTokenizerTests.cs b/test/Garnet.test/Filter/VectorFilterTokenizerTests.cs index 4130098a50a..bfba38dc1d3 100644 --- a/test/Garnet.test/Filter/VectorFilterTokenizerTests.cs +++ b/test/Garnet.test/Filter/VectorFilterTokenizerTests.cs @@ -1,7 +1,6 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT license. -using System; using System.Linq; using Allure.NUnit; using Garnet.server.Vector.Filter; @@ -17,7 +16,7 @@ public class VectorFilterTokenizerTests : AllureTestBase [Test] public void Tokenizer_IntegerNumbers() { - var tokens = VectorFilterTokenizer.Tokenize("42"); + ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize("42", out var tokens, out _)); ClassicAssert.AreEqual(1, tokens.Count); ClassicAssert.AreEqual(TokenType.Number, tokens[0].Type); ClassicAssert.AreEqual("42", tokens[0].Value); @@ -26,7 +25,7 @@ public void Tokenizer_IntegerNumbers() [Test] public void Tokenizer_DecimalNumbers() { - var tokens = VectorFilterTokenizer.Tokenize("3.14"); + ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize("3.14", out var tokens, out _)); ClassicAssert.AreEqual(1, tokens.Count); ClassicAssert.AreEqual(TokenType.Number, tokens[0].Type); ClassicAssert.AreEqual("3.14", tokens[0].Value); @@ -35,7 +34,7 @@ public void Tokenizer_DecimalNumbers() [Test] public void Tokenizer_NegativeNumbers() { - var tokens = VectorFilterTokenizer.Tokenize("-5"); + ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize("-5", out var tokens, out _)); ClassicAssert.AreEqual(1, tokens.Count); ClassicAssert.AreEqual(TokenType.Number, tokens[0].Type); ClassicAssert.AreEqual("-5", tokens[0].Value); @@ -44,12 +43,12 @@ public void Tokenizer_NegativeNumbers() [Test] public void Tokenizer_StringLiterals() { - var tokens = VectorFilterTokenizer.Tokenize("\"hello\""); + 
ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize("\"hello\"", out var tokens, out _)); ClassicAssert.AreEqual(1, tokens.Count); ClassicAssert.AreEqual(TokenType.String, tokens[0].Type); ClassicAssert.AreEqual("hello", tokens[0].Value); - tokens = VectorFilterTokenizer.Tokenize("'world'"); + ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize("'world'", out tokens, out _)); ClassicAssert.AreEqual(1, tokens.Count); ClassicAssert.AreEqual(TokenType.String, tokens[0].Type); ClassicAssert.AreEqual("world", tokens[0].Value); @@ -58,23 +57,25 @@ public void Tokenizer_StringLiterals() [Test] public void Tokenizer_EscapedStringLiterals() { - var tokens = VectorFilterTokenizer.Tokenize("\"hello\\\"world\""); + ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize("\"hello\\\"world\"", out var tokens, out _)); ClassicAssert.AreEqual(1, tokens.Count); ClassicAssert.AreEqual(TokenType.String, tokens[0].Type); ClassicAssert.AreEqual("hello\\\"world", tokens[0].Value); } [Test] - public void Tokenizer_UnterminatedStringThrows() + public void Tokenizer_UnterminatedStringReturnsFalse() { - ClassicAssert.Throws(() => - VectorFilterTokenizer.Tokenize("\"hello")); + ClassicAssert.IsFalse(VectorFilterTokenizer.TryTokenize("\"hello", out var tokens, out var error)); + ClassicAssert.IsNull(tokens); + ClassicAssert.IsNotNull(error); + ClassicAssert.IsTrue(error.Contains("Unterminated string")); } [Test] public void Tokenizer_SubtractionNotConfusedWithNegative() { - var tokens = VectorFilterTokenizer.Tokenize(".a - 5"); + ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize(".a - 5", out var tokens, out _)); ClassicAssert.AreEqual(3, tokens.Count); ClassicAssert.AreEqual(TokenType.Identifier, tokens[0].Type); ClassicAssert.AreEqual(TokenType.Operator, tokens[1].Type); @@ -86,12 +87,12 @@ public void Tokenizer_SubtractionNotConfusedWithNegative() [Test] public void Tokenizer_Identifiers() { - var tokens = VectorFilterTokenizer.Tokenize(".year"); + 
ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize(".year", out var tokens, out _)); ClassicAssert.AreEqual(1, tokens.Count); ClassicAssert.AreEqual(TokenType.Identifier, tokens[0].Type); ClassicAssert.AreEqual(".year", tokens[0].Value); - tokens = VectorFilterTokenizer.Tokenize("_field"); + ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize("_field", out tokens, out _)); ClassicAssert.AreEqual(1, tokens.Count); ClassicAssert.AreEqual(TokenType.Identifier, tokens[0].Type); ClassicAssert.AreEqual("_field", tokens[0].Value); @@ -103,7 +104,7 @@ public void Tokenizer_Keywords() var keywords = new[] { "and", "or", "not", "in" }; foreach (var kw in keywords) { - var tokens = VectorFilterTokenizer.Tokenize(kw); + ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize(kw, out var tokens, out _)); ClassicAssert.AreEqual(1, tokens.Count); ClassicAssert.AreEqual(TokenType.Keyword, tokens[0].Type); ClassicAssert.AreEqual(kw, tokens[0].Value); @@ -113,12 +114,12 @@ public void Tokenizer_Keywords() [Test] public void Tokenizer_Booleans() { - var tokens = VectorFilterTokenizer.Tokenize("true"); + ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize("true", out var tokens, out _)); ClassicAssert.AreEqual(1, tokens.Count); ClassicAssert.AreEqual(TokenType.Boolean, tokens[0].Type); ClassicAssert.AreEqual("true", tokens[0].Value); - tokens = VectorFilterTokenizer.Tokenize("false"); + ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize("false", out tokens, out _)); ClassicAssert.AreEqual(1, tokens.Count); ClassicAssert.AreEqual(TokenType.Boolean, tokens[0].Type); ClassicAssert.AreEqual("false", tokens[0].Value); @@ -130,7 +131,7 @@ public void Tokenizer_TwoCharOperators() var ops = new[] { "==", "!=", ">=", "<=", "&&", "||", "**" }; foreach (var op in ops) { - var tokens = VectorFilterTokenizer.Tokenize($"1 {op} 2"); + ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize($"1 {op} 2", out var tokens, out _)); var opToken = tokens.First(t => t.Type == 
TokenType.Operator); ClassicAssert.AreEqual(op, opToken.Value); } @@ -142,7 +143,7 @@ public void Tokenizer_SingleCharOperators() var ops = new[] { ">", "<", "+", "-", "*", "/", "%", "!" }; foreach (var op in ops) { - var tokens = VectorFilterTokenizer.Tokenize($".a {op} .b"); + ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize($".a {op} .b", out var tokens, out _)); var opToken = tokens.First(t => t.Type == TokenType.Operator); ClassicAssert.AreEqual(op, opToken.Value); } @@ -151,7 +152,7 @@ public void Tokenizer_SingleCharOperators() [Test] public void Tokenizer_Delimiters() { - var tokens = VectorFilterTokenizer.Tokenize("(.year > 10)"); + ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize("(.year > 10)", out var tokens, out _)); ClassicAssert.AreEqual(TokenType.Delimiter, tokens[0].Type); ClassicAssert.AreEqual("(", tokens[0].Value); ClassicAssert.AreEqual(TokenType.Delimiter, tokens[4].Type); @@ -161,7 +162,7 @@ public void Tokenizer_Delimiters() [Test] public void Tokenizer_ComplexExpression() { - var tokens = VectorFilterTokenizer.Tokenize(".year > 1950 and .rating >= 4.0"); + ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize(".year > 1950 and .rating >= 4.0", out var tokens, out _)); ClassicAssert.AreEqual(7, tokens.Count); ClassicAssert.AreEqual(TokenType.Identifier, tokens[0].Type); ClassicAssert.AreEqual(TokenType.Operator, tokens[1].Type); @@ -175,11 +176,47 @@ public void Tokenizer_ComplexExpression() [Test] public void Tokenizer_EmptyInput() { - var tokens = VectorFilterTokenizer.Tokenize(""); + ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize("", out var tokens, out _)); ClassicAssert.AreEqual(0, tokens.Count); - tokens = VectorFilterTokenizer.Tokenize(" "); + ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize(" ", out tokens, out _)); ClassicAssert.AreEqual(0, tokens.Count); } + + [Test] + public void Tokenizer_UnexpectedCharacterReturnsFalse() + { + ClassicAssert.IsFalse(VectorFilterTokenizer.TryTokenize("@", out var tokens, 
out var error)); + ClassicAssert.IsNull(tokens); + ClassicAssert.IsNotNull(error); + ClassicAssert.IsTrue(error.Contains("Unexpected character")); + } + + [Test] + public void Tokenizer_MultipleDotsInNumberReturnsFalse() + { + ClassicAssert.IsFalse(VectorFilterTokenizer.TryTokenize("1.2.3", out var tokens, out var error)); + ClassicAssert.IsNull(tokens); + ClassicAssert.IsNotNull(error); + ClassicAssert.IsTrue(error.Contains("multiple decimal points")); + } + + [Test] + public void Tokenizer_DoubleDotInNumberReturnsFalse() + { + ClassicAssert.IsFalse(VectorFilterTokenizer.TryTokenize("1..023", out var tokens, out var error)); + ClassicAssert.IsNull(tokens); + ClassicAssert.IsNotNull(error); + ClassicAssert.IsTrue(error.Contains("multiple decimal points")); + } + + [Test] + public void Tokenizer_ManyDotsInNumberReturnsFalse() + { + ClassicAssert.IsFalse(VectorFilterTokenizer.TryTokenize("123.....23", out var tokens, out var error)); + ClassicAssert.IsNull(tokens); + ClassicAssert.IsNotNull(error); + ClassicAssert.IsTrue(error.Contains("multiple decimal points")); + } } -} \ No newline at end of file +} From 6770b05c6040a863e3f2d4d47e62a0af17c304b1 Mon Sep 17 00:00:00 2001 From: CI Fix Date: Thu, 26 Feb 2026 15:08:39 -0800 Subject: [PATCH 16/31] refactor to stack-based postfix --- .../Resp/Vector/Filter/AttributeExtractor.cs | 332 +++++++++++++ .../server/Resp/Vector/Filter/ExprCompiler.cs | 462 ++++++++++++++++++ libs/server/Resp/Vector/Filter/ExprRunner.cs | 228 +++++++++ .../Vector/Filter/VectorFilterEvaluator.cs | 198 -------- .../Vector/Filter/VectorFilterExpression.cs | 320 +++++++----- .../Resp/Vector/Filter/VectorFilterParser.cs | 365 -------------- .../Vector/Filter/VectorFilterTokenizer.cs | 224 --------- libs/server/Resp/Vector/VectorManager.cs | 58 +-- test/Garnet.test/Filter/ExprCompilerTests.cs | 358 ++++++++++++++ test/Garnet.test/Filter/ExprRunnerTests.cs | 213 ++++++++ test/Garnet.test/Filter/ExprTestHelpers.cs | 200 ++++++++ 
.../Filter/VectorFilterEvaluatorTests.cs | 189 ------- .../Filter/VectorFilterParserTests.cs | 213 -------- .../Filter/VectorFilterTestHelpers.cs | 47 -- .../Filter/VectorFilterTokenizerTests.cs | 222 --------- 15 files changed, 2010 insertions(+), 1619 deletions(-) create mode 100644 libs/server/Resp/Vector/Filter/AttributeExtractor.cs create mode 100644 libs/server/Resp/Vector/Filter/ExprCompiler.cs create mode 100644 libs/server/Resp/Vector/Filter/ExprRunner.cs delete mode 100644 libs/server/Resp/Vector/Filter/VectorFilterEvaluator.cs delete mode 100644 libs/server/Resp/Vector/Filter/VectorFilterParser.cs delete mode 100644 libs/server/Resp/Vector/Filter/VectorFilterTokenizer.cs create mode 100644 test/Garnet.test/Filter/ExprCompilerTests.cs create mode 100644 test/Garnet.test/Filter/ExprRunnerTests.cs create mode 100644 test/Garnet.test/Filter/ExprTestHelpers.cs delete mode 100644 test/Garnet.test/Filter/VectorFilterEvaluatorTests.cs delete mode 100644 test/Garnet.test/Filter/VectorFilterParserTests.cs delete mode 100644 test/Garnet.test/Filter/VectorFilterTestHelpers.cs delete mode 100644 test/Garnet.test/Filter/VectorFilterTokenizerTests.cs diff --git a/libs/server/Resp/Vector/Filter/AttributeExtractor.cs b/libs/server/Resp/Vector/Filter/AttributeExtractor.cs new file mode 100644 index 00000000000..3986d95e984 --- /dev/null +++ b/libs/server/Resp/Vector/Filter/AttributeExtractor.cs @@ -0,0 +1,332 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +using System; +using System.Globalization; +using System.Runtime.CompilerServices; +using System.Text; + +namespace Garnet.server.Vector.Filter +{ + /// + /// Ultra-lightweight top-level JSON field extractor. + /// Returns fields directly as values. + /// + /// 1. Zero heap allocations while seeking the requested key. + /// 2. A single parse (and allocation) when the key matches. + /// 3. 
Supports: strings (with \n \r \t \\ \" escapes), numbers, booleans, null, + /// and flat arrays of these primitives. Nested objects return null. + /// 4. Operates on raw UTF-8 bytes (ReadOnlySpan<byte>) — no JsonDocument DOM. + /// + internal static class AttributeExtractor + { + /// + /// Extract a top-level field from a JSON object and return it as an ExprToken. + /// Returns null if the field is not found or the JSON is malformed. + /// + public static ExprToken ExtractField(ReadOnlySpan json, string fieldName) + { + var p = 0; + SkipWhiteSpace(json, ref p); + if (p >= json.Length || json[p] != (byte)'{') return null; + p++; // Skip '{' + + while (true) + { + SkipWhiteSpace(json, ref p); + if (p >= json.Length) return null; + if (json[p] == (byte)'}') return null; // End of object, field not found + + // Expect a key string + if (json[p] != (byte)'"') return null; + + var keyStart = p + 1; + if (!SkipString(json, ref p)) return null; + var keyEnd = p - 1; // p is now past the closing quote + + // Compare key with field name + var match = MatchKey(json, keyStart, keyEnd, fieldName); + + // Expect ':' + SkipWhiteSpace(json, ref p); + if (p >= json.Length || json[p] != (byte)':') return null; + p++; // Skip ':' + + SkipWhiteSpace(json, ref p); + if (p >= json.Length) return null; + + if (match) + { + // Found the field — parse the value into a token + return ParseValueToken(json, ref p); + } + else + { + // Skip the value + if (!SkipValue(json, ref p)) return null; + } + + // Look for ',' or '}' + SkipWhiteSpace(json, ref p); + if (p >= json.Length) return null; + if (json[p] == (byte)',') { p++; continue; } + if (json[p] == (byte)'}') return null; // End of object, not found + return null; // Malformed JSON + } + } + + // ======================== Value parsing (allocating) ======================== + + private static ExprToken ParseValueToken(ReadOnlySpan json, ref int p) + { + SkipWhiteSpace(json, ref p); + if (p >= json.Length) return null; + + var c = json[p]; + 
if (c == (byte)'"') return ParseStringToken(json, ref p); + if (c == (byte)'[') return ParseArrayToken(json, ref p); + if (c == (byte)'{') return null; // Nested objects not supported + if (c == (byte)'t') return ParseLiteralToken(json, ref p, "true"u8, ExprTokenType.Num, 1); + if (c == (byte)'f') return ParseLiteralToken(json, ref p, "false"u8, ExprTokenType.Num, 0); + if (c == (byte)'n') return ParseLiteralToken(json, ref p, "null"u8, ExprTokenType.Null, 0); + if (char.IsDigit((char)c) || c == (byte)'-' || c == (byte)'+') + return ParseNumberToken(json, ref p); + + return null; + } + + private static ExprToken ParseStringToken(ReadOnlySpan json, ref int p) + { + if (p >= json.Length || json[p] != (byte)'"') return null; + p++; // Skip opening quote + var start = p; + var hasEscape = false; + + while (p < json.Length) + { + if (json[p] == (byte)'\\') + { + hasEscape = true; + p += 2; // Skip escape sequence + continue; + } + if (json[p] == (byte)'"') + { + string value; + if (!hasEscape) + { + // Zero-copy: decode directly from the span + value = Encoding.UTF8.GetString(json.Slice(start, p - start)); + } + else + { + // Process escapes + value = UnescapeJsonString(json, start, p); + } + p++; // Skip closing quote + return ExprToken.NewStr(value); + } + p++; + } + return null; // Unterminated string + } + + private static ExprToken ParseNumberToken(ReadOnlySpan json, ref int p) + { + var start = p; + while (p < json.Length && IsNumberChar(json[p])) p++; + if (p == start) return null; + + var numStr = Encoding.UTF8.GetString(json.Slice(start, p - start)); + if (!double.TryParse(numStr, NumberStyles.Float | NumberStyles.AllowLeadingSign, + CultureInfo.InvariantCulture, out var value)) + { + p = start; + return null; + } + return ExprToken.NewNum(value); + } + + private static ExprToken ParseLiteralToken(ReadOnlySpan json, ref int p, + ReadOnlySpan literal, ExprTokenType type, double num) + { + if (p + literal.Length > json.Length) return null; + if (!json.Slice(p, 
literal.Length).SequenceEqual(literal)) return null; + + // Verify delimiter follows (space, comma, bracket, brace, or end) + if (p + literal.Length < json.Length) + { + var next = (char)json[p + literal.Length]; + if (!char.IsWhiteSpace(next) && next != ',' && next != ']' && next != '}') + return null; + } + + p += literal.Length; + var t = type == ExprTokenType.Null ? ExprToken.NewNull() : ExprToken.NewNum(num); + return t; + } + + private static ExprToken ParseArrayToken(ReadOnlySpan json, ref int p) + { + if (p >= json.Length || json[p] != (byte)'[') return null; + p++; // Skip '[' + SkipWhiteSpace(json, ref p); + + var elements = new ExprToken[64]; + var count = 0; + + // Handle empty array + if (p < json.Length && json[p] == (byte)']') + { + p++; + return ExprToken.NewTuple([], 0); + } + + while (true) + { + SkipWhiteSpace(json, ref p); + if (p >= json.Length || count >= elements.Length) return null; + + var ele = ParseValueToken(json, ref p); + if (ele == null) return null; + elements[count++] = ele; + + SkipWhiteSpace(json, ref p); + if (p >= json.Length) return null; + if (json[p] == (byte)',') { p++; continue; } + if (json[p] == (byte)']') { p++; break; } + return null; // Malformed + } + + var result = new ExprToken[count]; + Array.Copy(elements, result, count); + return ExprToken.NewTuple(result, count); + } + + // ======================== Fast skipping (non-allocating) ======================== + + private static bool SkipValue(ReadOnlySpan json, ref int p) + { + SkipWhiteSpace(json, ref p); + if (p >= json.Length) return false; + + var c = (char)json[p]; + return c switch + { + '"' => SkipString(json, ref p), + '{' => SkipBracketed(json, ref p, (byte)'{', (byte)'}'), + '[' => SkipBracketed(json, ref p, (byte)'[', (byte)']'), + 't' => SkipLiteral(json, ref p, "true"u8), + 'f' => SkipLiteral(json, ref p, "false"u8), + 'n' => SkipLiteral(json, ref p, "null"u8), + _ => SkipNumber(json, ref p), + }; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] 
+ private static bool SkipString(ReadOnlySpan json, ref int p) + { + if (p >= json.Length || json[p] != (byte)'"') return false; + p++; // Skip opening quote + while (p < json.Length) + { + if (json[p] == (byte)'\\') { p += 2; continue; } + if (json[p] == (byte)'"') { p++; return true; } + p++; + } + return false; // Unterminated + } + + private static bool SkipBracketed(ReadOnlySpan json, ref int p, byte opener, byte closer) + { + var depth = 1; + p++; // Skip opener + while (p < json.Length && depth > 0) + { + if (json[p] == (byte)'"') + { + if (!SkipString(json, ref p)) return false; + continue; + } + if (json[p] == opener) depth++; + else if (json[p] == closer) depth--; + p++; + } + return depth == 0; + } + + private static bool SkipLiteral(ReadOnlySpan json, ref int p, ReadOnlySpan literal) + { + if (p + literal.Length > json.Length) return false; + if (!json.Slice(p, literal.Length).SequenceEqual(literal)) return false; + p += literal.Length; + return true; + } + + private static bool SkipNumber(ReadOnlySpan json, ref int p) + { + var start = p; + while (p < json.Length && IsNumberChar(json[p])) p++; + return p > start; + } + + // ======================== Utility ======================== + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static void SkipWhiteSpace(ReadOnlySpan json, ref int p) + { + while (p < json.Length && IsWhiteSpace(json[p])) p++; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static bool IsWhiteSpace(byte b) => b == (byte)' ' || b == (byte)'\t' || b == (byte)'\n' || b == (byte)'\r'; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static bool IsNumberChar(byte b) => + (b >= (byte)'0' && b <= (byte)'9') || b == (byte)'-' || b == (byte)'+' || + b == (byte)'.' 
|| b == (byte)'e' || b == (byte)'E'; + + private static bool MatchKey(ReadOnlySpan json, int keyStart, int keyEnd, string fieldName) + { + var keyLen = keyEnd - keyStart; + if (keyLen != fieldName.Length) return false; + for (var i = 0; i < keyLen; i++) + { + if (json[keyStart + i] != (byte)fieldName[i]) return false; + } + return true; + } + + private static string UnescapeJsonString(ReadOnlySpan json, int start, int end) + { + // Worst case: each byte is a character + var chars = new char[end - start]; + var len = 0; + var i = start; + while (i < end) + { + if (json[i] == (byte)'\\' && i + 1 < end) + { + i++; + chars[len++] = (char)json[i] switch + { + 'n' => '\n', + 'r' => '\r', + 't' => '\t', + '\\' => '\\', + '"' => '"', + '/' => '/', + _ => (char)json[i], + }; + i++; + } + else + { + chars[len++] = (char)json[i]; + i++; + } + } + return new string(chars, 0, len); + } + } +} diff --git a/libs/server/Resp/Vector/Filter/ExprCompiler.cs b/libs/server/Resp/Vector/Filter/ExprCompiler.cs new file mode 100644 index 00000000000..d4b29b13387 --- /dev/null +++ b/libs/server/Resp/Vector/Filter/ExprCompiler.cs @@ -0,0 +1,462 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +using System; +using System.Globalization; +using System.Runtime.CompilerServices; + +namespace Garnet.server.Vector.Filter +{ + /// + /// Shunting-yard compiler that tokenizes and compiles a filter expression string + /// into a flat postfix . + /// + /// Single-pass tokenize-and-compile approach modeled after Redis expr.c. + /// + /// The compiled program is a flat array of instructions + /// (values + operators in postfix order) that can be executed by . + /// + /// Safety limits: + /// - Maximum 1024 tokens per expression (prevents unbounded allocation). + /// - Maximum 256 instructions in the compiled program. 
+ /// + internal static class ExprCompiler + { + private const int MaxTokens = 1024; + private const int MaxProgram = 256; + private const string SelectorSpecialChars = "_-"; + private const string OperatorSpecialChars = "+-*%/!()<>=|&"; + + /// + /// Compile a filter expression string into a flat postfix program. + /// Returns null on syntax error; optionally reports the error position. + /// + public static ExprProgram TryCompile(string expr, out int errpos) + { + errpos = -1; + if (string.IsNullOrEmpty(expr)) + return null; + + // Phase 1: Tokenize into a flat list + var tokens = new ExprToken[MaxTokens]; + var numTokens = 0; + + var p = 0; + while (p < expr.Length) + { + SkipSpaces(expr, ref p); + if (p >= expr.Length) + break; + + if (numTokens >= MaxTokens) + { + errpos = p; + return null; + } + + // Determine if '-' should be a negative number sign or a subtraction operator + var minusIsNumber = false; + if (expr[p] == '-' && p + 1 < expr.Length && (char.IsDigit(expr[p + 1]) || expr[p + 1] == '.')) + { + if (numTokens == 0) + { + minusIsNumber = true; + } + else + { + var prev = tokens[numTokens - 1]; + if (prev.TokenType == ExprTokenType.Op && prev.OpCode != OpCode.CParen) + minusIsNumber = true; + } + } + + // Number + if (char.IsDigit(expr[p]) || (minusIsNumber && expr[p] == '-')) + { + var t = ParseNumber(expr, ref p); + if (t == null) { errpos = p; return null; } + tokens[numTokens++] = t; + continue; + } + + // String literal + if (expr[p] == '"' || expr[p] == '\'') + { + var t = ParseString(expr, ref p); + if (t == null) { errpos = p; return null; } + tokens[numTokens++] = t; + continue; + } + + // Selector (field access starting with '.') + if (expr[p] == '.' 
&& p + 1 < expr.Length && IsSelectorChar(expr[p + 1])) + { + var t = ParseSelector(expr, ref p); + tokens[numTokens++] = t; + continue; + } + + // Tuple literal [1, "foo", 42] + if (expr[p] == '[') + { + var t = ParseTuple(expr, ref p); + if (t == null) { errpos = p; return null; } + tokens[numTokens++] = t; + continue; + } + + // Operator or literal keyword (null, true, false, not, and, or, in) + if (char.IsLetter(expr[p]) || OperatorSpecialChars.IndexOf(expr[p]) >= 0) + { + var t = ParseOperatorOrLiteral(expr, ref p); + if (t == null) { errpos = p; return null; } + tokens[numTokens++] = t; + continue; + } + + errpos = p; + return null; + } + + // Phase 2: Shunting-yard compilation to postfix + var program = new ExprToken[MaxProgram]; + var programLen = 0; + var opsStack = new ExprToken[MaxTokens]; + var opsLen = 0; + var stackItems = 0; // track what would be on the values stack at runtime + + for (var i = 0; i < numTokens; i++) + { + var token = tokens[i]; + + // Values go directly to program + if (token.TokenType == ExprTokenType.Num || + token.TokenType == ExprTokenType.Str || + token.TokenType == ExprTokenType.Tuple || + token.TokenType == ExprTokenType.Selector || + token.TokenType == ExprTokenType.Null) + { + if (programLen >= MaxProgram) { errpos = 0; return null; } + program[programLen++] = token; + stackItems++; + continue; + } + + // Operators + if (token.TokenType == ExprTokenType.Op) + { + if (!ProcessOperator(token, program, ref programLen, opsStack, ref opsLen, ref stackItems, out errpos)) + return null; + continue; + } + } + + // Flush remaining operators from the stack + while (opsLen > 0) + { + var op = opsStack[--opsLen]; + if (op.OpCode == OpCode.OParen) + { + errpos = 0; + return null; // Unmatched '(' + } + + var arity = OpTable.GetArity(op.OpCode); + if (stackItems < arity) { errpos = 0; return null; } + if (programLen >= MaxProgram) { errpos = 0; return null; } + program[programLen++] = op; + stackItems = stackItems - arity + 1; + } + + // 
After compilation, exactly one value should remain on the stack + if (stackItems != 1) { errpos = 0; return null; } + + return new ExprProgram { Instructions = program, Length = programLen }; + } + + /// + /// Process an operator during shunting-yard compilation. + /// Handles parentheses, precedence, and right-associativity of **. + /// + private static bool ProcessOperator( + ExprToken op, + ExprToken[] program, ref int programLen, + ExprToken[] opsStack, ref int opsLen, + ref int stackItems, + out int errpos) + { + errpos = -1; + + if (op.OpCode == OpCode.OParen) + { + if (opsLen >= opsStack.Length) { errpos = 0; return false; } + opsStack[opsLen++] = op; + return true; + } + + if (op.OpCode == OpCode.CParen) + { + // Pop operators until matching '(' + while (true) + { + if (opsLen == 0) { errpos = 0; return false; } // Unmatched ')' + var topOp = opsStack[--opsLen]; + if (topOp.OpCode == OpCode.OParen) + return true; + + var arity = OpTable.GetArity(topOp.OpCode); + if (stackItems < arity) { errpos = 0; return false; } + if (programLen >= MaxProgram) { errpos = 0; return false; } + program[programLen++] = topOp; + stackItems = stackItems - arity + 1; + } + } + + var curPrec = OpTable.GetPrecedence(op.OpCode); + + // Pop operators with higher or equal precedence + while (opsLen > 0) + { + var topOp = opsStack[opsLen - 1]; + if (topOp.OpCode == OpCode.OParen) break; + + var topPrec = OpTable.GetPrecedence(topOp.OpCode); + if (topPrec < curPrec) break; + + // Right-associative: ** only pops if strictly higher + if (op.OpCode == OpCode.Pow && topPrec <= curPrec) break; + + opsLen--; + var arity = OpTable.GetArity(topOp.OpCode); + if (stackItems < arity) { errpos = 0; return false; } + if (programLen >= MaxProgram) { errpos = 0; return false; } + program[programLen++] = topOp; + stackItems = stackItems - arity + 1; + } + + if (opsLen >= opsStack.Length) { errpos = 0; return false; } + opsStack[opsLen++] = op; + return true; + } + + // ======================== 
Tokenization helpers ======================== + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static void SkipSpaces(string s, ref int p) + { + while (p < s.Length && char.IsWhiteSpace(s[p])) p++; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static bool IsSelectorChar(char c) + { + return char.IsLetterOrDigit(c) || SelectorSpecialChars.IndexOf(c) >= 0; + } + + private static ExprToken ParseNumber(string s, ref int p) + { + var start = p; + if (p < s.Length && s[p] == '-') p++; + + while (p < s.Length && (char.IsDigit(s[p]) || s[p] == '.' || s[p] == 'e' || s[p] == 'E')) + p++; + + var numStr = s.Substring(start, p - start); + if (!double.TryParse(numStr, NumberStyles.Float | NumberStyles.AllowLeadingSign, CultureInfo.InvariantCulture, out var value)) + { + p = start; + return null; + } + return ExprToken.NewNum(value); + } + + private static ExprToken ParseString(string s, ref int p) + { + var quote = s[p]; + p++; // Skip opening quote + var start = p; + var hasEscape = false; + + while (p < s.Length) + { + if (s[p] == '\\' && p + 1 < s.Length) + { + hasEscape = true; + p += 2; // Skip escaped char + continue; + } + if (s[p] == quote) + { + string value; + if (!hasEscape) + { + value = s.Substring(start, p - start); + } + else + { + // Process escape sequences (matching Redis fastjson.c behavior) + var chars = new char[p - start]; + var len = 0; + for (var i = start; i < p; i++) + { + if (s[i] == '\\' && i + 1 < p) + { + i++; + chars[len++] = s[i] switch + { + 'n' => '\n', + 'r' => '\r', + 't' => '\t', + '\\' => '\\', + '"' => '"', + '\'' => '\'', + _ => s[i], // Unknown escape — copy verbatim + }; + } + else + { + chars[len++] = s[i]; + } + } + value = new string(chars, 0, len); + } + p++; // Skip closing quote + return ExprToken.NewStr(value); + } + p++; + } + return null; // Unterminated string + } + + private static ExprToken ParseSelector(string s, ref int p) + { + p++; // Skip the leading dot + var start = p; + while (p 
< s.Length && IsSelectorChar(s[p])) p++; + var name = s.Substring(start, p - start); + return ExprToken.NewSelector(name); + } + + private static ExprToken ParseTuple(string s, ref int p) + { + p++; // Skip '[' + var elements = new ExprToken[64]; // max 64 elements + var count = 0; + + SkipSpaces(s, ref p); + + // Handle empty tuple [] + if (p < s.Length && s[p] == ']') + { + p++; + return ExprToken.NewTuple([], 0); + } + + while (true) + { + SkipSpaces(s, ref p); + if (p >= s.Length) return null; + if (count >= elements.Length) return null; + + // Parse element: number or string + ExprToken ele; + if (char.IsDigit(s[p]) || s[p] == '-') + { + ele = ParseNumber(s, ref p); + } + else if (s[p] == '"' || s[p] == '\'') + { + ele = ParseString(s, ref p); + } + else + { + return null; + } + if (ele == null) return null; + + elements[count++] = ele; + + SkipSpaces(s, ref p); + if (p >= s.Length) return null; + + if (s[p] == ']') { p++; break; } + if (s[p] != ',') return null; + p++; // Skip comma + } + + var result = new ExprToken[count]; + Array.Copy(elements, result, count); + return ExprToken.NewTuple(result, count); + } + + private static ExprToken ParseOperatorOrLiteral(string s, ref int p) + { + var start = p; + + // Consume alphabetic or operator-special characters + while (p < s.Length && (char.IsLetter(s[p]) || OperatorSpecialChars.IndexOf(s[p]) >= 0)) + p++; + + var matchLen = p - start; + if (matchLen == 0) return null; + + // Check for literals + if (matchLen == 4 && string.Compare(s, start, "null", 0, 4, StringComparison.Ordinal) == 0) + return ExprToken.NewNull(); + + if (matchLen == 4 && string.Compare(s, start, "true", 0, 4, StringComparison.Ordinal) == 0) + return ExprToken.NewNum(1); + + if (matchLen == 5 && string.Compare(s, start, "false", 0, 5, StringComparison.Ordinal) == 0) + return ExprToken.NewNum(0); + + // Find best matching operator (longest match) + OpCode bestCode = default; + var bestLen = 0; + TryMatchOp(s, start, matchLen, "||", OpCode.Or, 
ref bestCode, ref bestLen); + TryMatchOp(s, start, matchLen, "or", OpCode.Or, ref bestCode, ref bestLen); + TryMatchOp(s, start, matchLen, "&&", OpCode.And, ref bestCode, ref bestLen); + TryMatchOp(s, start, matchLen, "and", OpCode.And, ref bestCode, ref bestLen); + TryMatchOp(s, start, matchLen, "**", OpCode.Pow, ref bestCode, ref bestLen); + TryMatchOp(s, start, matchLen, ">=", OpCode.Gte, ref bestCode, ref bestLen); + TryMatchOp(s, start, matchLen, "<=", OpCode.Lte, ref bestCode, ref bestLen); + TryMatchOp(s, start, matchLen, "==", OpCode.Eq, ref bestCode, ref bestLen); + TryMatchOp(s, start, matchLen, "!=", OpCode.Neq, ref bestCode, ref bestLen); + TryMatchOp(s, start, matchLen, "not", OpCode.Not, ref bestCode, ref bestLen); + TryMatchOp(s, start, matchLen, "in", OpCode.In, ref bestCode, ref bestLen); + TryMatchOp(s, start, matchLen, "(", OpCode.OParen, ref bestCode, ref bestLen); + TryMatchOp(s, start, matchLen, ")", OpCode.CParen, ref bestCode, ref bestLen); + TryMatchOp(s, start, matchLen, "+", OpCode.Add, ref bestCode, ref bestLen); + TryMatchOp(s, start, matchLen, "-", OpCode.Sub, ref bestCode, ref bestLen); + TryMatchOp(s, start, matchLen, "*", OpCode.Mul, ref bestCode, ref bestLen); + TryMatchOp(s, start, matchLen, "/", OpCode.Div, ref bestCode, ref bestLen); + TryMatchOp(s, start, matchLen, "%", OpCode.Mod, ref bestCode, ref bestLen); + TryMatchOp(s, start, matchLen, ">", OpCode.Gt, ref bestCode, ref bestLen); + TryMatchOp(s, start, matchLen, "<", OpCode.Lt, ref bestCode, ref bestLen); + TryMatchOp(s, start, matchLen, "!", OpCode.Not, ref bestCode, ref bestLen); + + if (bestLen == 0) + { + p = start; + return null; + } + + // Rewind p to consume only the matched operator length + p = start + bestLen; + return ExprToken.NewOp(bestCode); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static void TryMatchOp(string s, int start, int matchLen, string opName, OpCode opCode, ref OpCode bestCode, ref int bestLen) + { + var opLen = 
opName.Length; + if (opLen > matchLen) return; + if (string.Compare(s, start, opName, 0, opLen, StringComparison.Ordinal) != 0) return; + if (opLen > bestLen) + { + bestCode = opCode; + bestLen = opLen; + } + } + } +} diff --git a/libs/server/Resp/Vector/Filter/ExprRunner.cs b/libs/server/Resp/Vector/Filter/ExprRunner.cs new file mode 100644 index 00000000000..d0dd1b3e4ab --- /dev/null +++ b/libs/server/Resp/Vector/Filter/ExprRunner.cs @@ -0,0 +1,228 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +using System; +using System.Globalization; +using System.Runtime.CompilerServices; + +namespace Garnet.server.Vector.Filter +{ + /// + /// Stack-based VM that executes a compiled against + /// raw JSON attribute bytes. + /// + /// Modeled after Redis expr.c exprRun() — walks the flat postfix program, + /// pushes values, and pops operands for operators. Selectors trigger + /// on-demand JSON field extraction via . + /// + /// Key design properties (matching Redis): + /// - No DOM allocation: JSON fields are extracted directly from the raw bytes. + /// - Compile once, run many: the program is reused across all candidate elements. + /// - Exact numeric equality (no epsilon) to match Redis behavior. + /// - Substring support for the IN operator when both sides are strings. + /// - null is a first-class token type. + /// + internal static class ExprRunner + { + private const int MaxStack = 256; + + /// + /// Execute the compiled program against JSON attribute data. + /// Returns true if the expression evaluates to a truthy value, false otherwise. + /// Returns false if the JSON is malformed or a selector cannot be resolved. 
+        /// Also returns false for a malformed program: operand stack underflow, more than
+        /// MaxStack live values, or an opcode that is not executable (parenthesis markers).
+        ///
+        public static bool Run(ExprProgram program, ReadOnlySpan<byte> json)
+        {
+            var count = program.Length;
+            if (count <= 0) return false; // nothing to evaluate → not a match
+
+            // The stack can never hold more values than instructions executed so far, so
+            // sizing it to the program length (capped at MaxStack) keeps the same overflow
+            // limit while avoiding a 256-slot allocation for every candidate element.
+            var capacity = Math.Min(count, MaxStack);
+            var stack = new ExprToken[capacity];
+            var stackLen = 0;
+
+            for (var i = 0; i < count; i++)
+            {
+                var inst = program.Instructions[i];
+
+                // Selectors — extract field from JSON
+                if (inst.TokenType == ExprTokenType.Selector)
+                {
+                    var extracted = AttributeExtractor.ExtractField(json, inst.Str);
+                    if (extracted == null)
+                        return false; // Selector not found → expression is false (matches Redis)
+
+                    if (stackLen >= capacity) return false;
+                    stack[stackLen++] = extracted;
+                    continue;
+                }
+
+                // Non-operator values — push directly
+                if (inst.TokenType != ExprTokenType.Op)
+                {
+                    if (stackLen >= capacity) return false;
+                    stack[stackLen++] = inst;
+                    continue;
+                }
+
+                // Operators — pop operands, compute, push result.
+                // Arity 0 belongs to the parenthesis markers, which are not executable;
+                // a program that contains one is malformed and is treated as "no match".
+                var arity = OpTable.GetArity(inst.OpCode);
+                if (arity == 0 || stackLen < arity) return false;
+
+                // b is the right-hand (or only) operand, a the left-hand operand of binary operators.
+                var b = stack[--stackLen];
+                var a = arity == 2 ? stack[--stackLen] : null;
+
+                double value;
+                switch (inst.OpCode)
+                {
+                    case OpCode.Not:
+                        value = ToBool(b) == 0 ? 1 : 0;
+                        break;
+                    case OpCode.Pow:
+                        value = Math.Pow(ToNum(a), ToNum(b));
+                        break;
+                    case OpCode.Mul:
+                        value = ToNum(a) * ToNum(b);
+                        break;
+                    case OpCode.Div:
+                        value = ToNum(a) / ToNum(b);
+                        break;
+                    case OpCode.Mod:
+                        value = ToNum(a) % ToNum(b);
+                        break;
+                    case OpCode.Add:
+                        value = ToNum(a) + ToNum(b);
+                        break;
+                    case OpCode.Sub:
+                        value = ToNum(a) - ToNum(b);
+                        break;
+                    case OpCode.Gt:
+                        value = ToNum(a) > ToNum(b) ? 1 : 0;
+                        break;
+                    case OpCode.Gte:
+                        value = ToNum(a) >= ToNum(b) ? 1 : 0;
+                        break;
+                    case OpCode.Lt:
+                        value = ToNum(a) < ToNum(b) ? 1 : 0;
+                        break;
+                    case OpCode.Lte:
+                        value = ToNum(a) <= ToNum(b) ? 1 : 0;
+                        break;
+                    case OpCode.Eq:
+                        value = AreEqual(a, b) ? 1 : 0;
+                        break;
+                    case OpCode.Neq:
+                        value = !AreEqual(a, b) ? 1 : 0;
+                        break;
+                    case OpCode.In:
+                        value = EvalIn(a, b) ? 1 : 0;
+                        break;
+                    case OpCode.And:
+                        value = ToBool(a) != 0 && ToBool(b) != 0 ? 1 : 0;
+                        break;
+                    case OpCode.Or:
+                        value = ToBool(a) != 0 || ToBool(b) != 0 ? 1 : 0;
+                        break;
+                    default:
+                        return false; // unknown opcode → malformed program
+                }
+
+                // At least one operand was popped above, so a free slot is guaranteed here.
+                stack[stackLen++] = ExprToken.NewNum(value);
+            }
+
+            // The filter result is the truthiness of the value left on top of the stack.
+            return stackLen > 0 && ToBool(stack[stackLen - 1]) != 0;
+        }
+
+        // ======================== Type conversion helpers ========================
+
+        ///
+        /// Convert a token to its numeric value.
+        /// Strings are parsed as numbers; unparseable strings return 0.
+        /// Matches Redis exprTokenToNum().
+        ///
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static double ToNum(ExprToken t)
+        {
+            if (t == null) return 0;
+            if (t.TokenType == ExprTokenType.Num) return t.Num;
+            if (t.TokenType == ExprTokenType.Str && t.Str != null)
+            {
+                // NumberStyles.Float already includes AllowLeadingSign; invariant culture keeps '.' as the decimal separator.
+                return double.TryParse(t.Str, NumberStyles.Float, CultureInfo.InvariantCulture, out var result) ? result : 0;
+            }
+            return 0; // null tokens, tuples and any other type coerce to 0
+        }
+
+        ///
+        /// Convert a token to boolean (0 or 1).
+        /// Matches Redis exprTokenToBool(): null=0, num!=0=1, empty string=0, else=1.
+        ///
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static double ToBool(ExprToken t)
+        {
+            if (t == null) return 0;
+            if (t.TokenType == ExprTokenType.Num) return t.Num != 0 ? 1 : 0;
+            if (t.TokenType == ExprTokenType.Str && (t.Str == null || t.Str.Length == 0)) return 0;
+            if (t.TokenType == ExprTokenType.Null) return 0;
+            return 1; // Non-empty strings, tuples, etc. are truthy
+        }
+
+        ///
+        /// Compare two tokens for equality.
+        /// Matches Redis exprTokensEqual():
+        ///   - Both strings → exact string comparison
+        ///   - Both numbers → exact numeric equality (no epsilon)
+        ///   - One/both null → equal only if both null
+        ///   - Mixed types → coerce to numbers and compare
+        ///
+        private static bool AreEqual(ExprToken a, ExprToken b)
+        {
+            // A missing operand equals only another missing operand.
+            if (a is null || b is null) return a is null && b is null;
+
+            // Arms are tried top to bottom, which preserves the precedence of the rules above.
+            return (a.TokenType, b.TokenType) switch
+            {
+                (ExprTokenType.Str, ExprTokenType.Str) => string.Equals(a.Str, b.Str, StringComparison.Ordinal),
+                (ExprTokenType.Num, ExprTokenType.Num) => a.Num == b.Num, // Exact comparison, matching Redis
+                (ExprTokenType.Null, _) or (_, ExprTokenType.Null) => a.TokenType == b.TokenType,
+                _ => ToNum(a) == ToNum(b), // Mixed types — coerce to number
+            };
+        }
+
+        ///
+        /// Evaluate the IN operator.
+        /// Matches Redis expr.c behavior:
+        ///   1. If b is a Tuple, check membership (element-wise AreEqual)
+        ///   2. If both a and b are strings, check substring containment
+        ///   3. Otherwise, false
+        ///
+        private static bool EvalIn(ExprToken a, ExprToken b)
+        {
+            if (b == null) return false;
+
+            switch (b.TokenType)
+            {
+                // Tuple membership (works for both expression tuples [1,2,3] and JSON array tuples)
+                case ExprTokenType.Tuple:
+                {
+                    var members = b.TupleElements;
+                    var memberCount = b.TupleLength;
+                    for (var idx = 0; idx < memberCount; idx++)
+                    {
+                        if (AreEqual(a, members[idx]))
+                            return true;
+                    }
+                    return false;
+                }
+
+                // String substring check (matching Redis exprTokensStringIn);
+                // a needle longer than the haystack can never be contained, so no separate length test is needed.
+                case ExprTokenType.Str when a != null && a.TokenType == ExprTokenType.Str:
+                    return a.Str != null
+                        && b.Str != null
+                        && b.Str.Contains(a.Str, StringComparison.Ordinal);
+
+                default:
+                    return false;
+            }
+        }
+    }
+}
diff --git a/libs/server/Resp/Vector/Filter/VectorFilterEvaluator.cs b/libs/server/Resp/Vector/Filter/VectorFilterEvaluator.cs
deleted file mode 100644
index 7b469926381..00000000000
--- a/libs/server/Resp/Vector/Filter/VectorFilterEvaluator.cs
+++ /dev/null
@@ -1,198 +0,0 @@
-// Copyright (c) Microsoft Corporation.
-// Licensed under the MIT license.
-
-using System;
-using System.Globalization;
-using System.Runtime.CompilerServices;
-using System.Text.Json;
-
-namespace Garnet.server.Vector.Filter
-{
-    ///
-    /// Evaluator for vector filter expressions.
-    /// Evaluates parsed expression trees against JSON attribute data.
-    /// Returns FilterValue (a struct) to avoid boxing allocations on every evaluation.
-    ///
-    /// Note: This evaluator operates over top-level properties of the JSON document only.
-    /// Nested property access is not supported. A future optimization could replace the
-    /// JsonElement-based lookup with a raw span + (offset, length) pairs approach for
-    /// better performance, avoiding JsonDocument allocation entirely.
-    ///
-    internal static class VectorFilterEvaluator
-    {
-        ///
-        /// Evaluate a filter expression against a JSON element and return a boolean result.
-        /// This is the primary public API for filter evaluation.
- /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static bool EvaluateFilterBool(Expr expr, JsonElement root) - { - return IsTruthy(EvaluateExpression(expr, root)); - } - - /// - /// Evaluate a filter expression against a JSON element. - /// Returns a FilterValue (struct) — no boxing occurs for numeric results. - /// - public static FilterValue EvaluateExpression(Expr expr, JsonElement root) - { - if (expr is LiteralExpr lit) - return lit.Value; - - if (expr is MemberExpr member) - return EvaluateMember(member, root); - - if (expr is UnaryExpr unary) - return EvaluateUnary(unary, root); - - if (expr is BinaryExpr binary) - return EvaluateBinary(binary, root); - - return FilterValue.Null; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static FilterValue EvaluateMember(MemberExpr member, JsonElement root) - { - if (root.TryGetProperty(member.Property, out var value)) - { - return value.ValueKind switch - { - JsonValueKind.Number => FilterValue.FromNumber(value.GetDouble()), - JsonValueKind.String => FilterValue.FromString(value.GetString()), - JsonValueKind.True => FilterValue.True, - JsonValueKind.False => FilterValue.False, - JsonValueKind.Array => FilterValue.FromJsonElement(value), - _ => FilterValue.Null - }; - } - return FilterValue.Null; - } - - private static FilterValue EvaluateUnary(UnaryExpr unary, JsonElement root) - { - var operand = EvaluateExpression(unary.Operand, root); - return unary.Operator switch - { - OperatorKind.Not => IsTruthy(operand) ? 
FilterValue.False : FilterValue.True, - OperatorKind.Negate => FilterValue.FromNumber(-ToNumber(operand)), - _ => FilterValue.Null - }; - } - - private static FilterValue EvaluateBinary(BinaryExpr binary, JsonElement root) - { - // Short-circuit logical operators - if (binary.Operator == OperatorKind.And) - { - var left = EvaluateExpression(binary.Left, root); - if (!IsTruthy(left)) return FilterValue.False; - var right = EvaluateExpression(binary.Right, root); - return IsTruthy(right) ? FilterValue.True : FilterValue.False; - } - - if (binary.Operator == OperatorKind.Or) - { - var left = EvaluateExpression(binary.Left, root); - if (IsTruthy(left)) return FilterValue.True; - var right = EvaluateExpression(binary.Right, root); - return IsTruthy(right) ? FilterValue.True : FilterValue.False; - } - - { - var left = EvaluateExpression(binary.Left, root); - var right = EvaluateExpression(binary.Right, root); - - return binary.Operator switch - { - OperatorKind.Add => FilterValue.FromNumber(ToNumber(left) + ToNumber(right)), - OperatorKind.Subtract => FilterValue.FromNumber(ToNumber(left) - ToNumber(right)), - OperatorKind.Multiply => FilterValue.FromNumber(ToNumber(left) * ToNumber(right)), - OperatorKind.Divide => FilterValue.FromNumber(ToNumber(left) / ToNumber(right)), - OperatorKind.Modulo => FilterValue.FromNumber(ToNumber(left) % ToNumber(right)), - OperatorKind.Power => FilterValue.FromNumber(Math.Pow(ToNumber(left), ToNumber(right))), - OperatorKind.GreaterThan => FilterValue.FromBool(ToNumber(left) > ToNumber(right)), - OperatorKind.LessThan => FilterValue.FromBool(ToNumber(left) < ToNumber(right)), - OperatorKind.GreaterEqual => FilterValue.FromBool(ToNumber(left) >= ToNumber(right)), - OperatorKind.LessEqual => FilterValue.FromBool(ToNumber(left) <= ToNumber(right)), - OperatorKind.Equal => FilterValue.FromBool(AreEqual(left, right)), - OperatorKind.NotEqual => FilterValue.FromBool(!AreEqual(left, right)), - OperatorKind.In => FilterValue.FromBool(IsIn(left, 
right)), - _ => FilterValue.Null - }; - } - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static double ToNumber(FilterValue value) - { - return value.Kind switch - { - FilterValueKind.Number => value.AsNumber(), - FilterValueKind.String => double.TryParse(value.AsString(), NumberStyles.Float | NumberStyles.AllowLeadingSign, CultureInfo.InvariantCulture, out var result) ? result : 0, - _ => 0 - }; - } - - /// - /// Determine if a FilterValue is truthy. - /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static bool IsTruthy(FilterValue value) - { - return value.Kind switch - { - FilterValueKind.Number => value.AsNumber() != 0, - FilterValueKind.String => !string.IsNullOrEmpty(value.AsString()), - FilterValueKind.Null => false, - _ => true // JsonArray etc. are truthy - }; - } - - private static bool AreEqual(FilterValue left, FilterValue right) - { - if (left.IsNull && right.IsNull) return true; - if (left.IsNull || right.IsNull) return false; - - // Both are numbers — fast numeric comparison - if (left.Kind == FilterValueKind.Number && right.Kind == FilterValueKind.Number) - return Math.Abs(left.AsNumber() - right.AsNumber()) < 0.0001; - - // If either is a number and the other might be convertible - if (left.Kind == FilterValueKind.Number || right.Kind == FilterValueKind.Number) - return Math.Abs(ToNumber(left) - ToNumber(right)) < 0.0001; - - // Both are strings - if (left.Kind == FilterValueKind.String && right.Kind == FilterValueKind.String) - return left.AsString() == right.AsString(); - - return false; - } - - private static bool IsIn(FilterValue needle, FilterValue haystack) - { - if (haystack.Kind == FilterValueKind.JsonArray) - { - var elem = haystack.AsJsonElement(); - if (elem.ValueKind == JsonValueKind.Array) - { - foreach (var item in elem.EnumerateArray()) - { - var itemValue = item.ValueKind switch - { - JsonValueKind.Number => FilterValue.FromNumber(item.GetDouble()), - JsonValueKind.String => 
FilterValue.FromString(item.GetString()), - JsonValueKind.True => FilterValue.True, - JsonValueKind.False => FilterValue.False, - _ => FilterValue.Null - }; - - if (AreEqual(needle, itemValue)) - return true; - } - } - } - return false; - } - } -} \ No newline at end of file diff --git a/libs/server/Resp/Vector/Filter/VectorFilterExpression.cs b/libs/server/Resp/Vector/Filter/VectorFilterExpression.cs index 04c731d5b1b..9aa5622e058 100644 --- a/libs/server/Resp/Vector/Filter/VectorFilterExpression.cs +++ b/libs/server/Resp/Vector/Filter/VectorFilterExpression.cs @@ -2,163 +2,243 @@ // Licensed under the MIT license. using System.Runtime.CompilerServices; -using System.Text.Json; namespace Garnet.server.Vector.Filter { /// - /// Discriminated union value type to eliminate boxing of doubles/strings - /// throughout the filter evaluation pipeline. + /// Token types for the filter expression virtual machine. + /// + /// The filter engine uses a stack-based postfix VM (modeled after Redis expr.c). + /// A filter string like .year >= 2000 and .rating > 7 is compiled into a flat + /// array of instructions in postfix (reverse-Polish) order: + /// + /// + /// [SEL:year] [NUM:2000] [OP:Gte] [SEL:rating] [NUM:7] [OP:Gt] [OP:And] + /// + /// + /// At execution time, walks this array left-to-right: + /// + /// Value tokens (, , , + /// ) are pushed onto the evaluation stack. + /// tokens trigger on-demand JSON field extraction + /// via ; the extracted value is pushed. + /// tokens pop 1 or 2 operands, compute the result, + /// and push it back. + /// + /// + /// After processing all instructions the top-of-stack value is tested for truthiness + /// to produce the final bool filter result. 
/// - internal readonly struct FilterValue + internal enum ExprTokenType : byte { - private readonly double _number; - private readonly string _string; - private readonly JsonElement _jsonElement; - private readonly FilterValueKind _kind; + Num = 0, + Str = 1, + Tuple = 2, + Selector = 3, + Op = 4, + Null = 5, + Eof = 6, + } - private FilterValue(double number) - { - _number = number; - _string = null; - _jsonElement = default; - _kind = FilterValueKind.Number; - } + /// + /// Operator opcodes used by the filter expression VM. + /// + /// Each opcode has a fixed precedence and arity defined in . + /// During compilation, uses the shunting-yard algorithm + /// to reorder operators from infix to postfix based on these precedence values. + /// During execution, pops the required number of operands + /// (arity), applies the operation, and pushes the result. + /// + /// Precedence and semantics match the Redis expr.c ExprOptable[]. + /// + internal enum OpCode : byte + { + // Precedence 0 + Or = 0, + + // Precedence 1 + And = 1, + + // Precedence 2 + Gt = 2, + Gte = 3, + Lt = 4, + Lte = 5, + Eq = 6, + Neq = 7, + In = 8, + + // Precedence 3 + Add = 9, + Sub = 10, + + // Precedence 4 + Mul = 11, + Div = 12, + Mod = 13, + + // Precedence 5 + Pow = 14, + + // Precedence 6 + Not = 15, + + // Precedence 7 (markers, not real operators) + OParen = 16, + CParen = 17, + } - private FilterValue(string str) - { - _number = 0; - _string = str; - _jsonElement = default; - _kind = FilterValueKind.String; - } + /// + /// A token in the compiled filter program or on the evaluation stack. + /// + /// Designed after Redis expr.c exprtoken — a single type that can represent any + /// value the VM needs: + /// + /// + /// TokenTypePayload used + /// + /// double (booleans are 1/0). + /// + /// — an interned or extracted string. + /// + /// — the JSON field name (e.g. "year" from .year). + /// + /// + — for the + /// in operator or JSON array values. + /// + /// — the operator to execute. 
+ /// + /// No payload — represents JSON null or the null keyword. + /// + /// + /// Lifetime: Tokens inside the compiled are + /// allocated once and reused across all candidate evaluations. Tokens created during + /// execution (e.g. from JSON field extraction) are + /// transient and discarded after each call. + /// + internal sealed class ExprToken + { + public ExprTokenType TokenType; - private FilterValue(JsonElement element) - { - _number = 0; - _string = null; - _jsonElement = element; - _kind = FilterValueKind.JsonArray; - } + /// Numeric value. Also used for bool: true=1, false=0. + public double Num; - public FilterValueKind Kind - { - [MethodImpl(MethodImplOptions.AggressiveInlining)] - get => _kind; - } + /// String value — for Str and Selector types. + public string Str; - public bool IsNull - { - [MethodImpl(MethodImplOptions.AggressiveInlining)] - get => _kind == FilterValueKind.Null; - } + /// Operator opcode — for Op type. + public OpCode OpCode; - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public double AsNumber() => _number; + /// Tuple elements for IN operator. + public ExprToken[] TupleElements; - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public string AsString() => _string; + /// Number of elements in the tuple. 
+ public int TupleLength; [MethodImpl(MethodImplOptions.AggressiveInlining)] - public JsonElement AsJsonElement() => _jsonElement; - - public static readonly FilterValue Null = default; - public static readonly FilterValue True = new(1.0); - public static readonly FilterValue False = new(0.0); + public static ExprToken NewNum(double value) + { + return new ExprToken { TokenType = ExprTokenType.Num, Num = value }; + } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static FilterValue FromNumber(double value) => new(value); + public static ExprToken NewStr(string value) + { + return new ExprToken { TokenType = ExprTokenType.Str, Str = value }; + } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static FilterValue FromString(string value) => new(value); + public static ExprToken NewSelector(string fieldName) + { + return new ExprToken { TokenType = ExprTokenType.Selector, Str = fieldName }; + } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static FilterValue FromJsonElement(JsonElement value) => new(value); + public static ExprToken NewOp(OpCode opCode) + { + return new ExprToken { TokenType = ExprTokenType.Op, OpCode = opCode }; + } [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static FilterValue FromBool(bool value) => value ? True : False; - } + public static ExprToken NewNull() + { + return new ExprToken { TokenType = ExprTokenType.Null }; + } - internal enum FilterValueKind : byte - { - Null = 0, - Number = 1, - String = 2, - JsonArray = 3, + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static ExprToken NewTuple(ExprToken[] elements, int length) + { + return new ExprToken { TokenType = ExprTokenType.Tuple, TupleElements = elements, TupleLength = length }; + } } /// - /// Enum for operator types, replacing string-based operators - /// to enable integer comparison instead of string comparison on hot paths. + /// Operator metadata table, mirroring Redis ExprOptable. 
+ /// Provides precedence and arity lookup for shunting-yard compilation. /// - internal enum OperatorKind : byte + internal static class OpTable { - // Arithmetic - Add, // + - Subtract, // - - Multiply, // * - Divide, // / - Modulo, // % - Power, // ** - - // Comparison - GreaterThan, // > - LessThan, // < - GreaterEqual, // >= - LessEqual, // <= - Equal, // == - NotEqual, // != - - // Logical - And, // and, && - Or, // or, || - Not, // not, ! - - // Containment - In, // in - - // Unary - Negate, // - (unary) - } + // Indexed by OpCode for O(1) lookup. + // Entries: (Precedence, Arity). OpCode enum values are consecutive 0..17. + private static readonly (int Precedence, int Arity)[] Table; - /// - /// Base class for filter expression tree nodes. - /// - internal abstract class Expr { } + static OpTable() + { + Table = new (int, int)[18]; + Table[(int)OpCode.Or] = (0, 2); + Table[(int)OpCode.And] = (1, 2); + Table[(int)OpCode.Gt] = (2, 2); + Table[(int)OpCode.Gte] = (2, 2); + Table[(int)OpCode.Lt] = (2, 2); + Table[(int)OpCode.Lte] = (2, 2); + Table[(int)OpCode.Eq] = (2, 2); + Table[(int)OpCode.Neq] = (2, 2); + Table[(int)OpCode.In] = (2, 2); + Table[(int)OpCode.Add] = (3, 2); + Table[(int)OpCode.Sub] = (3, 2); + Table[(int)OpCode.Mul] = (4, 2); + Table[(int)OpCode.Div] = (4, 2); + Table[(int)OpCode.Mod] = (4, 2); + Table[(int)OpCode.Pow] = (5, 2); + Table[(int)OpCode.Not] = (6, 1); + Table[(int)OpCode.OParen] = (7, 0); + Table[(int)OpCode.CParen] = (7, 0); + } - /// - /// Represents a literal value (number, string, boolean). - /// Uses FilterValue to avoid boxing. - /// - internal sealed class LiteralExpr : Expr - { - public FilterValue Value { get; init; } - } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static int GetPrecedence(OpCode code) => Table[(int)code].Precedence; - /// - /// Represents a member access expression (e.g., .year, .rating). 
- /// - internal sealed class MemberExpr : Expr - { - public string Property { get; init; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static int GetArity(OpCode code) => Table[(int)code].Arity; } /// - /// Represents a unary operation (e.g., not, -). + /// Compiled filter expression program — the output of + /// and the input to . + /// + /// Contains a flat postfix (reverse-Polish notation) instruction sequence where every + /// element is an : + /// + /// + /// Source: .year >= 2000 and .rating > 7 + /// Program: [SEL:year] [NUM:2000] [OP:>=] [SEL:rating] [NUM:7] [OP:>] [OP:and] + /// + /// + /// Compile-once, run-many: The program is compiled once per query, then + /// executed against every candidate element's raw JSON bytes. The program itself is + /// read-only during execution — all mutable state lives in the per-call evaluation + /// stack inside . + /// + /// This is the C# equivalent of the exprstate.program[] array in + /// Redis expr.c. The evaluation stack (values_stack in Redis) is + /// not stored here — it is allocated per-call in . /// - internal sealed class UnaryExpr : Expr + internal sealed class ExprProgram { - public OperatorKind Operator { get; init; } - public Expr Operand { get; init; } - } + /// The compiled postfix instruction sequence. + public ExprToken[] Instructions; - /// - /// Represents a binary operation (e.g., +, -, ==, and). - /// - internal sealed class BinaryExpr : Expr - { - public Expr Left { get; init; } - public OperatorKind Operator { get; init; } - public Expr Right { get; init; } + /// Number of instructions in the program. + public int Length; } } \ No newline at end of file diff --git a/libs/server/Resp/Vector/Filter/VectorFilterParser.cs b/libs/server/Resp/Vector/Filter/VectorFilterParser.cs deleted file mode 100644 index df6abdc6644..00000000000 --- a/libs/server/Resp/Vector/Filter/VectorFilterParser.cs +++ /dev/null @@ -1,365 +0,0 @@ -// Copyright (c) Microsoft Corporation. 
-// Licensed under the MIT license. - -using System.Collections.Generic; -using System.Globalization; - -namespace Garnet.server.Vector.Filter -{ - /// - /// Recursive descent parser for vector filter expressions. - /// Supports arithmetic, comparison, logical operators, containment, and grouping. - /// Uses TryParse pattern to avoid exceptions in the hot path. - /// Includes a recursion depth guard to prevent stack overflow from deeply nested expressions. - /// - internal static class VectorFilterParser - { - /// - /// Maximum recursion depth allowed during parsing to prevent stack overflow. - /// This limit is intentionally conservative; typical filter expressions are shallow. - /// Windows and Linux may use different default stack sizes, so we keep this low. - /// - private const int MaxRecursionDepth = 64; - - /// - /// Attempt to parse a filter expression from the token list. - /// Returns false with an error message if parsing fails. - /// - /// The list of tokens to parse. - /// The starting token index. - /// The parsed expression tree, or null on failure. - /// The index past the last consumed token. - /// An error message describing the failure, or null on success. - /// True if parsing succeeded; false otherwise. 
- public static bool TryParseExpression(List tokens, int start, out Expr result, out int end, out string error) - { - return TryParseLogicalOr(tokens, start, out result, out end, out error, depth: 0); - } - - private static bool TryParseLogicalOr(List tokens, int start, out Expr result, out int end, out string error, int depth) - { - if (!TryParseLogicalAnd(tokens, start, out var left, out end, out error, depth)) - { - result = null; - return false; - } - - while (end < tokens.Count && - ((tokens[end].Type == TokenType.Keyword && tokens[end].Value == "or") || - (tokens[end].Type == TokenType.Operator && tokens[end].Value == "||"))) - { - end++; - if (!TryParseLogicalAnd(tokens, end, out var right, out end, out error, depth)) - { - result = null; - return false; - } - left = new BinaryExpr { Left = left, Operator = OperatorKind.Or, Right = right }; - } - - result = left; - return true; - } - - private static bool TryParseLogicalAnd(List tokens, int start, out Expr result, out int end, out string error, int depth) - { - if (!TryParseEquality(tokens, start, out var left, out end, out error, depth)) - { - result = null; - return false; - } - - while (end < tokens.Count && - ((tokens[end].Type == TokenType.Keyword && tokens[end].Value == "and") || - (tokens[end].Type == TokenType.Operator && tokens[end].Value == "&&"))) - { - end++; - if (!TryParseEquality(tokens, end, out var right, out end, out error, depth)) - { - result = null; - return false; - } - left = new BinaryExpr { Left = left, Operator = OperatorKind.And, Right = right }; - } - - result = left; - return true; - } - - private static bool TryParseEquality(List tokens, int start, out Expr result, out int end, out string error, int depth) - { - if (!TryParseComparison(tokens, start, out var left, out end, out error, depth)) - { - result = null; - return false; - } - - while (end < tokens.Count && tokens[end].Type == TokenType.Operator && - (tokens[end].Value == "==" || tokens[end].Value == "!=")) - { - var op = 
tokens[end].Value == "==" ? OperatorKind.Equal : OperatorKind.NotEqual; - end++; - if (!TryParseComparison(tokens, end, out var right, out end, out error, depth)) - { - result = null; - return false; - } - left = new BinaryExpr { Left = left, Operator = op, Right = right }; - } - - result = left; - return true; - } - - private static bool TryParseComparison(List tokens, int start, out Expr result, out int end, out string error, int depth) - { - if (!TryParseContainment(tokens, start, out var left, out end, out error, depth)) - { - result = null; - return false; - } - - while (end < tokens.Count && tokens[end].Type == TokenType.Operator && - (tokens[end].Value == ">" || tokens[end].Value == "<" || - tokens[end].Value == ">=" || tokens[end].Value == "<=")) - { - var op = ParseComparisonOperator(tokens[end].Value); - end++; - if (!TryParseContainment(tokens, end, out var right, out end, out error, depth)) - { - result = null; - return false; - } - left = new BinaryExpr { Left = left, Operator = op, Right = right }; - } - - result = left; - return true; - } - - private static OperatorKind ParseComparisonOperator(string value) - { - // Length check first for fast disambiguation - if (value.Length == 1) - return value[0] == '>' ? OperatorKind.GreaterThan : OperatorKind.LessThan; - return value[0] == '>' ? 
OperatorKind.GreaterEqual : OperatorKind.LessEqual; - } - - private static bool TryParseContainment(List tokens, int start, out Expr result, out int end, out string error, int depth) - { - if (!TryParseAdditive(tokens, start, out var left, out end, out error, depth)) - { - result = null; - return false; - } - - if (end < tokens.Count && tokens[end].Type == TokenType.Keyword && tokens[end].Value == "in") - { - end++; - if (!TryParseAdditive(tokens, end, out var right, out end, out error, depth)) - { - result = null; - return false; - } - left = new BinaryExpr { Left = left, Operator = OperatorKind.In, Right = right }; - } - - result = left; - return true; - } - - private static bool TryParseAdditive(List tokens, int start, out Expr result, out int end, out string error, int depth) - { - if (!TryParseMultiplicative(tokens, start, out var left, out end, out error, depth)) - { - result = null; - return false; - } - - while (end < tokens.Count && tokens[end].Type == TokenType.Operator && - (tokens[end].Value == "+" || tokens[end].Value == "-")) - { - var op = tokens[end].Value == "+" ? 
OperatorKind.Add : OperatorKind.Subtract; - end++; - if (!TryParseMultiplicative(tokens, end, out var right, out end, out error, depth)) - { - result = null; - return false; - } - left = new BinaryExpr { Left = left, Operator = op, Right = right }; - } - - result = left; - return true; - } - - private static bool TryParseMultiplicative(List tokens, int start, out Expr result, out int end, out string error, int depth) - { - if (!TryParseExponentiation(tokens, start, out var left, out end, out error, depth)) - { - result = null; - return false; - } - - while (end < tokens.Count && tokens[end].Type == TokenType.Operator && - (tokens[end].Value == "*" || tokens[end].Value == "/" || tokens[end].Value == "%")) - { - var op = tokens[end].Value[0] switch - { - '*' => OperatorKind.Multiply, - '/' => OperatorKind.Divide, - _ => OperatorKind.Modulo - }; - end++; - if (!TryParseExponentiation(tokens, end, out var right, out end, out error, depth)) - { - result = null; - return false; - } - left = new BinaryExpr { Left = left, Operator = op, Right = right }; - } - - result = left; - return true; - } - - private static bool TryParseExponentiation(List tokens, int start, out Expr result, out int end, out string error, int depth) - { - if (!TryParseUnary(tokens, start, out var left, out end, out error, depth)) - { - result = null; - return false; - } - - if (end < tokens.Count && tokens[end].Type == TokenType.Operator && tokens[end].Value == "**") - { - end++; - // Right associative — recurse into exponentiation - if (!TryParseExponentiation(tokens, end, out var right, out end, out error, depth)) - { - result = null; - return false; - } - left = new BinaryExpr { Left = left, Operator = OperatorKind.Power, Right = right }; - } - - result = left; - return true; - } - - private static bool TryParseUnary(List tokens, int start, out Expr result, out int end, out string error, int depth) - { - if (start < tokens.Count) - { - if ((tokens[start].Type == TokenType.Keyword && 
tokens[start].Value == "not") || - (tokens[start].Type == TokenType.Operator && tokens[start].Value == "!")) - { - start++; - if (!TryParseUnary(tokens, start, out var operand, out end, out error, depth)) - { - result = null; - return false; - } - result = new UnaryExpr { Operator = OperatorKind.Not, Operand = operand }; - return true; - } - - if (tokens[start].Type == TokenType.Operator && tokens[start].Value == "-") - { - start++; - if (!TryParseUnary(tokens, start, out var operand, out end, out error, depth)) - { - result = null; - return false; - } - result = new UnaryExpr { Operator = OperatorKind.Negate, Operand = operand }; - return true; - } - } - - return TryParsePrimary(tokens, start, out result, out end, out error, depth); - } - - private static bool TryParsePrimary(List tokens, int start, out Expr result, out int end, out string error, int depth) - { - result = null; - - if (start >= tokens.Count) - { - end = start; - error = "Unexpected end of expression"; - return false; - } - - var token = tokens[start]; - - // Parentheses — increase recursion depth - if (token.Type == TokenType.Delimiter && token.Value == "(") - { - var newDepth = depth + 1; - if (newDepth > MaxRecursionDepth) - { - end = start; - error = $"Filter expression exceeds maximum nesting depth of {MaxRecursionDepth}"; - return false; - } - - if (!TryParseLogicalOr(tokens, start + 1, out var expr, out end, out error, newDepth)) - return false; - - if (end >= tokens.Count || tokens[end].Type != TokenType.Delimiter || tokens[end].Value != ")") - { - error = "Missing closing parenthesis"; - return false; - } - end++; - result = expr; - return true; - } - - // Literals — use FilterValue to avoid boxing doubles - if (token.Type == TokenType.Number) - { - if (!double.TryParse(token.Value, NumberStyles.Float | NumberStyles.AllowLeadingSign, CultureInfo.InvariantCulture, out var numValue)) - { - end = start; - error = $"Invalid number literal: {token.Value}"; - return false; - } - end = start + 1; 
- result = new LiteralExpr { Value = FilterValue.FromNumber(numValue) }; - error = null; - return true; - } - - if (token.Type == TokenType.String) - { - end = start + 1; - result = new LiteralExpr { Value = FilterValue.FromString(token.Value) }; - error = null; - return true; - } - - if (token.Type == TokenType.Boolean) - { - end = start + 1; - result = new LiteralExpr { Value = token.Value == "true" ? FilterValue.True : FilterValue.False }; - error = null; - return true; - } - - // Identifier (field access) - if (token.Type == TokenType.Identifier) - { - end = start + 1; - result = new MemberExpr { Property = token.Value.TrimStart('.') }; - error = null; - return true; - } - - end = start; - error = $"Unexpected token: {token.Value}"; - return false; - } - } -} \ No newline at end of file diff --git a/libs/server/Resp/Vector/Filter/VectorFilterTokenizer.cs b/libs/server/Resp/Vector/Filter/VectorFilterTokenizer.cs deleted file mode 100644 index 1780d33c915..00000000000 --- a/libs/server/Resp/Vector/Filter/VectorFilterTokenizer.cs +++ /dev/null @@ -1,224 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT license. - -using System.Collections.Generic; -using System.Runtime.CompilerServices; - -namespace Garnet.server.Vector.Filter -{ - /// - /// Token types for vector filter expressions. - /// - internal enum TokenType : byte { Number, String, Boolean, Identifier, Operator, Keyword, Delimiter } - - /// - /// Represents a token in a filter expression. - /// Value type to avoid per-token heap allocations. - /// - internal readonly struct Token - { - public Token(TokenType type, string value) - { - Type = type; - Value = value; - } - - public TokenType Type { get; } - public string Value { get; } - } - - /// - /// Tokenizer for vector filter expressions. - /// Converts filter strings into tokens for parsing. - /// Uses TryTokenize pattern to avoid exceptions in the hot path. 
- /// - internal static class VectorFilterTokenizer - { - // Pre-cached operator strings to avoid per-token string allocations - private const string OpPlus = "+"; - private const string OpMinus = "-"; - private const string OpStar = "*"; - private const string OpSlash = "/"; - private const string OpPercent = "%"; - private const string OpGreater = ">"; - private const string OpLess = "<"; - private const string OpBang = "!"; - private const string OpOpenParen = "("; - private const string OpCloseParen = ")"; - - private const string OpEqualEqual = "=="; - private const string OpBangEqual = "!="; - private const string OpGreaterEqual = ">="; - private const string OpLessEqual = "<="; - private const string OpAmpAmp = "&&"; - private const string OpPipePipe = "||"; - private const string OpStarStar = "**"; - - /// - /// Attempt to tokenize the input string into a list of tokens. - /// Returns false with an error message if the input is malformed. - /// - /// The filter expression string to tokenize. - /// The resulting list of tokens, or null on failure. - /// An error message describing the failure, or null on success. - /// True if tokenization succeeded; false otherwise. 
- public static bool TryTokenize(string input, out List tokens, out string error) - { - tokens = new List(); - error = null; - var i = 0; - - while (i < input.Length) - { - // Skip whitespace - if (char.IsWhiteSpace(input[i])) - { - i++; - continue; - } - - // Numbers (treat '-' as negative sign only at start or after operator/keyword/open-paren) - var isNegativeNumber = input[i] == '-' - && i + 1 < input.Length && char.IsDigit(input[i + 1]) - && (tokens.Count == 0 - || tokens[^1].Type == TokenType.Operator - || tokens[^1].Type == TokenType.Keyword - || (tokens[^1].Type == TokenType.Delimiter && tokens[^1].Value == OpOpenParen)); - - if (char.IsDigit(input[i]) || isNegativeNumber) - { - var start = i; - if (input[i] == '-') i++; - - var dotCount = 0; - while (i < input.Length && (char.IsDigit(input[i]) || input[i] == '.')) - { - if (input[i] == '.') - { - dotCount++; - if (dotCount > 1) - { - error = $"Invalid number literal with multiple decimal points at position {start}"; - tokens = null; - return false; - } - } - i++; - } - tokens.Add(new Token(TokenType.Number, input.Substring(start, i - start))); - continue; - } - - // Identifiers and keywords (field names starting with .) - if (input[i] == '.' 
|| char.IsLetter(input[i]) || input[i] == '_') - { - var start = i; - while (i < input.Length && (char.IsLetterOrDigit(input[i]) || input[i] == '_' || input[i] == '.')) - i++; - var value = input.Substring(start, i - start); - - // Check for keywords - if (value == "and" || value == "or" || value == "not" || value == "in") - tokens.Add(new Token(TokenType.Keyword, value)); - else if (value == "true" || value == "false") - tokens.Add(new Token(TokenType.Boolean, value)); - else - tokens.Add(new Token(TokenType.Identifier, value)); - continue; - } - - // String literals - if (input[i] == '"' || input[i] == '\'') - { - var quote = input[i]; - var start = ++i; - while (i < input.Length && input[i] != quote) - { - if (input[i] == '\\' && i + 1 < input.Length) i++; // Skip escaped characters - i++; - } - if (i >= input.Length) - { - error = $"Unterminated string literal starting at position {start - 1}"; - tokens = null; - return false; - } - tokens.Add(new Token(TokenType.String, input.Substring(start, i - start))); - i++; // Skip closing quote - continue; - } - - // Two-character operators — avoid Substring allocation by comparing chars directly - if (i + 1 < input.Length) - { - var twoCharOp = MatchTwoCharOperator(input[i], input[i + 1]); - if (twoCharOp != null) - { - tokens.Add(new Token(TokenType.Operator, twoCharOp)); - i += 2; - continue; - } - } - - // Single-character operators and delimiters — avoid input[i].ToString() allocation - var singleCharOp = MatchSingleChar(input[i]); - if (singleCharOp != null) - { - var type = input[i] == '(' || input[i] == ')' ? TokenType.Delimiter : TokenType.Operator; - tokens.Add(new Token(type, singleCharOp)); - i++; - continue; - } - - error = $"Unexpected character in filter expression: '{input[i]}' at position {i}"; - tokens = null; - return false; - } - - return true; - } - - /// - /// Match a two-character operator. Returns the cached string or null. - /// Avoids Substring(i, 2) allocation on every iteration. 
- /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static string MatchTwoCharOperator(char c1, char c2) - { - return c1 switch - { - '=' when c2 == '=' => OpEqualEqual, - '!' when c2 == '=' => OpBangEqual, - '>' when c2 == '=' => OpGreaterEqual, - '<' when c2 == '=' => OpLessEqual, - '&' when c2 == '&' => OpAmpAmp, - '|' when c2 == '|' => OpPipePipe, - '*' when c2 == '*' => OpStarStar, - _ => null - }; - } - - /// - /// Match a single-character operator or delimiter. Returns the cached string or null. - /// Avoids char.ToString() allocation. - /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static string MatchSingleChar(char c) - { - return c switch - { - '+' => OpPlus, - '-' => OpMinus, - '*' => OpStar, - '/' => OpSlash, - '%' => OpPercent, - '>' => OpGreater, - '<' => OpLess, - '!' => OpBang, - '(' => OpOpenParen, - ')' => OpCloseParen, - _ => null - }; - } - } -} \ No newline at end of file diff --git a/libs/server/Resp/Vector/VectorManager.cs b/libs/server/Resp/Vector/VectorManager.cs index 66d5c35420b..549083755b4 100644 --- a/libs/server/Resp/Vector/VectorManager.cs +++ b/libs/server/Resp/Vector/VectorManager.cs @@ -7,7 +7,6 @@ using System.Diagnostics; using System.Runtime.InteropServices; using System.Text; -using System.Text.Json; using System.Threading.Channels; using System.Threading.Tasks; using Garnet.common; @@ -934,9 +933,15 @@ internal static uint CalculateValueDimensions(VectorValueType valueType, ReadOnl } /// - /// Apply post-filtering to vector search results based on JSON path filter expression. + /// Apply post-filtering to vector search results using a compiled filter expression. /// Returns the number of results that passed the filter, or -1 if the filter expression is invalid. /// + /// Architecture (modeled after Redis expr.c + fastjson.c): + /// 1. The filter string is compiled ONCE into a flat postfix program (ExprCompiler). + /// 2. 
For each candidate, the program is executed against the raw JSON attribute bytes + /// using a stack-based VM (ExprRunner) with on-demand field extraction (AttributeExtractor). + /// 3. No JsonDocument DOM is allocated — fields are extracted directly from the raw bytes. + /// /// TODO: A better approach would be to produce a bitmap of passing elements and let /// NetworkVSIM handle skipping non-matching entries, avoiding the in-place compaction copies. /// For now we compact in-place to minimize the scope of changes. @@ -953,24 +958,15 @@ private int ApplyPostFilter( return numResults; } - // Convert filter bytes to string for tokenization. - // NOTE: This allocation is required because the tokenizer operates on strings. - // A future optimization could make the tokenizer work directly on ReadOnlySpan. + // Convert filter bytes to string for compilation. + // NOTE: This allocation is required because the compiler operates on strings. + // A future optimization could make the compiler work directly on ReadOnlySpan. var filterStr = Encoding.UTF8.GetString(filter); - // Tokenize and parse the filter expression. If this fails, we return -1 to indicate a bad filter expression. - if (!VectorFilterTokenizer.TryTokenize(filterStr, out var tokens, out _)) - { - return -1; - } - - if (!VectorFilterParser.TryParseExpression(tokens, 0, out var filterExpr, out var endIndex, out _)) - { - return -1; - } - - // Ensure the entire token stream was consumed by the parser - if (endIndex != tokens.Count) + // Compile the filter expression into a flat postfix program. + // This is done once and reused for all candidate evaluations. 
+ var program = ExprCompiler.TryCompile(filterStr, out _); + if (program == null) { return -1; } @@ -997,8 +993,9 @@ private int ApplyPostFilter( var attrLen = BinaryPrimitives.ReadInt32LittleEndian(attributesSpan[attrReadPos..]); var attrData = attributesSpan.Slice(attrReadPos + sizeof(int), attrLen); - // Evaluate filter - if (EvaluateFilter(filterExpr, attrData)) + // Execute the compiled filter program against raw JSON bytes. + // No JsonDocument DOM allocation — AttributeExtractor extracts fields on demand. + if (ExprRunner.Run(program, attrData)) { // Copy ID if not already in place if (idReadPos != idWritePos) @@ -1036,27 +1033,6 @@ private int ApplyPostFilter( return filteredCount; } - /// - /// Evaluate a pre-parsed filter expression against attribute data. - /// Returns false if the JSON is malformed or the filter cannot be evaluated. - /// - private static bool EvaluateFilter(Expr filterExpr, ReadOnlySpan attributeJson) - { - try - { - var reader = new Utf8JsonReader(attributeJson); - using var jsonDoc = JsonDocument.ParseValue(ref reader); - var root = jsonDoc.RootElement; - - return VectorFilterEvaluator.EvaluateFilterBool(filterExpr, root); - } - catch (JsonException) - { - // Malformed JSON in attribute data — exclude the result - return false; - } - } - [Conditional("DEBUG")] private static void AssertHaveStorageSession() { diff --git a/test/Garnet.test/Filter/ExprCompilerTests.cs b/test/Garnet.test/Filter/ExprCompilerTests.cs new file mode 100644 index 00000000000..bf462240790 --- /dev/null +++ b/test/Garnet.test/Filter/ExprCompilerTests.cs @@ -0,0 +1,358 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +using Allure.NUnit; +using Garnet.server.Vector.Filter; +using NUnit.Framework; +using NUnit.Framework.Legacy; + +namespace Garnet.test +{ + /// + /// Tests for the ExprCompiler (shunting-yard tokenizer + compiler). + /// Verifies tokenization and compilation to flat postfix programs. 
+ /// + [AllureNUnit] + [TestFixture] + public class ExprCompilerTests : AllureTestBase + { + [Test] + public void Compiler_IntegerNumbers() + { + var program = ExprCompiler.TryCompile("42", out _); + ClassicAssert.IsNotNull(program); + ClassicAssert.AreEqual(1, program.Length); + ClassicAssert.AreEqual(ExprTokenType.Num, program.Instructions[0].TokenType); + ClassicAssert.AreEqual(42.0, program.Instructions[0].Num); + } + + [Test] + public void Compiler_DecimalNumbers() + { + var program = ExprCompiler.TryCompile("3.14", out _); + ClassicAssert.IsNotNull(program); + ClassicAssert.AreEqual(1, program.Length); + ClassicAssert.AreEqual(ExprTokenType.Num, program.Instructions[0].TokenType); + ClassicAssert.AreEqual(3.14, program.Instructions[0].Num, 0.001); + } + + [Test] + public void Compiler_NegativeNumbers() + { + var program = ExprCompiler.TryCompile("-5", out _); + ClassicAssert.IsNotNull(program); + ClassicAssert.AreEqual(1, program.Length); + ClassicAssert.AreEqual(ExprTokenType.Num, program.Instructions[0].TokenType); + ClassicAssert.AreEqual(-5.0, program.Instructions[0].Num); + } + + [Test] + public void Compiler_StringLiterals() + { + var program = ExprCompiler.TryCompile("\"hello\"", out _); + ClassicAssert.IsNotNull(program); + ClassicAssert.AreEqual(1, program.Length); + ClassicAssert.AreEqual(ExprTokenType.Str, program.Instructions[0].TokenType); + ClassicAssert.AreEqual("hello", program.Instructions[0].Str); + + program = ExprCompiler.TryCompile("'world'", out _); + ClassicAssert.IsNotNull(program); + ClassicAssert.AreEqual(1, program.Length); + ClassicAssert.AreEqual(ExprTokenType.Str, program.Instructions[0].TokenType); + ClassicAssert.AreEqual("world", program.Instructions[0].Str); + } + + [Test] + public void Compiler_EscapedStringLiterals() + { + var program = ExprCompiler.TryCompile("\"hello\\\"world\"", out _); + ClassicAssert.IsNotNull(program); + ClassicAssert.AreEqual(1, program.Length); + ClassicAssert.AreEqual(ExprTokenType.Str, 
program.Instructions[0].TokenType); + ClassicAssert.AreEqual("hello\"world", program.Instructions[0].Str); + } + + [Test] + public void Compiler_UnterminatedStringReturnsFalse() + { + var program = ExprCompiler.TryCompile("\"hello", out _); + ClassicAssert.IsNull(program); + } + + [Test] + public void Compiler_SubtractionNotConfusedWithNegative() + { + // ".a - 5" → postfix: [SEL:a] [NUM:5] [OP:Sub] + var program = ExprCompiler.TryCompile(".a - 5", out _); + ClassicAssert.IsNotNull(program); + ClassicAssert.AreEqual(3, program.Length); + ClassicAssert.AreEqual(ExprTokenType.Selector, program.Instructions[0].TokenType); + ClassicAssert.AreEqual(ExprTokenType.Num, program.Instructions[1].TokenType); + ClassicAssert.AreEqual(5.0, program.Instructions[1].Num); + ClassicAssert.AreEqual(ExprTokenType.Op, program.Instructions[2].TokenType); + ClassicAssert.AreEqual(OpCode.Sub, program.Instructions[2].OpCode); + } + + [Test] + public void Compiler_Selectors() + { + var program = ExprCompiler.TryCompile(".year", out _); + ClassicAssert.IsNotNull(program); + ClassicAssert.AreEqual(1, program.Length); + ClassicAssert.AreEqual(ExprTokenType.Selector, program.Instructions[0].TokenType); + ClassicAssert.AreEqual("year", program.Instructions[0].Str); + } + + [Test] + public void Compiler_Keywords() + { + // "true and false" → [NUM:1] [NUM:0] [OP:And] + var program = ExprCompiler.TryCompile("true and false", out _); + ClassicAssert.IsNotNull(program); + ClassicAssert.AreEqual(3, program.Length); + ClassicAssert.AreEqual(ExprTokenType.Num, program.Instructions[0].TokenType); + ClassicAssert.AreEqual(1.0, program.Instructions[0].Num); + ClassicAssert.AreEqual(ExprTokenType.Num, program.Instructions[1].TokenType); + ClassicAssert.AreEqual(0.0, program.Instructions[1].Num); + ClassicAssert.AreEqual(ExprTokenType.Op, program.Instructions[2].TokenType); + ClassicAssert.AreEqual(OpCode.And, program.Instructions[2].OpCode); + } + + [Test] + public void Compiler_Booleans() + { + var 
program = ExprCompiler.TryCompile("true", out _); + ClassicAssert.IsNotNull(program); + ClassicAssert.AreEqual(1, program.Length); + ClassicAssert.AreEqual(ExprTokenType.Num, program.Instructions[0].TokenType); + ClassicAssert.AreEqual(1.0, program.Instructions[0].Num); + + program = ExprCompiler.TryCompile("false", out _); + ClassicAssert.IsNotNull(program); + ClassicAssert.AreEqual(1, program.Length); + ClassicAssert.AreEqual(ExprTokenType.Num, program.Instructions[0].TokenType); + ClassicAssert.AreEqual(0.0, program.Instructions[0].Num); + } + + [Test] + public void Compiler_TwoCharOperators() + { + var program = ExprCompiler.TryCompile("1 == 2", out _); + ClassicAssert.IsNotNull(program); + ClassicAssert.AreEqual(OpCode.Eq, program.Instructions[2].OpCode); + + program = ExprCompiler.TryCompile("1 != 2", out _); + ClassicAssert.IsNotNull(program); + ClassicAssert.AreEqual(OpCode.Neq, program.Instructions[2].OpCode); + + program = ExprCompiler.TryCompile("1 >= 2", out _); + ClassicAssert.IsNotNull(program); + ClassicAssert.AreEqual(OpCode.Gte, program.Instructions[2].OpCode); + + program = ExprCompiler.TryCompile("1 <= 2", out _); + ClassicAssert.IsNotNull(program); + ClassicAssert.AreEqual(OpCode.Lte, program.Instructions[2].OpCode); + + program = ExprCompiler.TryCompile("true && false", out _); + ClassicAssert.IsNotNull(program); + ClassicAssert.AreEqual(OpCode.And, program.Instructions[2].OpCode); + + program = ExprCompiler.TryCompile("true || false", out _); + ClassicAssert.IsNotNull(program); + ClassicAssert.AreEqual(OpCode.Or, program.Instructions[2].OpCode); + + program = ExprCompiler.TryCompile("2 ** 3", out _); + ClassicAssert.IsNotNull(program); + ClassicAssert.AreEqual(OpCode.Pow, program.Instructions[2].OpCode); + } + + [Test] + public void Compiler_SingleCharOperators() + { + var program = ExprCompiler.TryCompile("1 > 2", out _); + ClassicAssert.IsNotNull(program); + ClassicAssert.AreEqual(OpCode.Gt, program.Instructions[2].OpCode); + + program = 
ExprCompiler.TryCompile("1 < 2", out _); + ClassicAssert.IsNotNull(program); + ClassicAssert.AreEqual(OpCode.Lt, program.Instructions[2].OpCode); + + program = ExprCompiler.TryCompile("1 + 2", out _); + ClassicAssert.IsNotNull(program); + ClassicAssert.AreEqual(OpCode.Add, program.Instructions[2].OpCode); + + program = ExprCompiler.TryCompile("1 * 2", out _); + ClassicAssert.IsNotNull(program); + ClassicAssert.AreEqual(OpCode.Mul, program.Instructions[2].OpCode); + + program = ExprCompiler.TryCompile("1 / 2", out _); + ClassicAssert.IsNotNull(program); + ClassicAssert.AreEqual(OpCode.Div, program.Instructions[2].OpCode); + + program = ExprCompiler.TryCompile("1 % 2", out _); + ClassicAssert.IsNotNull(program); + ClassicAssert.AreEqual(OpCode.Mod, program.Instructions[2].OpCode); + } + + [Test] + public void Compiler_Parentheses() + { + var program = ExprCompiler.TryCompile("(.year > 10)", out _); + ClassicAssert.IsNotNull(program); + // Postfix: [SEL:year] [NUM:10] [OP:Gt] + ClassicAssert.AreEqual(3, program.Length); + } + + [Test] + public void Compiler_ComplexExpression() + { + var program = ExprCompiler.TryCompile(".year > 1950 and .rating >= 4.0", out _); + ClassicAssert.IsNotNull(program); + // Postfix: [SEL:year] [NUM:1950] [OP:Gt] [SEL:rating] [NUM:4.0] [OP:Gte] [OP:And] + ClassicAssert.AreEqual(7, program.Length); + } + + [Test] + public void Compiler_EmptyInput() + { + var program = ExprCompiler.TryCompile("", out _); + ClassicAssert.IsNull(program); + + program = ExprCompiler.TryCompile(" ", out _); + ClassicAssert.IsNull(program); + } + + [Test] + public void Compiler_UnexpectedCharacterReturnsFalse() + { + var program = ExprCompiler.TryCompile("@", out _); + ClassicAssert.IsNull(program); + } + + [Test] + public void Compiler_NullLiteral() + { + var program = ExprCompiler.TryCompile("null", out _); + ClassicAssert.IsNotNull(program); + ClassicAssert.AreEqual(1, program.Length); + ClassicAssert.AreEqual(ExprTokenType.Null, 
program.Instructions[0].TokenType); + } + + [Test] + public void Compiler_TupleLiteral() + { + var program = ExprCompiler.TryCompile("[1, \"foo\", 42]", out _); + ClassicAssert.IsNotNull(program); + ClassicAssert.AreEqual(1, program.Length); + ClassicAssert.AreEqual(ExprTokenType.Tuple, program.Instructions[0].TokenType); + ClassicAssert.AreEqual(3, program.Instructions[0].TupleLength); + } + + [Test] + public void Compiler_HyphenInSelector() + { + var program = ExprCompiler.TryCompile(".my-field", out _); + ClassicAssert.IsNotNull(program); + ClassicAssert.AreEqual(1, program.Length); + ClassicAssert.AreEqual(ExprTokenType.Selector, program.Instructions[0].TokenType); + ClassicAssert.AreEqual("my-field", program.Instructions[0].Str); + } + + [Test] + public void Compiler_PrecedenceMultiplicationBeforeAddition() + { + // "1 + 2 * 3" → [1] [2] [3] [*] [+] + var program = ExprCompiler.TryCompile("1 + 2 * 3", out _); + ClassicAssert.IsNotNull(program); + ClassicAssert.AreEqual(5, program.Length); + ClassicAssert.AreEqual(OpCode.Mul, program.Instructions[3].OpCode); + ClassicAssert.AreEqual(OpCode.Add, program.Instructions[4].OpCode); + } + + [Test] + public void Compiler_PrecedenceAndBeforeOr() + { + // "true or false and true" → [1] [0] [1] [and] [or] + var program = ExprCompiler.TryCompile("true or false and true", out _); + ClassicAssert.IsNotNull(program); + ClassicAssert.AreEqual(5, program.Length); + ClassicAssert.AreEqual(OpCode.And, program.Instructions[3].OpCode); + ClassicAssert.AreEqual(OpCode.Or, program.Instructions[4].OpCode); + } + + [Test] + public void Compiler_ParenthesesOverridePrecedence() + { + // "(1 + 2) * 3" → [1] [2] [+] [3] [*] + var program = ExprCompiler.TryCompile("(1 + 2) * 3", out _); + ClassicAssert.IsNotNull(program); + ClassicAssert.AreEqual(5, program.Length); + ClassicAssert.AreEqual(OpCode.Add, program.Instructions[2].OpCode); + ClassicAssert.AreEqual(OpCode.Mul, program.Instructions[4].OpCode); + } + + [Test] + public void 
Compiler_ContainmentOperator() + { + // '"action" in .tags' → [STR:action] [SEL:tags] [OP:In] + var program = ExprCompiler.TryCompile("\"action\" in .tags", out _); + ClassicAssert.IsNotNull(program); + ClassicAssert.AreEqual(3, program.Length); + ClassicAssert.AreEqual(ExprTokenType.Str, program.Instructions[0].TokenType); + ClassicAssert.AreEqual(ExprTokenType.Selector, program.Instructions[1].TokenType); + ClassicAssert.AreEqual(OpCode.In, program.Instructions[2].OpCode); + } + + [Test] + public void Compiler_ExponentiationRightAssociative() + { + // "2 ** 3 ** 2" → 2 ** (3 ** 2) = 512 + var program = ExprCompiler.TryCompile("2 ** 3 ** 2", out _); + ClassicAssert.IsNotNull(program); + ClassicAssert.AreEqual(5, program.Length); + ClassicAssert.AreEqual(OpCode.Pow, program.Instructions[3].OpCode); + ClassicAssert.AreEqual(OpCode.Pow, program.Instructions[4].OpCode); + + var result = ExprTestHelpers.EvaluateFilter("2 ** 3 ** 2", "{}"); + ClassicAssert.AreEqual(512.0, result.Num); + } + + [Test] + public void Compiler_UnaryNot() + { + // "not true" → [NUM:1] [OP:Not] + var program = ExprCompiler.TryCompile("not true", out _); + ClassicAssert.IsNotNull(program); + ClassicAssert.AreEqual(2, program.Length); + ClassicAssert.AreEqual(ExprTokenType.Num, program.Instructions[0].TokenType); + ClassicAssert.AreEqual(ExprTokenType.Op, program.Instructions[1].TokenType); + ClassicAssert.AreEqual(OpCode.Not, program.Instructions[1].OpCode); + } + + [Test] + public void Compiler_ErrorOnMissingClosingParen() + { + var program = ExprCompiler.TryCompile("(1 + 2", out _); + ClassicAssert.IsNull(program); + } + + [Test] + public void Compiler_ErrorOnUnexpectedToken() + { + var program = ExprCompiler.TryCompile(")", out _); + ClassicAssert.IsNull(program); + } + + [Test] + public void Compiler_InWithTupleLiteral() + { + var program = ExprCompiler.TryCompile(".director in [\"Spielberg\", \"Nolan\"]", out _); + ClassicAssert.IsNotNull(program); + ClassicAssert.AreEqual(3, 
program.Length); + ClassicAssert.AreEqual(ExprTokenType.Selector, program.Instructions[0].TokenType); + ClassicAssert.AreEqual(ExprTokenType.Tuple, program.Instructions[1].TokenType); + ClassicAssert.AreEqual(2, program.Instructions[1].TupleLength); + ClassicAssert.AreEqual(OpCode.In, program.Instructions[2].OpCode); + } + } +} diff --git a/test/Garnet.test/Filter/ExprRunnerTests.cs b/test/Garnet.test/Filter/ExprRunnerTests.cs new file mode 100644 index 00000000000..682b0afb6c7 --- /dev/null +++ b/test/Garnet.test/Filter/ExprRunnerTests.cs @@ -0,0 +1,213 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +using Allure.NUnit; +using Garnet.server.Vector.Filter; +using NUnit.Framework; +using NUnit.Framework.Legacy; + +namespace Garnet.test +{ + /// + /// Tests for ExprRunner (stack-based VM) + AttributeExtractor (raw byte JSON extractor). + /// Verifies the compile-once-run-many evaluation pipeline. + /// + [AllureNUnit] + [TestFixture] + public class ExprRunnerTests : AllureTestBase + { + [Test] + public void Runner_Arithmetic() + { + var json = "{}"; + ClassicAssert.AreEqual(5.0, ExprTestHelpers.EvaluateFilter("2 + 3", json).Num); + ClassicAssert.AreEqual(1.0, ExprTestHelpers.EvaluateFilter("3 - 2", json).Num); + ClassicAssert.AreEqual(6.0, ExprTestHelpers.EvaluateFilter("2 * 3", json).Num); + ClassicAssert.AreEqual(2.5, ExprTestHelpers.EvaluateFilter("5 / 2", json).Num); + ClassicAssert.AreEqual(1.0, ExprTestHelpers.EvaluateFilter("7 % 3", json).Num); + ClassicAssert.AreEqual(8.0, ExprTestHelpers.EvaluateFilter("2 ** 3", json).Num); + } + + [Test] + public void Runner_SubtractionWithField() + { + var json = "{\"year\":1980}"; + ClassicAssert.AreEqual(1975.0, ExprTestHelpers.EvaluateFilter(".year - 5", json).Num); + ClassicAssert.IsTrue(ExprTestHelpers.EvaluateFilterTruthy(".year - 5 > 0", json)); + } + + [Test] + public void Runner_Comparison() + { + var json = "{}"; + ClassicAssert.AreEqual(1.0, ExprTestHelpers.EvaluateFilter("5 > 
3", json).Num); + ClassicAssert.AreEqual(0.0, ExprTestHelpers.EvaluateFilter("3 > 5", json).Num); + ClassicAssert.AreEqual(1.0, ExprTestHelpers.EvaluateFilter("3 < 5", json).Num); + ClassicAssert.AreEqual(0.0, ExprTestHelpers.EvaluateFilter("5 < 3", json).Num); + ClassicAssert.AreEqual(1.0, ExprTestHelpers.EvaluateFilter("5 >= 5", json).Num); + ClassicAssert.AreEqual(1.0, ExprTestHelpers.EvaluateFilter("5 <= 5", json).Num); + ClassicAssert.AreEqual(1.0, ExprTestHelpers.EvaluateFilter("5 == 5", json).Num); + ClassicAssert.AreEqual(1.0, ExprTestHelpers.EvaluateFilter("5 != 3", json).Num); + ClassicAssert.AreEqual(0.0, ExprTestHelpers.EvaluateFilter("5 != 5", json).Num); + } + + [Test] + public void Runner_LogicalAnd() + { + var json = "{}"; + ClassicAssert.IsTrue(ExprTestHelpers.EvaluateFilterTruthy("true and true", json)); + ClassicAssert.IsFalse(ExprTestHelpers.EvaluateFilterTruthy("true and false", json)); + ClassicAssert.IsFalse(ExprTestHelpers.EvaluateFilterTruthy("false and true", json)); + ClassicAssert.IsTrue(ExprTestHelpers.EvaluateFilterTruthy("true && true", json)); + } + + [Test] + public void Runner_LogicalOr() + { + var json = "{}"; + ClassicAssert.IsTrue(ExprTestHelpers.EvaluateFilterTruthy("true or false", json)); + ClassicAssert.IsTrue(ExprTestHelpers.EvaluateFilterTruthy("false or true", json)); + ClassicAssert.IsFalse(ExprTestHelpers.EvaluateFilterTruthy("false or false", json)); + ClassicAssert.IsTrue(ExprTestHelpers.EvaluateFilterTruthy("false || true", json)); + } + + [Test] + public void Runner_LogicalNot() + { + var json = "{}"; + ClassicAssert.IsFalse(ExprTestHelpers.EvaluateFilterTruthy("not true", json)); + ClassicAssert.IsTrue(ExprTestHelpers.EvaluateFilterTruthy("not false", json)); + } + + [Test] + public void Runner_StringEquality() + { + var json = "{\"genre\":\"action\"}"; + ClassicAssert.IsTrue(ExprTestHelpers.EvaluateFilterTruthy(".genre == \"action\"", json)); + ClassicAssert.IsFalse(ExprTestHelpers.EvaluateFilterTruthy(".genre == 
\"drama\"", json)); + ClassicAssert.IsTrue(ExprTestHelpers.EvaluateFilterTruthy(".genre != \"drama\"", json)); + } + + [Test] + public void Runner_MemberAccess() + { + var json = "{\"year\":1980,\"rating\":4.5}"; + ClassicAssert.AreEqual(1980.0, ExprTestHelpers.EvaluateFilter(".year", json).Num); + ClassicAssert.AreEqual(4.5, ExprTestHelpers.EvaluateFilter(".rating", json).Num); + } + + [Test] + public void Runner_MissingFieldReturnsFalse() + { + var json = "{\"year\":1980}"; + ClassicAssert.IsFalse(ExprTestHelpers.EvaluateFilterTruthy(".missing", json)); + } + + [Test] + public void Runner_InOperatorWithJsonArray() + { + var json = "{\"tags\":[\"classic\",\"popular\"]}"; + ClassicAssert.IsTrue(ExprTestHelpers.EvaluateFilterTruthy("\"classic\" in .tags", json)); + ClassicAssert.IsTrue(ExprTestHelpers.EvaluateFilterTruthy("\"popular\" in .tags", json)); + ClassicAssert.IsFalse(ExprTestHelpers.EvaluateFilterTruthy("\"modern\" in .tags", json)); + } + + [Test] + public void Runner_InOperatorWithNumericJsonArray() + { + var json = "{\"scores\":[1,2,3]}"; + ClassicAssert.IsTrue(ExprTestHelpers.EvaluateFilterTruthy("2 in .scores", json)); + ClassicAssert.IsFalse(ExprTestHelpers.EvaluateFilterTruthy("5 in .scores", json)); + } + + [Test] + public void Runner_InOperatorWithTupleLiteral() + { + var json = "{\"director\":\"Nolan\"}"; + ClassicAssert.IsTrue(ExprTestHelpers.EvaluateFilterTruthy(".director in [\"Spielberg\", \"Nolan\"]", json)); + ClassicAssert.IsFalse(ExprTestHelpers.EvaluateFilterTruthy(".director in [\"Spielberg\", \"Kubrick\"]", json)); + } + + [Test] + public void Runner_InOperatorSubstringCheck() + { + var json = "{\"name\":\"barfoobar\"}"; + ClassicAssert.IsTrue(ExprTestHelpers.EvaluateFilterTruthy("\"foo\" in .name", json)); + ClassicAssert.IsFalse(ExprTestHelpers.EvaluateFilterTruthy("\"xyz\" in .name", json)); + } + + [Test] + public void Runner_ComplexExpression() + { + var json = 
"{\"year\":1980,\"rating\":4.5,\"genre\":\"action\",\"tags\":[\"classic\",\"popular\"]}"; + + ClassicAssert.IsTrue(ExprTestHelpers.EvaluateFilterTruthy( + ".rating * 2 > 8 and (.year >= 1980 or \"modern\" in .tags)", json)); + + ClassicAssert.IsFalse(ExprTestHelpers.EvaluateFilterTruthy( + "(.year > 2000 or .year < 1970) and .rating >= 4.0", json)); + + ClassicAssert.IsTrue(ExprTestHelpers.EvaluateFilterTruthy("not (.genre == \"drama\")", json)); + ClassicAssert.IsTrue(ExprTestHelpers.EvaluateFilterTruthy(".year / 10 >= 198", json)); + } + + [Test] + public void Runner_BooleanJsonValues() + { + var json = "{\"active\":true,\"deleted\":false}"; + ClassicAssert.IsTrue(ExprTestHelpers.EvaluateFilterTruthy(".active", json)); + ClassicAssert.IsFalse(ExprTestHelpers.EvaluateFilterTruthy(".deleted", json)); + ClassicAssert.IsTrue(ExprTestHelpers.EvaluateFilterTruthy(".active == true", json)); + } + + [Test] + public void Runner_ArithmeticWithNonNumericString_CoercesToZero() + { + var json = "{\"genre\":\"action\"}"; + ClassicAssert.AreEqual(2.0, ExprTestHelpers.EvaluateFilter(".genre + 2", json).Num); + ClassicAssert.AreEqual(-1.0, ExprTestHelpers.EvaluateFilter(".genre - 1", json).Num); + } + + [Test] + public void Runner_NullLiteral() + { + var json = "{\"year\":1980}"; + ClassicAssert.IsTrue(ExprTestHelpers.EvaluateFilterTruthy(".year != null", json)); + } + + [Test] + public void Runner_NonJsonAttributesExcluded() + { + var program = ExprCompiler.TryCompile(".year > 1950", out _); + ClassicAssert.IsNotNull(program); + + var nonJson = System.Text.Encoding.UTF8.GetBytes("this is not json"); + ClassicAssert.IsFalse(ExprRunner.Run(program, nonJson)); + + var emptyJson = System.Text.Encoding.UTF8.GetBytes(""); + ClassicAssert.IsFalse(ExprRunner.Run(program, emptyJson)); + } + + [Test] + public void Runner_ExactNumericEquality() + { + var json = "{}"; + ClassicAssert.IsTrue(ExprTestHelpers.EvaluateFilterTruthy("5 == 5", json)); + 
ClassicAssert.IsFalse(ExprTestHelpers.EvaluateFilterTruthy("5 == 5.0001", json)); + } + + [Test] + public void Runner_HyphenatedField() + { + var json = "{\"my-field\":42}"; + ClassicAssert.AreEqual(42.0, ExprTestHelpers.EvaluateFilter(".my-field", json).Num); + } + + [Test] + public void Runner_JsonEscapeHandling() + { + var json = "{\"name\":\"hello\\\"world\"}"; + ClassicAssert.IsTrue(ExprTestHelpers.EvaluateFilterTruthy(".name == \"hello\\\"world\"", json)); + } + } +} diff --git a/test/Garnet.test/Filter/ExprTestHelpers.cs b/test/Garnet.test/Filter/ExprTestHelpers.cs new file mode 100644 index 00000000000..6f6eaecc03f --- /dev/null +++ b/test/Garnet.test/Filter/ExprTestHelpers.cs @@ -0,0 +1,200 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +using System; +using System.Text; +using Garnet.server.Vector.Filter; + +namespace Garnet.test +{ + /// + /// Test helpers for the Redis-style filter pipeline. + /// Compiles filter expressions and runs them against JSON attribute data. + /// + internal static class ExprTestHelpers + { + /// + /// Compile and run a filter expression against JSON, returning the result as an ExprToken. + /// This is useful for testing arithmetic/comparison results. + /// + internal static ExprToken EvaluateFilter(string expression, string json) + { + var program = ExprCompiler.TryCompile(expression, out var errpos); + if (program == null) + throw new InvalidOperationException($"Compilation failed at position {errpos}"); + + // For single-value expressions (no selectors), run returns bool. + // To get the actual value, we use RunAndReturnTop. + var jsonBytes = Encoding.UTF8.GetBytes(json); + return RunAndReturnTop(program, jsonBytes); + } + + /// + /// Compile and run a filter expression against JSON, returning a boolean result. 
+ /// + internal static bool EvaluateFilterTruthy(string expression, string json) + { + var program = ExprCompiler.TryCompile(expression, out var errpos); + if (program == null) + throw new InvalidOperationException($"Compilation failed at position {errpos}"); + + var jsonBytes = Encoding.UTF8.GetBytes(json); + return ExprRunner.Run(program, jsonBytes); + } + + /// + /// Try to compile a filter expression. Returns true on success. + /// + internal static bool TryCompile(string expression, out ExprProgram program) + { + program = ExprCompiler.TryCompile(expression, out _); + return program != null; + } + + /// + /// Execute a compiled program and return the top-of-stack value (for testing). + /// This is a test-only method that mirrors ExprRunner.Run but returns the raw result + /// instead of a boolean, so tests can inspect numeric/string values. + /// + private static ExprToken RunAndReturnTop(ExprProgram program, ReadOnlySpan json) + { + var stack = new ExprToken[256]; + var stackLen = 0; + + for (var i = 0; i < program.Length; i++) + { + var inst = program.Instructions[i]; + + if (inst.TokenType == ExprTokenType.Selector) + { + var extracted = AttributeExtractor.ExtractField(json, inst.Str); + if (extracted == null) + return ExprToken.NewNull(); + stack[stackLen++] = extracted; + continue; + } + + if (inst.TokenType != ExprTokenType.Op) + { + stack[stackLen++] = inst; + continue; + } + + var arity = OpTable.GetArity(inst.OpCode); + ExprToken b = stackLen > 0 ? stack[--stackLen] : null; + ExprToken a = arity == 2 && stackLen > 0 ? stack[--stackLen] : null; + + var result = ExprToken.NewNum(0); + + switch (inst.OpCode) + { + case OpCode.Not: + result.Num = TokenToBool(b) == 0 ? 
1 : 0; + break; + case OpCode.Pow: + result.Num = Math.Pow(TokenToNum(a), TokenToNum(b)); + break; + case OpCode.Mul: + result.Num = TokenToNum(a) * TokenToNum(b); + break; + case OpCode.Div: + result.Num = TokenToNum(a) / TokenToNum(b); + break; + case OpCode.Mod: + result.Num = TokenToNum(a) % TokenToNum(b); + break; + case OpCode.Add: + result.Num = TokenToNum(a) + TokenToNum(b); + break; + case OpCode.Sub: + result.Num = TokenToNum(a) - TokenToNum(b); + break; + case OpCode.Gt: + result.Num = TokenToNum(a) > TokenToNum(b) ? 1 : 0; + break; + case OpCode.Gte: + result.Num = TokenToNum(a) >= TokenToNum(b) ? 1 : 0; + break; + case OpCode.Lt: + result.Num = TokenToNum(a) < TokenToNum(b) ? 1 : 0; + break; + case OpCode.Lte: + result.Num = TokenToNum(a) <= TokenToNum(b) ? 1 : 0; + break; + case OpCode.Eq: + result.Num = TokensEqual(a, b) ? 1 : 0; + break; + case OpCode.Neq: + result.Num = !TokensEqual(a, b) ? 1 : 0; + break; + case OpCode.In: + result.Num = EvalIn(a, b) ? 1 : 0; + break; + case OpCode.And: + result.Num = TokenToBool(a) != 0 && TokenToBool(b) != 0 ? 1 : 0; + break; + case OpCode.Or: + result.Num = TokenToBool(a) != 0 || TokenToBool(b) != 0 ? 1 : 0; + break; + } + + stack[stackLen++] = result; + } + + return stackLen > 0 ? stack[stackLen - 1] : ExprToken.NewNull(); + } + + private static double TokenToNum(ExprToken t) + { + if (t == null) return 0; + if (t.TokenType == ExprTokenType.Num) return t.Num; + if (t.TokenType == ExprTokenType.Str && t.Str != null) + { + return double.TryParse(t.Str, System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowLeadingSign, + System.Globalization.CultureInfo.InvariantCulture, out var result) ? result : 0; + } + return 0; + } + + private static double TokenToBool(ExprToken t) + { + if (t == null) return 0; + if (t.TokenType == ExprTokenType.Num) return t.Num != 0 ? 
1 : 0; + if (t.TokenType == ExprTokenType.Str && (t.Str == null || t.Str.Length == 0)) return 0; + if (t.TokenType == ExprTokenType.Null) return 0; + return 1; + } + + private static bool TokensEqual(ExprToken a, ExprToken b) + { + if (a == null || b == null) return a == null && b == null; + if (a.TokenType == ExprTokenType.Str && b.TokenType == ExprTokenType.Str) + return string.Equals(a.Str, b.Str, StringComparison.Ordinal); + if (a.TokenType == ExprTokenType.Num && b.TokenType == ExprTokenType.Num) + return a.Num == b.Num; + if (a.TokenType == ExprTokenType.Null || b.TokenType == ExprTokenType.Null) + return a.TokenType == b.TokenType; + return TokenToNum(a) == TokenToNum(b); + } + + private static bool EvalIn(ExprToken a, ExprToken b) + { + if (b == null) return false; + if (b.TokenType == ExprTokenType.Tuple) + { + for (var i = 0; i < b.TupleLength; i++) + { + if (TokensEqual(a, b.TupleElements[i])) + return true; + } + return false; + } + if (a != null && a.TokenType == ExprTokenType.Str && b.TokenType == ExprTokenType.Str) + { + if (a.Str == null || b.Str == null) return false; + return b.Str.IndexOf(a.Str, StringComparison.Ordinal) >= 0; + } + return false; + } + } +} diff --git a/test/Garnet.test/Filter/VectorFilterEvaluatorTests.cs b/test/Garnet.test/Filter/VectorFilterEvaluatorTests.cs deleted file mode 100644 index ced420b8d39..00000000000 --- a/test/Garnet.test/Filter/VectorFilterEvaluatorTests.cs +++ /dev/null @@ -1,189 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT license. 
- -using Allure.NUnit; -using Garnet.server.Vector.Filter; -using NUnit.Framework; -using NUnit.Framework.Legacy; - -namespace Garnet.test -{ - [AllureNUnit] - [TestFixture] - public class VectorFilterEvaluatorTests : AllureTestBase - { - [Test] - public void Evaluator_Arithmetic() - { - var json = "{}"; - ClassicAssert.AreEqual(5.0, VectorFilterTestHelpers.EvaluateFilter("2 + 3", json).AsNumber()); - ClassicAssert.AreEqual(1.0, VectorFilterTestHelpers.EvaluateFilter("3 - 2", json).AsNumber()); - ClassicAssert.AreEqual(6.0, VectorFilterTestHelpers.EvaluateFilter("2 * 3", json).AsNumber()); - ClassicAssert.AreEqual(2.5, VectorFilterTestHelpers.EvaluateFilter("5 / 2", json).AsNumber()); - ClassicAssert.AreEqual(1.0, VectorFilterTestHelpers.EvaluateFilter("7 % 3", json).AsNumber()); - ClassicAssert.AreEqual(8.0, VectorFilterTestHelpers.EvaluateFilter("2 ** 3", json).AsNumber()); - } - - [Test] - public void Evaluator_SubtractionWithField() - { - var json = "{\"year\":1980}"; - ClassicAssert.AreEqual(1975.0, VectorFilterTestHelpers.EvaluateFilter(".year - 5", json).AsNumber()); - ClassicAssert.IsTrue(VectorFilterTestHelpers.EvaluateFilterTruthy(".year - 5 > 0", json)); - } - - [Test] - public void Evaluator_Comparison() - { - var json = "{}"; - ClassicAssert.AreEqual(1.0, VectorFilterTestHelpers.EvaluateFilter("5 > 3", json).AsNumber()); - ClassicAssert.AreEqual(0.0, VectorFilterTestHelpers.EvaluateFilter("3 > 5", json).AsNumber()); - ClassicAssert.AreEqual(1.0, VectorFilterTestHelpers.EvaluateFilter("3 < 5", json).AsNumber()); - ClassicAssert.AreEqual(0.0, VectorFilterTestHelpers.EvaluateFilter("5 < 3", json).AsNumber()); - ClassicAssert.AreEqual(1.0, VectorFilterTestHelpers.EvaluateFilter("5 >= 5", json).AsNumber()); - ClassicAssert.AreEqual(1.0, VectorFilterTestHelpers.EvaluateFilter("5 <= 5", json).AsNumber()); - ClassicAssert.AreEqual(1.0, VectorFilterTestHelpers.EvaluateFilter("5 == 5", json).AsNumber()); - ClassicAssert.AreEqual(1.0, 
VectorFilterTestHelpers.EvaluateFilter("5 != 3", json).AsNumber()); - ClassicAssert.AreEqual(0.0, VectorFilterTestHelpers.EvaluateFilter("5 != 5", json).AsNumber()); - } - - [Test] - public void Evaluator_LogicalAnd() - { - var json = "{}"; - ClassicAssert.IsTrue(VectorFilterTestHelpers.EvaluateFilterTruthy("true and true", json)); - ClassicAssert.IsFalse(VectorFilterTestHelpers.EvaluateFilterTruthy("true and false", json)); - ClassicAssert.IsFalse(VectorFilterTestHelpers.EvaluateFilterTruthy("false and true", json)); - ClassicAssert.IsTrue(VectorFilterTestHelpers.EvaluateFilterTruthy("true && true", json)); - } - - [Test] - public void Evaluator_LogicalOr() - { - var json = "{}"; - ClassicAssert.IsTrue(VectorFilterTestHelpers.EvaluateFilterTruthy("true or false", json)); - ClassicAssert.IsTrue(VectorFilterTestHelpers.EvaluateFilterTruthy("false or true", json)); - ClassicAssert.IsFalse(VectorFilterTestHelpers.EvaluateFilterTruthy("false or false", json)); - ClassicAssert.IsTrue(VectorFilterTestHelpers.EvaluateFilterTruthy("false || true", json)); - } - - [Test] - public void Evaluator_LogicalNot() - { - var json = "{}"; - ClassicAssert.IsFalse(VectorFilterTestHelpers.EvaluateFilterTruthy("not true", json)); - ClassicAssert.IsTrue(VectorFilterTestHelpers.EvaluateFilterTruthy("not false", json)); - } - - [Test] - public void Evaluator_StringEquality() - { - var json = "{\"genre\":\"action\"}"; - ClassicAssert.IsTrue(VectorFilterTestHelpers.EvaluateFilterTruthy(".genre == \"action\"", json)); - ClassicAssert.IsFalse(VectorFilterTestHelpers.EvaluateFilterTruthy(".genre == \"drama\"", json)); - ClassicAssert.IsTrue(VectorFilterTestHelpers.EvaluateFilterTruthy(".genre != \"drama\"", json)); - } - - [Test] - public void Evaluator_MemberAccess() - { - var json = "{\"year\":1980,\"rating\":4.5}"; - ClassicAssert.AreEqual(1980.0, VectorFilterTestHelpers.EvaluateFilter(".year", json).AsNumber()); - ClassicAssert.AreEqual(4.5, VectorFilterTestHelpers.EvaluateFilter(".rating", 
json).AsNumber()); - } - - [Test] - public void Evaluator_MissingFieldReturnsNull() - { - var json = "{\"year\":1980}"; - var result = VectorFilterTestHelpers.EvaluateFilter(".missing", json); - ClassicAssert.IsTrue(result.IsNull); - ClassicAssert.IsFalse(VectorFilterTestHelpers.EvaluateFilterTruthy(".missing", json)); - } - - [Test] - public void Evaluator_InOperatorWithArray() - { - var json = "{\"tags\":[\"classic\",\"popular\"]}"; - ClassicAssert.IsTrue(VectorFilterTestHelpers.EvaluateFilterTruthy("\"classic\" in .tags", json)); - ClassicAssert.IsTrue(VectorFilterTestHelpers.EvaluateFilterTruthy("\"popular\" in .tags", json)); - ClassicAssert.IsFalse(VectorFilterTestHelpers.EvaluateFilterTruthy("\"modern\" in .tags", json)); - } - - [Test] - public void Evaluator_InOperatorWithNumericArray() - { - var json = "{\"scores\":[1,2,3]}"; - ClassicAssert.IsTrue(VectorFilterTestHelpers.EvaluateFilterTruthy("2 in .scores", json)); - ClassicAssert.IsFalse(VectorFilterTestHelpers.EvaluateFilterTruthy("5 in .scores", json)); - } - - [Test] - public void Evaluator_IsTruthy_FilterValue() - { - ClassicAssert.IsFalse(VectorFilterEvaluator.IsTruthy(FilterValue.Null)); - ClassicAssert.IsFalse(VectorFilterEvaluator.IsTruthy(FilterValue.False)); - ClassicAssert.IsFalse(VectorFilterEvaluator.IsTruthy(FilterValue.FromNumber(0.0))); - ClassicAssert.IsFalse(VectorFilterEvaluator.IsTruthy(FilterValue.FromString(""))); - - ClassicAssert.IsTrue(VectorFilterEvaluator.IsTruthy(FilterValue.True)); - ClassicAssert.IsTrue(VectorFilterEvaluator.IsTruthy(FilterValue.FromNumber(1.0))); - ClassicAssert.IsTrue(VectorFilterEvaluator.IsTruthy(FilterValue.FromNumber(-1.0))); - ClassicAssert.IsTrue(VectorFilterEvaluator.IsTruthy(FilterValue.FromString("hello"))); - } - - [Test] - public void Evaluator_ComplexExpression() - { - var json = "{\"year\":1980,\"rating\":4.5,\"genre\":\"action\",\"tags\":[\"classic\",\"popular\"]}"; - - ClassicAssert.IsTrue(VectorFilterTestHelpers.EvaluateFilterTruthy( - 
".rating * 2 > 8 and (.year >= 1980 or \"modern\" in .tags)", json)); - - ClassicAssert.IsFalse(VectorFilterTestHelpers.EvaluateFilterTruthy( - "(.year > 2000 or .year < 1970) and .rating >= 4.0", json)); - - ClassicAssert.IsTrue(VectorFilterTestHelpers.EvaluateFilterTruthy("not (.genre == \"drama\")", json)); - ClassicAssert.IsTrue(VectorFilterTestHelpers.EvaluateFilterTruthy(".year / 10 >= 198", json)); - } - - [Test] - public void Evaluator_ComparisonWithMissingField() - { - var json = "{\"year\":1980}"; - ClassicAssert.IsFalse(VectorFilterTestHelpers.EvaluateFilterTruthy(".missing > 1950", json)); - } - - [Test] - public void Evaluator_BooleanJsonValues() - { - var json = "{\"active\":true,\"deleted\":false}"; - ClassicAssert.IsTrue(VectorFilterTestHelpers.EvaluateFilterTruthy(".active", json)); - ClassicAssert.IsFalse(VectorFilterTestHelpers.EvaluateFilterTruthy(".deleted", json)); - ClassicAssert.IsTrue(VectorFilterTestHelpers.EvaluateFilterTruthy(".active == true", json)); - } - - [Test] - public void Evaluator_ArithmeticWithNonNumericString_CoercesToZero() - { - var json = "{\"genre\":\"action\"}"; - ClassicAssert.AreEqual(2.0, VectorFilterTestHelpers.EvaluateFilter(".genre + 2", json).AsNumber()); - ClassicAssert.AreEqual(-1.0, VectorFilterTestHelpers.EvaluateFilter(".genre - 1", json).AsNumber()); - } - - [Test] - public void Evaluator_InOperatorWithNonArrayHaystack_ReturnsFalse() - { - var json = "{\"genre\":\"action\"}"; - ClassicAssert.IsFalse(VectorFilterTestHelpers.EvaluateFilterTruthy("\"action\" in .genre", json)); - } - - [Test] - public void Evaluator_EqualityBetweenNumberAndNonNumericString_ReturnsFalse() - { - var json = "{\"genre\":\"action\"}"; - ClassicAssert.IsFalse(VectorFilterTestHelpers.EvaluateFilterTruthy(".genre == 1", json)); - ClassicAssert.IsTrue(VectorFilterTestHelpers.EvaluateFilterTruthy(".genre == 0", json)); - } - } -} \ No newline at end of file diff --git a/test/Garnet.test/Filter/VectorFilterParserTests.cs 
b/test/Garnet.test/Filter/VectorFilterParserTests.cs deleted file mode 100644 index 77bf17a2550..00000000000 --- a/test/Garnet.test/Filter/VectorFilterParserTests.cs +++ /dev/null @@ -1,213 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT license. - -using System.Collections.Generic; -using Allure.NUnit; -using Garnet.server.Vector.Filter; -using NUnit.Framework; -using NUnit.Framework.Legacy; - -namespace Garnet.test -{ - [AllureNUnit] - [TestFixture] - public class VectorFilterParserTests : AllureTestBase - { - [Test] - public void Parser_NumberLiteral() - { - ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize("42", out var tokens, out _)); - ClassicAssert.IsTrue(VectorFilterParser.TryParseExpression(tokens, 0, out var expr, out var end, out _)); - ClassicAssert.AreEqual(1, end); - ClassicAssert.IsInstanceOf(expr); - var lit = (LiteralExpr)expr; - ClassicAssert.AreEqual(FilterValueKind.Number, lit.Value.Kind); - ClassicAssert.AreEqual(42.0, lit.Value.AsNumber()); - } - - [Test] - public void Parser_StringLiteral() - { - ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize("\"hello\"", out var tokens, out _)); - ClassicAssert.IsTrue(VectorFilterParser.TryParseExpression(tokens, 0, out var expr, out _, out _)); - ClassicAssert.IsInstanceOf(expr); - var lit = (LiteralExpr)expr; - ClassicAssert.AreEqual(FilterValueKind.String, lit.Value.Kind); - ClassicAssert.AreEqual("hello", lit.Value.AsString()); - } - - [Test] - public void Parser_BooleanLiteral() - { - ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize("true", out var tokens, out _)); - ClassicAssert.IsTrue(VectorFilterParser.TryParseExpression(tokens, 0, out var expr, out _, out _)); - ClassicAssert.IsInstanceOf(expr); - var lit = (LiteralExpr)expr; - ClassicAssert.AreEqual(FilterValueKind.Number, lit.Value.Kind); - ClassicAssert.AreEqual(1.0, lit.Value.AsNumber()); - - ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize("false", out tokens, out _)); - 
ClassicAssert.IsTrue(VectorFilterParser.TryParseExpression(tokens, 0, out expr, out _, out _)); - ClassicAssert.IsInstanceOf(expr); - lit = (LiteralExpr)expr; - ClassicAssert.AreEqual(FilterValueKind.Number, lit.Value.Kind); - ClassicAssert.AreEqual(0.0, lit.Value.AsNumber()); - } - - [Test] - public void Parser_MemberAccess() - { - ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize(".year", out var tokens, out _)); - ClassicAssert.IsTrue(VectorFilterParser.TryParseExpression(tokens, 0, out var expr, out _, out _)); - ClassicAssert.IsInstanceOf(expr); - ClassicAssert.AreEqual("year", ((MemberExpr)expr).Property); - } - - [Test] - public void Parser_UnaryNot() - { - ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize("not true", out var tokens, out _)); - ClassicAssert.IsTrue(VectorFilterParser.TryParseExpression(tokens, 0, out var expr, out _, out _)); - ClassicAssert.IsInstanceOf(expr); - var unary = (UnaryExpr)expr; - ClassicAssert.AreEqual(OperatorKind.Not, unary.Operator); - ClassicAssert.IsInstanceOf(unary.Operand); - } - - [Test] - public void Parser_UnaryNegation() - { - ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize(".a + (-.b)", out var tokens, out _)); - ClassicAssert.IsTrue(VectorFilterParser.TryParseExpression(tokens, 0, out var expr, out _, out _)); - ClassicAssert.IsInstanceOf(expr); - var binary = (BinaryExpr)expr; - ClassicAssert.AreEqual(OperatorKind.Add, binary.Operator); - ClassicAssert.IsInstanceOf(binary.Right); - ClassicAssert.AreEqual(OperatorKind.Negate, ((UnaryExpr)binary.Right).Operator); - } - - [Test] - public void Parser_OperatorPrecedence_MultiplicationBeforeAddition() - { - ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize("1 + 2 * 3", out var tokens, out _)); - ClassicAssert.IsTrue(VectorFilterParser.TryParseExpression(tokens, 0, out var expr, out _, out _)); - ClassicAssert.IsInstanceOf(expr); - var binary = (BinaryExpr)expr; - ClassicAssert.AreEqual(OperatorKind.Add, binary.Operator); - 
ClassicAssert.IsInstanceOf(binary.Left); - ClassicAssert.IsInstanceOf(binary.Right); - ClassicAssert.AreEqual(OperatorKind.Multiply, ((BinaryExpr)binary.Right).Operator); - } - - [Test] - public void Parser_OperatorPrecedence_AndBeforeOr() - { - ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize("true or false and true", out var tokens, out _)); - ClassicAssert.IsTrue(VectorFilterParser.TryParseExpression(tokens, 0, out var expr, out _, out _)); - ClassicAssert.IsInstanceOf(expr); - var binary = (BinaryExpr)expr; - ClassicAssert.AreEqual(OperatorKind.Or, binary.Operator); - ClassicAssert.IsInstanceOf(binary.Left); - ClassicAssert.IsInstanceOf(binary.Right); - ClassicAssert.AreEqual(OperatorKind.And, ((BinaryExpr)binary.Right).Operator); - } - - [Test] - public void Parser_ParenthesesOverridePrecedence() - { - ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize("(1 + 2) * 3", out var tokens, out _)); - ClassicAssert.IsTrue(VectorFilterParser.TryParseExpression(tokens, 0, out var expr, out _, out _)); - ClassicAssert.IsInstanceOf(expr); - var binary = (BinaryExpr)expr; - ClassicAssert.AreEqual(OperatorKind.Multiply, binary.Operator); - ClassicAssert.IsInstanceOf(binary.Left); - ClassicAssert.AreEqual(OperatorKind.Add, ((BinaryExpr)binary.Left).Operator); - } - - [Test] - public void Parser_Containment() - { - ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize("\"action\" in .tags", out var tokens, out _)); - ClassicAssert.IsTrue(VectorFilterParser.TryParseExpression(tokens, 0, out var expr, out _, out _)); - ClassicAssert.IsInstanceOf(expr); - var binary = (BinaryExpr)expr; - ClassicAssert.AreEqual(OperatorKind.In, binary.Operator); - ClassicAssert.IsInstanceOf(binary.Left); - ClassicAssert.IsInstanceOf(binary.Right); - } - - [Test] - public void Parser_ExponentiationRightAssociative() - { - ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize("2 ** 3 ** 2", out var tokens, out _)); - ClassicAssert.IsTrue(VectorFilterParser.TryParseExpression(tokens, 0, 
out var expr, out _, out _)); - ClassicAssert.IsInstanceOf(expr); - var binary = (BinaryExpr)expr; - ClassicAssert.AreEqual(OperatorKind.Power, binary.Operator); - ClassicAssert.IsInstanceOf(binary.Left); - ClassicAssert.IsInstanceOf(binary.Right); - ClassicAssert.AreEqual(OperatorKind.Power, ((BinaryExpr)binary.Right).Operator); - } - - [Test] - public void Parser_ErrorOnUnexpectedEnd() - { - var tokens = new List(); - ClassicAssert.IsFalse(VectorFilterParser.TryParseExpression(tokens, 0, out var result, out _, out var error)); - ClassicAssert.IsNull(result); - ClassicAssert.IsNotNull(error); - ClassicAssert.IsTrue(error.Contains("Unexpected end")); - } - - [Test] - public void Parser_ErrorOnMissingClosingParen() - { - ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize("(1 + 2", out var tokens, out _)); - ClassicAssert.IsFalse(VectorFilterParser.TryParseExpression(tokens, 0, out var result, out _, out var error)); - ClassicAssert.IsNull(result); - ClassicAssert.IsNotNull(error); - ClassicAssert.IsTrue(error.Contains("Missing closing parenthesis")); - } - - [Test] - public void Parser_ErrorOnInvalidNumberLiteral_DoubleDot() - { - // Now caught at tokenization time: "1..023" has multiple decimal points - ClassicAssert.IsFalse(VectorFilterTokenizer.TryTokenize("1..023", out _, out var error)); - ClassicAssert.IsNotNull(error); - ClassicAssert.IsTrue(error.Contains("multiple decimal points")); - } - - [Test] - public void Parser_ErrorOnInvalidNumberLiteral_MultipleDots() - { - // Now caught at tokenization time: "1.2.3" has multiple decimal points - ClassicAssert.IsFalse(VectorFilterTokenizer.TryTokenize("1.2.3", out _, out var error)); - ClassicAssert.IsNotNull(error); - ClassicAssert.IsTrue(error.Contains("multiple decimal points")); - } - - [Test] - public void Parser_ErrorOnExcessiveRecursionDepth() - { - // Build a deeply nested expression: (((((...(1)...)))) - var depth = 100; - var expr = new string('(', depth) + "1" + new string(')', depth); - 
ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize(expr, out var tokens, out _)); - ClassicAssert.IsFalse(VectorFilterParser.TryParseExpression(tokens, 0, out var result, out _, out var error)); - ClassicAssert.IsNull(result); - ClassicAssert.IsNotNull(error); - ClassicAssert.IsTrue(error.Contains("maximum nesting depth")); - } - - [Test] - public void Parser_ErrorOnUnexpectedToken() - { - ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize(")", out var tokens, out _)); - ClassicAssert.IsFalse(VectorFilterParser.TryParseExpression(tokens, 0, out var result, out _, out var error)); - ClassicAssert.IsNull(result); - ClassicAssert.IsNotNull(error); - ClassicAssert.IsTrue(error.Contains("Unexpected token")); - } - } -} \ No newline at end of file diff --git a/test/Garnet.test/Filter/VectorFilterTestHelpers.cs b/test/Garnet.test/Filter/VectorFilterTestHelpers.cs deleted file mode 100644 index 310732378f9..00000000000 --- a/test/Garnet.test/Filter/VectorFilterTestHelpers.cs +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT license. 
- -using System.Text.Json; -using Garnet.server.Vector.Filter; - -namespace Garnet.test -{ - internal static class VectorFilterTestHelpers - { - internal static FilterValue EvaluateFilter(string expression, string json) - { - if (!VectorFilterTokenizer.TryTokenize(expression, out var tokens, out var error)) - throw new System.InvalidOperationException($"Tokenization failed: {error}"); - if (!VectorFilterParser.TryParseExpression(tokens, 0, out var expr, out _, out error)) - throw new System.InvalidOperationException($"Parse failed: {error}"); - using var doc = JsonDocument.Parse(json); - return VectorFilterEvaluator.EvaluateExpression(expr, doc.RootElement); - } - - internal static bool EvaluateFilterTruthy(string expression, string json) - { - if (!VectorFilterTokenizer.TryTokenize(expression, out var tokens, out var error)) - throw new System.InvalidOperationException($"Tokenization failed: {error}"); - if (!VectorFilterParser.TryParseExpression(tokens, 0, out var expr, out _, out error)) - throw new System.InvalidOperationException($"Parse failed: {error}"); - using var doc = JsonDocument.Parse(json); - return VectorFilterEvaluator.EvaluateFilterBool(expr, doc.RootElement); - } - - internal static bool TryTokenize(string expression, out System.Collections.Generic.List tokens, out string error) - { - return VectorFilterTokenizer.TryTokenize(expression, out tokens, out error); - } - - internal static bool TryParse(string expression, out Expr result, out int end, out string error) - { - if (!VectorFilterTokenizer.TryTokenize(expression, out var tokens, out error)) - { - result = null; - end = 0; - return false; - } - return VectorFilterParser.TryParseExpression(tokens, 0, out result, out end, out error); - } - } -} \ No newline at end of file diff --git a/test/Garnet.test/Filter/VectorFilterTokenizerTests.cs b/test/Garnet.test/Filter/VectorFilterTokenizerTests.cs deleted file mode 100644 index bfba38dc1d3..00000000000 --- 
a/test/Garnet.test/Filter/VectorFilterTokenizerTests.cs +++ /dev/null @@ -1,222 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT license. - -using System.Linq; -using Allure.NUnit; -using Garnet.server.Vector.Filter; -using NUnit.Framework; -using NUnit.Framework.Legacy; - -namespace Garnet.test -{ - [AllureNUnit] - [TestFixture] - public class VectorFilterTokenizerTests : AllureTestBase - { - [Test] - public void Tokenizer_IntegerNumbers() - { - ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize("42", out var tokens, out _)); - ClassicAssert.AreEqual(1, tokens.Count); - ClassicAssert.AreEqual(TokenType.Number, tokens[0].Type); - ClassicAssert.AreEqual("42", tokens[0].Value); - } - - [Test] - public void Tokenizer_DecimalNumbers() - { - ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize("3.14", out var tokens, out _)); - ClassicAssert.AreEqual(1, tokens.Count); - ClassicAssert.AreEqual(TokenType.Number, tokens[0].Type); - ClassicAssert.AreEqual("3.14", tokens[0].Value); - } - - [Test] - public void Tokenizer_NegativeNumbers() - { - ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize("-5", out var tokens, out _)); - ClassicAssert.AreEqual(1, tokens.Count); - ClassicAssert.AreEqual(TokenType.Number, tokens[0].Type); - ClassicAssert.AreEqual("-5", tokens[0].Value); - } - - [Test] - public void Tokenizer_StringLiterals() - { - ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize("\"hello\"", out var tokens, out _)); - ClassicAssert.AreEqual(1, tokens.Count); - ClassicAssert.AreEqual(TokenType.String, tokens[0].Type); - ClassicAssert.AreEqual("hello", tokens[0].Value); - - ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize("'world'", out tokens, out _)); - ClassicAssert.AreEqual(1, tokens.Count); - ClassicAssert.AreEqual(TokenType.String, tokens[0].Type); - ClassicAssert.AreEqual("world", tokens[0].Value); - } - - [Test] - public void Tokenizer_EscapedStringLiterals() - { - 
ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize("\"hello\\\"world\"", out var tokens, out _)); - ClassicAssert.AreEqual(1, tokens.Count); - ClassicAssert.AreEqual(TokenType.String, tokens[0].Type); - ClassicAssert.AreEqual("hello\\\"world", tokens[0].Value); - } - - [Test] - public void Tokenizer_UnterminatedStringReturnsFalse() - { - ClassicAssert.IsFalse(VectorFilterTokenizer.TryTokenize("\"hello", out var tokens, out var error)); - ClassicAssert.IsNull(tokens); - ClassicAssert.IsNotNull(error); - ClassicAssert.IsTrue(error.Contains("Unterminated string")); - } - - [Test] - public void Tokenizer_SubtractionNotConfusedWithNegative() - { - ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize(".a - 5", out var tokens, out _)); - ClassicAssert.AreEqual(3, tokens.Count); - ClassicAssert.AreEqual(TokenType.Identifier, tokens[0].Type); - ClassicAssert.AreEqual(TokenType.Operator, tokens[1].Type); - ClassicAssert.AreEqual("-", tokens[1].Value); - ClassicAssert.AreEqual(TokenType.Number, tokens[2].Type); - ClassicAssert.AreEqual("5", tokens[2].Value); - } - - [Test] - public void Tokenizer_Identifiers() - { - ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize(".year", out var tokens, out _)); - ClassicAssert.AreEqual(1, tokens.Count); - ClassicAssert.AreEqual(TokenType.Identifier, tokens[0].Type); - ClassicAssert.AreEqual(".year", tokens[0].Value); - - ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize("_field", out tokens, out _)); - ClassicAssert.AreEqual(1, tokens.Count); - ClassicAssert.AreEqual(TokenType.Identifier, tokens[0].Type); - ClassicAssert.AreEqual("_field", tokens[0].Value); - } - - [Test] - public void Tokenizer_Keywords() - { - var keywords = new[] { "and", "or", "not", "in" }; - foreach (var kw in keywords) - { - ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize(kw, out var tokens, out _)); - ClassicAssert.AreEqual(1, tokens.Count); - ClassicAssert.AreEqual(TokenType.Keyword, tokens[0].Type); - ClassicAssert.AreEqual(kw, 
tokens[0].Value); - } - } - - [Test] - public void Tokenizer_Booleans() - { - ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize("true", out var tokens, out _)); - ClassicAssert.AreEqual(1, tokens.Count); - ClassicAssert.AreEqual(TokenType.Boolean, tokens[0].Type); - ClassicAssert.AreEqual("true", tokens[0].Value); - - ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize("false", out tokens, out _)); - ClassicAssert.AreEqual(1, tokens.Count); - ClassicAssert.AreEqual(TokenType.Boolean, tokens[0].Type); - ClassicAssert.AreEqual("false", tokens[0].Value); - } - - [Test] - public void Tokenizer_TwoCharOperators() - { - var ops = new[] { "==", "!=", ">=", "<=", "&&", "||", "**" }; - foreach (var op in ops) - { - ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize($"1 {op} 2", out var tokens, out _)); - var opToken = tokens.First(t => t.Type == TokenType.Operator); - ClassicAssert.AreEqual(op, opToken.Value); - } - } - - [Test] - public void Tokenizer_SingleCharOperators() - { - var ops = new[] { ">", "<", "+", "-", "*", "/", "%", "!" 
}; - foreach (var op in ops) - { - ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize($".a {op} .b", out var tokens, out _)); - var opToken = tokens.First(t => t.Type == TokenType.Operator); - ClassicAssert.AreEqual(op, opToken.Value); - } - } - - [Test] - public void Tokenizer_Delimiters() - { - ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize("(.year > 10)", out var tokens, out _)); - ClassicAssert.AreEqual(TokenType.Delimiter, tokens[0].Type); - ClassicAssert.AreEqual("(", tokens[0].Value); - ClassicAssert.AreEqual(TokenType.Delimiter, tokens[4].Type); - ClassicAssert.AreEqual(")", tokens[4].Value); - } - - [Test] - public void Tokenizer_ComplexExpression() - { - ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize(".year > 1950 and .rating >= 4.0", out var tokens, out _)); - ClassicAssert.AreEqual(7, tokens.Count); - ClassicAssert.AreEqual(TokenType.Identifier, tokens[0].Type); - ClassicAssert.AreEqual(TokenType.Operator, tokens[1].Type); - ClassicAssert.AreEqual(TokenType.Number, tokens[2].Type); - ClassicAssert.AreEqual(TokenType.Keyword, tokens[3].Type); - ClassicAssert.AreEqual(TokenType.Identifier, tokens[4].Type); - ClassicAssert.AreEqual(TokenType.Operator, tokens[5].Type); - ClassicAssert.AreEqual(TokenType.Number, tokens[6].Type); - } - - [Test] - public void Tokenizer_EmptyInput() - { - ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize("", out var tokens, out _)); - ClassicAssert.AreEqual(0, tokens.Count); - - ClassicAssert.IsTrue(VectorFilterTokenizer.TryTokenize(" ", out tokens, out _)); - ClassicAssert.AreEqual(0, tokens.Count); - } - - [Test] - public void Tokenizer_UnexpectedCharacterReturnsFalse() - { - ClassicAssert.IsFalse(VectorFilterTokenizer.TryTokenize("@", out var tokens, out var error)); - ClassicAssert.IsNull(tokens); - ClassicAssert.IsNotNull(error); - ClassicAssert.IsTrue(error.Contains("Unexpected character")); - } - - [Test] - public void Tokenizer_MultipleDotsInNumberReturnsFalse() - { - 
ClassicAssert.IsFalse(VectorFilterTokenizer.TryTokenize("1.2.3", out var tokens, out var error)); - ClassicAssert.IsNull(tokens); - ClassicAssert.IsNotNull(error); - ClassicAssert.IsTrue(error.Contains("multiple decimal points")); - } - - [Test] - public void Tokenizer_DoubleDotInNumberReturnsFalse() - { - ClassicAssert.IsFalse(VectorFilterTokenizer.TryTokenize("1..023", out var tokens, out var error)); - ClassicAssert.IsNull(tokens); - ClassicAssert.IsNotNull(error); - ClassicAssert.IsTrue(error.Contains("multiple decimal points")); - } - - [Test] - public void Tokenizer_ManyDotsInNumberReturnsFalse() - { - ClassicAssert.IsFalse(VectorFilterTokenizer.TryTokenize("123.....23", out var tokens, out var error)); - ClassicAssert.IsNull(tokens); - ClassicAssert.IsNotNull(error); - ClassicAssert.IsTrue(error.Contains("multiple decimal points")); - } - } -} From e09603d4cd8594adf5ee628faa236ad993ad4515 Mon Sep 17 00:00:00 2001 From: CI Fix Date: Thu, 26 Feb 2026 15:40:31 -0800 Subject: [PATCH 17/31] remove hot path allocate --- .../server/Resp/Vector/Filter/ExprCompiler.cs | 164 ++++++++++-------- libs/server/Resp/Vector/Filter/ExprRunner.cs | 4 - .../Vector/Filter/VectorFilterExpression.cs | 10 -- libs/server/Resp/Vector/VectorManager.cs | 17 +- test/Garnet.test/Filter/ExprCompilerTests.cs | 85 ++++----- test/Garnet.test/Filter/ExprRunnerTests.cs | 3 +- test/Garnet.test/Filter/ExprTestHelpers.cs | 6 +- 7 files changed, 140 insertions(+), 149 deletions(-) diff --git a/libs/server/Resp/Vector/Filter/ExprCompiler.cs b/libs/server/Resp/Vector/Filter/ExprCompiler.cs index d4b29b13387..e081f1fd945 100644 --- a/libs/server/Resp/Vector/Filter/ExprCompiler.cs +++ b/libs/server/Resp/Vector/Filter/ExprCompiler.cs @@ -2,8 +2,8 @@ // Licensed under the MIT license. 
using System; -using System.Globalization; -using System.Runtime.CompilerServices; +using System.Buffers.Text; +using System.Text; namespace Garnet.server.Vector.Filter { @@ -24,17 +24,15 @@ internal static class ExprCompiler { private const int MaxTokens = 1024; private const int MaxProgram = 256; - private const string SelectorSpecialChars = "_-"; - private const string OperatorSpecialChars = "+-*%/!()<>=|&"; /// - /// Compile a filter expression string into a flat postfix program. + /// Compile a filter expression (as UTF-8 bytes) into a flat postfix program. /// Returns null on syntax error; optionally reports the error position. /// - public static ExprProgram TryCompile(string expr, out int errpos) + public static ExprProgram TryCompile(ReadOnlySpan expr, out int errpos) { errpos = -1; - if (string.IsNullOrEmpty(expr)) + if (expr.IsEmpty) return null; // Phase 1: Tokenize into a flat list @@ -56,7 +54,7 @@ public static ExprProgram TryCompile(string expr, out int errpos) // Determine if '-' should be a negative number sign or a subtraction operator var minusIsNumber = false; - if (expr[p] == '-' && p + 1 < expr.Length && (char.IsDigit(expr[p + 1]) || expr[p + 1] == '.')) + if (expr[p] == (byte)'-' && p + 1 < expr.Length && (IsDigit(expr[p + 1]) || expr[p + 1] == (byte)'.')) { if (numTokens == 0) { @@ -71,7 +69,7 @@ public static ExprProgram TryCompile(string expr, out int errpos) } // Number - if (char.IsDigit(expr[p]) || (minusIsNumber && expr[p] == '-')) + if (IsDigit(expr[p]) || (minusIsNumber && expr[p] == (byte)'-')) { var t = ParseNumber(expr, ref p); if (t == null) { errpos = p; return null; } @@ -80,7 +78,7 @@ public static ExprProgram TryCompile(string expr, out int errpos) } // String literal - if (expr[p] == '"' || expr[p] == '\'') + if (expr[p] == (byte)'"' || expr[p] == (byte)'\'') { var t = ParseString(expr, ref p); if (t == null) { errpos = p; return null; } @@ -89,7 +87,7 @@ public static ExprProgram TryCompile(string expr, out int errpos) } 
// Selector (field access starting with '.') - if (expr[p] == '.' && p + 1 < expr.Length && IsSelectorChar(expr[p + 1])) + if (expr[p] == (byte)'.' && p + 1 < expr.Length && IsSelectorChar(expr[p + 1])) { var t = ParseSelector(expr, ref p); tokens[numTokens++] = t; @@ -97,7 +95,7 @@ public static ExprProgram TryCompile(string expr, out int errpos) } // Tuple literal [1, "foo", 42] - if (expr[p] == '[') + if (expr[p] == (byte)'[') { var t = ParseTuple(expr, ref p); if (t == null) { errpos = p; return null; } @@ -106,7 +104,7 @@ public static ExprProgram TryCompile(string expr, out int errpos) } // Operator or literal keyword (null, true, false, not, and, or, in) - if (char.IsLetter(expr[p]) || OperatorSpecialChars.IndexOf(expr[p]) >= 0) + if (IsLetter(expr[p]) || IsOperatorSpecialChar(expr[p])) { var t = ParseOperatorOrLiteral(expr, ref p); if (t == null) { errpos = p; return null; } @@ -241,28 +239,42 @@ private static bool ProcessOperator( // ======================== Tokenization helpers ======================== - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static void SkipSpaces(string s, ref int p) + private static bool IsDigit(byte b) => b >= (byte)'0' && b <= (byte)'9'; + + private static bool IsLetter(byte b) => (b >= (byte)'a' && b <= (byte)'z') || (b >= (byte)'A' && b <= (byte)'Z'); + + private static bool IsLetterOrDigit(byte b) => IsLetter(b) || IsDigit(b); + + private static bool IsWhiteSpace(byte b) => b == (byte)' ' || b == (byte)'\t' || b == (byte)'\n' || b == (byte)'\r'; + + private static bool IsOperatorSpecialChar(byte b) + { + return b == (byte)'+' || b == (byte)'-' || b == (byte)'*' || b == (byte)'%' || + b == (byte)'/' || b == (byte)'!' 
|| b == (byte)'(' || b == (byte)')' || + b == (byte)'<' || b == (byte)'>' || b == (byte)'=' || b == (byte)'|' || + b == (byte)'&'; + } + + private static void SkipSpaces(ReadOnlySpan s, ref int p) { - while (p < s.Length && char.IsWhiteSpace(s[p])) p++; + while (p < s.Length && IsWhiteSpace(s[p])) p++; } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static bool IsSelectorChar(char c) + private static bool IsSelectorChar(byte c) { - return char.IsLetterOrDigit(c) || SelectorSpecialChars.IndexOf(c) >= 0; + return IsLetterOrDigit(c) || c == (byte)'_' || c == (byte)'-'; } - private static ExprToken ParseNumber(string s, ref int p) + private static ExprToken ParseNumber(ReadOnlySpan s, ref int p) { var start = p; - if (p < s.Length && s[p] == '-') p++; + if (p < s.Length && s[p] == (byte)'-') p++; - while (p < s.Length && (char.IsDigit(s[p]) || s[p] == '.' || s[p] == 'e' || s[p] == 'E')) + while (p < s.Length && (IsDigit(s[p]) || s[p] == (byte)'.' || s[p] == (byte)'e' || s[p] == (byte)'E')) p++; - var numStr = s.Substring(start, p - start); - if (!double.TryParse(numStr, NumberStyles.Float | NumberStyles.AllowLeadingSign, CultureInfo.InvariantCulture, out var value)) + var numSpan = s.Slice(start, p - start); + if (!Utf8Parser.TryParse(numSpan, out double value, out var bytesConsumed) || bytesConsumed != numSpan.Length) { p = start; return null; @@ -270,7 +282,7 @@ private static ExprToken ParseNumber(string s, ref int p) return ExprToken.NewNum(value); } - private static ExprToken ParseString(string s, ref int p) + private static ExprToken ParseString(ReadOnlySpan s, ref int p) { var quote = s[p]; p++; // Skip opening quote @@ -279,7 +291,7 @@ private static ExprToken ParseString(string s, ref int p) while (p < s.Length) { - if (s[p] == '\\' && p + 1 < s.Length) + if (s[p] == (byte)'\\' && p + 1 < s.Length) { hasEscape = true; p += 2; // Skip escaped char @@ -290,35 +302,35 @@ private static ExprToken ParseString(string s, ref int p) string value; if 
(!hasEscape) { - value = s.Substring(start, p - start); + value = Encoding.UTF8.GetString(s.Slice(start, p - start)); } else { // Process escape sequences (matching Redis fastjson.c behavior) - var chars = new char[p - start]; + var bytes = new byte[p - start]; var len = 0; for (var i = start; i < p; i++) { - if (s[i] == '\\' && i + 1 < p) + if (s[i] == (byte)'\\' && i + 1 < p) { i++; - chars[len++] = s[i] switch + bytes[len++] = s[i] switch { - 'n' => '\n', - 'r' => '\r', - 't' => '\t', - '\\' => '\\', - '"' => '"', - '\'' => '\'', + (byte)'n' => (byte)'\n', + (byte)'r' => (byte)'\r', + (byte)'t' => (byte)'\t', + (byte)'\\' => (byte)'\\', + (byte)'"' => (byte)'"', + (byte)'\'' => (byte)'\'', _ => s[i], // Unknown escape — copy verbatim }; } else { - chars[len++] = s[i]; + bytes[len++] = s[i]; } } - value = new string(chars, 0, len); + value = Encoding.UTF8.GetString(bytes, 0, len); } p++; // Skip closing quote return ExprToken.NewStr(value); @@ -328,16 +340,16 @@ private static ExprToken ParseString(string s, ref int p) return null; // Unterminated string } - private static ExprToken ParseSelector(string s, ref int p) + private static ExprToken ParseSelector(ReadOnlySpan s, ref int p) { p++; // Skip the leading dot var start = p; while (p < s.Length && IsSelectorChar(s[p])) p++; - var name = s.Substring(start, p - start); + var name = Encoding.UTF8.GetString(s.Slice(start, p - start)); return ExprToken.NewSelector(name); } - private static ExprToken ParseTuple(string s, ref int p) + private static ExprToken ParseTuple(ReadOnlySpan s, ref int p) { p++; // Skip '[' var elements = new ExprToken[64]; // max 64 elements @@ -346,7 +358,7 @@ private static ExprToken ParseTuple(string s, ref int p) SkipSpaces(s, ref p); // Handle empty tuple [] - if (p < s.Length && s[p] == ']') + if (p < s.Length && s[p] == (byte)']') { p++; return ExprToken.NewTuple([], 0); @@ -360,11 +372,11 @@ private static ExprToken ParseTuple(string s, ref int p) // Parse element: number or string 
ExprToken ele; - if (char.IsDigit(s[p]) || s[p] == '-') + if (IsDigit(s[p]) || s[p] == (byte)'-') { ele = ParseNumber(s, ref p); } - else if (s[p] == '"' || s[p] == '\'') + else if (s[p] == (byte)'"' || s[p] == (byte)'\'') { ele = ParseString(s, ref p); } @@ -379,8 +391,8 @@ private static ExprToken ParseTuple(string s, ref int p) SkipSpaces(s, ref p); if (p >= s.Length) return null; - if (s[p] == ']') { p++; break; } - if (s[p] != ',') return null; + if (s[p] == (byte)']') { p++; break; } + if (s[p] != (byte)',') return null; p++; // Skip comma } @@ -389,51 +401,52 @@ private static ExprToken ParseTuple(string s, ref int p) return ExprToken.NewTuple(result, count); } - private static ExprToken ParseOperatorOrLiteral(string s, ref int p) + private static ExprToken ParseOperatorOrLiteral(ReadOnlySpan s, ref int p) { var start = p; // Consume alphabetic or operator-special characters - while (p < s.Length && (char.IsLetter(s[p]) || OperatorSpecialChars.IndexOf(s[p]) >= 0)) + while (p < s.Length && (IsLetter(s[p]) || IsOperatorSpecialChar(s[p]))) p++; var matchLen = p - start; if (matchLen == 0) return null; // Check for literals - if (matchLen == 4 && string.Compare(s, start, "null", 0, 4, StringComparison.Ordinal) == 0) + if (matchLen == 4 && s.Slice(start, 4).SequenceEqual("null"u8)) return ExprToken.NewNull(); - if (matchLen == 4 && string.Compare(s, start, "true", 0, 4, StringComparison.Ordinal) == 0) + if (matchLen == 4 && s.Slice(start, 4).SequenceEqual("true"u8)) return ExprToken.NewNum(1); - if (matchLen == 5 && string.Compare(s, start, "false", 0, 5, StringComparison.Ordinal) == 0) + if (matchLen == 5 && s.Slice(start, 5).SequenceEqual("false"u8)) return ExprToken.NewNum(0); // Find best matching operator (longest match) OpCode bestCode = default; var bestLen = 0; - TryMatchOp(s, start, matchLen, "||", OpCode.Or, ref bestCode, ref bestLen); - TryMatchOp(s, start, matchLen, "or", OpCode.Or, ref bestCode, ref bestLen); - TryMatchOp(s, start, matchLen, "&&", 
OpCode.And, ref bestCode, ref bestLen); - TryMatchOp(s, start, matchLen, "and", OpCode.And, ref bestCode, ref bestLen); - TryMatchOp(s, start, matchLen, "**", OpCode.Pow, ref bestCode, ref bestLen); - TryMatchOp(s, start, matchLen, ">=", OpCode.Gte, ref bestCode, ref bestLen); - TryMatchOp(s, start, matchLen, "<=", OpCode.Lte, ref bestCode, ref bestLen); - TryMatchOp(s, start, matchLen, "==", OpCode.Eq, ref bestCode, ref bestLen); - TryMatchOp(s, start, matchLen, "!=", OpCode.Neq, ref bestCode, ref bestLen); - TryMatchOp(s, start, matchLen, "not", OpCode.Not, ref bestCode, ref bestLen); - TryMatchOp(s, start, matchLen, "in", OpCode.In, ref bestCode, ref bestLen); - TryMatchOp(s, start, matchLen, "(", OpCode.OParen, ref bestCode, ref bestLen); - TryMatchOp(s, start, matchLen, ")", OpCode.CParen, ref bestCode, ref bestLen); - TryMatchOp(s, start, matchLen, "+", OpCode.Add, ref bestCode, ref bestLen); - TryMatchOp(s, start, matchLen, "-", OpCode.Sub, ref bestCode, ref bestLen); - TryMatchOp(s, start, matchLen, "*", OpCode.Mul, ref bestCode, ref bestLen); - TryMatchOp(s, start, matchLen, "/", OpCode.Div, ref bestCode, ref bestLen); - TryMatchOp(s, start, matchLen, "%", OpCode.Mod, ref bestCode, ref bestLen); - TryMatchOp(s, start, matchLen, ">", OpCode.Gt, ref bestCode, ref bestLen); - TryMatchOp(s, start, matchLen, "<", OpCode.Lt, ref bestCode, ref bestLen); - TryMatchOp(s, start, matchLen, "!", OpCode.Not, ref bestCode, ref bestLen); + var consumed = s.Slice(start, matchLen); + TryMatchOp(consumed, "||"u8, OpCode.Or, ref bestCode, ref bestLen); + TryMatchOp(consumed, "or"u8, OpCode.Or, ref bestCode, ref bestLen); + TryMatchOp(consumed, "&&"u8, OpCode.And, ref bestCode, ref bestLen); + TryMatchOp(consumed, "and"u8, OpCode.And, ref bestCode, ref bestLen); + TryMatchOp(consumed, "**"u8, OpCode.Pow, ref bestCode, ref bestLen); + TryMatchOp(consumed, ">="u8, OpCode.Gte, ref bestCode, ref bestLen); + TryMatchOp(consumed, "<="u8, OpCode.Lte, ref bestCode, ref bestLen); + 
TryMatchOp(consumed, "=="u8, OpCode.Eq, ref bestCode, ref bestLen); + TryMatchOp(consumed, "!="u8, OpCode.Neq, ref bestCode, ref bestLen); + TryMatchOp(consumed, "not"u8, OpCode.Not, ref bestCode, ref bestLen); + TryMatchOp(consumed, "in"u8, OpCode.In, ref bestCode, ref bestLen); + TryMatchOp(consumed, "("u8, OpCode.OParen, ref bestCode, ref bestLen); + TryMatchOp(consumed, ")"u8, OpCode.CParen, ref bestCode, ref bestLen); + TryMatchOp(consumed, "+"u8, OpCode.Add, ref bestCode, ref bestLen); + TryMatchOp(consumed, "-"u8, OpCode.Sub, ref bestCode, ref bestLen); + TryMatchOp(consumed, "*"u8, OpCode.Mul, ref bestCode, ref bestLen); + TryMatchOp(consumed, "/"u8, OpCode.Div, ref bestCode, ref bestLen); + TryMatchOp(consumed, "%"u8, OpCode.Mod, ref bestCode, ref bestLen); + TryMatchOp(consumed, ">"u8, OpCode.Gt, ref bestCode, ref bestLen); + TryMatchOp(consumed, "<"u8, OpCode.Lt, ref bestCode, ref bestLen); + TryMatchOp(consumed, "!"u8, OpCode.Not, ref bestCode, ref bestLen); if (bestLen == 0) { @@ -446,12 +459,11 @@ private static ExprToken ParseOperatorOrLiteral(string s, ref int p) return ExprToken.NewOp(bestCode); } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static void TryMatchOp(string s, int start, int matchLen, string opName, OpCode opCode, ref OpCode bestCode, ref int bestLen) + private static void TryMatchOp(ReadOnlySpan consumed, ReadOnlySpan opName, OpCode opCode, ref OpCode bestCode, ref int bestLen) { var opLen = opName.Length; - if (opLen > matchLen) return; - if (string.Compare(s, start, opName, 0, opLen, StringComparison.Ordinal) != 0) return; + if (opLen > consumed.Length) return; + if (!consumed.Slice(0, opLen).SequenceEqual(opName)) return; if (opLen > bestLen) { bestCode = opCode; diff --git a/libs/server/Resp/Vector/Filter/ExprRunner.cs b/libs/server/Resp/Vector/Filter/ExprRunner.cs index d0dd1b3e4ab..1b36d739ac2 100644 --- a/libs/server/Resp/Vector/Filter/ExprRunner.cs +++ b/libs/server/Resp/Vector/Filter/ExprRunner.cs @@ -3,8 
+3,6 @@ using System; using System.Globalization; -using System.Runtime.CompilerServices; - namespace Garnet.server.Vector.Filter { /// @@ -137,7 +135,6 @@ public static bool Run(ExprProgram program, ReadOnlySpan json) /// Strings are parsed as numbers; unparseable strings return 0. /// Matches Redis exprTokenToNum(). /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] private static double ToNum(ExprToken t) { if (t == null) return 0; @@ -154,7 +151,6 @@ private static double ToNum(ExprToken t) /// Convert a token to boolean (0 or 1). /// Matches Redis exprTokenToBool(): null=0, num!=0=1, empty string=0, else=1. /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] private static double ToBool(ExprToken t) { if (t == null) return 0; diff --git a/libs/server/Resp/Vector/Filter/VectorFilterExpression.cs b/libs/server/Resp/Vector/Filter/VectorFilterExpression.cs index 9aa5622e058..9f099657f9c 100644 --- a/libs/server/Resp/Vector/Filter/VectorFilterExpression.cs +++ b/libs/server/Resp/Vector/Filter/VectorFilterExpression.cs @@ -1,8 +1,6 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT license. -using System.Runtime.CompilerServices; - namespace Garnet.server.Vector.Filter { /// @@ -135,37 +133,31 @@ internal sealed class ExprToken /// Number of elements in the tuple. 
public int TupleLength; - [MethodImpl(MethodImplOptions.AggressiveInlining)] public static ExprToken NewNum(double value) { return new ExprToken { TokenType = ExprTokenType.Num, Num = value }; } - [MethodImpl(MethodImplOptions.AggressiveInlining)] public static ExprToken NewStr(string value) { return new ExprToken { TokenType = ExprTokenType.Str, Str = value }; } - [MethodImpl(MethodImplOptions.AggressiveInlining)] public static ExprToken NewSelector(string fieldName) { return new ExprToken { TokenType = ExprTokenType.Selector, Str = fieldName }; } - [MethodImpl(MethodImplOptions.AggressiveInlining)] public static ExprToken NewOp(OpCode opCode) { return new ExprToken { TokenType = ExprTokenType.Op, OpCode = opCode }; } - [MethodImpl(MethodImplOptions.AggressiveInlining)] public static ExprToken NewNull() { return new ExprToken { TokenType = ExprTokenType.Null }; } - [MethodImpl(MethodImplOptions.AggressiveInlining)] public static ExprToken NewTuple(ExprToken[] elements, int length) { return new ExprToken { TokenType = ExprTokenType.Tuple, TupleElements = elements, TupleLength = length }; @@ -205,10 +197,8 @@ static OpTable() Table[(int)OpCode.CParen] = (7, 0); } - [MethodImpl(MethodImplOptions.AggressiveInlining)] public static int GetPrecedence(OpCode code) => Table[(int)code].Precedence; - [MethodImpl(MethodImplOptions.AggressiveInlining)] public static int GetArity(OpCode code) => Table[(int)code].Arity; } diff --git a/libs/server/Resp/Vector/VectorManager.cs b/libs/server/Resp/Vector/VectorManager.cs index 78facaf9c35..1128a4b389a 100644 --- a/libs/server/Resp/Vector/VectorManager.cs +++ b/libs/server/Resp/Vector/VectorManager.cs @@ -1,4 +1,4 @@ -// Copyright (c) Microsoft Corporation. +// Copyright (c) Microsoft Corporation. // Licensed under the MIT license. using System; @@ -947,7 +947,6 @@ internal static uint CalculateValueDimensions(VectorValueType valueType, ReadOnl /// /// Apply post-filtering to vector search results using a compiled filter expression. 
- /// Returns the number of results that passed the filter, or -1 if the filter expression is invalid. /// /// Architecture (modeled after Redis expr.c + fastjson.c): /// 1. The filter string is compiled ONCE into a flat postfix program (ExprCompiler). @@ -955,9 +954,6 @@ internal static uint CalculateValueDimensions(VectorValueType valueType, ReadOnl /// using a stack-based VM (ExprRunner) with on-demand field extraction (AttributeExtractor). /// 3. No JsonDocument DOM is allocated — fields are extracted directly from the raw bytes. /// - /// TODO: A better approach would be to produce a bitmap of passing elements and let - /// NetworkVSIM handle skipping non-matching entries, avoiding the in-place compaction copies. - /// For now we compact in-place to minimize the scope of changes. /// private int ApplyPostFilter( ReadOnlySpan filter, @@ -971,17 +967,12 @@ private int ApplyPostFilter( return numResults; } - // Convert filter bytes to string for compilation. - // NOTE: This allocation is required because the compiler operates on strings. - // A future optimization could make the compiler work directly on ReadOnlySpan. - var filterStr = Encoding.UTF8.GetString(filter); - - // Compile the filter expression into a flat postfix program. + // Compile the filter expression (UTF-8 bytes) into a flat postfix program. // This is done once and reused for all candidate evaluations. - var program = ExprCompiler.TryCompile(filterStr, out _); + var program = ExprCompiler.TryCompile(filter, out _); if (program == null) { - return -1; + return 0; // If the filter doesn't compile, treat it as filtering out all results (matches Redis behavior) } var filteredCount = 0; diff --git a/test/Garnet.test/Filter/ExprCompilerTests.cs b/test/Garnet.test/Filter/ExprCompilerTests.cs index bf462240790..9fd4e2f85a5 100644 --- a/test/Garnet.test/Filter/ExprCompilerTests.cs +++ b/test/Garnet.test/Filter/ExprCompilerTests.cs @@ -1,6 +1,7 @@ // Copyright (c) Microsoft Corporation. 
// Licensed under the MIT license. +using System.Text; using Allure.NUnit; using Garnet.server.Vector.Filter; using NUnit.Framework; @@ -19,7 +20,7 @@ public class ExprCompilerTests : AllureTestBase [Test] public void Compiler_IntegerNumbers() { - var program = ExprCompiler.TryCompile("42", out _); + var program = ExprCompiler.TryCompile(Encoding.UTF8.GetBytes("42"), out _); ClassicAssert.IsNotNull(program); ClassicAssert.AreEqual(1, program.Length); ClassicAssert.AreEqual(ExprTokenType.Num, program.Instructions[0].TokenType); @@ -29,7 +30,7 @@ public void Compiler_IntegerNumbers() [Test] public void Compiler_DecimalNumbers() { - var program = ExprCompiler.TryCompile("3.14", out _); + var program = ExprCompiler.TryCompile(Encoding.UTF8.GetBytes("3.14"), out _); ClassicAssert.IsNotNull(program); ClassicAssert.AreEqual(1, program.Length); ClassicAssert.AreEqual(ExprTokenType.Num, program.Instructions[0].TokenType); @@ -39,7 +40,7 @@ public void Compiler_DecimalNumbers() [Test] public void Compiler_NegativeNumbers() { - var program = ExprCompiler.TryCompile("-5", out _); + var program = ExprCompiler.TryCompile(Encoding.UTF8.GetBytes("-5"), out _); ClassicAssert.IsNotNull(program); ClassicAssert.AreEqual(1, program.Length); ClassicAssert.AreEqual(ExprTokenType.Num, program.Instructions[0].TokenType); @@ -49,13 +50,13 @@ public void Compiler_NegativeNumbers() [Test] public void Compiler_StringLiterals() { - var program = ExprCompiler.TryCompile("\"hello\"", out _); + var program = ExprCompiler.TryCompile(Encoding.UTF8.GetBytes("\"hello\""), out _); ClassicAssert.IsNotNull(program); ClassicAssert.AreEqual(1, program.Length); ClassicAssert.AreEqual(ExprTokenType.Str, program.Instructions[0].TokenType); ClassicAssert.AreEqual("hello", program.Instructions[0].Str); - program = ExprCompiler.TryCompile("'world'", out _); + program = ExprCompiler.TryCompile(Encoding.UTF8.GetBytes("'world'"), out _); ClassicAssert.IsNotNull(program); ClassicAssert.AreEqual(1, program.Length); 
ClassicAssert.AreEqual(ExprTokenType.Str, program.Instructions[0].TokenType); @@ -65,7 +66,7 @@ public void Compiler_StringLiterals() [Test] public void Compiler_EscapedStringLiterals() { - var program = ExprCompiler.TryCompile("\"hello\\\"world\"", out _); + var program = ExprCompiler.TryCompile(Encoding.UTF8.GetBytes("\"hello\\\"world\""), out _); ClassicAssert.IsNotNull(program); ClassicAssert.AreEqual(1, program.Length); ClassicAssert.AreEqual(ExprTokenType.Str, program.Instructions[0].TokenType); @@ -75,7 +76,7 @@ public void Compiler_EscapedStringLiterals() [Test] public void Compiler_UnterminatedStringReturnsFalse() { - var program = ExprCompiler.TryCompile("\"hello", out _); + var program = ExprCompiler.TryCompile(Encoding.UTF8.GetBytes("\"hello"), out _); ClassicAssert.IsNull(program); } @@ -83,7 +84,7 @@ public void Compiler_UnterminatedStringReturnsFalse() public void Compiler_SubtractionNotConfusedWithNegative() { // ".a - 5" → postfix: [SEL:a] [NUM:5] [OP:Sub] - var program = ExprCompiler.TryCompile(".a - 5", out _); + var program = ExprCompiler.TryCompile(Encoding.UTF8.GetBytes(".a - 5"), out _); ClassicAssert.IsNotNull(program); ClassicAssert.AreEqual(3, program.Length); ClassicAssert.AreEqual(ExprTokenType.Selector, program.Instructions[0].TokenType); @@ -96,7 +97,7 @@ public void Compiler_SubtractionNotConfusedWithNegative() [Test] public void Compiler_Selectors() { - var program = ExprCompiler.TryCompile(".year", out _); + var program = ExprCompiler.TryCompile(Encoding.UTF8.GetBytes(".year"), out _); ClassicAssert.IsNotNull(program); ClassicAssert.AreEqual(1, program.Length); ClassicAssert.AreEqual(ExprTokenType.Selector, program.Instructions[0].TokenType); @@ -107,7 +108,7 @@ public void Compiler_Selectors() public void Compiler_Keywords() { // "true and false" → [NUM:1] [NUM:0] [OP:And] - var program = ExprCompiler.TryCompile("true and false", out _); + var program = ExprCompiler.TryCompile(Encoding.UTF8.GetBytes("true and false"), out _); 
ClassicAssert.IsNotNull(program); ClassicAssert.AreEqual(3, program.Length); ClassicAssert.AreEqual(ExprTokenType.Num, program.Instructions[0].TokenType); @@ -121,13 +122,13 @@ public void Compiler_Keywords() [Test] public void Compiler_Booleans() { - var program = ExprCompiler.TryCompile("true", out _); + var program = ExprCompiler.TryCompile(Encoding.UTF8.GetBytes("true"), out _); ClassicAssert.IsNotNull(program); ClassicAssert.AreEqual(1, program.Length); ClassicAssert.AreEqual(ExprTokenType.Num, program.Instructions[0].TokenType); ClassicAssert.AreEqual(1.0, program.Instructions[0].Num); - program = ExprCompiler.TryCompile("false", out _); + program = ExprCompiler.TryCompile(Encoding.UTF8.GetBytes("false"), out _); ClassicAssert.IsNotNull(program); ClassicAssert.AreEqual(1, program.Length); ClassicAssert.AreEqual(ExprTokenType.Num, program.Instructions[0].TokenType); @@ -137,31 +138,31 @@ public void Compiler_Booleans() [Test] public void Compiler_TwoCharOperators() { - var program = ExprCompiler.TryCompile("1 == 2", out _); + var program = ExprCompiler.TryCompile(Encoding.UTF8.GetBytes("1 == 2"), out _); ClassicAssert.IsNotNull(program); ClassicAssert.AreEqual(OpCode.Eq, program.Instructions[2].OpCode); - program = ExprCompiler.TryCompile("1 != 2", out _); + program = ExprCompiler.TryCompile(Encoding.UTF8.GetBytes("1 != 2"), out _); ClassicAssert.IsNotNull(program); ClassicAssert.AreEqual(OpCode.Neq, program.Instructions[2].OpCode); - program = ExprCompiler.TryCompile("1 >= 2", out _); + program = ExprCompiler.TryCompile(Encoding.UTF8.GetBytes("1 >= 2"), out _); ClassicAssert.IsNotNull(program); ClassicAssert.AreEqual(OpCode.Gte, program.Instructions[2].OpCode); - program = ExprCompiler.TryCompile("1 <= 2", out _); + program = ExprCompiler.TryCompile(Encoding.UTF8.GetBytes("1 <= 2"), out _); ClassicAssert.IsNotNull(program); ClassicAssert.AreEqual(OpCode.Lte, program.Instructions[2].OpCode); - program = ExprCompiler.TryCompile("true && false", out _); + 
program = ExprCompiler.TryCompile(Encoding.UTF8.GetBytes("true && false"), out _); ClassicAssert.IsNotNull(program); ClassicAssert.AreEqual(OpCode.And, program.Instructions[2].OpCode); - program = ExprCompiler.TryCompile("true || false", out _); + program = ExprCompiler.TryCompile(Encoding.UTF8.GetBytes("true || false"), out _); ClassicAssert.IsNotNull(program); ClassicAssert.AreEqual(OpCode.Or, program.Instructions[2].OpCode); - program = ExprCompiler.TryCompile("2 ** 3", out _); + program = ExprCompiler.TryCompile(Encoding.UTF8.GetBytes("2 ** 3"), out _); ClassicAssert.IsNotNull(program); ClassicAssert.AreEqual(OpCode.Pow, program.Instructions[2].OpCode); } @@ -169,27 +170,27 @@ public void Compiler_TwoCharOperators() [Test] public void Compiler_SingleCharOperators() { - var program = ExprCompiler.TryCompile("1 > 2", out _); + var program = ExprCompiler.TryCompile(Encoding.UTF8.GetBytes("1 > 2"), out _); ClassicAssert.IsNotNull(program); ClassicAssert.AreEqual(OpCode.Gt, program.Instructions[2].OpCode); - program = ExprCompiler.TryCompile("1 < 2", out _); + program = ExprCompiler.TryCompile(Encoding.UTF8.GetBytes("1 < 2"), out _); ClassicAssert.IsNotNull(program); ClassicAssert.AreEqual(OpCode.Lt, program.Instructions[2].OpCode); - program = ExprCompiler.TryCompile("1 + 2", out _); + program = ExprCompiler.TryCompile(Encoding.UTF8.GetBytes("1 + 2"), out _); ClassicAssert.IsNotNull(program); ClassicAssert.AreEqual(OpCode.Add, program.Instructions[2].OpCode); - program = ExprCompiler.TryCompile("1 * 2", out _); + program = ExprCompiler.TryCompile(Encoding.UTF8.GetBytes("1 * 2"), out _); ClassicAssert.IsNotNull(program); ClassicAssert.AreEqual(OpCode.Mul, program.Instructions[2].OpCode); - program = ExprCompiler.TryCompile("1 / 2", out _); + program = ExprCompiler.TryCompile(Encoding.UTF8.GetBytes("1 / 2"), out _); ClassicAssert.IsNotNull(program); ClassicAssert.AreEqual(OpCode.Div, program.Instructions[2].OpCode); - program = ExprCompiler.TryCompile("1 % 2", out 
_); + program = ExprCompiler.TryCompile(Encoding.UTF8.GetBytes("1 % 2"), out _); ClassicAssert.IsNotNull(program); ClassicAssert.AreEqual(OpCode.Mod, program.Instructions[2].OpCode); } @@ -197,7 +198,7 @@ public void Compiler_SingleCharOperators() [Test] public void Compiler_Parentheses() { - var program = ExprCompiler.TryCompile("(.year > 10)", out _); + var program = ExprCompiler.TryCompile(Encoding.UTF8.GetBytes("(.year > 10)"), out _); ClassicAssert.IsNotNull(program); // Postfix: [SEL:year] [NUM:10] [OP:Gt] ClassicAssert.AreEqual(3, program.Length); @@ -206,7 +207,7 @@ public void Compiler_Parentheses() [Test] public void Compiler_ComplexExpression() { - var program = ExprCompiler.TryCompile(".year > 1950 and .rating >= 4.0", out _); + var program = ExprCompiler.TryCompile(Encoding.UTF8.GetBytes(".year > 1950 and .rating >= 4.0"), out _); ClassicAssert.IsNotNull(program); // Postfix: [SEL:year] [NUM:1950] [OP:Gt] [SEL:rating] [NUM:4.0] [OP:Gte] [OP:And] ClassicAssert.AreEqual(7, program.Length); @@ -215,24 +216,24 @@ public void Compiler_ComplexExpression() [Test] public void Compiler_EmptyInput() { - var program = ExprCompiler.TryCompile("", out _); + var program = ExprCompiler.TryCompile(Encoding.UTF8.GetBytes(""), out _); ClassicAssert.IsNull(program); - program = ExprCompiler.TryCompile(" ", out _); + program = ExprCompiler.TryCompile(Encoding.UTF8.GetBytes(" "), out _); ClassicAssert.IsNull(program); } [Test] public void Compiler_UnexpectedCharacterReturnsFalse() { - var program = ExprCompiler.TryCompile("@", out _); + var program = ExprCompiler.TryCompile(Encoding.UTF8.GetBytes("@"), out _); ClassicAssert.IsNull(program); } [Test] public void Compiler_NullLiteral() { - var program = ExprCompiler.TryCompile("null", out _); + var program = ExprCompiler.TryCompile(Encoding.UTF8.GetBytes("null"), out _); ClassicAssert.IsNotNull(program); ClassicAssert.AreEqual(1, program.Length); ClassicAssert.AreEqual(ExprTokenType.Null, program.Instructions[0].TokenType); 
@@ -241,7 +242,7 @@ public void Compiler_NullLiteral() [Test] public void Compiler_TupleLiteral() { - var program = ExprCompiler.TryCompile("[1, \"foo\", 42]", out _); + var program = ExprCompiler.TryCompile(Encoding.UTF8.GetBytes("[1, \"foo\", 42]"), out _); ClassicAssert.IsNotNull(program); ClassicAssert.AreEqual(1, program.Length); ClassicAssert.AreEqual(ExprTokenType.Tuple, program.Instructions[0].TokenType); @@ -251,7 +252,7 @@ public void Compiler_TupleLiteral() [Test] public void Compiler_HyphenInSelector() { - var program = ExprCompiler.TryCompile(".my-field", out _); + var program = ExprCompiler.TryCompile(Encoding.UTF8.GetBytes(".my-field"), out _); ClassicAssert.IsNotNull(program); ClassicAssert.AreEqual(1, program.Length); ClassicAssert.AreEqual(ExprTokenType.Selector, program.Instructions[0].TokenType); @@ -262,7 +263,7 @@ public void Compiler_HyphenInSelector() public void Compiler_PrecedenceMultiplicationBeforeAddition() { // "1 + 2 * 3" → [1] [2] [3] [*] [+] - var program = ExprCompiler.TryCompile("1 + 2 * 3", out _); + var program = ExprCompiler.TryCompile(Encoding.UTF8.GetBytes("1 + 2 * 3"), out _); ClassicAssert.IsNotNull(program); ClassicAssert.AreEqual(5, program.Length); ClassicAssert.AreEqual(OpCode.Mul, program.Instructions[3].OpCode); @@ -273,7 +274,7 @@ public void Compiler_PrecedenceMultiplicationBeforeAddition() public void Compiler_PrecedenceAndBeforeOr() { // "true or false and true" → [1] [0] [1] [and] [or] - var program = ExprCompiler.TryCompile("true or false and true", out _); + var program = ExprCompiler.TryCompile(Encoding.UTF8.GetBytes("true or false and true"), out _); ClassicAssert.IsNotNull(program); ClassicAssert.AreEqual(5, program.Length); ClassicAssert.AreEqual(OpCode.And, program.Instructions[3].OpCode); @@ -284,7 +285,7 @@ public void Compiler_PrecedenceAndBeforeOr() public void Compiler_ParenthesesOverridePrecedence() { // "(1 + 2) * 3" → [1] [2] [+] [3] [*] - var program = ExprCompiler.TryCompile("(1 + 2) * 3", out 
_); + var program = ExprCompiler.TryCompile(Encoding.UTF8.GetBytes("(1 + 2) * 3"), out _); ClassicAssert.IsNotNull(program); ClassicAssert.AreEqual(5, program.Length); ClassicAssert.AreEqual(OpCode.Add, program.Instructions[2].OpCode); @@ -295,7 +296,7 @@ public void Compiler_ParenthesesOverridePrecedence() public void Compiler_ContainmentOperator() { // '"action" in .tags' → [STR:action] [SEL:tags] [OP:In] - var program = ExprCompiler.TryCompile("\"action\" in .tags", out _); + var program = ExprCompiler.TryCompile(Encoding.UTF8.GetBytes("\"action\" in .tags"), out _); ClassicAssert.IsNotNull(program); ClassicAssert.AreEqual(3, program.Length); ClassicAssert.AreEqual(ExprTokenType.Str, program.Instructions[0].TokenType); @@ -307,7 +308,7 @@ public void Compiler_ContainmentOperator() public void Compiler_ExponentiationRightAssociative() { // "2 ** 3 ** 2" → 2 ** (3 ** 2) = 512 - var program = ExprCompiler.TryCompile("2 ** 3 ** 2", out _); + var program = ExprCompiler.TryCompile(Encoding.UTF8.GetBytes("2 ** 3 ** 2"), out _); ClassicAssert.IsNotNull(program); ClassicAssert.AreEqual(5, program.Length); ClassicAssert.AreEqual(OpCode.Pow, program.Instructions[3].OpCode); @@ -321,7 +322,7 @@ public void Compiler_ExponentiationRightAssociative() public void Compiler_UnaryNot() { // "not true" → [NUM:1] [OP:Not] - var program = ExprCompiler.TryCompile("not true", out _); + var program = ExprCompiler.TryCompile(Encoding.UTF8.GetBytes("not true"), out _); ClassicAssert.IsNotNull(program); ClassicAssert.AreEqual(2, program.Length); ClassicAssert.AreEqual(ExprTokenType.Num, program.Instructions[0].TokenType); @@ -332,21 +333,21 @@ public void Compiler_UnaryNot() [Test] public void Compiler_ErrorOnMissingClosingParen() { - var program = ExprCompiler.TryCompile("(1 + 2", out _); + var program = ExprCompiler.TryCompile(Encoding.UTF8.GetBytes("(1 + 2"), out _); ClassicAssert.IsNull(program); } [Test] public void Compiler_ErrorOnUnexpectedToken() { - var program = 
ExprCompiler.TryCompile(")", out _); + var program = ExprCompiler.TryCompile(Encoding.UTF8.GetBytes(")"), out _); ClassicAssert.IsNull(program); } [Test] public void Compiler_InWithTupleLiteral() { - var program = ExprCompiler.TryCompile(".director in [\"Spielberg\", \"Nolan\"]", out _); + var program = ExprCompiler.TryCompile(Encoding.UTF8.GetBytes(".director in [\"Spielberg\", \"Nolan\"]"), out _); ClassicAssert.IsNotNull(program); ClassicAssert.AreEqual(3, program.Length); ClassicAssert.AreEqual(ExprTokenType.Selector, program.Instructions[0].TokenType); diff --git a/test/Garnet.test/Filter/ExprRunnerTests.cs b/test/Garnet.test/Filter/ExprRunnerTests.cs index 682b0afb6c7..76ecd202171 100644 --- a/test/Garnet.test/Filter/ExprRunnerTests.cs +++ b/test/Garnet.test/Filter/ExprRunnerTests.cs @@ -1,6 +1,7 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT license. +using System.Text; using Allure.NUnit; using Garnet.server.Vector.Filter; using NUnit.Framework; @@ -178,7 +179,7 @@ public void Runner_NullLiteral() [Test] public void Runner_NonJsonAttributesExcluded() { - var program = ExprCompiler.TryCompile(".year > 1950", out _); + var program = ExprCompiler.TryCompile(Encoding.UTF8.GetBytes(".year > 1950"), out _); ClassicAssert.IsNotNull(program); var nonJson = System.Text.Encoding.UTF8.GetBytes("this is not json"); diff --git a/test/Garnet.test/Filter/ExprTestHelpers.cs b/test/Garnet.test/Filter/ExprTestHelpers.cs index 6f6eaecc03f..40e94eb7cd6 100644 --- a/test/Garnet.test/Filter/ExprTestHelpers.cs +++ b/test/Garnet.test/Filter/ExprTestHelpers.cs @@ -19,7 +19,7 @@ internal static class ExprTestHelpers /// internal static ExprToken EvaluateFilter(string expression, string json) { - var program = ExprCompiler.TryCompile(expression, out var errpos); + var program = ExprCompiler.TryCompile(Encoding.UTF8.GetBytes(expression), out var errpos); if (program == null) throw new InvalidOperationException($"Compilation failed at position {errpos}"); @@ -34,7 
+34,7 @@ internal static ExprToken EvaluateFilter(string expression, string json) /// internal static bool EvaluateFilterTruthy(string expression, string json) { - var program = ExprCompiler.TryCompile(expression, out var errpos); + var program = ExprCompiler.TryCompile(Encoding.UTF8.GetBytes(expression), out var errpos); if (program == null) throw new InvalidOperationException($"Compilation failed at position {errpos}"); @@ -47,7 +47,7 @@ internal static bool EvaluateFilterTruthy(string expression, string json) /// internal static bool TryCompile(string expression, out ExprProgram program) { - program = ExprCompiler.TryCompile(expression, out _); + program = ExprCompiler.TryCompile(Encoding.UTF8.GetBytes(expression), out _); return program != null; } From 8d6a90b5324eadd9eaf67fabf01415fb3612d518 Mon Sep 17 00:00:00 2001 From: Haiyang Xu Date: Fri, 27 Feb 2026 11:06:11 -0800 Subject: [PATCH 18/31] Fix formatting: remove trailing newlines per editorconfig Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- libs/server/Resp/Vector/Filter/AttributeExtractor.cs | 2 +- libs/server/Resp/Vector/Filter/ExprCompiler.cs | 2 +- libs/server/Resp/Vector/Filter/ExprRunner.cs | 2 +- test/Garnet.test/Filter/ExprCompilerTests.cs | 2 +- test/Garnet.test/Filter/ExprRunnerTests.cs | 2 +- test/Garnet.test/Filter/ExprTestHelpers.cs | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/libs/server/Resp/Vector/Filter/AttributeExtractor.cs b/libs/server/Resp/Vector/Filter/AttributeExtractor.cs index 3986d95e984..217dc25453e 100644 --- a/libs/server/Resp/Vector/Filter/AttributeExtractor.cs +++ b/libs/server/Resp/Vector/Filter/AttributeExtractor.cs @@ -329,4 +329,4 @@ private static string UnescapeJsonString(ReadOnlySpan json, int start, int return new string(chars, 0, len); } } -} +} \ No newline at end of file diff --git a/libs/server/Resp/Vector/Filter/ExprCompiler.cs b/libs/server/Resp/Vector/Filter/ExprCompiler.cs index e081f1fd945..5b6b6f24bd5 100644 
--- a/libs/server/Resp/Vector/Filter/ExprCompiler.cs +++ b/libs/server/Resp/Vector/Filter/ExprCompiler.cs @@ -471,4 +471,4 @@ private static void TryMatchOp(ReadOnlySpan consumed, ReadOnlySpan o } } } -} +} \ No newline at end of file diff --git a/libs/server/Resp/Vector/Filter/ExprRunner.cs b/libs/server/Resp/Vector/Filter/ExprRunner.cs index 1b36d739ac2..c2eefedb559 100644 --- a/libs/server/Resp/Vector/Filter/ExprRunner.cs +++ b/libs/server/Resp/Vector/Filter/ExprRunner.cs @@ -221,4 +221,4 @@ private static bool EvalIn(ExprToken a, ExprToken b) return false; } } -} +} \ No newline at end of file diff --git a/test/Garnet.test/Filter/ExprCompilerTests.cs b/test/Garnet.test/Filter/ExprCompilerTests.cs index 9fd4e2f85a5..dcbdcf1fa73 100644 --- a/test/Garnet.test/Filter/ExprCompilerTests.cs +++ b/test/Garnet.test/Filter/ExprCompilerTests.cs @@ -356,4 +356,4 @@ public void Compiler_InWithTupleLiteral() ClassicAssert.AreEqual(OpCode.In, program.Instructions[2].OpCode); } } -} +} \ No newline at end of file diff --git a/test/Garnet.test/Filter/ExprRunnerTests.cs b/test/Garnet.test/Filter/ExprRunnerTests.cs index 76ecd202171..41f2f015655 100644 --- a/test/Garnet.test/Filter/ExprRunnerTests.cs +++ b/test/Garnet.test/Filter/ExprRunnerTests.cs @@ -211,4 +211,4 @@ public void Runner_JsonEscapeHandling() ClassicAssert.IsTrue(ExprTestHelpers.EvaluateFilterTruthy(".name == \"hello\\\"world\"", json)); } } -} +} \ No newline at end of file diff --git a/test/Garnet.test/Filter/ExprTestHelpers.cs b/test/Garnet.test/Filter/ExprTestHelpers.cs index 40e94eb7cd6..c4aa5e44f7e 100644 --- a/test/Garnet.test/Filter/ExprTestHelpers.cs +++ b/test/Garnet.test/Filter/ExprTestHelpers.cs @@ -197,4 +197,4 @@ private static bool EvalIn(ExprToken a, ExprToken b) return false; } } -} +} \ No newline at end of file From c5fd1f96636694e38566822abe330bc206925174 Mon Sep 17 00:00:00 2001 From: Haiyang Xu Date: Fri, 27 Feb 2026 15:47:27 -0800 Subject: [PATCH 19/31] optimize allocate --- 
.../Resp/Vector/Filter/AttributeExtractor.cs | 61 +++++++++---------- .../server/Resp/Vector/Filter/ExprCompiler.cs | 28 ++++----- libs/server/Resp/Vector/Filter/ExprRunner.cs | 59 +++++++++++++----- .../Vector/Filter/VectorFilterExpression.cs | 20 +++--- test/Garnet.test/Filter/ExprTestHelpers.cs | 16 ++--- 5 files changed, 107 insertions(+), 77 deletions(-) diff --git a/libs/server/Resp/Vector/Filter/AttributeExtractor.cs b/libs/server/Resp/Vector/Filter/AttributeExtractor.cs index 217dc25453e..0e0bd82203f 100644 --- a/libs/server/Resp/Vector/Filter/AttributeExtractor.cs +++ b/libs/server/Resp/Vector/Filter/AttributeExtractor.cs @@ -2,7 +2,7 @@ // Licensed under the MIT license. using System; -using System.Globalization; +using System.Buffers.Text; using System.Runtime.CompilerServices; using System.Text; @@ -22,26 +22,26 @@ internal static class AttributeExtractor { /// /// Extract a top-level field from a JSON object and return it as an ExprToken. - /// Returns null if the field is not found or the JSON is malformed. + /// Returns default (IsNone) if the field is not found or the JSON is malformed. 
/// public static ExprToken ExtractField(ReadOnlySpan json, string fieldName) { var p = 0; SkipWhiteSpace(json, ref p); - if (p >= json.Length || json[p] != (byte)'{') return null; + if (p >= json.Length || json[p] != (byte)'{') return default; p++; // Skip '{' while (true) { SkipWhiteSpace(json, ref p); - if (p >= json.Length) return null; - if (json[p] == (byte)'}') return null; // End of object, field not found + if (p >= json.Length) return default; + if (json[p] == (byte)'}') return default; // End of object, field not found // Expect a key string - if (json[p] != (byte)'"') return null; + if (json[p] != (byte)'"') return default; var keyStart = p + 1; - if (!SkipString(json, ref p)) return null; + if (!SkipString(json, ref p)) return default; var keyEnd = p - 1; // p is now past the closing quote // Compare key with field name @@ -49,11 +49,11 @@ public static ExprToken ExtractField(ReadOnlySpan json, string fieldName) // Expect ':' SkipWhiteSpace(json, ref p); - if (p >= json.Length || json[p] != (byte)':') return null; + if (p >= json.Length || json[p] != (byte)':') return default; p++; // Skip ':' SkipWhiteSpace(json, ref p); - if (p >= json.Length) return null; + if (p >= json.Length) return default; if (match) { @@ -63,15 +63,15 @@ public static ExprToken ExtractField(ReadOnlySpan json, string fieldName) else { // Skip the value - if (!SkipValue(json, ref p)) return null; + if (!SkipValue(json, ref p)) return default; } // Look for ',' or '}' SkipWhiteSpace(json, ref p); - if (p >= json.Length) return null; + if (p >= json.Length) return default; if (json[p] == (byte)',') { p++; continue; } - if (json[p] == (byte)'}') return null; // End of object, not found - return null; // Malformed JSON + if (json[p] == (byte)'}') return default; // End of object, not found + return default; // Malformed JSON } } @@ -80,24 +80,24 @@ public static ExprToken ExtractField(ReadOnlySpan json, string fieldName) private static ExprToken ParseValueToken(ReadOnlySpan json, 
ref int p) { SkipWhiteSpace(json, ref p); - if (p >= json.Length) return null; + if (p >= json.Length) return default; var c = json[p]; if (c == (byte)'"') return ParseStringToken(json, ref p); if (c == (byte)'[') return ParseArrayToken(json, ref p); - if (c == (byte)'{') return null; // Nested objects not supported + if (c == (byte)'{') return default; // Nested objects not supported if (c == (byte)'t') return ParseLiteralToken(json, ref p, "true"u8, ExprTokenType.Num, 1); if (c == (byte)'f') return ParseLiteralToken(json, ref p, "false"u8, ExprTokenType.Num, 0); if (c == (byte)'n') return ParseLiteralToken(json, ref p, "null"u8, ExprTokenType.Null, 0); if (char.IsDigit((char)c) || c == (byte)'-' || c == (byte)'+') return ParseNumberToken(json, ref p); - return null; + return default; } private static ExprToken ParseStringToken(ReadOnlySpan json, ref int p) { - if (p >= json.Length || json[p] != (byte)'"') return null; + if (p >= json.Length || json[p] != (byte)'"') return default; p++; // Skip opening quote var start = p; var hasEscape = false; @@ -128,21 +128,20 @@ private static ExprToken ParseStringToken(ReadOnlySpan json, ref int p) } p++; } - return null; // Unterminated string + return default; // Unterminated string } private static ExprToken ParseNumberToken(ReadOnlySpan json, ref int p) { var start = p; while (p < json.Length && IsNumberChar(json[p])) p++; - if (p == start) return null; + if (p == start) return default; - var numStr = Encoding.UTF8.GetString(json.Slice(start, p - start)); - if (!double.TryParse(numStr, NumberStyles.Float | NumberStyles.AllowLeadingSign, - CultureInfo.InvariantCulture, out var value)) + var numSpan = json.Slice(start, p - start); + if (!Utf8Parser.TryParse(numSpan, out double value, out var bytesConsumed) || bytesConsumed != numSpan.Length) { p = start; - return null; + return default; } return ExprToken.NewNum(value); } @@ -150,15 +149,15 @@ private static ExprToken ParseNumberToken(ReadOnlySpan json, ref int p) private 
static ExprToken ParseLiteralToken(ReadOnlySpan json, ref int p, ReadOnlySpan literal, ExprTokenType type, double num) { - if (p + literal.Length > json.Length) return null; - if (!json.Slice(p, literal.Length).SequenceEqual(literal)) return null; + if (p + literal.Length > json.Length) return default; + if (!json.Slice(p, literal.Length).SequenceEqual(literal)) return default; // Verify delimiter follows (space, comma, bracket, brace, or end) if (p + literal.Length < json.Length) { var next = (char)json[p + literal.Length]; if (!char.IsWhiteSpace(next) && next != ',' && next != ']' && next != '}') - return null; + return default; } p += literal.Length; @@ -168,7 +167,7 @@ private static ExprToken ParseLiteralToken(ReadOnlySpan json, ref int p, private static ExprToken ParseArrayToken(ReadOnlySpan json, ref int p) { - if (p >= json.Length || json[p] != (byte)'[') return null; + if (p >= json.Length || json[p] != (byte)'[') return default; p++; // Skip '[' SkipWhiteSpace(json, ref p); @@ -185,17 +184,17 @@ private static ExprToken ParseArrayToken(ReadOnlySpan json, ref int p) while (true) { SkipWhiteSpace(json, ref p); - if (p >= json.Length || count >= elements.Length) return null; + if (p >= json.Length || count >= elements.Length) return default; var ele = ParseValueToken(json, ref p); - if (ele == null) return null; + if (ele.IsNone) return default; elements[count++] = ele; SkipWhiteSpace(json, ref p); - if (p >= json.Length) return null; + if (p >= json.Length) return default; if (json[p] == (byte)',') { p++; continue; } if (json[p] == (byte)']') { p++; break; } - return null; // Malformed + return default; // Malformed } var result = new ExprToken[count]; diff --git a/libs/server/Resp/Vector/Filter/ExprCompiler.cs b/libs/server/Resp/Vector/Filter/ExprCompiler.cs index 5b6b6f24bd5..d8d43c9a5b5 100644 --- a/libs/server/Resp/Vector/Filter/ExprCompiler.cs +++ b/libs/server/Resp/Vector/Filter/ExprCompiler.cs @@ -72,7 +72,7 @@ public static ExprProgram 
TryCompile(ReadOnlySpan expr, out int errpos) if (IsDigit(expr[p]) || (minusIsNumber && expr[p] == (byte)'-')) { var t = ParseNumber(expr, ref p); - if (t == null) { errpos = p; return null; } + if (t.IsNone) { errpos = p; return null; } tokens[numTokens++] = t; continue; } @@ -81,7 +81,7 @@ public static ExprProgram TryCompile(ReadOnlySpan expr, out int errpos) if (expr[p] == (byte)'"' || expr[p] == (byte)'\'') { var t = ParseString(expr, ref p); - if (t == null) { errpos = p; return null; } + if (t.IsNone) { errpos = p; return null; } tokens[numTokens++] = t; continue; } @@ -98,7 +98,7 @@ public static ExprProgram TryCompile(ReadOnlySpan expr, out int errpos) if (expr[p] == (byte)'[') { var t = ParseTuple(expr, ref p); - if (t == null) { errpos = p; return null; } + if (t.IsNone) { errpos = p; return null; } tokens[numTokens++] = t; continue; } @@ -107,7 +107,7 @@ public static ExprProgram TryCompile(ReadOnlySpan expr, out int errpos) if (IsLetter(expr[p]) || IsOperatorSpecialChar(expr[p])) { var t = ParseOperatorOrLiteral(expr, ref p); - if (t == null) { errpos = p; return null; } + if (t.IsNone) { errpos = p; return null; } tokens[numTokens++] = t; continue; } @@ -277,7 +277,7 @@ private static ExprToken ParseNumber(ReadOnlySpan s, ref int p) if (!Utf8Parser.TryParse(numSpan, out double value, out var bytesConsumed) || bytesConsumed != numSpan.Length) { p = start; - return null; + return default; } return ExprToken.NewNum(value); } @@ -337,7 +337,7 @@ private static ExprToken ParseString(ReadOnlySpan s, ref int p) } p++; } - return null; // Unterminated string + return default; // Unterminated string } private static ExprToken ParseSelector(ReadOnlySpan s, ref int p) @@ -367,8 +367,8 @@ private static ExprToken ParseTuple(ReadOnlySpan s, ref int p) while (true) { SkipSpaces(s, ref p); - if (p >= s.Length) return null; - if (count >= elements.Length) return null; + if (p >= s.Length) return default; + if (count >= elements.Length) return default; // Parse 
element: number or string ExprToken ele; @@ -382,17 +382,17 @@ private static ExprToken ParseTuple(ReadOnlySpan s, ref int p) } else { - return null; + return default; } - if (ele == null) return null; + if (ele.IsNone) return default; elements[count++] = ele; SkipSpaces(s, ref p); - if (p >= s.Length) return null; + if (p >= s.Length) return default; if (s[p] == (byte)']') { p++; break; } - if (s[p] != (byte)',') return null; + if (s[p] != (byte)',') return default; p++; // Skip comma } @@ -410,7 +410,7 @@ private static ExprToken ParseOperatorOrLiteral(ReadOnlySpan s, ref int p) p++; var matchLen = p - start; - if (matchLen == 0) return null; + if (matchLen == 0) return default; // Check for literals if (matchLen == 4 && s.Slice(start, 4).SequenceEqual("null"u8)) @@ -451,7 +451,7 @@ private static ExprToken ParseOperatorOrLiteral(ReadOnlySpan s, ref int p) if (bestLen == 0) { p = start; - return null; + return default; } // Rewind p to consume only the matched operator length diff --git a/libs/server/Resp/Vector/Filter/ExprRunner.cs b/libs/server/Resp/Vector/Filter/ExprRunner.cs index c2eefedb559..a344c0913df 100644 --- a/libs/server/Resp/Vector/Filter/ExprRunner.cs +++ b/libs/server/Resp/Vector/Filter/ExprRunner.cs @@ -24,6 +24,9 @@ internal static class ExprRunner { private const int MaxStack = 256; + [ThreadStatic] + private static ExprToken[] t_stack; + /// /// Execute the compiled program against JSON attribute data. /// Returns true if the expression evaluates to a truthy value, false otherwise. 
@@ -31,8 +34,8 @@ internal static class ExprRunner /// public static bool Run(ExprProgram program, ReadOnlySpan json) { - // Stack for values during execution - var stack = new ExprToken[MaxStack]; + // Reuse thread-local stack to avoid per-call allocation + var stack = t_stack ??= new ExprToken[MaxStack]; var stackLen = 0; for (var i = 0; i < program.Length; i++) @@ -43,10 +46,17 @@ public static bool Run(ExprProgram program, ReadOnlySpan json) if (inst.TokenType == ExprTokenType.Selector) { var extracted = AttributeExtractor.ExtractField(json, inst.Str); - if (extracted == null) + if (extracted.IsNone) + { + stack.AsSpan(0, stackLen).Clear(); return false; // Selector not found → expression is false (matches Redis) + } - if (stackLen >= MaxStack) return false; + if (stackLen >= MaxStack) + { + stack.AsSpan(0, stackLen).Clear(); + return false; + } stack[stackLen++] = extracted; continue; } @@ -54,17 +64,25 @@ public static bool Run(ExprProgram program, ReadOnlySpan json) // Non-operator values — push directly if (inst.TokenType != ExprTokenType.Op) { - if (stackLen >= MaxStack) return false; + if (stackLen >= MaxStack) + { + stack.AsSpan(0, stackLen).Clear(); + return false; + } stack[stackLen++] = inst; continue; } // Operators — pop operands, compute, push result var arity = OpTable.GetArity(inst.OpCode); - if (stackLen < arity) return false; + if (stackLen < arity) + { + stack.AsSpan(0, stackLen).Clear(); + return false; + } - ExprToken b = stackLen > 0 ? stack[--stackLen] : null; - ExprToken a = arity == 2 && stackLen > 0 ? stack[--stackLen] : null; + ExprToken b = stackLen > 0 ? stack[--stackLen] : default; + ExprToken a = arity == 2 && stackLen > 0 ? 
stack[--stackLen] : default; var result = ExprToken.NewNum(0); @@ -120,12 +138,21 @@ public static bool Run(ExprProgram program, ReadOnlySpan json) break; } - if (stackLen >= MaxStack) return false; + if (stackLen >= MaxStack) + { + stack.AsSpan(0, stackLen).Clear(); + return false; + } stack[stackLen++] = result; } - if (stackLen == 0) return false; - return ToBool(stack[stackLen - 1]) != 0; + var returnValue = false; + if (stackLen > 0) + returnValue = ToBool(stack[stackLen - 1]) != 0; + + // Clear used portion to release string references for GC + stack.AsSpan(0, stackLen).Clear(); + return returnValue; } // ======================== Type conversion helpers ======================== @@ -137,7 +164,7 @@ public static bool Run(ExprProgram program, ReadOnlySpan json) /// private static double ToNum(ExprToken t) { - if (t == null) return 0; + if (t.IsNone) return 0; if (t.TokenType == ExprTokenType.Num) return t.Num; if (t.TokenType == ExprTokenType.Str && t.Str != null) { @@ -153,7 +180,7 @@ private static double ToNum(ExprToken t) /// private static double ToBool(ExprToken t) { - if (t == null) return 0; + if (t.IsNone) return 0; if (t.TokenType == ExprTokenType.Num) return t.Num != 0 ? 
1 : 0; if (t.TokenType == ExprTokenType.Str && (t.Str == null || t.Str.Length == 0)) return 0; if (t.TokenType == ExprTokenType.Null) return 0; @@ -170,7 +197,7 @@ private static double ToBool(ExprToken t) /// private static bool AreEqual(ExprToken a, ExprToken b) { - if (a == null || b == null) return a == null && b == null; + if (a.IsNone || b.IsNone) return a.IsNone && b.IsNone; // Both strings if (a.TokenType == ExprTokenType.Str && b.TokenType == ExprTokenType.Str) @@ -197,7 +224,7 @@ private static bool AreEqual(ExprToken a, ExprToken b) /// private static bool EvalIn(ExprToken a, ExprToken b) { - if (b == null) return false; + if (b.IsNone) return false; // Tuple membership (works for both expression tuples [1,2,3] and JSON array tuples) if (b.TokenType == ExprTokenType.Tuple) @@ -211,7 +238,7 @@ private static bool EvalIn(ExprToken a, ExprToken b) } // String substring check (matching Redis exprTokensStringIn) - if (a != null && a.TokenType == ExprTokenType.Str && b.TokenType == ExprTokenType.Str) + if (!a.IsNone && a.TokenType == ExprTokenType.Str && b.TokenType == ExprTokenType.Str) { if (a.Str == null || b.Str == null) return false; if (a.Str.Length > b.Str.Length) return false; diff --git a/libs/server/Resp/Vector/Filter/VectorFilterExpression.cs b/libs/server/Resp/Vector/Filter/VectorFilterExpression.cs index 9f099657f9c..307e176dfa1 100644 --- a/libs/server/Resp/Vector/Filter/VectorFilterExpression.cs +++ b/libs/server/Resp/Vector/Filter/VectorFilterExpression.cs @@ -29,13 +29,14 @@ namespace Garnet.server.Vector.Filter /// internal enum ExprTokenType : byte { - Num = 0, - Str = 1, - Tuple = 2, - Selector = 3, - Op = 4, - Null = 5, - Eof = 6, + None = 0, + Num = 1, + Str = 2, + Tuple = 3, + Selector = 4, + Op = 5, + Null = 6, + Eof = 7, } /// @@ -114,7 +115,7 @@ internal enum OpCode : byte /// execution (e.g. from JSON field extraction) are /// transient and discarded after each call. 
/// - internal sealed class ExprToken + internal struct ExprToken { public ExprTokenType TokenType; @@ -133,6 +134,9 @@ internal sealed class ExprToken /// Number of elements in the tuple. public int TupleLength; + /// True when this token is the default (uninitialized) value, replacing null checks. + public readonly bool IsNone => TokenType == ExprTokenType.None; + public static ExprToken NewNum(double value) { return new ExprToken { TokenType = ExprTokenType.Num, Num = value }; diff --git a/test/Garnet.test/Filter/ExprTestHelpers.cs b/test/Garnet.test/Filter/ExprTestHelpers.cs index c4aa5e44f7e..9ce1a6fb17a 100644 --- a/test/Garnet.test/Filter/ExprTestHelpers.cs +++ b/test/Garnet.test/Filter/ExprTestHelpers.cs @@ -68,7 +68,7 @@ private static ExprToken RunAndReturnTop(ExprProgram program, ReadOnlySpan if (inst.TokenType == ExprTokenType.Selector) { var extracted = AttributeExtractor.ExtractField(json, inst.Str); - if (extracted == null) + if (extracted.IsNone) return ExprToken.NewNull(); stack[stackLen++] = extracted; continue; @@ -81,8 +81,8 @@ private static ExprToken RunAndReturnTop(ExprProgram program, ReadOnlySpan } var arity = OpTable.GetArity(inst.OpCode); - ExprToken b = stackLen > 0 ? stack[--stackLen] : null; - ExprToken a = arity == 2 && stackLen > 0 ? stack[--stackLen] : null; + ExprToken b = stackLen > 0 ? stack[--stackLen] : default; + ExprToken a = arity == 2 && stackLen > 0 ? 
stack[--stackLen] : default; var result = ExprToken.NewNum(0); @@ -146,7 +146,7 @@ private static ExprToken RunAndReturnTop(ExprProgram program, ReadOnlySpan private static double TokenToNum(ExprToken t) { - if (t == null) return 0; + if (t.IsNone) return 0; if (t.TokenType == ExprTokenType.Num) return t.Num; if (t.TokenType == ExprTokenType.Str && t.Str != null) { @@ -158,7 +158,7 @@ private static double TokenToNum(ExprToken t) private static double TokenToBool(ExprToken t) { - if (t == null) return 0; + if (t.IsNone) return 0; if (t.TokenType == ExprTokenType.Num) return t.Num != 0 ? 1 : 0; if (t.TokenType == ExprTokenType.Str && (t.Str == null || t.Str.Length == 0)) return 0; if (t.TokenType == ExprTokenType.Null) return 0; @@ -167,7 +167,7 @@ private static double TokenToBool(ExprToken t) private static bool TokensEqual(ExprToken a, ExprToken b) { - if (a == null || b == null) return a == null && b == null; + if (a.IsNone || b.IsNone) return a.IsNone && b.IsNone; if (a.TokenType == ExprTokenType.Str && b.TokenType == ExprTokenType.Str) return string.Equals(a.Str, b.Str, StringComparison.Ordinal); if (a.TokenType == ExprTokenType.Num && b.TokenType == ExprTokenType.Num) @@ -179,7 +179,7 @@ private static bool TokensEqual(ExprToken a, ExprToken b) private static bool EvalIn(ExprToken a, ExprToken b) { - if (b == null) return false; + if (b.IsNone) return false; if (b.TokenType == ExprTokenType.Tuple) { for (var i = 0; i < b.TupleLength; i++) @@ -189,7 +189,7 @@ private static bool EvalIn(ExprToken a, ExprToken b) } return false; } - if (a != null && a.TokenType == ExprTokenType.Str && b.TokenType == ExprTokenType.Str) + if (!a.IsNone && a.TokenType == ExprTokenType.Str && b.TokenType == ExprTokenType.Str) { if (a.Str == null || b.Str == null) return false; return b.Str.IndexOf(a.Str, StringComparison.Ordinal) >= 0; From a03695122892593ff6ab1368e51369c466b2682a Mon Sep 17 00:00:00 2001 From: Haiyang Xu Date: Fri, 27 Feb 2026 16:18:42 -0800 Subject: [PATCH 20/31] 
fix test --- test/Garnet.test/RespVectorSetTests.cs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/test/Garnet.test/RespVectorSetTests.cs b/test/Garnet.test/RespVectorSetTests.cs index 8e43600caa6..5b4d72ead1d 100644 --- a/test/Garnet.test/RespVectorSetTests.cs +++ b/test/Garnet.test/RespVectorSetTests.cs @@ -686,17 +686,17 @@ public void VSIMWithAttributeFiltering() // Add first vector with year=1980 var res1 = db.Execute("VADD", ["foo", "VALUES", "3", "1.0", "2.0", "3.0", new byte[] { 0, 0, 0, 0 }, - "CAS", "Q8", "EF", "16", "M", "32", "SETATTR", "{\"year\":1980}"]); + "CAS", "NOQUANT", "EF", "16", "M", "32", "SETATTR", "{\"year\":1980}"]); ClassicAssert.AreEqual(1, (int)res1); // Add second vector with year=1960 var res2 = db.Execute("VADD", ["foo", "VALUES", "3", "2.0", "3.0", "4.0", new byte[] { 0, 0, 0, 1 }, - "CAS", "Q8", "EF", "16", "M", "32", "SETATTR", "{\"year\":1960}"]); + "CAS", "NOQUANT", "EF", "16", "M", "32", "SETATTR", "{\"year\":1960}"]); ClassicAssert.AreEqual(1, (int)res2); // Add third vector with year=1940 var res3 = db.Execute("VADD", ["foo", "VALUES", "3", "1.5", "2.5", "3.5", new byte[] { 0, 0, 0, 2 }, - "CAS", "Q8", "EF", "16", "M", "32", "SETATTR", "{\"year\":1940}"]); + "CAS", "NOQUANT", "EF", "16", "M", "32", "SETATTR", "{\"year\":1940}"]); ClassicAssert.AreEqual(1, (int)res3); @@ -734,11 +734,11 @@ public void VSIMWithFilterButWithoutWithAttribs() // Add vectors with attributes db.Execute("VADD", ["foo", "VALUES", "3", "1.0", "2.0", "3.0", new byte[] { 0, 0, 0, 0 }, - "CAS", "Q8", "EF", "16", "M", "32", "SETATTR", "{\"year\":1980}"]); + "CAS", "NOQUANT", "EF", "16", "M", "32", "SETATTR", "{\"year\":1980}"]); db.Execute("VADD", ["foo", "VALUES", "3", "2.0", "3.0", "4.0", new byte[] { 0, 0, 0, 1 }, - "CAS", "Q8", "EF", "16", "M", "32", "SETATTR", "{\"year\":1960}"]); + "CAS", "NOQUANT", "EF", "16", "M", "32", "SETATTR", "{\"year\":1960}"]); db.Execute("VADD", ["foo", "VALUES", "3", "1.5", "2.5", 
"3.5", new byte[] { 0, 0, 0, 2 }, - "CAS", "Q8", "EF", "16", "M", "32", "SETATTR", "{\"year\":1940}"]); + "CAS", "NOQUANT", "EF", "16", "M", "32", "SETATTR", "{\"year\":1940}"]); // FILTER without WITHATTRIBS should work: fetch attributes internally and apply filter var res = (byte[][])db.Execute("VSIM", ["foo", "VALUES", "3", "0.0", "0.0", "0.0", @@ -846,15 +846,15 @@ private static byte[] SeedMoviesForAdvancedFiltering(IDatabase db) var queryElementId = new byte[] { 0, 0, 0, 0 }; var res1 = db.Execute("VADD", ["movies", "VALUES", "3", "1.0", "2.0", "3.0", queryElementId, - "CAS", "Q8", "EF", "16", "M", "32", "SETATTR", "{\"year\":1980,\"rating\":4.5,\"genre\":\"action\",\"tags\":[\"classic\",\"popular\"]}"]); + "CAS", "NOQUANT", "EF", "16", "M", "32", "SETATTR", "{\"year\":1980,\"rating\":4.5,\"genre\":\"action\",\"tags\":[\"classic\",\"popular\"]}"]); ClassicAssert.AreEqual(1, (int)res1); var res2 = db.Execute("VADD", ["movies", "VALUES", "3", "2.0", "3.0", "4.0", new byte[] { 0, 0, 0, 1 }, - "CAS", "Q8", "EF", "16", "M", "32", "SETATTR", "{\"year\":1960,\"rating\":3.8,\"genre\":\"drama\",\"tags\":[\"classic\"]}"]); + "CAS", "NOQUANT", "EF", "16", "M", "32", "SETATTR", "{\"year\":1960,\"rating\":3.8,\"genre\":\"drama\",\"tags\":[\"classic\"]}"]); ClassicAssert.AreEqual(1, (int)res2); var res3 = db.Execute("VADD", ["movies", "VALUES", "3", "1.5", "2.5", "3.5", new byte[] { 0, 0, 0, 2 }, - "CAS", "Q8", "EF", "16", "M", "32", "SETATTR", "{\"year\":2010,\"rating\":4.2,\"genre\":\"action\",\"tags\":[\"modern\"]}"]); + "CAS", "NOQUANT", "EF", "16", "M", "32", "SETATTR", "{\"year\":2010,\"rating\":4.2,\"genre\":\"action\",\"tags\":[\"modern\"]}"]); ClassicAssert.AreEqual(1, (int)res3); return queryElementId; From f14b6d519b71ab9c5e97e9997257e75a6bac871d Mon Sep 17 00:00:00 2001 From: Haiyang Xu Date: Tue, 3 Mar 2026 13:01:31 -0800 Subject: [PATCH 21/31] reusable evaluation stack with default capacity (16) --- .../server/Resp/Vector/Filter/ExprCompiler.cs | 8 ++- 
libs/server/Resp/Vector/Filter/ExprRunner.cs | 68 +++++++++---------- libs/server/Resp/Vector/VectorManager.cs | 5 +- test/Garnet.test/Filter/ExprRunnerTests.cs | 5 +- test/Garnet.test/Filter/ExprTestHelpers.cs | 2 +- 5 files changed, 49 insertions(+), 39 deletions(-) diff --git a/libs/server/Resp/Vector/Filter/ExprCompiler.cs b/libs/server/Resp/Vector/Filter/ExprCompiler.cs index d8d43c9a5b5..fa18c2a6bbe 100644 --- a/libs/server/Resp/Vector/Filter/ExprCompiler.cs +++ b/libs/server/Resp/Vector/Filter/ExprCompiler.cs @@ -8,7 +8,7 @@ namespace Garnet.server.Vector.Filter { /// - /// Shunting-yard compiler that tokenizes and compiles a filter expression string + /// Shunting-Yard compiler that tokenizes and compiles a filter expression string /// into a flat postfix . /// /// Single-pass tokenize-and-compile approach modeled after Redis expr.c. @@ -16,6 +16,12 @@ namespace Garnet.server.Vector.Filter /// The compiled program is a flat array of instructions /// (values + operators in postfix order) that can be executed by . /// + /// Example: + /// Input expression: + /// .price < 100 and .category == "books" + /// Compiled postfix order: + /// .price 100 < .category "books" == and + /// /// Safety limits: /// - Maximum 1024 tokens per expression (prevents unbounded allocation). /// - Maximum 256 instructions in the compiled program. diff --git a/libs/server/Resp/Vector/Filter/ExprRunner.cs b/libs/server/Resp/Vector/Filter/ExprRunner.cs index a344c0913df..4be7bf72c93 100644 --- a/libs/server/Resp/Vector/Filter/ExprRunner.cs +++ b/libs/server/Resp/Vector/Filter/ExprRunner.cs @@ -2,6 +2,7 @@ // Licensed under the MIT license. 
using System; +using System.Collections.Generic; using System.Globalization; namespace Garnet.server.Vector.Filter { @@ -22,21 +23,26 @@ namespace Garnet.server.Vector.Filter /// internal static class ExprRunner { - private const int MaxStack = 256; + private const int DefaultStackCapacity = 16; - [ThreadStatic] - private static ExprToken[] t_stack; + /// + /// Create a reusable evaluation stack with default capacity (16). + /// The caller owns the list and can pass it to across multiple calls. + /// The list is cleared at the start of each Run call, so the caller does not need to clear it. + /// + public static List CreateStack() => new List(DefaultStackCapacity); /// /// Execute the compiled program against JSON attribute data. /// Returns true if the expression evaluates to a truthy value, false otherwise. /// Returns false if the JSON is malformed or a selector cannot be resolved. /// - public static bool Run(ExprProgram program, ReadOnlySpan json) + /// The compiled postfix program. + /// Raw JSON attribute bytes to evaluate against. + /// A reusable evaluation stack obtained from . 
+ public static bool Run(ExprProgram program, ReadOnlySpan json, List stack) { - // Reuse thread-local stack to avoid per-call allocation - var stack = t_stack ??= new ExprToken[MaxStack]; - var stackLen = 0; + stack.Clear(); for (var i = 0; i < program.Length; i++) { @@ -48,41 +54,31 @@ public static bool Run(ExprProgram program, ReadOnlySpan json) var extracted = AttributeExtractor.ExtractField(json, inst.Str); if (extracted.IsNone) { - stack.AsSpan(0, stackLen).Clear(); + stack.Clear(); return false; // Selector not found → expression is false (matches Redis) } - if (stackLen >= MaxStack) - { - stack.AsSpan(0, stackLen).Clear(); - return false; - } - stack[stackLen++] = extracted; + stack.Add(extracted); continue; } // Non-operator values — push directly if (inst.TokenType != ExprTokenType.Op) { - if (stackLen >= MaxStack) - { - stack.AsSpan(0, stackLen).Clear(); - return false; - } - stack[stackLen++] = inst; + stack.Add(inst); continue; } // Operators — pop operands, compute, push result var arity = OpTable.GetArity(inst.OpCode); - if (stackLen < arity) + if (stack.Count < arity) { - stack.AsSpan(0, stackLen).Clear(); + stack.Clear(); return false; } - ExprToken b = stackLen > 0 ? stack[--stackLen] : default; - ExprToken a = arity == 2 && stackLen > 0 ? stack[--stackLen] : default; + ExprToken b = stack.Count > 0 ? Pop(stack) : default; + ExprToken a = arity == 2 && stack.Count > 0 ? 
Pop(stack) : default; var result = ExprToken.NewNum(0); @@ -138,23 +134,27 @@ public static bool Run(ExprProgram program, ReadOnlySpan json) break; } - if (stackLen >= MaxStack) - { - stack.AsSpan(0, stackLen).Clear(); - return false; - } - stack[stackLen++] = result; + stack.Add(result); } var returnValue = false; - if (stackLen > 0) - returnValue = ToBool(stack[stackLen - 1]) != 0; + if (stack.Count > 0) + returnValue = ToBool(stack[stack.Count - 1]) != 0; - // Clear used portion to release string references for GC - stack.AsSpan(0, stackLen).Clear(); + // Clear to release string references for GC + stack.Clear(); return returnValue; } + /// Pop the last element from the stack. + private static ExprToken Pop(List stack) + { + var last = stack.Count - 1; + var value = stack[last]; + stack.RemoveAt(last); + return value; + } + // ======================== Type conversion helpers ======================== /// diff --git a/libs/server/Resp/Vector/VectorManager.cs b/libs/server/Resp/Vector/VectorManager.cs index 1128a4b389a..acd9c3739ca 100644 --- a/libs/server/Resp/Vector/VectorManager.cs +++ b/libs/server/Resp/Vector/VectorManager.cs @@ -977,6 +977,9 @@ private int ApplyPostFilter( var filteredCount = 0; + // Allocate the evaluation stack once and reuse it across all candidate evaluations + var stack = ExprRunner.CreateStack(); + var idsSpan = outputIds.AsSpan(); var distancesSpan = MemoryMarshal.Cast(outputDistances.AsSpan()); var attributesSpan = outputAttributes.AsSpan(); @@ -999,7 +1002,7 @@ private int ApplyPostFilter( // Execute the compiled filter program against raw JSON bytes. // No JsonDocument DOM allocation — AttributeExtractor extracts fields on demand. 
- if (ExprRunner.Run(program, attrData)) + if (ExprRunner.Run(program, attrData, stack)) { // Copy ID if not already in place if (idReadPos != idWritePos) diff --git a/test/Garnet.test/Filter/ExprRunnerTests.cs b/test/Garnet.test/Filter/ExprRunnerTests.cs index 41f2f015655..8a756092bf0 100644 --- a/test/Garnet.test/Filter/ExprRunnerTests.cs +++ b/test/Garnet.test/Filter/ExprRunnerTests.cs @@ -183,10 +183,11 @@ public void Runner_NonJsonAttributesExcluded() ClassicAssert.IsNotNull(program); var nonJson = System.Text.Encoding.UTF8.GetBytes("this is not json"); - ClassicAssert.IsFalse(ExprRunner.Run(program, nonJson)); + var stack = ExprRunner.CreateStack(); + ClassicAssert.IsFalse(ExprRunner.Run(program, nonJson, stack)); var emptyJson = System.Text.Encoding.UTF8.GetBytes(""); - ClassicAssert.IsFalse(ExprRunner.Run(program, emptyJson)); + ClassicAssert.IsFalse(ExprRunner.Run(program, emptyJson, stack)); } [Test] diff --git a/test/Garnet.test/Filter/ExprTestHelpers.cs b/test/Garnet.test/Filter/ExprTestHelpers.cs index 9ce1a6fb17a..e5d50697718 100644 --- a/test/Garnet.test/Filter/ExprTestHelpers.cs +++ b/test/Garnet.test/Filter/ExprTestHelpers.cs @@ -39,7 +39,7 @@ internal static bool EvaluateFilterTruthy(string expression, string json) throw new InvalidOperationException($"Compilation failed at position {errpos}"); var jsonBytes = Encoding.UTF8.GetBytes(json); - return ExprRunner.Run(program, jsonBytes); + return ExprRunner.Run(program, jsonBytes, ExprRunner.CreateStack()); } /// From 891bed25b9b077cea657ed58bacfa92a509d4c42 Mon Sep 17 00:00:00 2001 From: Haiyang Xu Date: Tue, 3 Mar 2026 13:10:58 -0800 Subject: [PATCH 22/31] use stack with default 16 capacity to minimize the allocation as most of the filters should just fit in --- .../server/Resp/Vector/Filter/ExprCompiler.cs | 80 +++++++------------ libs/server/Resp/Vector/Filter/ExprRunner.cs | 29 +++---- 2 files changed, 41 insertions(+), 68 deletions(-) diff --git a/libs/server/Resp/Vector/Filter/ExprCompiler.cs 
b/libs/server/Resp/Vector/Filter/ExprCompiler.cs index fa18c2a6bbe..dc92b6054d7 100644 --- a/libs/server/Resp/Vector/Filter/ExprCompiler.cs +++ b/libs/server/Resp/Vector/Filter/ExprCompiler.cs @@ -3,6 +3,7 @@ using System; using System.Buffers.Text; +using System.Collections.Generic; using System.Text; namespace Garnet.server.Vector.Filter @@ -22,14 +23,10 @@ namespace Garnet.server.Vector.Filter /// Compiled postfix order: /// .price 100 < .category "books" == and /// - /// Safety limits: - /// - Maximum 1024 tokens per expression (prevents unbounded allocation). - /// - Maximum 256 instructions in the compiled program. /// internal static class ExprCompiler { - private const int MaxTokens = 1024; - private const int MaxProgram = 256; + private const int DefaultCapacity = 16; /// /// Compile a filter expression (as UTF-8 bytes) into a flat postfix program. @@ -42,8 +39,7 @@ public static ExprProgram TryCompile(ReadOnlySpan expr, out int errpos) return null; // Phase 1: Tokenize into a flat list - var tokens = new ExprToken[MaxTokens]; - var numTokens = 0; + var tokens = new List(DefaultCapacity); var p = 0; while (p < expr.Length) @@ -52,23 +48,17 @@ public static ExprProgram TryCompile(ReadOnlySpan expr, out int errpos) if (p >= expr.Length) break; - if (numTokens >= MaxTokens) - { - errpos = p; - return null; - } - // Determine if '-' should be a negative number sign or a subtraction operator var minusIsNumber = false; if (expr[p] == (byte)'-' && p + 1 < expr.Length && (IsDigit(expr[p + 1]) || expr[p + 1] == (byte)'.')) { - if (numTokens == 0) + if (tokens.Count == 0) { minusIsNumber = true; } else { - var prev = tokens[numTokens - 1]; + var prev = tokens[tokens.Count - 1]; if (prev.TokenType == ExprTokenType.Op && prev.OpCode != OpCode.CParen) minusIsNumber = true; } @@ -79,7 +69,7 @@ public static ExprProgram TryCompile(ReadOnlySpan expr, out int errpos) { var t = ParseNumber(expr, ref p); if (t.IsNone) { errpos = p; return null; } - tokens[numTokens++] = t; + 
tokens.Add(t); continue; } @@ -88,7 +78,7 @@ public static ExprProgram TryCompile(ReadOnlySpan expr, out int errpos) { var t = ParseString(expr, ref p); if (t.IsNone) { errpos = p; return null; } - tokens[numTokens++] = t; + tokens.Add(t); continue; } @@ -96,7 +86,7 @@ public static ExprProgram TryCompile(ReadOnlySpan expr, out int errpos) if (expr[p] == (byte)'.' && p + 1 < expr.Length && IsSelectorChar(expr[p + 1])) { var t = ParseSelector(expr, ref p); - tokens[numTokens++] = t; + tokens.Add(t); continue; } @@ -105,7 +95,7 @@ public static ExprProgram TryCompile(ReadOnlySpan expr, out int errpos) { var t = ParseTuple(expr, ref p); if (t.IsNone) { errpos = p; return null; } - tokens[numTokens++] = t; + tokens.Add(t); continue; } @@ -114,7 +104,7 @@ public static ExprProgram TryCompile(ReadOnlySpan expr, out int errpos) { var t = ParseOperatorOrLiteral(expr, ref p); if (t.IsNone) { errpos = p; return null; } - tokens[numTokens++] = t; + tokens.Add(t); continue; } @@ -123,13 +113,11 @@ public static ExprProgram TryCompile(ReadOnlySpan expr, out int errpos) } // Phase 2: Shunting-yard compilation to postfix - var program = new ExprToken[MaxProgram]; - var programLen = 0; - var opsStack = new ExprToken[MaxTokens]; - var opsLen = 0; + var program = new List(DefaultCapacity); + var opsStack = new Stack(DefaultCapacity); var stackItems = 0; // track what would be on the values stack at runtime - for (var i = 0; i < numTokens; i++) + for (var i = 0; i < tokens.Count; i++) { var token = tokens[i]; @@ -140,8 +128,7 @@ public static ExprProgram TryCompile(ReadOnlySpan expr, out int errpos) token.TokenType == ExprTokenType.Selector || token.TokenType == ExprTokenType.Null) { - if (programLen >= MaxProgram) { errpos = 0; return null; } - program[programLen++] = token; + program.Add(token); stackItems++; continue; } @@ -149,16 +136,16 @@ public static ExprProgram TryCompile(ReadOnlySpan expr, out int errpos) // Operators if (token.TokenType == ExprTokenType.Op) { - if 
(!ProcessOperator(token, program, ref programLen, opsStack, ref opsLen, ref stackItems, out errpos)) + if (!ProcessOperator(token, program, opsStack, ref stackItems, out errpos)) return null; continue; } } // Flush remaining operators from the stack - while (opsLen > 0) + while (opsStack.Count > 0) { - var op = opsStack[--opsLen]; + var op = opsStack.Pop(); if (op.OpCode == OpCode.OParen) { errpos = 0; @@ -167,15 +154,14 @@ public static ExprProgram TryCompile(ReadOnlySpan expr, out int errpos) var arity = OpTable.GetArity(op.OpCode); if (stackItems < arity) { errpos = 0; return null; } - if (programLen >= MaxProgram) { errpos = 0; return null; } - program[programLen++] = op; + program.Add(op); stackItems = stackItems - arity + 1; } // After compilation, exactly one value should remain on the stack if (stackItems != 1) { errpos = 0; return null; } - return new ExprProgram { Instructions = program, Length = programLen }; + return new ExprProgram { Instructions = program.ToArray(), Length = program.Count }; } /// @@ -184,8 +170,8 @@ public static ExprProgram TryCompile(ReadOnlySpan expr, out int errpos) /// private static bool ProcessOperator( ExprToken op, - ExprToken[] program, ref int programLen, - ExprToken[] opsStack, ref int opsLen, + List program, + Stack opsStack, ref int stackItems, out int errpos) { @@ -193,8 +179,7 @@ private static bool ProcessOperator( if (op.OpCode == OpCode.OParen) { - if (opsLen >= opsStack.Length) { errpos = 0; return false; } - opsStack[opsLen++] = op; + opsStack.Push(op); return true; } @@ -203,15 +188,14 @@ private static bool ProcessOperator( // Pop operators until matching '(' while (true) { - if (opsLen == 0) { errpos = 0; return false; } // Unmatched ')' - var topOp = opsStack[--opsLen]; + if (opsStack.Count == 0) { errpos = 0; return false; } // Unmatched ')' + var topOp = opsStack.Pop(); if (topOp.OpCode == OpCode.OParen) return true; var arity = OpTable.GetArity(topOp.OpCode); if (stackItems < arity) { errpos = 0; return 
false; } - if (programLen >= MaxProgram) { errpos = 0; return false; } - program[programLen++] = topOp; + program.Add(topOp); stackItems = stackItems - arity + 1; } } @@ -219,9 +203,9 @@ private static bool ProcessOperator( var curPrec = OpTable.GetPrecedence(op.OpCode); // Pop operators with higher or equal precedence - while (opsLen > 0) + while (opsStack.Count > 0) { - var topOp = opsStack[opsLen - 1]; + var topOp = opsStack.Peek(); if (topOp.OpCode == OpCode.OParen) break; var topPrec = OpTable.GetPrecedence(topOp.OpCode); @@ -230,16 +214,14 @@ private static bool ProcessOperator( // Right-associative: ** only pops if strictly higher if (op.OpCode == OpCode.Pow && topPrec <= curPrec) break; - opsLen--; + opsStack.Pop(); var arity = OpTable.GetArity(topOp.OpCode); if (stackItems < arity) { errpos = 0; return false; } - if (programLen >= MaxProgram) { errpos = 0; return false; } - program[programLen++] = topOp; + program.Add(topOp); stackItems = stackItems - arity + 1; } - if (opsLen >= opsStack.Length) { errpos = 0; return false; } - opsStack[opsLen++] = op; + opsStack.Push(op); return true; } @@ -477,4 +459,4 @@ private static void TryMatchOp(ReadOnlySpan consumed, ReadOnlySpan o } } } -} \ No newline at end of file +} diff --git a/libs/server/Resp/Vector/Filter/ExprRunner.cs b/libs/server/Resp/Vector/Filter/ExprRunner.cs index 4be7bf72c93..32896bcf762 100644 --- a/libs/server/Resp/Vector/Filter/ExprRunner.cs +++ b/libs/server/Resp/Vector/Filter/ExprRunner.cs @@ -27,10 +27,10 @@ internal static class ExprRunner /// /// Create a reusable evaluation stack with default capacity (16). - /// The caller owns the list and can pass it to across multiple calls. - /// The list is cleared at the start of each Run call, so the caller does not need to clear it. + /// The caller owns the stack and can pass it to across multiple calls. + /// The stack is cleared at the start of each Run call, so the caller does not need to clear it. 
/// - public static List CreateStack() => new List(DefaultStackCapacity); + public static Stack CreateStack() => new Stack(DefaultStackCapacity); /// /// Execute the compiled program against JSON attribute data. @@ -40,7 +40,7 @@ internal static class ExprRunner /// The compiled postfix program. /// Raw JSON attribute bytes to evaluate against. /// A reusable evaluation stack obtained from . - public static bool Run(ExprProgram program, ReadOnlySpan json, List stack) + public static bool Run(ExprProgram program, ReadOnlySpan json, Stack stack) { stack.Clear(); @@ -58,14 +58,14 @@ public static bool Run(ExprProgram program, ReadOnlySpan json, List json, List 0 ? Pop(stack) : default; - ExprToken a = arity == 2 && stack.Count > 0 ? Pop(stack) : default; + ExprToken b = stack.Count > 0 ? stack.Pop() : default; + ExprToken a = arity == 2 && stack.Count > 0 ? stack.Pop() : default; var result = ExprToken.NewNum(0); @@ -134,27 +134,18 @@ public static bool Run(ExprProgram program, ReadOnlySpan json, List 0) - returnValue = ToBool(stack[stack.Count - 1]) != 0; + returnValue = ToBool(stack.Peek()) != 0; // Clear to release string references for GC stack.Clear(); return returnValue; } - /// Pop the last element from the stack. 
- private static ExprToken Pop(List stack) - { - var last = stack.Count - 1; - var value = stack[last]; - stack.RemoveAt(last); - return value; - } - // ======================== Type conversion helpers ======================== /// From 331adeb9cb2dcf404074ada8b6d63c10b2986228 Mon Sep 17 00:00:00 2001 From: Haiyang Xu Date: Tue, 3 Mar 2026 13:37:44 -0800 Subject: [PATCH 23/31] add more tests for attribute extractor --- .../Resp/Vector/Filter/AttributeExtractor.cs | 7 +- .../Vector/Filter/VectorFilterExpression.cs | 1 - .../Filter/AttributeExtractorTests.cs | 476 ++++++++++++++++++ 3 files changed, 477 insertions(+), 7 deletions(-) create mode 100644 test/Garnet.test/Filter/AttributeExtractorTests.cs diff --git a/libs/server/Resp/Vector/Filter/AttributeExtractor.cs b/libs/server/Resp/Vector/Filter/AttributeExtractor.cs index 0e0bd82203f..3a14664f483 100644 --- a/libs/server/Resp/Vector/Filter/AttributeExtractor.cs +++ b/libs/server/Resp/Vector/Filter/AttributeExtractor.cs @@ -3,7 +3,6 @@ using System; using System.Buffers.Text; -using System.Runtime.CompilerServices; using System.Text; namespace Garnet.server.Vector.Filter @@ -89,7 +88,7 @@ private static ExprToken ParseValueToken(ReadOnlySpan json, ref int p) if (c == (byte)'t') return ParseLiteralToken(json, ref p, "true"u8, ExprTokenType.Num, 1); if (c == (byte)'f') return ParseLiteralToken(json, ref p, "false"u8, ExprTokenType.Num, 0); if (c == (byte)'n') return ParseLiteralToken(json, ref p, "null"u8, ExprTokenType.Null, 0); - if (char.IsDigit((char)c) || c == (byte)'-' || c == (byte)'+') + if ((c >= (byte)'0' && c <= (byte)'9') || c == (byte)'-' || c == (byte)'+') return ParseNumberToken(json, ref p); return default; @@ -222,7 +221,6 @@ private static bool SkipValue(ReadOnlySpan json, ref int p) }; } - [MethodImpl(MethodImplOptions.AggressiveInlining)] private static bool SkipString(ReadOnlySpan json, ref int p) { if (p >= json.Length || json[p] != (byte)'"') return false; @@ -271,16 +269,13 @@ private static 
bool SkipNumber(ReadOnlySpan json, ref int p) // ======================== Utility ======================== - [MethodImpl(MethodImplOptions.AggressiveInlining)] private static void SkipWhiteSpace(ReadOnlySpan json, ref int p) { while (p < json.Length && IsWhiteSpace(json[p])) p++; } - [MethodImpl(MethodImplOptions.AggressiveInlining)] private static bool IsWhiteSpace(byte b) => b == (byte)' ' || b == (byte)'\t' || b == (byte)'\n' || b == (byte)'\r'; - [MethodImpl(MethodImplOptions.AggressiveInlining)] private static bool IsNumberChar(byte b) => (b >= (byte)'0' && b <= (byte)'9') || b == (byte)'-' || b == (byte)'+' || b == (byte)'.' || b == (byte)'e' || b == (byte)'E'; diff --git a/libs/server/Resp/Vector/Filter/VectorFilterExpression.cs b/libs/server/Resp/Vector/Filter/VectorFilterExpression.cs index 307e176dfa1..86966b0fbaa 100644 --- a/libs/server/Resp/Vector/Filter/VectorFilterExpression.cs +++ b/libs/server/Resp/Vector/Filter/VectorFilterExpression.cs @@ -36,7 +36,6 @@ internal enum ExprTokenType : byte Selector = 4, Op = 5, Null = 6, - Eof = 7, } /// diff --git a/test/Garnet.test/Filter/AttributeExtractorTests.cs b/test/Garnet.test/Filter/AttributeExtractorTests.cs new file mode 100644 index 00000000000..f5b69ac62a6 --- /dev/null +++ b/test/Garnet.test/Filter/AttributeExtractorTests.cs @@ -0,0 +1,476 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +using System.Text; +using Allure.NUnit; +using Garnet.server.Vector.Filter; +using NUnit.Framework; +using NUnit.Framework.Legacy; + +namespace Garnet.test +{ + /// + /// Tests for — the raw-byte JSON field extractor + /// used by the filter expression VM to resolve selectors on demand. 
+ /// + [AllureNUnit] + [TestFixture] + public class AttributeExtractorTests : AllureTestBase + { + private static ExprToken Extract(string json, string field) + => AttributeExtractor.ExtractField(Encoding.UTF8.GetBytes(json), field); + + // ======================== Number extraction ======================== + + [Test] + public void ExtractField_Integer() + { + var token = Extract("{\"year\":1980}", "year"); + ClassicAssert.AreEqual(ExprTokenType.Num, token.TokenType); + ClassicAssert.AreEqual(1980.0, token.Num); + } + + [Test] + public void ExtractField_NegativeInteger() + { + var token = Extract("{\"temp\":-42}", "temp"); + ClassicAssert.AreEqual(ExprTokenType.Num, token.TokenType); + ClassicAssert.AreEqual(-42.0, token.Num); + } + + [Test] + public void ExtractField_Decimal() + { + var token = Extract("{\"rating\":4.5}", "rating"); + ClassicAssert.AreEqual(ExprTokenType.Num, token.TokenType); + ClassicAssert.AreEqual(4.5, token.Num, 0.001); + } + + [Test] + public void ExtractField_ScientificNotation() + { + var token = Extract("{\"val\":1.5e3}", "val"); + ClassicAssert.AreEqual(ExprTokenType.Num, token.TokenType); + ClassicAssert.AreEqual(1500.0, token.Num); + } + + [Test] + public void ExtractField_Zero() + { + var token = Extract("{\"val\":0}", "val"); + ClassicAssert.AreEqual(ExprTokenType.Num, token.TokenType); + ClassicAssert.AreEqual(0.0, token.Num); + } + + // ======================== String extraction ======================== + + [Test] + public void ExtractField_SimpleString() + { + var token = Extract("{\"genre\":\"action\"}", "genre"); + ClassicAssert.AreEqual(ExprTokenType.Str, token.TokenType); + ClassicAssert.AreEqual("action", token.Str); + } + + [Test] + public void ExtractField_EmptyString() + { + var token = Extract("{\"name\":\"\"}", "name"); + ClassicAssert.AreEqual(ExprTokenType.Str, token.TokenType); + ClassicAssert.AreEqual("", token.Str); + } + + [Test] + public void ExtractField_StringWithEscapedQuote() + { + var token = 
Extract("{\"name\":\"hello\\\"world\"}", "name"); + ClassicAssert.AreEqual(ExprTokenType.Str, token.TokenType); + ClassicAssert.AreEqual("hello\"world", token.Str); + } + + [Test] + public void ExtractField_StringWithEscapedBackslash() + { + var token = Extract("{\"path\":\"c:\\\\temp\"}", "path"); + ClassicAssert.AreEqual(ExprTokenType.Str, token.TokenType); + ClassicAssert.AreEqual("c:\\temp", token.Str); + } + + [Test] + public void ExtractField_StringWithEscapedNewline() + { + var token = Extract("{\"text\":\"line1\\nline2\"}", "text"); + ClassicAssert.AreEqual(ExprTokenType.Str, token.TokenType); + ClassicAssert.AreEqual("line1\nline2", token.Str); + } + + [Test] + public void ExtractField_StringWithEscapedTab() + { + var token = Extract("{\"text\":\"col1\\tcol2\"}", "text"); + ClassicAssert.AreEqual(ExprTokenType.Str, token.TokenType); + ClassicAssert.AreEqual("col1\tcol2", token.Str); + } + + [Test] + public void ExtractField_StringWithSlashEscape() + { + var token = Extract("{\"url\":\"http:\\/\\/example.com\"}", "url"); + ClassicAssert.AreEqual(ExprTokenType.Str, token.TokenType); + ClassicAssert.AreEqual("http://example.com", token.Str); + } + + // ======================== Boolean extraction ======================== + + [Test] + public void ExtractField_True() + { + var token = Extract("{\"active\":true}", "active"); + ClassicAssert.AreEqual(ExprTokenType.Num, token.TokenType); + ClassicAssert.AreEqual(1.0, token.Num); + } + + [Test] + public void ExtractField_False() + { + var token = Extract("{\"deleted\":false}", "deleted"); + ClassicAssert.AreEqual(ExprTokenType.Num, token.TokenType); + ClassicAssert.AreEqual(0.0, token.Num); + } + + // ======================== Null extraction ======================== + + [Test] + public void ExtractField_Null() + { + var token = Extract("{\"value\":null}", "value"); + ClassicAssert.AreEqual(ExprTokenType.Null, token.TokenType); + } + + // ======================== Array extraction ======================== + + [Test] + 
public void ExtractField_StringArray() + { + var token = Extract("{\"tags\":[\"classic\",\"popular\"]}", "tags"); + ClassicAssert.AreEqual(ExprTokenType.Tuple, token.TokenType); + ClassicAssert.AreEqual(2, token.TupleLength); + ClassicAssert.AreEqual(ExprTokenType.Str, token.TupleElements[0].TokenType); + ClassicAssert.AreEqual("classic", token.TupleElements[0].Str); + ClassicAssert.AreEqual("popular", token.TupleElements[1].Str); + } + + [Test] + public void ExtractField_NumericArray() + { + var token = Extract("{\"scores\":[1,2,3]}", "scores"); + ClassicAssert.AreEqual(ExprTokenType.Tuple, token.TokenType); + ClassicAssert.AreEqual(3, token.TupleLength); + ClassicAssert.AreEqual(1.0, token.TupleElements[0].Num); + ClassicAssert.AreEqual(2.0, token.TupleElements[1].Num); + ClassicAssert.AreEqual(3.0, token.TupleElements[2].Num); + } + + [Test] + public void ExtractField_MixedArray() + { + var token = Extract("{\"data\":[1,\"two\",true,null]}", "data"); + ClassicAssert.AreEqual(ExprTokenType.Tuple, token.TokenType); + ClassicAssert.AreEqual(4, token.TupleLength); + ClassicAssert.AreEqual(ExprTokenType.Num, token.TupleElements[0].TokenType); + ClassicAssert.AreEqual(1.0, token.TupleElements[0].Num); + ClassicAssert.AreEqual(ExprTokenType.Str, token.TupleElements[1].TokenType); + ClassicAssert.AreEqual("two", token.TupleElements[1].Str); + ClassicAssert.AreEqual(ExprTokenType.Num, token.TupleElements[2].TokenType); + ClassicAssert.AreEqual(1.0, token.TupleElements[2].Num); // true → 1 + ClassicAssert.AreEqual(ExprTokenType.Null, token.TupleElements[3].TokenType); + } + + [Test] + public void ExtractField_EmptyArray() + { + var token = Extract("{\"items\":[]}", "items"); + ClassicAssert.AreEqual(ExprTokenType.Tuple, token.TokenType); + ClassicAssert.AreEqual(0, token.TupleLength); + } + + // ======================== Multiple fields ======================== + + [Test] + public void ExtractField_FirstField() + { + var token = Extract("{\"a\":1,\"b\":2,\"c\":3}", "a"); + 
ClassicAssert.AreEqual(ExprTokenType.Num, token.TokenType); + ClassicAssert.AreEqual(1.0, token.Num); + } + + [Test] + public void ExtractField_MiddleField() + { + var token = Extract("{\"a\":1,\"b\":2,\"c\":3}", "b"); + ClassicAssert.AreEqual(ExprTokenType.Num, token.TokenType); + ClassicAssert.AreEqual(2.0, token.Num); + } + + [Test] + public void ExtractField_LastField() + { + var token = Extract("{\"a\":1,\"b\":2,\"c\":3}", "c"); + ClassicAssert.AreEqual(ExprTokenType.Num, token.TokenType); + ClassicAssert.AreEqual(3.0, token.Num); + } + + [Test] + public void ExtractField_SkipsValuesOfDifferentTypes() + { + // Ensure the extractor correctly skips strings, arrays, objects, booleans, nulls, and numbers + // when seeking a later field + var json = "{\"s\":\"hello\",\"a\":[1,2],\"o\":{\"nested\":true},\"b\":false,\"n\":null,\"target\":42}"; + var token = Extract(json, "target"); + ClassicAssert.AreEqual(ExprTokenType.Num, token.TokenType); + ClassicAssert.AreEqual(42.0, token.Num); + } + + // ======================== Missing / not found ======================== + + [Test] + public void ExtractField_MissingField_ReturnsNone() + { + var token = Extract("{\"year\":1980}", "rating"); + ClassicAssert.IsTrue(token.IsNone); + } + + [Test] + public void ExtractField_EmptyObject_ReturnsNone() + { + var token = Extract("{}", "anything"); + ClassicAssert.IsTrue(token.IsNone); + } + + // ======================== Whitespace handling ======================== + + [Test] + public void ExtractField_WithWhitespace() + { + var token = Extract(" { \"year\" : 1980 } ", "year"); + ClassicAssert.AreEqual(ExprTokenType.Num, token.TokenType); + ClassicAssert.AreEqual(1980.0, token.Num); + } + + [Test] + public void ExtractField_WithNewlines() + { + var json = "{\n \"year\": 1980,\n \"rating\": 4.5\n}"; + var token = Extract(json, "rating"); + ClassicAssert.AreEqual(ExprTokenType.Num, token.TokenType); + ClassicAssert.AreEqual(4.5, token.Num, 0.001); + } + + // ======================== 
Nested objects (skipped) ======================== + + [Test] + public void ExtractField_NestedObject_ReturnsNone() + { + // Nested objects are not supported as values — should return IsNone + var token = Extract("{\"meta\":{\"key\":\"val\"}}", "meta"); + ClassicAssert.IsTrue(token.IsNone); + } + + [Test] + public void ExtractField_SkipsNestedObjectToFindLaterField() + { + var json = "{\"meta\":{\"key\":\"val\"},\"year\":2020}"; + var token = Extract(json, "year"); + ClassicAssert.AreEqual(ExprTokenType.Num, token.TokenType); + ClassicAssert.AreEqual(2020.0, token.Num); + } + + [Test] + public void ExtractField_SkipsDeeplyNestedObject() + { + var json = "{\"deep\":{\"a\":{\"b\":{\"c\":1}}},\"target\":99}"; + var token = Extract(json, "target"); + ClassicAssert.AreEqual(ExprTokenType.Num, token.TokenType); + ClassicAssert.AreEqual(99.0, token.Num); + } + + // ======================== Malformed / non-JSON input ======================== + + [Test] + public void ExtractField_NotJson_ReturnsNone() + { + var token = Extract("this is not json", "year"); + ClassicAssert.IsTrue(token.IsNone); + } + + [Test] + public void ExtractField_EmptyInput_ReturnsNone() + { + var token = AttributeExtractor.ExtractField([], "year"); + ClassicAssert.IsTrue(token.IsNone); + } + + [Test] + public void ExtractField_ArrayAtRoot_ReturnsNone() + { + var token = Extract("[1,2,3]", "year"); + ClassicAssert.IsTrue(token.IsNone); + } + + [Test] + public void ExtractField_TruncatedJson_ReturnsNone() + { + var token = Extract("{\"year\":", "year"); + ClassicAssert.IsTrue(token.IsNone); + } + + [Test] + public void ExtractField_MissingColon_ReturnsNone() + { + var token = Extract("{\"year\" 1980}", "year"); + ClassicAssert.IsTrue(token.IsNone); + } + + [Test] + public void ExtractField_UnterminatedString_ReturnsNone() + { + var token = Extract("{\"name\":\"hello}", "name"); + ClassicAssert.IsTrue(token.IsNone); + } + + [Test] + public void ExtractField_UnterminatedKey_ReturnsNone() + { + var token = 
Extract("{\"name:\"hello\"}", "name"); + // The key "name will match to :, parsing should fail gracefully + ClassicAssert.IsTrue(token.IsNone); + } + + // ======================== Edge cases ======================== + + [Test] + public void ExtractField_StringValueContainingBraces() + { + var token = Extract("{\"data\":\"{not an object}\"}", "data"); + ClassicAssert.AreEqual(ExprTokenType.Str, token.TokenType); + ClassicAssert.AreEqual("{not an object}", token.Str); + } + + [Test] + public void ExtractField_StringValueContainingBrackets() + { + var token = Extract("{\"data\":\"[not an array]\"}", "data"); + ClassicAssert.AreEqual(ExprTokenType.Str, token.TokenType); + ClassicAssert.AreEqual("[not an array]", token.Str); + } + + [Test] + public void ExtractField_StringValueContainingComma() + { + var token = Extract("{\"msg\":\"hello, world\"}", "msg"); + ClassicAssert.AreEqual(ExprTokenType.Str, token.TokenType); + ClassicAssert.AreEqual("hello, world", token.Str); + } + + [Test] + public void ExtractField_FieldNameCaseSensitive() + { + var token = Extract("{\"Year\":2020}", "year"); + ClassicAssert.IsTrue(token.IsNone); // Case mismatch + + var token2 = Extract("{\"Year\":2020}", "Year"); + ClassicAssert.AreEqual(ExprTokenType.Num, token2.TokenType); + ClassicAssert.AreEqual(2020.0, token2.Num); + } + + [Test] + public void ExtractField_FieldWithHyphen() + { + // Hyphens in JSON keys are valid + var token = Extract("{\"my-field\":42}", "my-field"); + ClassicAssert.AreEqual(ExprTokenType.Num, token.TokenType); + ClassicAssert.AreEqual(42.0, token.Num); + } + + [Test] + public void ExtractField_FieldWithUnderscore() + { + var token = Extract("{\"my_field\":42}", "my_field"); + ClassicAssert.AreEqual(ExprTokenType.Num, token.TokenType); + ClassicAssert.AreEqual(42.0, token.Num); + } + + [Test] + public void ExtractField_FieldWithDigits() + { + var token = Extract("{\"field123\":99}", "field123"); + ClassicAssert.AreEqual(ExprTokenType.Num, token.TokenType); + 
ClassicAssert.AreEqual(99.0, token.Num); + } + + [Test] + public void ExtractField_BooleanLiteralNotFollowedByDelimiter_ReturnsNone() + { + // "trueish" should not match as true + var token = Extract("{\"val\":trueish}", "val"); + ClassicAssert.IsTrue(token.IsNone); + } + + [Test] + public void ExtractField_NullLiteralNotFollowedByDelimiter_ReturnsNone() + { + var token = Extract("{\"val\":nullify}", "val"); + ClassicAssert.IsTrue(token.IsNone); + } + + [Test] + public void ExtractField_ArrayWithNestedArrays() + { + // ParseArrayToken calls ParseValueToken which handles nested arrays recursively + var token = Extract("{\"matrix\":[[1,2],[3,4]]}", "matrix"); + ClassicAssert.AreEqual(ExprTokenType.Tuple, token.TokenType); + ClassicAssert.AreEqual(2, token.TupleLength); + // Each inner element is itself a Tuple + ClassicAssert.AreEqual(ExprTokenType.Tuple, token.TupleElements[0].TokenType); + ClassicAssert.AreEqual(2, token.TupleElements[0].TupleLength); + } + + [Test] + public void ExtractField_LargeNumberOfFields() + { + // Ensure we can skip many fields to find the target + var sb = new StringBuilder("{"); + for (var i = 0; i < 100; i++) + { + if (i > 0) sb.Append(','); + sb.Append($"\"field{i}\":{i}"); + } + sb.Append('}'); + + var token = Extract(sb.ToString(), "field99"); + ClassicAssert.AreEqual(ExprTokenType.Num, token.TokenType); + ClassicAssert.AreEqual(99.0, token.Num); + } + + [Test] + public void ExtractField_SkipsArrayWithStringsContainingQuotes() + { + // Ensure the array skipper handles escaped quotes inside string elements + var json = "{\"arr\":[\"he\\\"llo\",\"world\"],\"target\":1}"; + var token = Extract(json, "target"); + ClassicAssert.AreEqual(ExprTokenType.Num, token.TokenType); + ClassicAssert.AreEqual(1.0, token.Num); + } + + [Test] + public void ExtractField_SkipsStringWithEscapedBackslashBeforeClosingQuote() + { + // The value is the string: ends_with_backslash\ (the JSON encodes \\ at the end) + // This tests that \\\" is parsed as \\ + " 
(close quote), not \ + \" + var json = "{\"a\":\"ends_with_backslash\\\\\",\"b\":2}"; + var token = Extract(json, "b"); + ClassicAssert.AreEqual(ExprTokenType.Num, token.TokenType); + ClassicAssert.AreEqual(2.0, token.Num); + } + } +} From e0bad92cd7afb56b6ad128d7658bd182c1604f07 Mon Sep 17 00:00:00 2001 From: Haiyang Xu Date: Tue, 3 Mar 2026 14:00:29 -0800 Subject: [PATCH 24/31] add benchmark --- .../Filter/FilterExpressionBenchmarks.cs | 170 ++++++++++++++++++ 1 file changed, 170 insertions(+) create mode 100644 benchmark/BDN.benchmark/Filter/FilterExpressionBenchmarks.cs diff --git a/benchmark/BDN.benchmark/Filter/FilterExpressionBenchmarks.cs b/benchmark/BDN.benchmark/Filter/FilterExpressionBenchmarks.cs new file mode 100644 index 00000000000..5564ee90c05 --- /dev/null +++ b/benchmark/BDN.benchmark/Filter/FilterExpressionBenchmarks.cs @@ -0,0 +1,170 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +using System.Text; +using BenchmarkDotNet.Attributes; +using Garnet.server.Vector.Filter; + +namespace BDN.benchmark.Filter +{ + /// + /// Benchmarks for the vector filter expression engine: + /// - ExprCompiler: compile filter string → postfix program + /// - ExprRunner: execute compiled program against JSON attributes + /// - AttributeExtractor: extract fields from raw JSON bytes + /// - End-to-end: compile + run for realistic filter scenarios + /// + /// These benchmarks measure the hot path of VSIM ... FILTER post-processing. 
+ /// + [MemoryDiagnoser] + public class FilterExpressionBenchmarks + { + // ======================== Shared test data ======================== + + // Simple filter: single comparison + private byte[] _simpleFilterBytes; + private ExprProgram _simpleProgram; + + // Medium filter: two comparisons joined by AND + private byte[] _mediumFilterBytes; + private ExprProgram _mediumProgram; + + // Complex filter: multiple ops, arithmetic, containment, string equality + private byte[] _complexFilterBytes; + private ExprProgram _complexProgram; + + // JSON attribute payloads (varying sizes) + private byte[] _smallJson; // 2 fields + private byte[] _mediumJson; // 5 fields + private byte[] _largeJson; // 10+ fields, array, nested object to skip + + // Reusable evaluation stack + private Stack _stack; + + [GlobalSetup] + public void Setup() + { + // Filter expressions (UTF-8) + _simpleFilterBytes = Encoding.UTF8.GetBytes(".year > 1950"); + _mediumFilterBytes = Encoding.UTF8.GetBytes(".year > 1950 and .rating >= 4.0"); + _complexFilterBytes = Encoding.UTF8.GetBytes(".rating * 2 > 8 and (.year >= 1980 or \"modern\" in .tags) and .genre == \"action\""); + + // Compile once for run benchmarks + _simpleProgram = ExprCompiler.TryCompile(_simpleFilterBytes, out _); + _mediumProgram = ExprCompiler.TryCompile(_mediumFilterBytes, out _); + _complexProgram = ExprCompiler.TryCompile(_complexFilterBytes, out _); + + // JSON payloads + _smallJson = Encoding.UTF8.GetBytes("{\"year\":1980,\"rating\":4.5}"); + _mediumJson = Encoding.UTF8.GetBytes("{\"year\":1980,\"rating\":4.5,\"genre\":\"action\",\"director\":\"Spielberg\",\"tags\":[\"classic\",\"popular\"]}"); + _largeJson = Encoding.UTF8.GetBytes("{\"id\":12345,\"title\":\"Test Movie\",\"year\":1980,\"rating\":4.5,\"genre\":\"action\",\"director\":\"Spielberg\",\"studio\":\"Universal\",\"budget\":50000000,\"tags\":[\"classic\",\"popular\",\"award-winning\"],\"metadata\":{\"source\":\"imdb\",\"verified\":true},\"active\":true}"); + + _stack 
= ExprRunner.CreateStack(); + } + + // ======================== Compilation benchmarks ======================== + + [Benchmark] + public void Compile_Simple() + => ExprCompiler.TryCompile(_simpleFilterBytes, out _); + + [Benchmark] + public void Compile_Medium() + => ExprCompiler.TryCompile(_mediumFilterBytes, out _); + + [Benchmark] + public void Compile_Complex() + => ExprCompiler.TryCompile(_complexFilterBytes, out _); + + // ======================== Execution benchmarks (compile once, run many) ======================== + + [Benchmark] + public bool Run_Simple_SmallJson() + => ExprRunner.Run(_simpleProgram, _smallJson, _stack); + + [Benchmark] + public bool Run_Simple_MediumJson() + => ExprRunner.Run(_simpleProgram, _mediumJson, _stack); + + [Benchmark] + public bool Run_Medium_MediumJson() + => ExprRunner.Run(_mediumProgram, _mediumJson, _stack); + + [Benchmark] + public bool Run_Complex_MediumJson() + => ExprRunner.Run(_complexProgram, _mediumJson, _stack); + + [Benchmark] + public bool Run_Complex_LargeJson() + => ExprRunner.Run(_complexProgram, _largeJson, _stack); + + // ======================== Field extraction benchmarks ======================== + + [Benchmark] + public void Extract_FirstField() + => AttributeExtractor.ExtractField(_smallJson, "year"); + + [Benchmark] + public void Extract_LastField_MediumJson() + => AttributeExtractor.ExtractField(_mediumJson, "tags"); + + [Benchmark] + public void Extract_SkipNestedObject() + => AttributeExtractor.ExtractField(_largeJson, "active"); + + [Benchmark] + public void Extract_MissingField() + => AttributeExtractor.ExtractField(_mediumJson, "nonexistent"); + + // ======================== End-to-end benchmarks (compile + run) ======================== + + [Benchmark] + public bool EndToEnd_Simple() + { + var program = ExprCompiler.TryCompile(_simpleFilterBytes, out _); + return ExprRunner.Run(program, _mediumJson, _stack); + } + + [Benchmark] + public bool EndToEnd_Complex() + { + var program = 
ExprCompiler.TryCompile(_complexFilterBytes, out _); + return ExprRunner.Run(program, _mediumJson, _stack); + } + + // ======================== Batch simulation (N candidates) ======================== + + [Benchmark] + [Arguments(10)] + [Arguments(100)] + [Arguments(1000)] + public int RunBatch_Medium(int count) + { + var matched = 0; + for (var i = 0; i < count; i++) + { + // Alternate between matching and non-matching JSON to simulate realistic filtering + var json = (i % 3 == 0) ? _smallJson : _mediumJson; + if (ExprRunner.Run(_mediumProgram, json, _stack)) + matched++; + } + return matched; + } + + [Benchmark] + [Arguments(10)] + [Arguments(100)] + [Arguments(1000)] + public int RunBatch_Complex(int count) + { + var matched = 0; + for (var i = 0; i < count; i++) + { + var json = (i % 3 == 0) ? _smallJson : _mediumJson; + if (ExprRunner.Run(_complexProgram, json, _stack)) + matched++; + } + return matched; + } + } +} From 8b2cbb22d9338bac27e0d05c6c10df5324d896d7 Mon Sep 17 00:00:00 2001 From: Haiyang Xu Date: Tue, 3 Mar 2026 14:36:41 -0800 Subject: [PATCH 25/31] optimize the allocate --- .../Filter/FilterExpressionBenchmarks.cs | 2 +- .../Resp/Vector/Filter/AttributeExtractor.cs | 66 +++--- .../server/Resp/Vector/Filter/ExprCompiler.cs | 2 +- libs/server/Resp/Vector/Filter/ExprRunner.cs | 191 +++++++++++++++--- .../Vector/Filter/VectorFilterExpression.cs | 18 ++ .../Filter/AttributeExtractorTests.cs | 60 ++++-- test/Garnet.test/Filter/ExprTestHelpers.cs | 13 +- 7 files changed, 278 insertions(+), 74 deletions(-) diff --git a/benchmark/BDN.benchmark/Filter/FilterExpressionBenchmarks.cs b/benchmark/BDN.benchmark/Filter/FilterExpressionBenchmarks.cs index 5564ee90c05..5fb20fefdda 100644 --- a/benchmark/BDN.benchmark/Filter/FilterExpressionBenchmarks.cs +++ b/benchmark/BDN.benchmark/Filter/FilterExpressionBenchmarks.cs @@ -167,4 +167,4 @@ public int RunBatch_Complex(int count) return matched; } } -} +} \ No newline at end of file diff --git 
a/libs/server/Resp/Vector/Filter/AttributeExtractor.cs b/libs/server/Resp/Vector/Filter/AttributeExtractor.cs index 3a14664f483..41892d062a2 100644 --- a/libs/server/Resp/Vector/Filter/AttributeExtractor.cs +++ b/libs/server/Resp/Vector/Filter/AttributeExtractor.cs @@ -2,8 +2,8 @@ // Licensed under the MIT license. using System; +using System.Buffers; using System.Buffers.Text; -using System.Text; namespace Garnet.server.Vector.Filter { @@ -111,19 +111,20 @@ private static ExprToken ParseStringToken(ReadOnlySpan json, ref int p) } if (json[p] == (byte)'"') { - string value; if (!hasEscape) { - // Zero-copy: decode directly from the span - value = Encoding.UTF8.GetString(json.Slice(start, p - start)); + // Zero-allocation: store byte offset+length into the source JSON + var len = p - start; + p++; // Skip closing quote + return ExprToken.NewJsonStr(start, len); } else { - // Process escapes - value = UnescapeJsonString(json, start, p); + // Escaped strings must be materialized (rare path) + var value = UnescapeJsonString(json, start, p); + p++; // Skip closing quote + return ExprToken.NewStr(value); } - p++; // Skip closing quote - return ExprToken.NewStr(value); } p++; } @@ -164,15 +165,15 @@ private static ExprToken ParseLiteralToken(ReadOnlySpan json, ref int p, return t; } + /// Max array elements before rejecting. 
+ private const int MaxArrayElements = 64; + private static ExprToken ParseArrayToken(ReadOnlySpan json, ref int p) { if (p >= json.Length || json[p] != (byte)'[') return default; p++; // Skip '[' SkipWhiteSpace(json, ref p); - var elements = new ExprToken[64]; - var count = 0; - // Handle empty array if (p < json.Length && json[p] == (byte)']') { @@ -180,25 +181,36 @@ private static ExprToken ParseArrayToken(ReadOnlySpan json, ref int p) return ExprToken.NewTuple([], 0); } - while (true) - { - SkipWhiteSpace(json, ref p); - if (p >= json.Length || count >= elements.Length) return default; + // Rent from pool instead of allocating a new scratch array every call + var elements = ArrayPool.Shared.Rent(MaxArrayElements); + var count = 0; - var ele = ParseValueToken(json, ref p); - if (ele.IsNone) return default; - elements[count++] = ele; + try + { + while (true) + { + SkipWhiteSpace(json, ref p); + if (p >= json.Length || count >= MaxArrayElements) return default; + + var ele = ParseValueToken(json, ref p); + if (ele.IsNone) return default; + elements[count++] = ele; + + SkipWhiteSpace(json, ref p); + if (p >= json.Length) return default; + if (json[p] == (byte)',') { p++; continue; } + if (json[p] == (byte)']') { p++; break; } + return default; // Malformed + } - SkipWhiteSpace(json, ref p); - if (p >= json.Length) return default; - if (json[p] == (byte)',') { p++; continue; } - if (json[p] == (byte)']') { p++; break; } - return default; // Malformed + var result = new ExprToken[count]; + Array.Copy(elements, result, count); + return ExprToken.NewTuple(result, count); + } + finally + { + ArrayPool.Shared.Return(elements, clearArray: true); } - - var result = new ExprToken[count]; - Array.Copy(elements, result, count); - return ExprToken.NewTuple(result, count); } // ======================== Fast skipping (non-allocating) ======================== diff --git a/libs/server/Resp/Vector/Filter/ExprCompiler.cs b/libs/server/Resp/Vector/Filter/ExprCompiler.cs index 
dc92b6054d7..27c65585f54 100644 --- a/libs/server/Resp/Vector/Filter/ExprCompiler.cs +++ b/libs/server/Resp/Vector/Filter/ExprCompiler.cs @@ -459,4 +459,4 @@ private static void TryMatchOp(ReadOnlySpan consumed, ReadOnlySpan o } } } -} +} \ No newline at end of file diff --git a/libs/server/Resp/Vector/Filter/ExprRunner.cs b/libs/server/Resp/Vector/Filter/ExprRunner.cs index 32896bcf762..0ad11d55dd9 100644 --- a/libs/server/Resp/Vector/Filter/ExprRunner.cs +++ b/libs/server/Resp/Vector/Filter/ExprRunner.cs @@ -2,8 +2,10 @@ // Licensed under the MIT license. using System; +using System.Buffers.Text; using System.Collections.Generic; using System.Globalization; +using System.Text; namespace Garnet.server.Vector.Filter { /// @@ -88,43 +90,43 @@ public static bool Run(ExprProgram program, ReadOnlySpan json, Stack ToNum(b) ? 1 : 0; + result.Num = ToNum(a, json) > ToNum(b, json) ? 1 : 0; break; case OpCode.Gte: - result.Num = ToNum(a) >= ToNum(b) ? 1 : 0; + result.Num = ToNum(a, json) >= ToNum(b, json) ? 1 : 0; break; case OpCode.Lt: - result.Num = ToNum(a) < ToNum(b) ? 1 : 0; + result.Num = ToNum(a, json) < ToNum(b, json) ? 1 : 0; break; case OpCode.Lte: - result.Num = ToNum(a) <= ToNum(b) ? 1 : 0; + result.Num = ToNum(a, json) <= ToNum(b, json) ? 1 : 0; break; case OpCode.Eq: - result.Num = AreEqual(a, b) ? 1 : 0; + result.Num = AreEqual(a, b, json) ? 1 : 0; break; case OpCode.Neq: - result.Num = !AreEqual(a, b) ? 1 : 0; + result.Num = !AreEqual(a, b, json) ? 1 : 0; break; case OpCode.In: - result.Num = EvalIn(a, b) ? 1 : 0; + result.Num = EvalIn(a, b, json) ? 1 : 0; break; case OpCode.And: result.Num = ToBool(a) != 0 && ToBool(b) != 0 ? 
1 : 0; @@ -153,14 +155,23 @@ public static bool Run(ExprProgram program, ReadOnlySpan json, Stack - private static double ToNum(ExprToken t) + private static double ToNum(ExprToken t, ReadOnlySpan json) { if (t.IsNone) return 0; if (t.TokenType == ExprTokenType.Num) return t.Num; - if (t.TokenType == ExprTokenType.Str && t.Str != null) + if (t.TokenType == ExprTokenType.Str) { - return double.TryParse(t.Str, NumberStyles.Float | NumberStyles.AllowLeadingSign, - CultureInfo.InvariantCulture, out var result) ? result : 0; + if (t.IsJsonRef) + { + var slice = json.Slice(t.Utf8Start, t.Utf8Length); + return Utf8Parser.TryParse(slice, out double result, out var consumed) && consumed == slice.Length ? result : 0; + } + + if (t.Str != null) + { + return double.TryParse(t.Str, NumberStyles.Float | NumberStyles.AllowLeadingSign, + CultureInfo.InvariantCulture, out var result) ? result : 0; + } } return 0; } @@ -173,7 +184,11 @@ private static double ToBool(ExprToken t) { if (t.IsNone) return 0; if (t.TokenType == ExprTokenType.Num) return t.Num != 0 ? 1 : 0; - if (t.TokenType == ExprTokenType.Str && (t.Str == null || t.Str.Length == 0)) return 0; + if (t.TokenType == ExprTokenType.Str) + { + if (t.IsJsonRef) return t.Utf8Length == 0 ? 0 : 1; + return (t.Str == null || t.Str.Length == 0) ? 0 : 1; + } if (t.TokenType == ExprTokenType.Null) return 0; return 1; // Non-empty strings, tuples, etc. are truthy } @@ -181,18 +196,29 @@ private static double ToBool(ExprToken t) /// /// Compare two tokens for equality. 
/// Matches Redis exprTokensEqual(): - /// - Both strings → exact string comparison + /// - Both strings → exact string comparison (handles JSON refs) /// - Both numbers → exact numeric equality (no epsilon) /// - One/both null → equal only if both null /// - Mixed types → coerce to numbers and compare /// - private static bool AreEqual(ExprToken a, ExprToken b) + private static bool AreEqual(ExprToken a, ExprToken b, ReadOnlySpan json) { if (a.IsNone || b.IsNone) return a.IsNone && b.IsNone; - // Both strings + // Both strings — handle 4 combinations of string/JsonRef if (a.TokenType == ExprTokenType.Str && b.TokenType == ExprTokenType.Str) - return string.Equals(a.Str, b.Str, StringComparison.Ordinal); + { + if (!a.IsJsonRef && !b.IsJsonRef) + return string.Equals(a.Str, b.Str, StringComparison.Ordinal); + + if (a.IsJsonRef && b.IsJsonRef) + return json.Slice(a.Utf8Start, a.Utf8Length).SequenceEqual(json.Slice(b.Utf8Start, b.Utf8Length)); + + // One is a compiled string, one is a JSON ref + var str = a.IsJsonRef ? b.Str : a.Str; + var jsonRef = a.IsJsonRef ? a : b; + return Utf8Equals(str, json.Slice(jsonRef.Utf8Start, jsonRef.Utf8Length)); + } // Both numbers if (a.TokenType == ExprTokenType.Num && b.TokenType == ExprTokenType.Num) @@ -203,7 +229,7 @@ private static bool AreEqual(ExprToken a, ExprToken b) return a.TokenType == b.TokenType; // Mixed types — coerce to number - return ToNum(a) == ToNum(b); + return ToNum(a, json) == ToNum(b, json); } /// @@ -213,7 +239,7 @@ private static bool AreEqual(ExprToken a, ExprToken b) /// 2. If both a and b are strings, check substring containment /// 3. 
Otherwise, false /// - private static bool EvalIn(ExprToken a, ExprToken b) + private static bool EvalIn(ExprToken a, ExprToken b, ReadOnlySpan json) { if (b.IsNone) return false; @@ -222,7 +248,7 @@ private static bool EvalIn(ExprToken a, ExprToken b) { for (var i = 0; i < b.TupleLength; i++) { - if (AreEqual(a, b.TupleElements[i])) + if (AreEqual(a, b.TupleElements[i], json)) return true; } return false; @@ -231,12 +257,121 @@ private static bool EvalIn(ExprToken a, ExprToken b) // String substring check (matching Redis exprTokensStringIn) if (!a.IsNone && a.TokenType == ExprTokenType.Str && b.TokenType == ExprTokenType.Str) { - if (a.Str == null || b.Str == null) return false; - if (a.Str.Length > b.Str.Length) return false; - return b.Str.IndexOf(a.Str, StringComparison.Ordinal) >= 0; + // Both compiled strings + if (!a.IsJsonRef && !b.IsJsonRef) + { + if (a.Str == null || b.Str == null) return false; + if (a.Str.Length > b.Str.Length) return false; + return b.Str.IndexOf(a.Str, StringComparison.Ordinal) >= 0; + } + + // Needle is compiled string, haystack is JSON ref (most common filter case) + if (!a.IsJsonRef && b.IsJsonRef) + { + if (a.Str == null) return false; + return Utf8Contains(json.Slice(b.Utf8Start, b.Utf8Length), a.Str); + } + + // Needle is JSON ref, haystack is compiled string + if (a.IsJsonRef && !b.IsJsonRef) + { + if (b.Str == null) return false; + return Utf8ContainsReverse(b.Str, json.Slice(a.Utf8Start, a.Utf8Length)); + } + + // Both JSON refs + var needleSlice = json.Slice(a.Utf8Start, a.Utf8Length); + var haystackSlice = json.Slice(b.Utf8Start, b.Utf8Length); + return haystackSlice.IndexOf(needleSlice) >= 0; } return false; } + + // ======================== UTF-8 byte comparison helpers ======================== + + /// + /// Compare a .NET string to raw UTF-8 bytes for equality without allocating. + /// Uses ASCII fast path; falls back to encoding for non-ASCII. 
+ /// + private static bool Utf8Equals(string str, ReadOnlySpan utf8) + { + // ASCII fast path: for single-byte chars, string length == byte length + if (str.Length == utf8.Length) + { + for (var i = 0; i < utf8.Length; i++) + { + if (str[i] > 127) goto slowPath; + if (utf8[i] != (byte)str[i]) return false; + } + return true; + } + + slowPath: + // Slow path for multi-byte UTF-8 characters (rare in filter expressions) + var maxBytes = Encoding.UTF8.GetMaxByteCount(str.Length); + Span buf = maxBytes <= 512 ? stackalloc byte[maxBytes] : new byte[maxBytes]; + var written = Encoding.UTF8.GetBytes(str.AsSpan(), buf); + return utf8.SequenceEqual(buf[..written]); + } + + /// + /// Check if a UTF-8 byte span contains a .NET string as a substring. + /// ASCII fast path; falls back to encoding for non-ASCII. + /// + private static bool Utf8Contains(ReadOnlySpan haystack, string needle) + { + if (needle.Length == 0) return true; + if (needle.Length > haystack.Length) return false; + + // ASCII fast path + for (var i = 0; i <= haystack.Length - needle.Length; i++) + { + if (haystack[i] == (byte)needle[0]) + { + var match = true; + for (var j = 1; j < needle.Length; j++) + { + if (needle[j] > 127) goto slowPath; + if (haystack[i + j] != (byte)needle[j]) { match = false; break; } + } + if (match) return true; + } + } + return false; + + slowPath: + var haystackStr = Encoding.UTF8.GetString(haystack); + return haystackStr.IndexOf(needle, StringComparison.Ordinal) >= 0; + } + + /// + /// Check if a .NET string contains a UTF-8 byte span as a substring. 
+ /// + private static bool Utf8ContainsReverse(string haystack, ReadOnlySpan needle) + { + if (needle.Length == 0) return true; + if (needle.Length > haystack.Length) return false; + + // ASCII fast path + for (var i = 0; i <= haystack.Length - needle.Length; i++) + { + if ((byte)haystack[i] == needle[0]) + { + var match = true; + for (var j = 1; j < needle.Length; j++) + { + if (haystack[i + j] > 127) goto slowPath; + if ((byte)haystack[i + j] != needle[j]) { match = false; break; } + } + if (match) return true; + } + } + return false; + + slowPath: + var needleStr = Encoding.UTF8.GetString(needle); + return haystack.IndexOf(needleStr, StringComparison.Ordinal) >= 0; + } } } \ No newline at end of file diff --git a/libs/server/Resp/Vector/Filter/VectorFilterExpression.cs b/libs/server/Resp/Vector/Filter/VectorFilterExpression.cs index 86966b0fbaa..72e3251e887 100644 --- a/libs/server/Resp/Vector/Filter/VectorFilterExpression.cs +++ b/libs/server/Resp/Vector/Filter/VectorFilterExpression.cs @@ -133,9 +133,18 @@ internal struct ExprToken /// Number of elements in the tuple. public int TupleLength; + /// Start byte-offset of a string value in the source JSON (for zero-allocation extraction). + public int Utf8Start; + + /// Byte-length of the string value in the source JSON (for zero-allocation extraction). + public int Utf8Length; + /// True when this token is the default (uninitialized) value, replacing null checks. public readonly bool IsNone => TokenType == ExprTokenType.None; + /// True when this is a Str token that references raw JSON bytes instead of an allocated string. 
+ public readonly bool IsJsonRef => TokenType == ExprTokenType.Str && Str == null; + public static ExprToken NewNum(double value) { return new ExprToken { TokenType = ExprTokenType.Num, Num = value }; @@ -165,6 +174,15 @@ public static ExprToken NewTuple(ExprToken[] elements, int length) { return new ExprToken { TokenType = ExprTokenType.Tuple, TupleElements = elements, TupleLength = length }; } + + /// + /// Create a string token that references raw UTF-8 bytes in the source JSON — zero allocation. + /// The offset and length define the string content (excluding quotes) within the JSON span. + /// + public static ExprToken NewJsonStr(int utf8Start, int utf8Length) + { + return new ExprToken { TokenType = ExprTokenType.Str, Utf8Start = utf8Start, Utf8Length = utf8Length }; + } } /// diff --git a/test/Garnet.test/Filter/AttributeExtractorTests.cs b/test/Garnet.test/Filter/AttributeExtractorTests.cs index f5b69ac62a6..14721249912 100644 --- a/test/Garnet.test/Filter/AttributeExtractorTests.cs +++ b/test/Garnet.test/Filter/AttributeExtractorTests.cs @@ -20,6 +20,20 @@ public class AttributeExtractorTests : AllureTestBase private static ExprToken Extract(string json, string field) => AttributeExtractor.ExtractField(Encoding.UTF8.GetBytes(json), field); + /// + /// Get the string value from an ExprToken, handling both allocated strings and JSON byte refs. 
+ /// + private static string GetStr(string json, ExprToken token) + { + if (token.Str != null) return token.Str; + if (token.IsJsonRef) + { + var bytes = Encoding.UTF8.GetBytes(json); + return Encoding.UTF8.GetString(bytes, token.Utf8Start, token.Utf8Length); + } + return null; + } + // ======================== Number extraction ======================== [Test] @@ -67,17 +81,21 @@ public void ExtractField_Zero() [Test] public void ExtractField_SimpleString() { - var token = Extract("{\"genre\":\"action\"}", "genre"); + var json = "{\"genre\":\"action\"}"; + var token = Extract(json, "genre"); ClassicAssert.AreEqual(ExprTokenType.Str, token.TokenType); - ClassicAssert.AreEqual("action", token.Str); + ClassicAssert.IsTrue(token.IsJsonRef, "Non-escaped strings should be JSON byte refs"); + ClassicAssert.AreEqual("action", GetStr(json, token)); } [Test] public void ExtractField_EmptyString() { - var token = Extract("{\"name\":\"\"}", "name"); + var json = "{\"name\":\"\"}"; + var token = Extract(json, "name"); ClassicAssert.AreEqual(ExprTokenType.Str, token.TokenType); - ClassicAssert.AreEqual("", token.Str); + ClassicAssert.IsTrue(token.IsJsonRef); + ClassicAssert.AreEqual("", GetStr(json, token)); } [Test] @@ -85,6 +103,7 @@ public void ExtractField_StringWithEscapedQuote() { var token = Extract("{\"name\":\"hello\\\"world\"}", "name"); ClassicAssert.AreEqual(ExprTokenType.Str, token.TokenType); + ClassicAssert.IsFalse(token.IsJsonRef, "Escaped strings should be materialized"); ClassicAssert.AreEqual("hello\"world", token.Str); } @@ -93,6 +112,7 @@ public void ExtractField_StringWithEscapedBackslash() { var token = Extract("{\"path\":\"c:\\\\temp\"}", "path"); ClassicAssert.AreEqual(ExprTokenType.Str, token.TokenType); + ClassicAssert.IsFalse(token.IsJsonRef); ClassicAssert.AreEqual("c:\\temp", token.Str); } @@ -101,6 +121,7 @@ public void ExtractField_StringWithEscapedNewline() { var token = Extract("{\"text\":\"line1\\nline2\"}", "text"); 
ClassicAssert.AreEqual(ExprTokenType.Str, token.TokenType); + ClassicAssert.IsFalse(token.IsJsonRef); ClassicAssert.AreEqual("line1\nline2", token.Str); } @@ -109,6 +130,7 @@ public void ExtractField_StringWithEscapedTab() { var token = Extract("{\"text\":\"col1\\tcol2\"}", "text"); ClassicAssert.AreEqual(ExprTokenType.Str, token.TokenType); + ClassicAssert.IsFalse(token.IsJsonRef); ClassicAssert.AreEqual("col1\tcol2", token.Str); } @@ -117,6 +139,7 @@ public void ExtractField_StringWithSlashEscape() { var token = Extract("{\"url\":\"http:\\/\\/example.com\"}", "url"); ClassicAssert.AreEqual(ExprTokenType.Str, token.TokenType); + ClassicAssert.IsFalse(token.IsJsonRef); ClassicAssert.AreEqual("http://example.com", token.Str); } @@ -152,12 +175,13 @@ public void ExtractField_Null() [Test] public void ExtractField_StringArray() { - var token = Extract("{\"tags\":[\"classic\",\"popular\"]}", "tags"); + var json = "{\"tags\":[\"classic\",\"popular\"]}"; + var token = Extract(json, "tags"); ClassicAssert.AreEqual(ExprTokenType.Tuple, token.TokenType); ClassicAssert.AreEqual(2, token.TupleLength); ClassicAssert.AreEqual(ExprTokenType.Str, token.TupleElements[0].TokenType); - ClassicAssert.AreEqual("classic", token.TupleElements[0].Str); - ClassicAssert.AreEqual("popular", token.TupleElements[1].Str); + ClassicAssert.AreEqual("classic", GetStr(json, token.TupleElements[0])); + ClassicAssert.AreEqual("popular", GetStr(json, token.TupleElements[1])); } [Test] @@ -174,13 +198,14 @@ public void ExtractField_NumericArray() [Test] public void ExtractField_MixedArray() { - var token = Extract("{\"data\":[1,\"two\",true,null]}", "data"); + var json = "{\"data\":[1,\"two\",true,null]}"; + var token = Extract(json, "data"); ClassicAssert.AreEqual(ExprTokenType.Tuple, token.TokenType); ClassicAssert.AreEqual(4, token.TupleLength); ClassicAssert.AreEqual(ExprTokenType.Num, token.TupleElements[0].TokenType); ClassicAssert.AreEqual(1.0, token.TupleElements[0].Num); 
ClassicAssert.AreEqual(ExprTokenType.Str, token.TupleElements[1].TokenType); - ClassicAssert.AreEqual("two", token.TupleElements[1].Str); + ClassicAssert.AreEqual("two", GetStr(json, token.TupleElements[1])); ClassicAssert.AreEqual(ExprTokenType.Num, token.TupleElements[2].TokenType); ClassicAssert.AreEqual(1.0, token.TupleElements[2].Num); // true → 1 ClassicAssert.AreEqual(ExprTokenType.Null, token.TupleElements[3].TokenType); @@ -351,25 +376,28 @@ public void ExtractField_UnterminatedKey_ReturnsNone() [Test] public void ExtractField_StringValueContainingBraces() { - var token = Extract("{\"data\":\"{not an object}\"}", "data"); + var json = "{\"data\":\"{not an object}\"}"; + var token = Extract(json, "data"); ClassicAssert.AreEqual(ExprTokenType.Str, token.TokenType); - ClassicAssert.AreEqual("{not an object}", token.Str); + ClassicAssert.AreEqual("{not an object}", GetStr(json, token)); } [Test] public void ExtractField_StringValueContainingBrackets() { - var token = Extract("{\"data\":\"[not an array]\"}", "data"); + var json = "{\"data\":\"[not an array]\"}"; + var token = Extract(json, "data"); ClassicAssert.AreEqual(ExprTokenType.Str, token.TokenType); - ClassicAssert.AreEqual("[not an array]", token.Str); + ClassicAssert.AreEqual("[not an array]", GetStr(json, token)); } [Test] public void ExtractField_StringValueContainingComma() { - var token = Extract("{\"msg\":\"hello, world\"}", "msg"); + var json = "{\"msg\":\"hello, world\"}"; + var token = Extract(json, "msg"); ClassicAssert.AreEqual(ExprTokenType.Str, token.TokenType); - ClassicAssert.AreEqual("hello, world", token.Str); + ClassicAssert.AreEqual("hello, world", GetStr(json, token)); } [Test] @@ -473,4 +501,4 @@ public void ExtractField_SkipsStringWithEscapedBackslashBeforeClosingQuote() ClassicAssert.AreEqual(2.0, token.Num); } } -} +} \ No newline at end of file diff --git a/test/Garnet.test/Filter/ExprTestHelpers.cs b/test/Garnet.test/Filter/ExprTestHelpers.cs index e5d50697718..a6e2e9cd92f 
100644 --- a/test/Garnet.test/Filter/ExprTestHelpers.cs +++ b/test/Garnet.test/Filter/ExprTestHelpers.cs @@ -70,6 +70,13 @@ private static ExprToken RunAndReturnTop(ExprProgram program, ReadOnlySpan var extracted = AttributeExtractor.ExtractField(json, inst.Str); if (extracted.IsNone) return ExprToken.NewNull(); + + // Materialize JSON refs to strings for test convenience (OK to allocate in tests) + if (extracted.IsJsonRef) + { + extracted = ExprToken.NewStr(Encoding.UTF8.GetString(json.Slice(extracted.Utf8Start, extracted.Utf8Length))); + } + stack[stackLen++] = extracted; continue; } @@ -160,7 +167,11 @@ private static double TokenToBool(ExprToken t) { if (t.IsNone) return 0; if (t.TokenType == ExprTokenType.Num) return t.Num != 0 ? 1 : 0; - if (t.TokenType == ExprTokenType.Str && (t.Str == null || t.Str.Length == 0)) return 0; + if (t.TokenType == ExprTokenType.Str) + { + if (t.IsJsonRef) return t.Utf8Length == 0 ? 0 : 1; + return (t.Str == null || t.Str.Length == 0) ? 0 : 1; + } if (t.TokenType == ExprTokenType.Null) return 0; return 1; } From cbef78397b1d4573fc8428f71f48ee7c1c1938aa Mon Sep 17 00:00:00 2001 From: Haiyang Xu Date: Tue, 3 Mar 2026 15:01:40 -0800 Subject: [PATCH 26/31] organize the benchmarks and order them by real-world frequency --- .../Filter/FilterExpressionBenchmarks.cs | 356 +++++++++++++----- 1 file changed, 256 insertions(+), 100 deletions(-) diff --git a/benchmark/BDN.benchmark/Filter/FilterExpressionBenchmarks.cs b/benchmark/BDN.benchmark/Filter/FilterExpressionBenchmarks.cs index 5fb20fefdda..265e79a6b03 100644 --- a/benchmark/BDN.benchmark/Filter/FilterExpressionBenchmarks.cs +++ b/benchmark/BDN.benchmark/Filter/FilterExpressionBenchmarks.cs @@ -7,162 +7,318 @@ namespace BDN.benchmark.Filter { + // ════════════════════════════════════════════════════════════════════════ + // 1.
COMPILATION (one-time cost per VSIM query) + // ════════════════════════════════════════════════════════════════════════ + + /// Compile filter string → postfix program. Always allocates (List, Stack, ExprToken[]). + [MemoryDiagnoser] + public class FilterCompileBenchmarks + { + private byte[] _comparison; + private byte[] _logicalAnd; + private byte[] _stringEq; + private byte[] _arithmetic; + private byte[] _containment; + private byte[] _combined; + + [GlobalSetup] + public void Setup() + { + _comparison = ".year > 1950"u8.ToArray(); + _logicalAnd = ".year > 1950 and .rating >= 4.0"u8.ToArray(); + _stringEq = ".genre == \"action\""u8.ToArray(); + _arithmetic = "(.year - 2000) ** 2 < 100"u8.ToArray(); + _containment = "\"classic\" in .tags"u8.ToArray(); + _combined = ".rating * 2 > 8 and (.year >= 1980 or \"modern\" in .tags) and .genre == \"action\""u8.ToArray(); + } + + [Benchmark(Description = "Comparison (.year > N)")] + public void Comparison() => ExprCompiler.TryCompile(_comparison, out _); + + [Benchmark(Description = "Logical AND (2 clauses)")] + public void LogicalAnd() => ExprCompiler.TryCompile(_logicalAnd, out _); + + [Benchmark(Description = "String equality")] + public void StringEq() => ExprCompiler.TryCompile(_stringEq, out _); + + [Benchmark(Description = "Arithmetic + power")] + public void Arithmetic() => ExprCompiler.TryCompile(_arithmetic, out _); + + [Benchmark(Description = "Containment (in)")] + public void Containment() => ExprCompiler.TryCompile(_containment, out _); + + [Benchmark(Description = "Combined (all ops)")] + public void Combined() => ExprCompiler.TryCompile(_combined, out _); + } + + // ════════════════════════════════════════════════════════════════════════ + // 2. 
FIELD EXTRACTION (per candidate, per selector) + // ════════════════════════════════════════════════════════════════════════ + /// - /// Benchmarks for the vector filter expression engine: - /// - ExprCompiler: compile filter string → postfix program - /// - ExprRunner: execute compiled program against JSON attributes - /// - AttributeExtractor: extract fields from raw JSON bytes - /// - End-to-end: compile + run for realistic filter scenarios - /// - /// These benchmarks measure the hot path of VSIM ... FILTER post-processing. + /// Extract a single field from raw JSON bytes. + /// Parameterized by JSON size: Small (2 fields), Medium (5), Large (12 + nested obj). /// [MemoryDiagnoser] - public class FilterExpressionBenchmarks + public class FilterExtractBenchmarks { - // ======================== Shared test data ======================== + // Small: {"year":1980,"rating":4.5} + // Medium: {"year":1980,"rating":4.5,"genre":"action","director":"Spielberg","tags":["classic","popular"]} + // Large: 12 fields including nested object and 3-element array + private byte[] _small; + private byte[] _medium; + private byte[] _large; - // Simple filter: single comparison - private byte[] _simpleFilterBytes; - private ExprProgram _simpleProgram; + [GlobalSetup] + public void Setup() + { + _small = Encoding.UTF8.GetBytes("{\"year\":1980,\"rating\":4.5}"); + _medium = Encoding.UTF8.GetBytes("{\"year\":1980,\"rating\":4.5,\"genre\":\"action\",\"director\":\"Spielberg\",\"tags\":[\"classic\",\"popular\"]}"); + _large = Encoding.UTF8.GetBytes("{\"id\":12345,\"title\":\"Test Movie\",\"year\":1980,\"rating\":4.5,\"genre\":\"action\",\"director\":\"Spielberg\",\"studio\":\"Universal\",\"budget\":50000000,\"tags\":[\"classic\",\"popular\",\"award-winning\"],\"metadata\":{\"source\":\"imdb\",\"verified\":true},\"active\":true}"); + } + + // --- Number fields (zero-alloc) --- + [Benchmark(Description = "Number · Small JSON (1st field)")] + public void Num_Small() => 
AttributeExtractor.ExtractField(_small, "year"); + + [Benchmark(Description = "Number · Medium JSON (2nd field)")] + public void Num_Medium() => AttributeExtractor.ExtractField(_medium, "rating"); + + [Benchmark(Description = "Number · Large JSON (skip 8 fields)")] + public void Num_Large() => AttributeExtractor.ExtractField(_large, "budget"); + + // --- String fields (zero-alloc for non-escaped) --- + [Benchmark(Description = "String · Medium JSON")] + public void Str_Medium() => AttributeExtractor.ExtractField(_medium, "genre"); + + [Benchmark(Description = "String · Large JSON (skip 5)")] + public void Str_Large() => AttributeExtractor.ExtractField(_large, "director"); - // Medium filter: two comparisons joined by AND - private byte[] _mediumFilterBytes; - private ExprProgram _mediumProgram; + // --- Array fields (ALLOCATES ExprToken[count]) --- + [Benchmark(Description = "Array[2] · Medium JSON → alloc")] + public void Arr_Medium() => AttributeExtractor.ExtractField(_medium, "tags"); - // Complex filter: multiple ops, arithmetic, containment, string equality - private byte[] _complexFilterBytes; - private ExprProgram _complexProgram; + [Benchmark(Description = "Array[3] · Large JSON → alloc")] + public void Arr_Large() => AttributeExtractor.ExtractField(_large, "tags"); - // JSON attribute payloads (varying sizes) - private byte[] _smallJson; // 2 fields - private byte[] _mediumJson; // 5 fields - private byte[] _largeJson; // 10+ fields, array, nested object to skip + // --- Boolean (zero-alloc) --- + [Benchmark(Description = "Boolean · Large JSON (skip nested obj)")] + public void Bool_Large() => AttributeExtractor.ExtractField(_large, "active"); - // Reusable evaluation stack + // --- Missing field (zero-alloc) --- + [Benchmark(Description = "Missing · Small JSON")] + public void Miss_Small() => AttributeExtractor.ExtractField(_small, "missing"); + + [Benchmark(Description = "Missing · Medium JSON")] + public void Miss_Medium() => 
AttributeExtractor.ExtractField(_medium, "missing"); + + [Benchmark(Description = "Missing · Large JSON")] + public void Miss_Large() => AttributeExtractor.ExtractField(_large, "missing"); + } + + // ════════════════════════════════════════════════════════════════════════ + // 3. EXECUTION BY EXPRESSION TYPE (compile-once, run per candidate) + // Fixed JSON: Medium (5 fields, includes array) + // Ordered: most frequent → least frequent real-world query patterns + // ════════════════════════════════════════════════════════════════════════ + + /// + /// Run pre-compiled filters against medium JSON. + /// Ordered from most common to least common real-world usage patterns. + /// + [MemoryDiagnoser] + public class FilterRunByExprBenchmarks + { + // --- Common: range / categorical filters --- + private ExprProgram _comparison; // .year > 1950 + private ExprProgram _logicalAnd; // .year > 1950 and .rating >= 4.0 + private ExprProgram _stringEq; // .genre == "action" + private ExprProgram _containsArray; // "classic" in .tags + + // --- Moderate: logical combinations --- + private ExprProgram _logicalOr; // .year < 1960 or .rating > 4.0 + private ExprProgram _not; // not (.genre == "drama") + private ExprProgram _stringNeq; // .genre != "drama" + + // --- Less common: computed / advanced --- + private ExprProgram _arithmetic; // .rating * 2 > 8 + private ExprProgram _power; // (.year - 2000) ** 2 < 100 + private ExprProgram _containsString; // "act" in .genre (substring) + + // --- Realistic combined --- + private ExprProgram _combined; // all ops together + + private byte[] _json; private Stack _stack; [GlobalSetup] public void Setup() { - // Filter expressions (UTF-8) - _simpleFilterBytes = Encoding.UTF8.GetBytes(".year > 1950"); - _mediumFilterBytes = Encoding.UTF8.GetBytes(".year > 1950 and .rating >= 4.0"); - _complexFilterBytes = Encoding.UTF8.GetBytes(".rating * 2 > 8 and (.year >= 1980 or \"modern\" in .tags) and .genre == \"action\""); + _comparison = 
ExprCompiler.TryCompile(".year > 1950"u8, out _); + _logicalAnd = ExprCompiler.TryCompile(".year > 1950 and .rating >= 4.0"u8, out _); + _stringEq = ExprCompiler.TryCompile(".genre == \"action\""u8, out _); + _containsArray = ExprCompiler.TryCompile("\"classic\" in .tags"u8, out _); + _logicalOr = ExprCompiler.TryCompile(".year < 1960 or .rating > 4.0"u8, out _); + _not = ExprCompiler.TryCompile("not (.genre == \"drama\")"u8, out _); + _stringNeq = ExprCompiler.TryCompile(".genre != \"drama\""u8, out _); + _arithmetic = ExprCompiler.TryCompile(".rating * 2 > 8"u8, out _); + _power = ExprCompiler.TryCompile("(.year - 2000) ** 2 < 100"u8, out _); + _containsString = ExprCompiler.TryCompile("\"act\" in .genre"u8, out _); + _combined = ExprCompiler.TryCompile(".rating * 2 > 8 and (.year >= 1980 or \"modern\" in .tags) and .genre == \"action\""u8, out _); + + _json = Encoding.UTF8.GetBytes("{\"year\":1980,\"rating\":4.5,\"genre\":\"action\",\"director\":\"Spielberg\",\"tags\":[\"classic\",\"popular\"]}"); + _stack = ExprRunner.CreateStack(); + } - // Compile once for run benchmarks - _simpleProgram = ExprCompiler.TryCompile(_simpleFilterBytes, out _); - _mediumProgram = ExprCompiler.TryCompile(_mediumFilterBytes, out _); - _complexProgram = ExprCompiler.TryCompile(_complexFilterBytes, out _); + // ── Common: range / categorical ────────────────────────────────── - // JSON payloads - _smallJson = Encoding.UTF8.GetBytes("{\"year\":1980,\"rating\":4.5}"); - _mediumJson = Encoding.UTF8.GetBytes("{\"year\":1980,\"rating\":4.5,\"genre\":\"action\",\"director\":\"Spielberg\",\"tags\":[\"classic\",\"popular\"]}"); - _largeJson = Encoding.UTF8.GetBytes("{\"id\":12345,\"title\":\"Test Movie\",\"year\":1980,\"rating\":4.5,\"genre\":\"action\",\"director\":\"Spielberg\",\"studio\":\"Universal\",\"budget\":50000000,\"tags\":[\"classic\",\"popular\",\"award-winning\"],\"metadata\":{\"source\":\"imdb\",\"verified\":true},\"active\":true}"); + [Benchmark(Description = "1. 
.year > N (range)")] + public bool Comparison() => ExprRunner.Run(_comparison, _json, _stack); - _stack = ExprRunner.CreateStack(); - } + [Benchmark(Description = "2. .year > N and .rating >= M (multi-range)")] + public bool LogicalAnd() => ExprRunner.Run(_logicalAnd, _json, _stack); - // ======================== Compilation benchmarks ======================== + [Benchmark(Description = "3. .genre == \"action\" (category)")] + public bool StringEq() => ExprRunner.Run(_stringEq, _json, _stack); - [Benchmark] - public void Compile_Simple() - => ExprCompiler.TryCompile(_simpleFilterBytes, out _); + [Benchmark(Description = "4. \"x\" in .tags (tag search) → ALLOC")] + public bool InArray() => ExprRunner.Run(_containsArray, _json, _stack); - [Benchmark] - public void Compile_Medium() - => ExprCompiler.TryCompile(_mediumFilterBytes, out _); + // ── Moderate: logical combinations ─────────────────────────────── - [Benchmark] - public void Compile_Complex() - => ExprCompiler.TryCompile(_complexFilterBytes, out _); + [Benchmark(Description = "5. A or B (logical OR)")] + public bool LogicalOr() => ExprRunner.Run(_logicalOr, _json, _stack); - // ======================== Execution benchmarks (compile once, run many) ======================== + [Benchmark(Description = "6. not (A) (exclusion)")] + public bool Not() => ExprRunner.Run(_not, _json, _stack); - [Benchmark] - public bool Run_Simple_SmallJson() - => ExprRunner.Run(_simpleProgram, _smallJson, _stack); + [Benchmark(Description = "7. .genre != \"drama\" (not-equal)")] + public bool StringNeq() => ExprRunner.Run(_stringNeq, _json, _stack); - [Benchmark] - public bool Run_Simple_MediumJson() - => ExprRunner.Run(_simpleProgram, _mediumJson, _stack); + // ── Less common: computed / advanced ───────────────────────────── - [Benchmark] - public bool Run_Medium_MediumJson() - => ExprRunner.Run(_mediumProgram, _mediumJson, _stack); + [Benchmark(Description = "8. 
.rating * 2 > 8 (arithmetic)")] + public bool Arithmetic() => ExprRunner.Run(_arithmetic, _json, _stack); - [Benchmark] - public bool Run_Complex_MediumJson() - => ExprRunner.Run(_complexProgram, _mediumJson, _stack); + [Benchmark(Description = "9. (.year-2000)**2 < 100 (power)")] + public bool Power() => ExprRunner.Run(_power, _json, _stack); - [Benchmark] - public bool Run_Complex_LargeJson() - => ExprRunner.Run(_complexProgram, _largeJson, _stack); + [Benchmark(Description = "10. \"act\" in .genre (substring)")] + public bool InString() => ExprRunner.Run(_containsString, _json, _stack); - // ======================== Field extraction benchmarks ======================== + // ── Realistic combined ─────────────────────────────────────────── - [Benchmark] - public void Extract_FirstField() - => AttributeExtractor.ExtractField(_smallJson, "year"); + [Benchmark(Description = "11. Combined (all ops) → ALLOC")] + public bool Combined() => ExprRunner.Run(_combined, _json, _stack); + } - [Benchmark] - public void Extract_LastField_MediumJson() - => AttributeExtractor.ExtractField(_mediumJson, "tags"); + // ════════════════════════════════════════════════════════════════════════ + // 4. EXECUTION BY JSON COMPLEXITY (fixed filter, varying JSON) + // ════════════════════════════════════════════════════════════════════════ - [Benchmark] - public void Extract_SkipNestedObject() - => AttributeExtractor.ExtractField(_largeJson, "active"); + /// + /// Same filter run against small / medium / large JSON. + /// Shows how JSON size affects extraction + evaluation time. 
+ /// + [MemoryDiagnoser] + public class FilterRunByJsonBenchmarks + { + private ExprProgram _numericFilter; + private ExprProgram _arrayFilter; - [Benchmark] - public void Extract_MissingField() - => AttributeExtractor.ExtractField(_mediumJson, "nonexistent"); + private byte[] _small; // 2 fields, no array + private byte[] _medium; // 5 fields, 2-element array + private byte[] _large; // 12 fields, 3-element array, nested object - // ======================== End-to-end benchmarks (compile + run) ======================== + private Stack _stack; - [Benchmark] - public bool EndToEnd_Simple() + [GlobalSetup] + public void Setup() { - var program = ExprCompiler.TryCompile(_simpleFilterBytes, out _); - return ExprRunner.Run(program, _mediumJson, _stack); + _numericFilter = ExprCompiler.TryCompile(".year > 1950 and .rating >= 4.0"u8, out _); + _arrayFilter = ExprCompiler.TryCompile("\"classic\" in .tags"u8, out _); + + _small = Encoding.UTF8.GetBytes("{\"year\":1980,\"rating\":4.5}"); + _medium = Encoding.UTF8.GetBytes("{\"year\":1980,\"rating\":4.5,\"genre\":\"action\",\"director\":\"Spielberg\",\"tags\":[\"classic\",\"popular\"]}"); + _large = Encoding.UTF8.GetBytes("{\"id\":12345,\"title\":\"Test Movie\",\"year\":1980,\"rating\":4.5,\"genre\":\"action\",\"director\":\"Spielberg\",\"studio\":\"Universal\",\"budget\":50000000,\"tags\":[\"classic\",\"popular\",\"award-winning\"],\"metadata\":{\"source\":\"imdb\",\"verified\":true},\"active\":true}"); + _stack = ExprRunner.CreateStack(); } - [Benchmark] - public bool EndToEnd_Complex() + // --- Numeric filter (zero-alloc regardless of JSON size) --- + [Benchmark(Description = "Numeric AND · Small JSON")] + public bool Numeric_Small() => ExprRunner.Run(_numericFilter, _small, _stack); + + [Benchmark(Description = "Numeric AND · Medium JSON")] + public bool Numeric_Medium() => ExprRunner.Run(_numericFilter, _medium, _stack); + + [Benchmark(Description = "Numeric AND · Large JSON")] + public bool Numeric_Large() => 
ExprRunner.Run(_numericFilter, _large, _stack); + + // --- Array filter (allocates when array is found) --- + [Benchmark(Description = "in .tags · Small JSON (no tags → false)")] + public bool Array_Small() => ExprRunner.Run(_arrayFilter, _small, _stack); + + [Benchmark(Description = "in .tags · Medium JSON (2 elem) → alloc")] + public bool Array_Medium() => ExprRunner.Run(_arrayFilter, _medium, _stack); + + [Benchmark(Description = "in .tags · Large JSON (3 elem) → alloc")] + public bool Array_Large() => ExprRunner.Run(_arrayFilter, _large, _stack); + } + + // ════════════════════════════════════════════════════════════════════════ + // 5. BATCH (compile once, run N candidates) + // ════════════════════════════════════════════════════════════════════════ + + /// + /// Simulate real VSIM post-filtering: compile once, evaluate N candidates. + /// Shows total allocation and throughput at scale. + /// + [MemoryDiagnoser] + public class FilterBatchBenchmarks + { + private ExprProgram _numericAnd; + private ExprProgram _combined; + private byte[] _small; + private byte[] _medium; + private Stack _stack; + + [GlobalSetup] + public void Setup() { - var program = ExprCompiler.TryCompile(_complexFilterBytes, out _); - return ExprRunner.Run(program, _mediumJson, _stack); + _numericAnd = ExprCompiler.TryCompile(".year > 1950 and .rating >= 4.0"u8, out _); + _combined = ExprCompiler.TryCompile(".rating * 2 > 8 and (.year >= 1980 or \"modern\" in .tags) and .genre == \"action\""u8, out _); + _small = Encoding.UTF8.GetBytes("{\"year\":1980,\"rating\":4.5}"); + _medium = Encoding.UTF8.GetBytes("{\"year\":1980,\"rating\":4.5,\"genre\":\"action\",\"director\":\"Spielberg\",\"tags\":[\"classic\",\"popular\"]}"); + _stack = ExprRunner.CreateStack(); } - // ======================== Batch simulation (N candidates) ======================== - - [Benchmark] + [Benchmark(Description = "Numeric AND · N candidates (zero-alloc)")] [Arguments(10)] [Arguments(100)] [Arguments(1000)] - public 
int RunBatch_Medium(int count) + public int NumericAnd(int N) { var matched = 0; - for (var i = 0; i < count; i++) + for (var i = 0; i < N; i++) { - // Alternate between matching and non-matching JSON to simulate realistic filtering - var json = (i % 3 == 0) ? _smallJson : _mediumJson; - if (ExprRunner.Run(_mediumProgram, json, _stack)) - matched++; + var json = (i % 3 == 0) ? _small : _medium; + if (ExprRunner.Run(_numericAnd, json, _stack)) matched++; } return matched; } - [Benchmark] + [Benchmark(Description = "Combined + array · N candidates (allocs)")] [Arguments(10)] [Arguments(100)] [Arguments(1000)] - public int RunBatch_Complex(int count) + public int Combined(int N) { var matched = 0; - for (var i = 0; i < count; i++) + for (var i = 0; i < N; i++) { - var json = (i % 3 == 0) ? _smallJson : _mediumJson; - if (ExprRunner.Run(_complexProgram, json, _stack)) - matched++; + var json = (i % 3 == 0) ? _small : _medium; + if (ExprRunner.Run(_combined, json, _stack)) matched++; } return matched; } From ee0ed586e6bd6e09c90a8360fceed61bd45f4867 Mon Sep 17 00:00:00 2001 From: Haiyang Xu Date: Tue, 3 Mar 2026 15:56:10 -0800 Subject: [PATCH 27/31] refactor --- .../Resp/Vector/Filter/AttributeExtractor.cs | 19 +++++++--- .../server/Resp/Vector/Filter/ExprCompiler.cs | 37 +++++++------------ 2 files changed, 26 insertions(+), 30 deletions(-) diff --git a/libs/server/Resp/Vector/Filter/AttributeExtractor.cs b/libs/server/Resp/Vector/Filter/AttributeExtractor.cs index 41892d062a2..7044c28f6db 100644 --- a/libs/server/Resp/Vector/Filter/AttributeExtractor.cs +++ b/libs/server/Resp/Vector/Filter/AttributeExtractor.cs @@ -279,17 +279,24 @@ private static bool SkipNumber(ReadOnlySpan json, ref int p) return p > start; } - // ======================== Utility ======================== + // ======================== Shared byte-level helpers ======================== + // These are used by both AttributeExtractor and ExprCompiler. 
- private static void SkipWhiteSpace(ReadOnlySpan json, ref int p) + internal static bool IsDigit(byte b) => b >= (byte)'0' && b <= (byte)'9'; + + internal static bool IsLetter(byte b) => (b >= (byte)'a' && b <= (byte)'z') || (b >= (byte)'A' && b <= (byte)'Z'); + + internal static bool IsLetterOrDigit(byte b) => IsLetter(b) || IsDigit(b); + + internal static bool IsWhiteSpace(byte b) => b == (byte)' ' || b == (byte)'\t' || b == (byte)'\n' || b == (byte)'\r'; + + internal static void SkipWhiteSpace(ReadOnlySpan s, ref int p) { - while (p < json.Length && IsWhiteSpace(json[p])) p++; + while (p < s.Length && IsWhiteSpace(s[p])) p++; } - private static bool IsWhiteSpace(byte b) => b == (byte)' ' || b == (byte)'\t' || b == (byte)'\n' || b == (byte)'\r'; - private static bool IsNumberChar(byte b) => - (b >= (byte)'0' && b <= (byte)'9') || b == (byte)'-' || b == (byte)'+' || + IsDigit(b) || b == (byte)'-' || b == (byte)'+' || b == (byte)'.' || b == (byte)'e' || b == (byte)'E'; private static bool MatchKey(ReadOnlySpan json, int keyStart, int keyEnd, string fieldName) diff --git a/libs/server/Resp/Vector/Filter/ExprCompiler.cs b/libs/server/Resp/Vector/Filter/ExprCompiler.cs index 27c65585f54..d03c55a1f4c 100644 --- a/libs/server/Resp/Vector/Filter/ExprCompiler.cs +++ b/libs/server/Resp/Vector/Filter/ExprCompiler.cs @@ -44,13 +44,13 @@ public static ExprProgram TryCompile(ReadOnlySpan expr, out int errpos) var p = 0; while (p < expr.Length) { - SkipSpaces(expr, ref p); + AttributeExtractor.SkipWhiteSpace(expr, ref p); if (p >= expr.Length) break; // Determine if '-' should be a negative number sign or a subtraction operator var minusIsNumber = false; - if (expr[p] == (byte)'-' && p + 1 < expr.Length && (IsDigit(expr[p + 1]) || expr[p + 1] == (byte)'.')) + if (expr[p] == (byte)'-' && p + 1 < expr.Length && (AttributeExtractor.IsDigit(expr[p + 1]) || expr[p + 1] == (byte)'.')) { if (tokens.Count == 0) { @@ -65,7 +65,7 @@ public static ExprProgram TryCompile(ReadOnlySpan 
expr, out int errpos) } // Number - if (IsDigit(expr[p]) || (minusIsNumber && expr[p] == (byte)'-')) + if (AttributeExtractor.IsDigit(expr[p]) || (minusIsNumber && expr[p] == (byte)'-')) { var t = ParseNumber(expr, ref p); if (t.IsNone) { errpos = p; return null; } @@ -100,7 +100,7 @@ public static ExprProgram TryCompile(ReadOnlySpan expr, out int errpos) } // Operator or literal keyword (null, true, false, not, and, or, in) - if (IsLetter(expr[p]) || IsOperatorSpecialChar(expr[p])) + if (AttributeExtractor.IsLetter(expr[p]) || IsOperatorSpecialChar(expr[p])) { var t = ParseOperatorOrLiteral(expr, ref p); if (t.IsNone) { errpos = p; return null; } @@ -226,14 +226,8 @@ private static bool ProcessOperator( } // ======================== Tokenization helpers ======================== - - private static bool IsDigit(byte b) => b >= (byte)'0' && b <= (byte)'9'; - - private static bool IsLetter(byte b) => (b >= (byte)'a' && b <= (byte)'z') || (b >= (byte)'A' && b <= (byte)'Z'); - - private static bool IsLetterOrDigit(byte b) => IsLetter(b) || IsDigit(b); - - private static bool IsWhiteSpace(byte b) => b == (byte)' ' || b == (byte)'\t' || b == (byte)'\n' || b == (byte)'\r'; + // Shared helpers (IsDigit, IsLetter, IsLetterOrDigit, IsWhiteSpace, SkipWhiteSpace) + // live in AttributeExtractor and are reused here. 
private static bool IsOperatorSpecialChar(byte b) { @@ -243,14 +237,9 @@ private static bool IsOperatorSpecialChar(byte b) b == (byte)'&'; } - private static void SkipSpaces(ReadOnlySpan s, ref int p) - { - while (p < s.Length && IsWhiteSpace(s[p])) p++; - } - private static bool IsSelectorChar(byte c) { - return IsLetterOrDigit(c) || c == (byte)'_' || c == (byte)'-'; + return AttributeExtractor.IsLetterOrDigit(c) || c == (byte)'_' || c == (byte)'-'; } private static ExprToken ParseNumber(ReadOnlySpan s, ref int p) @@ -258,7 +247,7 @@ private static ExprToken ParseNumber(ReadOnlySpan s, ref int p) var start = p; if (p < s.Length && s[p] == (byte)'-') p++; - while (p < s.Length && (IsDigit(s[p]) || s[p] == (byte)'.' || s[p] == (byte)'e' || s[p] == (byte)'E')) + while (p < s.Length && (AttributeExtractor.IsDigit(s[p]) || s[p] == (byte)'.' || s[p] == (byte)'e' || s[p] == (byte)'E')) p++; var numSpan = s.Slice(start, p - start); @@ -343,7 +332,7 @@ private static ExprToken ParseTuple(ReadOnlySpan s, ref int p) var elements = new ExprToken[64]; // max 64 elements var count = 0; - SkipSpaces(s, ref p); + AttributeExtractor.SkipWhiteSpace(s, ref p); // Handle empty tuple [] if (p < s.Length && s[p] == (byte)']') @@ -354,13 +343,13 @@ private static ExprToken ParseTuple(ReadOnlySpan s, ref int p) while (true) { - SkipSpaces(s, ref p); + AttributeExtractor.SkipWhiteSpace(s, ref p); if (p >= s.Length) return default; if (count >= elements.Length) return default; // Parse element: number or string ExprToken ele; - if (IsDigit(s[p]) || s[p] == (byte)'-') + if (AttributeExtractor.IsDigit(s[p]) || s[p] == (byte)'-') { ele = ParseNumber(s, ref p); } @@ -376,7 +365,7 @@ private static ExprToken ParseTuple(ReadOnlySpan s, ref int p) elements[count++] = ele; - SkipSpaces(s, ref p); + AttributeExtractor.SkipWhiteSpace(s, ref p); if (p >= s.Length) return default; if (s[p] == (byte)']') { p++; break; } @@ -394,7 +383,7 @@ private static ExprToken ParseOperatorOrLiteral(ReadOnlySpan s, 
ref int p) var start = p; // Consume alphabetic or operator-special characters - while (p < s.Length && (IsLetter(s[p]) || IsOperatorSpecialChar(s[p]))) + while (p < s.Length && (AttributeExtractor.IsLetter(s[p]) || IsOperatorSpecialChar(s[p]))) p++; var matchLen = p - start; From 5c9e14403bd578c3232a0b95d8b52ded44c62d7c Mon Sep 17 00:00:00 2001 From: Haiyang Xu Date: Thu, 5 Mar 2026 15:40:27 -0800 Subject: [PATCH 28/31] avoid memory copy for final results output --- libs/server/API/GarnetApi.cs | 8 +- libs/server/API/GarnetWatchApi.cs | 8 +- libs/server/API/IGarnetApi.cs | 4 +- .../Resp/Vector/RespServerSessionVectors.cs | 57 +++++++-- libs/server/Resp/Vector/VectorManager.cs | 109 ++++-------------- .../Session/MainStore/VectorStoreOps.cs | 8 +- 6 files changed, 85 insertions(+), 109 deletions(-) diff --git a/libs/server/API/GarnetApi.cs b/libs/server/API/GarnetApi.cs index 85163bd5acf..435212cc982 100644 --- a/libs/server/API/GarnetApi.cs +++ b/libs/server/API/GarnetApi.cs @@ -520,12 +520,12 @@ public unsafe GarnetStatus VectorSetRemove(ArgSlice key, ArgSlice element) => storageSession.VectorSetRemove(SpanByte.FromPinnedPointer(key.ptr, key.length), SpanByte.FromPinnedPointer(element.ptr, element.length)); /// - public unsafe GarnetStatus VectorSetValueSimilarity(ArgSlice key, VectorValueType valueType, ArgSlice values, int count, float delta, int searchExplorationFactor, ArgSlice filter, int maxFilteringEffort, bool includeAttributes, ref SpanByteAndMemory outputIds, out VectorIdFormat outputIdFormat, ref SpanByteAndMemory outputDistances, ref SpanByteAndMemory outputAttributes, out VectorManagerResult result) - => storageSession.VectorSetValueSimilarity(SpanByte.FromPinnedPointer(key.ptr, key.length), valueType, values, count, delta, searchExplorationFactor, filter.ReadOnlySpan, maxFilteringEffort, includeAttributes, ref outputIds, out outputIdFormat, ref outputDistances, ref outputAttributes, out result); + public unsafe GarnetStatus 
VectorSetValueSimilarity(ArgSlice key, VectorValueType valueType, ArgSlice values, int count, float delta, int searchExplorationFactor, ArgSlice filter, int maxFilteringEffort, bool includeAttributes, ref SpanByteAndMemory outputIds, out VectorIdFormat outputIdFormat, ref SpanByteAndMemory outputDistances, ref SpanByteAndMemory outputAttributes, out VectorManagerResult result, ref SpanByteAndMemory filterBitmap) + => storageSession.VectorSetValueSimilarity(SpanByte.FromPinnedPointer(key.ptr, key.length), valueType, values, count, delta, searchExplorationFactor, filter.ReadOnlySpan, maxFilteringEffort, includeAttributes, ref outputIds, out outputIdFormat, ref outputDistances, ref outputAttributes, out result, ref filterBitmap); /// - public unsafe GarnetStatus VectorSetElementSimilarity(ArgSlice key, ArgSlice element, int count, float delta, int searchExplorationFactor, ArgSlice filter, int maxFilteringEffort, bool includeAttributes, ref SpanByteAndMemory outputIds, out VectorIdFormat outputIdFormat, ref SpanByteAndMemory outputDistances, ref SpanByteAndMemory outputAttributes, out VectorManagerResult result) - => storageSession.VectorSetElementSimilarity(SpanByte.FromPinnedPointer(key.ptr, key.length), element.ReadOnlySpan, count, delta, searchExplorationFactor, filter.ReadOnlySpan, maxFilteringEffort, includeAttributes, ref outputIds, out outputIdFormat, ref outputDistances, ref outputAttributes, out result); + public unsafe GarnetStatus VectorSetElementSimilarity(ArgSlice key, ArgSlice element, int count, float delta, int searchExplorationFactor, ArgSlice filter, int maxFilteringEffort, bool includeAttributes, ref SpanByteAndMemory outputIds, out VectorIdFormat outputIdFormat, ref SpanByteAndMemory outputDistances, ref SpanByteAndMemory outputAttributes, out VectorManagerResult result, ref SpanByteAndMemory filterBitmap) + => storageSession.VectorSetElementSimilarity(SpanByte.FromPinnedPointer(key.ptr, key.length), element.ReadOnlySpan, count, delta, 
searchExplorationFactor, filter.ReadOnlySpan, maxFilteringEffort, includeAttributes, ref outputIds, out outputIdFormat, ref outputDistances, ref outputAttributes, out result, ref filterBitmap); /// public unsafe GarnetStatus VectorSetEmbedding(ArgSlice key, ArgSlice element, ref SpanByteAndMemory outputDistances) diff --git a/libs/server/API/GarnetWatchApi.cs b/libs/server/API/GarnetWatchApi.cs index 2cae35fdafe..d60d8546f65 100644 --- a/libs/server/API/GarnetWatchApi.cs +++ b/libs/server/API/GarnetWatchApi.cs @@ -650,17 +650,17 @@ public bool ResetScratchBuffer(int offset) #region Vector Sets /// - public GarnetStatus VectorSetValueSimilarity(ArgSlice key, VectorValueType valueType, ArgSlice value, int count, float delta, int searchExplorationFactor, ArgSlice filter, int maxFilteringEffort, bool includeAttributes, ref SpanByteAndMemory outputIds, out VectorIdFormat outputIdFormat, ref SpanByteAndMemory outputDistances, ref SpanByteAndMemory outputAttributes, out VectorManagerResult result) + public GarnetStatus VectorSetValueSimilarity(ArgSlice key, VectorValueType valueType, ArgSlice value, int count, float delta, int searchExplorationFactor, ArgSlice filter, int maxFilteringEffort, bool includeAttributes, ref SpanByteAndMemory outputIds, out VectorIdFormat outputIdFormat, ref SpanByteAndMemory outputDistances, ref SpanByteAndMemory outputAttributes, out VectorManagerResult result, ref SpanByteAndMemory filterBitmap) { garnetApi.WATCH(key, StoreType.Main); - return garnetApi.VectorSetValueSimilarity(key, valueType, value, count, delta, searchExplorationFactor, filter, maxFilteringEffort, includeAttributes, ref outputIds, out outputIdFormat, ref outputDistances, ref outputAttributes, out result); + return garnetApi.VectorSetValueSimilarity(key, valueType, value, count, delta, searchExplorationFactor, filter, maxFilteringEffort, includeAttributes, ref outputIds, out outputIdFormat, ref outputDistances, ref outputAttributes, out result, ref filterBitmap); } /// - 
public GarnetStatus VectorSetElementSimilarity(ArgSlice key, ArgSlice element, int count, float delta, int searchExplorationFactor, ArgSlice filter, int maxFilteringEffort, bool includeAttributes, ref SpanByteAndMemory outputIds, out VectorIdFormat outputIdFormat, ref SpanByteAndMemory outputDistances, ref SpanByteAndMemory outputAttributes, out VectorManagerResult result) + public GarnetStatus VectorSetElementSimilarity(ArgSlice key, ArgSlice element, int count, float delta, int searchExplorationFactor, ArgSlice filter, int maxFilteringEffort, bool includeAttributes, ref SpanByteAndMemory outputIds, out VectorIdFormat outputIdFormat, ref SpanByteAndMemory outputDistances, ref SpanByteAndMemory outputAttributes, out VectorManagerResult result, ref SpanByteAndMemory filterBitmap) { garnetApi.WATCH(key, StoreType.Main); - return garnetApi.VectorSetElementSimilarity(key, element, count, delta, searchExplorationFactor, filter, maxFilteringEffort, includeAttributes, ref outputIds, out outputIdFormat, ref outputDistances, ref outputAttributes, out result); + return garnetApi.VectorSetElementSimilarity(key, element, count, delta, searchExplorationFactor, filter, maxFilteringEffort, includeAttributes, ref outputIds, out outputIdFormat, ref outputDistances, ref outputAttributes, out result, ref filterBitmap); } /// diff --git a/libs/server/API/IGarnetApi.cs b/libs/server/API/IGarnetApi.cs index 6acf3b3e303..ac4e370a762 100644 --- a/libs/server/API/IGarnetApi.cs +++ b/libs/server/API/IGarnetApi.cs @@ -2041,7 +2041,7 @@ public bool IterateObjectStore(ref TScanFunctions scanFunctions, /// Ids are encoded in as length prefixed blobs of bytes. /// Attributes are encoded in as length prefixed blobs of bytes. 
/// - GarnetStatus VectorSetValueSimilarity(ArgSlice key, VectorValueType valueType, ArgSlice value, int count, float delta, int searchExplorationFactor, ArgSlice filter, int maxFilteringEffort, bool includeAttributes, ref SpanByteAndMemory outputIds, out VectorIdFormat outputIdFormat, ref SpanByteAndMemory outputDistances, ref SpanByteAndMemory outputAttributes, out VectorManagerResult result); + GarnetStatus VectorSetValueSimilarity(ArgSlice key, VectorValueType valueType, ArgSlice value, int count, float delta, int searchExplorationFactor, ArgSlice filter, int maxFilteringEffort, bool includeAttributes, ref SpanByteAndMemory outputIds, out VectorIdFormat outputIdFormat, ref SpanByteAndMemory outputDistances, ref SpanByteAndMemory outputAttributes, out VectorManagerResult result, ref SpanByteAndMemory filterBitmap); /// /// Perform a similarity search given an element already in the vector set and these parameters. @@ -2049,7 +2049,7 @@ public bool IterateObjectStore(ref TScanFunctions scanFunctions, /// Ids are encoded in as length prefixed blobs of bytes. /// Attributes are encoded in as length prefixed blobs of bytes. /// - GarnetStatus VectorSetElementSimilarity(ArgSlice key, ArgSlice element, int count, float delta, int searchExplorationFactor, ArgSlice filter, int maxFilteringEffort, bool includeAttributes, ref SpanByteAndMemory outputIds, out VectorIdFormat outputIdFormat, ref SpanByteAndMemory outputDistances, ref SpanByteAndMemory outputAttributes, out VectorManagerResult result); + GarnetStatus VectorSetElementSimilarity(ArgSlice key, ArgSlice element, int count, float delta, int searchExplorationFactor, ArgSlice filter, int maxFilteringEffort, bool includeAttributes, ref SpanByteAndMemory outputIds, out VectorIdFormat outputIdFormat, ref SpanByteAndMemory outputDistances, ref SpanByteAndMemory outputAttributes, out VectorManagerResult result, ref SpanByteAndMemory filterBitmap); /// /// Fetch the embedding of a given element in a Vector set. 
diff --git a/libs/server/Resp/Vector/RespServerSessionVectors.cs b/libs/server/Resp/Vector/RespServerSessionVectors.cs index 6725785254e..3f34463356d 100644 --- a/libs/server/Resp/Vector/RespServerSessionVectors.cs +++ b/libs/server/Resp/Vector/RespServerSessionVectors.cs @@ -759,11 +759,16 @@ private bool NetworkVSIM(ref TGarnetApi storageApi) // TODO: these stackallocs are dangerous, need logic to avoid stack overflow Span idSpace = stackalloc byte[(DefaultResultSetSize * DefaultIdSize) + (DefaultResultSetSize * sizeof(int))]; Span distanceSpace = stackalloc float[DefaultResultSetSize]; - Span attributeSpace = withAttributes.Value ? stackalloc byte[(DefaultResultSetSize * DefaultAttributeSize) + (DefaultResultSetSize * sizeof(int))] : default; + var needFilter = filter.Value.Length > 0; + var needAttributes = withAttributes.Value || needFilter; + Span attributeSpace = needAttributes ? stackalloc byte[(DefaultResultSetSize * DefaultAttributeSize) + (DefaultResultSetSize * sizeof(int))] : default; var idResult = SpanByteAndMemory.FromPinnedSpan(idSpace); var distanceResult = SpanByteAndMemory.FromPinnedSpan(MemoryMarshal.Cast(distanceSpace)); var attributeResult = SpanByteAndMemory.FromPinnedSpan(attributeSpace); + // Bitmap: 1 bit per result. DefaultResultSetSize results = 8 bytes on stack. + Span bitmapSpace = needFilter ? 
stackalloc byte[(DefaultResultSetSize + 7) >> 3] : default; + var filterBitmapResult = SpanByteAndMemory.FromPinnedSpan(bitmapSpace); try { @@ -772,11 +777,11 @@ private bool NetworkVSIM(ref TGarnetApi storageApi) VectorIdFormat idFormat; if (!element.HasValue) { - res = storageApi.VectorSetValueSimilarity(key, valueType, ArgSlice.FromPinnedSpan(values), count.Value, delta.Value, searchExplorationFactor.Value, filter.Value, maxFilteringEffort.Value, withAttributes.Value, ref idResult, out idFormat, ref distanceResult, ref attributeResult, out vectorRes); + res = storageApi.VectorSetValueSimilarity(key, valueType, ArgSlice.FromPinnedSpan(values), count.Value, delta.Value, searchExplorationFactor.Value, filter.Value, maxFilteringEffort.Value, withAttributes.Value, ref idResult, out idFormat, ref distanceResult, ref attributeResult, out vectorRes, ref filterBitmapResult); } else { - res = storageApi.VectorSetElementSimilarity(key, element.Value, count.Value, delta.Value, searchExplorationFactor.Value, filter.Value, maxFilteringEffort.Value, withAttributes.Value, ref idResult, out idFormat, ref distanceResult, ref attributeResult, out vectorRes); + res = storageApi.VectorSetElementSimilarity(key, element.Value, count.Value, delta.Value, searchExplorationFactor.Value, filter.Value, maxFilteringEffort.Value, withAttributes.Value, ref idResult, out idFormat, ref distanceResult, ref attributeResult, out vectorRes, ref filterBitmapResult); } if (res == GarnetStatus.NOTFOUND) @@ -804,22 +809,39 @@ private bool NetworkVSIM(ref TGarnetApi storageApi) { var remainingIds = idResult.AsReadOnlySpan(); var distancesSpan = MemoryMarshal.Cast(distanceResult.AsReadOnlySpan()); - var remaininingAttributes = withAttributes.Value ? attributeResult.AsReadOnlySpan() : default; + var hasFilter = filterBitmapResult.Length > 0; + var filterBitmap = hasFilter ? filterBitmapResult.AsReadOnlySpan() : default; + var remaininingAttributes = (withAttributes.Value || hasFilter) ? 
attributeResult.AsReadOnlySpan() : default; - var arrayItemCount = distancesSpan.Length; + var totalFound = distancesSpan.Length; + + // Compute output count: if bitmap is present, popcount it; otherwise all results + int outputCount; + if (hasFilter) + { + outputCount = 0; + for (var b = 0; b < filterBitmap.Length; b++) + outputCount += System.Numerics.BitOperations.PopCount(filterBitmap[b]); + } + else + { + outputCount = totalFound; + } + + var arrayItemCount = outputCount; if (withScores.Value) { - arrayItemCount += distancesSpan.Length; + arrayItemCount += outputCount; } if (withAttributes.Value) { - arrayItemCount += distancesSpan.Length; + arrayItemCount += outputCount; } while (!RespWriteUtils.TryWriteArrayLength(arrayItemCount, ref dcurr, dend)) SendAndReset(); - for (var resultIndex = 0; resultIndex < distancesSpan.Length; resultIndex++) + for (var resultIndex = 0; resultIndex < totalFound; resultIndex++) { ReadOnlySpan elementData; @@ -855,6 +877,18 @@ private bool NetworkVSIM(ref TGarnetApi storageApi) throw new GarnetException($"Unexpected id format: {idFormat}"); } + // Check filter bitmap — skip results that didn't pass the filter + if (hasFilter && (filterBitmap[resultIndex >> 3] & (1 << (resultIndex & 7))) == 0) + { + // Advance attribute reader for skipped results (attributes are always present when bitmap exists) + if (!remaininingAttributes.IsEmpty) + { + var skipAttrLen = BinaryPrimitives.ReadInt32LittleEndian(remaininingAttributes); + remaininingAttributes = remaininingAttributes[(sizeof(int) + skipAttrLen)..]; + } + continue; + } + while (!RespWriteUtils.TryWriteBulkString(elementData, ref dcurr, dend)) SendAndReset(); @@ -880,6 +914,12 @@ private bool NetworkVSIM(ref TGarnetApi storageApi) while (!RespWriteUtils.TryWriteBulkString(attr, ref dcurr, dend)) SendAndReset(); } + else if (!remaininingAttributes.IsEmpty) + { + // Attributes fetched for filtering but not requested — advance reader + var attrLen = 
BinaryPrimitives.ReadInt32LittleEndian(remaininingAttributes); + remaininingAttributes = remaininingAttributes[(sizeof(int) + attrLen)..]; + } } } } @@ -908,6 +948,7 @@ private bool NetworkVSIM(ref TGarnetApi storageApi) idResult.Memory?.Dispose(); distanceResult.Memory?.Dispose(); attributeResult.Memory?.Dispose(); + filterBitmapResult.Memory?.Dispose(); } } finally diff --git a/libs/server/Resp/Vector/VectorManager.cs b/libs/server/Resp/Vector/VectorManager.cs index acd9c3739ca..51e459e1d51 100644 --- a/libs/server/Resp/Vector/VectorManager.cs +++ b/libs/server/Resp/Vector/VectorManager.cs @@ -491,7 +491,8 @@ internal VectorManagerResult ValueSimilarity( ref SpanByteAndMemory outputIds, out VectorIdFormat outputIdFormat, ref SpanByteAndMemory outputDistances, - ref SpanByteAndMemory outputAttributes + ref SpanByteAndMemory outputAttributes, + ref SpanByteAndMemory filterBitmap ) { AssertHaveStorageSession(); @@ -560,7 +561,7 @@ out var continuation return VectorManagerResult.BadParams; } - if (includeAttributes) + if (includeAttributes || !filter.IsEmpty) { FetchVectorElementAttributes(context, found, outputIds, ref outputAttributes); } @@ -568,25 +569,7 @@ out var continuation // Apply post-filtering if filter is specified if (!filter.IsEmpty) { - if (includeAttributes) - { - found = ApplyPostFilter(filter, found, ref outputIds, ref outputDistances, ref outputAttributes); - } - else - { - // Fetch attributes internally for filtering even when not returning them. - // FetchVectorElementAttributes will resize the buffer dynamically if needed. 
- var tempAttributes = new SpanByteAndMemory(MemoryPool.Shared.Rent(found * 64), found * 64); - try - { - FetchVectorElementAttributes(context, found, outputIds, ref tempAttributes); - found = ApplyPostFilter(filter, found, ref outputIds, ref outputDistances, ref tempAttributes); - } - finally - { - tempAttributes.Memory?.Dispose(); - } - } + ApplyPostFilter(filter, found, outputAttributes.AsReadOnlySpan(), filterBitmap.AsSpan()); } if (continuation != 0) @@ -625,7 +608,8 @@ internal VectorManagerResult ElementSimilarity( ref SpanByteAndMemory outputIds, out VectorIdFormat outputIdFormat, ref SpanByteAndMemory outputDistances, - ref SpanByteAndMemory outputAttributes + ref SpanByteAndMemory outputAttributes, + ref SpanByteAndMemory filterBitmap ) { AssertHaveStorageSession(); @@ -686,7 +670,7 @@ out var continuation return VectorManagerResult.BadParams; } - if (includeAttributes) + if (includeAttributes || !filter.IsEmpty) { FetchVectorElementAttributes(context, found, outputIds, ref outputAttributes); } @@ -694,25 +678,7 @@ out var continuation // Apply post-filtering if filter is specified if (!filter.IsEmpty) { - if (includeAttributes) - { - found = ApplyPostFilter(filter, found, ref outputIds, ref outputDistances, ref outputAttributes); - } - else - { - // Fetch attributes internally for filtering even when not returning them. - // FetchVectorElementAttributes will resize the buffer dynamically if needed. 
- var tempAttributes = new SpanByteAndMemory(MemoryPool.Shared.Rent(found * 64), found * 64); - try - { - FetchVectorElementAttributes(context, found, outputIds, ref tempAttributes); - found = ApplyPostFilter(filter, found, ref outputIds, ref outputDistances, ref tempAttributes); - } - finally - { - tempAttributes.Memory?.Dispose(); - } - } + ApplyPostFilter(filter, found, outputAttributes.AsReadOnlySpan(), filterBitmap.AsSpan()); } if (continuation != 0) @@ -954,17 +920,21 @@ internal static uint CalculateValueDimensions(VectorValueType valueType, ReadOnl /// using a stack-based VM (ExprRunner) with on-demand field extraction (AttributeExtractor). /// 3. No JsonDocument DOM is allocated — fields are extracted directly from the raw bytes. /// + /// The is populated with one bit per result: + /// bit i = 1 means result i passed the filter. Caller can test with: + /// (filterBitmap[i >> 3] & (1 << (i & 7))) != 0 + /// + /// No in-place compaction — the caller skips non-matching results using the bitmap. /// - private int ApplyPostFilter( + private static int ApplyPostFilter( ReadOnlySpan filter, int numResults, - ref SpanByteAndMemory outputIds, - ref SpanByteAndMemory outputDistances, - ref SpanByteAndMemory outputAttributes) + ReadOnlySpan attributesSpan, + Span filterBitmap) { if (numResults == 0) { - return numResults; + return 0; } // Compile the filter expression (UTF-8 bytes) into a flat postfix program. 
@@ -975,28 +945,19 @@ private int ApplyPostFilter( return 0; // If the filter doesn't compile, treat it as filtering out all results (matches Redis behavior) } + // Clear the bitmap + filterBitmap.Clear(); + var filteredCount = 0; // Allocate the evaluation stack once and reuse it across all candidate evaluations var stack = ExprRunner.CreateStack(); - var idsSpan = outputIds.AsSpan(); - var distancesSpan = MemoryMarshal.Cast(outputDistances.AsSpan()); - var attributesSpan = outputAttributes.AsSpan(); - - var idReadPos = 0; var attrReadPos = 0; - var idWritePos = 0; - var distWritePos = 0; - var attrWritePos = 0; for (var i = 0; i < numResults; i++) { - // Read ID - var idLen = BinaryPrimitives.ReadInt32LittleEndian(idsSpan[idReadPos..]); - var idTotalLen = sizeof(int) + idLen; - - // Read attribute + // Read attribute length-prefix + data var attrLen = BinaryPrimitives.ReadInt32LittleEndian(attributesSpan[attrReadPos..]); var attrData = attributesSpan.Slice(attrReadPos + sizeof(int), attrLen); @@ -1004,39 +965,13 @@ private int ApplyPostFilter( // No JsonDocument DOM allocation — AttributeExtractor extracts fields on demand. 
if (ExprRunner.Run(program, attrData, stack)) { - // Copy ID if not already in place - if (idReadPos != idWritePos) - { - idsSpan.Slice(idReadPos, idTotalLen).CopyTo(idsSpan[idWritePos..]); - } - - // Copy distance if not already in place - if (i != distWritePos) - { - distancesSpan[distWritePos] = distancesSpan[i]; - } - - // Copy attribute if not already in place - if (attrReadPos != attrWritePos) - { - attributesSpan.Slice(attrReadPos, sizeof(int) + attrLen).CopyTo(attributesSpan[attrWritePos..]); - } - - idWritePos += idTotalLen; - distWritePos++; - attrWritePos += sizeof(int) + attrLen; + filterBitmap[i >> 3] |= (byte)(1 << (i & 7)); filteredCount++; } - idReadPos += idTotalLen; attrReadPos += sizeof(int) + attrLen; } - // Update lengths - outputIds.Length = idWritePos; - outputDistances.Length = distWritePos * sizeof(float); - outputAttributes.Length = attrWritePos; - return filteredCount; } diff --git a/libs/server/Storage/Session/MainStore/VectorStoreOps.cs b/libs/server/Storage/Session/MainStore/VectorStoreOps.cs index 24e04921775..9073d73fa83 100644 --- a/libs/server/Storage/Session/MainStore/VectorStoreOps.cs +++ b/libs/server/Storage/Session/MainStore/VectorStoreOps.cs @@ -200,7 +200,7 @@ public unsafe GarnetStatus VectorSetRemove(SpanByte key, SpanByte element) /// Perform a similarity search on an existing Vector Set given a vector as a bunch of floats. 
/// [SkipLocalsInit] - public unsafe GarnetStatus VectorSetValueSimilarity(SpanByte key, VectorValueType valueType, ArgSlice values, int count, float delta, int searchExplorationFactor, ReadOnlySpan filter, int maxFilteringEffort, bool includeAttributes, ref SpanByteAndMemory outputIds, out VectorIdFormat outputIdFormat, ref SpanByteAndMemory outputDistances, ref SpanByteAndMemory outputAttributes, out VectorManagerResult result) + public unsafe GarnetStatus VectorSetValueSimilarity(SpanByte key, VectorValueType valueType, ArgSlice values, int count, float delta, int searchExplorationFactor, ReadOnlySpan filter, int maxFilteringEffort, bool includeAttributes, ref SpanByteAndMemory outputIds, out VectorIdFormat outputIdFormat, ref SpanByteAndMemory outputDistances, ref SpanByteAndMemory outputAttributes, out VectorManagerResult result, ref SpanByteAndMemory filterBitmap) { parseState.InitializeWithArgument(ArgSlice.FromPinnedSpan(key.AsReadOnlySpan())); @@ -218,7 +218,7 @@ public unsafe GarnetStatus VectorSetValueSimilarity(SpanByte key, VectorValueTyp return status; } - result = vectorManager.ValueSimilarity(indexSpan, valueType, values.ReadOnlySpan, count, delta, searchExplorationFactor, filter, maxFilteringEffort, includeAttributes, ref outputIds, out outputIdFormat, ref outputDistances, ref outputAttributes); + result = vectorManager.ValueSimilarity(indexSpan, valueType, values.ReadOnlySpan, count, delta, searchExplorationFactor, filter, maxFilteringEffort, includeAttributes, ref outputIds, out outputIdFormat, ref outputDistances, ref outputAttributes, ref filterBitmap); return GarnetStatus.OK; } @@ -228,7 +228,7 @@ public unsafe GarnetStatus VectorSetValueSimilarity(SpanByte key, VectorValueTyp /// Perform a similarity search on an existing Vector Set given an element that is already in the Vector Set. 
/// [SkipLocalsInit] - public unsafe GarnetStatus VectorSetElementSimilarity(SpanByte key, ReadOnlySpan element, int count, float delta, int searchExplorationFactor, ReadOnlySpan filter, int maxFilteringEffort, bool includeAttributes, ref SpanByteAndMemory outputIds, out VectorIdFormat outputIdFormat, ref SpanByteAndMemory outputDistances, ref SpanByteAndMemory outputAttributes, out VectorManagerResult result) + public unsafe GarnetStatus VectorSetElementSimilarity(SpanByte key, ReadOnlySpan element, int count, float delta, int searchExplorationFactor, ReadOnlySpan filter, int maxFilteringEffort, bool includeAttributes, ref SpanByteAndMemory outputIds, out VectorIdFormat outputIdFormat, ref SpanByteAndMemory outputDistances, ref SpanByteAndMemory outputAttributes, out VectorManagerResult result, ref SpanByteAndMemory filterBitmap) { parseState.InitializeWithArgument(ArgSlice.FromPinnedSpan(key.AsReadOnlySpan())); @@ -245,7 +245,7 @@ public unsafe GarnetStatus VectorSetElementSimilarity(SpanByte key, ReadOnlySpan return status; } - result = vectorManager.ElementSimilarity(indexSpan, element, count, delta, searchExplorationFactor, filter, maxFilteringEffort, includeAttributes, ref outputIds, out outputIdFormat, ref outputDistances, ref outputAttributes); + result = vectorManager.ElementSimilarity(indexSpan, element, count, delta, searchExplorationFactor, filter, maxFilteringEffort, includeAttributes, ref outputIds, out outputIdFormat, ref outputDistances, ref outputAttributes, ref filterBitmap); return GarnetStatus.OK; } } From e1ce83c14bf34d64d2d57b6ed5c5b05a2b0591f1 Mon Sep 17 00:00:00 2001 From: Haiyang Xu Date: Thu, 5 Mar 2026 16:19:47 -0800 Subject: [PATCH 29/31] refactor to all use slicing for JSON extraction --- .../Resp/Vector/Filter/AttributeExtractor.cs | 256 +++++++++--------- .../server/Resp/Vector/Filter/ExprCompiler.cs | 167 ++++++------ libs/server/Resp/Vector/VectorManager.cs | 8 +- 3 files changed, 217 insertions(+), 214 deletions(-) diff --git
a/libs/server/Resp/Vector/Filter/AttributeExtractor.cs b/libs/server/Resp/Vector/Filter/AttributeExtractor.cs index 7044c28f6db..c867b2130c8 100644 --- a/libs/server/Resp/Vector/Filter/AttributeExtractor.cs +++ b/libs/server/Resp/Vector/Filter/AttributeExtractor.cs @@ -25,159 +25,160 @@ internal static class AttributeExtractor /// public static ExprToken ExtractField(ReadOnlySpan json, string fieldName) { - var p = 0; - SkipWhiteSpace(json, ref p); - if (p >= json.Length || json[p] != (byte)'{') return default; - p++; // Skip '{' + var s = TrimWhiteSpace(json); + if (s.IsEmpty || s[0] != (byte)'{') return default; + s = s[1..]; // Skip '{' while (true) { - SkipWhiteSpace(json, ref p); - if (p >= json.Length) return default; - if (json[p] == (byte)'}') return default; // End of object, field not found + s = TrimWhiteSpace(s); + if (s.IsEmpty) return default; + if (s[0] == (byte)'}') return default; // End of object, field not found // Expect a key string - if (json[p] != (byte)'"') return default; + if (s[0] != (byte)'"') return default; - var keyStart = p + 1; - if (!SkipString(json, ref p)) return default; - var keyEnd = p - 1; // p is now past the closing quote + // Extract key content (between quotes) + var afterOpenQuote = s[1..]; + if (!SkipString(ref s)) return default; + // Key content is between afterOpenQuote and s (minus the closing quote byte) + var keyContent = afterOpenQuote[..(afterOpenQuote.Length - s.Length - 1)]; - // Compare key with field name - var match = MatchKey(json, keyStart, keyEnd, fieldName); + var match = MatchKey(keyContent, fieldName); // Expect ':' - SkipWhiteSpace(json, ref p); - if (p >= json.Length || json[p] != (byte)':') return default; - p++; // Skip ':' + s = TrimWhiteSpace(s); + if (s.IsEmpty || s[0] != (byte)':') return default; + s = s[1..]; // Skip ':' - SkipWhiteSpace(json, ref p); - if (p >= json.Length) return default; + s = TrimWhiteSpace(s); + if (s.IsEmpty) return default; if (match) { // Found the field — parse the 
value into a token - return ParseValueToken(json, ref p); + return ParseValueToken(json, ref s); } else { // Skip the value - if (!SkipValue(json, ref p)) return default; + if (!SkipValue(ref s)) return default; } // Look for ',' or '}' - SkipWhiteSpace(json, ref p); - if (p >= json.Length) return default; - if (json[p] == (byte)',') { p++; continue; } - if (json[p] == (byte)'}') return default; // End of object, not found + s = TrimWhiteSpace(s); + if (s.IsEmpty) return default; + if (s[0] == (byte)',') { s = s[1..]; continue; } + if (s[0] == (byte)'}') return default; // End of object, not found return default; // Malformed JSON } } // ======================== Value parsing (allocating) ======================== - private static ExprToken ParseValueToken(ReadOnlySpan json, ref int p) + private static ExprToken ParseValueToken(ReadOnlySpan json, ref ReadOnlySpan s) { - SkipWhiteSpace(json, ref p); - if (p >= json.Length) return default; + s = TrimWhiteSpace(s); + if (s.IsEmpty) return default; - var c = json[p]; - if (c == (byte)'"') return ParseStringToken(json, ref p); - if (c == (byte)'[') return ParseArrayToken(json, ref p); + var c = s[0]; + if (c == (byte)'"') return ParseStringToken(json, ref s); + if (c == (byte)'[') return ParseArrayToken(json, ref s); if (c == (byte)'{') return default; // Nested objects not supported - if (c == (byte)'t') return ParseLiteralToken(json, ref p, "true"u8, ExprTokenType.Num, 1); - if (c == (byte)'f') return ParseLiteralToken(json, ref p, "false"u8, ExprTokenType.Num, 0); - if (c == (byte)'n') return ParseLiteralToken(json, ref p, "null"u8, ExprTokenType.Null, 0); - if ((c >= (byte)'0' && c <= (byte)'9') || c == (byte)'-' || c == (byte)'+') - return ParseNumberToken(json, ref p); + if (c == (byte)'t') return ParseLiteralToken(ref s, "true"u8, ExprTokenType.Num, 1); + if (c == (byte)'f') return ParseLiteralToken(ref s, "false"u8, ExprTokenType.Num, 0); + if (c == (byte)'n') return ParseLiteralToken(ref s, "null"u8, 
ExprTokenType.Null, 0); + if (IsDigit(c) || c == (byte)'-' || c == (byte)'+') + return ParseNumberToken(ref s); return default; } - private static ExprToken ParseStringToken(ReadOnlySpan json, ref int p) + private static ExprToken ParseStringToken(ReadOnlySpan json, ref ReadOnlySpan s) { - if (p >= json.Length || json[p] != (byte)'"') return default; - p++; // Skip opening quote - var start = p; + if (s.IsEmpty || s[0] != (byte)'"') return default; + s = s[1..]; // Skip opening quote + var body = s; var hasEscape = false; - while (p < json.Length) + while (!s.IsEmpty) { - if (json[p] == (byte)'\\') + if (s[0] == (byte)'\\') { hasEscape = true; - p += 2; // Skip escape sequence + s = s[2..]; // Skip escape sequence continue; } - if (json[p] == (byte)'"') + if (s[0] == (byte)'"') { + var content = body[..(body.Length - s.Length)]; if (!hasEscape) { // Zero-allocation: store byte offset+length into the source JSON - var len = p - start; - p++; // Skip closing quote - return ExprToken.NewJsonStr(start, len); + var absoluteStart = json.Length - body.Length; + s = s[1..]; // Skip closing quote + return ExprToken.NewJsonStr(absoluteStart, content.Length); } else { // Escaped strings must be materialized (rare path) - var value = UnescapeJsonString(json, start, p); - p++; // Skip closing quote + var value = UnescapeJsonString(content); + s = s[1..]; // Skip closing quote return ExprToken.NewStr(value); } } - p++; + s = s[1..]; } return default; // Unterminated string } - private static ExprToken ParseNumberToken(ReadOnlySpan json, ref int p) + private static ExprToken ParseNumberToken(ref ReadOnlySpan s) { - var start = p; - while (p < json.Length && IsNumberChar(json[p])) p++; - if (p == start) return default; + var original = s; + while (!s.IsEmpty && IsNumberChar(s[0])) s = s[1..]; + + var numSpan = original[..(original.Length - s.Length)]; + if (numSpan.IsEmpty) return default; - var numSpan = json.Slice(start, p - start); if (!Utf8Parser.TryParse(numSpan, out double 
value, out var bytesConsumed) || bytesConsumed != numSpan.Length) { - p = start; + s = original; return default; } return ExprToken.NewNum(value); } - private static ExprToken ParseLiteralToken(ReadOnlySpan json, ref int p, + private static ExprToken ParseLiteralToken(ref ReadOnlySpan s, ReadOnlySpan literal, ExprTokenType type, double num) { - if (p + literal.Length > json.Length) return default; - if (!json.Slice(p, literal.Length).SequenceEqual(literal)) return default; + if (s.Length < literal.Length) return default; + if (!s[..literal.Length].SequenceEqual(literal)) return default; // Verify delimiter follows (space, comma, bracket, brace, or end) - if (p + literal.Length < json.Length) + if (s.Length > literal.Length) { - var next = (char)json[p + literal.Length]; + var next = (char)s[literal.Length]; if (!char.IsWhiteSpace(next) && next != ',' && next != ']' && next != '}') return default; } - p += literal.Length; - var t = type == ExprTokenType.Null ? ExprToken.NewNull() : ExprToken.NewNum(num); - return t; + s = s[literal.Length..]; + return type == ExprTokenType.Null ? ExprToken.NewNull() : ExprToken.NewNum(num); } /// Max array elements before rejecting. 
private const int MaxArrayElements = 64; - private static ExprToken ParseArrayToken(ReadOnlySpan json, ref int p) + private static ExprToken ParseArrayToken(ReadOnlySpan json, ref ReadOnlySpan s) { - if (p >= json.Length || json[p] != (byte)'[') return default; - p++; // Skip '[' - SkipWhiteSpace(json, ref p); + if (s.IsEmpty || s[0] != (byte)'[') return default; + s = s[1..]; // Skip '[' + s = TrimWhiteSpace(s); // Handle empty array - if (p < json.Length && json[p] == (byte)']') + if (!s.IsEmpty && s[0] == (byte)']') { - p++; + s = s[1..]; return ExprToken.NewTuple([], 0); } @@ -189,17 +190,17 @@ private static ExprToken ParseArrayToken(ReadOnlySpan json, ref int p) { while (true) { - SkipWhiteSpace(json, ref p); - if (p >= json.Length || count >= MaxArrayElements) return default; + s = TrimWhiteSpace(s); + if (s.IsEmpty || count >= MaxArrayElements) return default; - var ele = ParseValueToken(json, ref p); + var ele = ParseValueToken(json, ref s); if (ele.IsNone) return default; elements[count++] = ele; - SkipWhiteSpace(json, ref p); - if (p >= json.Length) return default; - if (json[p] == (byte)',') { p++; continue; } - if (json[p] == (byte)']') { p++; break; } + s = TrimWhiteSpace(s); + if (s.IsEmpty) return default; + if (s[0] == (byte)',') { s = s[1..]; continue; } + if (s[0] == (byte)']') { s = s[1..]; break; } return default; // Malformed } @@ -215,68 +216,67 @@ private static ExprToken ParseArrayToken(ReadOnlySpan json, ref int p) // ======================== Fast skipping (non-allocating) ======================== - private static bool SkipValue(ReadOnlySpan json, ref int p) + private static bool SkipValue(ref ReadOnlySpan s) { - SkipWhiteSpace(json, ref p); - if (p >= json.Length) return false; + s = TrimWhiteSpace(s); + if (s.IsEmpty) return false; - var c = (char)json[p]; - return c switch + return (char)s[0] switch { - '"' => SkipString(json, ref p), - '{' => SkipBracketed(json, ref p, (byte)'{', (byte)'}'), - '[' => SkipBracketed(json, ref p, 
(byte)'[', (byte)']'), - 't' => SkipLiteral(json, ref p, "true"u8), - 'f' => SkipLiteral(json, ref p, "false"u8), - 'n' => SkipLiteral(json, ref p, "null"u8), - _ => SkipNumber(json, ref p), + '"' => SkipString(ref s), + '{' => SkipBracketed(ref s, (byte)'{', (byte)'}'), + '[' => SkipBracketed(ref s, (byte)'[', (byte)']'), + 't' => SkipLiteral(ref s, "true"u8), + 'f' => SkipLiteral(ref s, "false"u8), + 'n' => SkipLiteral(ref s, "null"u8), + _ => SkipNumber(ref s), }; } - private static bool SkipString(ReadOnlySpan json, ref int p) + private static bool SkipString(ref ReadOnlySpan s) { - if (p >= json.Length || json[p] != (byte)'"') return false; - p++; // Skip opening quote - while (p < json.Length) + if (s.IsEmpty || s[0] != (byte)'"') return false; + s = s[1..]; // Skip opening quote + while (!s.IsEmpty) { - if (json[p] == (byte)'\\') { p += 2; continue; } - if (json[p] == (byte)'"') { p++; return true; } - p++; + if (s[0] == (byte)'\\') { s = s[2..]; continue; } + if (s[0] == (byte)'"') { s = s[1..]; return true; } + s = s[1..]; } return false; // Unterminated } - private static bool SkipBracketed(ReadOnlySpan json, ref int p, byte opener, byte closer) + private static bool SkipBracketed(ref ReadOnlySpan s, byte opener, byte closer) { var depth = 1; - p++; // Skip opener - while (p < json.Length && depth > 0) + s = s[1..]; // Skip opener + while (!s.IsEmpty && depth > 0) { - if (json[p] == (byte)'"') + if (s[0] == (byte)'"') { - if (!SkipString(json, ref p)) return false; + if (!SkipString(ref s)) return false; continue; } - if (json[p] == opener) depth++; - else if (json[p] == closer) depth--; - p++; + if (s[0] == opener) depth++; + else if (s[0] == closer) depth--; + s = s[1..]; } return depth == 0; } - private static bool SkipLiteral(ReadOnlySpan json, ref int p, ReadOnlySpan literal) + private static bool SkipLiteral(ref ReadOnlySpan s, ReadOnlySpan literal) { - if (p + literal.Length > json.Length) return false; - if (!json.Slice(p, 
literal.Length).SequenceEqual(literal)) return false; - p += literal.Length; + if (s.Length < literal.Length) return false; + if (!s[..literal.Length].SequenceEqual(literal)) return false; + s = s[literal.Length..]; return true; } - private static bool SkipNumber(ReadOnlySpan json, ref int p) + private static bool SkipNumber(ref ReadOnlySpan s) { - var start = p; - while (p < json.Length && IsNumberChar(json[p])) p++; - return p > start; + var original = s; + while (!s.IsEmpty && IsNumberChar(s[0])) s = s[1..]; + return s.Length < original.Length; } // ======================== Shared byte-level helpers ======================== @@ -290,38 +290,42 @@ private static bool SkipNumber(ReadOnlySpan json, ref int p) internal static bool IsWhiteSpace(byte b) => b == (byte)' ' || b == (byte)'\t' || b == (byte)'\n' || b == (byte)'\r'; - internal static void SkipWhiteSpace(ReadOnlySpan s, ref int p) + /// + /// Returns the span with leading whitespace removed. + /// + internal static ReadOnlySpan TrimWhiteSpace(ReadOnlySpan s) { - while (p < s.Length && IsWhiteSpace(s[p])) p++; + var i = 0; + while (i < s.Length && IsWhiteSpace(s[i])) i++; + return s[i..]; } private static bool IsNumberChar(byte b) => IsDigit(b) || b == (byte)'-' || b == (byte)'+' || b == (byte)'.' 
|| b == (byte)'e' || b == (byte)'E'; - private static bool MatchKey(ReadOnlySpan json, int keyStart, int keyEnd, string fieldName) + private static bool MatchKey(ReadOnlySpan key, string fieldName) { - var keyLen = keyEnd - keyStart; - if (keyLen != fieldName.Length) return false; - for (var i = 0; i < keyLen; i++) + if (key.Length != fieldName.Length) return false; + for (var i = 0; i < key.Length; i++) { - if (json[keyStart + i] != (byte)fieldName[i]) return false; + if (key[i] != (byte)fieldName[i]) return false; } return true; } - private static string UnescapeJsonString(ReadOnlySpan json, int start, int end) + private static string UnescapeJsonString(ReadOnlySpan content) { // Worst case: each byte is a character - var chars = new char[end - start]; + var chars = new char[content.Length]; var len = 0; - var i = start; - while (i < end) + var i = 0; + while (i < content.Length) { - if (json[i] == (byte)'\\' && i + 1 < end) + if (content[i] == (byte)'\\' && i + 1 < content.Length) { i++; - chars[len++] = (char)json[i] switch + chars[len++] = (char)content[i] switch { 'n' => '\n', 'r' => '\r', @@ -329,13 +333,13 @@ private static string UnescapeJsonString(ReadOnlySpan json, int start, int '\\' => '\\', '"' => '"', '/' => '/', - _ => (char)json[i], + _ => (char)content[i], }; i++; } else { - chars[len++] = (char)json[i]; + chars[len++] = (char)content[i]; i++; } } diff --git a/libs/server/Resp/Vector/Filter/ExprCompiler.cs b/libs/server/Resp/Vector/Filter/ExprCompiler.cs index d03c55a1f4c..b74f6a0bc70 100644 --- a/libs/server/Resp/Vector/Filter/ExprCompiler.cs +++ b/libs/server/Resp/Vector/Filter/ExprCompiler.cs @@ -40,17 +40,17 @@ public static ExprProgram TryCompile(ReadOnlySpan expr, out int errpos) // Phase 1: Tokenize into a flat list var tokens = new List(DefaultCapacity); + var remaining = expr; - var p = 0; - while (p < expr.Length) + while (!remaining.IsEmpty) { - AttributeExtractor.SkipWhiteSpace(expr, ref p); - if (p >= expr.Length) + remaining = 
AttributeExtractor.TrimWhiteSpace(remaining); + if (remaining.IsEmpty) break; // Determine if '-' should be a negative number sign or a subtraction operator var minusIsNumber = false; - if (expr[p] == (byte)'-' && p + 1 < expr.Length && (AttributeExtractor.IsDigit(expr[p + 1]) || expr[p + 1] == (byte)'.')) + if (remaining[0] == (byte)'-' && remaining.Length > 1 && (AttributeExtractor.IsDigit(remaining[1]) || remaining[1] == (byte)'.')) { if (tokens.Count == 0) { @@ -65,50 +65,50 @@ public static ExprProgram TryCompile(ReadOnlySpan expr, out int errpos) } // Number - if (AttributeExtractor.IsDigit(expr[p]) || (minusIsNumber && expr[p] == (byte)'-')) + if (AttributeExtractor.IsDigit(remaining[0]) || (minusIsNumber && remaining[0] == (byte)'-')) { - var t = ParseNumber(expr, ref p); - if (t.IsNone) { errpos = p; return null; } + var t = ParseNumber(ref remaining); + if (t.IsNone) { errpos = expr.Length - remaining.Length; return null; } tokens.Add(t); continue; } // String literal - if (expr[p] == (byte)'"' || expr[p] == (byte)'\'') + if (remaining[0] == (byte)'"' || remaining[0] == (byte)'\'') { - var t = ParseString(expr, ref p); - if (t.IsNone) { errpos = p; return null; } + var t = ParseString(ref remaining); + if (t.IsNone) { errpos = expr.Length - remaining.Length; return null; } tokens.Add(t); continue; } // Selector (field access starting with '.') - if (expr[p] == (byte)'.' && p + 1 < expr.Length && IsSelectorChar(expr[p + 1])) + if (remaining[0] == (byte)'.' 
&& remaining.Length > 1 && IsSelectorChar(remaining[1])) { - var t = ParseSelector(expr, ref p); + var t = ParseSelector(ref remaining); tokens.Add(t); continue; } // Tuple literal [1, "foo", 42] - if (expr[p] == (byte)'[') + if (remaining[0] == (byte)'[') { - var t = ParseTuple(expr, ref p); - if (t.IsNone) { errpos = p; return null; } + var t = ParseTuple(ref remaining); + if (t.IsNone) { errpos = expr.Length - remaining.Length; return null; } tokens.Add(t); continue; } // Operator or literal keyword (null, true, false, not, and, or, in) - if (AttributeExtractor.IsLetter(expr[p]) || IsOperatorSpecialChar(expr[p])) + if (AttributeExtractor.IsLetter(remaining[0]) || IsOperatorSpecialChar(remaining[0])) { - var t = ParseOperatorOrLiteral(expr, ref p); - if (t.IsNone) { errpos = p; return null; } + var t = ParseOperatorOrLiteral(ref remaining); + if (t.IsNone) { errpos = expr.Length - remaining.Length; return null; } tokens.Add(t); continue; } - errpos = p; + errpos = expr.Length - remaining.Length; return null; } @@ -226,7 +226,7 @@ private static bool ProcessOperator( } // ======================== Tokenization helpers ======================== - // Shared helpers (IsDigit, IsLetter, IsLetterOrDigit, IsWhiteSpace, SkipWhiteSpace) + // Shared helpers (IsDigit, IsLetter, IsLetterOrDigit, IsWhiteSpace, TrimWhiteSpace) // live in AttributeExtractor and are reused here. private static bool IsOperatorSpecialChar(byte b) @@ -242,56 +242,57 @@ private static bool IsSelectorChar(byte c) return AttributeExtractor.IsLetterOrDigit(c) || c == (byte)'_' || c == (byte)'-'; } - private static ExprToken ParseNumber(ReadOnlySpan s, ref int p) + private static ExprToken ParseNumber(ref ReadOnlySpan s) { - var start = p; - if (p < s.Length && s[p] == (byte)'-') p++; + var original = s; + if (s[0] == (byte)'-') s = s[1..]; - while (p < s.Length && (AttributeExtractor.IsDigit(s[p]) || s[p] == (byte)'.' 
|| s[p] == (byte)'e' || s[p] == (byte)'E')) - p++; + while (!s.IsEmpty && (AttributeExtractor.IsDigit(s[0]) || s[0] == (byte)'.' || s[0] == (byte)'e' || s[0] == (byte)'E')) + s = s[1..]; - var numSpan = s.Slice(start, p - start); + var numSpan = original[..(original.Length - s.Length)]; if (!Utf8Parser.TryParse(numSpan, out double value, out var bytesConsumed) || bytesConsumed != numSpan.Length) { - p = start; + s = original; return default; } return ExprToken.NewNum(value); } - private static ExprToken ParseString(ReadOnlySpan s, ref int p) + private static ExprToken ParseString(ref ReadOnlySpan s) { - var quote = s[p]; - p++; // Skip opening quote - var start = p; + var quote = s[0]; + s = s[1..]; // Skip opening quote + var body = s; var hasEscape = false; - while (p < s.Length) + while (!s.IsEmpty) { - if (s[p] == (byte)'\\' && p + 1 < s.Length) + if (s[0] == (byte)'\\' && s.Length > 1) { hasEscape = true; - p += 2; // Skip escaped char + s = s[2..]; // Skip escaped char continue; } - if (s[p] == quote) + if (s[0] == quote) { + var content = body[..(body.Length - s.Length)]; string value; if (!hasEscape) { - value = Encoding.UTF8.GetString(s.Slice(start, p - start)); + value = Encoding.UTF8.GetString(content); } else { // Process escape sequences (matching Redis fastjson.c behavior) - var bytes = new byte[p - start]; + var bytes = new byte[content.Length]; var len = 0; - for (var i = start; i < p; i++) + for (var i = 0; i < content.Length; i++) { - if (s[i] == (byte)'\\' && i + 1 < p) + if (content[i] == (byte)'\\' && i + 1 < content.Length) { i++; - bytes[len++] = s[i] switch + bytes[len++] = content[i] switch { (byte)'n' => (byte)'\n', (byte)'r' => (byte)'\r', @@ -299,63 +300,63 @@ private static ExprToken ParseString(ReadOnlySpan s, ref int p) (byte)'\\' => (byte)'\\', (byte)'"' => (byte)'"', (byte)'\'' => (byte)'\'', - _ => s[i], // Unknown escape — copy verbatim + _ => content[i], // Unknown escape — copy verbatim }; } else { - bytes[len++] = s[i]; + 
bytes[len++] = content[i]; } } value = Encoding.UTF8.GetString(bytes, 0, len); } - p++; // Skip closing quote + s = s[1..]; // Skip closing quote return ExprToken.NewStr(value); } - p++; + s = s[1..]; } return default; // Unterminated string } - private static ExprToken ParseSelector(ReadOnlySpan s, ref int p) + private static ExprToken ParseSelector(ref ReadOnlySpan s) { - p++; // Skip the leading dot - var start = p; - while (p < s.Length && IsSelectorChar(s[p])) p++; - var name = Encoding.UTF8.GetString(s.Slice(start, p - start)); + s = s[1..]; // Skip the leading dot + var start = s; + while (!s.IsEmpty && IsSelectorChar(s[0])) s = s[1..]; + var name = Encoding.UTF8.GetString(start[..(start.Length - s.Length)]); return ExprToken.NewSelector(name); } - private static ExprToken ParseTuple(ReadOnlySpan s, ref int p) + private static ExprToken ParseTuple(ref ReadOnlySpan s) { - p++; // Skip '[' + s = s[1..]; // Skip '[' var elements = new ExprToken[64]; // max 64 elements var count = 0; - AttributeExtractor.SkipWhiteSpace(s, ref p); + s = AttributeExtractor.TrimWhiteSpace(s); // Handle empty tuple [] - if (p < s.Length && s[p] == (byte)']') + if (!s.IsEmpty && s[0] == (byte)']') { - p++; + s = s[1..]; return ExprToken.NewTuple([], 0); } while (true) { - AttributeExtractor.SkipWhiteSpace(s, ref p); - if (p >= s.Length) return default; + s = AttributeExtractor.TrimWhiteSpace(s); + if (s.IsEmpty) return default; if (count >= elements.Length) return default; // Parse element: number or string ExprToken ele; - if (AttributeExtractor.IsDigit(s[p]) || s[p] == (byte)'-') + if (AttributeExtractor.IsDigit(s[0]) || s[0] == (byte)'-') { - ele = ParseNumber(s, ref p); + ele = ParseNumber(ref s); } - else if (s[p] == (byte)'"' || s[p] == (byte)'\'') + else if (s[0] == (byte)'"' || s[0] == (byte)'\'') { - ele = ParseString(s, ref p); + ele = ParseString(ref s); } else { @@ -365,12 +366,12 @@ private static ExprToken ParseTuple(ReadOnlySpan s, ref int p) elements[count++] = ele; - 
AttributeExtractor.SkipWhiteSpace(s, ref p); - if (p >= s.Length) return default; + s = AttributeExtractor.TrimWhiteSpace(s); + if (s.IsEmpty) return default; - if (s[p] == (byte)']') { p++; break; } - if (s[p] != (byte)',') return default; - p++; // Skip comma + if (s[0] == (byte)']') { s = s[1..]; break; } + if (s[0] != (byte)',') return default; + s = s[1..]; // Skip comma } var result = new ExprToken[count]; @@ -378,31 +379,30 @@ private static ExprToken ParseTuple(ReadOnlySpan s, ref int p) return ExprToken.NewTuple(result, count); } - private static ExprToken ParseOperatorOrLiteral(ReadOnlySpan s, ref int p) + private static ExprToken ParseOperatorOrLiteral(ref ReadOnlySpan s) { - var start = p; + var start = s; // Consume alphabetic or operator-special characters - while (p < s.Length && (AttributeExtractor.IsLetter(s[p]) || IsOperatorSpecialChar(s[p]))) - p++; + while (!s.IsEmpty && (AttributeExtractor.IsLetter(s[0]) || IsOperatorSpecialChar(s[0]))) + s = s[1..]; - var matchLen = p - start; - if (matchLen == 0) return default; + var consumed = start[..(start.Length - s.Length)]; + if (consumed.IsEmpty) return default; // Check for literals - if (matchLen == 4 && s.Slice(start, 4).SequenceEqual("null"u8)) + if (consumed.Length == 4 && consumed.SequenceEqual("null"u8)) return ExprToken.NewNull(); - if (matchLen == 4 && s.Slice(start, 4).SequenceEqual("true"u8)) + if (consumed.Length == 4 && consumed.SequenceEqual("true"u8)) return ExprToken.NewNum(1); - if (matchLen == 5 && s.Slice(start, 5).SequenceEqual("false"u8)) + if (consumed.Length == 5 && consumed.SequenceEqual("false"u8)) return ExprToken.NewNum(0); // Find best matching operator (longest match) OpCode bestCode = default; var bestLen = 0; - var consumed = s.Slice(start, matchLen); TryMatchOp(consumed, "||"u8, OpCode.Or, ref bestCode, ref bestLen); TryMatchOp(consumed, "or"u8, OpCode.Or, ref bestCode, ref bestLen); TryMatchOp(consumed, "&&"u8, OpCode.And, ref bestCode, ref bestLen); @@ -427,24 +427,23 
@@ private static ExprToken ParseOperatorOrLiteral(ReadOnlySpan s, ref int p) if (bestLen == 0) { - p = start; + s = start; return default; } - // Rewind p to consume only the matched operator length - p = start + bestLen; + // Rewind — only consume the matched operator length + s = start[bestLen..]; return ExprToken.NewOp(bestCode); } private static void TryMatchOp(ReadOnlySpan consumed, ReadOnlySpan opName, OpCode opCode, ref OpCode bestCode, ref int bestLen) { - var opLen = opName.Length; - if (opLen > consumed.Length) return; - if (!consumed.Slice(0, opLen).SequenceEqual(opName)) return; - if (opLen > bestLen) + if (opName.Length > consumed.Length) return; + if (!consumed[..opName.Length].SequenceEqual(opName)) return; + if (opName.Length > bestLen) { bestCode = opCode; - bestLen = opLen; + bestLen = opName.Length; } } } diff --git a/libs/server/Resp/Vector/VectorManager.cs b/libs/server/Resp/Vector/VectorManager.cs index 51e459e1d51..646f77de92a 100644 --- a/libs/server/Resp/Vector/VectorManager.cs +++ b/libs/server/Resp/Vector/VectorManager.cs @@ -953,13 +953,13 @@ private static int ApplyPostFilter( // Allocate the evaluation stack once and reuse it across all candidate evaluations var stack = ExprRunner.CreateStack(); - var attrReadPos = 0; + var remaining = attributesSpan; for (var i = 0; i < numResults; i++) { // Read attribute length-prefix + data - var attrLen = BinaryPrimitives.ReadInt32LittleEndian(attributesSpan[attrReadPos..]); - var attrData = attributesSpan.Slice(attrReadPos + sizeof(int), attrLen); + var attrLen = BinaryPrimitives.ReadInt32LittleEndian(remaining); + var attrData = remaining.Slice(sizeof(int), attrLen); // Execute the compiled filter program against raw JSON bytes. // No JsonDocument DOM allocation — AttributeExtractor extracts fields on demand. 
@@ private static int ApplyPostFilter( filteredCount++; } - attrReadPos += sizeof(int) + attrLen; + remaining = remaining[(sizeof(int) + attrLen)..]; } return filteredCount; From 91c62cfe45411a731f715ecc65f1d8cfb79e36e1 Mon Sep 17 00:00:00 2001 From: Haiyang Xu Date: Thu, 5 Mar 2026 16:44:31 -0800 Subject: [PATCH 30/31] Single-pass extraction for all fields --- .../Resp/Vector/Filter/AttributeExtractor.cs | 71 ++++++ libs/server/Resp/Vector/Filter/ExprRunner.cs | 203 ++++++++++++------ .../Vector/Filter/VectorFilterExpression.cs | 32 ++- libs/server/Resp/Vector/VectorManager.cs | 22 +- 4 files changed, 251 insertions(+), 77 deletions(-) diff --git a/libs/server/Resp/Vector/Filter/AttributeExtractor.cs b/libs/server/Resp/Vector/Filter/AttributeExtractor.cs index c867b2130c8..4388ad799d2 100644 --- a/libs/server/Resp/Vector/Filter/AttributeExtractor.cs +++ b/libs/server/Resp/Vector/Filter/AttributeExtractor.cs @@ -19,6 +19,77 @@ namespace Garnet.server.Vector.Filter /// internal static class AttributeExtractor { + /// + /// Extract multiple top-level fields from a JSON object in a single pass. + /// <paramref name="fieldNames"/> lists the fields to extract. + /// <paramref name="results"/> must be at least <paramref name="fieldNames"/>.Length long. + /// Entries for fields not found are set to default (IsNone). + /// Returns the number of fields successfully extracted. 
+ /// + public static int ExtractFields(ReadOnlySpan json, string[] fieldNames, ExprToken[] results) + { + // Clear results + for (var i = 0; i < fieldNames.Length; i++) + results[i] = default; + + var s = TrimWhiteSpace(json); + if (s.IsEmpty || s[0] != (byte)'{') return 0; + s = s[1..]; // Skip '{' + + var found = 0; + var needed = fieldNames.Length; + + while (true) + { + s = TrimWhiteSpace(s); + if (s.IsEmpty) return found; + if (s[0] == (byte)'}') return found; + + // Expect a key string + if (s[0] != (byte)'"') return found; + + var afterOpenQuote = s[1..]; + if (!SkipString(ref s)) return found; + var keyContent = afterOpenQuote[..(afterOpenQuote.Length - s.Length - 1)]; + + // Check against all requested field names + var matchIndex = -1; + for (var i = 0; i < fieldNames.Length; i++) + { + if (results[i].IsNone && MatchKey(keyContent, fieldNames[i])) + { + matchIndex = i; + break; + } + } + + // Expect ':' + s = TrimWhiteSpace(s); + if (s.IsEmpty || s[0] != (byte)':') return found; + s = s[1..]; + + s = TrimWhiteSpace(s); + if (s.IsEmpty) return found; + + if (matchIndex >= 0) + { + results[matchIndex] = ParseValueToken(json, ref s); + found++; + if (found == needed) return found; // All fields found — early exit + } + else + { + if (!SkipValue(ref s)) return found; + } + + s = TrimWhiteSpace(s); + if (s.IsEmpty) return found; + if (s[0] == (byte)',') { s = s[1..]; continue; } + if (s[0] == (byte)'}') return found; + return found; // Malformed JSON + } + } + /// /// Extract a top-level field from a JSON object and return it as an ExprToken. /// Returns default (IsNone) if the field is not found or the JSON is malformed. 
diff --git a/libs/server/Resp/Vector/Filter/ExprRunner.cs b/libs/server/Resp/Vector/Filter/ExprRunner.cs index 0ad11d55dd9..947b76cbe72 100644 --- a/libs/server/Resp/Vector/Filter/ExprRunner.cs +++ b/libs/server/Resp/Vector/Filter/ExprRunner.cs @@ -29,7 +29,7 @@ internal static class ExprRunner /// /// Create a reusable evaluation stack with default capacity (16). - /// The caller owns the stack and can pass it to across multiple calls. + /// The caller owns the stack and can pass it to Run across multiple calls. /// The stack is cleared at the start of each Run call, so the caller does not need to clear it. /// public static Stack CreateStack() => new Stack(DefaultStackCapacity); @@ -64,90 +64,157 @@ public static bool Run(ExprProgram program, ReadOnlySpan json, Stack 0) + returnValue = ToBool(stack.Peek()) != 0; + + // Clear to release string references for GC + stack.Clear(); + return returnValue; + } - ExprToken b = stack.Count > 0 ? stack.Pop() : default; - ExprToken a = arity == 2 && stack.Count > 0 ? stack.Pop() : default; + /// + /// Execute the compiled program using pre-extracted field values (single-pass extraction). + /// Selectors are resolved from instead of re-scanning JSON. + /// + /// The compiled postfix program. + /// Raw JSON attribute bytes (needed for JsonRef string comparisons). + /// Selector names matching indices in . + /// Pre-extracted field values (one per selector name). + /// A reusable evaluation stack obtained from . + public static bool Run(ExprProgram program, ReadOnlySpan json, + string[] selectorNames, ExprToken[] extractedFields, Stack stack) + { + stack.Clear(); - var result = ExprToken.NewNum(0); + for (var i = 0; i < program.Length; i++) + { + var inst = program.Instructions[i]; - switch (inst.OpCode) + // Selectors — look up from pre-extracted fields + if (inst.TokenType == ExprTokenType.Selector) { - case OpCode.Not: - result.Num = ToBool(b) == 0 ? 
1 : 0; - break; - case OpCode.Pow: - result.Num = Math.Pow(ToNum(a, json), ToNum(b, json)); - break; - case OpCode.Mul: - result.Num = ToNum(a, json) * ToNum(b, json); - break; - case OpCode.Div: - result.Num = ToNum(a, json) / ToNum(b, json); - break; - case OpCode.Mod: - result.Num = ToNum(a, json) % ToNum(b, json); - break; - case OpCode.Add: - result.Num = ToNum(a, json) + ToNum(b, json); - break; - case OpCode.Sub: - result.Num = ToNum(a, json) - ToNum(b, json); - break; - case OpCode.Gt: - result.Num = ToNum(a, json) > ToNum(b, json) ? 1 : 0; - break; - case OpCode.Gte: - result.Num = ToNum(a, json) >= ToNum(b, json) ? 1 : 0; - break; - case OpCode.Lt: - result.Num = ToNum(a, json) < ToNum(b, json) ? 1 : 0; - break; - case OpCode.Lte: - result.Num = ToNum(a, json) <= ToNum(b, json) ? 1 : 0; - break; - case OpCode.Eq: - result.Num = AreEqual(a, b, json) ? 1 : 0; - break; - case OpCode.Neq: - result.Num = !AreEqual(a, b, json) ? 1 : 0; - break; - case OpCode.In: - result.Num = EvalIn(a, b, json) ? 1 : 0; - break; - case OpCode.And: - result.Num = ToBool(a) != 0 && ToBool(b) != 0 ? 1 : 0; - break; - case OpCode.Or: - result.Num = ToBool(a) != 0 || ToBool(b) != 0 ? 1 : 0; - break; + var found = false; + for (var j = 0; j < selectorNames.Length; j++) + { + if (string.Equals(inst.Str, selectorNames[j], System.StringComparison.Ordinal)) + { + if (extractedFields[j].IsNone) + { + stack.Clear(); + return false; // Selector not found → expression is false + } + stack.Push(extractedFields[j]); + found = true; + break; + } + } + if (!found) + { + stack.Clear(); + return false; + } + continue; } - stack.Push(result); + if (!ExecuteInstruction(inst, json, stack)) + return false; } var returnValue = false; if (stack.Count > 0) returnValue = ToBool(stack.Peek()) != 0; - // Clear to release string references for GC stack.Clear(); return returnValue; } + /// + /// Execute a single non-selector instruction (value push or operator evaluation). 
+ /// Returns false if the stack is in an invalid state. + /// + private static bool ExecuteInstruction(ExprToken inst, ReadOnlySpan json, Stack stack) + { + // Non-operator values — push directly + if (inst.TokenType != ExprTokenType.Op) + { + stack.Push(inst); + return true; + } + + // Operators — pop operands, compute, push result + var arity = OpTable.GetArity(inst.OpCode); + if (stack.Count < arity) + { + stack.Clear(); + return false; + } + + ExprToken b = stack.Count > 0 ? stack.Pop() : default; + ExprToken a = arity == 2 && stack.Count > 0 ? stack.Pop() : default; + + var result = ExprToken.NewNum(0); + + switch (inst.OpCode) + { + case OpCode.Not: + result.Num = ToBool(b) == 0 ? 1 : 0; + break; + case OpCode.Pow: + result.Num = Math.Pow(ToNum(a, json), ToNum(b, json)); + break; + case OpCode.Mul: + result.Num = ToNum(a, json) * ToNum(b, json); + break; + case OpCode.Div: + result.Num = ToNum(a, json) / ToNum(b, json); + break; + case OpCode.Mod: + result.Num = ToNum(a, json) % ToNum(b, json); + break; + case OpCode.Add: + result.Num = ToNum(a, json) + ToNum(b, json); + break; + case OpCode.Sub: + result.Num = ToNum(a, json) - ToNum(b, json); + break; + case OpCode.Gt: + result.Num = ToNum(a, json) > ToNum(b, json) ? 1 : 0; + break; + case OpCode.Gte: + result.Num = ToNum(a, json) >= ToNum(b, json) ? 1 : 0; + break; + case OpCode.Lt: + result.Num = ToNum(a, json) < ToNum(b, json) ? 1 : 0; + break; + case OpCode.Lte: + result.Num = ToNum(a, json) <= ToNum(b, json) ? 1 : 0; + break; + case OpCode.Eq: + result.Num = AreEqual(a, b, json) ? 1 : 0; + break; + case OpCode.Neq: + result.Num = !AreEqual(a, b, json) ? 1 : 0; + break; + case OpCode.In: + result.Num = EvalIn(a, b, json) ? 1 : 0; + break; + case OpCode.And: + result.Num = ToBool(a) != 0 && ToBool(b) != 0 ? 1 : 0; + break; + case OpCode.Or: + result.Num = ToBool(a) != 0 || ToBool(b) != 0 ? 
1 : 0; + break; + } + + stack.Push(result); + return true; + } + // ======================== Type conversion helpers ======================== /// diff --git a/libs/server/Resp/Vector/Filter/VectorFilterExpression.cs b/libs/server/Resp/Vector/Filter/VectorFilterExpression.cs index 72e3251e887..b07f2e745e6 100644 --- a/libs/server/Resp/Vector/Filter/VectorFilterExpression.cs +++ b/libs/server/Resp/Vector/Filter/VectorFilterExpression.cs @@ -112,7 +112,7 @@ internal enum OpCode : byte /// Lifetime: Tokens inside the compiled are /// allocated once and reused across all candidate evaluations. Tokens created during /// execution (e.g. from JSON field extraction) are - /// transient and discarded after each call. + /// transient and discarded after each ExprRunner.Run call. /// internal struct ExprToken { @@ -225,7 +225,7 @@ static OpTable() /// /// Compiled filter expression program — the output of - /// and the input to . + /// and the input to ExprRunner.Run. /// /// Contains a flat postfix (reverse-Polish notation) instruction sequence where every /// element is an : @@ -242,7 +242,7 @@ static OpTable() /// /// This is the C# equivalent of the exprstate.program[] array in /// Redis expr.c. The evaluation stack (values_stack in Redis) is - /// not stored here — it is allocated per-call in . + /// not stored here — it is allocated per-call in ExprRunner.Run. /// internal sealed class ExprProgram { @@ -251,5 +251,31 @@ internal sealed class ExprProgram /// Number of instructions in the program. public int Length; + + /// Cached unique selector names (field names) used in this program. + private string[] selectorNames; + + /// + /// Get the unique selector (field) names referenced by this program. + /// Cached after first call — safe to call repeatedly. 
+ /// + public string[] GetSelectors() + { + if (selectorNames != null) + return selectorNames; + + // Count unique selectors + var seen = new System.Collections.Generic.HashSet(System.StringComparer.Ordinal); + for (var i = 0; i < Length; i++) + { + if (Instructions[i].TokenType == ExprTokenType.Selector) + seen.Add(Instructions[i].Str); + } + + var names = new string[seen.Count]; + seen.CopyTo(names); + selectorNames = names; + return selectorNames; + } } } \ No newline at end of file diff --git a/libs/server/Resp/Vector/VectorManager.cs b/libs/server/Resp/Vector/VectorManager.cs index 646f77de92a..07149483d6f 100644 --- a/libs/server/Resp/Vector/VectorManager.cs +++ b/libs/server/Resp/Vector/VectorManager.cs @@ -916,9 +916,11 @@ internal static uint CalculateValueDimensions(VectorValueType valueType, ReadOnl /// /// Architecture (modeled after Redis expr.c + fastjson.c): /// 1. The filter string is compiled ONCE into a flat postfix program (ExprCompiler). - /// 2. For each candidate, the program is executed against the raw JSON attribute bytes - /// using a stack-based VM (ExprRunner) with on-demand field extraction (AttributeExtractor). - /// 3. No JsonDocument DOM is allocated — fields are extracted directly from the raw bytes. + /// 2. Unique selectors (field names) are collected from the program. + /// 3. For each candidate, ALL needed fields are extracted in a single JSON pass + /// via , then the program is + /// evaluated against the pre-extracted values. + /// 4. No JsonDocument DOM is allocated — fields are extracted directly from the raw bytes. /// /// The is populated with one bit per result: /// bit i = 1 means result i passed the filter. Caller can test with: @@ -948,6 +950,12 @@ private static int ApplyPostFilter( // Clear the bitmap filterBitmap.Clear(); + // Collect unique selectors — these are the JSON fields we need per candidate. + var selectors = program.GetSelectors(); + + // Pre-allocate extraction buffer — reused across all candidates. 
+ var extractedFields = new ExprToken[selectors.Length]; + var filteredCount = 0; // Allocate the evaluation stack once and reuse it across all candidate evaluations @@ -961,9 +969,11 @@ private static int ApplyPostFilter( var attrLen = BinaryPrimitives.ReadInt32LittleEndian(remaining); var attrData = remaining.Slice(sizeof(int), attrLen); - // Execute the compiled filter program against raw JSON bytes. - // No JsonDocument DOM allocation — AttributeExtractor extracts fields on demand. - if (ExprRunner.Run(program, attrData, stack)) + // Single-pass extraction: scan JSON once, extract all needed fields. + AttributeExtractor.ExtractFields(attrData, selectors, extractedFields); + + // Execute the compiled program against pre-extracted fields. + if (ExprRunner.Run(program, attrData, selectors, extractedFields, stack)) { filterBitmap[i >> 3] |= (byte)(1 << (i & 7)); filteredCount++; From b05fe8fbaa9f6d32d0e2e93f4747e1bde4f6a185 Mon Sep 17 00:00:00 2001 From: Haiyang Xu Date: Thu, 5 Mar 2026 17:14:47 -0800 Subject: [PATCH 31/31] with field index --- .../Resp/Vector/Filter/AttributeExtractor.cs | 184 ++++++++++++++++++ libs/server/Resp/Vector/VectorManager.cs | 69 ++++--- 2 files changed, 228 insertions(+), 25 deletions(-) diff --git a/libs/server/Resp/Vector/Filter/AttributeExtractor.cs b/libs/server/Resp/Vector/Filter/AttributeExtractor.cs index 4388ad799d2..2230080c73e 100644 --- a/libs/server/Resp/Vector/Filter/AttributeExtractor.cs +++ b/libs/server/Resp/Vector/Filter/AttributeExtractor.cs @@ -3,6 +3,7 @@ using System; using System.Buffers; +using System.Buffers.Binary; using System.Buffers.Text; namespace Garnet.server.Vector.Filter @@ -19,6 +20,189 @@ namespace Garnet.server.Vector.Filter /// internal static class AttributeExtractor { + /// + /// Stride (in ints) per document in the field index: 1 (count) + 2 per field (offset, length). 
+ /// + internal static int FieldIndexStride(int numFields) => 1 + 2 * numFields; + + /// + /// Build a field offset index for ALL documents in the contiguous attributes span. + /// + /// The attributes span is a series of length-prefixed JSON blobs: + /// [len0][json0][len1][json1]... + /// + /// For each document, the index records: + /// [fieldCount, field0_offset, field0_length, field1_offset, field1_length, ...] + /// + /// Offsets are relative to the start of that document's JSON (after the length prefix). + /// A field offset of -1 means that field was not found in that document. + /// A fieldCount of -1 means malformed JSON. + /// + /// <paramref name="indexBuffer"/> must contain at least + /// numDocs * FieldIndexStride(fieldNames.Length) ints. + /// + public static void BuildFieldIndex( + ReadOnlySpan attributesSpan, + int numDocs, + string[] fieldNames, + Span indexBuffer) + { + var numFields = fieldNames.Length; + var stride = FieldIndexStride(numFields); + var remaining = attributesSpan; + + for (var doc = 0; doc < numDocs; doc++) + { + var docIndex = indexBuffer.Slice(doc * stride, stride); + + // Read length prefix + var attrLen = BinaryPrimitives.ReadInt32LittleEndian(remaining); + var json = remaining.Slice(sizeof(int), attrLen); + + // Initialize: count = 0, all offsets = -1 + docIndex[0] = 0; + for (var f = 0; f < numFields; f++) + { + docIndex[1 + 2 * f] = -1; + docIndex[1 + 2 * f + 1] = 0; + } + + // Scan this document for requested field positions + ScanFieldPositions(json, fieldNames, docIndex); + + remaining = remaining[(sizeof(int) + attrLen)..]; + } + } + + /// + /// Create an ExprToken from a value at an indexed position in JSON bytes. + /// Uses the offset and length recorded by <see cref="BuildFieldIndex"/>. 
+ /// + public static ExprToken ParseValueAt(ReadOnlySpan json, int offset, int length) + { + if (offset < 0 || length <= 0 || offset + length > json.Length) + return default; + + var c = json[offset]; + + // String: content is between quotes + if (c == (byte)'"') + { + // offset points to opening quote, length includes both quotes + var contentStart = offset + 1; + var contentLen = length - 2; + // Check for escapes (scan for backslash) + var content = json.Slice(contentStart, contentLen); + if (content.IndexOf((byte)'\\') < 0) + { + // Zero-alloc: store byte offset+length into source JSON + return ExprToken.NewJsonStr(contentStart, contentLen); + } + else + { + // Escaped: materialize + return ExprToken.NewStr(UnescapeJsonString(content)); + } + } + + // Number + if (IsDigit(c) || c == (byte)'-' || c == (byte)'+') + { + var numSpan = json.Slice(offset, length); + if (Utf8Parser.TryParse(numSpan, out double value, out var consumed) && consumed == numSpan.Length) + return ExprToken.NewNum(value); + return default; + } + + // Boolean / null + if (c == (byte)'t' && length == 4) return ExprToken.NewNum(1); + if (c == (byte)'f' && length == 5) return ExprToken.NewNum(0); + if (c == (byte)'n' && length == 4) return ExprToken.NewNull(); + + // Array: parse via existing method + if (c == (byte)'[') + { + var s = json[offset..]; + return ParseArrayToken(json, ref s); + } + + return default; + } + + /// + /// Scan a single JSON object and record the byte positions of requested fields. 
+ /// + private static void ScanFieldPositions(ReadOnlySpan json, string[] fieldNames, Span docIndex) + { + var numFields = fieldNames.Length; + var s = TrimWhiteSpace(json); + if (s.IsEmpty || s[0] != (byte)'{') + { + docIndex[0] = -1; // malformed + return; + } + s = s[1..]; // Skip '{' + + var found = 0; + + while (true) + { + s = TrimWhiteSpace(s); + if (s.IsEmpty) { if (found == 0) docIndex[0] = -1; return; } + if (s[0] == (byte)'}') return; + + // Expect key string + if (s[0] != (byte)'"') { if (found == 0) docIndex[0] = -1; return; } + + var afterOpenQuote = s[1..]; + if (!SkipString(ref s)) { docIndex[0] = -1; return; } + var keyContent = afterOpenQuote[..(afterOpenQuote.Length - s.Length - 1)]; + + // Match against requested fields + var matchIndex = -1; + for (var i = 0; i < numFields; i++) + { + if (docIndex[1 + 2 * i] < 0 && MatchKey(keyContent, fieldNames[i])) + { + matchIndex = i; + break; + } + } + + // Expect ':' + s = TrimWhiteSpace(s); + if (s.IsEmpty || s[0] != (byte)':') { docIndex[0] = -1; return; } + s = s[1..]; + s = TrimWhiteSpace(s); + if (s.IsEmpty) { docIndex[0] = -1; return; } + + // Record value position (offset relative to json start) + var valueStart = json.Length - s.Length; + var beforeSkip = s; + + if (!SkipValue(ref s)) { docIndex[0] = -1; return; } + + var valueLen = beforeSkip.Length - s.Length; + + if (matchIndex >= 0) + { + docIndex[1 + 2 * matchIndex] = valueStart; + docIndex[1 + 2 * matchIndex + 1] = valueLen; + found++; + docIndex[0] = found; + if (found == numFields) return; // All fields found — early exit + } + + // Look for ',' or '}' + s = TrimWhiteSpace(s); + if (s.IsEmpty) return; + if (s[0] == (byte)',') { s = s[1..]; continue; } + if (s[0] == (byte)'}') return; + docIndex[0] = -1; // Malformed + return; + } + } + /// /// Extract multiple top-level fields from a JSON object in a single pass. /// lists the fields to extract. 
diff --git a/libs/server/Resp/Vector/VectorManager.cs b/libs/server/Resp/Vector/VectorManager.cs index 07149483d6f..22bc076f891 100644 --- a/libs/server/Resp/Vector/VectorManager.cs +++ b/libs/server/Resp/Vector/VectorManager.cs @@ -914,19 +914,18 @@ internal static uint CalculateValueDimensions(VectorValueType valueType, ReadOnl /// /// Apply post-filtering to vector search results using a compiled filter expression. /// - /// Architecture (modeled after Redis expr.c + fastjson.c): - /// 1. The filter string is compiled ONCE into a flat postfix program (ExprCompiler). - /// 2. Unique selectors (field names) are collected from the program. - /// 3. For each candidate, ALL needed fields are extracted in a single JSON pass - /// via , then the program is - /// evaluated against the pre-extracted values. - /// 4. No JsonDocument DOM is allocated — fields are extracted directly from the raw bytes. + /// Three-phase approach: + /// 1. COMPILE: The filter string is compiled ONCE into a postfix program (ExprCompiler). + /// 2. INDEX: Build a field offset index for ALL candidates in one pass over the + /// contiguous attributes span. The index records (offset, length) for each + /// field the filter needs, per candidate. This is the "simdjson-style" structural + /// pass — future optimization can use SIMD to find delimiters. + /// 3. EVALUATE: For each candidate, create tokens lazily from indexed positions + /// and run the postfix program. Only touches attribute bytes for fields the + /// filter actually references. /// /// The is populated with one bit per result: 
/// private static int ApplyPostFilter( ReadOnlySpan filter, @@ -939,40 +938,60 @@ private static int ApplyPostFilter( return 0; } - // Compile the filter expression (UTF-8 bytes) into a flat postfix program. - // This is done once and reused for all candidate evaluations. + // Phase 1: Compile the filter expression into a postfix program. var program = ExprCompiler.TryCompile(filter, out _); if (program == null) { - return 0; // If the filter doesn't compile, treat it as filtering out all results (matches Redis behavior) + return 0; // Invalid filter → filter out all results (matches Redis behavior) } - // Clear the bitmap filterBitmap.Clear(); - // Collect unique selectors — these are the JSON fields we need per candidate. var selectors = program.GetSelectors(); + var numSelectors = selectors.Length; + var stride = AttributeExtractor.FieldIndexStride(numSelectors); - // Pre-allocate extraction buffer — reused across all candidates. - var extractedFields = new ExprToken[selectors.Length]; + // Phase 2: Build field offset index for ALL candidates in one pass. + // Index layout per doc: [fieldCount, field0_offset, field0_len, ..., fieldN_offset, fieldN_len] + // Size is predictable: numResults * stride ints. + var indexSize = numResults * stride; + var fieldIndex = indexSize <= 256 + ? stackalloc int[indexSize] + : new int[indexSize]; - var filteredCount = 0; + AttributeExtractor.BuildFieldIndex(attributesSpan, numResults, selectors, fieldIndex); - // Allocate the evaluation stack once and reuse it across all candidate evaluations + // Phase 3: Evaluate filter per candidate using indexed positions. 
+ var filteredCount = 0; var stack = ExprRunner.CreateStack(); - + var extractedFields = new ExprToken[numSelectors]; var remaining = attributesSpan; for (var i = 0; i < numResults; i++) { - // Read attribute length-prefix + data var attrLen = BinaryPrimitives.ReadInt32LittleEndian(remaining); var attrData = remaining.Slice(sizeof(int), attrLen); - // Single-pass extraction: scan JSON once, extract all needed fields. - AttributeExtractor.ExtractFields(attrData, selectors, extractedFields); + var docIndex = fieldIndex.Slice(i * stride, stride); + var fieldCount = docIndex[0]; + + if (fieldCount < 0) + { + // Malformed JSON — skip + remaining = remaining[(sizeof(int) + attrLen)..]; + continue; + } + + // Create tokens lazily from indexed positions — only parses values the filter touches + for (var f = 0; f < numSelectors; f++) + { + var offset = docIndex[1 + 2 * f]; + var len = docIndex[1 + 2 * f + 1]; + extractedFields[f] = offset >= 0 + ? AttributeExtractor.ParseValueAt(attrData, offset, len) + : default; + } - // Execute the compiled program against pre-extracted fields. if (ExprRunner.Run(program, attrData, selectors, extractedFields, stack)) { filterBitmap[i >> 3] |= (byte)(1 << (i & 7));