diff --git a/.claude/commands/ppl-bugfix.md b/.claude/commands/ppl-bugfix.md index f2ac6d6807d..7fefbe84568 100644 --- a/.claude/commands/ppl-bugfix.md +++ b/.claude/commands/ppl-bugfix.md @@ -95,16 +95,15 @@ for wt in .claude/worktrees/agent-*/; do done ``` -**If existing worktree found**: Do NOT use `isolation: "worktree"`. Pass the worktree path in the prompt so the subagent works there directly. +**If existing worktree found**: Do NOT use `isolation: "worktree"`. Instruct the subagent to call `EnterWorktree(path=...)` to switch cwd into the existing one. ``` Agent( mode: "", name: "bugfix-", description: "PPL bugfix # followup", - prompt: "cd first, then read .claude/harness/ppl-bugfix-followup.md and follow it. - PR: (), Issue: # - Working directory: " + prompt: "First, call EnterWorktree(path=\"\") to switch cwd. Then read .claude/harness/ppl-bugfix-followup.md and follow it. Do NOT use `cd` in Bash commands. + PR: (), Issue: #" ) ``` diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java index 29c2ffd582f..aa76b87a5c8 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java @@ -32,6 +32,7 @@ CalciteDataTypeIT.class, CalciteDateTimeComparisonIT.class, CalciteDateTimeFunctionIT.class, + CalciteDisabledObjectFieldIT.class, CalciteDateTimeImplementationIT.class, CalciteDedupCommandIT.class, CalciteDescribeCommandIT.class, diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteDisabledObjectFieldIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteDisabledObjectFieldIT.java new file mode 100644 index 00000000000..4bf31032b53 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteDisabledObjectFieldIT.java @@ -0,0 +1,83 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.remote; + +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.schema; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; +import static org.opensearch.sql.util.MatcherUtils.verifySchema; +import static org.opensearch.sql.util.TestUtils.createIndexByRestClient; +import static org.opensearch.sql.util.TestUtils.isIndexExist; +import static org.opensearch.sql.util.TestUtils.performRequest; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.jupiter.api.Test; +import org.opensearch.client.Request; +import org.opensearch.sql.ppl.PPLIntegTestCase; + +/** + * Integration tests for querying inner fields of an object that is declared with {@code "enabled": + * false} in the index mapping. Such objects are stored in {@code _source} but are not indexed, so + * the plugin cannot learn the shape of the object from the mapping. See GitHub issue #4906. + */ +public class CalciteDisabledObjectFieldIT extends PPLIntegTestCase { + + private static final String DISABLED_OBJECT_INDEX = "test_disabled_object_4906"; + + @Override + public void init() throws Exception { + super.init(); + enableCalcite(); + createTestIndex(); + } + + private void createTestIndex() throws IOException { + if (!isIndexExist(client(), DISABLED_OBJECT_INDEX)) { + String mapping = + "{\"mappings\":{\"properties\":{" + + "\"log\":{\"type\":\"object\",\"enabled\":false}" + + "}}}"; + createIndexByRestClient(client(), DISABLED_OBJECT_INDEX, mapping); + Request bulkReq = new Request("POST", "/" + DISABLED_OBJECT_INDEX + "/_bulk?refresh=true"); + bulkReq.setJsonEntity( + "{\"index\":{\"_id\":\"1\"}}\n" + "{\"log\":{\"a\":1,\"c\":{\"d\":2}}}\n"); + performRequest(client(), bulkReq); + } + } + + @Test + public void testSelectNestedFieldFromDisabledObject() throws IOException { + JSONObject result = + executeQuery(String.format("source=%s | fields log.c.d", DISABLED_OBJECT_INDEX)); + verifySchema(result, schema("log.c.d", "int")); + verifyDataRows(result, rows(2)); + } + + @Test + public void testSelectTopLevelFieldFromDisabledObject() throws IOException { + JSONObject result = + executeQuery(String.format("source=%s | fields log.a", DISABLED_OBJECT_INDEX)); + verifySchema(result, schema("log.a", "int")); + verifyDataRows(result, rows(1)); + } + + @Test + public void testSelectEntireDisabledObject() throws IOException { + JSONObject result = + executeQuery(String.format("source=%s | fields log", DISABLED_OBJECT_INDEX)); + verifySchema(result, schema("log", "struct")); + verifyDataRows(result, rows(new JSONObject("{\"a\":1,\"c\":{\"d\":2},\"c.d\":2}"))); + } + + @Test + public void testSelectIntermediateFieldFromDisabledObject() throws IOException { + JSONObject result = + executeQuery(String.format("source=%s | fields log.c", DISABLED_OBJECT_INDEX)); + verifySchema(result, schema("log.c", "struct")); + verifyDataRows(result, rows(new JSONObject("{\"d\":2}"))); + } +} diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4906.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4906.yml new file mode 100644 index 00000000000..da650014d97 --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4906.yml @@ -0,0 +1,50 @@ +setup: + - do: + indices.create: + index: test + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + log: + type: object + enabled: false + - do: + query.settings: + body: + transient: + plugins.calcite.enabled : true + +--- +teardown: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled : false + +--- +"Access nested field inside a disabled object": + - skip: + features: + - headers + - allowed_warnings + - do: + bulk: + index: test + refresh: true + body: + - '{"index": {}}' + - '{"log": {"a": 1, "c": {"d": 2}}}' + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' + headers: + Content-Type: 'application/json' + ppl: + body: + query: 'source=test | fields log.c.d' + - match: {"total": 1} + - match: {"datarows": [[2]]} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/utils/Content.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/utils/Content.java index d2fa68dac06..b795d95070b 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/utils/Content.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/utils/Content.java @@ -53,6 +53,9 @@ public interface Content { /** Is array value. */ boolean isArray(); + /** Is an object / map-like value. */ + boolean isObject(); + /** Get integer value. */ Integer intValue(); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/utils/ObjectContent.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/utils/ObjectContent.java index b409640c993..e657513246d 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/utils/ObjectContent.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/utils/ObjectContent.java @@ -144,6 +144,11 @@ public boolean isArray() { return value instanceof ArrayNode || value instanceof List; } + @Override + public boolean isObject() { + return value instanceof Map; + } + @Override public boolean isString() { return value instanceof String; diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/utils/OpenSearchJsonContent.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/utils/OpenSearchJsonContent.java index 93c2c6b1584..7c9adedf43f 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/utils/OpenSearchJsonContent.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/utils/OpenSearchJsonContent.java @@ -140,6 +140,11 @@ public boolean isArray() { return value().isArray(); } + @Override + public boolean isObject() { + return value != null && value.isObject(); + } + @Override public Object objectValue() { return value(); diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java index d772b3e603b..d6d2b0138af 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java @@ -203,7 +203,7 @@ private ExprValue parse( // is resolved if (fieldType.isEmpty() || fieldType.get().equals(OpenSearchDataType.of(ExprCoreType.UNDEFINED))) { - return parseContent(content); + return parseContent(content, field, supportArrays); } final ExprType type = fieldType.get(); @@ -227,7 +227,15 @@ private ExprValue parse( } } - private ExprValue parseContent(Content content) { + /** + * Parse {@link Content} into an {@link ExprValue} when the declared field type is unknown (either + * absent from the mapping or {@code UNDEFINED}). Dispatches purely on the shape of the content. + * + *

Objects are recursed into as unmapped structs (see issue #4906). Without this, an object + * inside a disabled-object field would be stringified and nested keys like {@code log.c.d} would + * be lost. + */ + private ExprValue parseContent(Content content, String field, boolean supportArrays) { if (content.isNumber()) { if (content.isInt()) { return new ExprIntegerValue(content.intValue()); @@ -251,6 +259,8 @@ private ExprValue parseContent(Content content) { return ExprBooleanValue.of(content.booleanValue()); } else if (content.isNull()) { return ExprNullValue.of(); + } else if (content.isObject()) { + return parseUnmappedStruct(content, field, supportArrays); } // Default case, treat as a string value return new ExprStringValue(content.objectValue().toString()); @@ -362,6 +372,50 @@ private static ExprValue createOpenSearchDateType( return new ExprTimestampValue((Instant) value.objectValue()); } + /** + * Parse an unmapped object (e.g. an object declared with {@code "enabled": false} or generated by + * dynamic mapping with no known schema — see issue #4906). Produces an {@link ExprTupleValue} + * that holds each descendant value under both its nested key and the dotted-path key, so later + * {@code ITEM} lookups (which treat the tuple as a flat map) can retrieve values at any depth + * with a single key. + * + *

Example: an input of {@code {"a": 1, "c": {"d": 2}}} produces a tuple with entries {@code + * a=1}, {@code c=}, {@code c.d=2} where the {@code c} entry is itself expanded the same + * way. + * + * @param content Content to parse. + * @param prefix Prefix for level of object depth to parse. + * @param supportArrays Parsing the whole array if array is type nested. + * @return Value parsed from content. + */ + private ExprValue parseUnmappedStruct(Content content, String prefix, boolean supportArrays) { + ExprTupleValue result = ExprTupleValue.empty(); + content + .map() + .forEachRemaining( + entry -> { + String fieldKey = entry.getKey(); + if (isFieldNameMalformed(fieldKey)) { + result.tupleValue().put(fieldKey, ExprNullValue.of()); + return; + } + Content childContent = entry.getValue(); + String fullFieldPath = makeField(prefix, fieldKey); + // Recurse via parseContent, which handles nulls, scalars, and objects (by + // recursing back into this method). Single ITEM lookups still succeed because + // we also expose descendants under their dotted-path keys below. + ExprValue childValue = parseContent(childContent, fullFieldPath, supportArrays); + result.tupleValue().put(fieldKey, childValue); + // Additionally expose every descendant at the current level using dotted-path keys + // so ITEM(parent, "c.d") can resolve to a scalar leaf value. Intermediate dotted + // paths (e.g. "c" mapping to a tuple) are already provided by the direct child entry. + if (childValue instanceof ExprTupleValue childTuple) { + exposeDescendantsAsDottedKeys(result, fieldKey, childTuple); + } + }); + return result; + } + /** * Parse struct content. * @@ -383,15 +437,40 @@ private ExprValue parseStruct(Content content, String prefix, boolean supportArr if (isFieldNameMalformed(fieldKey)) { result.tupleValue().put(fieldKey, ExprNullValue.of()); } else { - populateValueRecursive( - result, - new JsonPath(fieldKey), - parse(entry.getValue(), fullFieldPath, type(fullFieldPath), supportArrays)); + Optional childType = type(fullFieldPath); + ExprValue childValue = + parse(entry.getValue(), fullFieldPath, childType, supportArrays); + populateValueRecursive(result, new JsonPath(fieldKey), childValue); + // If the child's type is unmapped (e.g. inside a disabled object — see #4906), + // also expose every descendant under its dotted-path key so single-key ITEM + // lookups at this level resolve nested paths like log.c.d. + if (isUnmappedType(childType) && childValue instanceof ExprTupleValue) { + exposeDescendantsAsDottedKeys(result, fieldKey, (ExprTupleValue) childValue); + } } }); return result; } + private static boolean isUnmappedType(Optional fieldType) { + return fieldType.isEmpty() + || fieldType.get().equals(OpenSearchDataType.of(ExprCoreType.UNDEFINED)); + } + + /** + * Add every descendant entry already present in {@code childTuple} to {@code result} under a + * dotted-path key rooted at {@code fieldKey}. The child tuple is assumed to have been produced by + * {@link #parseUnmappedStruct(Content, String, boolean)} which already expanded nested tuples + * into dotted-path entries at each level. Existing keys are preserved. + */ + private static void exposeDescendantsAsDottedKeys( + ExprTupleValue result, String fieldKey, ExprTupleValue childTuple) { + for (Map.Entry descendant : childTuple.tupleValue().entrySet()) { + String descendantKey = fieldKey + "." + descendant.getKey(); + result.tupleValue().putIfAbsent(descendantKey, descendant.getValue()); + } + } + /** * Check if a field name is malformed and cannot be processed by JsonPath. * diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java index 031b9243f38..913442b3fba 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactoryTest.java @@ -1180,6 +1180,78 @@ public void constructStructWithMalformedAndValidFields_preservesValidFields() { assertEquals(stringValue("value"), structValue.get("good")); } + // ==================== Disabled Object Field Tests ==================== + // Tests for issue #4906: PPL query returns null when accessing nested keys of an object + // declared with "enabled": false. Such objects are stored in _source but not indexed, so the + // plugin cannot learn their inner schema. The factory must parse the raw _source recursively + // AND also expose each descendant under a dotted-path key so the Calcite ITEM operator — which + // treats a tuple as a flat map — can resolve log.c.d with a single lookup. + + private OpenSearchExprValueFactory disabledObjectFactory() { + // "log" is mapped as an object with no properties (equivalent to {type:object, enabled:false}). + return new OpenSearchExprValueFactory( + Map.of("log", OpenSearchDataType.of(OpenSearchDataType.MappingType.Object)), true); + } + + @Test + public void disabledObject_topLevelScalarChildRetainsType() { + Map tuple = + disabledObjectFactory() + .construct("{\"log\":{\"a\":1,\"c\":{\"d\":2}}}", false) + .tupleValue(); + ExprValue log = tuple.get("log"); + assertEquals(integerValue(1), log.tupleValue().get("a")); + } + + @Test + public void disabledObject_intermediateChildIsParsedAsTuple() { + Map tuple = + disabledObjectFactory() + .construct("{\"log\":{\"a\":1,\"c\":{\"d\":2}}}", false) + .tupleValue(); + ExprValue logC = tuple.get("log").tupleValue().get("c"); + assertTrue(logC instanceof ExprTupleValue); + assertEquals(integerValue(2), logC.tupleValue().get("d")); + } + + @Test + public void disabledObject_leafExposedUnderDottedKeyForItemLookup() { + // The core fix: ITEM(log, "c.d") must resolve to 2. This requires the "log" tuple to carry + // the flat dotted-path key "c.d" in addition to the nested "c" -> tuple{d:2}. + Map tuple = + disabledObjectFactory() + .construct("{\"log\":{\"a\":1,\"c\":{\"d\":2}}}", false) + .tupleValue(); + ExprValue log = tuple.get("log"); + assertEquals(integerValue(2), log.tupleValue().get("c.d")); + } + + @Test + public void disabledObject_deeplyNestedLeafExposedAtEveryAncestor() { + // Verify that deeper nesting {c:{d:{e:3}}} works with single-key ITEM at any level. + Map tuple = + disabledObjectFactory() + .construct("{\"log\":{\"c\":{\"d\":{\"e\":3}}}}", false) + .tupleValue(); + ExprValue log = tuple.get("log"); + assertEquals(integerValue(3), log.tupleValue().get("c.d.e")); + ExprValue logC = log.tupleValue().get("c"); + assertEquals(integerValue(3), logC.tupleValue().get("d.e")); + } + + @Test + public void disabledObject_mappedSiblingsAreUnaffected() { + // With a normal mapped struct (structV.id, structV.state), the factory should NOT inject flat + // dotted keys at the top level. Only the existing behaviour is preserved. + Map tuple = tupleValue("{\"structV\":{\"id\":1,\"state\":\"WA\"}}"); + ExprValue structV = tuple.get("structV"); + assertEquals(integerValue(1), structV.tupleValue().get("id")); + assertEquals(stringValue("WA"), structV.tupleValue().get("state")); + // No accidental flat-key injection on mapped structs. + assertFalse(structV.tupleValue().containsKey("structV.id")); + assertFalse(tuple.containsKey("structV.id")); + } + @EqualsAndHashCode(callSuper = false) @ToString private static class TestType extends OpenSearchDataType {