Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
CalciteDataTypeIT.class,
CalciteDateTimeComparisonIT.class,
CalciteDateTimeFunctionIT.class,
CalciteDisabledObjectFieldIT.class,
CalciteDateTimeImplementationIT.class,
CalciteDedupCommandIT.class,
CalciteDescribeCommandIT.class,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.calcite.remote;

import static org.opensearch.sql.util.MatcherUtils.rows;
import static org.opensearch.sql.util.MatcherUtils.schema;
import static org.opensearch.sql.util.MatcherUtils.verifyDataRows;
import static org.opensearch.sql.util.MatcherUtils.verifySchema;
import static org.opensearch.sql.util.TestUtils.createIndexByRestClient;
import static org.opensearch.sql.util.TestUtils.isIndexExist;
import static org.opensearch.sql.util.TestUtils.performRequest;

import java.io.IOException;
import org.json.JSONObject;
import org.junit.jupiter.api.Test;
import org.opensearch.client.Request;
import org.opensearch.sql.ppl.PPLIntegTestCase;

/**
* Integration tests for querying inner fields of an object that is declared with {@code "enabled":
* false} in the index mapping. Such objects are stored in {@code _source} but are not indexed, so
* the plugin cannot learn the shape of the object from the mapping. See GitHub issue #4906.
*/
public class CalciteDisabledObjectFieldIT extends PPLIntegTestCase {

private static final String DISABLED_OBJECT_INDEX = "test_disabled_object_4906";

@Override
public void init() throws Exception {
super.init();
enableCalcite();
createTestIndex();
}

private void createTestIndex() throws IOException {
if (!isIndexExist(client(), DISABLED_OBJECT_INDEX)) {
String mapping =
"{\"mappings\":{\"properties\":{"
+ "\"log\":{\"type\":\"object\",\"enabled\":false}"
+ "}}}";
createIndexByRestClient(client(), DISABLED_OBJECT_INDEX, mapping);
Request bulkReq = new Request("POST", "/" + DISABLED_OBJECT_INDEX + "/_bulk?refresh=true");
bulkReq.setJsonEntity(
"{\"index\":{\"_id\":\"1\"}}\n" + "{\"log\":{\"a\":1,\"c\":{\"d\":2}}}\n");
performRequest(client(), bulkReq);
}
}

@Test
public void testSelectNestedFieldFromDisabledObject() throws IOException {
JSONObject result =
executeQuery(String.format("source=%s | fields log.c.d", DISABLED_OBJECT_INDEX));
verifySchema(result, schema("log.c.d", "int"));
verifyDataRows(result, rows(2));
Comment thread
qianheng-aws marked this conversation as resolved.
}

@Test
public void testSelectTopLevelFieldFromDisabledObject() throws IOException {
JSONObject result =
executeQuery(String.format("source=%s | fields log.a", DISABLED_OBJECT_INDEX));
verifySchema(result, schema("log.a", "int"));
verifyDataRows(result, rows(1));
Comment thread
qianheng-aws marked this conversation as resolved.
}

@Test
public void testSelectEntireDisabledObject() throws IOException {
JSONObject result =
executeQuery(String.format("source=%s | fields log", DISABLED_OBJECT_INDEX));
verifySchema(result, schema("log", "struct"));
Comment thread
qianheng-aws marked this conversation as resolved.
verifyDataRows(result, rows(new JSONObject("{\"a\":1,\"c\":{\"d\":2},\"c.d\":2}")));
}

@Test
public void testSelectIntermediateFieldFromDisabledObject() throws IOException {
JSONObject result =
executeQuery(String.format("source=%s | fields log.c", DISABLED_OBJECT_INDEX));
verifySchema(result, schema("log.c", "struct"));
Comment thread
qianheng-aws marked this conversation as resolved.
verifyDataRows(result, rows(new JSONObject("{\"d\":2}")));
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Creates index "test" whose "log" field is an object mapped with enabled: false,
# then turns the Calcite engine on through the query settings API.
setup:
- do:
indices.create:
index: test
body:
settings:
number_of_shards: 1
number_of_replicas: 0
mappings:
properties:
log:
type: object
enabled: false
- do:
query.settings:
body:
transient:
plugins.calcite.enabled : true

---
# Turns the Calcite engine back off once the test has run.
teardown:
- do:
query.settings:
body:
transient:
plugins.calcite.enabled : false

---
# Indexes one document with a nested value under "log" and expects the PPL query
# on the dotted path log.c.d to return exactly one row holding 2.
"Access nested field inside a disabled object":
- skip:
features:
- headers
- allowed_warnings
- do:
bulk:
index: test
refresh: true
body:
- '{"index": {}}'
- '{"log": {"a": 1, "c": {"d": 2}}}'
- do:
allowed_warnings:
- 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled'
headers:
Content-Type: 'application/json'
ppl:
body:
query: 'source=test | fields log.c.d'
- match: {"total": 1}
- match: {"datarows": [[2]]}
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ public interface Content {
/** Is array value. */
boolean isArray();

/**
 * Is an object / map-like value.
 *
 * @return true if the content is an object / map-like value, false otherwise.
 */
boolean isObject();

/** Get integer value. */
Integer intValue();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,11 @@ public boolean isArray() {
return value instanceof ArrayNode || value instanceof List;
}

/** Reports whether the wrapped value is a {@link Map}. */
@Override
public boolean isObject() {
  return value instanceof Map<?, ?>;
}

@Override
public boolean isString() {
return value instanceof String;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,11 @@ public boolean isArray() {
return value().isArray();
}

/** Reports whether the wrapped value is present and is itself an object. */
@Override
public boolean isObject() {
  if (value == null) {
    return false;
  }
  return value.isObject();
}

@Override
public Object objectValue() {
return value();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ private ExprValue parse(
// is resolved
if (fieldType.isEmpty()
|| fieldType.get().equals(OpenSearchDataType.of(ExprCoreType.UNDEFINED))) {
return parseContent(content);
return parseContent(content, field, supportArrays);
}

final ExprType type = fieldType.get();
Expand All @@ -227,7 +227,15 @@ private ExprValue parse(
}
}

private ExprValue parseContent(Content content) {
/**
* Parse {@link Content} into an {@link ExprValue} when the declared field type is unknown (either
* absent from the mapping or {@code UNDEFINED}). Dispatches purely on the shape of the content.
*
* <p>Objects are recursed into as unmapped structs (see issue #4906). Without this, an object
* inside a disabled-object field would be stringified and nested keys like {@code log.c.d} would
* be lost.
*/
private ExprValue parseContent(Content content, String field, boolean supportArrays) {
if (content.isNumber()) {
if (content.isInt()) {
return new ExprIntegerValue(content.intValue());
Expand All @@ -251,6 +259,8 @@ private ExprValue parseContent(Content content) {
return ExprBooleanValue.of(content.booleanValue());
} else if (content.isNull()) {
return ExprNullValue.of();
} else if (content.isObject()) {
return parseUnmappedStruct(content, field, supportArrays);
}
// Default case, treat as a string value
return new ExprStringValue(content.objectValue().toString());
Expand Down Expand Up @@ -362,6 +372,50 @@ private static ExprValue createOpenSearchDateType(
return new ExprTimestampValue((Instant) value.objectValue());
}

/**
 * Parse an object whose inner schema is unknown (for example an object declared with {@code
 * "enabled": false}, see issue #4906) into an {@link ExprTupleValue}.
 *
 * <p>Each entry is stored under its own key. When an entry parses to a tuple, the entries of that
 * tuple are additionally stored at this level under {@code key.childKey}, unless such a key is
 * already present. Because nested tuples are expanded the same way, a single lookup with a
 * dotted path reaches values at any depth.
 *
 * <p>Example: {@code {"a": 1, "c": {"d": 2}}} yields the entries {@code a=1}, {@code c=<tuple>}
 * and {@code c.d=2}.
 *
 * <p>Entries whose field name is malformed are kept, mapped to a null value.
 *
 * @param content Content to parse.
 * @param prefix Path of the object being parsed; used to build the full path of each child.
 * @param supportArrays Passed through unchanged when parsing child values.
 * @return Tuple value parsed from content.
 */
private ExprValue parseUnmappedStruct(Content content, String prefix, boolean supportArrays) {
  final ExprTupleValue struct = ExprTupleValue.empty();
  final Map<String, ExprValue> fields = struct.tupleValue();
  content
      .map()
      .forEachRemaining(
          member -> {
            final String name = member.getKey();
            if (isFieldNameMalformed(name)) {
              fields.put(name, ExprNullValue.of());
            } else {
              // parseContent handles nulls and scalars itself and comes back here for objects.
              final ExprValue parsed =
                  parseContent(member.getValue(), makeField(prefix, name), supportArrays);
              fields.put(name, parsed);
              // Mirror the child's entries at this level under dotted-path keys so that a
              // single-key lookup such as "c.d" resolves without walking the nesting.
              if (parsed instanceof ExprTupleValue nested) {
                exposeDescendantsAsDottedKeys(struct, name, nested);
              }
            }
          });
  return struct;
}

/**
* Parse struct content.
*
Expand All @@ -383,15 +437,40 @@ private ExprValue parseStruct(Content content, String prefix, boolean supportArr
if (isFieldNameMalformed(fieldKey)) {
result.tupleValue().put(fieldKey, ExprNullValue.of());
} else {
populateValueRecursive(
result,
new JsonPath(fieldKey),
parse(entry.getValue(), fullFieldPath, type(fullFieldPath), supportArrays));
Optional<ExprType> childType = type(fullFieldPath);
ExprValue childValue =
parse(entry.getValue(), fullFieldPath, childType, supportArrays);
populateValueRecursive(result, new JsonPath(fieldKey), childValue);
// If the child's type is unmapped (e.g. inside a disabled object — see #4906),
// also expose every descendant under its dotted-path key so single-key ITEM
// lookups at this level resolve nested paths like log.c.d.
if (isUnmappedType(childType) && childValue instanceof ExprTupleValue) {
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could this be moved into parse method?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks — partially done in dd1e295. The per-child body of parseStruct (the populateValueRecursive + conditional exposeDescendantsAsDottedKeys) is now extracted into a private helper parseStructEntry that sits next to parse(). parseStruct itself is reduced to:

content.map().forEachRemaining(entry -> parseStructEntry(result, prefix, entry, supportArrays));

Moving the dotted-key exposure literally inside parse() would require parse() to accept the parent ExprTupleValue result + key-in-parent so it can write flat entries directly. That widens parse()'s signature across all call sites (parseArray, parseInnerArrayValue, the two construct() overloads, and the recursive call from parseStructEntry) with arguments that are meaningless for non-struct-child contexts. It also breaks parse()'s current contract of "return an ExprValue, no side effects on caller state".

An alternative — having parse() mark its return with a sentinel so parseStruct can uniformly flatten — also fails because mapped-struct children can contain unmapped subtrees and do need selective flattening based on the declared field type, not just on content shape.

The current shape (parseStruct → trivial delegation to parseStructEntry which encapsulates parse + populate + conditional expose) keeps parse()'s signature clean while still hiding the "awkward conditional" behind a single helper. Happy to go deeper (e.g. widen parse() to take the sink) if you prefer that trade-off — let me know.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Update: in ac4afb0 I reverted the parseStructEntry extraction and inlined the per-child loop body back into parseStruct. On reflection, the extraction didn't really address the intent of your comment — moving the dotted-key exposure into parse() itself — it just moved it one call deeper.

A literal move into parse() would require either (a) widening its signature with a parent-tuple sink + key-in-parent across 8+ call sites, breaking its current pure-return contract and carrying meaningless parameters through non-struct-child paths, or (b) returning a richer shape that parseStruct then picks apart — either trades simplicity for a false sense of locality.

So the cleanest place for the mutation is parseStruct itself, which is the only caller with the parent tuple in scope. Keeping the parseContent unification from your line-209 comment and leaving parseStruct as the single owner of the dotted-key exposure. Thanks again for the review.

exposeDescendantsAsDottedKeys(result, fieldKey, (ExprTupleValue) childValue);
}
}
});
return result;
}

/**
 * Tell whether a field has no usable declared type.
 *
 * @param fieldType Declared type of the field, if any.
 * @return true when the type is absent or equals the {@code UNDEFINED} data type.
 */
private static boolean isUnmappedType(Optional<ExprType> fieldType) {
  return fieldType
      .map(declared -> declared.equals(OpenSearchDataType.of(ExprCoreType.UNDEFINED)))
      .orElse(true);
}

/**
 * Copy every entry of {@code childTuple} into {@code result} under the key {@code
 * fieldKey.entryKey}. Keys that already exist in {@code result} keep their current value.
 *
 * <p>The child tuple is assumed to have been produced by {@link #parseUnmappedStruct(Content,
 * String, boolean)}, which already expands nested tuples into dotted-path entries at each level,
 * so copying one level here is enough to expose all descendants.
 *
 * @param result Tuple that receives the dotted-path entries.
 * @param fieldKey Key under which {@code childTuple} is stored in {@code result}.
 * @param childTuple Tuple whose entries are exposed.
 */
private static void exposeDescendantsAsDottedKeys(
    ExprTupleValue result, String fieldKey, ExprTupleValue childTuple) {
  final Map<String, ExprValue> target = result.tupleValue();
  childTuple
      .tupleValue()
      .forEach((childKey, value) -> target.putIfAbsent(fieldKey + "." + childKey, value));
}

/**
* Check if a field name is malformed and cannot be processed by JsonPath.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1180,6 +1180,78 @@ public void constructStructWithMalformedAndValidFields_preservesValidFields() {
assertEquals(stringValue("value"), structValue.get("good"));
}

// ==================== Disabled Object Field Tests ====================
// Tests for issue #4906: PPL query returns null when accessing nested keys of an object
// declared with "enabled": false. Such objects are stored in _source but not indexed, so the
// plugin cannot learn their inner schema. The factory must parse the raw _source recursively
// AND also expose each descendant under a dotted-path key so the Calcite ITEM operator — which
// treats a tuple as a flat map — can resolve log.c.d with a single lookup.

/** Builds a value factory whose only mapped field is {@code log}, typed as a plain object. */
private OpenSearchExprValueFactory disabledObjectFactory() {
  // An object with no properties stands in for {type:object, enabled:false}.
  OpenSearchDataType objectWithoutProperties =
      OpenSearchDataType.of(OpenSearchDataType.MappingType.Object);
  return new OpenSearchExprValueFactory(Map.of("log", objectWithoutProperties), true);
}

/** A scalar directly under the disabled object is parsed as an integer, not a string. */
@Test
public void disabledObject_topLevelScalarChildRetainsType() {
  ExprValue document =
      disabledObjectFactory().construct("{\"log\":{\"a\":1,\"c\":{\"d\":2}}}", false);
  Map<String, ExprValue> logFields = document.tupleValue().get("log").tupleValue();
  assertEquals(integerValue(1), logFields.get("a"));
}

/** An object nested inside the disabled object is parsed as a tuple with its own entries. */
@Test
public void disabledObject_intermediateChildIsParsedAsTuple() {
  ExprValue document =
      disabledObjectFactory().construct("{\"log\":{\"a\":1,\"c\":{\"d\":2}}}", false);
  ExprValue nested = document.tupleValue().get("log").tupleValue().get("c");
  assertTrue(nested instanceof ExprTupleValue);
  assertEquals(integerValue(2), nested.tupleValue().get("d"));
}

/**
 * The core fix: ITEM(log, "c.d") must resolve to 2, so the "log" tuple has to carry the flat
 * dotted-path key "c.d" in addition to the nested "c" tuple.
 */
@Test
public void disabledObject_leafExposedUnderDottedKeyForItemLookup() {
  ExprValue document =
      disabledObjectFactory().construct("{\"log\":{\"a\":1,\"c\":{\"d\":2}}}", false);
  Map<String, ExprValue> logFields = document.tupleValue().get("log").tupleValue();
  assertEquals(integerValue(2), logFields.get("c.d"));
}

/** With deeper nesting {c:{d:{e:3}}}, the leaf is reachable by one dotted key at every level. */
@Test
public void disabledObject_deeplyNestedLeafExposedAtEveryAncestor() {
  ExprValue document =
      disabledObjectFactory().construct("{\"log\":{\"c\":{\"d\":{\"e\":3}}}}", false);
  Map<String, ExprValue> logFields = document.tupleValue().get("log").tupleValue();
  assertEquals(integerValue(3), logFields.get("c.d.e"));
  Map<String, ExprValue> cFields = logFields.get("c").tupleValue();
  assertEquals(integerValue(3), cFields.get("d.e"));
}

/**
 * A normally mapped struct (structV.id, structV.state) keeps its existing shape: no flat
 * dotted keys are injected next to it or inside it.
 */
@Test
public void disabledObject_mappedSiblingsAreUnaffected() {
  Map<String, ExprValue> root = tupleValue("{\"structV\":{\"id\":1,\"state\":\"WA\"}}");
  Map<String, ExprValue> structFields = root.get("structV").tupleValue();
  assertEquals(integerValue(1), structFields.get("id"));
  assertEquals(stringValue("WA"), structFields.get("state"));
  // No accidental flat-key injection on mapped structs.
  assertFalse(structFields.containsKey("structV.id"));
  assertFalse(root.containsKey("structV.id"));
}

@EqualsAndHashCode(callSuper = false)
@ToString
private static class TestType extends OpenSearchDataType {
Expand Down
Loading