-
Notifications
You must be signed in to change notification settings - Fork 193
[BugFix] Fix PPL nested field access inside disabled object fields (#4906) #5390
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 5 commits
2ad9c97
df4e4ac
dd1e295
ac4afb0
8aefdab
34f9c29
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,83 @@ | ||
| /* | ||
| * Copyright OpenSearch Contributors | ||
| * SPDX-License-Identifier: Apache-2.0 | ||
| */ | ||
|
|
||
| package org.opensearch.sql.calcite.remote; | ||
|
|
||
| import static org.opensearch.sql.util.MatcherUtils.rows; | ||
| import static org.opensearch.sql.util.MatcherUtils.schema; | ||
| import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; | ||
| import static org.opensearch.sql.util.MatcherUtils.verifySchema; | ||
| import static org.opensearch.sql.util.TestUtils.createIndexByRestClient; | ||
| import static org.opensearch.sql.util.TestUtils.isIndexExist; | ||
| import static org.opensearch.sql.util.TestUtils.performRequest; | ||
|
|
||
| import java.io.IOException; | ||
| import org.json.JSONObject; | ||
| import org.junit.jupiter.api.Test; | ||
| import org.opensearch.client.Request; | ||
| import org.opensearch.sql.ppl.PPLIntegTestCase; | ||
|
|
||
| /** | ||
| * Integration tests for querying inner fields of an object that is declared with {@code "enabled": | ||
| * false} in the index mapping. Such objects are stored in {@code _source} but are not indexed, so | ||
| * the plugin cannot learn the shape of the object from the mapping. See GitHub issue #4906. | ||
| */ | ||
| public class CalciteDisabledObjectFieldIT extends PPLIntegTestCase { | ||
|
|
||
| private static final String DISABLED_OBJECT_INDEX = "test_disabled_object_4906"; | ||
|
|
||
| @Override | ||
| public void init() throws Exception { | ||
| super.init(); | ||
| enableCalcite(); | ||
| createTestIndex(); | ||
| } | ||
|
|
||
| private void createTestIndex() throws IOException { | ||
| if (!isIndexExist(client(), DISABLED_OBJECT_INDEX)) { | ||
| String mapping = | ||
| "{\"mappings\":{\"properties\":{" | ||
| + "\"log\":{\"type\":\"object\",\"enabled\":false}" | ||
| + "}}}"; | ||
| createIndexByRestClient(client(), DISABLED_OBJECT_INDEX, mapping); | ||
| Request bulkReq = new Request("POST", "/" + DISABLED_OBJECT_INDEX + "/_bulk?refresh=true"); | ||
| bulkReq.setJsonEntity( | ||
| "{\"index\":{\"_id\":\"1\"}}\n" + "{\"log\":{\"a\":1,\"c\":{\"d\":2}}}\n"); | ||
| performRequest(client(), bulkReq); | ||
| } | ||
| } | ||
|
|
||
| @Test | ||
| public void testSelectNestedFieldFromDisabledObject() throws IOException { | ||
| JSONObject result = | ||
| executeQuery(String.format("source=%s | fields log.c.d", DISABLED_OBJECT_INDEX)); | ||
| verifySchema(result, schema("log.c.d", "int")); | ||
| verifyDataRows(result, rows(2)); | ||
| } | ||
|
|
||
| @Test | ||
| public void testSelectTopLevelFieldFromDisabledObject() throws IOException { | ||
| JSONObject result = | ||
| executeQuery(String.format("source=%s | fields log.a", DISABLED_OBJECT_INDEX)); | ||
| verifySchema(result, schema("log.a", "int")); | ||
| verifyDataRows(result, rows(1)); | ||
|
qianheng-aws marked this conversation as resolved.
|
||
| } | ||
|
|
||
| @Test | ||
| public void testSelectEntireDisabledObject() throws IOException { | ||
| JSONObject result = | ||
| executeQuery(String.format("source=%s | fields log", DISABLED_OBJECT_INDEX)); | ||
| verifySchema(result, schema("log", "struct")); | ||
|
qianheng-aws marked this conversation as resolved.
|
||
| verifyDataRows(result, rows(new JSONObject("{\"a\":1,\"c\":{\"d\":2},\"c.d\":2}"))); | ||
| } | ||
|
|
||
| @Test | ||
| public void testSelectIntermediateFieldFromDisabledObject() throws IOException { | ||
| JSONObject result = | ||
| executeQuery(String.format("source=%s | fields log.c", DISABLED_OBJECT_INDEX)); | ||
| verifySchema(result, schema("log.c", "struct")); | ||
|
qianheng-aws marked this conversation as resolved.
|
||
| verifyDataRows(result, rows(new JSONObject("{\"d\":2}"))); | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,50 @@ | ||
| setup: | ||
| - do: | ||
| indices.create: | ||
| index: test | ||
| body: | ||
| settings: | ||
| number_of_shards: 1 | ||
| number_of_replicas: 0 | ||
| mappings: | ||
| properties: | ||
| log: | ||
| type: object | ||
| enabled: false | ||
| - do: | ||
| query.settings: | ||
| body: | ||
| transient: | ||
| plugins.calcite.enabled : true | ||
|
|
||
| --- | ||
| teardown: | ||
| - do: | ||
| query.settings: | ||
| body: | ||
| transient: | ||
| plugins.calcite.enabled : false | ||
|
|
||
| --- | ||
| "Access nested field inside a disabled object": | ||
| - skip: | ||
| features: | ||
| - headers | ||
| - allowed_warnings | ||
| - do: | ||
| bulk: | ||
| index: test | ||
| refresh: true | ||
| body: | ||
| - '{"index": {}}' | ||
| - '{"log": {"a": 1, "c": {"d": 2}}}' | ||
| - do: | ||
| allowed_warnings: | ||
| - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' | ||
| headers: | ||
| Content-Type: 'application/json' | ||
| ppl: | ||
| body: | ||
| query: 'source=test | fields log.c.d' | ||
| - match: {"total": 1} | ||
| - match: {"datarows": [[2]]} |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -203,7 +203,7 @@ private ExprValue parse( | |
| // is resolved | ||
| if (fieldType.isEmpty() | ||
| || fieldType.get().equals(OpenSearchDataType.of(ExprCoreType.UNDEFINED))) { | ||
| return parseContent(content); | ||
| return parseContent(content, field, supportArrays); | ||
| } | ||
|
|
||
| final ExprType type = fieldType.get(); | ||
|
|
@@ -227,7 +227,15 @@ private ExprValue parse( | |
| } | ||
| } | ||
|
|
||
| private ExprValue parseContent(Content content) { | ||
| /** | ||
| * Parse {@link Content} into an {@link ExprValue} when the declared field type is unknown (either | ||
| * absent from the mapping or {@code UNDEFINED}). Dispatches purely on the shape of the content. | ||
| * | ||
| * <p>Objects are recursed into as unmapped structs (see issue #4906). Without this, an object | ||
| * inside a disabled-object field would be stringified and nested keys like {@code log.c.d} would | ||
| * be lost. | ||
| */ | ||
| private ExprValue parseContent(Content content, String field, boolean supportArrays) { | ||
| if (content.isNumber()) { | ||
| if (content.isInt()) { | ||
| return new ExprIntegerValue(content.intValue()); | ||
|
|
@@ -251,6 +259,8 @@ private ExprValue parseContent(Content content) { | |
| return ExprBooleanValue.of(content.booleanValue()); | ||
| } else if (content.isNull()) { | ||
| return ExprNullValue.of(); | ||
| } else if (content.isObject()) { | ||
| return parseUnmappedStruct(content, field, supportArrays); | ||
| } | ||
| // Default case, treat as a string value | ||
| return new ExprStringValue(content.objectValue().toString()); | ||
|
|
@@ -362,6 +372,50 @@ private static ExprValue createOpenSearchDateType( | |
| return new ExprTimestampValue((Instant) value.objectValue()); | ||
| } | ||
|
|
||
| /** | ||
| * Parse an unmapped object (e.g. an object declared with {@code "enabled": false} or generated by | ||
| * dynamic mapping with no known schema — see issue #4906). Produces an {@link ExprTupleValue} | ||
| * that holds each descendant value under both its nested key and the dotted-path key, so later | ||
| * {@code ITEM} lookups (which treat the tuple as a flat map) can retrieve values at any depth | ||
| * with a single key. | ||
| * | ||
| * <p>Example: an input of {@code {"a": 1, "c": {"d": 2}}} produces a tuple with entries {@code | ||
| * a=1}, {@code c=<tuple>}, {@code c.d=2} where the {@code c} entry is itself expanded the same | ||
| * way. | ||
| * | ||
| * @param content Content to parse. | ||
| * @param prefix Prefix for level of object depth to parse. | ||
| * @param supportArrays Parsing the whole array if array is type nested. | ||
| * @return Value parsed from content. | ||
| */ | ||
| private ExprValue parseUnmappedStruct(Content content, String prefix, boolean supportArrays) { | ||
| ExprTupleValue result = ExprTupleValue.empty(); | ||
| content | ||
| .map() | ||
| .forEachRemaining( | ||
| entry -> { | ||
| String fieldKey = entry.getKey(); | ||
| if (isFieldNameMalformed(fieldKey)) { | ||
| result.tupleValue().put(fieldKey, ExprNullValue.of()); | ||
| return; | ||
| } | ||
| Content childContent = entry.getValue(); | ||
| String fullFieldPath = makeField(prefix, fieldKey); | ||
| // Recurse via parseContent, which handles nulls, scalars, and objects (by | ||
| // recursing back into this method). Single ITEM lookups still succeed because | ||
| // we also expose descendants under their dotted-path keys below. | ||
| ExprValue childValue = parseContent(childContent, fullFieldPath, supportArrays); | ||
| result.tupleValue().put(fieldKey, childValue); | ||
| // Additionally expose every descendant at the current level using dotted-path keys | ||
| // so ITEM(parent, "c.d") can resolve to a scalar leaf value. Intermediate dotted | ||
| // paths (e.g. "c" mapping to a tuple) are already provided by the direct child entry. | ||
| if (childValue instanceof ExprTupleValue childTuple) { | ||
| exposeDescendantsAsDottedKeys(result, fieldKey, childTuple); | ||
| } | ||
| }); | ||
| return result; | ||
| } | ||
|
|
||
| /** | ||
| * Parse struct content. | ||
| * | ||
|
|
@@ -383,15 +437,40 @@ private ExprValue parseStruct(Content content, String prefix, boolean supportArr | |
| if (isFieldNameMalformed(fieldKey)) { | ||
| result.tupleValue().put(fieldKey, ExprNullValue.of()); | ||
| } else { | ||
| populateValueRecursive( | ||
| result, | ||
| new JsonPath(fieldKey), | ||
| parse(entry.getValue(), fullFieldPath, type(fullFieldPath), supportArrays)); | ||
| Optional<ExprType> childType = type(fullFieldPath); | ||
| ExprValue childValue = | ||
| parse(entry.getValue(), fullFieldPath, childType, supportArrays); | ||
| populateValueRecursive(result, new JsonPath(fieldKey), childValue); | ||
| // If the child's type is unmapped (e.g. inside a disabled object — see #4906), | ||
| // also expose every descendant under its dotted-path key so single-key ITEM | ||
| // lookups at this level resolve nested paths like log.c.d. | ||
| if (isUnmappedType(childType) && childValue instanceof ExprTupleValue) { | ||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could this be moved into parse method?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks — partially done in dd1e295. The per-child body of content.map().forEachRemaining(entry -> parseStructEntry(result, prefix, entry, supportArrays));Moving the dotted-key exposure literally inside An alternative — having The current shape (
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Update: in ac4afb0 I reverted the A literal move into So the cleanest place for the mutation is |
||
| exposeDescendantsAsDottedKeys(result, fieldKey, (ExprTupleValue) childValue); | ||
| } | ||
| } | ||
| }); | ||
| return result; | ||
| } | ||
|
|
||
| private static boolean isUnmappedType(Optional<ExprType> fieldType) { | ||
| return fieldType.isEmpty() | ||
| || fieldType.get().equals(OpenSearchDataType.of(ExprCoreType.UNDEFINED)); | ||
| } | ||
|
|
||
| /** | ||
| * Add every descendant entry already present in {@code childTuple} to {@code result} under a | ||
| * dotted-path key rooted at {@code fieldKey}. The child tuple is assumed to have been produced by | ||
| * {@link #parseUnmappedStruct(Content, String, boolean)} which already expanded nested tuples | ||
| * into dotted-path entries at each level. Existing keys are preserved. | ||
| */ | ||
| private static void exposeDescendantsAsDottedKeys( | ||
| ExprTupleValue result, String fieldKey, ExprTupleValue childTuple) { | ||
| for (Map.Entry<String, ExprValue> descendant : childTuple.tupleValue().entrySet()) { | ||
| String descendantKey = fieldKey + "." + descendant.getKey(); | ||
| result.tupleValue().putIfAbsent(descendantKey, descendant.getValue()); | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Check if a field name is malformed and cannot be processed by JsonPath. | ||
| * | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.