-
Notifications
You must be signed in to change notification settings - Fork 4.8k
HIVE-29413: Avoid code duplication by updating getPartCols method for iceberg tables #6337
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -174,9 +174,7 @@ private void addPartitionData(DataOutputStream out, HiveConf conf, String column | |
| List<FieldSchema> partitionColumns = null; | ||
| // TODO (HIVE-29413): Refactor to a generic getPartCols() implementation | ||
| if (table.isPartitioned()) { | ||
| partitionColumns = table.hasNonNativePartitionSupport() ? | ||
| table.getStorageHandler().getPartitionKeys(table) : | ||
| table.getPartCols(); | ||
| partitionColumns = table.getSupportedPartCols(); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. why not make it like
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. please check #6337 (comment) |
||
| } | ||
| if (CollectionUtils.isNotEmpty(partitionColumns) && | ||
| conf.getBoolVar(ConfVars.HIVE_DISPLAY_PARTITION_COLUMNS_SEPARATELY)) { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -594,6 +594,10 @@ public boolean equals(Object obj) { | |
| && Objects.equals(snapshotRef, other.snapshotRef); | ||
| } | ||
|
|
||
| public List<FieldSchema> getSupportedPartCols() { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. we should have generic API -
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Updating getPartCols() has a direct effect on the implementations related to alter statements and stats autogather, some of which are limited to native tables only (for those implementations, getPartCols() must return an empty list for Iceberg tables even when they have partition keys). Changing it would introduce more if-branches in those places, and even if we then got a green run, we could not be sure everything is unaffected, since we cannot be sure that all affected scenarios are covered by tests. So IMHO, we should go with this separate API for generic use cases and keep the older one for native use cases. I tried to explain my thinking, and one such scenario, in more detail in this comment: #6337 (comment)
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @deniskuzZ, can you please validate whether this explanation for a separate API seems reasonable, or are we still leaning towards a common API with an updated implementation, which could introduce further if-else blocks in the areas mentioned above? |
||
| return hasNonNativePartitionSupport() ? getStorageHandler().getPartitionKeys(this) : getPartCols(); | ||
| } | ||
|
|
||
| public List<FieldSchema> getPartCols() { | ||
| List<FieldSchema> partKeys = tTable.getPartitionKeys(); | ||
| if (partKeys == null) { | ||
|
|
@@ -610,9 +614,7 @@ public FieldSchema getPartColByName(String colName) { | |
| } | ||
|
|
||
| public List<String> getPartColNames() { | ||
| List<FieldSchema> partCols = hasNonNativePartitionSupport() ? | ||
| getStorageHandler().getPartitionKeys(this) : getPartCols(); | ||
| return partCols.stream().map(FieldSchema::getName) | ||
| return getSupportedPartCols().stream().map(FieldSchema::getName) | ||
| .collect(Collectors.toList()); | ||
| } | ||
|
|
||
|
|
@@ -761,10 +763,16 @@ private List<FieldSchema> getColsInternal(boolean forMs) { | |
| * @return List<FieldSchema> | ||
| */ | ||
| public List<FieldSchema> getAllCols() { | ||
| ArrayList<FieldSchema> f_list = new ArrayList<FieldSchema>(); | ||
| f_list.addAll(getCols()); | ||
| f_list.addAll(getPartCols()); | ||
| return f_list; | ||
| List<FieldSchema> allCols = new ArrayList<>(getCols()); | ||
| Set<String> colNames = allCols.stream() | ||
| .map(FieldSchema::getName) | ||
| .collect(Collectors.toSet()); | ||
| for (FieldSchema col : getSupportedPartCols()) { | ||
| if (!colNames.contains(col.getName())) { | ||
| allCols.add(col); | ||
| } | ||
| } | ||
| return allCols; | ||
| } | ||
|
|
||
| public void setPartCols(List<FieldSchema> partCols) { | ||
|
|
@@ -812,7 +820,7 @@ public void setOutputFormatClass(String name) throws HiveException { | |
| } | ||
|
|
||
| public boolean isPartitioned() { | ||
| return hasNonNativePartitionSupport() ? getStorageHandler().isPartitioned(this) : | ||
| return hasNonNativePartitionSupport() ? getStorageHandler().isPartitioned(this) : | ||
| CollectionUtils.isNotEmpty(getPartCols()); | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -581,8 +581,7 @@ public static Map<Integer, List<ExprNodeGenericFuncDesc>> getFullPartitionSpecs( | |
| CommonTree ast, Table table, Configuration conf, boolean canGroupExprs) throws SemanticException { | ||
| String defaultPartitionName = HiveConf.getVar(conf, HiveConf.ConfVars.DEFAULT_PARTITION_NAME); | ||
| Map<String, String> colTypes = new HashMap<>(); | ||
| List<FieldSchema> partitionKeys = table.hasNonNativePartitionSupport() ? | ||
| table.getStorageHandler().getPartitionKeys(table) : table.getPartitionKeys(); | ||
| List<FieldSchema> partitionKeys = table.getSupportedPartCols(); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. prev used api is
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes but it should be better to use the current one as |
||
| for (FieldSchema fs : partitionKeys) { | ||
| colTypes.put(fs.getName().toLowerCase(), fs.getType()); | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment.
The reason will be displayed to describe this comment to others. Learn more.
why do we need to clone it?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It was already cloning it so I just replaced
with
new ArrayList<>(table.getAllCols());