Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions datafusion/sql/src/unparser/dialect.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,12 @@ pub trait Dialect: Send + Sync {
ast::DataType::BigInt(None)
}

/// The SQL type to use for Arrow Int8 unparsing.
///
/// The default implementation returns `TinyInt`. A dialect can override this
/// method to emit a different type; the PostgreSQL dialect returns `SmallInt`.
fn int8_cast_dtype(&self) -> ast::DataType {
ast::DataType::TinyInt(None)
}

/// The SQL type to use for Arrow Int32 unparsing
/// Most dialects use Integer, but some, like MySQL, require SIGNED
fn int32_cast_dtype(&self) -> ast::DataType {
Expand Down Expand Up @@ -345,6 +351,10 @@ impl Dialect for PostgreSqlDialect {
ast::DataType::DoublePrecision
}

// Arrow Int8 is unparsed as SMALLINT for PostgreSQL instead of the trait
// default TINYINT.
// NOTE(review): presumably because PostgreSQL has no TINYINT type - confirm
// against the PostgreSQL numeric types documentation.
fn int8_cast_dtype(&self) -> ast::DataType {
ast::DataType::SmallInt(None)
}

fn scalar_function_to_sql_overrides(
&self,
unparser: &Unparser,
Expand Down Expand Up @@ -664,6 +674,7 @@ pub struct CustomDialect {
large_utf8_cast_dtype: ast::DataType,
date_field_extract_style: DateFieldExtractStyle,
character_length_style: CharacterLengthStyle,
int8_cast_dtype: ast::DataType,
int64_cast_dtype: ast::DataType,
int32_cast_dtype: ast::DataType,
timestamp_cast_dtype: ast::DataType,
Expand All @@ -689,6 +700,7 @@ impl Default for CustomDialect {
large_utf8_cast_dtype: ast::DataType::Text,
date_field_extract_style: DateFieldExtractStyle::DatePart,
character_length_style: CharacterLengthStyle::CharacterLength,
int8_cast_dtype: ast::DataType::TinyInt(None),
int64_cast_dtype: ast::DataType::BigInt(None),
int32_cast_dtype: ast::DataType::Integer(None),
timestamp_cast_dtype: ast::DataType::Timestamp(None, TimezoneInfo::None),
Expand Down Expand Up @@ -748,6 +760,10 @@ impl Dialect for CustomDialect {
self.int64_cast_dtype.clone()
}

// Returns a clone of the Int8 cast type stored on this dialect
// (`TinyInt` in `CustomDialect::default()`).
fn int8_cast_dtype(&self) -> ast::DataType {
self.int8_cast_dtype.clone()
}

// Returns a clone of the Int32 cast type stored on this dialect
// (`Integer` in `CustomDialect::default()`).
fn int32_cast_dtype(&self) -> ast::DataType {
self.int32_cast_dtype.clone()
}
Expand Down Expand Up @@ -839,6 +855,7 @@ pub struct CustomDialectBuilder {
large_utf8_cast_dtype: ast::DataType,
date_field_extract_style: DateFieldExtractStyle,
character_length_style: CharacterLengthStyle,
int8_cast_dtype: ast::DataType,
int64_cast_dtype: ast::DataType,
int32_cast_dtype: ast::DataType,
timestamp_cast_dtype: ast::DataType,
Expand Down Expand Up @@ -870,6 +887,7 @@ impl CustomDialectBuilder {
large_utf8_cast_dtype: ast::DataType::Text,
date_field_extract_style: DateFieldExtractStyle::DatePart,
character_length_style: CharacterLengthStyle::CharacterLength,
int8_cast_dtype: ast::DataType::TinyInt(None),
int64_cast_dtype: ast::DataType::BigInt(None),
int32_cast_dtype: ast::DataType::Integer(None),
timestamp_cast_dtype: ast::DataType::Timestamp(None, TimezoneInfo::None),
Expand Down Expand Up @@ -898,6 +916,7 @@ impl CustomDialectBuilder {
large_utf8_cast_dtype: self.large_utf8_cast_dtype,
date_field_extract_style: self.date_field_extract_style,
character_length_style: self.character_length_style,
int8_cast_dtype: self.int8_cast_dtype,
int64_cast_dtype: self.int64_cast_dtype,
int32_cast_dtype: self.int32_cast_dtype,
timestamp_cast_dtype: self.timestamp_cast_dtype,
Expand Down Expand Up @@ -952,6 +971,12 @@ impl CustomDialectBuilder {
self
}

/// Customize the dialect with a specific SQL type for Int8 casting: TinyInt, SmallInt, etc.
///
/// The builder starts with `TinyInt`; the value passed here replaces it.
/// Returns the builder so that calls can be chained.
pub fn with_int8_cast_dtype(mut self, int8_cast_dtype: ast::DataType) -> Self {
self.int8_cast_dtype = int8_cast_dtype;
self
}

/// Customize the dialect with a specific SQL type for Float64 casting: DOUBLE, DOUBLE PRECISION, etc.
pub fn with_float64_ast_dtype(mut self, float64_ast_dtype: ast::DataType) -> Self {
self.float64_ast_dtype = float64_ast_dtype;
Expand Down
2 changes: 1 addition & 1 deletion datafusion/sql/src/unparser/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1734,7 +1734,7 @@ impl Unparser<'_> {
not_impl_err!("Unsupported DataType: conversion: {data_type}")
}
DataType::Boolean => Ok(ast::DataType::Bool),
DataType::Int8 => Ok(ast::DataType::TinyInt(None)),
DataType::Int8 => Ok(self.dialect.int8_cast_dtype()),
DataType::Int16 => Ok(ast::DataType::SmallInt(None)),
DataType::Int32 => Ok(self.dialect.int32_cast_dtype()),
DataType::Int64 => Ok(self.dialect.int64_cast_dtype()),
Expand Down
22 changes: 22 additions & 0 deletions datafusion/sql/tests/cases/plan_to_sql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1937,6 +1937,28 @@ fn test_without_offset() {
)
}

#[test]
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can add a test that the default path still produces TINYINT, both for reference and to guard against regressions.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done! Thanks!

/// PostgreSQL unparser dialect: a `tinyint` cast parsed with `GenericDialect`
/// is expected to be written back out as `CAST(3 AS SMALLINT)`.
fn test_cast_to_tinyint() -> Result<(), DataFusionError> {
roundtrip_statement_with_dialect_helper!(
sql: "select cast(3 as tinyint)",
parser_dialect: GenericDialect {},
unparser_dialect: UnparserPostgreSqlDialect {},
expected: @"SELECT CAST(3 AS SMALLINT)",
);
Ok(())
}

/// Default unparser dialect: the same `tinyint` cast is expected to keep its
/// type and be written back out as `CAST(3 AS TINYINT)`, pinning the default
/// behaviour alongside the PostgreSQL-specific case.
#[test]
fn test_cast_to_tinyint_default_dialect() -> Result<(), DataFusionError> {
roundtrip_statement_with_dialect_helper!(
sql: "select cast(3 as tinyint)",
parser_dialect: GenericDialect {},
unparser_dialect: UnparserDefaultDialect {},
expected: @"SELECT CAST(3 AS TINYINT)",
);
Ok(())
}

#[test]
fn test_with_offset0() {
let statement = generate_round_trip_statement(MySqlDialect {}, "select 1 offset 0");
Expand Down
Loading