# Extract the unqualified table name from a CREATE TABLE statement whose SQL
# line comments have already been removed.
#
# All whitespace (including newlines) is collapsed first, so the name is found
# even when it sits on a different line than the CREATE TABLE keywords. The
# name is cut at the first whitespace or opening parenthesis *before* any
# schema prefix is split off, so dots inside the column list (e.g.
# `DEFAULT 0.5`) cannot be mistaken for a schema separator.
#
# Returns a single string; "" when no name could be determined.
extractDuckDbDdlTableName <- function(createStatement) {
  flattened <- trimws(gsub("[[:space:]]+", " ", createStatement))
  remainder <- sub(
    "^CREATE TABLE (IF NOT EXISTS )?",
    "",
    flattened,
    ignore.case = TRUE
  )
  qualifiedName <- sub("[[:space:](].*$", "", remainder)
  nameParts <- strsplit(qualifiedName, ".", fixed = TRUE)[[1]]
  if (length(nameParts) == 0) {
    return("")
  }
  nameParts[length(nameParts)]
}

# Rewrite CREATE TABLE statements for tables whose name contains "plp"
# (case-insensitive) using a small set of text substitutions:
#   * `GENERATED ALWAYS AS IDENTITY NOT NULL PRIMARY KEY` becomes
#     `PRIMARY KEY DEFAULT nextval('<table>_seq')`, and a
#     `CREATE SEQUENCE IF NOT EXISTS <table>_seq` statement is emitted
#     immediately before the table statement.
#   * `int` -> `INTEGER`, `VARCHAR(MAX)` -> `VARCHAR`, `text` -> `TEXT`,
#     `float` -> `DOUBLE` (whole-word, case-insensitive).
# Every other statement is passed through unchanged (after trimming).
#
# sql            A single string holding one or more SQL statements separated
#                by semicolons.
# targetDialect  Accepted for call compatibility; not used by this function.
#
# Returns a single string: the non-empty statements joined with ";\n" and
# terminated by ";", or "" when the input holds no statements.
#
# NOTE(review): statements are split on every ";" and the type substitutions
# are plain text replacements, so semicolons or type keywords inside string
# literals/comments are affected as well - confirm this is acceptable for the
# DDL this is applied to.
translateDuckDbDDL <- function(sql, targetDialect) {
  statements <- strsplit(sql, ";")[[1]]
  # One slot per input statement; a slot holds the (optional) sequence
  # statement followed by the table statement, or stays NULL when skipped.
  processed <- vector("list", length(statements))

  for (i in seq_along(statements)) {
    statement <- trimws(statements[i])
    if (statement == "") {
      next
    }

    # Strip `--` line comments for pattern detection only; the emitted
    # statement keeps its comments.
    codeLines <- trimws(sub("--.*$", "", strsplit(statement, "\n")[[1]]))
    commentFree <- paste(codeLines[codeLines != ""], collapse = "\n")

    sequenceStatement <- character()
    if (grepl("^CREATE\\s+TABLE", commentFree, ignore.case = TRUE)) {
      tableName <- extractDuckDbDdlTableName(commentFree)

      if (nzchar(tableName) && grepl("plp", tableName, ignore.case = TRUE)) {
        if (grepl("GENERATED\\s+ALWAYS\\s+AS\\s+IDENTITY", commentFree, ignore.case = TRUE)) {
          sequenceName <- paste0(tableName, "_seq")
          withSequence <- gsub(
            "GENERATED\\s+ALWAYS\\s+AS\\s+IDENTITY\\s+NOT\\s+NULL\\s+PRIMARY\\s+KEY",
            paste0("PRIMARY KEY DEFAULT nextval('", sequenceName, "')"),
            statement,
            ignore.case = TRUE
          )
          # Only create the sequence when the column default now refers to it
          if (withSequence != statement) {
            sequenceStatement <- paste0("CREATE SEQUENCE IF NOT EXISTS ", sequenceName)
            statement <- withSequence
          }
        }

        # Standard type conversions; gsub() leaves the text untouched when
        # the pattern is absent.
        statement <- gsub("\\bint\\b", "INTEGER", statement, ignore.case = TRUE)
        statement <- gsub("VARCHAR\\s*\\(\\s*MAX\\s*\\)", "VARCHAR", statement, ignore.case = TRUE)
        statement <- gsub("\\btext\\b", "TEXT", statement, ignore.case = TRUE)
        statement <- gsub("\\bfloat\\b", "DOUBLE", statement, ignore.case = TRUE)
      }
    }

    processed[[i]] <- c(sequenceStatement, statement)
  }

  # Recombine statements with semicolons
  sql <- paste(unlist(processed), collapse = ";\n")
  if (sql != "") {
    sql <- paste0(sql, ";")
  }
  return(sql)
}
Currently "oracle", "postgresql", "pdw", "impala", -#' "sqlite", "sqlite extended", "netezza", "bigquery", "snowflake", "synapse", "spark", +#' "sqlite", "sqlite extended", "netezza", "bigquery", "snowflake", "synapse", "spark", #' "redshift", and "iris" are supported. #' @param oracleTempSchema DEPRECATED: use \code{tempEmulationSchema} instead. #' @param tempEmulationSchema Some database platforms like Oracle and Impala do not truly support