-
-
Notifications
You must be signed in to change notification settings - Fork 154
feat(replication): Add support for TEMPORARY replication slots
#515
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 4 commits
9be5ab4
fc583aa
4d04b0a
8ffbf21
ee3533b
22990a4
3138108
548b6a1
dc6f1f2
8fabadb
dd7c2ad
1b1f8aa
69cbbd5
c9d0326
9b7d2a3
c4d696c
c70cf69
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
| @@ -1,4 +1,4 @@ | ||||||||
| use etl_config::shared::{BatchConfig, PgConnectionConfig, PipelineConfig}; | ||||||||
| use etl_config::shared::{BatchConfig, PgConnectionConfig, PipelineConfig, ReplicationSlotConfig}; | ||||||||
| use serde::{Deserialize, Serialize}; | ||||||||
| use utoipa::ToSchema; | ||||||||
|
|
||||||||
|
|
@@ -31,6 +31,7 @@ pub struct FullApiPipelineConfig { | |||||||
| #[schema(example = "my_publication")] | ||||||||
| #[serde(deserialize_with = "crate::utils::trim_string")] | ||||||||
| pub publication_name: String, | ||||||||
| pub temporary_replication_slot: bool, | ||||||||
| #[serde(skip_serializing_if = "Option::is_none")] | ||||||||
| pub batch: Option<ApiBatchConfig>, | ||||||||
| #[schema(example = 1000)] | ||||||||
|
|
@@ -49,6 +50,10 @@ impl From<StoredPipelineConfig> for FullApiPipelineConfig { | |||||||
| fn from(value: StoredPipelineConfig) -> Self { | ||||||||
| Self { | ||||||||
| publication_name: value.publication_name, | ||||||||
| temporary_replication_slot: match value.replication_slot { | ||||||||
| ReplicationSlotConfig::Temporary => true, | ||||||||
| ReplicationSlotConfig::Permanent => false, | ||||||||
| }, | ||||||||
| batch: Some(ApiBatchConfig { | ||||||||
| max_size: Some(value.batch.max_size), | ||||||||
| max_fill_ms: Some(value.batch.max_fill_ms), | ||||||||
|
|
@@ -89,6 +94,7 @@ pub struct PartialApiPipelineConfig { | |||||||
| #[derive(Debug, Clone, Serialize, Deserialize)] | ||||||||
| pub struct StoredPipelineConfig { | ||||||||
| pub publication_name: String, | ||||||||
| pub replication_slot: ReplicationSlotConfig, | ||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This will crash for existing stored configs.
Suggested change
This has to be paired with the other comment that I left.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added |
||||||||
| pub batch: BatchConfig, | ||||||||
| pub table_error_retry_delay_ms: u64, | ||||||||
| #[serde(default = "default_table_error_retry_max_attempts")] | ||||||||
|
|
@@ -106,6 +112,7 @@ impl StoredPipelineConfig { | |||||||
| PipelineConfig { | ||||||||
| id: pipeline_id, | ||||||||
| publication_name: self.publication_name, | ||||||||
| replication_slot: self.replication_slot, | ||||||||
| pg_connection: pg_connection_config, | ||||||||
| batch: self.batch, | ||||||||
| table_error_retry_delay_ms: self.table_error_retry_delay_ms, | ||||||||
|
|
@@ -161,6 +168,11 @@ impl From<FullApiPipelineConfig> for StoredPipelineConfig { | |||||||
|
|
||||||||
| Self { | ||||||||
| publication_name: value.publication_name, | ||||||||
| replication_slot: if value.temporary_replication_slot { | ||||||||
| ReplicationSlotConfig::Temporary | ||||||||
| } else { | ||||||||
| ReplicationSlotConfig::Permanent | ||||||||
| }, | ||||||||
| batch, | ||||||||
| table_error_retry_delay_ms: value | ||||||||
| .table_error_retry_delay_ms | ||||||||
|
|
@@ -185,6 +197,7 @@ mod tests { | |||||||
| fn test_stored_pipeline_config_serialization() { | ||||||||
| let config = StoredPipelineConfig { | ||||||||
| publication_name: "test_publication".to_string(), | ||||||||
| replication_slot: ReplicationSlotConfig::Permanent, | ||||||||
| batch: BatchConfig { | ||||||||
| max_size: 1000, | ||||||||
| max_fill_ms: 5000, | ||||||||
|
|
@@ -223,6 +236,7 @@ mod tests { | |||||||
| table_error_retry_max_attempts: None, | ||||||||
| max_table_sync_workers: None, | ||||||||
| log_level: Some(LogLevel::Debug), | ||||||||
| temporary_replication_slot: false, | ||||||||
| }; | ||||||||
|
|
||||||||
| let stored: StoredPipelineConfig = full_config.clone().into(); | ||||||||
|
|
@@ -235,6 +249,7 @@ mod tests { | |||||||
| fn test_full_api_pipeline_config_defaults() { | ||||||||
| let full_config = FullApiPipelineConfig { | ||||||||
| publication_name: "test_publication".to_string(), | ||||||||
| temporary_replication_slot: false, | ||||||||
| batch: None, | ||||||||
| table_error_retry_delay_ms: None, | ||||||||
| table_error_retry_max_attempts: None, | ||||||||
|
|
@@ -264,6 +279,7 @@ mod tests { | |||||||
| fn test_partial_api_pipeline_config_merge() { | ||||||||
| let mut stored = StoredPipelineConfig { | ||||||||
| publication_name: "old_publication".to_string(), | ||||||||
| replication_slot: ReplicationSlotConfig::Permanent, | ||||||||
| batch: BatchConfig { | ||||||||
| max_size: 500, | ||||||||
| max_fill_ms: 2000, | ||||||||
|
|
||||||||
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -3,7 +3,6 @@ use serde::{Deserialize, Serialize}; | |||||||||||||||||||
| use crate::shared::{ | ||||||||||||||||||||
| PgConnectionConfig, PgConnectionConfigWithoutSecrets, ValidationError, batch::BatchConfig, | ||||||||||||||||||||
| }; | ||||||||||||||||||||
|
|
||||||||||||||||||||
| /// Configuration for an ETL pipeline. | ||||||||||||||||||||
| /// | ||||||||||||||||||||
| /// Contains all settings required to run a replication pipeline including | ||||||||||||||||||||
|
|
@@ -20,6 +19,8 @@ pub struct PipelineConfig { | |||||||||||||||||||
| pub id: u64, | ||||||||||||||||||||
| /// Name of the Postgres publication to use for logical replication. | ||||||||||||||||||||
| pub publication_name: String, | ||||||||||||||||||||
| /// Whether to use a temporary replication slot | ||||||||||||||||||||
| pub replication_slot: ReplicationSlotConfig, | ||||||||||||||||||||
|
Comment on lines
+63
to
+64
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧹 Nitpick | 🔵 Trivial Add trailing period to documentation comment. Per coding guidelines, documentation comments should be properly punctuated. 📝 Suggested fix- /// Configuration for the replication slot used
+ /// Configuration for the replication slot used.📝 Committable suggestion
Suggested change
🤖 Prompt for AI AgentsAdd The 🔧 Proposed fix /// Configuration for the replication slot used.
+ #[serde(default)]
pub replication_slot: ReplicationSlotConfig,📝 Committable suggestion
Suggested change
🤖 Prompt for AI Agents |
||||||||||||||||||||
| /// The connection configuration for the Postgres instance to which the pipeline connects for | ||||||||||||||||||||
| /// replication. | ||||||||||||||||||||
| pub pg_connection: PgConnectionConfig, | ||||||||||||||||||||
|
|
@@ -64,6 +65,8 @@ pub struct PipelineConfigWithoutSecrets { | |||||||||||||||||||
| pub id: u64, | ||||||||||||||||||||
| /// Name of the Postgres publication to use for logical replication. | ||||||||||||||||||||
| pub publication_name: String, | ||||||||||||||||||||
| /// Whether to use a temporary replication slot | ||||||||||||||||||||
| pub replication_slot: ReplicationSlotConfig, | ||||||||||||||||||||
|
Comment on lines
+143
to
+144
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add Same issues as 🔧 Proposed fix- /// Configuration for the replication slot used
+ /// Configuration for the replication slot used.
+ #[serde(default)]
pub replication_slot: ReplicationSlotConfig,🤖 Prompt for AI Agents |
||||||||||||||||||||
| /// The connection configuration for the Postgres instance to which the pipeline connects for | ||||||||||||||||||||
| /// replication. | ||||||||||||||||||||
| pub pg_connection: PgConnectionConfigWithoutSecrets, | ||||||||||||||||||||
|
|
@@ -82,6 +85,7 @@ impl From<PipelineConfig> for PipelineConfigWithoutSecrets { | |||||||||||||||||||
| PipelineConfigWithoutSecrets { | ||||||||||||||||||||
| id: value.id, | ||||||||||||||||||||
| publication_name: value.publication_name, | ||||||||||||||||||||
| replication_slot: value.replication_slot, | ||||||||||||||||||||
| pg_connection: value.pg_connection.into(), | ||||||||||||||||||||
| batch: value.batch, | ||||||||||||||||||||
| table_error_retry_delay_ms: value.table_error_retry_delay_ms, | ||||||||||||||||||||
|
|
@@ -90,3 +94,9 @@ impl From<PipelineConfig> for PipelineConfigWithoutSecrets { | |||||||||||||||||||
| } | ||||||||||||||||||||
| } | ||||||||||||||||||||
| } | ||||||||||||||||||||
|
|
||||||||||||||||||||
| #[derive(Clone, Copy, Debug, Deserialize, Serialize)] | ||||||||||||||||||||
| pub enum ReplicationSlotConfig { | ||||||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||||||
| Temporary, | ||||||||||||||||||||
| Permanent, | ||||||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To main backward compatibility we should set the default to be |
||||||||||||||||||||
| } | ||||||||||||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are there other kind of replication slots? Asking since having a boolean might give us less flexibility in the future to support other replication slots types.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmmm as per the Postgres docs (here) there are some other knobs that can be turned when creating the replication slots:
I would guess that
etlwill limit itself to logical replication, so the physical replication is irrelevant. Some of the others might be relevant, such asFAILOVER(to enable clean failover if someone is running a high-availability postgres cluster using e.g.,cloudnative-pg). But iirc theetlcurrently couldn't support this without adding some kind of reconnect logic anyhow.But I agree with your point! It could make sense to make it more future-proof wrt. other existing (or future) replication slot options later, by changing the
ReplicationSlotConfigto something like: