-
-
Notifications
You must be signed in to change notification settings - Fork 154
feat(replication): Add support for TEMPORARY replication slots
#515
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 10 commits
9be5ab4
fc583aa
4d04b0a
8ffbf21
ee3533b
22990a4
3138108
548b6a1
dc6f1f2
8fabadb
dd7c2ad
1b1f8aa
69cbbd5
c9d0326
9b7d2a3
c4d696c
c70cf69
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
| @@ -1,4 +1,4 @@ | ||||||||
| use etl_config::shared::{BatchConfig, PgConnectionConfig, PipelineConfig}; | ||||||||
| use etl_config::shared::{BatchConfig, PgConnectionConfig, PipelineConfig, ReplicationSlotConfig}; | ||||||||
| use serde::{Deserialize, Serialize}; | ||||||||
| use utoipa::ToSchema; | ||||||||
|
|
||||||||
|
|
@@ -26,11 +26,34 @@ pub struct ApiBatchConfig { | |||||||
| pub max_fill_ms: Option<u64>, | ||||||||
| } | ||||||||
|
|
||||||||
| #[derive(Debug, Clone, Serialize, Deserialize, ToSchema, Default)] | ||||||||
| pub struct ApiReplicationSlotConfig { | ||||||||
| #[schema(example = false)] | ||||||||
| pub temporary: bool, | ||||||||
| } | ||||||||
|
Comment on lines
+34
to
+64
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧹 Nitpick | 🔵 Trivial Add documentation to API types and consider adding The new API types are missing documentation. Also, 📝 Suggested improvements+/// Determines the persistence behavior of a replication slot in API requests.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, ToSchema, Default)]
#[serde(rename_all = "snake_case")]
pub enum ApiReplicationSlotPersistence {
+ /// Slot persists until explicitly dropped.
#[default]
Permanent,
+ /// Slot is automatically dropped when the connection ends.
Temporary,
}
// ... conversions ...
+/// Configuration for the replication slot used by the pipeline in API requests.
-#[derive(Debug, Clone, Serialize, Deserialize, ToSchema, Default)]
+#[derive(Debug, Clone, Copy, Serialize, Deserialize, ToSchema, Default)]
#[serde(rename_all = "snake_case")]
pub struct ApiReplicationSlotConfig {
+ /// Controls whether the replication slot persists across connection sessions.
pub persistence: ApiReplicationSlotPersistence,
}🤖 Prompt for AI Agents |
||||||||
|
|
||||||||
| impl From<ReplicationSlotConfig> for ApiReplicationSlotConfig { | ||||||||
| fn from(value: ReplicationSlotConfig) -> Self { | ||||||||
| ApiReplicationSlotConfig { | ||||||||
| temporary: value.temporary, | ||||||||
| } | ||||||||
| } | ||||||||
| } | ||||||||
|
|
||||||||
| impl From<ApiReplicationSlotConfig> for ReplicationSlotConfig { | ||||||||
| fn from(value: ApiReplicationSlotConfig) -> Self { | ||||||||
| ReplicationSlotConfig { | ||||||||
| temporary: value.temporary, | ||||||||
| } | ||||||||
| } | ||||||||
| } | ||||||||
|
|
||||||||
| #[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] | ||||||||
| pub struct FullApiPipelineConfig { | ||||||||
| #[schema(example = "my_publication")] | ||||||||
| #[serde(deserialize_with = "crate::utils::trim_string")] | ||||||||
| pub publication_name: String, | ||||||||
| pub replication_slot: ApiReplicationSlotConfig, | ||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same here:
Suggested change
Otherwise if the value is missing, it will crash existing API consumers. |
||||||||
| #[serde(skip_serializing_if = "Option::is_none")] | ||||||||
| pub batch: Option<ApiBatchConfig>, | ||||||||
| #[schema(example = 1000)] | ||||||||
|
|
@@ -49,6 +72,7 @@ impl From<StoredPipelineConfig> for FullApiPipelineConfig { | |||||||
| fn from(value: StoredPipelineConfig) -> Self { | ||||||||
| Self { | ||||||||
| publication_name: value.publication_name, | ||||||||
| replication_slot: value.replication_slot.into(), | ||||||||
| batch: Some(ApiBatchConfig { | ||||||||
| max_size: Some(value.batch.max_size), | ||||||||
| max_fill_ms: Some(value.batch.max_fill_ms), | ||||||||
|
|
@@ -89,6 +113,8 @@ pub struct PartialApiPipelineConfig { | |||||||
| #[derive(Debug, Clone, Serialize, Deserialize)] | ||||||||
| pub struct StoredPipelineConfig { | ||||||||
| pub publication_name: String, | ||||||||
| #[serde(default)] | ||||||||
| pub replication_slot: ReplicationSlotConfig, | ||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This will crash for existing stored configs.
Suggested change
This has to be paired with the other comment that I left.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added |
||||||||
| pub batch: BatchConfig, | ||||||||
| pub table_error_retry_delay_ms: u64, | ||||||||
| #[serde(default = "default_table_error_retry_max_attempts")] | ||||||||
|
|
@@ -106,6 +132,7 @@ impl StoredPipelineConfig { | |||||||
| PipelineConfig { | ||||||||
| id: pipeline_id, | ||||||||
| publication_name: self.publication_name, | ||||||||
| replication_slot: self.replication_slot, | ||||||||
| pg_connection: pg_connection_config, | ||||||||
| batch: self.batch, | ||||||||
| table_error_retry_delay_ms: self.table_error_retry_delay_ms, | ||||||||
|
|
@@ -161,6 +188,7 @@ impl From<FullApiPipelineConfig> for StoredPipelineConfig { | |||||||
|
|
||||||||
| Self { | ||||||||
| publication_name: value.publication_name, | ||||||||
| replication_slot: value.replication_slot.into(), | ||||||||
| batch, | ||||||||
| table_error_retry_delay_ms: value | ||||||||
| .table_error_retry_delay_ms | ||||||||
|
|
@@ -185,6 +213,7 @@ mod tests { | |||||||
| fn test_stored_pipeline_config_serialization() { | ||||||||
| let config = StoredPipelineConfig { | ||||||||
| publication_name: "test_publication".to_string(), | ||||||||
| replication_slot: ReplicationSlotConfig::default(), | ||||||||
| batch: BatchConfig { | ||||||||
| max_size: 1000, | ||||||||
| max_fill_ms: 5000, | ||||||||
|
|
@@ -223,6 +252,7 @@ mod tests { | |||||||
| table_error_retry_max_attempts: None, | ||||||||
| max_table_sync_workers: None, | ||||||||
| log_level: Some(LogLevel::Debug), | ||||||||
| replication_slot: ApiReplicationSlotConfig::default(), | ||||||||
| }; | ||||||||
|
|
||||||||
| let stored: StoredPipelineConfig = full_config.clone().into(); | ||||||||
|
|
@@ -235,6 +265,7 @@ mod tests { | |||||||
| fn test_full_api_pipeline_config_defaults() { | ||||||||
| let full_config = FullApiPipelineConfig { | ||||||||
| publication_name: "test_publication".to_string(), | ||||||||
| replication_slot: ApiReplicationSlotConfig::default(), | ||||||||
| batch: None, | ||||||||
| table_error_retry_delay_ms: None, | ||||||||
| table_error_retry_max_attempts: None, | ||||||||
|
|
@@ -264,6 +295,7 @@ mod tests { | |||||||
| fn test_partial_api_pipeline_config_merge() { | ||||||||
| let mut stored = StoredPipelineConfig { | ||||||||
| publication_name: "old_publication".to_string(), | ||||||||
| replication_slot: ReplicationSlotConfig::default(), | ||||||||
| batch: BatchConfig { | ||||||||
| max_size: 500, | ||||||||
| max_fill_ms: 2000, | ||||||||
|
|
||||||||
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -3,7 +3,6 @@ use serde::{Deserialize, Serialize}; | |||||||||||||||||||
| use crate::shared::{ | ||||||||||||||||||||
| PgConnectionConfig, PgConnectionConfigWithoutSecrets, ValidationError, batch::BatchConfig, | ||||||||||||||||||||
| }; | ||||||||||||||||||||
|
|
||||||||||||||||||||
| /// Configuration for an ETL pipeline. | ||||||||||||||||||||
| /// | ||||||||||||||||||||
| /// Contains all settings required to run a replication pipeline including | ||||||||||||||||||||
|
|
@@ -20,6 +19,8 @@ pub struct PipelineConfig { | |||||||||||||||||||
| pub id: u64, | ||||||||||||||||||||
| /// Name of the Postgres publication to use for logical replication. | ||||||||||||||||||||
| pub publication_name: String, | ||||||||||||||||||||
| /// Whether to use a temporary replication slot | ||||||||||||||||||||
| pub replication_slot: ReplicationSlotConfig, | ||||||||||||||||||||
|
Comment on lines
+63
to
+64
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧹 Nitpick | 🔵 Trivial Add trailing period to documentation comment. Per coding guidelines, documentation comments should be properly punctuated. 📝 Suggested fix- /// Configuration for the replication slot used
+ /// Configuration for the replication slot used.📝 Committable suggestion
Suggested change
🤖 Prompt for AI AgentsAdd The 🔧 Proposed fix /// Configuration for the replication slot used.
+ #[serde(default)]
pub replication_slot: ReplicationSlotConfig,📝 Committable suggestion
Suggested change
🤖 Prompt for AI Agents |
||||||||||||||||||||
| /// The connection configuration for the Postgres instance to which the pipeline connects for | ||||||||||||||||||||
| /// replication. | ||||||||||||||||||||
| pub pg_connection: PgConnectionConfig, | ||||||||||||||||||||
|
|
@@ -64,6 +65,8 @@ pub struct PipelineConfigWithoutSecrets { | |||||||||||||||||||
| pub id: u64, | ||||||||||||||||||||
| /// Name of the Postgres publication to use for logical replication. | ||||||||||||||||||||
| pub publication_name: String, | ||||||||||||||||||||
| /// Whether to use a temporary replication slot | ||||||||||||||||||||
| pub replication_slot: ReplicationSlotConfig, | ||||||||||||||||||||
|
Comment on lines
+143
to
+144
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add Same issues as 🔧 Proposed fix- /// Configuration for the replication slot used
+ /// Configuration for the replication slot used.
+ #[serde(default)]
pub replication_slot: ReplicationSlotConfig,🤖 Prompt for AI Agents |
||||||||||||||||||||
| /// The connection configuration for the Postgres instance to which the pipeline connects for | ||||||||||||||||||||
| /// replication. | ||||||||||||||||||||
| pub pg_connection: PgConnectionConfigWithoutSecrets, | ||||||||||||||||||||
|
|
@@ -82,6 +85,7 @@ impl From<PipelineConfig> for PipelineConfigWithoutSecrets { | |||||||||||||||||||
| PipelineConfigWithoutSecrets { | ||||||||||||||||||||
| id: value.id, | ||||||||||||||||||||
| publication_name: value.publication_name, | ||||||||||||||||||||
| replication_slot: value.replication_slot, | ||||||||||||||||||||
| pg_connection: value.pg_connection.into(), | ||||||||||||||||||||
| batch: value.batch, | ||||||||||||||||||||
| table_error_retry_delay_ms: value.table_error_retry_delay_ms, | ||||||||||||||||||||
|
|
@@ -90,3 +94,9 @@ impl From<PipelineConfig> for PipelineConfigWithoutSecrets { | |||||||||||||||||||
| } | ||||||||||||||||||||
| } | ||||||||||||||||||||
| } | ||||||||||||||||||||
|
|
||||||||||||||||||||
| #[derive(Clone, Copy, Debug, Deserialize, Serialize, Default)] | ||||||||||||||||||||
| #[serde(rename_all = "snake_case")] | ||||||||||||||||||||
| pub struct ReplicationSlotConfig { | ||||||||||||||||||||
| pub temporary: bool, | ||||||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have tried searching online for variants, to keep our format open for the future we could do something like:
Suggested change
And then the And that could be serialized as an internally tagged enum as snake case and with the identifier named something like The reason why I am suggesting this is that if there is ever the case where persistence changes or we want a slight variation, having a boolean is painful to migrate from. Unfortunately me being overly strict is because we store this config in the middleware db and the migration paths are very tricky. |
||||||||||||||||||||
| } | ||||||||||||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For the API, it's fine to use this interface as of now.