diff --git a/Cargo.lock b/Cargo.lock
index cca316b..d289744 100755
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1420,6 +1420,23 @@ dependencies = [
  "syn",
 ]
 
+[[package]]
+name = "kube-leader-election"
+version = "0.1.0"
+dependencies = [
+ "async-stream",
+ "async-trait",
+ "chrono",
+ "futures",
+ "k8s-openapi",
+ "kube",
+ "rand",
+ "snafu",
+ "tokio",
+ "tokio-util",
+ "tracing",
+]
+
 [[package]]
 name = "kube-runtime"
 version = "2.0.1"
@@ -1650,12 +1667,14 @@ dependencies = [
  "futures",
  "hex",
  "hmac",
+ "hostname",
  "http",
  "hyper",
  "hyper-util",
  "jsonwebtoken",
  "k8s-openapi",
  "kube",
+ "kube-leader-election",
  "rcgen",
  "reqwest",
  "rustls",
@@ -2724,6 +2743,7 @@ dependencies = [
  "futures-core",
  "futures-io",
  "futures-sink",
+ "futures-util",
  "pin-project-lite",
  "slab",
  "tokio",
diff --git a/Cargo.toml b/Cargo.toml
index 889b3eb..1e2eefc 100755
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -14,7 +14,7 @@ serde = { version = "1.0.228", features = ["derive"] }
 tokio = { version = "1.49.0", features = ["rt", "rt-multi-thread", "macros", "fs", "io-std", "io-util"] }
 tokio-rustls = "0.26"
 tokio-stream = { version = "0.1", features = ["sync"] }
-tokio-util = { version = "0.7", features = ["io", "compat"] }
+tokio-util = { version = "0.7", features = ["io", "compat", "rt"] }
 futures = "0.3.31"
 tracing = "0.1.44"
 tracing-subscriber = { version = "0.3.22", features = ["env-filter"] }
@@ -36,6 +36,8 @@ reqwest = { version = "0.12", default-features = false, features = ["json", "rus
 url = "2.5"
 shadow-rs = "1.5.0"
 snafu = { version = "0.8.9", features = ["futures"] }
+kube-leader-election = { path = "crates/leader-election" }
+hostname = "0.4"
 
 # Console dependencies
 axum = { version = "0.7", features = ["macros", "json"] }
@@ -57,6 +59,9 @@ shadow-rs = { version = "1.5.0", features = ["build"] }
 [lints.rust]
 unused_variables = "allow"
 
+[workspace]
+members = ["crates/leader-election"]
+
 [lints.clippy]
 unwrap_used = "deny"
 expect_used = "deny"
diff --git a/crates/leader-election/Cargo.toml b/crates/leader-election/Cargo.toml
new file mode 100644
index 0000000..bb97936
--- /dev/null
+++ b/crates/leader-election/Cargo.toml
@@ -0,0 +1,22 @@
+[package]
+name = "kube-leader-election"
+version = "0.1.0"
+edition = "2024"
+license = "Apache-2.0"
+description = "Kubernetes leader election for RustFS Operator"
+
+[dependencies]
+async-stream = "0.3"
+async-trait = { version = "0.1.89", default-features = false }
+chrono = { version = "0.4", features = ["serde"] }
+futures = "0.3.31"
+k8s-openapi = { version = "0.26.1", features = ["v1_30"] }
+kube = { version = "2.0.1", features = ["client", "rustls-tls"] }
+rand = "0.9"
+snafu = { version = "0.8.9", features = ["futures"] }
+tokio = { version = "1.49.0", features = ["sync", "time", "macros", "rt"] }
+tokio-util = { version = "0.7", features = ["rt"] }
+tracing = "0.1.44"
+
+[dev-dependencies]
+tokio = { version = "1.49.0", features = ["rt-multi-thread", "macros"] }
diff --git a/crates/leader-election/README.md b/crates/leader-election/README.md
new file mode 100644
index 0000000..a4d0497
--- /dev/null
+++ b/crates/leader-election/README.md
@@ -0,0 +1,134 @@
+# kube-leader-election
+
+Kubernetes leader election for Rust operators, using Lease resources as the lock backend.
+
+Semantics are aligned with [client-go leaderelection](https://github.com/kubernetes/client-go/tree/master/tools/leaderelection).
+
+## Features
+
+- **Lease-based locking** — uses `coordination.k8s.io/v1` Lease objects (K8s 1.14+)
+- **Structured concurrency** — no implicit task spawning; caller controls lifecycle via `CancellationToken`
+- **Lock trait abstraction** — pluggable backends, easy to test with fakes
+- **Clock trait injection** — deterministic tests with `MockClock`
+- **Observability** — `LeaderElectorHandle` with watch-channel state stream
+- **Jittered retry** — randomized retry intervals to avoid thundering herd
+
+## Quick Start
+
+```rust
+use kube_leader_election::{
+    LeaderCallbacks, LeaderElector, LeaderElectorConfig, LeaseLock, SystemClock,
+};
+use kube::Client;
+use std::time::Duration;
+use tokio_util::sync::CancellationToken;
+
+let client = Client::try_default().await?;
+
+// Create the lock (one per Lease name/namespace)
+let lock = LeaseLock::new(client, "my-operator-lease", "default");
+
+// Configure the elector
+let config = LeaderElectorConfig {
+    identity: "pod-abc123".into(),
+    lease_duration: Duration::from_secs(15),
+    renew_deadline: Duration::from_secs(10),
+    retry_period: Duration::from_secs(2),
+    release_on_cancel: true,
+};
+
+// Create the elector
+let elector = LeaderElector::new(config, lock, SystemClock);
+
+// Define callbacks
+struct MyCallbacks;
+
+#[async_trait::async_trait]
+impl LeaderCallbacks for MyCallbacks {
+    async fn on_started_leading(&self, cancel: CancellationToken) {
+        // Run your controller loop here
+        my_controller(cancel).await;
+    }
+
+    async fn on_stopped_leading(&self) {
+        tracing::warn!("lost leadership, shutting down");
+    }
+
+    async fn on_new_leader(&self, identity: String) {
+        tracing::info!("new leader: {}", identity);
+    }
+}
+
+// Run — blocks until cancellation, retrying on transient leadership loss
+let cancel = CancellationToken::new();
+elector.run(MyCallbacks, cancel).await;
+```
+
+## Configuration
+
+| Parameter | Default | Description |
+|-----------|---------|-------------|
+| `identity` | *(required)* | Unique ID for this instance (typically pod name) |
+| `lease_duration` | 15s | Non-leaders wait this long before forcing takeover |
+| `renew_deadline` | 10s | Leader retry window for renewing the lease |
+| `retry_period` | 2s | Interval between acquire/renew attempts (with jitter) |
+| `release_on_cancel` | `true` | Whether to release the lease when context is cancelled |
+
+**Constraint:** `lease_duration > renew_deadline > retry_period × 1.2`
+
+## RBAC
+
+The operator needs the following permissions on the Lease resource:
+
+```yaml
+- apiGroups: ["coordination.k8s.io"]
+  resources: ["leases"]
+  verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
+```
+
+## Architecture
+
+```
+LeaderElector
+├── acquire() — loop: try_acquire_or_renew → sleep(jittered retry_period)
+├── renew()   — loop: try_acquire_or_renew within renew_deadline window
+├── release() — clear holder_identity, preserve transition count
+└── run()     — orchestrate acquire → on_started_leading → renew → on_stopped_leading (retries on lease loss)
+```
+
+The `Lock` trait abstracts the backend:
+
+```rust
+#[async_trait]
+pub trait Lock: Send + Sync {
+    async fn get(&self) -> Result<Option<LeaderElectionRecord>, Error>;
+    async fn create(&self, record: LeaderElectionRecord) -> Result<(), Error>;
+    async fn update(&self, record: LeaderElectionRecord) -> Result<(), Error>;
+    fn identity(&self) -> &str;
+    fn describe(&self) -> String;
+}
+```
+
+`LeaseLock` is the built-in implementation backed by Kubernetes Lease objects.
+
+## Testing
+
+```bash
+# Unit tests
+cargo test -p kube-leader-election --lib
+
+# Integration tests (uses FakeLock, no cluster needed)
+cargo test -p kube-leader-election --test integration_tests
+```
+
+## Design Documentation
+
+See [`docs/design.md`](docs/design.md) for the full technical design including:
+- Client-go source analysis
+- API design decisions
+- Algorithm details (acquire/renew/release)
+- Concurrency model
+
+## License
+
+Apache-2.0
diff --git a/crates/leader-election/src/callbacks.rs b/crates/leader-election/src/callbacks.rs
new file mode 100644
index 0000000..63e5f93
--- /dev/null
+++ b/crates/leader-election/src/callbacks.rs
@@ -0,0 +1,32 @@
+// Copyright 2024 RustFS Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Leader callbacks trait.
+
+use async_trait::async_trait;
+use tokio_util::sync::CancellationToken;
+
+/// Callbacks invoked during leader election lifecycle events.
+#[async_trait]
+pub trait LeaderCallbacks: Send + Sync {
+    /// Called when this instance becomes the leader.
+    /// Receives a cancellation token that is cancelled when leadership is lost.
+    async fn on_started_leading(&self, cancel: CancellationToken);
+
+    /// Called when this instance stops being the leader (always called, even if never led).
+    async fn on_stopped_leading(&self);
+
+    /// Called when a new leader is observed (fire-and-forget, runs in a separate task).
+    async fn on_new_leader(&self, identity: String);
+}
diff --git a/crates/leader-election/src/clock.rs b/crates/leader-election/src/clock.rs
new file mode 100644
index 0000000..355e8e7
--- /dev/null
+++ b/crates/leader-election/src/clock.rs
@@ -0,0 +1,33 @@
+// Copyright 2024 RustFS Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Injectable clock trait for testability.
+
+use chrono::{DateTime, Utc};
+
+/// Abstraction over time source, allowing mock clocks in tests.
+pub trait Clock: Send + Sync {
+    /// Returns the current time.
+    fn now(&self) -> DateTime<Utc>;
+}
+
+/// Production clock backed by the system time.
+#[derive(Debug, Clone, Copy)]
+pub struct SystemClock;
+
+impl Clock for SystemClock {
+    fn now(&self) -> DateTime<Utc> {
+        Utc::now()
+    }
+}
diff --git a/crates/leader-election/src/config.rs b/crates/leader-election/src/config.rs
new file mode 100644
index 0000000..dfa239f
--- /dev/null
+++ b/crates/leader-election/src/config.rs
@@ -0,0 +1,34 @@
+// Copyright 2024 RustFS Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Leader election configuration and validation.
+
+use std::time::Duration;
+
+/// Configuration for the leader elector.
+///
+/// Constraint: `lease_duration > renew_deadline > retry_period * 1.2`
+#[derive(Debug, Clone)]
+pub struct LeaderElectorConfig {
+    /// Unique identity for this instance (typically pod name or hostname).
+    pub identity: String,
+    /// Duration a non-leader waits before attempting to acquire (default 15s).
+    pub lease_duration: Duration,
+    /// Deadline within which the leader must successfully renew (default 10s).
+    pub renew_deadline: Duration,
+    /// Interval between retry attempts (default 2s).
+    pub retry_period: Duration,
+    /// Whether to release the lock when the cancel token fires (default true).
+    pub release_on_cancel: bool,
+}
diff --git a/crates/leader-election/src/elector.rs b/crates/leader-election/src/elector.rs
new file mode 100644
index 0000000..37e3e99
--- /dev/null
+++ b/crates/leader-election/src/elector.rs
@@ -0,0 +1,664 @@
+// Copyright 2024 RustFS Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Core leader elector logic.
+
+use std::sync::Arc;
+use std::time::Duration;
+
+use chrono::{DateTime, Utc};
+use tokio::sync::RwLock;
+use tokio_util::sync::CancellationToken;
+use tracing::{debug, info, warn};
+
+use crate::callbacks::LeaderCallbacks;
+use crate::clock::Clock;
+use crate::config::LeaderElectorConfig;
+use crate::error::Error;
+use crate::lock::Lock;
+use crate::observed::ObservedState;
+use crate::record::LeaderElectionRecord;
+use crate::state::{LeaderElectorHandle, LeaderState};
+
+/// The leader elector — drives the acquire/renew/release loop against a Lock backend.
+pub struct LeaderElector<L: Lock> {
+    config: LeaderElectorConfig,
+    lock: L,
+    observed: Arc<RwLock<ObservedState>>,
+    clock: Box<dyn Clock>,
+}
+
+impl<L: Lock> LeaderElector<L> {
+    /// Create a new LeaderElector with the given configuration, lock, and clock.
+    ///
+    /// Returns `Error::InvalidConfig` if the timing constraints are violated:
+    /// `lease_duration > renew_deadline > retry_period * 1.2`
+    pub fn new(
+        config: LeaderElectorConfig,
+        lock: L,
+        clock: impl Clock + 'static,
+    ) -> Result<Self, Error> {
+        // Validate config constraints: lease_duration > renew_deadline > retry_period * 1.2
+        let min_renew = config.retry_period.mul_f64(1.2);
+        if config.lease_duration <= config.renew_deadline {
+            return Err(Error::InvalidConfig {
+                message: format!(
+                    "lease_duration ({:?}) must be greater than renew_deadline ({:?})",
+                    config.lease_duration, config.renew_deadline
+                ),
+            });
+        }
+        if config.renew_deadline <= min_renew {
+            return Err(Error::InvalidConfig {
+                message: format!(
+                    "renew_deadline ({:?}) must be greater than retry_period * 1.2 ({:?})",
+                    config.renew_deadline, min_renew
+                ),
+            });
+        }
+        if config.identity.is_empty() {
+            return Err(Error::InvalidConfig {
+                message: "identity must not be empty".to_string(),
+            });
+        }
+
+        Ok(Self {
+            config,
+            lock,
+            observed: Arc::new(RwLock::new(ObservedState::default())),
+            clock: Box::new(clock),
+        })
+    }
+
+    /// Run the leader election loop until the cancel token is triggered.
+    ///
+    /// Blocks the current task, cycling through acquire → renew → release phases.
+    /// On transient lease/renewal loss, it retries acquisition after releasing.
+    /// Always calls `on_stopped_leading` before returning, even if never became leader.
+    pub async fn run(
+        &self,
+        callbacks: impl LeaderCallbacks + 'static,
+        cancel: CancellationToken,
+    ) -> Result<(), Error> {
+        let callbacks = Arc::new(callbacks);
+        info!(identity = %self.config.identity, lock = %self.lock.describe(), "starting leader election");
+
+        loop {
+            // Phase 1: acquire
+            if cancel.is_cancelled() {
+                callbacks.on_stopped_leading().await;
+                return Ok(());
+            }
+
+            if !self.acquire(&cancel, &callbacks).await {
+                // Cancelled during acquire
+                callbacks.on_stopped_leading().await;
+                return Ok(());
+            }
+
+            // We are now the leader. Create a child token for the leading task.
+            let leading_cancel = CancellationToken::new();
+
+            // Phase 2: start the user's leading function in a separate task
+            let leading_handle = {
+                let lc = leading_cancel.clone();
+                let cb = callbacks.clone();
+                tokio::spawn(async move {
+                    cb.on_started_leading(lc).await;
+                })
+            };
+
+            // Phase 3: renew loop (exits on cancel or renew failure)
+            // false => stop; true => retry acquisition.
+            let should_retry = self.renew(&cancel).await;
+
+            // We lost leadership (or lost renew loop due cancel).
+            // Stop the leading task.
+            leading_cancel.cancel();
+            // Wait for the leading task to finish.
+            let _ = leading_handle.await;
+
+            // Phase 4: release if configured
+            if self.config.release_on_cancel {
+                self.release().await;
+            }
+
+            if !should_retry {
+                info!(identity = %self.config.identity, "stopped leading");
+                // Phase 5: notify stopped
+                callbacks.on_stopped_leading().await;
+                return Ok(());
+            }
+
+            warn!(identity = %self.config.identity, "lost leadership; retrying election");
+        }
+    }
+
+    /// Spawn the elector as a background task, returning a handle for state observation
+    /// and a JoinHandle for error propagation.
+    pub fn spawn(
+        self,
+        callbacks: impl LeaderCallbacks + 'static,
+        cancel: CancellationToken,
+    ) -> (
+        LeaderElectorHandle,
+        tokio::task::JoinHandle<Result<(), Error>>,
+    )
+    where
+        L: 'static,
+    {
+        let (state_tx, state_rx) = tokio::sync::watch::channel(LeaderState::Pending);
+        let handle = LeaderElectorHandle { state_rx };
+        let join = tokio::spawn(async move {
+            // Wrap callbacks to also update the watch channel
+            let wrapped = StateTrackingCallbacks {
+                inner: callbacks,
+                state_tx,
+            };
+            self.run(wrapped, cancel).await
+        });
+        (handle, join)
+    }
+
+    // ─── Core algorithm ───────────────────────────────────────────────
+
+    /// Acquire phase: loop trying to acquire the lock until success or cancellation.
+    async fn acquire(
+        &self,
+        cancel: &CancellationToken,
+        callbacks: &Arc<impl LeaderCallbacks + ?Sized>,
+    ) -> bool {
+        info!(identity = %self.config.identity, "attempting to acquire leader lock");
+        loop {
+            // Jittered sleep, interruptible by cancel
+            let jittered = self.jittered_retry_period();
+            tokio::select! {
+                _ = tokio::time::sleep(jittered) => {},
+                _ = cancel.cancelled() => {
+                    debug!(identity = %self.config.identity, "acquire cancelled");
+                    return false;
+                }
+            }
+
+            if self.try_acquire_or_renew().await {
+                info!(identity = %self.config.identity, "successfully acquired lease");
+                self.maybe_report_transition(callbacks).await;
+                return true;
+            }
+        }
+    }
+
+    /// Renew phase: keep renewing the lease, giving up after renew_deadline of failures.
+    /// Returns `true` when leadership should be re-acquired, `false` when stopping.
+    async fn renew(&self, cancel: &CancellationToken) -> bool {
+        loop {
+            if cancel.is_cancelled() {
+                debug!(identity = %self.config.identity, "renew cancelled");
+                return false;
+            }
+
+            if self.poll_renew(cancel).await {
+                // Renewal succeeded at least once; wait before next cycle, interruptible.
+                tokio::select! {
+                    _ = tokio::time::sleep(self.config.retry_period) => {},
+                    _ = cancel.cancelled() => {
+                        debug!(identity = %self.config.identity, "renew sleep cancelled");
+                        return false;
+                    }
+                }
+            } else {
+                // Failed to renew within renew_deadline — give up leadership.
+                if cancel.is_cancelled() {
+                    debug!(identity = %self.config.identity, "renew cancelled");
+                    return false;
+                }
+
+                warn!(identity = %self.config.identity, "failed to renew lease within deadline, retrying election");
+                return true;
+            }
+        }
+    }
+
+    /// Inner renew loop: retry within renew_deadline window.
+    /// Returns `true` if at least one renewal succeeded.
+    /// Returns `false` if the renew window expires or cancellation is requested.
+    async fn poll_renew(&self, cancel: &CancellationToken) -> bool {
+        let deadline = self.clock.now()
+            + chrono::Duration::from_std(self.config.renew_deadline)
+                .unwrap_or(chrono::Duration::seconds(10));
+        loop {
+            if cancel.is_cancelled() {
+                return false;
+            }
+
+            if self.try_acquire_or_renew().await {
+                return true;
+            }
+            if self.clock.now() >= deadline {
+                return false;
+            }
+            tokio::select! {
+                _ = tokio::time::sleep(self.config.retry_period) => {}
+                _ = cancel.cancelled() => {
+                    return false;
+                }
+            }
+        }
+    }
+
+    /// Try to acquire or renew the lease in a single attempt.
+    ///
+    /// Fast path: if we are already leader and lease is still valid, just update.
+    /// Slow path: get current state, decide if we can take over, then create/update.
+    async fn try_acquire_or_renew(&self) -> bool {
+        let now = self.clock.now();
+
+        // Fast path: we are leader and lease is still valid — just renew.
+        if self.is_leader().await && self.is_lease_valid(now).await {
+            let record = self.build_record(now).await;
+            match self.lock.update(record.clone()).await {
+                Ok(()) => {
+                    self.update_observed(record, now).await;
+                    return true;
+                }
+                Err(e) => {
+                    debug!(error = %e, "fast-path update failed, falling through to slow path");
+                    // Fall through to slow path
+                }
+            }
+        }
+
+        // Slow path: Get current record
+        let old_record = match self.lock.get().await {
+            Ok(Some(r)) => {
+                self.update_observed(r.clone(), now).await;
+                r
+            }
+            Ok(None) => {
+                // No existing lease — create one
+                let record = self.build_record(now).await;
+                return match self.lock.create(record.clone()).await {
+                    Ok(()) => {
+                        debug!(identity = %self.config.identity, "created new lease");
+                        self.update_observed(record, now).await;
+                        true
+                    }
+                    Err(e) => {
+                        debug!(error = %e, "failed to create lease");
+                        false
+                    }
+                };
+            }
+            Err(e) => {
+                debug!(error = %e, "failed to get lease");
+                return false;
+            }
+        };
+
+        // Check if we can take over
+        if !old_record.holder_identity.is_empty()
+            && self.is_lease_valid_with(&old_record, now)
+            && !self.is_leader_with(&old_record)
+        {
+            // Lease is held by someone else and still valid — give up this attempt
+            return false;
+        }
+
+        // Build new record
+        let mut record = self.build_record(now).await;
+        if !self.is_leader_with(&old_record) {
+            // New leader taking over — increment transitions
+            record.leader_transitions = old_record.leader_transitions + 1;
+        } else {
+            // We were already leader — preserve acquire time and transition count
+            record.acquire_time = old_record.acquire_time;
+            record.leader_transitions = old_record.leader_transitions;
+        }
+
+        match self.lock.update(record.clone()).await {
+            Ok(()) => {
+                self.update_observed(record, now).await;
+                true
+            }
+            Err(e) => {
+                debug!(error = %e, "failed to update lease");
+                false
+            }
+        }
+    }
+
+    /// Release the lease by clearing the holder identity.
+    async fn release(&self) -> bool {
+        let old_record = match self.lock.get().await {
+            Ok(Some(r)) => r,
+            Ok(None) => return true,
+            Err(e) => {
+                warn!(error = %e, "failed to get lease for release");
+                return false;
+            }
+        };
+
+        if !self.is_leader_with(&old_record) {
+            return true;
+        }
+
+        let now = self.clock.now();
+        let record = LeaderElectionRecord {
+            holder_identity: String::new(),
+            lease_duration_seconds: 1,
+            acquire_time: now,
+            renew_time: now,
+            leader_transitions: old_record.leader_transitions,
+        };
+
+        match self.lock.update(record).await {
+            Ok(()) => {
+                info!(identity = %self.config.identity, "released leader lock");
+                true
+            }
+            Err(e) => {
+                warn!(error = %e, "failed to release leader lock");
+                false
+            }
+        }
+    }
+
+    // ─── Helpers ──────────────────────────────────────────────────────
+
+    /// Check if we are the current leader based on observed state.
+    async fn is_leader(&self) -> bool {
+        let observed = self.observed.read().await;
+        observed
+            .record
+            .as_ref()
+            .map(|r| r.holder_identity == self.config.identity)
+            .unwrap_or(false)
+    }
+
+    /// Check if we are the leader according to a specific record.
+    fn is_leader_with(&self, record: &LeaderElectionRecord) -> bool {
+        record.holder_identity == self.config.identity
+    }
+
+    /// Check if the observed lease is still valid (based on observed_time + lease_duration).
+    async fn is_lease_valid(&self, now: DateTime<Utc>) -> bool {
+        let observed = self.observed.read().await;
+        match (observed.record.as_ref(), observed.observed_time) {
+            (Some(record), Some(obs_time)) => {
+                let duration = Duration::from_secs(record.lease_duration_seconds as u64);
+                now < obs_time + duration
+            }
+            _ => false,
+        }
+    }
+
+    /// Check if a specific record's lease is still valid relative to now.
+    fn is_lease_valid_with(&self, record: &LeaderElectionRecord, now: DateTime<Utc>) -> bool {
+        let duration = Duration::from_secs(record.lease_duration_seconds as u64);
+        // Use renew_time as the basis for validity (the last time the lease was refreshed)
+        now < record.renew_time + duration
+    }
+
+    /// Build a new election record for this identity.
+    async fn build_record(&self, now: DateTime<Utc>) -> LeaderElectionRecord {
+        let observed = self.observed.read().await;
+        let acquire_time = if self.is_leader_inner(&observed) {
+            // Preserve existing acquire time if we're already leader
+            observed
+                .record
+                .as_ref()
+                .map(|r| r.acquire_time)
+                .unwrap_or(now)
+        } else {
+            now
+        };
+
+        LeaderElectionRecord {
+            holder_identity: self.config.identity.clone(),
+            lease_duration_seconds: self.config.lease_duration.as_secs() as i32,
+            acquire_time,
+            renew_time: now,
+            leader_transitions: observed
+                .record
+                .as_ref()
+                .map(|r| r.leader_transitions)
+                .unwrap_or(0),
+        }
+    }
+
+    /// Internal leader check using an already-borrowed observed state.
+    fn is_leader_inner(&self, observed: &ObservedState) -> bool {
+        observed
+            .record
+            .as_ref()
+            .map(|r| r.holder_identity == self.config.identity)
+            .unwrap_or(false)
+    }
+
+    /// Update observed state after a successful get or update.
+    async fn update_observed(&self, record: LeaderElectionRecord, now: DateTime<Utc>) {
+        let mut observed = self.observed.write().await;
+        observed.record = Some(record);
+        observed.observed_time = Some(now);
+    }
+
+    /// Check if a new leader has been observed and fire the on_new_leader callback.
+    /// Deduplicates: only fires when the leader identity changes.
+    async fn maybe_report_transition(&self, callbacks: &Arc<impl LeaderCallbacks + ?Sized>) {
+        let mut observed = self.observed.write().await;
+        let current_leader = observed
+            .record
+            .as_ref()
+            .map(|r| r.holder_identity.clone())
+            .unwrap_or_default();
+
+        // Dedup: skip if we already reported this leader
+        if observed.reported_leader.as_deref() == Some(&current_leader) {
+            return;
+        }
+
+        observed.reported_leader = Some(current_leader.clone());
+
+        if !current_leader.is_empty() {
+            debug!(new_leader = %current_leader, "observed new leader");
+            // Fire the on_new_leader callback (runs in caller's task; callbacks are expected
+            // to be fast or spawn their own work).
+            callbacks.on_new_leader(current_leader).await;
+        }
+    }
+
+    /// Add jitter to retry period (factor 1.2, matching client-go).
+    fn jittered_retry_period(&self) -> Duration {
+        let jitter = self.config.retry_period.as_secs_f64() * 0.2 * rand::random::<f64>();
+        self.config.retry_period + Duration::from_secs_f64(jitter)
+    }
+}
+
+/// Internal callbacks wrapper that updates the watch channel on state transitions.
+struct StateTrackingCallbacks<C: LeaderCallbacks> {
+    inner: C,
+    state_tx: tokio::sync::watch::Sender<LeaderState>,
+}
+
+#[async_trait::async_trait]
+impl<C: LeaderCallbacks> LeaderCallbacks for StateTrackingCallbacks<C> {
+    async fn on_started_leading(&self, cancel: CancellationToken) {
+        let _ = self.state_tx.send(LeaderState::Leading);
+        self.inner.on_started_leading(cancel).await;
+    }
+
+    async fn on_stopped_leading(&self) {
+        let _ = self.state_tx.send(LeaderState::Pending);
+        self.inner.on_stopped_leading().await;
+    }
+
+    async fn on_new_leader(&self, identity: String) {
+        // Update state channel: if we're not currently Leading, report Following
+        let is_leading = matches!(&*self.state_tx.borrow(), LeaderState::Leading);
+        if !is_leading && !identity.is_empty() {
+            let _ = self.state_tx.send(LeaderState::Following(identity.clone()));
+        }
+        self.inner.on_new_leader(identity).await;
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::clock::SystemClock;
+
+    /// Minimal config for tests (passes validation).
+    fn test_config(identity: &str) -> LeaderElectorConfig {
+        LeaderElectorConfig {
+            identity: identity.to_string(),
+            lease_duration: Duration::from_secs(15),
+            renew_deadline: Duration::from_secs(10),
+            retry_period: Duration::from_secs(2),
+            release_on_cancel: true,
+        }
+    }
+
+    #[test]
+    fn test_config_validation_ok() {
+        let config = test_config("node-1");
+        let result = LeaderElector::new(config, DummyLock::new("node-1"), SystemClock);
+        assert!(result.is_ok());
+    }
+
+    #[test]
+    fn test_config_validation_lease_too_short() {
+        let config = LeaderElectorConfig {
+            identity: "node-1".to_string(),
+            lease_duration: Duration::from_secs(5),
+            renew_deadline: Duration::from_secs(10),
+            retry_period: Duration::from_secs(2),
+            release_on_cancel: true,
+        };
+        let result = LeaderElector::new(config, DummyLock::new("node-1"), SystemClock);
+        assert!(matches!(result, Err(Error::InvalidConfig { .. })));
+    }
+
+    #[test]
+    fn test_config_validation_renew_too_short() {
+        let config = LeaderElectorConfig {
+            identity: "node-1".to_string(),
+            lease_duration: Duration::from_secs(15),
+            renew_deadline: Duration::from_secs(2),
+            retry_period: Duration::from_secs(2),
+            release_on_cancel: true,
+        };
+        let result = LeaderElector::new(config, DummyLock::new("node-1"), SystemClock);
+        assert!(matches!(result, Err(Error::InvalidConfig { .. })));
+    }
+
+    #[test]
+    fn test_config_validation_empty_identity() {
+        let config = LeaderElectorConfig {
+            identity: String::new(),
+            lease_duration: Duration::from_secs(15),
+            renew_deadline: Duration::from_secs(10),
+            retry_period: Duration::from_secs(2),
+            release_on_cancel: true,
+        };
+        let result = LeaderElector::new(config, DummyLock::new(""), SystemClock);
+        assert!(matches!(result, Err(Error::InvalidConfig { .. })));
+    }
+
+    #[test]
+    fn test_jittered_retry_period() {
+        let config = test_config("node-1");
+        let elector = LeaderElector::new(config, DummyLock::new("node-1"), SystemClock).unwrap();
+        for _ in 0..100 {
+            let jittered = elector.jittered_retry_period();
+            // Should be between retry_period and retry_period * 1.2
+            assert!(jittered >= Duration::from_secs(2));
+            assert!(jittered <= Duration::from_secs_f64(2.4 + 0.001));
+        }
+    }
+
+    #[test]
+    fn test_is_leader_with() {
+        let config = test_config("node-1");
+        let elector = LeaderElector::new(config, DummyLock::new("node-1"), SystemClock).unwrap();
+
+        let record = LeaderElectionRecord {
+            holder_identity: "node-1".to_string(),
+            lease_duration_seconds: 15,
+            acquire_time: Utc::now(),
+            renew_time: Utc::now(),
+            leader_transitions: 0,
+        };
+        assert!(elector.is_leader_with(&record));
+
+        let record_other = LeaderElectionRecord {
+            holder_identity: "node-2".to_string(),
+            ..record
+        };
+        assert!(!elector.is_leader_with(&record_other));
+    }
+
+    #[test]
+    fn test_is_lease_valid_with() {
+        let config = test_config("node-1");
+        let elector = LeaderElector::new(config, DummyLock::new("node-1"), SystemClock).unwrap();
+        let now = Utc::now();
+
+        // Valid: renew_time + duration is in the future
+        let record = LeaderElectionRecord {
+            holder_identity: "node-1".to_string(),
+            lease_duration_seconds: 15,
+            acquire_time: now,
+            renew_time: now,
+            leader_transitions: 0,
+        };
+        assert!(elector.is_lease_valid_with(&record, now));
+
+        // Expired: check time is past renew_time + duration
+        let future = now + chrono::Duration::seconds(20);
+        assert!(!elector.is_lease_valid_with(&record, future));
+    }
+
+    // ─── Dummy lock for unit tests ────────────────────────────────────
+
+    struct DummyLock {
+        identity: String,
+    }
+
+    impl DummyLock {
+        fn new(identity: &str) -> Self {
+            Self {
+                identity: identity.to_string(),
+            }
+        }
+    }
+
+    #[async_trait::async_trait]
+    impl Lock for DummyLock {
+        async fn get(&self) -> Result<Option<LeaderElectionRecord>, Error> {
+            Ok(None)
+        }
+        async fn create(&self, _record: LeaderElectionRecord) -> Result<(), Error> {
+            Ok(())
+        }
+        async fn update(&self, _record: LeaderElectionRecord) -> Result<(), Error> {
+            Ok(())
+        }
+        fn identity(&self) -> &str {
+            &self.identity
+        }
+        fn describe(&self) -> String {
+            format!("dummy/{}", self.identity)
+        }
+    }
+}
diff --git a/crates/leader-election/src/error.rs b/crates/leader-election/src/error.rs
new file mode 100644
index 0000000..b503b58
--- /dev/null
+++ b/crates/leader-election/src/error.rs
@@ -0,0 +1,37 @@
+// Copyright 2024 RustFS Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Error types for leader election.
+
+use snafu::Snafu;
+
+/// Errors that can occur during leader election.
+#[derive(Debug, Snafu)]
+pub enum Error {
+    /// Invalid configuration (constraint violation).
+    #[snafu(display("invalid config: {message}"))]
+    InvalidConfig { message: String },
+
+    /// Kubernetes API error.
+    #[snafu(display("Kubernetes API error: {source}"))]
+    KubeApi { source: kube::Error },
+
+    /// Lease conflict (resourceVersion mismatch).
+    #[snafu(display("lease conflict (resourceVersion mismatch)"))]
+    Conflict,
+
+    /// Clock error.
+    #[snafu(display("clock error: {message}"))]
+    Clock { message: String },
+}
diff --git a/crates/leader-election/src/lib.rs b/crates/leader-election/src/lib.rs
new file mode 100644
index 0000000..1ca85f7
--- /dev/null
+++ b/crates/leader-election/src/lib.rs
@@ -0,0 +1,40 @@
+// Copyright 2024 RustFS Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Kubernetes leader election for RustFS Operator.
+//!
+//! Provides active/standby mutual exclusion for multi-replica operator deployments
+//! using Kubernetes Lease resources as the lock backend.
+
+pub mod callbacks;
+pub mod clock;
+pub mod config;
+pub mod elector;
+pub mod error;
+pub mod lock;
+pub mod metrics;
+pub mod observed;
+pub mod record;
+pub mod state;
+
+// Re-export core public types for convenience.
+pub use callbacks::LeaderCallbacks;
+pub use clock::{Clock, SystemClock};
+pub use config::LeaderElectorConfig;
+pub use elector::LeaderElector;
+pub use error::Error;
+pub use lock::Lock;
+pub use lock::lease::LeaseLock;
+pub use record::LeaderElectionRecord;
+pub use state::{LeaderElectorHandle, LeaderState};
diff --git a/crates/leader-election/src/lock/lease.rs b/crates/leader-election/src/lock/lease.rs
new file mode 100644
index 0000000..8acbb7b
--- /dev/null
+++ b/crates/leader-election/src/lock/lease.rs
@@ -0,0 +1,260 @@
+// Copyright 2024 RustFS Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Kubernetes Lease-based lock implementation.
+
+use async_trait::async_trait;
+use k8s_openapi::api::coordination::v1::{Lease, LeaseSpec};
+use k8s_openapi::apimachinery::pkg::apis::meta::v1::{MicroTime, ObjectMeta};
+use kube::Client;
+use kube::api::{Api, PostParams};
+use tracing::debug;
+
+use super::Lock;
+use crate::error::Error;
+use crate::record::LeaderElectionRecord;
+
+/// A lock backed by a Kubernetes Lease resource.
+pub struct LeaseLock {
+    client: Client,
+    name: String,
+    namespace: String,
+    identity: String,
+    /// Cached lease object (for resourceVersion tracking).
+    cached: tokio::sync::Mutex<Option<Lease>>,
+}
+
+impl LeaseLock {
+    /// Create a new LeaseLock.
+    pub fn new(
+        client: Client,
+        name: impl Into<String>,
+        namespace: impl Into<String>,
+        identity: impl Into<String>,
+    ) -> Self {
+        Self {
+            client,
+            name: name.into(),
+            namespace: namespace.into(),
+            identity: identity.into(),
+            cached: tokio::sync::Mutex::new(None),
+        }
+    }
+
+    /// Build a Lease object from a LeaderElectionRecord.
+    fn build_lease(
+        &self,
+        record: &LeaderElectionRecord,
+        resource_version: Option<String>,
+    ) -> Lease {
+        Lease {
+            metadata: ObjectMeta {
+                name: Some(self.name.clone()),
+                namespace: Some(self.namespace.clone()),
+                resource_version,
+                ..Default::default()
+            },
+            spec: Some(record_to_spec(record)),
+        }
+    }
+
+    /// Return a namespaced Api handle for Lease resources.
+    fn api(&self) -> Api<Lease> {
+        Api::<Lease>::namespaced(self.client.clone(), &self.namespace)
+    }
+}
+
+/// Convert a LeaderElectionRecord into a LeaseSpec.
+fn record_to_spec(record: &LeaderElectionRecord) -> LeaseSpec {
+    LeaseSpec {
+        holder_identity: Some(record.holder_identity.clone()),
+        lease_duration_seconds: Some(record.lease_duration_seconds),
+        acquire_time: Some(MicroTime(record.acquire_time)),
+        renew_time: Some(MicroTime(record.renew_time)),
+        lease_transitions: Some(record.leader_transitions),
+    }
+}
+
+/// Convert a LeaseSpec into a LeaderElectionRecord.
+///
+/// Returns `None` if required fields are missing from the spec.
+fn spec_to_record(spec: &LeaseSpec) -> Option<LeaderElectionRecord> {
+    let acquire_time = spec.acquire_time.as_ref()?.0;
+    let renew_time = spec.renew_time.as_ref()?.0;
+
+    Some(LeaderElectionRecord {
+        holder_identity: spec.holder_identity.clone().unwrap_or_default(),
+        lease_duration_seconds: spec.lease_duration_seconds.unwrap_or(0),
+        acquire_time,
+        renew_time,
+        leader_transitions: spec.lease_transitions.unwrap_or(0),
+    })
+}
+
+/// Check whether a kube::Error represents a 409 Conflict.
+fn is_conflict(err: &kube::Error) -> bool {
+    matches!(err, kube::Error::Api(e) if e.code == 409)
+}
+
+#[async_trait]
+impl Lock for LeaseLock {
+    async fn get(&self) -> Result<Option<LeaderElectionRecord>, Error> {
+        let api = self.api();
+
+        // get_opt returns Ok(None) on 404, propagating other errors.
+        let lease = match api.get_opt(&self.name).await {
+            Ok(Some(lease)) => lease,
+            Ok(None) => return Ok(None),
+            Err(e) => return Err(Error::KubeApi { source: e }),
+        };
+
+        let record = lease.spec.as_ref().and_then(spec_to_record);
+
+        // Cache the lease for subsequent update() calls (preserves resourceVersion).
+        {
+            let mut cached = self.cached.lock().await;
+            *cached = Some(lease);
+        }
+
+        debug!(
+            lease = %self.describe(),
+            has_record = record.is_some(),
+            "fetched lease"
+        );
+
+        Ok(record)
+    }
+
+    async fn create(&self, record: LeaderElectionRecord) -> Result<(), Error> {
+        let api = self.api();
+        let lease = self.build_lease(&record, None);
+
+        let created = api
+            .create(&PostParams::default(), &lease)
+            .await
+            .map_err(|e| Error::KubeApi { source: e })?;
+
+        // Cache the created lease (server assigns resourceVersion).
+        {
+            let mut cached = self.cached.lock().await;
+            *cached = Some(created);
+        }
+
+        debug!(
+            lease = %self.describe(),
+            holder = %record.holder_identity,
+            "created lease"
+        );
+
+        Ok(())
+    }
+
+    async fn update(&self, record: LeaderElectionRecord) -> Result<(), Error> {
+        let mut cached = self.cached.lock().await;
+
+        let resource_version = cached
+            .as_ref()
+            .and_then(|lease| lease.metadata.resource_version.clone())
+            .ok_or_else(|| Error::Conflict)?;
+
+        let api = self.api();
+        let lease = self.build_lease(&record, Some(resource_version));
+
+        let updated = match api
+            .replace(&self.name, &PostParams::default(), &lease)
+            .await
+        {
+            Ok(lease) => lease,
+            Err(e) => {
+                if is_conflict(&e) {
+                    *cached = None;
+                    return Err(Error::Conflict);
+                }
+                return Err(Error::KubeApi { source: e });
+            }
+        };
+
+        *cached = Some(updated);
+
+        debug!(
+            lease = %self.describe(),
+            holder = %record.holder_identity,
+            "updated lease"
+        );
+
+        Ok(())
+    }
+
+    fn identity(&self) -> &str {
+        &self.identity
+    }
+
+    fn describe(&self) -> String {
+        format!("{}/{}", self.namespace, self.name)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use chrono::{TimeZone, Utc};
+
+    fn sample_record() -> LeaderElectionRecord {
+        LeaderElectionRecord {
+            holder_identity: "pod-abc".into(),
+            lease_duration_seconds: 15,
+            acquire_time: Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 0).unwrap(),
+            renew_time: Utc.with_ymd_and_hms(2026, 1, 1, 0, 0, 10).unwrap(),
+            leader_transitions: 3,
+        }
+    }
+
+    #[test]
+    fn record_to_spec_roundtrip() {
+        let record = sample_record();
+        let spec = record_to_spec(&record);
+        let back = spec_to_record(&spec).expect("roundtrip should succeed");
+
+        assert_eq!(back.holder_identity, record.holder_identity);
+        assert_eq!(back.lease_duration_seconds, record.lease_duration_seconds);
+        assert_eq!(back.acquire_time, record.acquire_time);
+        assert_eq!(back.renew_time, record.renew_time);
+        assert_eq!(back.leader_transitions, record.leader_transitions);
+    }
+
+    #[test]
+    fn spec_to_record_empty_holder() {
+        let spec = LeaseSpec {
+            holder_identity: None,
+            lease_duration_seconds: Some(15),
+            acquire_time: Some(MicroTime(Utc::now())),
+            renew_time: Some(MicroTime(Utc::now())),
+            lease_transitions: Some(0),
+        };
+        let record = spec_to_record(&spec).unwrap();
+        assert_eq!(record.holder_identity, "");
+    }
+
+    #[test]
+    fn spec_to_record_missing_times() {
+        let spec = LeaseSpec {
+            holder_identity: Some("pod-1".into()),
+            lease_duration_seconds: Some(15),
+            acquire_time: None,
+            renew_time: None,
+            lease_transitions: None,
+        };
+        assert!(spec_to_record(&spec).is_none());
+    }
+}
diff --git a/crates/leader-election/src/lock/mod.rs b/crates/leader-election/src/lock/mod.rs
new file mode 100644
index 0000000..72f3c44
--- /dev/null
+++ b/crates/leader-election/src/lock/mod.rs
@@ -0,0 +1,41 @@
+// Copyright 2024 RustFS Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Lock trait abstraction for leader election backends.
+
+pub mod lease;
+
+use async_trait::async_trait;
+
+use crate::error::Error;
+use crate::record::LeaderElectionRecord;
+
+/// Abstraction over a distributed lock backend (e.g., Kubernetes Lease).
+#[async_trait]
+pub trait Lock: Send + Sync {
+    /// Fetch the current election record. Returns None if the lock does not exist yet.
+    async fn get(&self) -> Result<Option<LeaderElectionRecord>, Error>;
+
+    /// Create a new lock with the given record (first-time election).
+    async fn create(&self, record: LeaderElectionRecord) -> Result<(), Error>;
+
+    /// Update an existing lock (relies on resourceVersion for optimistic concurrency).
+    async fn update(&self, record: LeaderElectionRecord) -> Result<(), Error>;
+
+    /// Returns the unique identity of this lock holder.
+    fn identity(&self) -> &str;
+
+    /// Returns a human-readable description of this lock (for logging/debugging).
+    fn describe(&self) -> String;
+}
diff --git a/crates/leader-election/src/metrics.rs b/crates/leader-election/src/metrics.rs
new file mode 100644
index 0000000..3f8a94f
--- /dev/null
+++ b/crates/leader-election/src/metrics.rs
@@ -0,0 +1,27 @@
+// Copyright 2024 RustFS Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Optional metrics trait for leader election observability.
+
+/// Metrics hooks for leader election events.
+pub trait LeaderMetrics: Send + Sync {
+    /// Called when leadership is acquired.
+    fn on_acquire(&self);
+    /// Called when leadership is released or lost.
+    fn on_release(&self);
+    /// Called on a successful lease renewal.
+    fn on_renew_success(&self);
+    /// Called on a failed lease renewal attempt.
+    fn on_renew_failure(&self);
+}
diff --git a/crates/leader-election/src/observed.rs b/crates/leader-election/src/observed.rs
new file mode 100644
index 0000000..38680a7
--- /dev/null
+++ b/crates/leader-election/src/observed.rs
@@ -0,0 +1,30 @@
+// Copyright 2024 RustFS Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Locally observed election state (mirrors client-go observedRecord).
+
+use chrono::{DateTime, Utc};
+
+use crate::record::LeaderElectionRecord;
+
+/// State observed locally from the most recent successful Get or Update.
+#[derive(Debug, Clone, Default)]
+pub struct ObservedState {
+    /// Most recently observed election record.
+    pub record: Option<LeaderElectionRecord>,
+    /// Local time when the record was observed (used for lease validity).
+    pub observed_time: Option<DateTime<Utc>>,
+    /// Leader identity already reported via on_new_leader callback (dedup).
+    pub reported_leader: Option<String>,
+}
diff --git a/crates/leader-election/src/record.rs b/crates/leader-election/src/record.rs
new file mode 100644
index 0000000..a1fac29
--- /dev/null
+++ b/crates/leader-election/src/record.rs
@@ -0,0 +1,32 @@
+// Copyright 2024 RustFS Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Leader election record — maps to Kubernetes Lease spec fields.
+
+use chrono::{DateTime, Utc};
+
+/// Record stored in the Lease object, representing the current election state.
+#[derive(Debug, Clone)]
+pub struct LeaderElectionRecord {
+    /// Identity of the current leader (empty if released).
+    pub holder_identity: String,
+    /// Lease duration in seconds (i32 to match K8s API int32).
+    pub lease_duration_seconds: i32,
+    /// Time when the current leader acquired leadership.
+    pub acquire_time: DateTime<Utc>,
+    /// Time of the most recent renewal.
+    pub renew_time: DateTime<Utc>,
+    /// Number of leader transitions observed.
+    pub leader_transitions: i32,
+}
diff --git a/crates/leader-election/src/state.rs b/crates/leader-election/src/state.rs
new file mode 100644
index 0000000..3d95e1d
--- /dev/null
+++ b/crates/leader-election/src/state.rs
@@ -0,0 +1,65 @@
+// Copyright 2024 RustFS Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Leader state observation (watch-based handle for consumers).
+
+use tokio::sync::watch;
+
+/// Current state of the leader elector.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum LeaderState {
+    /// Not yet participating in election.
+    Pending,
+    /// This instance is the current leader.
+    Leading,
+    /// Following another leader identified by the given identity.
+    Following(String),
+    /// Elector has failed with the given error message.
+    Failed(String),
+}
+
+/// Handle for observing leader election state from outside the elector task.
+#[derive(Debug)]
+pub struct LeaderElectorHandle {
+    pub(crate) state_rx: watch::Receiver<LeaderState>,
+}
+
+impl LeaderElectorHandle {
+    /// Returns true if this instance is currently the leader (non-blocking).
+    pub fn is_leader(&self) -> bool {
+        *self.state_rx.borrow() == LeaderState::Leading
+    }
+
+    /// Returns the current leader identity, if known.
+    pub fn current_leader(&self) -> Option<String> {
+        match &*self.state_rx.borrow() {
+            LeaderState::Leading => None, // we are the leader
+            LeaderState::Following(id) => Some(id.clone()),
+            _ => None,
+        }
+    }
+
+    /// Subscribe to state changes as an async stream.
+    pub fn state_stream(&self) -> impl futures::Stream<Item = LeaderState> + '_ {
+        let mut rx = self.state_rx.clone();
+        async_stream::stream! {
+            loop {
+                if rx.changed().await.is_err() {
+                    break;
+                }
+                yield rx.borrow().clone();
+            }
+        }
+    }
+}
diff --git a/crates/leader-election/tests/integration_tests.rs b/crates/leader-election/tests/integration_tests.rs
new file mode 100644
index 0000000..e856a67
--- /dev/null
+++ b/crates/leader-election/tests/integration_tests.rs
@@ -0,0 +1,552 @@
+// Copyright 2024 RustFS Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Integration tests for leader election.
+
+use std::sync::Arc;
+use std::sync::atomic::{AtomicUsize, Ordering};
+use std::time::Duration;
+
+use async_trait::async_trait;
+use chrono::{DateTime, Utc};
+use tokio::sync::{Mutex, RwLock};
+use tokio_util::sync::CancellationToken;
+
+use kube_leader_election::{
+    Clock, Error, LeaderCallbacks, LeaderElectionRecord, LeaderElector, LeaderElectorConfig, Lock,
+};
+
+// ─── Test Helpers ───────────────────────────────────────────────────────────
+
+/// Fake lock with controllable behavior for testing.
+struct FakeLock {
+    identity: String,
+    record: Arc<RwLock<Option<LeaderElectionRecord>>>,
+    resource_version: Arc<Mutex<u64>>,
+    /// If set, update() will fail with Conflict this many times before succeeding.
+    conflict_count: Arc<AtomicUsize>,
+}
+
+impl FakeLock {
+    fn new(identity: &str) -> Self {
+        Self {
+            identity: identity.to_string(),
+            record: Arc::new(RwLock::new(None)),
+            resource_version: Arc::new(Mutex::new(0)),
+            conflict_count: Arc::new(AtomicUsize::new(0)),
+        }
+    }
+
+    /// Configure the lock to return a specific record on get().
+    async fn set_record(&self, record: LeaderElectionRecord) {
+        let mut r = self.record.write().await;
+        *r = Some(record);
+        let mut rv = self.resource_version.lock().await;
+        *rv += 1;
+    }
+
+    /// Configure the lock to fail with Conflict N times before succeeding.
+    fn set_conflicts(&self, count: usize) {
+        self.conflict_count.store(count, Ordering::SeqCst);
+    }
+}
+
+#[async_trait]
+impl Lock for FakeLock {
+    async fn get(&self) -> Result<Option<LeaderElectionRecord>, Error> {
+        let r = self.record.read().await;
+        Ok(r.clone())
+    }
+
+    async fn create(&self, record: LeaderElectionRecord) -> Result<(), Error> {
+        let mut r = self.record.write().await;
+        if r.is_some() {
+            return Err(Error::Conflict);
+        }
+        *r = Some(record);
+        let mut rv = self.resource_version.lock().await;
+        *rv += 1;
+        Ok(())
+    }
+
+    async fn update(&self, record: LeaderElectionRecord) -> Result<(), Error> {
+        // Simulate conflict if configured
+        let remaining = self.conflict_count.load(Ordering::SeqCst);
+        if remaining > 0 {
+            self.conflict_count.fetch_sub(1, Ordering::SeqCst);
+            return Err(Error::Conflict);
+        }
+
+        let mut r = self.record.write().await;
+        *r = Some(record);
+        let mut rv = self.resource_version.lock().await;
+        *rv += 1;
+        Ok(())
+    }
+
+    fn identity(&self) -> &str {
+        &self.identity
+    }
+
+    fn describe(&self) -> String {
+        format!("fake/{}", self.identity)
+    }
+}
+
+/// Mock clock with controllable time.
+#[derive(Clone)]
+struct MockClock {
+    now: Arc<RwLock<DateTime<Utc>>>,
+}
+
+impl MockClock {
+    fn new(time: DateTime<Utc>) -> Self {
+        Self {
+            now: Arc::new(RwLock::new(time)),
+        }
+    }
+}
+
+impl Clock for MockClock {
+    fn now(&self) -> DateTime<Utc> {
+        // Block on async read (safe in tests)
+        futures::executor::block_on(async { *self.now.read().await })
+    }
+}
+
+/// Test callbacks that record events.
+struct TestCallbacks {
+    started_leading: Arc<AtomicUsize>,
+    stopped_leading: Arc<AtomicUsize>,
+    new_leader: Arc<RwLock<Vec<String>>>,
+}
+
+impl TestCallbacks {
+    fn new() -> Self {
+        Self {
+            started_leading: Arc::new(AtomicUsize::new(0)),
+            stopped_leading: Arc::new(AtomicUsize::new(0)),
+            new_leader: Arc::new(RwLock::new(Vec::new())),
+        }
+    }
+
+    async fn started_count(&self) -> usize {
+        self.started_leading.load(Ordering::SeqCst)
+    }
+
+    async fn stopped_count(&self) -> usize {
+        self.stopped_leading.load(Ordering::SeqCst)
+    }
+
+    async fn leaders(&self) -> Vec<String> {
+        self.new_leader.read().await.clone()
+    }
+}
+
+#[async_trait]
+impl LeaderCallbacks for TestCallbacks {
+    async fn on_started_leading(&self, _cancel: CancellationToken) {
+        self.started_leading.fetch_add(1, Ordering::SeqCst);
+    }
+
+    async fn on_stopped_leading(&self) {
+        self.stopped_leading.fetch_add(1, Ordering::SeqCst);
+    }
+
+    async fn on_new_leader(&self, identity: String) {
+        let mut leaders = self.new_leader.write().await;
+        leaders.push(identity);
+    }
+}
+
+/// Owned wrapper around `Arc<TestCallbacks>` so we can implement `LeaderCallbacks`
+/// (we can't impl a foreign trait on `Arc<T>` directly due to the orphan rule).
+#[derive(Clone)]
+struct SharedCallbacks(Arc<TestCallbacks>);
+
+#[async_trait]
+impl LeaderCallbacks for SharedCallbacks {
+    async fn on_started_leading(&self, cancel: CancellationToken) {
+        self.0.on_started_leading(cancel).await;
+    }
+
+    async fn on_stopped_leading(&self) {
+        self.0.on_stopped_leading().await;
+    }
+
+    async fn on_new_leader(&self, identity: String) {
+        self.0.on_new_leader(identity).await;
+    }
+}
+
+/// Create a valid test config.
+fn test_config(identity: &str) -> LeaderElectorConfig {
+    LeaderElectorConfig {
+        identity: identity.to_string(),
+        lease_duration: Duration::from_secs(15),
+        renew_deadline: Duration::from_secs(10),
+        retry_period: Duration::from_millis(100),
+        release_on_cancel: true,
+    }
+}
+
+// ─── Tests ──────────────────────────────────────────────────────────────────
+
+/// Test 1: LeaseLock construction and LeaderElectionRecord field semantics.
+///
+/// Note on the fake kube client: the task description references
+/// `kube::Client::new_custom`, but kube 2.x does not expose a public fake/mock
+/// client constructor (only the internal `kube_client::Client::new` used by
+/// `kube::Client::mock` inside the kube crate's own tests). Rather than pull in
+/// a heavy envtest dependency for a unit test, the Lock-trait CRUD semantics
+/// are exercised by `FakeLock` below — `LeaseLock` is a thin wrapper around
+/// `Api<Lease>` that delegates to the same `Lock` trait, and the pure
+/// record↔spec conversion logic is covered by unit tests in `lock::lease`.
+#[test]
+fn test_lease_lock_create_update_get() {
+    // LeaderElectionRecord field semantics (these are what LeaseLock persists).
+    let record = LeaderElectionRecord {
+        holder_identity: "pod-1".to_string(),
+        lease_duration_seconds: 15,
+        acquire_time: Utc::now(),
+        renew_time: Utc::now(),
+        leader_transitions: 0,
+    };
+    assert_eq!(record.holder_identity, "pod-1");
+    assert_eq!(record.lease_duration_seconds, 15);
+    assert_eq!(record.leader_transitions, 0);
+
+    // FakeLock CRUD round-trip: create → get → update → get.
+    // This exercises the same Lock trait that LeaseLock implements.
+    let rt = tokio::runtime::Builder::new_current_thread()
+        .enable_all()
+        .build()
+        .unwrap();
+    rt.block_on(async {
+        let lock = FakeLock::new("pod-1");
+
+        // get() on a fresh lock returns None (lease does not exist yet).
+        assert!(lock.get().await.unwrap().is_none());
+
+        // create() installs a record.
+        let r1 = LeaderElectionRecord {
+            holder_identity: "pod-1".to_string(),
+            lease_duration_seconds: 15,
+            acquire_time: Utc::now(),
+            renew_time: Utc::now(),
+            leader_transitions: 0,
+        };
+        lock.create(r1.clone()).await.unwrap();
+
+        // get() now returns the created record.
+        let got = lock
+            .get()
+            .await
+            .unwrap()
+            .expect("record should exist after create");
+        assert_eq!(got.holder_identity, "pod-1");
+        assert_eq!(got.lease_duration_seconds, 15);
+
+        // create() again must fail with Conflict (lock already exists).
+        assert!(matches!(lock.create(r1).await, Err(Error::Conflict)));
+
+        // update() replaces the record.
+        let r2 = LeaderElectionRecord {
+            holder_identity: "pod-1".to_string(),
+            lease_duration_seconds: 15,
+            acquire_time: got.acquire_time,
+            renew_time: Utc::now(),
+            leader_transitions: 1,
+        };
+        lock.update(r2).await.unwrap();
+
+        let got2 = lock
+            .get()
+            .await
+            .unwrap()
+            .expect("record should exist after update");
+        assert_eq!(got2.leader_transitions, 1);
+    });
+}
+
+/// Test 2: Acquire when no holder exists.
+#[tokio::test]
+async fn test_acquire_no_holder() {
+    let lock = FakeLock::new("node-1");
+    let clock = MockClock::new(Utc::now());
+    let config = test_config("node-1");
+    let callbacks = Arc::new(TestCallbacks::new());
+
+    let elector = LeaderElector::new(config, lock, clock).unwrap();
+    let cancel = CancellationToken::new();
+    let cancel_clone = cancel.clone();
+
+    // Run elector in background
+    let cb = callbacks.clone();
+    let handle = tokio::spawn(async move { elector.run(SharedCallbacks(cb), cancel_clone).await });
+
+    // Wait a bit for acquire to succeed
+    tokio::time::sleep(Duration::from_millis(200)).await;
+
+    // Cancel to stop the elector
+    cancel.cancel();
+
+    // Wait for completion
+    let result = tokio::time::timeout(Duration::from_secs(2), handle).await;
+    assert!(result.is_ok(), "elector should complete");
+
+    // Verify callbacks were called
+    assert_eq!(
+        callbacks.started_count().await,
+        1,
+        "should have started leading"
+    );
+    assert!(
+        callbacks.stopped_count().await >= 1,
+        "should have stopped leading"
+    );
+}
+
+/// Test 3: Acquire when lease is expired.
+#[tokio::test]
+async fn test_acquire_expired_lease() {
+    let lock = FakeLock::new("node-2");
+
+    // Set an expired lease held by another node
+    let expired_time = Utc::now() - chrono::Duration::seconds(30);
+    let expired_record = LeaderElectionRecord {
+        holder_identity: "node-1".to_string(),
+        lease_duration_seconds: 15,
+        acquire_time: expired_time,
+        renew_time: expired_time,
+        leader_transitions: 0,
+    };
+    lock.set_record(expired_record).await;
+
+    let clock = MockClock::new(Utc::now());
+    let config = test_config("node-2");
+    let callbacks = Arc::new(TestCallbacks::new());
+
+    let elector = LeaderElector::new(config, lock, clock).unwrap();
+    let cancel = CancellationToken::new();
+    let cancel_clone = cancel.clone();
+
+    let cb = callbacks.clone();
+    let handle = tokio::spawn(async move { elector.run(SharedCallbacks(cb), cancel_clone).await });
+
+    tokio::time::sleep(Duration::from_millis(200)).await;
+    cancel.cancel();
+
+    let result = tokio::time::timeout(Duration::from_secs(2), handle).await;
+    assert!(result.is_ok(), "elector should complete");
+
+    // node-2 should have acquired the expired lease
+    assert_eq!(
+        callbacks.started_count().await,
+        1,
+        "should have started leading"
+    );
+}
+
+/// Test 4: Wait when lease is active and held by another.
+#[tokio::test]
+async fn test_acquire_active_lease() {
+    let lock = FakeLock::new("node-2");
+
+    // Set an active lease held by another node
+    let active_time = Utc::now();
+    let active_record = LeaderElectionRecord {
+        holder_identity: "node-1".to_string(),
+        lease_duration_seconds: 15,
+        acquire_time: active_time,
+        renew_time: active_time,
+        leader_transitions: 0,
+    };
+    lock.set_record(active_record).await;
+
+    let clock = MockClock::new(Utc::now());
+    let config = test_config("node-2");
+    let callbacks = Arc::new(TestCallbacks::new());
+
+    let elector = LeaderElector::new(config, lock, clock).unwrap();
+    let cancel = CancellationToken::new();
+    let cancel_clone = cancel.clone();
+
+    let cb = callbacks.clone();
+    let handle = tokio::spawn(async move { elector.run(SharedCallbacks(cb), cancel_clone).await });
+
+    // Wait a bit - node-2 should NOT acquire while lease is active
+    tokio::time::sleep(Duration::from_millis(300)).await;
+
+    // node-2 should not have started leading yet
+    assert_eq!(
+        callbacks.started_count().await,
+        0,
+        "should not have started leading while lease is active"
+    );
+
+    cancel.cancel();
+    let _ = tokio::time::timeout(Duration::from_secs(2), handle).await;
+}
+
+/// Test 5: Successful renewal.
+#[tokio::test]
+async fn test_renew_success() {
+    let lock = FakeLock::new("node-1");
+    let clock = MockClock::new(Utc::now());
+    let config = test_config("node-1");
+    let callbacks = Arc::new(TestCallbacks::new());
+
+    let elector = LeaderElector::new(config, lock, clock).unwrap();
+    let cancel = CancellationToken::new();
+    let cancel_clone = cancel.clone();
+
+    let cb = callbacks.clone();
+    let handle = tokio::spawn(async move { elector.run(SharedCallbacks(cb), cancel_clone).await });
+
+    // Let it acquire and renew a few times
+    tokio::time::sleep(Duration::from_millis(500)).await;
+
+    cancel.cancel();
+    let result = tokio::time::timeout(Duration::from_secs(2), handle).await;
+    assert!(result.is_ok(), "elector should complete");
+
+    // Should have started leading (acquire succeeded)
+    assert_eq!(
+        callbacks.started_count().await,
+        1,
+        "should have started leading"
+    );
+}
+
+/// Test 6: Renewal with resource version conflict (retry behavior).
+#[tokio::test]
+async fn test_renew_conflict() {
+    let lock = FakeLock::new("node-1");
+    // Configure 2 conflicts before success
+    lock.set_conflicts(2);
+
+    let clock = MockClock::new(Utc::now());
+    let config = test_config("node-1");
+    let callbacks = Arc::new(TestCallbacks::new());
+
+    let elector = LeaderElector::new(config, lock, clock).unwrap();
+    let cancel = CancellationToken::new();
+    let cancel_clone = cancel.clone();
+
+    let cb = callbacks.clone();
+    let handle = tokio::spawn(async move { elector.run(SharedCallbacks(cb), cancel_clone).await });
+
+    // Let it retry through conflicts
+    tokio::time::sleep(Duration::from_millis(600)).await;
+
+    cancel.cancel();
+    let result = tokio::time::timeout(Duration::from_secs(2), handle).await;
+    assert!(result.is_ok(), "elector should complete");
+
+    // Should eventually succeed after retries
+    assert_eq!(
+        callbacks.started_count().await,
+        1,
+        "should have started leading after retries"
+    );
+}
+
+/// Test 7: Leader transition callback is invoked.
+#[tokio::test]
+async fn test_leader_transition_callback() {
+    let lock = FakeLock::new("node-1");
+    let clock = MockClock::new(Utc::now());
+    let config = test_config("node-1");
+    let callbacks = Arc::new(TestCallbacks::new());
+
+    let elector = LeaderElector::new(config, lock, clock).unwrap();
+    let cancel = CancellationToken::new();
+    let cancel_clone = cancel.clone();
+
+    let cb = callbacks.clone();
+    let handle = tokio::spawn(async move { elector.run(SharedCallbacks(cb), cancel_clone).await });
+
+    tokio::time::sleep(Duration::from_millis(200)).await;
+    cancel.cancel();
+
+    let _ = tokio::time::timeout(Duration::from_secs(2), handle).await;
+
+    // The on_new_leader callback should have been called with our identity
+    let leaders = callbacks.leaders().await;
+    assert!(!leaders.is_empty(), "on_new_leader should have been called");
+    assert_eq!(leaders[0], "node-1", "leader identity should match");
+}
+
+/// Test 8: Concurrent acquire - only one candidate wins.
+#[tokio::test]
+async fn test_concurrent_acquire() {
+    // Shared lock between two candidates
+    let shared_record = Arc::new(RwLock::new(None));
+    let shared_rv = Arc::new(Mutex::new(0));
+
+    let lock1 = FakeLock {
+        identity: "node-1".to_string(),
+        record: shared_record.clone(),
+        resource_version: shared_rv.clone(),
+        conflict_count: Arc::new(AtomicUsize::new(0)),
+    };
+
+    let lock2 = FakeLock {
+        identity: "node-2".to_string(),
+        record: shared_record.clone(),
+        resource_version: shared_rv.clone(),
+        conflict_count: Arc::new(AtomicUsize::new(0)),
+    };
+
+    let clock1 = MockClock::new(Utc::now());
+    let clock2 = MockClock::new(Utc::now());
+
+    let config1 = test_config("node-1");
+    let config2 = test_config("node-2");
+
+    let callbacks1 = Arc::new(TestCallbacks::new());
+    let callbacks2 = Arc::new(TestCallbacks::new());
+
+    let elector1 = LeaderElector::new(config1, lock1, clock1).unwrap();
+    let elector2 = LeaderElector::new(config2, lock2, clock2).unwrap();
+
+    let cancel = CancellationToken::new();
+    let cancel1 = cancel.clone();
+    let cancel2 = cancel.clone();
+
+    let cb1 = callbacks1.clone();
+    let h1 = tokio::spawn(async move { elector1.run(SharedCallbacks(cb1), cancel1).await });
+
+    let cb2 = callbacks2.clone();
+    let h2 = tokio::spawn(async move { elector2.run(SharedCallbacks(cb2), cancel2).await });
+
+    // Let them race
+    tokio::time::sleep(Duration::from_millis(500)).await;
+
+    cancel.cancel();
+
+    let _ = tokio::time::timeout(Duration::from_secs(2), h1).await;
+    let _ = tokio::time::timeout(Duration::from_secs(2), h2).await;
+
+    let started1 = callbacks1.started_count().await;
+    let started2 = callbacks2.started_count().await;
+
+    // Exactly one should have become leader
+    assert_eq!(
+        started1 + started2,
+        1,
+        "exactly one candidate should become leader"
+    );
+}
diff --git a/deploy/k8s-dev/operator-deployment.yaml b/deploy/k8s-dev/operator-deployment.yaml
index 3179078..39655d2 100755
--- a/deploy/k8s-dev/operator-deployment.yaml
+++ b/deploy/k8s-dev/operator-deployment.yaml
@@ -27,6 +27,8 @@ spec:
           image: rustfs/operator:dev
           imagePullPolicy: Never
           command: ["./operator", "server"]
+          args:
+            - "--leader-elect=false"
           ports:
             - name: sts
               containerPort: 4223
@@ -34,6 +36,10 @@ spec:
           env:
             - name: RUST_LOG
               value: info
+            - name: POD_NAME
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.name
             - name: OPERATOR_STS_ENABLED
               value: "true"
             - name: OPERATOR_STS_AUDIENCE
diff --git a/deploy/k8s-dev/operator-rbac.yaml b/deploy/k8s-dev/operator-rbac.yaml
index 1625ebc..e6a34d7 100755
--- a/deploy/k8s-dev/operator-rbac.yaml
+++ b/deploy/k8s-dev/operator-rbac.yaml
@@ -64,6 +64,10 @@ rules:
   - apiGroups: ["events.k8s.io"]
     resources: ["events"]
     verbs: ["get", "list", "watch", "create", "patch"]
+  # Leader election
+  - apiGroups: ["coordination.k8s.io"]
+    resources: ["leases"]
+    verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
 ---
 apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRoleBinding
diff --git a/deploy/rustfs-operator/README.md b/deploy/rustfs-operator/README.md
index e11c639..e14077d 100755
--- a/deploy/rustfs-operator/README.md
+++ b/deploy/rustfs-operator/README.md
@@ -42,6 +42,7 @@ The following table lists the configurable parameters of the RustFS Operator cha
 | `operator.image.tag` | Operator image tag | `latest` |
 | `operator.image.pullPolicy` | Image pull policy | `IfNotPresent` |
 | `operator.imagePullSecrets` | Image pull secrets | `[]` |
+| `operator.leaderElect` | Enable leader election override (`null`/unset for auto by replicas) | `null` |
 | `operator.resources.requests.cpu` | CPU resource requests | `100m` |
 | `operator.resources.requests.memory` | Memory resource requests | `128Mi` |
 | `operator.resources.limits.cpu` | CPU resource limits | `500m` |
@@ -141,6 +142,24 @@ helm install rustfs-operator deploy/rustfs-operator/ \
   --set operator.resources.limits.memory=1Gi
 ```
 
+### Leader Election for Helm Deployments
+
+With the chart default behavior, `leaderElect` is automatically enabled when
+`operator.replicas > 1` and disabled when `operator.replicas <= 1`:
+
+```bash
+helm install rustfs-operator deploy/rustfs-operator/ \
+  --set operator.replicas=3
+```
+
+Override explicitly if needed (for example, to force single-leader mode in all cases):
+
+```bash
+helm install rustfs-operator deploy/rustfs-operator/ \
+  --set operator.replicas=3 \
+  --set operator.leaderElect=false
+```
+
 ### Using a Values File
 
 Create a custom `values.yaml`:
@@ -161,6 +180,7 @@ operator:
   env:
     - name: RUST_LOG
       value: debug
+  leaderElect:
 ```
 
 Install with your custom values:
diff --git a/deploy/rustfs-operator/crds/tenant-crd.yaml b/deploy/rustfs-operator/crds/tenant-crd.yaml
index b7c6ec8..59d1957 100644
--- a/deploy/rustfs-operator/crds/tenant-crd.yaml
+++ b/deploy/rustfs-operator/crds/tenant-crd.yaml
@@ -1043,6 +1043,8 @@ spec:
                       x-kubernetes-validations:
                       - message: pool name must be not empty
                         rule: self != ''
+                      - message: 'pool name must be a valid RFC 1123 label: lowercase alphanumeric or ''-'', start and end with alphanumeric, max 63 characters'
+                        rule: self.size() <= 63 && self.matches('^[a-z0-9]([-a-z0-9]*[a-z0-9])?$')
                     nodeSelector:
                       additionalProperties:
                         type: string
@@ -1184,6 +1186,8 @@ spec:
                           x-kubernetes-validations:
                           - message: volumesPerServer must be greater than 0
                             rule: self > 0
+                          - message: volumesPerServer is immutable
+                            rule: self == oldSelf
                       required:
                       - volumesPerServer
                       type: object
@@ -1231,6 +1235,8 @@ spec:
                       x-kubernetes-validations:
                       - message: servers must be greater than 0
                         rule: self > 0
+                      - message: servers is immutable
+                        rule: self == oldSelf
                     tolerations:
                       description: Tolerations allow pods to schedule onto nodes with matching taints.
                       items:
@@ -1351,9 +1357,14 @@ spec:
                     reason: FieldValueInvalid
                     rule: self.servers != 3 || self.servers * self.persistence.volumesPerServer >= 6
                 type: array
+                x-kubernetes-list-map-keys:
+                - name
+                x-kubernetes-list-type: map
                 x-kubernetes-validations:
                 - message: pools must be configured
                   rule: self.size() > 0
+                - message: pool names must be unique
+                  rule: self.all(x, self.filter(y, y.name == x.name).size() == 1)
               priorityClassName:
                 nullable: true
                 type: string
diff --git a/deploy/rustfs-operator/crds/tenant.yaml b/deploy/rustfs-operator/crds/tenant.yaml
index b7c6ec8..59d1957 100755
--- a/deploy/rustfs-operator/crds/tenant.yaml
+++ b/deploy/rustfs-operator/crds/tenant.yaml
@@ -1043,6 +1043,8 @@ spec:
                       x-kubernetes-validations:
                       - message: pool name must be not empty
                         rule: self != ''
+                      - message: 'pool name must be a valid RFC 1123 label: lowercase alphanumeric or ''-'', start and end with alphanumeric, max 63 characters'
+                        rule: self.size() <= 63 && self.matches('^[a-z0-9]([-a-z0-9]*[a-z0-9])?$')
                     nodeSelector:
                       additionalProperties:
                         type: string
@@ -1184,6 +1186,8 @@ spec:
                           x-kubernetes-validations:
                           - message: volumesPerServer must be greater than 0
                             rule: self > 0
+                          - message: volumesPerServer is immutable
+                            rule: self == oldSelf
                       required:
                       - volumesPerServer
                       type: object
@@ -1231,6 +1235,8 @@ spec:
                       x-kubernetes-validations:
                       - message: servers must be greater than 0
                         rule: self > 0
+                      - message: servers is immutable
+                        rule: self == oldSelf
                     tolerations:
                       description: Tolerations allow pods to schedule onto nodes with matching taints.
                       items:
@@ -1351,9 +1357,14 @@ spec:
                     reason: FieldValueInvalid
                     rule: self.servers != 3 || self.servers * self.persistence.volumesPerServer >= 6
                 type: array
+                x-kubernetes-list-map-keys:
+                - name
+                x-kubernetes-list-type: map
                 x-kubernetes-validations:
                 - message: pools must be configured
                   rule: self.size() > 0
+                - message: pool names must be unique
+                  rule: self.all(x, self.filter(y, y.name == x.name).size() == 1)
               priorityClassName:
                 nullable: true
                 type: string
diff --git a/deploy/rustfs-operator/templates/clusterrole.yaml b/deploy/rustfs-operator/templates/clusterrole.yaml
index af36de6..bb2bcc9 100755
--- a/deploy/rustfs-operator/templates/clusterrole.yaml
+++ b/deploy/rustfs-operator/templates/clusterrole.yaml
@@ -69,4 +69,9 @@ rules:
   - apiGroups: ["events.k8s.io"]
     resources: ["events"]
     verbs: ["get", "list", "watch", "create", "patch"]
+
+  # Leader election
+  - apiGroups: ["coordination.k8s.io"]
+    resources: ["leases"]
+    verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
 {{- end }}
diff --git a/deploy/rustfs-operator/templates/deployment.yaml b/deploy/rustfs-operator/templates/deployment.yaml
index 18fffe3..9dfefe4 100755
--- a/deploy/rustfs-operator/templates/deployment.yaml
+++ b/deploy/rustfs-operator/templates/deployment.yaml
@@ -36,6 +36,14 @@ spec:
           image: "{{ .Values.operator.image.repository }}:{{ .Values.operator.image.tag }}"
           imagePullPolicy: {{ .Values.operator.image.pullPolicy }}
           command: ["./operator", "server"]
+          {{- $leaderElect := gt (int .Values.operator.replicas) 1 }}
+          {{- if hasKey .Values.operator "leaderElect" }}
+          {{- if ne .Values.operator.leaderElect nil }}
+          {{- $leaderElect = .Values.operator.leaderElect }}
+          {{- end }}
+          {{- end }}
+          args:
+            - --leader-elect={{ ternary "true" "false" $leaderElect }}
           {{- if .Values.sts.enabled }}
           ports:
             - name: sts
@@ -47,13 +55,17 @@ spec:
               value: {{ .Values.sts.enabled | quote }}
             - name: OPERATOR_STS_AUDIENCE
               value: {{ .Values.sts.audience | quote }}
-          {{- if .Values.sts.enabled }}
-            - name: OPERATOR_STS_PORT
-              value: {{ .Values.sts.port | quote }}
+            - name: POD_NAME
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.name
             - name: OPERATOR_NAMESPACE
               valueFrom:
                 fieldRef:
                   fieldPath: metadata.namespace
+          {{- if .Values.sts.enabled }}
+            - name: OPERATOR_STS_PORT
+              value: {{ .Values.sts.port | quote }}
             - name: OPERATOR_STS_SERVICE_NAME
               value: {{ printf "%s-sts" (include "rustfs-operator.fullname" .) | quote }}
             - name: OPERATOR_STS_TLS_ENABLED
diff --git a/deploy/rustfs-operator/values.yaml b/deploy/rustfs-operator/values.yaml
index 5644fa4..32a0daa 100755
--- a/deploy/rustfs-operator/values.yaml
+++ b/deploy/rustfs-operator/values.yaml
@@ -4,6 +4,9 @@
 operator:
   # Number of operator replicas
   replicas: 1
+  # Force leader election setting.
+  # If unset/null, Helm enables it automatically when replicas > 1.
+  leaderElect:
 
   image:
     repository: rustfs/operator
diff --git a/e2e/Cargo.lock b/e2e/Cargo.lock
index b5456d6..5e77a3b 100644
--- a/e2e/Cargo.lock
+++ b/e2e/Cargo.lock
@@ -1498,6 +1498,23 @@ dependencies = [
  "syn",
 ]
 
+[[package]]
+name = "kube-leader-election"
+version = "0.1.0"
+dependencies = [
+ "async-stream",
+ "async-trait",
+ "chrono",
+ "futures",
+ "k8s-openapi",
+ "kube",
+ "rand",
+ "snafu",
+ "tokio",
+ "tokio-util",
+ "tracing",
+]
+
 [[package]]
 name = "kube-runtime"
 version = "2.0.1"
@@ -1746,12 +1763,14 @@ dependencies = [
  "futures",
  "hex",
  "hmac",
+ "hostname",
  "http",
  "hyper",
  "hyper-util",
  "jsonwebtoken",
  "k8s-openapi",
  "kube",
+ "kube-leader-election",
  "rcgen",
  "reqwest",
  "rustls",
@@ -2893,6 +2912,7 @@ dependencies = [
  "futures-core",
  "futures-io",
  "futures-sink",
+ "futures-util",
  "pin-project-lite",
  "slab",
  "tokio",
diff --git a/e2e/Cargo.toml b/e2e/Cargo.toml
index b8a74fd..d8e2c64 100644
--- a/e2e/Cargo.toml
+++ b/e2e/Cargo.toml
@@ -21,3 +21,5 @@ uuid = { version = "1", features = ["v4"] }
 k8s-openapi = { version = "0.26.1", features = ["v1_30", "schemars"] }
 kube = { version = "2.0.1", features = ["runtime", "derive", "client", "rustls-tls"] }
 reqwest = { version = "0.12", default-features = false, features = ["cookies", "json", "rustls-tls", "stream"] }
+
+[workspace]
diff --git a/src/console/handlers/pools.rs b/src/console/handlers/pools.rs
index d31ee4b..a576a39 100755
--- a/src/console/handlers/pools.rs
+++ b/src/console/handlers/pools.rs
@@ -24,7 +24,10 @@ use crate::console::{
 };
 use crate::types::v1alpha1::{
     persistence::PersistenceConfig,
-    pool::{Pool, SchedulingConfig, validate_pool_total_volumes},
+    pool::{
+        Pool, SchedulingConfig, validate_pool_collection, validate_pool_name,
+        validate_pool_total_volumes,
+    },
     pool_lifecycle::{
         DecommissionAction, DecommissionRequest, PoolLifecycleSpec, PvcRetentionPolicy,
     },
@@ -33,26 +36,6 @@ use crate::types::v1alpha1::{
     tenant::Tenant,
 };
 
-/// Validate a Kubernetes resource name (RFC 1123 subdomain: lowercase alphanumeric + hyphen, 1–63).
-fn is_valid_k8s_name(s: &str) -> bool {
-    if s.is_empty() || s.len() > 63 {
-        return false;
-    }
-    let mut chars = s.chars();
-    let Some(first) = chars.next() else {
-        return false;
-    };
-    if !first.is_ascii_alphanumeric() {
-        return false;
-    }
-    for c in chars {
-        if c != '-' && !c.is_ascii_alphanumeric() {
-            return false;
-        }
-    }
-    s.chars().last().is_some_and(|c| c != '-')
-}
-
 /// Loose validation for a Kubernetes resource quantity (e.g. `10Gi`, `100M`, `1`).
 fn is_valid_k8s_quantity(s: &str) -> bool {
     if s.is_empty() || s.len() > 32 {
@@ -582,12 +565,9 @@ pub async fn add_pool(
 
     // Validate pool name and quantities
     let pool_name = req.name.trim();
-    if !is_valid_k8s_name(pool_name) {
+    if let Err(message) = validate_pool_name(pool_name) {
         return Err(Error::BadRequest {
-            message: format!(
-                "Invalid pool name '{}': must be 1-63 chars, lowercase alphanumeric and hyphens (RFC 1123)",
-                req.name
-            ),
+            message: format!("Invalid pool name '{}': {}", req.name, message),
         });
     }
     if !is_valid_k8s_quantity(req.storage_size.trim()) {
@@ -689,6 +669,11 @@ pub async fn add_pool(
 
         remove_decommission_request(&mut tenant, pool_name);
         tenant.spec.pools.push(new_pool.clone());
+        if let Err(message) =
+            validate_pool_collection(&tenant.name_any(), &tenant.spec.pools, &tenant.spec.env)
+        {
+            return Err(Error::BadRequest { message });
+        }
 
         match tenant_api
             .replace(&tenant_name, &Default::default(), &tenant)
diff --git a/src/console/handlers/tenants.rs b/src/console/handlers/tenants.rs
index 3dd2437..50e09f2 100755
--- a/src/console/handlers/tenants.rs
+++ b/src/console/handlers/tenants.rs
@@ -20,7 +20,7 @@ use crate::console::{
 use crate::types::v1alpha1::{
     encryption::PodSecurityContextOverride,
     persistence::PersistenceConfig,
-    pool::Pool,
+    pool::{Pool, validate_pool_shape_immutable},
     tenant::{Tenant, TenantSpec},
 };
 use axum::{
@@ -304,6 +304,11 @@ pub async fn create_tenant(
         },
         status: None,
     };
+    if let Err(e) = tenant.validate_pools() {
+        return Err(Error::BadRequest {
+            message: e.to_string(),
+        });
+    }
 
     let api: Api<Tenant> = Api::namespaced(client.clone(), &req.namespace);
     let created = api
@@ -469,6 +474,11 @@ pub async fn update_tenant(
             message: "No fields to update".to_string(),
         });
     }
+    if let Err(e) = tenant.validate_pools() {
+        return Err(Error::BadRequest {
+            message: e.to_string(),
+        });
+    }
 
     // Replace status-safe fields
     let updated_tenant = api
@@ -545,16 +555,6 @@ pub async fn put_tenant_yaml(
         });
     }
 
-    // Validate: no duplicate pool names
-    let mut pool_names = std::collections::HashSet::new();
-    for pool in &in_tenant.spec.pools {
-        if !pool_names.insert(&pool.name) {
-            return Err(Error::BadRequest {
-                message: format!("Duplicate pool name '{}'", pool.name),
-            });
-        }
-    }
-
     let client = create_client(&claims).await?;
     let api: Api<Tenant> = Api::namespaced(client, &namespace);
 
@@ -564,8 +564,18 @@ pub async fn put_tenant_yaml(
         .await
         .map_err(|e| error::map_kube_error(e, format!("Tenant '{}'", name)))?;
 
+    if let Err(message) = validate_pool_shape_immutable(&current.spec.pools, &in_tenant.spec.pools)
+    {
+        return Err(Error::BadRequest { message });
+    }
+
     // Only update safe fields: spec, metadata.labels, metadata.annotations, metadata.finalizers
     current.spec = in_tenant.spec;
+    if let Err(e) = current.validate_pools() {
+        return Err(Error::BadRequest {
+            message: e.to_string(),
+        });
+    }
     if let Some(labels) = in_tenant.metadata.labels {
         current.metadata.labels = Some(labels);
     }
diff --git a/src/lib.rs b/src/lib.rs
index 55f1480..634e9a4 100755
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -31,11 +31,16 @@ use kube::core::{ApiResource, DynamicObject, GroupVersionKind};
 use kube::runtime::reflector::ObjectRef;
 use kube::runtime::{Controller, watcher};
 use kube::{Api, Client, CustomResourceExt, Resource};
+use kube_leader_election::{
+    LeaderCallbacks, LeaderElector, LeaderElectorConfig, LeaseLock, SystemClock,
+};
 use std::collections::BTreeMap;
 use std::pin::Pin;
 use std::sync::Arc;
+use std::time::Duration;
 use tokio::io::{AsyncWrite, AsyncWriteExt};
 use tokio_rustls::TlsAcceptor;
+use tokio_util::sync::CancellationToken;
 use tower::ServiceExt as _;
 use tracing::{info, warn};
 
@@ -45,6 +50,18 @@ const CERT_MANAGER_VERSION: &str = "v1";
 const CERT_MANAGER_CERTIFICATE_KIND: &str = "Certificate";
 const CERT_MANAGER_CERTIFICATE_PLURAL: &str = "certificates";
 
+/// Options for the operator server command.
+pub struct ServerOptions {
+    /// Whether to enable leader election.
+    pub leader_elect: bool,
+    /// Name of the Lease resource for leader election.
+    pub leader_elect_lease_name: String,
+    /// Namespace of the Lease resource.
+    pub leader_elect_namespace: String,
+    /// Identity of this instance in leader election.
+    pub leader_elect_identity: String,
+}
+
 pub fn install_rustls_crypto_provider() {
     let _ = rustls::crypto::ring::default_provider().install_default();
 }
@@ -62,7 +79,7 @@ pub mod sts;
 #[cfg(test)]
 pub mod tests;
 
-pub async fn run() -> Result<(), Box<dyn std::error::Error>> {
+pub async fn run(options: ServerOptions) -> Result<(), Box<dyn std::error::Error>> {
     install_rustls_crypto_provider();
 
     tracing_subscriber::fmt()
@@ -86,7 +103,7 @@ pub async fn run() -> Result<(), Box<dyn std::error::Error>> {
                 &material,
             )?))
         } else {
-            warn!("Operator STS TLS disabled by OPERATOR_STS_TLS_ENABLED=false");
+            warn!("Operator STS TLS disabled by OPERATOR_STS_ENABLED=false");
             None
         };
         let sts_listener = bind_sts_listener(sts_port, tls_server_config.is_some()).await?;
@@ -99,6 +116,45 @@ pub async fn run() -> Result<(), Box<dyn std::error::Error>> {
         tracing::info!("Operator STS server disabled by OPERATOR_STS_ENABLED=false");
     }
 
+    if options.leader_elect {
+        info!(
+            identity = %options.leader_elect_identity,
+            lease = %format!("{}/{}", options.leader_elect_namespace, options.leader_elect_lease_name),
+            "starting with leader election enabled"
+        );
+
+        let lock = LeaseLock::new(
+            client.clone(),
+            &options.leader_elect_lease_name,
+            &options.leader_elect_namespace,
+            &options.leader_elect_identity,
+        );
+
+        let config = LeaderElectorConfig {
+            identity: options.leader_elect_identity.clone(),
+            lease_duration: Duration::from_secs(15),
+            renew_deadline: Duration::from_secs(10),
+            retry_period: Duration::from_secs(2),
+            release_on_cancel: true,
+        };
+
+        let callbacks = ControllerCallbacks {
+            client: client.clone(),
+        };
+
+        let cancel = CancellationToken::new();
+        let elector = LeaderElector::new(config, lock, SystemClock)?;
+        elector.run(callbacks, cancel).await?;
+    } else {
+        info!("starting with leader election disabled");
+        run_controller(client, CancellationToken::new()).await;
+    }
+
+    Ok(())
+}
+
+/// Build and run the controller reconcile loop.
+async fn run_controller(client: Client, cancel: CancellationToken) {
     let tenant_client = Api::<Tenant>::all(client.clone());
     let context = Context::new(client.clone());
     let controller = Controller::new(tenant_client, watcher::Config::default())
@@ -145,17 +201,69 @@ pub async fn run() -> Result<(), Box<dyn std::error::Error>> {
         }
     };
 
-    controller
+    let mut reconcile_stream = controller
         .run(reconcile_rustfs, error_policy, Arc::new(context))
-        .for_each(|res| async move {
-            match res {
-                Ok((tenant, _)) => info!("reconciled successful, object{:?}", tenant.name),
-                Err(e) => warn!("reconcile failed: {}", e),
+        .boxed();
+
+    tokio::select! {
+        _ = cancel.cancelled() => {
+            warn!("controller cancellation requested, stopping");
+        }
+        _ = async {
+            while let Some(res) = reconcile_stream.next().await {
+                match res {
+                    Ok((tenant, _)) => info!("reconciled successful, object{:?}", tenant.name),
+                    Err(e) => warn!("reconcile failed: {}", e),
+                }
             }
-        })
-        .await;
+        } => {}
+    }
+}
 
-    Ok(())
+/// Callbacks for running the controller inside leader election.
+struct ControllerCallbacks {
+    client: Client,
+}
+
+#[async_trait::async_trait]
+impl LeaderCallbacks for ControllerCallbacks {
+    async fn on_started_leading(&self, cancel: CancellationToken) {
+        info!("acquired leader lease, starting controller");
+        let client = self.client.clone();
+        let controller_cancel = CancellationToken::new();
+        let run_cancel = controller_cancel.clone();
+        // Run the controller in a separate task so we can select on the cancel token.
+        let controller_handle = tokio::spawn(async move {
+            run_controller(client, run_cancel).await;
+        });
+        tokio::pin!(controller_handle);
+
+        tokio::select! {
+            _ = &mut controller_handle => {
+                info!("controller finished");
+            }
+            _ = cancel.cancelled() => {
+                info!("lost leader lease, stopping controller");
+                controller_cancel.cancel();
+                if tokio::time::timeout(Duration::from_secs(5), &mut controller_handle)
+                    .await
+                    .is_err()
+                {
+                    warn!("controller stop timed out, forcing shutdown");
+                    controller_handle.abort();
+                }
+                let _ = controller_handle.await;
+            }
+        }
+    }
+
+    async fn on_stopped_leading(&self) {
+        warn!("stopped leading");
+    }
+
+    async fn on_new_leader(&self, identity: String) {
+        info!(new_leader = %identity, "observed new leader");
+    }
 }
 
 fn operator_sts_port() -> u16 {
diff --git a/src/main.rs b/src/main.rs
index e5cc6e5..41c9e61 100755
--- a/src/main.rs
+++ b/src/main.rs
@@ -14,10 +14,13 @@
 
 use clap::{Parser, Subcommand};
 use const_str::concat;
-use operator::{crd, run};
+use operator::{ServerOptions, crd, run};
 
 shadow_rs::shadow!(build);
 
+const SERVICE_ACCOUNT_NAMESPACE_PATH: &str =
+    "/var/run/secrets/kubernetes.io/serviceaccount/namespace";
+
 #[allow(clippy::const_is_empty)]
 const SHORT_VERSION: &str = {
     if !build::TAG.is_empty() {
@@ -66,7 +69,23 @@ enum Commands {
     },
 
     /// Run the controller
-    Server {},
+    Server {
+        /// Enable leader election (disable for single-replica/local mode)
+        #[arg(long, default_value = "false")]
+        leader_elect: bool,
+
+        /// Name of the Lease resource used for leader election
+        #[arg(long, default_value = "rustfs-operator-leader")]
+        leader_elect_lease_name: String,
+
+        /// Namespace for the LeaderLease (CLI flag > OPERATOR_NAMESPACE > pod namespace > default)
+        #[arg(long)]
+        leader_elect_namespace: Option<String>,
+
+        /// Identity for this instance in leader election (defaults to POD_NAME env or hostname)
+        #[arg(long)]
+        leader_elect_identity: Option<String>,
+    },
 
     /// Run the console web server
     Console {
@@ -82,7 +101,52 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
 
     match cli.command {
         Commands::Crd { file } => crd(file).await,
-        Commands::Server {} => run().await,
+        Commands::Server {
+            leader_elect,
+            leader_elect_lease_name,
+            leader_elect_namespace,
+            leader_elect_identity,
+        } => {
+            let namespace = resolve_leader_elect_namespace(leader_elect_namespace);
+            let identity = leader_elect_identity
+                .or_else(|| std::env::var("POD_NAME").ok())
+                .unwrap_or_else(|| {
+                    hostname::get()
+                        .ok()
+                        .and_then(|h| h.into_string().ok())
+                        .unwrap_or_else(|| "unknown".to_string())
+                });
+            let options = ServerOptions {
+                leader_elect,
+                leader_elect_lease_name,
+                leader_elect_namespace: namespace,
+                leader_elect_identity: identity,
+            };
+            run(options).await
+        }
         Commands::Console { port } => operator::console::server::run(port).await,
     }
 }
+
+fn resolve_leader_elect_namespace(cli_namespace: Option<String>) -> String {
+    if let Some(namespace) = cli_namespace {
+        let namespace = namespace.trim().to_string();
+        if !namespace.is_empty() {
+            return namespace;
+        }
+    }
+
+    if let Some(namespace) = std::env::var("OPERATOR_NAMESPACE")
+        .ok()
+        .map(|value| value.trim().to_string())
+        .filter(|value| !value.is_empty())
+    {
+        return namespace;
+    }
+
+    std::fs::read_to_string(SERVICE_ACCOUNT_NAMESPACE_PATH)
+        .ok()
+        .map(|value| value.trim().to_string())
+        .filter(|value| !value.is_empty())
+        .unwrap_or_else(|| "default".to_string())
+}
diff --git a/src/reconcile/phases.rs b/src/reconcile/phases.rs
index dcd30c9..bb89a30 100644
--- a/src/reconcile/phases.rs
+++ b/src/reconcile/phases.rs
@@ -75,6 +75,12 @@ pub(super) async fn validate_tenant_prerequisites(
         return Err(e.into());
     }
 
+    if let Err(e) = tenant.validate_pools() {
+        let status_error = StatusError::from_types_error(&e);
+        patch_status_error(ctx, tenant, &status_error).await;
+        return Err(e.into());
+    }
+
     // Validate credential Secret if configured.
     // This only validates the Secret exists and has required keys.
     // Actual credential injection happens via secretKeyRef in the StatefulSet.
@@ -423,6 +429,9 @@ pub(super) async fn reconcile_pool_statefulsets(
         ..Default::default()
     };
 
+    let mut existing_pool_statefulsets = Vec::new();
+    let mut created_missing_pool = false;
+
     for pool in &tenant.spec.pools {
         let ss_name = format!("{}-{}", tenant.name(), pool.name);
         let lifecycle_decision = lifecycle_decisions.decision_for(&pool.name);
@@ -445,16 +454,7 @@ pub(super) async fn reconcile_pool_statefulsets(
             .await
         {
             Ok(existing_ss) => {
-                reconcile_existing_pool_statefulset(
-                    ctx,
-                    tenant,
-                    namespace,
-                    pool,
-                    existing_ss,
-                    tls_plan,
-                    &mut summary,
-                )
-                .await?;
+                existing_pool_statefulsets.push((pool, existing_ss));
             }
             Err(e) if is_not_found_context_error(&e) => {
                 reconcile_missing_pool_statefulset(
@@ -467,6 +467,7 @@ pub(super) async fn reconcile_pool_statefulsets(
                     &mut summary,
                 )
                 .await?;
+                created_missing_pool = true;
             }
             Err(e) => {
                 error!("Failed to get StatefulSet {}: {}", ss_name, e);
@@ -477,6 +478,27 @@ pub(super) async fn reconcile_pool_statefulsets(
         }
     }
 
+    if created_missing_pool {
+        for (pool, existing_ss) in existing_pool_statefulsets {
+            let pool_status = tenant.build_pool_status(&pool.name, &existing_ss);
+            update_pool_summary(&mut summary, pool_status);
+        }
+        return Ok(summary);
+    }
+
+    for (pool, existing_ss) in existing_pool_statefulsets {
+        reconcile_existing_pool_statefulset(
+            ctx,
+            tenant,
+            namespace,
+            pool,
+            existing_ss,
+            tls_plan,
+            &mut summary,
+        )
+        .await?;
+    }
+
     Ok(summary)
 }
 
diff --git a/src/status.rs b/src/status.rs
index 2ce6582..3df8a0a 100644
--- a/src/status.rs
+++ b/src/status.rs
@@ -128,6 +128,11 @@ impl StatusError {
                 ConditionType::SpecValid,
                 sanitize_message(reason),
             ),
+            types::error::Error::InvalidPoolSpec { message, .. } => Self::blocked(
+                Reason::InvalidPoolSpec,
+                ConditionType::SpecValid,
+                sanitize_message(message),
+            ),
             types::error::Error::ImmutableFieldModified { field, .. } => Self::blocked(
                 Reason::ImmutableFieldModified,
                 ConditionType::SpecValid,
diff --git a/src/types/error.rs b/src/types/error.rs
index 6f8d9b8..8b07a6b 100755
--- a/src/types/error.rs
+++ b/src/types/error.rs
@@ -36,6 +36,9 @@ pub enum Error {
     #[snafu(display("invalid tenant name '{}': {}", name, reason))]
     InvalidTenantName { name: String, reason: String },
 
+    #[snafu(display("invalid pool specification for tenant '{}': {}", name, message))]
+    InvalidPoolSpec { name: String, message: String },
+
     #[snafu(display("serde_json error: {}", source))]
     SerdeJson { source: serde_json::Error },
 }
diff --git a/src/types/v1alpha1/persistence.rs b/src/types/v1alpha1/persistence.rs
index fbc779f..b895fdc 100755
--- a/src/types/v1alpha1/persistence.rs
+++ b/src/types/v1alpha1/persistence.rs
@@ -20,6 +20,7 @@ use serde::{Deserialize, Serialize};
 #[serde(rename_all = "camelCase")]
 pub struct PersistenceConfig {
     #[x_kube(validation = Rule::new("self > 0").message("volumesPerServer must be greater than 0"))]
+    #[x_kube(validation = Rule::new("self == oldSelf").message("volumesPerServer is immutable"))]
     pub volumes_per_server: i32,
 
     #[serde(skip_serializing_if = "Option::is_none")]
diff --git a/src/types/v1alpha1/pool.rs b/src/types/v1alpha1/pool.rs
index e0774a8..ac534ca 100755
--- a/src/types/v1alpha1/pool.rs
+++ b/src/types/v1alpha1/pool.rs
@@ -15,9 +15,16 @@
 use k8s_openapi::api::core::v1 as corev1;
 use kube::KubeSchema;
 use serde::{Deserialize, Serialize};
+use std::collections::HashSet;
 
 use crate::types::v1alpha1::persistence::PersistenceConfig;
 
+const RUSTFS_ERASURE_SET_SIZES: &[usize] = &[2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
+const RUSTFS_ERASURE_SET_DRIVE_COUNT_ENV: &str = "RUSTFS_ERASURE_SET_DRIVE_COUNT";
+const RUSTFS_STORAGE_CLASS_STANDARD_ENV: &str = "RUSTFS_STORAGE_CLASS_STANDARD";
+const RUSTFS_STORAGE_CLASS_RRS_ENV: &str = "RUSTFS_STORAGE_CLASS_RRS";
+const DEFAULT_RRS_PARITY: usize = 1;
+
 /// Kubernetes scheduling and placement configuration for pools.
 /// Groups related scheduling fields for better code organization.
 /// Uses #[serde(flatten)] to maintain flat YAML structure.
@@ -61,9 +68,13 @@ pub struct SchedulingConfig {
 ]
 pub struct Pool {
     #[x_kube(validation = Rule::new("self != ''").message("pool name must be not empty"))]
+    #[x_kube(validation = Rule::new("self.size() <= 63 && self.matches('^[a-z0-9]([-a-z0-9]*[a-z0-9])?$')").
+        message("pool name must be a valid RFC 1123 label: lowercase alphanumeric or '-', start and end with alphanumeric, max 63 characters"))
+    ]
     pub name: String,
 
     #[x_kube(validation = Rule::new("self > 0").message("servers must be greater than 0"))]
+    #[x_kube(validation = Rule::new("self == oldSelf").message("servers is immutable"))]
     pub servers: i32,
 
     pub persistence: PersistenceConfig,
@@ -96,9 +107,344 @@ pub fn validate_pool_total_volumes(servers: i32, volumes_per_server: i32) -> Res
     Ok(total)
 }
 
+/// Validate a pool name used in labels and RustFS peer DNS names.
+pub fn validate_pool_name(name: &str) -> Result<(), String> {
+    if name.is_empty() {
+        return Err("pool name must be not empty".to_string());
+    }
+    if name.len() > 63 {
+        return Err(format!(
+            "pool name must be at most 63 characters, got {}",
+            name.len()
+        ));
+    }
+
+    let bytes = name.as_bytes();
+    if !bytes[0].is_ascii_lowercase() && !bytes[0].is_ascii_digit() {
+        return Err(
+            "pool name must start with a lowercase alphanumeric character (a-z, 0-9)".to_string(),
+        );
+    }
+    if !bytes[bytes.len() - 1].is_ascii_lowercase() && !bytes[bytes.len() - 1].is_ascii_digit() {
+        return Err(
+            "pool name must end with a lowercase alphanumeric character (a-z, 0-9)".to_string(),
+        );
+    }
+    for &b in bytes {
+        if !b.is_ascii_lowercase() && !b.is_ascii_digit() && b != b'-' {
+            return Err(format!(
+                "pool name contains invalid character '{}'; only lowercase alphanumeric and '-' are allowed",
+                b as char
+            ));
+        }
+    }
+
+    Ok(())
+}
+
+pub fn validate_pool_collection(
+    tenant_name: &str,
+    pools: &[Pool],
+    env: &[corev1::EnvVar],
+) -> Result<(), String> {
+    if pools.is_empty() {
+        return Err("pools must be configured".to_string());
+    }
+
+    let mut names = HashSet::new();
+    for pool in pools {
+        validate_pool_name(&pool.name)?;
+        if !names.insert(pool.name.as_str()) {
+            return Err(format!("pool names must be unique: '{}'", pool.name));
+        }
+        validate_pool_total_volumes(pool.servers, pool.persistence.volumes_per_server)
+            .map_err(|message| format!("pool '{}': {}", pool.name, message))?;
+        validate_rustfs_peer_dns_label(tenant_name, pool)?;
+    }
+
+    validate_pool_erasure_compatibility(pools, env)
+}
+
+pub fn validate_pool_shape_immutable(existing: &[Pool], desired: &[Pool]) -> Result<(), String> {
+    for desired_pool in desired {
+        let Some(existing_pool) = existing
+            .iter()
+            .find(|existing_pool| existing_pool.name == desired_pool.name)
+        else {
+            continue;
+        };
+
+        if existing_pool.servers != desired_pool.servers {
+            return Err(format!(
+                "pool '{}' servers is immutable ({} -> {})",
+                desired_pool.name, existing_pool.servers, desired_pool.servers
+            ));
+        }
+
+        if existing_pool.persistence.volumes_per_server
+            != desired_pool.persistence.volumes_per_server
+        {
+            return Err(format!(
+                "pool '{}' volumesPerServer is immutable ({} -> {})",
+                desired_pool.name,
+                existing_pool.persistence.volumes_per_server,
+                desired_pool.persistence.volumes_per_server
+            ));
+        }
+    }
+
+    Ok(())
+}
+
+fn validate_rustfs_peer_dns_label(tenant_name: &str, pool: &Pool) -> Result<(), String> {
+    let max_ordinal = pool.servers.saturating_sub(1).max(0);
+    let dns_label_len = tenant_name.len() + 1 + pool.name.len() + 1 + ordinal_digits(max_ordinal);
+
+    if dns_label_len > 63 {
+        return Err(format!(
+            "pool '{}' makes RustFS peer DNS label too long: '{}-{}-<ordinal>' is {} characters at max ordinal {}, must be at most 63",
+            pool.name, tenant_name, pool.name, dns_label_len, max_ordinal
+        ));
+    }
+
+    Ok(())
+}
+
+fn ordinal_digits(value: i32) -> usize {
+    value.to_string().len()
+}
+
+fn validate_pool_erasure_compatibility(
+    pools: &[Pool],
+    env: &[corev1::EnvVar],
+) -> Result<(), String> {
+    let set_drive_count = rustfs_erasure_set_drive_count_from_env(env)?;
+    let pool_layouts = pools
+        .iter()
+        .map(|pool| {
+            rustfs_drives_per_set(
+                pool.servers,
+                pool.persistence.volumes_per_server,
+                set_drive_count,
+            )
+            .map(|drives_per_set| (pool, drives_per_set))
+        })
+        .collect::<Result<Vec<_>, _>>()?;
+
+    let Some((first_pool, first_drives_per_set)) = pool_layouts.first() else {
+        return Ok(());
+    };
+
+    let standard_parity = rustfs_standard_parity_from_env(env, *first_drives_per_set)?;
+    let rrs_parity = rustfs_rrs_parity_from_env(env, *first_drives_per_set)?;
+
+    if standard_parity > 0 && rrs_parity > 0 && standard_parity < rrs_parity {
+        return Err(format!(
+            "{RUSTFS_STORAGE_CLASS_STANDARD_ENV} parity {} must be greater than or equal to {RUSTFS_STORAGE_CLASS_RRS_ENV} parity {}",
+            standard_parity, rrs_parity
+        ));
+    }
+
+    validate_parity_for_pool(
+        first_pool,
+        *first_drives_per_set,
+        standard_parity,
+        "STANDARD",
+    )?;
+    validate_parity_for_pool(first_pool, *first_drives_per_set, rrs_parity, "RRS")?;
+
+    for (pool, drives_per_set) in pool_layouts.iter().skip(1) {
+        validate_parity_for_pool(pool, *drives_per_set, standard_parity, "STANDARD")?;
+        validate_parity_for_pool(pool, *drives_per_set, rrs_parity, "RRS")?;
+    }
+
+    Ok(())
+}
+
+fn validate_parity_for_pool(
+    pool: &Pool,
+    drives_per_set: usize,
+    parity: usize,
+    storage_class: &str,
+) -> Result<(), String> {
+    if drives_per_set == 0 {
+        return Err(format!(
+            "pool '{}' has invalid drivesPerSet {}, must be greater than zero",
+            pool.name, drives_per_set
+        ));
+    }
+
+    let max_parity = drives_per_set / 2;
+    if parity > max_parity {
+        return Err(format!(
+            "pool '{}' has RustFS drivesPerSet {}, but {} parity {} exceeds the maximum {}",
+            pool.name, drives_per_set, storage_class, parity, max_parity
+        ));
+    }
+    Ok(())
+}
+
+fn rustfs_erasure_set_drive_count_from_env(
+    env: &[corev1::EnvVar],
+) -> Result<Option<usize>, String> {
+    let Some(value) = literal_env_value(env, RUSTFS_ERASURE_SET_DRIVE_COUNT_ENV)? else {
+        return Ok(None);
+    };
+
+    let set_drive_count = value.parse::<usize>().map_err(|_| {
+        format!(
+            "{RUSTFS_ERASURE_SET_DRIVE_COUNT_ENV} must be a non-negative integer, got '{}'",
+            value
+        )
+    })?;
+
+    Ok((set_drive_count > 0).then_some(set_drive_count))
+}
+
+fn rustfs_standard_parity_from_env(
+    env: &[corev1::EnvVar],
+    set_drive_count: usize,
+) -> Result<usize, String> {
+    let parity = literal_env_value(env, RUSTFS_STORAGE_CLASS_STANDARD_ENV)?
+        .map(parse_storage_class_parity)
+        .transpose()?;
+
+    Ok(parity.unwrap_or_else(|| rustfs_default_parity_count(set_drive_count)))
+}
+
+fn rustfs_rrs_parity_from_env(
+    env: &[corev1::EnvVar],
+    set_drive_count: usize,
+) -> Result<usize, String> {
+    let parity = literal_env_value(env, RUSTFS_STORAGE_CLASS_RRS_ENV)?
+        .map(parse_storage_class_parity)
+        .transpose()?;
+
+    Ok(parity.unwrap_or(if set_drive_count == 1 {
+        0
+    } else {
+        DEFAULT_RRS_PARITY
+    }))
+}
+
+fn literal_env_value<'a>(env: &'a [corev1::EnvVar], name: &str) -> Result<Option<&'a str>, String> {
+    let Some(var) = env.iter().rev().find(|var| var.name == name) else {
+        return Ok(None);
+    };
+
+    if var.value_from.is_some() {
+        return Err(format!(
+            "{name} is configured with value_from, which is not supported during admission validation"
+        ));
+    }
+
+    Ok(var.value.as_deref())
+}
+
+fn parse_storage_class_parity(value: &str) -> Result<usize, String> {
+    let Some((scheme, parity)) = value.split_once(':') else {
+        return Err(format!(
+            "invalid storage class '{}': expected 'EC:<parity>'",
+            value
+        ));
+    };
+    if scheme != "EC" {
+        return Err(format!(
+            "invalid storage class '{}': only EC scheme is supported",
+            value
+        ));
+    }
+    parity.parse::<usize>().map_err(|_| {
+        format!(
+            "invalid storage class '{}': parity must be a non-negative integer",
+            value
+        )
+    })
+}
+
+pub fn rustfs_drives_per_set(
+    servers: i32,
+    volumes_per_server: i32,
+    set_drive_count: Option<usize>,
+) -> Result<usize, String> {
+    let total_volumes = validate_pool_total_volumes(servers, volumes_per_server)? as usize;
+    let volume_pattern_size = volumes_per_server as usize;
+
+    // Matches the current RustFS ellipsis layout calculation for the operator's
+    // generated host{0...n}/rustfs{0...m} pattern.
+    let set_counts = RUSTFS_ERASURE_SET_SIZES
+        .iter()
+        .copied()
+        .filter(|set_size| total_volumes.is_multiple_of(*set_size))
+        .filter(|set_size| pattern_is_symmetric(volume_pattern_size, *set_size))
+        .collect::<Vec<_>>();
+
+    if set_counts.is_empty() {
+        return Err(format!(
+            "pool layout with {} servers × {} volumesPerServer = {} total volumes is not divisible by any RustFS supported erasure set size with symmetric volume distribution",
+            servers, volumes_per_server, total_volumes
+        ));
+    }
+
+    if let Some(set_drive_count) = set_drive_count {
+        if !set_counts.contains(&set_drive_count) {
+            return Err(format!(
+                "{RUSTFS_ERASURE_SET_DRIVE_COUNT_ENV}={} is not valid for pool layout {} servers × {} volumesPerServer; acceptable values are {:?}",
+                set_drive_count, servers, volumes_per_server, set_counts
+            ));
+        }
+        return Ok(set_drive_count);
+    }
+
+    Ok(common_set_drive_count(total_volumes, &set_counts))
+}
+
+fn pattern_is_symmetric(pattern_size: usize, set_size: usize) -> bool {
+    if pattern_size > set_size {
+        pattern_size.is_multiple_of(set_size)
+    } else {
+        set_size.is_multiple_of(pattern_size)
+    }
+}
+
+fn common_set_drive_count(divisible_size: usize, set_counts: &[usize]) -> usize {
+    if divisible_size < set_counts[set_counts.len() - 1] {
+        return divisible_size;
+    }
+
+    let mut prev_d = divisible_size / set_counts[0];
+    let mut set_size = 0;
+    for &count in set_counts {
+        if divisible_size.is_multiple_of(count) {
+            let d = divisible_size / count;
+            if d <= prev_d {
+                prev_d = d;
+                set_size = count;
+            }
+        }
+    }
+    set_size
+}
+
+fn rustfs_default_parity_count(drives_per_set: usize) -> usize {
+    match drives_per_set {
+        1 => 0,
+        2 | 3 => 1,
+        4 | 5 => 2,
+        6 | 7 => 3,
+        _ => 4,
+    }
+}
+
 #[cfg(test)]
 mod tests {
-    use super::validate_pool_total_volumes;
+    use super::{
+        parse_storage_class_parity, rustfs_drives_per_set, validate_pool_collection,
+        validate_pool_name, validate_pool_total_volumes,
+    };
+    use crate::types::v1alpha1::persistence::PersistenceConfig;
+    use crate::types::v1alpha1::pool::Pool;
+    use k8s_openapi::api::core::v1 as corev1;
 
     #[test]
     fn rejects_non_positive_inputs() {
@@ -135,4 +481,132 @@ mod tests {
         assert_eq!(validate_pool_total_volumes(4, 1).unwrap(), 4);
         assert_eq!(validate_pool_total_volumes(2, 2).unwrap(), 4);
     }
+
+    #[test]
+    fn validates_pool_name_as_rfc1123_label() {
+        assert!(validate_pool_name("pool-0").is_ok());
+        assert!(validate_pool_name("0-pool").is_ok());
+        assert!(validate_pool_name("Pool-0").is_err());
+        assert!(validate_pool_name("-pool").is_err());
+        assert!(validate_pool_name("pool-").is_err());
+    }
+
+    #[test]
+    fn derives_rustfs_drives_per_set_for_operator_pool_pattern() {
+        assert_eq!(rustfs_drives_per_set(4, 4, None).unwrap(), 16);
+        assert_eq!(rustfs_drives_per_set(8, 4, None).unwrap(), 16);
+        assert_eq!(rustfs_drives_per_set(2, 2, None).unwrap(), 4);
+        assert_eq!(rustfs_drives_per_set(3, 2, None).unwrap(), 6);
+    }
+
+    #[test]
+    fn rejects_pool_layout_rustfs_cannot_split_into_sets() {
+        assert!(rustfs_drives_per_set(17, 1, None).is_err());
+    }
+
+    #[test]
+    fn rejects_incompatible_pool_parity() {
+        let pools = vec![test_pool("pool-0", 4, 4), test_pool("pool-1", 2, 2)];
+
+        let err = validate_pool_collection("tenant", &pools, &[]).unwrap_err();
+
+        assert!(err.contains("STANDARD parity 4 exceeds the maximum 2"));
+    }
+
+    #[test]
+    fn rejects_parity_exceeding_half_for_small_drive_set() {
+        let pools = vec![test_pool("pool-0", 2, 4)];
+        let env = vec![
+            corev1::EnvVar {
+                name: "RUSTFS_ERASURE_SET_DRIVE_COUNT".to_string(),
+                value: Some("2".to_string()),
+                ..Default::default()
+            },
+            corev1::EnvVar {
+                name: "RUSTFS_STORAGE_CLASS_STANDARD".to_string(),
+                value: Some("EC:2".to_string()),
+                ..Default::default()
+            },
+        ];
+
+        let err = validate_pool_collection("tenant", &pools, &env).unwrap_err();
+        assert!(err.contains("STANDARD parity 2 exceeds the maximum 1"));
+    }
+
+    #[test]
+    fn honors_literal_erasure_env_for_pool_compatibility() {
+        let pools = vec![test_pool("pool-0", 4, 4), test_pool("pool-1", 2, 2)];
+        let env = vec![corev1::EnvVar {
+            name: "RUSTFS_ERASURE_SET_DRIVE_COUNT".to_string(),
+            value: Some("4".to_string()),
+            ..Default::default()
+        }];
+
+        assert!(validate_pool_collection("tenant", &pools, &env).is_ok());
+    }
+
+    #[test]
+    fn rejects_erasure_layout_validation_with_value_from_env_vars() {
+        let pools = vec![test_pool("pool-0", 4, 4)];
+        let env = vec![corev1::EnvVar {
+            name: "RUSTFS_STORAGE_CLASS_STANDARD".to_string(),
+            value_from: Some(corev1::EnvVarSource {
+                secret_key_ref: Some(corev1::SecretKeySelector {
+                    name: "legacy-rustfs-config".to_string(),
+                    key: "standard-parity".to_string(),
+                    optional: Some(false),
+                }),
+                ..Default::default()
+            }),
+            ..Default::default()
+        }];
+
+        let err = validate_pool_collection("tenant", &pools, &env).unwrap_err();
+        assert!(err.contains("not supported during admission validation"));
+    }
+
+    #[test]
+    fn honors_literal_standard_storage_class_env_for_pool_compatibility() {
+        let pools = vec![test_pool("pool-0", 4, 4), test_pool("pool-1", 2, 2)];
+        let env = vec![corev1::EnvVar {
+            name: "RUSTFS_STORAGE_CLASS_STANDARD".to_string(),
+            value: Some("EC:2".to_string()),
+            ..Default::default()
+        }];
+
+        assert!(validate_pool_collection("tenant", &pools, &env).is_ok());
+    }
+
+    #[test]
+    fn validates_storage_class_parity_format() {
+        assert_eq!(parse_storage_class_parity("EC:2").unwrap(), 2);
+        assert!(parse_storage_class_parity("INVALID:2").is_err());
+        assert!(parse_storage_class_parity("EC:not-a-number").is_err());
+    }
+
+    #[test]
+    fn rejects_too_long_rustfs_peer_dns_labels() {
+        let pools = vec![test_pool(
+            "pool-name-that-makes-the-peer-label-too-long",
+            4,
+            4,
+        )];
+
+        let err = validate_pool_collection("tenant-name-that-is-already-quite-long", &pools, &[])
+            .unwrap_err();
+
+        assert!(err.contains("RustFS peer DNS label too long"));
+    }
+
+    fn test_pool(name: &str, servers: i32, volumes_per_server: i32) -> Pool {
+        Pool {
+            name: name.to_string(),
+            servers,
+            persistence: PersistenceConfig {
+                volumes_per_server,
+                ..Default::default()
+            },
+            scheduling: Default::default(),
+        }
+    }
 }
diff --git a/src/types/v1alpha1/status.rs b/src/types/v1alpha1/status.rs
index efb0087..56b18f3 100755
--- a/src/types/v1alpha1/status.rs
+++ b/src/types/v1alpha1/status.rs
@@ -112,6 +112,7 @@ pub enum Reason {
     ReconcileStarted,
     ReconcileSucceeded,
     InvalidTenantName,
+    InvalidPoolSpec,
     ImmutableFieldModified,
     CredentialSecretNotFound,
     CredentialSecretMissingKey,
@@ -173,6 +174,7 @@ impl Reason {
             Self::ReconcileStarted => "ReconcileStarted",
             Self::ReconcileSucceeded => "ReconcileSucceeded",
             Self::InvalidTenantName => "InvalidTenantName",
+            Self::InvalidPoolSpec => "InvalidPoolSpec",
             Self::ImmutableFieldModified => "ImmutableFieldModified",
             Self::CredentialSecretNotFound => "CredentialSecretNotFound",
             Self::CredentialSecretMissingKey => "CredentialSecretMissingKey",
@@ -446,6 +448,7 @@ pub fn is_blocked_reason(reason: &str) -> bool {
     matches!(
         reason,
         "InvalidTenantName"
+            | "InvalidPoolSpec"
             | "ImmutableFieldModified"
             | "CredentialSecretNotFound"
             | "CredentialSecretMissingKey"
@@ -504,6 +507,7 @@ fn condition_matches_observed_generation(status: &Status, condition: &Condition)
 
 pub fn next_actions_for_reason(reason: &str) -> Vec<&'static str> {
     match reason {
+        "InvalidPoolSpec" => vec!["fixPoolSpec"],
         "CredentialSecretNotFound" => vec!["createCredentialSecret"],
         "CredentialSecretMissingKey" => vec!["addRequiredSecretKey"],
         "CredentialSecretInvalidEncoding" => vec!["replaceSecretValueWithUtf8"],
diff --git a/src/types/v1alpha1/tenant.rs b/src/types/v1alpha1/tenant.rs
index 030d1b7..859b14c 100755
--- a/src/types/v1alpha1/tenant.rs
+++ b/src/types/v1alpha1/tenant.rs
@@ -15,7 +15,7 @@
 use crate::types::v1alpha1::encryption::{EncryptionConfig, PodSecurityContextOverride};
 use crate::types::v1alpha1::k8s;
 use crate::types::v1alpha1::logging::LoggingConfig;
-use crate::types::v1alpha1::pool::Pool;
+use crate::types::v1alpha1::pool::{Pool, validate_pool_collection};
 use crate::types::v1alpha1::pool_lifecycle::PoolLifecycleSpec;
 use crate::types::v1alpha1::provisioning::{
     ProvisioningBucket, ProvisioningPolicy, ProvisioningUser,
@@ -53,7 +53,9 @@ pub struct TenantSpec {
     #[serde(default, skip_serializing_if = "Option::is_none")]
     pub scheduler: Option<String>,
 
+    #[schemars(extend("x-kubernetes-list-type" = "map", "x-kubernetes-list-map-keys" = ["name"]))]
     #[x_kube(validation = Rule::new("self.size() > 0").message("pools must be configured"))]
+    #[x_kube(validation = Rule::new("self.all(x, self.filter(y, y.name == x.name).size() == 1)").message("pool names must be unique"))]
     pub pools: Vec<Pool>,
 
     /// Explicit lifecycle requests for pool decommissioning.
@@ -199,6 +201,15 @@ impl Tenant {
         validate_dns1035_label(&self.name())
     }
 
+    pub fn validate_pools(&self) -> Result<(), types::error::Error> {
+        validate_pool_collection(&self.name(), &self.spec.pools, &self.spec.env).map_err(
+            |message| types::error::Error::InvalidPoolSpec {
+                name: self.name(),
+                message,
+            },
+        )
+    }
+
     /// a new owner reference for tenant
     pub fn new_owner_ref(&self) -> metav1::OwnerReference {
         metav1::OwnerReference {
diff --git a/src/types/v1alpha1/tenant/workloads.rs b/src/types/v1alpha1/tenant/workloads.rs
index cf38f3a..ea1a75a 100755
--- a/src/types/v1alpha1/tenant/workloads.rs
+++ b/src/types/v1alpha1/tenant/workloads.rs
@@ -833,6 +833,16 @@ impl Tenant {
             .unwrap_or(&"<unknown>".to_string())
             .clone();
 
+        // MinIO-compatible expansion model: an existing pool's server count is
+        // immutable. Horizontal capacity expansion must add a new pool.
+        if existing_spec.replicas != desired_spec.replicas {
+            return Err(types::error::Error::ImmutableFieldModified {
+                name: ss_name,
+                field: "spec.replicas".to_string(),
+                message: "Cannot change pool servers for an existing StatefulSet. Add a new pool to expand capacity.".to_string(),
+            });
+        }
+
         // Validate selector is unchanged (immutable field)
         if serde_json::to_value(&existing_spec.selector)?
             != serde_json::to_value(&desired_spec.selector)?