diff --git a/db/db_tunables.c b/db/db_tunables.c
index 470ad14a2f..068621b48b 100644
--- a/db/db_tunables.c
+++ b/db/db_tunables.c
@@ -511,6 +511,7 @@ extern int gbl_fdb_io_error_retries_phase_2_poll;
 extern int gbl_fdb_auth_enabled;
 extern int gbl_fdb_auth_error;
 extern int gbl_debug_invalid_genid;
+extern int gbl_debug_fail_replay_dispatch;
 
 /* Tranlog */
 extern int gbl_tranlog_incoherent_timeout;
diff --git a/db/db_tunables.h b/db/db_tunables.h
index f99724d4f2..6a9846646b 100644
--- a/db/db_tunables.h
+++ b/db/db_tunables.h
@@ -1378,6 +1378,9 @@ REGISTER_TUNABLE("debug.invalid_genid",
                  "Deliberately introduce an invalid genid, FOR TESTING PURPOSE (Default: off)",
                  TUNABLE_BOOLEAN, &gbl_debug_invalid_genid,
                  NOARG | EXPERIMENTAL | INTERNAL, NULL, NULL, NULL, NULL);
+REGISTER_TUNABLE("debug.fail_replay_dispatch",
+                 "Force replay dispatch to fail when verify_retries >= N, FOR TESTING PURPOSE (Default: 0)",
+                 TUNABLE_INTEGER, &gbl_debug_fail_replay_dispatch, EXPERIMENTAL | INTERNAL, NULL, NULL, NULL, NULL);
 REGISTER_TUNABLE(
     "query_plan_percentage",
     "Alarm if the average cost per row of current query plan is n percent above the cost for different query plan."
diff --git a/db/osql_srs.c b/db/osql_srs.c
index 84bafee5c9..1192dbac8b 100644
--- a/db/osql_srs.c
+++ b/db/osql_srs.c
@@ -254,138 +254,164 @@ int srs_tran_empty(struct sqlclntstate *clnt)
 long long gbl_verify_tran_replays = 0;
 int gbl_disttxn_random_retry_poll = 500;
 
-/**
- * Replay transaction using the current history
- *
- */
-static int srs_tran_replay_int(struct sqlclntstate *clnt, int(dispatch_fn)(struct sqlclntstate *))
+int srs_tran_replay_prepare(struct sqlclntstate *clnt) /* arm async replay and dispatch its first pass; 0 on success */
+{
+    clnt->verify_retries = 0; /* fresh replay session: srs_tran_replay_begin counts passes from here */
+    if (!clnt->osql.history) {
+        logmsg(LOGMSG_ERROR, "Trying to replay, but no history?\n");
+        cheap_stack_trace();
+        return -1; /* NOTE(review): query_rc is not set on this path, unlike the failure path below -- confirm intended */
+    }
+    clnt->save_cb = clnt->done_cb; /* stashed so srs_tran_replay_async can restore it when replay ends */
+    clnt->done_cb = srs_tran_replay_async; /* route completion callbacks to the async replay driver */
+    int rc = srs_tran_replay_begin(clnt);
+    if (rc) {
+        clnt->done_cb = clnt->save_cb; /* first dispatch failed: undo the callback swap */
+        clnt->save_cb = NULL;
+        clnt->query_rc = rc;
+    }
+    return rc;
+}
+
+int srs_tran_replay_begin(struct sqlclntstate *clnt) /* start one replay pass: reset per-pass state, dispatch first stmt */
 {
     osqlstate_t *osql = &clnt->osql;
-    srs_tran_query_t *item = 0;
     int rc = 0;
-    int nq = 0;
-    int tnq = 0;
-
-    clnt->verify_retries = 0;
 
+    reset_query_effects(clnt, 0, 1); /* reset on every pass; the pre-refactor comment says num_selected is preserved */
     if (!osql->history) {
         logmsg(LOGMSG_ERROR, "Trying to replay, but no history?\n");
-        cheap_stack_trace();
-        return -1;
+        abort(); /* both callers in this patch check history before calling, so this is treated as fatal */
+    }
+    clnt->verify_retries++;
+    gbl_verify_tran_replays++;
+    if (clnt->dist_timestamp > 0) {
+        int pval = gbl_disttxn_random_retry_poll;
+        if (pval > 1) {
+            poll(0, 0, rand() % pval); /* no fds: just sleeps a random 0..pval-1 ms before this pass */
+        }
     }
 
-    do {
-        /* resending writes do not repeat reads, preserve num_selected */
-        reset_query_effects(clnt, 0, 1); /* Reset it for each retry*/
-        if (!osql->history) {
-            logmsg(LOGMSG_ERROR, "Trying to replay, but no history?\n");
+    // Replays for SERIAL or SNAPISOL will never have select or selectv
+    if (clnt->dbtran.mode == TRANLEVEL_RECOM /* not for modsnap */) {
+        // In read committed, free all the shadow tables except the selectv table (recgenid)
+        rc = osql_shadtbl_reset_for_selectv(clnt);
+        if (rc) {
+            logmsg(LOGMSG_ERROR, "Failed to reset selectv in read committed\n");
             abort();
+            cheap_stack_trace(); /* NOTE(review): unreachable if abort() above is the libc noreturn abort */
+            return -1;
         }
+    } else {
+        osql_shadtbl_close(clnt);
+    }
 
-        clnt->verify_retries++;
-        gbl_verify_tran_replays++;
-        if (clnt->dist_timestamp > 0) {
-            int pval = gbl_disttxn_random_retry_poll;
-            if (pval > 1) {
-                poll(0, 0, rand() % pval);
-            }
-        }
+    if (clnt->verify_retries == gbl_osql_verify_retries_max) { /* final pass: srs_tran_replay_async stops retrying at this count */
+        osql_set_replay(__FILE__, __LINE__, clnt, OSQL_RETRY_LAST);
+    }
 
-        /* Replays for SERIAL or SNAPISOL will never have select or selectv */
-        if (clnt->dbtran.mode == TRANLEVEL_RECOM /* not for modsnap */) {
-            /* we need to free all the shadows but selectv table (recgenid) */
-            rc = osql_shadtbl_reset_for_selectv(clnt);
-            if (rc) {
-                logmsg(LOGMSG_ERROR,
-                       "Failed to reset selectv in read committed\n");
-                abort();
-                cheap_stack_trace();
-                return -1;
-            }
-        } else {
-            osql_shadtbl_close(clnt); 
-        }
+    osql->num_queries = 0; /* per-pass statement counter, bumped in srs_tran_replay_async */
+    clnt->start_gen = bdb_get_rep_gen(thedb->bdb_env);
 
-        if (clnt->verify_retries == gbl_osql_verify_retries_max + 1) {
-            osql_set_replay(__FILE__, __LINE__, clnt, OSQL_RETRY_LAST);
-        }
+    // Dispatch the first statement; srs_tran_replay_async advances replay_cursor for the rest
+    osql->replay_cursor = LISTC_TOP(&osql->history->lst);
+    clnt->done = 0;
+
+    restore_stmt(clnt, osql->replay_cursor);
+    rc = dispatch_sql_query_no_wait(clnt); /* non-zero rc is handed back to the caller, which does the cleanup */
 
-        if (0 /*!bdb_am_i_coherent(thedb->bdb_env)*/) {
-            logmsg(LOGMSG_ERROR,
-                   "Cannot replay, I am incoherent id=%d retries=%d\n",
-                   clnt->queryid, clnt->verify_retries);
-            rc = CDB2ERR_VERIFY_ERROR;
-            break;
+    return rc;
+}
+
+int srs_tran_replay_async(struct sqlclntstate *clnt) /* replay driver installed as done_cb by srs_tran_replay_prepare */
+{
+    osqlstate_t *osql = &clnt->osql;
+    int rc = 0;
+
+    // Set query_rc so that `newsql_done_cb` can clean up properly
+    if (peer_dropped_connection(clnt)) {
+        clnt->query_rc = CDB2ERR_IO_ERROR;
+    }
+    if (clnt->query_rc == CDB2ERR_IO_ERROR) {
+        logmsg(LOGMSG_ERROR, "%s: client disconnected during async replay, aborting\n", __func__);
+        goto done;
+    }
+
+    osql->num_queries++;
+
+    // Schedule the next stmt, or finish if the last retry succeeded at commit (history is NULL then)
+    if (!osql->history) {
+        goto done;
+    }
+    osql->replay_cursor = LISTC_NEXT(osql->replay_cursor, lnk);
+    if (osql->replay_cursor != 0) {
+        clnt->done = 0;
+        restore_stmt(clnt, osql->replay_cursor);
+        rc = dispatch_sql_query_no_wait(clnt);
+        if (rc == 0) {
+            return 0; /* dispatched: this function runs again when that statement completes */
         }
-        nq = 0;
-        clnt->start_gen = bdb_get_rep_gen(thedb->bdb_env);
-        LISTC_FOR_EACH(&osql->history->lst, item, lnk)
-        {
-            clnt->done = 0; /* reset done flag */
-            restore_stmt(clnt, item);
-            if ((rc = dispatch_fn(clnt)) != 0)
-                break;
-            if (!osql->history)
-                break;
-            nq++;
+        if (osql->replay != OSQL_RETRY_NONE) {
+            logmsg(LOGMSG_ERROR, "%p Replaying failed abnormally in dispatch, calling abort, nq=%d, rc=%d\n", clnt,
+                   osql->num_queries, rc);
+            osql_sock_abort(clnt, tran2req(clnt->dbtran.mode));
         }
-        if (rc == 0)
-            tnq = nq;
-
-        /* don't repeat if we fail with unexplicable error, i.e. not a logical
-         * error */
-        if (rc < 0) {
-            if (osql->replay != OSQL_RETRY_NONE) {
-                logmsg(LOGMSG_ERROR,
-                       "%p Replaying failed abnormally, calling abort, nq=%d tnq=%d\n",
-                       clnt, nq, tnq);
-                if (debug_switch_osql_verbose_history_replay()) {
-                    if (osql->history) {
-                        LISTC_FOR_EACH(&osql->history->lst, item, lnk)
-                        {
-                            logmsg(LOGMSG_DEBUG, "\"%s\"\n", print_stmt(clnt, item));
-                        }
-                    }
+        clnt->query_rc = CDB2ERR_INTERNAL; /* dispatch failure is reported as an internal error; rc stays set for the log at done */
+    }
+    // No more stmt in this txn: the current pass is complete
+    else {
+        osql->total_queries = osql->num_queries; /* printed as tnq in the log at done */
+
+        // Not yet reached the retry max
+        if (clnt->verify_retries < gbl_osql_verify_retries_max) {
+            // Start another pass of replay, but only if this pass asked for a retry
+            if (clnt->osql.replay == OSQL_RETRY_DO) {
+                rc = srs_tran_replay_begin(clnt);
+                if (rc == 0) {
+                    return 0;
                 }
-
-                int type = tran2req(clnt->dbtran.mode);
-                osql_sock_abort(clnt, type);
+                if (osql->replay != OSQL_RETRY_NONE) {
+                    logmsg(LOGMSG_ERROR, "%p Replaying failed abnormally in dispatch, calling abort, nq=%d, rc=%d\n",
+                           clnt, osql->num_queries, rc);
+                    osql_sock_abort(clnt, tran2req(clnt->dbtran.mode));
+                }
+                clnt->query_rc = CDB2ERR_INTERNAL;
             }
-            break;
         }
-    } while (osql->replay == OSQL_RETRY_DO && clnt->verify_retries <= gbl_osql_verify_retries_max);
-
-    if (clnt->verify_retries >= gbl_osql_verify_retries_max && osql->xerr.errval) {
-        logmsg(LOGMSG_ERROR, "transaction from pid %d on origin host %s failed %d times with verify errors\n",
-               clnt->last_pid, clnt->origin, clnt->verify_retries);
-        /* Set to NONE to suppress the error from srs_tran_destroy(). */
-        osql_set_replay(__FILE__, __LINE__, clnt, OSQL_RETRY_NONE);
+        // Retry budget exhausted and the last pass still left an error
+        else if (osql->xerr.errval) {
+            logmsg(LOGMSG_ERROR, "transaction from pid %d on origin host %s failed %d times with verify errors\n",
+                   clnt->last_pid, clnt->origin, clnt->verify_retries);
+            clnt->query_rc = CDB2ERR_VERIFY_ERROR;
+        }
     }
 
-    /* replayed, free the session */
+done:
+    // Set to NONE to suppress the error from srs_tran_destroy().
+    osql_set_replay(__FILE__, __LINE__, clnt, OSQL_RETRY_NONE);
+
+    // Finish replay, free the session. NOTE(review): osql->replay_cursor is not cleared here -- confirm it cannot dangle
     if (srs_tran_destroy(clnt)) {
-        logmsg(LOGMSG_ERROR, "%s Fail to destroy transaction replay session\n",
-               __func__);
+        logmsg(LOGMSG_ERROR, "%s Fail to destroy transaction replay session\n", __func__);
     }
     if (rc && clnt->verify_retries < gbl_osql_verify_retries_max) {
-        logmsg(LOGMSG_ERROR, "Uncommittable transaction %d retried %d times,  "
-               "rc=%d [global retr=%lld] nq=%d tnq=%d\n", clnt->queryid,
-               clnt->verify_retries, rc, gbl_verify_tran_replays, nq, tnq);
+        logmsg(LOGMSG_ERROR,
+               "Uncommittable transaction %d retried %d times,  "
+               "rc=%d [global retr=%lld] nq=%d tnq=%d\n",
+               clnt->queryid, clnt->verify_retries, rc, gbl_verify_tran_replays, osql->num_queries,
+               osql->total_queries);
     }
 
-    osql_set_replay(__FILE__, __LINE__, clnt, OSQL_RETRY_NONE);
     clnt->verify_retries = 0;
 
-    return rc;
-}
+    if (rc && !clnt->query_rc) { /* keep an error already recorded; otherwise report the dispatch rc */
+        clnt->query_rc = rc;
+    }
+    clnt->done_cb = clnt->save_cb; /* undo the callback swap made in srs_tran_replay_prepare */
+    clnt->save_cb = NULL;
 
-static int run_sql_query(struct sqlclntstate *clnt)
-{
-    sqlengine_work_appsock(clnt->thd, clnt);
-    return 0;
-}
+    // Hand back to the original completion callback to clean up the residual states
+    clnt->done_cb(clnt);
 
-int srs_tran_replay_inline(struct sqlclntstate *clnt)
-{
-    return srs_tran_replay_int(clnt, run_sql_query);
+    return 0; /* always 0: failures are reported through clnt->query_rc */
 }
diff --git a/db/osql_srs.h b/db/osql_srs.h
index b3218ce6bb..74e7438189 100644
--- a/db/osql_srs.h
+++ b/db/osql_srs.h
@@ -58,10 +58,19 @@ int srs_tran_del_last_query(struct sqlclntstate *clnt);
 int srs_tran_empty(struct sqlclntstate *clnt);
 
 /**
- * Replay transaction using the current history
- *
+ * Prepare the transaction for replay
+ */
+int srs_tran_replay_prepare(struct sqlclntstate *);
+
+/**
+ * Begin the replay of the transaction
+ */
+int srs_tran_replay_begin(struct sqlclntstate *);
+
+/**
+ * Schedule a query during replay
  */
-int srs_tran_replay_inline(struct sqlclntstate *);
+int srs_tran_replay_async(struct sqlclntstate *);
 
 void srs_tran_print_history(struct sqlclntstate *clnt, int indent);
 #endif
diff --git a/db/sql.h b/db/sql.h
index b2300e31a7..c9e14f4349 100644
--- a/db/sql.h
+++ b/db/sql.h
@@ -182,7 +182,10 @@ typedef struct osqlstate {
 
     /* verify handling */
     /* keep the log of sql strings for the current transaction */
+    struct srs_tran_query *replay_cursor;
     struct srs_tran *history;
+    int num_queries;
+    int total_queries;
     int replay;  /* set this when a session is replayed, used by sorese */
     int sent_column_data; /* set this if we've already sent the column data */
 
@@ -785,6 +788,7 @@ struct sqlclntstate {
     uint8_t no_more_heartbeats;
     uint8_t done;
     plugin_func *done_cb; /* newsql_done_evbuffer */
+    plugin_func *save_cb;
     unsigned long long sqltick, sqltick_last_seen;
 
     int using_case_insensitive_like;
diff --git a/db/sqlinterfaces.c b/db/sqlinterfaces.c
index fa2ded8faa..35eb5ed905 100644
--- a/db/sqlinterfaces.c
+++ b/db/sqlinterfaces.c
@@ -4480,14 +4480,13 @@ int done_cb_evbuffer(struct sqlclntstate *clnt)
         return -1;
     }
     if (clnt->osql.replay == OSQL_RETRY_DO) {
-        plugin_func *save_cb = clnt->done_cb;
-        clnt->done_cb = NULL;
-        int rc  = srs_tran_replay_inline(clnt);
-        if (rc && !clnt->query_rc) {
-            clnt->query_rc = rc;
+        if (srs_tran_replay_prepare(clnt) == 0) {
+            return RC_INTERNAL_RETRY;
         }
-        clnt->done_cb = save_cb;
-    } else if (clnt->osql.history && clnt->ctrl_sqlengine == SQLENG_NORMAL_PROCESS) {
+    }
+    /* Set to NONE to suppress the error from srs_tran_destroy(). */
+    osql_set_replay(__FILE__, __LINE__, clnt, OSQL_RETRY_NONE);
+    if (clnt->osql.history && clnt->ctrl_sqlengine == SQLENG_NORMAL_PROCESS) {
         srs_tran_destroy(clnt);
     }
     Pthread_mutex_lock(&lru_evbuffers_mtx); /* protect log_long_running_stmts_evbuffer() */
@@ -4601,6 +4600,7 @@ static void sqlengine_work_lua_thread(void *thddata, void *work)
 }
 
 int gbl_debug_sqlthd_failures;
+int gbl_debug_fail_replay_dispatch;
 int gbl_enable_internal_sql_stmt_caching = 1;
 
 static int execute_verify_indexes(struct sqlthdstate *thd, struct sqlclntstate *clnt)
@@ -5055,6 +5055,11 @@ static int enqueue_sql_query(struct sqlclntstate *clnt, int force_dispatch)
     }
 
     struct string_ref *sr = get_ref(clnt->sql_ref);
+    if (gbl_debug_fail_replay_dispatch && clnt->osql.replay != OSQL_RETRY_NONE &&
+        clnt->verify_retries >= gbl_debug_fail_replay_dispatch) {
+        put_ref(&sr);
+        return -1;
+    }
     if ((rc = thdpool_enqueue(pool, sqlengine_work_appsock_pp,
                               clnt, clnt->queue_me, sr, flags)) != 0) {
         if ((in_client_trans(clnt) || clnt->osql.replay == OSQL_RETRY_DO) &&
diff --git a/net/sqlwriter.c b/net/sqlwriter.c
index 38e4173daa..17c00d444b 100644
--- a/net/sqlwriter.c
+++ b/net/sqlwriter.c
@@ -455,8 +455,9 @@ int sql_peer_check(struct sqlwriter *writer)
 int sql_done(struct sqlwriter *writer)
 {
     struct sqlclntstate *clnt = writer->clnt;
-    if (done_cb_evbuffer(clnt) != 0) {
-        return -1;
+    int rc = done_cb_evbuffer(clnt);
+    if (rc != 0) {
+        return rc;
     }
     Pthread_mutex_lock(&writer->wr_lock);
     writer->done = 1;
diff --git a/plugins/newsql/newsql_evbuffer.c b/plugins/newsql/newsql_evbuffer.c
index d573b29389..dcbc6f78d3 100644
--- a/plugins/newsql/newsql_evbuffer.c
+++ b/plugins/newsql/newsql_evbuffer.c
@@ -176,7 +176,8 @@ static void newsql_reset_evbuffer(struct newsql_appdata_evbuffer *appdata)
 static int newsql_done_cb(struct sqlclntstate *clnt)
 {
     struct newsql_appdata_evbuffer *appdata = clnt->appdata;
-    if (sql_done(appdata->writer) == 0) {
+    int rc = sql_done(appdata->writer);
+    if (rc == 0) {
         if (clnt->added_to_hist) {
             clnt->added_to_hist = 0;
         } else {
@@ -184,7 +185,7 @@ static int newsql_done_cb(struct sqlclntstate *clnt)
         }
         appdata->query = NULL;
         evtimer_once(appdata->base, rd_hdr, appdata);
-    } else {
+    } else if (rc < 0) {
         appdata->cleanup_ev = event_new(appdata->base, -1, 0, newsql_cleanup, appdata);
         event_active(appdata->cleanup_ev, 0, 0);
     }
diff --git a/tests/replay_trans.test/Makefile b/tests/replay_trans.test/Makefile
index e05866ba3a..2d39e80074 100644
--- a/tests/replay_trans.test/Makefile
+++ b/tests/replay_trans.test/Makefile
@@ -4,5 +4,5 @@ else
   include $(TESTSROOTDIR)/testcase.mk
 endif
 ifeq ($(TEST_TIMEOUT),)
-	export TEST_TIMEOUT=1m
+	export TEST_TIMEOUT=3m
 endif
diff --git a/tests/replay_trans.test/runit b/tests/replay_trans.test/runit
index 010db35088..279eb3738d 100755
--- a/tests/replay_trans.test/runit
+++ b/tests/replay_trans.test/runit
@@ -1,5 +1,5 @@
 #!/usr/bin/env bash
-bash -n "$0" | exit 1
+bash -n "$0" || exit 1
 
 set -x
 source ${TESTSROOTDIR}/tools/runit_common.sh
@@ -23,6 +23,11 @@ function init() {
     cdb2sql ${CDB2_OPTIONS} $dbnm default "INSERT INTO t1 VALUES(1, NOW())"
 
     master=`getmaster`
+    if [ -n "$CLUSTER" ] ; then
+        logfile="$TESTDIR/logs/$dbnm.$master.db"
+    else
+        logfile="$TESTDIR/logs/$dbnm.db"
+    fi
     cdb2sql --tabs ${CDB2_OPTIONS} $dbnm --host $master "PUT TUNABLE 'debug.invalid_genid' 1"
 }
 
@@ -56,8 +61,217 @@ EOF
     assert_fail $?
 }
 
+function set_all_nodes_tunable() { # usage: set_all_nodes_tunable <tunable> <value>
+    local tunable=$1
+    local value=$2
+    if [ -n "$CLUSTER" ]; then # clustered run: apply on every node listed in $CLUSTER
+        for node in $CLUSTER; do
+            cdb2sql --tabs ${CDB2_OPTIONS} $dbnm --host $node "PUT TUNABLE '$tunable' $value"
+        done
+    else # non-clustered run: apply on $master only
+        cdb2sql --tabs ${CDB2_OPTIONS} $dbnm --host $master "PUT TUNABLE '$tunable' $value"
+    fi
+}
+
+function run_client_disconnect_during_retry() { # kill the client mid-replay; server must notice and stay healthy
+    echo "Test: client disconnect during retry"
+    # Raise retry max on all nodes so retries are still in flight when
+    # timeout kills the client (the retry loop runs on the SQL node the
+    # client is connected to, which may not be the master).
+    set_all_nodes_tunable osql_verify_retry_max 100000
+
+    # Client starts a tx that will retry for a very long time (invalid_genid is on).
+    # timeout kills the client after 2s while retries are in flight.
+    timeout 2 cdb2sql ${CDB2_OPTIONS} $dbnm default - <<EOF 2>&1 || true
+BEGIN
+UPDATE t1 SET j = NOW() WHERE i = 1;
+COMMIT
+EOF
+    # Give the server a moment to clean up the dead client
+    sleep 2
+
+    # Restore default (NOTE(review): assumes 499 is the stock osql_verify_retry_max; confirm)
+    set_all_nodes_tunable osql_verify_retry_max 499
+
+    # The server should still be healthy — run a simple query to prove it
+    out=$(cdb2sql --tabs ${CDB2_OPTIONS} $dbnm default "SELECT i FROM t1 WHERE i = 1")
+    if [[ "$out" != "1" ]]; then
+        failexit "server unhealthy after client disconnect during retry, got: $out"
+    fi
+
+    # The disconnect log appears on whichever node ran the SQL — check all logs.
+    found_disconnect=0
+    if [ -n "$CLUSTER" ]; then
+        for node in $CLUSTER; do
+            if grep -q "client disconnected during async replay" "$TESTDIR/logs/$dbnm.$node.db"; then
+                found_disconnect=1
+                break
+            fi
+        done
+    else
+        if grep -q "client disconnected during async replay" "$logfile"; then
+            found_disconnect=1
+        fi
+    fi
+    if [[ $found_disconnect -eq 0 ]]; then
+        failexit "server did not detect client disconnect during replay"
+    fi
+
+    # Sanity check: row i=1 is still readable. This only asserts j is non-empty, not its value.
+    val=$(cdb2sql --tabs ${CDB2_OPTIONS} $dbnm default "SELECT j FROM t1 WHERE i = 1")
+    if [[ -z "$val" ]]; then
+        failexit "data corrupted after client disconnect during retry"
+    fi
+
+    echo "client disconnect during retry: passed"
+}
+
+function run_client_timeout_during_retry() { # client API timeout fires mid-replay; replay state must be torn down cleanly
+    echo "Test: client timeout during retry"
+    set_all_nodes_tunable osql_verify_retry_max 100000
+
+    # Client-side api_call_timeout expires while the server is still retrying.
+    # This exercises the peer_dropped_connection path in srs_tran_replay_async.
+    COMDB2_CONFIG_API_CALL_TIMEOUT=1000 cdb2sql ${CDB2_OPTIONS} $dbnm default - <<EOF 2>&1 || true
+BEGIN
+UPDATE t1 SET j = NOW() WHERE i = 1;
+COMMIT
+EOF
+
+    sleep 2
+
+    set_all_nodes_tunable osql_verify_retry_max 499 # NOTE(review): assumes 499 is the stock value; confirm
+
+    out=$(cdb2sql --tabs ${CDB2_OPTIONS} $dbnm default "SELECT i FROM t1 WHERE i = 1")
+    if [[ "$out" != "1" ]]; then
+        failexit "server unhealthy after client timeout during retry, got: $out"
+    fi
+
+    # The replay state must be cleaned up without "state is wrong" errors on any node's log.
+    found_state_wrong=0
+    if [ -n "$CLUSTER" ]; then
+        for node in $CLUSTER; do
+            if grep -q "state is wrong" "$TESTDIR/logs/$dbnm.$node.db"; then
+                found_state_wrong=1
+                break
+            fi
+        done
+    else
+        if grep -q "state is wrong" "$logfile"; then
+            found_state_wrong=1
+        fi
+    fi
+    if [[ $found_state_wrong -eq 1 ]]; then
+        failexit "replay state not cleaned up on client disconnect"
+    fi
+
+    echo "client timeout during retry: passed"
+}
+
+function run_queue_full_during_retry() { # replay while the SQL engine pool is saturated; server must stay healthy
+    echo "Test: queue full during retry"
+    # Shrink the SQL engine pool so it's easy to saturate.
+    # Set on all nodes since the client may connect to any of them.
+    set_all_nodes_tunable sqlenginepool.maxt 4
+    set_all_nodes_tunable sqlenginepool.maxq 2
+
+    # Saturate the pool with long-running queries (6 clients against maxt=4 + maxq=2)
+    for i in $(seq 1 6); do
+        cdb2sql ${CDB2_OPTIONS} $dbnm default "SELECT SLEEP(5)" &>/dev/null &
+    done
+    sleep 1
+
+    # Trigger a tx that will need to retry — pool is full, dispatch may fail (either outcome is accepted)
+    cdb2sql ${CDB2_OPTIONS} $dbnm default - <<EOF 2>&1 || true
+BEGIN
+UPDATE t1 SET j = NOW() WHERE i = 1;
+COMMIT
+EOF
+
+    # Wait for the background SLEEP queries to finish and the pool to drain
+    wait
+
+    # Restore pool settings (NOTE(review): assumes 48/0 are the stock maxt/maxq values; confirm)
+    set_all_nodes_tunable sqlenginepool.maxt 48
+    set_all_nodes_tunable sqlenginepool.maxq 0
+
+    # Server should still be healthy
+    out=$(cdb2sql --tabs ${CDB2_OPTIONS} $dbnm default "SELECT i FROM t1 WHERE i = 1")
+    if [[ "$out" != "1" ]]; then
+        failexit "server unhealthy after queue-full retry, got: $out"
+    fi
+    echo "queue full during retry: passed"
+}
+
+function run_replay_dispatch_fail() {
+    echo "Test: replay dispatch failure must clean up replay state"
+
+    # Force every replay dispatch to fail: the UPDATE gets a verify error
+    # (debug.invalid_genid is on), replay becomes OSQL_RETRY_DO, and the
+    # dispatch issued by srs_tran_replay_prepare is rejected by the tunable.
+    # If replay isn't cleared before srs_tran_destroy, "state is wrong" is
+    # logged on the node that served the client, so every node's log is checked.
+    set_all_nodes_tunable 'debug.fail_replay_dispatch' 1
+
+    cdb2sql ${CDB2_OPTIONS} $dbnm default "UPDATE t1 SET j = NOW() WHERE i = 1" 2>&1 || true
+
+    set_all_nodes_tunable 'debug.fail_replay_dispatch' 0
+
+    out=$(cdb2sql --tabs ${CDB2_OPTIONS} $dbnm default "SELECT i FROM t1 WHERE i = 1")
+    if [[ "$out" != "1" ]]; then
+        failexit "server unhealthy after replay dispatch failure, got: $out"
+    fi
+
+    local node logs=("$logfile")
+    for node in $CLUSTER; do logs+=("$TESTDIR/logs/$dbnm.$node.db"); done
+    if grep -q "state is wrong" "${logs[@]}"; then
+        failexit "replay state not cleaned up after dispatch failure"
+    fi
+
+    echo "replay dispatch failure: passed"
+}
+
+function run_async_replay_dispatch_fail() {
+    echo "Test: async replay dispatch failure must clean up replay state"
+
+    # Tunable=2 lets the first replay attempt (verify_retries=1) through and
+    # rejects every later one (verify_retries >= 2).  Flow:
+    #   1. UPDATE gets verify error, replay=OSQL_RETRY_DO.
+    #   2. done_cb_evbuffer → srs_tran_replay_prepare → srs_tran_replay_begin
+    #      dispatches with verify_retries=1, succeeds.
+    #   3. Replayed UPDATE hits verify error again.
+    #   4. srs_tran_replay_async runs, calls srs_tran_replay_begin for the
+    #      next pass, which dispatches with verify_retries=2 — rejected.
+    #   5. srs_tran_replay_async falls through to its done: label.
+    # If the done: path doesn't clear replay before srs_tran_destroy, "state is
+    # wrong" is logged on the node that served the client — check every node's log.
+    set_all_nodes_tunable 'debug.fail_replay_dispatch' 2
+
+    cdb2sql ${CDB2_OPTIONS} $dbnm default "UPDATE t1 SET j = NOW() WHERE i = 1" 2>&1 || true
+
+    set_all_nodes_tunable 'debug.fail_replay_dispatch' 0
+
+    out=$(cdb2sql --tabs ${CDB2_OPTIONS} $dbnm default "SELECT i FROM t1 WHERE i = 1")
+    if [[ "$out" != "1" ]]; then
+        failexit "server unhealthy after async replay dispatch failure, got: $out"
+    fi
+
+    local node logs=("$logfile")
+    for node in $CLUSTER; do logs+=("$TESTDIR/logs/$dbnm.$node.db"); done
+    if grep -q "state is wrong" "${logs[@]}"; then
+        failexit "replay state not cleaned up after async dispatch failure"
+    fi
+
+    echo "async replay dispatch failure: passed"
+}
+
 init
 run_bad_tx_with_intransres
 run_bad_tx_without_intransres
+run_client_disconnect_during_retry
+run_client_timeout_during_retry
+run_queue_full_during_retry
+run_replay_dispatch_fail
+run_async_replay_dispatch_fail
 verify
 cleanup