From a6c34d8775a359d48b9118bedb3f720b8e012506 Mon Sep 17 00:00:00 2001 From: Deepak Singhal Date: Thu, 7 May 2026 01:37:25 +0000 Subject: [PATCH] [202511] zebra: defer RIB sweep until metaqueue is drained Cherry-pick of sonic-net/sonic-buildimage#27093 to 202511 branch. Patch renumbered from 0108 to 0107 to match 202511 series. Signed-off-by: Deepak Singhal --- ...er-rib-sweep-until-metaqueue-drained.patch | 76 +++++++++++++++++++ src/sonic-frr/patch/series | 3 +- 2 files changed, 78 insertions(+), 1 deletion(-) create mode 100644 src/sonic-frr/patch/0107-zebra-defer-rib-sweep-until-metaqueue-drained.patch diff --git a/src/sonic-frr/patch/0107-zebra-defer-rib-sweep-until-metaqueue-drained.patch b/src/sonic-frr/patch/0107-zebra-defer-rib-sweep-until-metaqueue-drained.patch new file mode 100644 index 00000000000..c2642dea7a0 --- /dev/null +++ b/src/sonic-frr/patch/0107-zebra-defer-rib-sweep-until-metaqueue-drained.patch @@ -0,0 +1,76 @@ +From a7a61b9bc812108dac52a17ac060ae0ab656e1bf Mon Sep 17 00:00:00 2001 +From: Deepak Singhal +Date: Tue, 5 May 2026 05:23:13 +0000 +Subject: [PATCH] SONiC-ONLY: zebra: defer RIB sweep until metaqueue is drained +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Targeted carry patch — to be dropped when the upstream startup-ordering +rework (https://github.com/FRRouting/frr/pull/21550) lands in a future +FRR release. + +When zebra starts without -K (graceful_restart=0), the sweep timer fires +with a 0-second delay, but the metaqueue work_queue has a 10ms batching +hold (ZEBRA_RIB_PROCESS_HOLD_TIME). This causes the sweep to walk an +empty RIB and miss stale routes that are still queued in the metaqueue. + +Defer the sweep if the metaqueue still has pending entries, rescheduling +with 2x the hold time (20ms) to ensure routes are processed into the +RIB before sweeping. Bound the retry to 50 attempts (~1 second) to +avoid deferring forever if the metaqueue never fully drains. 
+ +Upstream: https://github.com/FRRouting/frr/pull/21550 (structural fix, pending merge) +Upstream: https://github.com/FRRouting/frr/pull/21826 (this targeted fix, closed in favor of the above) +Fixes: https://github.com/sonic-net/sonic-buildimage/issues/27012 + +Signed-off-by: Deepak Singhal +--- + zebra/zebra_rib.c | 28 ++++++++++++++++++++++++++++ + 1 file changed, 28 insertions(+) + +diff --git a/zebra/zebra_rib.c b/zebra/zebra_rib.c +index 35a125b1fd..43c5d8bf60 100644 +--- a/zebra/zebra_rib.c ++++ b/zebra/zebra_rib.c +@@ -5027,9 +5027,37 @@ void rib_sweep_table(struct route_table *table) + /* Sweep all RIB tables. */ + void rib_sweep_route(struct event *t) + { ++ static unsigned int defer_count; + struct vrf *vrf; + struct zebra_vrf *zvrf; + ++ /* ++ * Kernel routes read by route_read() are queued in the metaqueue ++ * and only move into the RIB when the work_queue fires (after the ++ * hold timer, ZEBRA_RIB_PROCESS_HOLD_TIME = 10 ms). If we sweep ++ * before the metaqueue drains, the RIB is empty and no stale ++ * routes are cleaned up. Reschedule until the queue is empty. ++ * ++ * This is safe because zebra's event loop is single-threaded, so ++ * mq->size cannot change while we are in this callback. ++ * ++ * Bound the retry to avoid deferring forever if the metaqueue ++ * never fully drains (e.g. heavy convergence at startup). 
++ */ ++ if (zrouter.mq->size > 0) { ++ if (++defer_count <= 50) { ++ if (IS_ZEBRA_DEBUG_RIB) ++ zlog_debug("RIB sweep deferred: metaqueue still has %u entries", ++ zrouter.mq->size); ++ event_add_timer_msec(zrouter.master, rib_sweep_route, NULL, ++ ZEBRA_RIB_PROCESS_HOLD_TIME * 2, &zrouter.t_rib_sweep); ++ return; ++ } ++ zlog_warn("RIB sweep: metaqueue still non-empty after %u retries, sweeping anyway", ++ defer_count - 1); ++ } ++ defer_count = 0; ++ + zrouter.rib_sweep_time = monotime(NULL); + /* TODO: Change to debug */ + zlog_info("Sweeping the RIB for stale routes..."); +-- +2.34.1 + diff --git a/src/sonic-frr/patch/series b/src/sonic-frr/patch/series index a6079deb228..7d090d57a8b 100644 --- a/src/sonic-frr/patch/series +++ b/src/sonic-frr/patch/series @@ -64,4 +64,5 @@ 0103-SONiC-ONLY-bgpd-reduce-suppress-fib-advertisement-delay-to-50ms.patch 0104-SONiC-ONLY-zebra-skip-if-add-update-in-speed-timer-for-unready-ifp.patch 0105-bgpd-Show-all-advertised-paths-including-non-best-paths-only-if-addpath-is-enabled.patch -0106-bgpd-Fix-suppress-fib-pending-config-race-condition.patch \ No newline at end of file +0106-bgpd-Fix-suppress-fib-pending-config-race-condition.patch +0107-zebra-defer-rib-sweep-until-metaqueue-drained.patch