Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions zebra/dplane_fpm_nl.c
Original file line number Diff line number Diff line change
Expand Up @@ -1102,6 +1102,7 @@ static int fpm_nl_enqueue(struct fpm_nl_ctx *fnc, struct zebra_dplane_ctx *ctx)
case DPLANE_OP_INTF_INSTALL:
case DPLANE_OP_INTF_UPDATE:
case DPLANE_OP_INTF_DELETE:
case DPLANE_OP_INTF_SPEED_GET:
case DPLANE_OP_TC_QDISC_INSTALL:
case DPLANE_OP_TC_QDISC_UNINSTALL:
case DPLANE_OP_TC_CLASS_ADD:
Expand Down
20 changes: 9 additions & 11 deletions zebra/if_netlink.c
Original file line number Diff line number Diff line change
Expand Up @@ -269,13 +269,12 @@ static void netlink_vrf_change(struct nlmsghdr *h, struct rtattr *tb,
ctx, *(uint32_t *)RTA_DATA(attr[IFLA_VRF_TABLE]));
}

static uint32_t get_iflink_speed(struct interface *interface, int *error)
uint32_t kernel_get_speed(vrf_id_t vrf_id, const char *ifname, int *error)
{
struct ifreq ifdata;
struct ethtool_cmd ecmd;
int sd;
int rc;
const char *ifname = interface->name;
uint32_t ret;

if (error)
Expand All @@ -293,8 +292,7 @@ static uint32_t get_iflink_speed(struct interface *interface, int *error)

/* use ioctl to get speed of an interface */
frr_with_privs(&zserv_privs) {
sd = vrf_socket(PF_INET, SOCK_DGRAM, IPPROTO_IP,
interface->vrf->vrf_id, NULL);
sd = vrf_socket(PF_INET, SOCK_DGRAM, IPPROTO_IP, vrf_id, NULL);
if (sd < 0) {
if (IS_ZEBRA_DEBUG_KERNEL)
zlog_debug("Failure to read interface %s speed: %d %s",
Expand All @@ -309,8 +307,7 @@ static uint32_t get_iflink_speed(struct interface *interface, int *error)
return 0;
}
/* Get the current link state for the interface */
rc = vrf_ioctl(interface->vrf->vrf_id, sd, SIOCETHTOOL,
(char *)&ifdata);
rc = vrf_ioctl(vrf_id, sd, SIOCETHTOOL, (char *)&ifdata);
}
if (rc < 0) {
if (errno != EOPNOTSUPP && IS_ZEBRA_DEBUG_KERNEL)
Expand Down Expand Up @@ -340,11 +337,6 @@ static uint32_t get_iflink_speed(struct interface *interface, int *error)
return ret;
}

uint32_t kernel_get_speed(struct interface *ifp, int *error)
{
return get_iflink_speed(ifp, error);
}

static ssize_t
netlink_gre_set_msg_encoder(struct zebra_dplane_ctx *ctx, void *buf,
size_t buflen)
Expand Down Expand Up @@ -1514,6 +1506,12 @@ int netlink_link_change(struct nlmsghdr *h, ns_id_t ns_id, int startup)
} else
zif_slave_type = ZEBRA_IF_SLAVE_OTHER;
}
if (startup) {
dplane_ctx_set_ifp_speed(ctx, kernel_get_speed(vrf_id, name, NULL));
dplane_ctx_set_ifp_speed_set(ctx, true);
} else
dplane_ctx_set_ifp_speed_set(ctx, false);
Comment on lines +1509 to +1513

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Startup error silently suppresses the retry mechanism

kernel_get_speed is called with NULL for the error pointer, so any error return (including EOPNOTSUPP for virtual/bridge interfaces, which sets speed=0, error=INTERFACE_SPEED_ERROR_UNKNOWN) is silently swallowed. The caller then unconditionally sets speed_set=true, which in zebra_if_dplane_ifp_handling skips both dplane_intf_speed_get and the 15-second retry timer. An interface that reports the wrong speed at startup will never be corrected.

Passing &error and treating INTERFACE_SPEED_ERROR_UNKNOWN as speed_set=false would preserve the retry logic for uncertain speeds:

		int sp_err = 0;
		uint32_t sp = kernel_get_speed(vrf_id, name, &sp_err);

		if (sp_err == 0) {
			dplane_ctx_set_ifp_speed(ctx, sp);
			dplane_ctx_set_ifp_speed_set(ctx, true);
		} else {
			dplane_ctx_set_ifp_speed_set(ctx, false);
		}
Prompt To Fix With AI
This is a comment left during a code review.
Path: zebra/if_netlink.c
Line: 1509-1513

Comment:
**Startup error silently suppresses the retry mechanism**

`kernel_get_speed` is called with `NULL` for the `error` pointer, so any error return (including `EOPNOTSUPP` for virtual/bridge interfaces, which sets `speed=0, error=INTERFACE_SPEED_ERROR_UNKNOWN`) is silently swallowed. The caller then unconditionally sets `speed_set=true`, which in `zebra_if_dplane_ifp_handling` skips both `dplane_intf_speed_get` and the 15-second retry timer. An interface that reports the wrong speed at startup will never be corrected.

Passing `&error` and treating `INTERFACE_SPEED_ERROR_UNKNOWN` as `speed_set=false` would preserve the retry logic for uncertain speeds:

```c
		int sp_err = 0;
		uint32_t sp = kernel_get_speed(vrf_id, name, &sp_err);

		if (sp_err == 0) {
			dplane_ctx_set_ifp_speed(ctx, sp);
			dplane_ctx_set_ifp_speed_set(ctx, true);
		} else {
			dplane_ctx_set_ifp_speed_set(ctx, false);
		}
```

How can I resolve this? If you propose a fix, please make it concise.


dplane_ctx_set_ifp_zif_slave_type(ctx, zif_slave_type);
dplane_ctx_set_ifp_vrf_id(ctx, vrf_id);
dplane_ctx_set_ifp_master_ifindex(ctx, master_infindex);
Expand Down
122 changes: 88 additions & 34 deletions zebra/interface.c
Original file line number Diff line number Diff line change
Expand Up @@ -64,22 +64,40 @@ static const char *if_zebra_data_state(uint8_t state)
static void if_zebra_speed_update(struct event *event)
{
struct interface *ifp = EVENT_ARG(event);
struct zebra_if *zif = ifp->info;

dplane_intf_speed_get(ifp);
}

static void zebra_if_schedule_speed_update(struct zebra_if *zif, int timeout)
{
event_add_timer(zrouter.master, if_zebra_speed_update, zif->ifp, timeout,
&zif->speed_update);
event_ignore_late_timer(zif->speed_update);
}

static void zebra_if_speed_update_ctx(struct zebra_dplane_ctx *ctx, struct interface *ifp)
{
enum zebra_dplane_result dp_res;
bool speed_set, changed = false;
struct zebra_if *zif;
uint32_t new_speed;
bool changed = false;
int error = 0;

zif->speed_checked++;
if (!ifp)
return;

new_speed = kernel_get_speed(ifp, &error);
zif = ifp->info;
zif->speed_checked++;

dp_res = dplane_ctx_get_status(ctx);
/* error may indicate vrf not available or
* interfaces not available.
* note that loopback & virtual interfaces can return 0 as speed
*/
if (error == INTERFACE_SPEED_ERROR_READ)
if (dp_res == ZEBRA_DPLANE_REQUEST_FAILURE)
return;

speed_set = dplane_ctx_get_ifp_speed_set(ctx);
new_speed = speed_set ? dplane_ctx_get_ifp_speed(ctx) : 0;
if (new_speed != ifp->speed) {
zlog_info("%s: %s old speed: %u new speed: %u", __func__,
ifp->name, ifp->speed, new_speed);
Expand All @@ -88,7 +106,7 @@ static void if_zebra_speed_update(struct event *event)
changed = true;
}

if (changed || error == INTERFACE_SPEED_ERROR_UNKNOWN) {
if (changed || !speed_set) {
#define SPEED_UPDATE_SLEEP_TIME 5
#define SPEED_UPDATE_COUNT_MAX (4 * 60 / SPEED_UPDATE_SLEEP_TIME)
/*
Expand All @@ -103,17 +121,44 @@ static void if_zebra_speed_update(struct event *event)
* to not update the system to keep track of that. This
* is far simpler to just stop trying after 4 minutes
*/
if (error == INTERFACE_SPEED_ERROR_UNKNOWN &&
zif->speed_update_count == SPEED_UPDATE_COUNT_MAX)
if (!speed_set && zif->speed_update_count == SPEED_UPDATE_COUNT_MAX)
return;

zif->speed_update_count++;
event_add_timer(zrouter.master, if_zebra_speed_update, ifp,
SPEED_UPDATE_SLEEP_TIME, &zif->speed_update);
event_ignore_late_timer(zif->speed_update);
zebra_if_schedule_speed_update(zif, SPEED_UPDATE_SLEEP_TIME);
}
}

void zebra_if_speed_process(struct zebra_dplane_ctx *ctx)
{
const char *ifname = dplane_ctx_get_ifname(ctx);
vrf_id_t vrf_id = dplane_ctx_get_vrf(ctx);
uint32_t speed;
int error = 0;

speed = kernel_get_speed(vrf_id, ifname, &error);
switch (error) {
case 0:
dplane_ctx_set_status(ctx, ZEBRA_DPLANE_REQUEST_SUCCESS);
dplane_ctx_set_ifp_speed(ctx, speed);
dplane_ctx_set_ifp_speed_set(ctx, true);
return;
case INTERFACE_SPEED_ERROR_UNKNOWN:
dplane_ctx_set_status(ctx, ZEBRA_DPLANE_REQUEST_SUCCESS);
dplane_ctx_set_ifp_speed_set(ctx, false);
return;
case INTERFACE_SPEED_ERROR_READ:
/* INTERFACE_SPEED_ERROR_READ: means no device, no vrf */
break;
default:
if (IS_ZEBRA_DEBUG_KERNEL)
zlog_debug("kernel_get_speed returned an unknown error %u", error);
break;
Comment on lines +154 to +156

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Format specifier mismatch in default error-case log

error is declared as int but formatted with %u (unsigned). While benign for small values, a negative error code would log as a large unsigned number, making debugging harder.

Suggested change
if (IS_ZEBRA_DEBUG_KERNEL)
zlog_debug("kernel_get_speed returned an unknown error %u", error);
break;
if (IS_ZEBRA_DEBUG_KERNEL)
zlog_debug("kernel_get_speed returned an unknown error %d", error);
Prompt To Fix With AI
This is a comment left during a code review.
Path: zebra/interface.c
Line: 154-156

Comment:
**Format specifier mismatch in default error-case log**

`error` is declared as `int` but formatted with `%u` (unsigned). While benign for small values, a negative `error` code would log as a large unsigned number, making debugging harder.

```suggestion
		if (IS_ZEBRA_DEBUG_KERNEL)
			zlog_debug("kernel_get_speed returned an unknown error %d", error);
```

How can I resolve this? If you propose a fix, please make it concise.

}

dplane_ctx_set_status(ctx, ZEBRA_DPLANE_REQUEST_FAILURE);
}

static void zebra_if_node_destroy(route_table_delegate_t *delegate,
struct route_table *table,
struct route_node *node)
Expand Down Expand Up @@ -159,19 +204,6 @@ static int if_zebra_new_hook(struct interface *ifp)

ifp->info = zebra_if;

/*
* Some platforms are telling us that the interface is
* up and ready to go. When we check the speed we
* sometimes get the wrong value. Wait a couple
* of seconds and ask again. Hopefully it's all settled
* down upon startup.
*/
zebra_if->speed_update_count = 0;
zebra_if->speed_checked = 0;
event_add_timer(zrouter.master, if_zebra_speed_update, ifp, 15,
&zebra_if->speed_update);
event_ignore_late_timer(zebra_if->speed_update);

return 0;
}

Expand Down Expand Up @@ -933,9 +965,7 @@ static void if_handle_bond_speed_change(struct interface *ifp)
if (part_of_bond->bond_if) {
zif = part_of_bond->bond_if->info;

if (!event_is_scheduled(zif->speed_update))
event_add_timer(zrouter.master, if_zebra_speed_update, part_of_bond->bond_if, 1,
&zif->speed_update);
zebra_if_schedule_speed_update(zif, 1);
}
}

Expand Down Expand Up @@ -997,9 +1027,8 @@ void if_up(struct interface *ifp, bool install_connected)
if (zif->flags & ZIF_FLAG_EVPN_MH_UPLINK)
zebra_evpn_mh_uplink_oper_update(zif);

event_add_timer(zrouter.master, if_zebra_speed_update, ifp, 0,
&zif->speed_update);
event_ignore_late_timer(zif->speed_update);
event_cancel(&zif->speed_update);
dplane_intf_speed_get(ifp);

if_addr_wakeup(ifp);

Expand Down Expand Up @@ -2026,8 +2055,8 @@ static void zebra_if_dplane_ifp_handling(struct zebra_dplane_ctx *ctx)
uint32_t mtu;
ns_id_t link_nsid;
struct zebra_if *zif;
bool protodown, protodown_set, startup;
uint32_t rc_bitfield;
bool protodown, protodown_set, startup, speed_set;
uint32_t rc_bitfield, speed;
uint8_t old_hw_addr[INTERFACE_HWADDR_MAX];
char *desc;
uint8_t family;
Expand All @@ -2054,6 +2083,8 @@ static void zebra_if_dplane_ifp_handling(struct zebra_dplane_ctx *ctx)
desc = dplane_ctx_get_ifp_desc(ctx);
family = dplane_ctx_get_ifp_family(ctx);
change_flags = dplane_ctx_get_ifp_change_flags(ctx);
speed_set = dplane_ctx_get_ifp_speed_set(ctx);
speed = dplane_ctx_get_ifp_speed(ctx);

#ifndef AF_BRIDGE
/*
Expand Down Expand Up @@ -2092,7 +2123,24 @@ static void zebra_if_dplane_ifp_handling(struct zebra_dplane_ctx *ctx)
if_update_state_mtu(ifp, mtu);
if_update_state_mtu6(ifp, mtu);
if_update_state_metric(ifp, 0);
if_update_state_speed(ifp, kernel_get_speed(ifp, NULL));
if (!speed_set) {
speed = 0;
/* Query initial speed if not provided by dplane */
dplane_intf_speed_get(ifp);

/*
* Some platforms are telling us that the interface is
* up and ready to go. When we check the speed we
* sometimes get the wrong value. Wait a couple
* of seconds and ask again. Hopefully it's all settled
* down upon startup.
*/
zif->speed_update_count = 0;
zif->speed_checked = 0;
zebra_if_schedule_speed_update(zif, 15);
} else
zif->speed_checked++;
if_update_state_speed(ifp, speed);
ifp->ptm_status = ZEBRA_PTM_STATUS_UNKNOWN;
ifp->txqlen = dplane_ctx_get_intf_txqlen(ctx);

Expand Down Expand Up @@ -2196,6 +2244,10 @@ static void zebra_if_dplane_ifp_handling(struct zebra_dplane_ctx *ctx)
if_update_state_mtu(ifp, mtu);
if_update_state_mtu6(ifp, mtu);
if_update_state_metric(ifp, 0);
if (speed_set) {
zif->speed_checked++;
if_update_state_speed(ifp, speed);
}
ifp->txqlen = dplane_ctx_get_intf_txqlen(ctx);

/*
Expand Down Expand Up @@ -2403,6 +2455,8 @@ void zebra_if_dplane_result(struct zebra_dplane_ctx *ctx)
zebra_if_update_ctx(ctx, ifp);
} else if (op == DPLANE_OP_INTF_NETCONFIG) {
zebra_if_netconf_update_ctx(ctx, ifp, ifindex);
} else if (op == DPLANE_OP_INTF_SPEED_GET) {
zebra_if_speed_update_ctx(ctx, ifp);
}
}

Expand Down
1 change: 1 addition & 0 deletions zebra/interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,7 @@ extern void zebra_l2_unmap_slave_from_bond(struct zebra_if *zif);
extern const char *zebra_protodown_rc_str(uint32_t protodown_rc, char *pd_buf,
uint32_t pd_buf_len);
void zebra_if_dplane_result(struct zebra_dplane_ctx *ctx);
void zebra_if_speed_process(struct zebra_dplane_ctx *ctx);

#ifdef HAVE_PROC_NET_DEV
extern void ifstat_update_proc(void);
Expand Down
1 change: 1 addition & 0 deletions zebra/kernel_netlink.c
Original file line number Diff line number Diff line change
Expand Up @@ -1464,6 +1464,7 @@ static enum netlink_msg_status nl_put_msg(struct nl_batch *bth,
case DPLANE_OP_BR_PORT_UPDATE:
return FRR_NETLINK_SUCCESS;

case DPLANE_OP_INTF_SPEED_GET:
case DPLANE_OP_IPTABLE_ADD:
case DPLANE_OP_IPTABLE_DELETE:
case DPLANE_OP_IPSET_ADD:
Expand Down
1 change: 1 addition & 0 deletions zebra/kernel_socket.c
Original file line number Diff line number Diff line change
Expand Up @@ -1692,6 +1692,7 @@ void kernel_update_multi(struct dplane_ctx_list_head *ctx_list)
case DPLANE_OP_GRE_SET:
case DPLANE_OP_INTF_ADDR_ADD:
case DPLANE_OP_INTF_ADDR_DEL:
case DPLANE_OP_INTF_SPEED_GET:
case DPLANE_OP_STARTUP_STAGE:
case DPLANE_OP_SRV6_ENCAP_SRCADDR_SET:
case DPLANE_OP_VLAN_INSTALL:
Expand Down
2 changes: 1 addition & 1 deletion zebra/rt.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ extern int mpls_kernel_init(void);
void kernel_router_init(void);
void kernel_router_terminate(void);

extern uint32_t kernel_get_speed(struct interface *ifp, int *error);
extern uint32_t kernel_get_speed(vrf_id_t vrf_id, const char *ifname, int *error);
extern int kernel_get_ipmr_sg_stats(struct zebra_vrf *zvrf, void *mroute);

/*
Expand Down
8 changes: 6 additions & 2 deletions zebra/rt_socket.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "lib_errors.h"

#include "zebra/debug.h"
#include "zebra/interface.h"
#include "zebra/rib.h"
#include "zebra/rt.h"
#include "zebra/kernel_socket.h"
Expand Down Expand Up @@ -383,9 +384,12 @@ extern int kernel_interface_set_master(struct interface *master,
return 0;
}

uint32_t kernel_get_speed(struct interface *ifp, int *error)
uint32_t kernel_get_speed(vrf_id_t vrf_id, const char *ifname, int *error)
{
return ifp->speed;
if (error)
*error = INTERFACE_SPEED_ERROR_READ;

return 0;
Comment on lines 384 to +392

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 BSD speed regression: always returning INTERFACE_SPEED_ERROR_READ

The old stub returned ifp->speed, which works correctly on BSD because kernel_socket.c populates ifp->speed directly from ifm->ifm_data.ifi_baudrate / 1000000 on RTM_IFINFO messages (kernel_socket.c line 730). That assignment does not call if_update_state_speed, so the timer-based path was the only way clients received the speed-change notification via if_add_update.

With this change, kernel_get_speed always returns INTERFACE_SPEED_ERROR_READ on BSD. zebra_if_speed_update_ctx then hits the ZEBRA_DPLANE_REQUEST_FAILURE early-return path, skips the speed update, and never reschedules — leaving BSD clients without any speed-change notification. The correct fix is to restore the ifp->speed return for BSD:

uint32_t kernel_get_speed(vrf_id_t vrf_id, const char *ifname, int *error)
{
	struct interface *ifp;

	ifp = if_lookup_by_name(ifname, VRF_DEFAULT);
	if (!ifp) {
		if (error)
			*error = INTERFACE_SPEED_ERROR_READ;
		return 0;
	}

	if (error)
		*error = 0;
	return ifp->speed;
}
Prompt To Fix With AI
This is a comment left during a code review.
Path: zebra/rt_socket.c
Line: 384-392

Comment:
**BSD speed regression: always returning INTERFACE_SPEED_ERROR_READ**

The old stub returned `ifp->speed`, which works correctly on BSD because `kernel_socket.c` populates `ifp->speed` directly from `ifm->ifm_data.ifi_baudrate / 1000000` on RTM_IFINFO messages (`kernel_socket.c` line 730). That assignment does not call `if_update_state_speed`, so the timer-based path was the only way clients received the speed-change notification via `if_add_update`.

With this change, `kernel_get_speed` always returns `INTERFACE_SPEED_ERROR_READ` on BSD. `zebra_if_speed_update_ctx` then hits the `ZEBRA_DPLANE_REQUEST_FAILURE` early-return path, skips the speed update, and never reschedules — leaving BSD clients without any speed-change notification. The correct fix is to restore the `ifp->speed` return for BSD:

```c
uint32_t kernel_get_speed(vrf_id_t vrf_id, const char *ifname, int *error)
{
	struct interface *ifp;

	ifp = if_lookup_by_name(ifname, VRF_DEFAULT);
	if (!ifp) {
		if (error)
			*error = INTERFACE_SPEED_ERROR_READ;
		return 0;
	}

	if (error)
		*error = 0;
	return ifp->speed;
}
```

How can I resolve this? If you propose a fix, please make it concise.

}

int kernel_upd_mac_nh(uint32_t nh_id, struct ipaddr *vtep_ip)
Expand Down
Loading