xref: /linux-5.15/net/bridge/br_switchdev.c (revision fab9eca8)
// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
#include <net/switchdev.h>

#include "br_private.h"

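/* Static key counting the bridge ports that have requested TX forwarding
 * offload; lets the data path skip all tx_fwd_offload handling while no
 * port has BR_TX_FWD_OFFLOAD set.
 */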
static struct static_key_false br_switchdev_tx_fwd_offload;

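/* TX forwarding may be offloaded to this egress port only if the port asked
 * for it and the skb entered the bridge from a different hardware domain.
 */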
static bool nbp_switchdev_can_offload_tx_fwd(const struct net_bridge_port *p,
					     const struct sk_buff *skb)
{
	if (!static_branch_unlikely(&br_switchdev_tx_fwd_offload))
		return false;

	return (p->flags & BR_TX_FWD_OFFLOAD) &&
	       (p->hwdom != BR_INPUT_SKB_CB(skb)->src_hwdom);
}

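/* Returns true if this skb has been marked for TX forwarding offload */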
bool br_switchdev_frame_uses_tx_fwd_offload(struct sk_buff *skb)
{
	if (!static_branch_unlikely(&br_switchdev_tx_fwd_offload))
		return false;

	return BR_INPUT_SKB_CB(skb)->tx_fwd_offload;
}

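/* Mirror the TX forwarding offload decision into skb->offload_fwd_mark, for
 * the egress port driver to consume.
 */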
void br_switchdev_frame_set_offload_fwd_mark(struct sk_buff *skb)
{
	skb->offload_fwd_mark = br_switchdev_frame_uses_tx_fwd_offload(skb);
}

/* Mark the frame for TX forwarding offload if this egress port supports it */
void nbp_switchdev_frame_mark_tx_fwd_offload(const struct net_bridge_port *p,
					     struct sk_buff *skb)
{
	if (nbp_switchdev_can_offload_tx_fwd(p, skb))
		BR_INPUT_SKB_CB(skb)->tx_fwd_offload = true;
}

/* Lazily adds the hwdom of the egress bridge port to the bit mask of hwdoms
 * that the skb has already been forwarded to, to avoid further cloning to
 * other ports in the same hwdom by making nbp_switchdev_allowed_egress()
 * return false.
 */
void nbp_switchdev_frame_mark_tx_fwd_to_hwdom(const struct net_bridge_port *p,
					      struct sk_buff *skb)
{
	if (nbp_switchdev_can_offload_tx_fwd(p, skb))
		set_bit(p->hwdom, &BR_INPUT_SKB_CB(skb)->fwd_hwdoms);
}

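/* Record the hardware domain of the ingress port in the skb control block */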
void nbp_switchdev_frame_mark(const struct net_bridge_port *p,
			      struct sk_buff *skb)
{
	if (p->hwdom)
		BR_INPUT_SKB_CB(skb)->src_hwdom = p->hwdom;
}

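/* An skb may egress through a port unless the hardware already forwarded it
 * to that port's domain, either explicitly (fwd_hwdoms) or because it was
 * received with offload_fwd_mark set from the same domain.
 */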
bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p,
				  const struct sk_buff *skb)
{
	struct br_input_skb_cb *cb = BR_INPUT_SKB_CB(skb);

	return !test_bit(p->hwdom, &cb->fwd_hwdoms) &&
		(!skb->offload_fwd_mark || cb->src_hwdom != p->hwdom);
}

/* Flags that can be offloaded to hardware */
#define BR_PORT_FLAGS_HW_OFFLOAD (BR_LEARNING | BR_FLOOD | \
				  BR_MCAST_FLOOD | BR_BCAST_FLOOD)

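/* Offload a change of bridge port flags in two phases: an atomic
 * PRE_BRIDGE_FLAGS notifier that lets drivers veto unsupported flag
 * combinations, followed by a deferred BRIDGE_FLAGS attribute that commits
 * the change.
 */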
int br_switchdev_set_port_flag(struct net_bridge_port *p,
			       unsigned long flags,
			       unsigned long mask,
			       struct netlink_ext_ack *extack)
{
	struct switchdev_attr attr = {
		.orig_dev = p->dev,
	};
	struct switchdev_notifier_port_attr_info info = {
		.attr = &attr,
	};
	int err;

	mask &= BR_PORT_FLAGS_HW_OFFLOAD;
	if (!mask)
		return 0;

	attr.id = SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS;
	attr.u.brport_flags.val = flags;
	attr.u.brport_flags.mask = mask;

	/* We run from atomic context here */
	err = call_switchdev_notifiers(SWITCHDEV_PORT_ATTR_SET, p->dev,
				       &info.info, extack);
	err = notifier_to_errno(err);
	if (err == -EOPNOTSUPP)
		return 0;

	if (err) {
		if (extack && !extack->_msg)
			NL_SET_ERR_MSG_MOD(extack,
					   "bridge flag offload is not supported");
		return -EOPNOTSUPP;
	}

	attr.id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS;
	attr.flags = SWITCHDEV_F_DEFER;

	err = switchdev_port_attr_set(p->dev, &attr, extack);
	if (err) {
		if (extack && !extack->_msg)
			NL_SET_ERR_MSG_MOD(extack,
					   "error setting offload flag on port");
		return err;
	}

	return 0;
}

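/* Translate a bridge FDB entry into the switchdev notifier format. Local
 * entries and entries without a destination port are reported against the
 * bridge device itself.
 */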
static void br_switchdev_fdb_populate(struct net_bridge *br,
				      struct switchdev_notifier_fdb_info *item,
				      const struct net_bridge_fdb_entry *fdb,
				      const void *ctx)
{
	const struct net_bridge_port *p = READ_ONCE(fdb->dst);

	item->addr = fdb->key.addr.addr;
	item->vid = fdb->key.vlan_id;
	item->added_by_user = test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
	item->offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags);
	item->is_local = test_bit(BR_FDB_LOCAL, &fdb->flags);
	item->info.dev = (!p || item->is_local) ? br->dev : p->dev;
	item->info.ctx = ctx;
}

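/* Notify switchdev drivers that an FDB entry was added to or deleted from
 * the software bridge.
 */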
void
br_switchdev_fdb_notify(struct net_bridge *br,
			const struct net_bridge_fdb_entry *fdb, int type)
{
	struct switchdev_notifier_fdb_info item;

	br_switchdev_fdb_populate(br, &item, fdb, NULL);

	switch (type) {
	case RTM_DELNEIGH:
		call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_DEVICE,
					 item.info.dev, &item.info, NULL);
		break;
	case RTM_NEWNEIGH:
		call_switchdev_notifiers(SWITCHDEV_FDB_ADD_TO_DEVICE,
					 item.info.dev, &item.info, NULL);
		break;
	}
}

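/* Wrappers that offload the addition/deletion of a bridge VLAN as a
 * switchdev port object.
 */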
int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid, u16 flags,
			       struct netlink_ext_ack *extack)
{
	struct switchdev_obj_port_vlan v = {
		.obj.orig_dev = dev,
		.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
		.flags = flags,
		.vid = vid,
	};

	return switchdev_port_obj_add(dev, &v.obj, extack);
}

int br_switchdev_port_vlan_del(struct net_device *dev, u16 vid)
{
	struct switchdev_obj_port_vlan v = {
		.obj.orig_dev = dev,
		.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
		.vid = vid,
	};

	return switchdev_port_obj_del(dev, &v.obj);
}

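/* Assign a hardware domain to a joining port: reuse the domain of an
 * existing port with the same parent switch ID, otherwise claim the lowest
 * free one. Domain 0 is reserved to mean "not offloaded".
 */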
static int nbp_switchdev_hwdom_set(struct net_bridge_port *joining)
{
	struct net_bridge *br = joining->br;
	struct net_bridge_port *p;
	int hwdom;

	/* joining is yet to be added to the port list. */
	list_for_each_entry(p, &br->port_list, list) {
		if (netdev_phys_item_id_same(&joining->ppid, &p->ppid)) {
			joining->hwdom = p->hwdom;
			return 0;
		}
	}

	hwdom = find_next_zero_bit(&br->busy_hwdoms, BR_HWDOM_MAX, 1);
	if (hwdom >= BR_HWDOM_MAX)
		return -EBUSY;

	set_bit(hwdom, &br->busy_hwdoms);
	joining->hwdom = hwdom;
	return 0;
}

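/* Release the hardware domain of a leaving port once no other port uses it */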
static void nbp_switchdev_hwdom_put(struct net_bridge_port *leaving)
{
	struct net_bridge *br = leaving->br;
	struct net_bridge_port *p;

	/* leaving is no longer in the port list. */
	list_for_each_entry(p, &br->port_list, list) {
		if (p->hwdom == leaving->hwdom)
			return;
	}

	clear_bit(leaving->hwdom, &br->busy_hwdoms);
}

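/* Record that a switchdev driver offloads this bridge port. The offload
 * count allows stacked interfaces (bonding/team) whose lowers all belong to
 * the same physical switch.
 */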
static int nbp_switchdev_add(struct net_bridge_port *p,
			     struct netdev_phys_item_id ppid,
			     bool tx_fwd_offload,
			     struct netlink_ext_ack *extack)
{
	int err;

	if (p->offload_count) {
		/* Prevent unsupported configurations such as a bridge port
		 * that is a bonding interface whose members are ports of
		 * different hardware switches.
		 */
		if (!netdev_phys_item_id_same(&p->ppid, &ppid)) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Same bridge port cannot be offloaded by two physical switches");
			return -EBUSY;
		}

		/* Tolerate drivers that call switchdev_bridge_port_offload()
		 * more than once for the same bridge port, such as when the
		 * bridge port is an offloaded bonding/team interface.
		 */
		p->offload_count++;

		return 0;
	}

	p->ppid = ppid;
	p->offload_count = 1;

	err = nbp_switchdev_hwdom_set(p);
	if (err)
		return err;

	if (tx_fwd_offload) {
		p->flags |= BR_TX_FWD_OFFLOAD;
		static_branch_inc(&br_switchdev_tx_fwd_offload);
	}

	return 0;
}

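/* Undo nbp_switchdev_add() once the last offloading driver leaves the port */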
static void nbp_switchdev_del(struct net_bridge_port *p)
{
	if (WARN_ON(!p->offload_count))
		return;

	p->offload_count--;

	if (p->offload_count)
		return;

	if (p->hwdom)
		nbp_switchdev_hwdom_put(p);

	if (p->flags & BR_TX_FWD_OFFLOAD) {
		p->flags &= ~BR_TX_FWD_OFFLOAD;
		static_branch_dec(&br_switchdev_tx_fwd_offload);
	}
}

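/* Deliver a single FDB entry to one driver's notifier block */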
static int br_fdb_replay_one(struct net_bridge *br, struct notifier_block *nb,
			     const struct net_bridge_fdb_entry *fdb,
			     unsigned long action, const void *ctx)
{
	struct switchdev_notifier_fdb_info item;
	int err;

	br_switchdev_fdb_populate(br, &item, fdb, ctx);

	err = nb->notifier_call(nb, action, &item);
	return notifier_to_errno(err);
}

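/* Replay all FDB entries of the bridge, as additions or deletions, to the
 * atomic notifier block of a driver joining or leaving the bridge, so that
 * it can catch up with the current state of the software FDB.
 */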
static int br_fdb_replay(const struct net_device *br_dev, const void *ctx,
			 bool adding, struct notifier_block *nb)
{
	struct net_bridge_fdb_entry *fdb;
	struct net_bridge *br;
	unsigned long action;
	int err = 0;

	if (!nb)
		return 0;

	if (!netif_is_bridge_master(br_dev))
		return -EINVAL;

	br = netdev_priv(br_dev);

	if (adding)
		action = SWITCHDEV_FDB_ADD_TO_DEVICE;
	else
		action = SWITCHDEV_FDB_DEL_TO_DEVICE;

	rcu_read_lock();

	hlist_for_each_entry_rcu(fdb, &br->fdb_list, fdb_node) {
		err = br_fdb_replay_one(br, nb, fdb, action, ctx);
		if (err)
			break;
	}

	rcu_read_unlock();

	return err;
}

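/* Replay the VLANs, MDB entries and FDB entries of the bridge to a newly
 * offloaded port, so that the driver's state matches the software bridge.
 */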
static int nbp_switchdev_sync_objs(struct net_bridge_port *p, const void *ctx,
				   struct notifier_block *atomic_nb,
				   struct notifier_block *blocking_nb,
				   struct netlink_ext_ack *extack)
{
	struct net_device *br_dev = p->br->dev;
	struct net_device *dev = p->dev;
	int err;

	err = br_vlan_replay(br_dev, dev, ctx, true, blocking_nb, extack);
	if (err && err != -EOPNOTSUPP)
		return err;

	err = br_mdb_replay(br_dev, dev, ctx, true, blocking_nb, extack);
	if (err && err != -EOPNOTSUPP)
		return err;

	err = br_fdb_replay(br_dev, ctx, true, atomic_nb);
	if (err && err != -EOPNOTSUPP)
		return err;

	return 0;
}

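/* The inverse of nbp_switchdev_sync_objs(): replay deletions so that the
 * driver forgets the bridge's objects.
 */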
static void nbp_switchdev_unsync_objs(struct net_bridge_port *p,
				      const void *ctx,
				      struct notifier_block *atomic_nb,
				      struct notifier_block *blocking_nb)
{
	struct net_device *br_dev = p->br->dev;
	struct net_device *dev = p->dev;

	br_vlan_replay(br_dev, dev, ctx, false, blocking_nb, NULL);

	br_mdb_replay(br_dev, dev, ctx, false, blocking_nb, NULL);

	br_fdb_replay(br_dev, ctx, false, atomic_nb);
}

/* Let the bridge know that this port is offloaded, so that it can assign a
 * switchdev hardware domain to it.
 */
int br_switchdev_port_offload(struct net_bridge_port *p,
			      struct net_device *dev, const void *ctx,
			      struct notifier_block *atomic_nb,
			      struct notifier_block *blocking_nb,
			      bool tx_fwd_offload,
			      struct netlink_ext_ack *extack)
{
	struct netdev_phys_item_id ppid;
	int err;

	err = dev_get_port_parent_id(dev, &ppid, false);
	if (err)
		return err;

	err = nbp_switchdev_add(p, ppid, tx_fwd_offload, extack);
	if (err)
		return err;

	err = nbp_switchdev_sync_objs(p, ctx, atomic_nb, blocking_nb, extack);
	if (err)
		goto out_switchdev_del;

	return 0;

out_switchdev_del:
	nbp_switchdev_del(p);

	return err;
}

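/* Undo br_switchdev_port_offload(): unsync the bridge's objects from the
 * driver and release the port's offload state.
 */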
void br_switchdev_port_unoffload(struct net_bridge_port *p, const void *ctx,
				 struct notifier_block *atomic_nb,
				 struct notifier_block *blocking_nb)
{
	nbp_switchdev_unsync_objs(p, ctx, atomic_nb, blocking_nb);

	nbp_switchdev_del(p);
}