/*
 * net/switchdev/switchdev.c - Switch device API
 * Copyright (c) 2014-2015 Jiri Pirko <[email protected]>
 * Copyright (c) 2014-2015 Scott Feldman <[email protected]>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/notifier.h>
#include <linux/netdevice.h>
#include <linux/if_bridge.h>
#include <linux/list.h>
#include <net/ip_fib.h>
#include <net/switchdev.h>

/**
 *	switchdev_trans_item_enqueue - Enqueue data item to transaction queue
 *
 *	@trans: transaction
 *	@data: pointer to data being queued
 *	@destructor: data destructor
 *	@tritem: transaction item being queued
 *
 *	Enqueue data item to transaction queue. tritem is typically placed in
 *	container pointed at by data pointer. Destructor is called on
 *	transaction abort and after successful commit phase in case
 *	the caller did not dequeue the item before.
 */
void switchdev_trans_item_enqueue(struct switchdev_trans *trans,
				  void *data, void (*destructor)(void const *),
				  struct switchdev_trans_item *tritem)
{
	tritem->data = data;
	tritem->destructor = destructor;
	list_add_tail(&tritem->list, &trans->item_list);
}
EXPORT_SYMBOL_GPL(switchdev_trans_item_enqueue);

static struct switchdev_trans_item *
__switchdev_trans_item_dequeue(struct switchdev_trans *trans)
{
	struct switchdev_trans_item *tritem;

	if (list_empty(&trans->item_list))
		return NULL;
	tritem = list_first_entry(&trans->item_list,
				  struct switchdev_trans_item, list);
	list_del(&tritem->list);
	return tritem;
}

/**
 *	switchdev_trans_item_dequeue - Dequeue data item from transaction queue
 *
 *	@trans: transaction
 */
void *switchdev_trans_item_dequeue(struct switchdev_trans *trans)
{
	struct switchdev_trans_item *tritem;

	tritem = __switchdev_trans_item_dequeue(trans);
	BUG_ON(!tritem);
	return tritem->data;
}
EXPORT_SYMBOL_GPL(switchdev_trans_item_dequeue);
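
/* Example usage (illustrative sketch, not part of this API): a driver
 * can use the transaction item queue to carry an allocation made in the
 * prepare phase over to the commit phase.  The names example_entry,
 * example_entry_destructor and example_port_obj_add are hypothetical.
 *
 *	struct example_entry {
 *		struct switchdev_trans_item tritem;
 *		u16 vid;
 *	};
 *
 *	static void example_entry_destructor(void const *data)
 *	{
 *		kfree(data);
 *	}
 *
 *	static int example_port_obj_add(struct net_device *dev,
 *					struct switchdev_obj *obj,
 *					struct switchdev_trans *trans)
 *	{
 *		struct example_entry *entry;
 *
 *		if (trans->ph_prepare) {
 *			entry = kzalloc(sizeof(*entry), GFP_KERNEL);
 *			if (!entry)
 *				return -ENOMEM;
 *			entry->vid = obj->u.vlan.vid_begin;
 *			switchdev_trans_item_enqueue(trans, entry,
 *						     example_entry_destructor,
 *						     &entry->tritem);
 *			return 0;
 *		}
 *
 *		entry = switchdev_trans_item_dequeue(trans);
 *		... program entry->vid into the hardware; must not fail ...
 *		kfree(entry);
 *		return 0;
 *	}
 *
 * If the prepare phase fails, the enqueued item is freed through its
 * destructor by switchdev_trans_items_destroy() below.
 */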

static void switchdev_trans_init(struct switchdev_trans *trans)
{
	INIT_LIST_HEAD(&trans->item_list);
}

static void switchdev_trans_items_destroy(struct switchdev_trans *trans)
{
	struct switchdev_trans_item *tritem;

	while ((tritem = __switchdev_trans_item_dequeue(trans)))
		tritem->destructor(tritem->data);
}

static void switchdev_trans_items_warn_destroy(struct net_device *dev,
					       struct switchdev_trans *trans)
{
	WARN(!list_empty(&trans->item_list), "%s: transaction item queue is not empty.\n",
	     dev->name);
	switchdev_trans_items_destroy(trans);
}

/**
 *	switchdev_port_attr_get - Get port attribute
 *
 *	@dev: port device
 *	@attr: attribute to get
 */
int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr)
{
	const struct switchdev_ops *ops = dev->switchdev_ops;
	struct net_device *lower_dev;
	struct list_head *iter;
	struct switchdev_attr first = {
		.id = SWITCHDEV_ATTR_UNDEFINED
	};
	int err = -EOPNOTSUPP;

	if (ops && ops->switchdev_port_attr_get)
		return ops->switchdev_port_attr_get(dev, attr);

	if (attr->flags & SWITCHDEV_F_NO_RECURSE)
		return err;

	/* Switch device port(s) may be stacked under
	 * bond/team/vlan dev, so recurse down to get attr on
	 * each port.  Return -ENODATA if attr values don't
	 * compare across ports.
	 */

	netdev_for_each_lower_dev(dev, lower_dev, iter) {
		err = switchdev_port_attr_get(lower_dev, attr);
		if (err)
			break;
		if (first.id == SWITCHDEV_ATTR_UNDEFINED)
			first = *attr;
		else if (memcmp(&first, attr, sizeof(*attr)))
			return -ENODATA;
	}

	return err;
}
EXPORT_SYMBOL_GPL(switchdev_port_attr_get);
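
/* Example (hedged sketch): a typical driver-side getter answers
 * SWITCHDEV_ATTR_PORT_PARENT_ID so stacked ports can be matched to
 * their physical switch.  example_port and its switch_id field are
 * hypothetical driver-private state, not part of this API.
 *
 *	static int example_attr_get(struct net_device *dev,
 *				    struct switchdev_attr *attr)
 *	{
 *		struct example_port *port = netdev_priv(dev);
 *
 *		switch (attr->id) {
 *		case SWITCHDEV_ATTR_PORT_PARENT_ID:
 *			attr->u.ppid.id_len = sizeof(port->switch_id);
 *			memcpy(attr->u.ppid.id, &port->switch_id,
 *			       attr->u.ppid.id_len);
 *			return 0;
 *		default:
 *			return -EOPNOTSUPP;
 *		}
 *	}
 *
 *	static const struct switchdev_ops example_switchdev_ops = {
 *		.switchdev_port_attr_get = example_attr_get,
 *	};
 *
 * The driver then publishes the ops via dev->switchdev_ops.
 */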

static int __switchdev_port_attr_set(struct net_device *dev,
				     struct switchdev_attr *attr,
				     struct switchdev_trans *trans)
{
	const struct switchdev_ops *ops = dev->switchdev_ops;
	struct net_device *lower_dev;
	struct list_head *iter;
	int err = -EOPNOTSUPP;

	if (ops && ops->switchdev_port_attr_set)
		return ops->switchdev_port_attr_set(dev, attr, trans);

	if (attr->flags & SWITCHDEV_F_NO_RECURSE)
		return err;

	/* Switch device port(s) may be stacked under
	 * bond/team/vlan dev, so recurse down to set attr on
	 * each port.
	 */

	netdev_for_each_lower_dev(dev, lower_dev, iter) {
		err = __switchdev_port_attr_set(lower_dev, attr, trans);
		if (err)
			break;
	}

	return err;
}

struct switchdev_attr_set_work {
	struct work_struct work;
	struct net_device *dev;
	struct switchdev_attr attr;
};

static void switchdev_port_attr_set_work(struct work_struct *work)
{
	struct switchdev_attr_set_work *asw =
		container_of(work, struct switchdev_attr_set_work, work);
	int err;

	rtnl_lock();
	err = switchdev_port_attr_set(asw->dev, &asw->attr);
	if (err && err != -EOPNOTSUPP)
		netdev_err(asw->dev, "failed (err=%d) to set attribute (id=%d)\n",
			   err, asw->attr.id);
	rtnl_unlock();

	dev_put(asw->dev);
	kfree(work);
}

static int switchdev_port_attr_set_defer(struct net_device *dev,
					 struct switchdev_attr *attr)
{
	struct switchdev_attr_set_work *asw;

	asw = kmalloc(sizeof(*asw), GFP_ATOMIC);
	if (!asw)
		return -ENOMEM;

	INIT_WORK(&asw->work, switchdev_port_attr_set_work);

	dev_hold(dev);
	asw->dev = dev;
	memcpy(&asw->attr, attr, sizeof(asw->attr));

	schedule_work(&asw->work);

	return 0;
}

/**
 *	switchdev_port_attr_set - Set port attribute
 *
 *	@dev: port device
 *	@attr: attribute to set
 *
 *	Use a 2-phase prepare-commit transaction model to ensure
 *	system is not left in a partially updated state due to
 *	failure from driver/device.
 */
int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr)
{
	struct switchdev_trans trans;
	int err;

	if (!rtnl_is_locked()) {
		/* Running prepare-commit transaction across stacked
		 * devices requires nothing moves, so if rtnl_lock is
		 * not held, schedule a worker thread to hold rtnl_lock
		 * while setting attr.
		 */

		return switchdev_port_attr_set_defer(dev, attr);
	}

	switchdev_trans_init(&trans);

	/* Phase I: prepare for attr set. Driver/device should fail
	 * here if there are going to be issues in the commit phase,
	 * such as lack of resources or support.  The driver/device
	 * should reserve resources needed for the commit phase here,
	 * but should not commit the attr.
	 */

	trans.ph_prepare = true;
	err = __switchdev_port_attr_set(dev, attr, &trans);
	if (err) {
		/* Prepare phase failed: abort the transaction.  Any
		 * resources reserved in the prepare phase are
		 * released.
		 */

		if (err != -EOPNOTSUPP)
			switchdev_trans_items_destroy(&trans);

		return err;
	}

	/* Phase II: commit attr set.  This cannot fail as a fault
	 * of driver/device.  If it does, it's a bug in the driver/device
	 * because the driver said everything was OK in phase I.
	 */

	trans.ph_prepare = false;
	err = __switchdev_port_attr_set(dev, attr, &trans);
	WARN(err, "%s: Commit of attribute (id=%d) failed.\n",
	     dev->name, attr->id);
	switchdev_trans_items_warn_destroy(dev, &trans);

	return err;
}
EXPORT_SYMBOL_GPL(switchdev_port_attr_set);
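
/* Example (minimal sketch): how a caller such as the bridge might set
 * an attribute.  The prepare/commit phases above happen inside
 * switchdev_port_attr_set() and are invisible to the caller.
 *
 *	struct switchdev_attr attr = {
 *		.id = SWITCHDEV_ATTR_PORT_STP_STATE,
 *		.u.stp_state = BR_STATE_FORWARDING,
 *	};
 *	int err;
 *
 *	err = switchdev_port_attr_set(dev, &attr);
 *	if (err && err != -EOPNOTSUPP)
 *		netdev_err(dev, "failed to set STP state\n");
 */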

static int __switchdev_port_obj_add(struct net_device *dev,
				    struct switchdev_obj *obj,
				    struct switchdev_trans *trans)
{
	const struct switchdev_ops *ops = dev->switchdev_ops;
	struct net_device *lower_dev;
	struct list_head *iter;
	int err = -EOPNOTSUPP;

	if (ops && ops->switchdev_port_obj_add)
		return ops->switchdev_port_obj_add(dev, obj, trans);

	/* Switch device port(s) may be stacked under
	 * bond/team/vlan dev, so recurse down to add object on
	 * each port.
	 */

	netdev_for_each_lower_dev(dev, lower_dev, iter) {
		err = __switchdev_port_obj_add(lower_dev, obj, trans);
		if (err)
			break;
	}

	return err;
}

/**
 *	switchdev_port_obj_add - Add port object
 *
 *	@dev: port device
 *	@obj: object to add
 *
 *	Use a 2-phase prepare-commit transaction model to ensure
 *	system is not left in a partially updated state due to
 *	failure from driver/device.
 *
 *	rtnl_lock must be held.
 */
int switchdev_port_obj_add(struct net_device *dev, struct switchdev_obj *obj)
{
	struct switchdev_trans trans;
	int err;

	ASSERT_RTNL();

	switchdev_trans_init(&trans);

	/* Phase I: prepare for obj add. Driver/device should fail
	 * here if there are going to be issues in the commit phase,
	 * such as lack of resources or support.  The driver/device
	 * should reserve resources needed for the commit phase here,
	 * but should not commit the obj.
	 */

	trans.ph_prepare = true;
	err = __switchdev_port_obj_add(dev, obj, &trans);
	if (err) {
		/* Prepare phase failed: abort the transaction.  Any
		 * resources reserved in the prepare phase are
		 * released.
		 */

		if (err != -EOPNOTSUPP)
			switchdev_trans_items_destroy(&trans);

		return err;
	}

	/* Phase II: commit obj add.  This cannot fail as a fault
	 * of driver/device.  If it does, it's a bug in the driver/device
	 * because the driver said everything was OK in phase I.
	 */

	trans.ph_prepare = false;
	err = __switchdev_port_obj_add(dev, obj, &trans);
	WARN(err, "%s: Commit of object (id=%d) failed.\n", dev->name, obj->id);
	switchdev_trans_items_warn_destroy(dev, &trans);

	return err;
}
EXPORT_SYMBOL_GPL(switchdev_port_obj_add);
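
/* Example (minimal sketch): adding a VLAN range object to a port.
 * The caller must hold rtnl_lock, as asserted above.
 *
 *	struct switchdev_obj obj = {
 *		.id = SWITCHDEV_OBJ_PORT_VLAN,
 *		.u.vlan = {
 *			.flags = BRIDGE_VLAN_INFO_UNTAGGED,
 *			.vid_begin = 10,
 *			.vid_end = 20,
 *		},
 *	};
 *
 *	err = switchdev_port_obj_add(dev, &obj);
 */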

/**
 *	switchdev_port_obj_del - Delete port object
 *
 *	@dev: port device
 *	@obj: object to delete
 */
int switchdev_port_obj_del(struct net_device *dev, struct switchdev_obj *obj)
{
	const struct switchdev_ops *ops = dev->switchdev_ops;
	struct net_device *lower_dev;
	struct list_head *iter;
	int err = -EOPNOTSUPP;

	if (ops && ops->switchdev_port_obj_del)
		return ops->switchdev_port_obj_del(dev, obj);

	/* Switch device port(s) may be stacked under
	 * bond/team/vlan dev, so recurse down to delete object on
	 * each port.
	 */

	netdev_for_each_lower_dev(dev, lower_dev, iter) {
		err = switchdev_port_obj_del(lower_dev, obj);
		if (err)
			break;
	}

	return err;
}
EXPORT_SYMBOL_GPL(switchdev_port_obj_del);

/**
 *	switchdev_port_obj_dump - Dump port objects
 *
 *	@dev: port device
 *	@obj: object to dump
 */
int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj)
{
	const struct switchdev_ops *ops = dev->switchdev_ops;
	struct net_device *lower_dev;
	struct list_head *iter;
	int err = -EOPNOTSUPP;

	if (ops && ops->switchdev_port_obj_dump)
		return ops->switchdev_port_obj_dump(dev, obj);

	/* Switch device port(s) may be stacked under
	 * bond/team/vlan dev, so recurse down to dump objects on
	 * first port at bottom of stack.
	 */

	netdev_for_each_lower_dev(dev, lower_dev, iter) {
		err = switchdev_port_obj_dump(lower_dev, obj);
		break;
	}

	return err;
}
EXPORT_SYMBOL_GPL(switchdev_port_obj_dump);
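
/* Example (hedged sketch): the driver side of a dump fills the object
 * once per entry and bounces it back through obj->cb.  The FDB table
 * walked here (example_fdb_table, example_fdb_entry) is hypothetical
 * driver state.
 *
 *	static int example_port_obj_dump(struct net_device *dev,
 *					 struct switchdev_obj *obj)
 *	{
 *		struct example_fdb_entry *entry;
 *		int err = 0;
 *
 *		if (obj->id != SWITCHDEV_OBJ_PORT_FDB)
 *			return -EOPNOTSUPP;
 *
 *		list_for_each_entry(entry, &example_fdb_table, list) {
 *			obj->u.fdb.addr = entry->addr;
 *			obj->u.fdb.vid = entry->vid;
 *			err = obj->cb(dev, obj);
 *			if (err)
 *				break;
 *		}
 *		return err;
 *	}
 */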

static DEFINE_MUTEX(switchdev_mutex);
static RAW_NOTIFIER_HEAD(switchdev_notif_chain);

/**
 *	register_switchdev_notifier - Register notifier
 *	@nb: notifier_block
 *
 *	Register switch device notifier. This should be used by code
 *	which needs to monitor events happening in a particular device.
 *	Return values are same as for atomic_notifier_chain_register().
 */
int register_switchdev_notifier(struct notifier_block *nb)
{
	int err;

	mutex_lock(&switchdev_mutex);
	err = raw_notifier_chain_register(&switchdev_notif_chain, nb);
	mutex_unlock(&switchdev_mutex);
	return err;
}
EXPORT_SYMBOL_GPL(register_switchdev_notifier);

/**
 *	unregister_switchdev_notifier - Unregister notifier
 *	@nb: notifier_block
 *
 *	Unregister switch device notifier.
 *	Return values are same as for atomic_notifier_chain_unregister().
 */
int unregister_switchdev_notifier(struct notifier_block *nb)
{
	int err;

	mutex_lock(&switchdev_mutex);
	err = raw_notifier_chain_unregister(&switchdev_notif_chain, nb);
	mutex_unlock(&switchdev_mutex);
	return err;
}
EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);

/**
 *	call_switchdev_notifiers - Call notifiers
 *	@val: value passed unmodified to notifier function
 *	@dev: port device
 *	@info: notifier information data
 *
 *	Call all switch device notifier blocks. This should be called by a
 *	driver when it needs to propagate a hardware event.
 *	Return values are same as for atomic_notifier_call_chain().
 */
int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
			     struct switchdev_notifier_info *info)
{
	int err;

	info->dev = dev;
	mutex_lock(&switchdev_mutex);
	err = raw_notifier_call_chain(&switchdev_notif_chain, val, info);
	mutex_unlock(&switchdev_mutex);
	return err;
}
EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
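
/* Example (minimal sketch): consuming switchdev notifications, e.g. the
 * bridge learning FDB entries discovered by hardware.  SWITCHDEV_FDB_ADD
 * and SWITCHDEV_FDB_DEL are the values drivers pass as 'val' above;
 * example_switchdev_event is hypothetical.
 *
 *	static int example_switchdev_event(struct notifier_block *unused,
 *					   unsigned long event, void *ptr)
 *	{
 *		struct switchdev_notifier_fdb_info *fdb_info = ptr;
 *
 *		switch (event) {
 *		case SWITCHDEV_FDB_ADD:
 *			... install fdb_info->addr / fdb_info->vid ...
 *			break;
 *		case SWITCHDEV_FDB_DEL:
 *			... remove fdb_info->addr / fdb_info->vid ...
 *			break;
 *		}
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block example_nb = {
 *		.notifier_call = example_switchdev_event,
 *	};
 *
 * and, from init code: register_switchdev_notifier(&example_nb);
 */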

struct switchdev_vlan_dump {
	struct switchdev_obj obj;
	struct sk_buff *skb;
	u32 filter_mask;
	u16 flags;
	u16 begin;
	u16 end;
};

static int switchdev_port_vlan_dump_put(struct net_device *dev,
					struct switchdev_vlan_dump *dump)
{
	struct bridge_vlan_info vinfo;

	vinfo.flags = dump->flags;

	if (dump->begin == 0 && dump->end == 0) {
		return 0;
	} else if (dump->begin == dump->end) {
		vinfo.vid = dump->begin;
		if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
			    sizeof(vinfo), &vinfo))
			return -EMSGSIZE;
	} else {
		vinfo.vid = dump->begin;
		vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_BEGIN;
		if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
			    sizeof(vinfo), &vinfo))
			return -EMSGSIZE;
		vinfo.vid = dump->end;
		vinfo.flags &= ~BRIDGE_VLAN_INFO_RANGE_BEGIN;
		vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_END;
		if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
			    sizeof(vinfo), &vinfo))
			return -EMSGSIZE;
	}

	return 0;
}

static int switchdev_port_vlan_dump_cb(struct net_device *dev,
				       struct switchdev_obj *obj)
{
	struct switchdev_vlan_dump *dump =
		container_of(obj, struct switchdev_vlan_dump, obj);
	struct switchdev_obj_vlan *vlan = &dump->obj.u.vlan;
	int err = 0;

	if (vlan->vid_begin > vlan->vid_end)
		return -EINVAL;

	if (dump->filter_mask & RTEXT_FILTER_BRVLAN) {
		dump->flags = vlan->flags;
		for (dump->begin = dump->end = vlan->vid_begin;
		     dump->begin <= vlan->vid_end;
		     dump->begin++, dump->end++) {
			err = switchdev_port_vlan_dump_put(dev, dump);
			if (err)
				return err;
		}
	} else if (dump->filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) {
		if (dump->begin > vlan->vid_begin &&
		    dump->begin >= vlan->vid_end) {
			if ((dump->begin - 1) == vlan->vid_end &&
			    dump->flags == vlan->flags) {
				/* prepend */
				dump->begin = vlan->vid_begin;
			} else {
				err = switchdev_port_vlan_dump_put(dev, dump);
				dump->flags = vlan->flags;
				dump->begin = vlan->vid_begin;
				dump->end = vlan->vid_end;
			}
		} else if (dump->end <= vlan->vid_begin &&
			   dump->end < vlan->vid_end) {
			if ((dump->end + 1) == vlan->vid_begin &&
			    dump->flags == vlan->flags) {
				/* append */
				dump->end = vlan->vid_end;
			} else {
				err = switchdev_port_vlan_dump_put(dev, dump);
				dump->flags = vlan->flags;
				dump->begin = vlan->vid_begin;
				dump->end = vlan->vid_end;
			}
		} else {
			err = -EINVAL;
		}
	}

	return err;
}

static int switchdev_port_vlan_fill(struct sk_buff *skb, struct net_device *dev,
				    u32 filter_mask)
{
	struct switchdev_vlan_dump dump = {
		.obj = {
			.id = SWITCHDEV_OBJ_PORT_VLAN,
			.cb = switchdev_port_vlan_dump_cb,
		},
		.skb = skb,
		.filter_mask = filter_mask,
	};
	int err = 0;

	if ((filter_mask & RTEXT_FILTER_BRVLAN) ||
	    (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) {
		err = switchdev_port_obj_dump(dev, &dump.obj);
		if (err)
			goto err_out;
		if (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)
			/* last one */
			err = switchdev_port_vlan_dump_put(dev, &dump);
	}

err_out:
	return err == -EOPNOTSUPP ? 0 : err;
}

/**
 *	switchdev_port_bridge_getlink - Get bridge port attributes
 *
 *	@dev: port device
 *
 *	Called for SELF on rtnl_bridge_getlink to get bridge port
 *	attributes.
 */
int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
				  struct net_device *dev, u32 filter_mask,
				  int nlflags)
{
	struct switchdev_attr attr = {
		.id = SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS,
	};
	u16 mode = BRIDGE_MODE_UNDEF;
	u32 mask = BR_LEARNING | BR_LEARNING_SYNC;
	int err;

	err = switchdev_port_attr_get(dev, &attr);
	if (err && err != -EOPNOTSUPP)
		return err;

	return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode,
				       attr.u.brport_flags, mask, nlflags,
				       filter_mask, switchdev_port_vlan_fill);
}
EXPORT_SYMBOL_GPL(switchdev_port_bridge_getlink);
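
/* Example: drivers are expected to wire this helper (and the setlink/
 * dellink helpers below) directly into their netdev ops so the SELF
 * path of the bridge netlink API is served by switchdev.  A sketch,
 * with example_netdev_ops as a stand-in for the driver's ops:
 *
 *	static const struct net_device_ops example_netdev_ops = {
 *		...
 *		.ndo_bridge_getlink	= switchdev_port_bridge_getlink,
 *		.ndo_bridge_setlink	= switchdev_port_bridge_setlink,
 *		.ndo_bridge_dellink	= switchdev_port_bridge_dellink,
 *	};
 */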

static int switchdev_port_br_setflag(struct net_device *dev,
				     struct nlattr *nlattr,
				     unsigned long brport_flag)
{
	struct switchdev_attr attr = {
		.id = SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS,
	};
	u8 flag = nla_get_u8(nlattr);
	int err;

	err = switchdev_port_attr_get(dev, &attr);
	if (err)
		return err;

	if (flag)
		attr.u.brport_flags |= brport_flag;
	else
		attr.u.brport_flags &= ~brport_flag;

	return switchdev_port_attr_set(dev, &attr);
}

static const struct nla_policy
switchdev_port_bridge_policy[IFLA_BRPORT_MAX + 1] = {
	[IFLA_BRPORT_STATE]		= { .type = NLA_U8 },
	[IFLA_BRPORT_COST]		= { .type = NLA_U32 },
	[IFLA_BRPORT_PRIORITY]		= { .type = NLA_U16 },
	[IFLA_BRPORT_MODE]		= { .type = NLA_U8 },
	[IFLA_BRPORT_GUARD]		= { .type = NLA_U8 },
	[IFLA_BRPORT_PROTECT]		= { .type = NLA_U8 },
	[IFLA_BRPORT_FAST_LEAVE]	= { .type = NLA_U8 },
	[IFLA_BRPORT_LEARNING]		= { .type = NLA_U8 },
	[IFLA_BRPORT_LEARNING_SYNC]	= { .type = NLA_U8 },
	[IFLA_BRPORT_UNICAST_FLOOD]	= { .type = NLA_U8 },
};

static int switchdev_port_br_setlink_protinfo(struct net_device *dev,
					      struct nlattr *protinfo)
{
	struct nlattr *attr;
	int rem;
	int err;

	err = nla_validate_nested(protinfo, IFLA_BRPORT_MAX,
				  switchdev_port_bridge_policy);
	if (err)
		return err;

	nla_for_each_nested(attr, protinfo, rem) {
		switch (nla_type(attr)) {
		case IFLA_BRPORT_LEARNING:
			err = switchdev_port_br_setflag(dev, attr,
							BR_LEARNING);
			break;
		case IFLA_BRPORT_LEARNING_SYNC:
			err = switchdev_port_br_setflag(dev, attr,
							BR_LEARNING_SYNC);
			break;
		default:
			err = -EOPNOTSUPP;
			break;
		}
		if (err)
			return err;
	}

	return 0;
}

static int switchdev_port_br_afspec(struct net_device *dev,
				    struct nlattr *afspec,
				    int (*f)(struct net_device *dev,
					     struct switchdev_obj *obj))
{
	struct nlattr *attr;
	struct bridge_vlan_info *vinfo;
	struct switchdev_obj obj = {
		.id = SWITCHDEV_OBJ_PORT_VLAN,
	};
	struct switchdev_obj_vlan *vlan = &obj.u.vlan;
	int rem;
	int err;

	nla_for_each_nested(attr, afspec, rem) {
		if (nla_type(attr) != IFLA_BRIDGE_VLAN_INFO)
			continue;
		if (nla_len(attr) != sizeof(struct bridge_vlan_info))
			return -EINVAL;
		vinfo = nla_data(attr);
		vlan->flags = vinfo->flags;
		if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) {
			if (vlan->vid_begin)
				return -EINVAL;
			vlan->vid_begin = vinfo->vid;
		} else if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END) {
			if (!vlan->vid_begin)
				return -EINVAL;
			vlan->vid_end = vinfo->vid;
			if (vlan->vid_end <= vlan->vid_begin)
				return -EINVAL;
			err = f(dev, &obj);
			if (err)
				return err;
			memset(vlan, 0, sizeof(*vlan));
		} else {
			if (vlan->vid_begin)
				return -EINVAL;
			vlan->vid_begin = vinfo->vid;
			vlan->vid_end = vinfo->vid;
			err = f(dev, &obj);
			if (err)
				return err;
			memset(vlan, 0, sizeof(*vlan));
		}
	}

	return 0;
}

/**
 *	switchdev_port_bridge_setlink - Set bridge port attributes
 *
 *	@dev: port device
 *	@nlh: netlink header
 *	@flags: netlink flags
 *
 *	Called for SELF on rtnl_bridge_setlink to set bridge port
 *	attributes.
 */
int switchdev_port_bridge_setlink(struct net_device *dev,
				  struct nlmsghdr *nlh, u16 flags)
{
	struct nlattr *protinfo;
	struct nlattr *afspec;
	int err = 0;

	protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
				   IFLA_PROTINFO);
	if (protinfo) {
		err = switchdev_port_br_setlink_protinfo(dev, protinfo);
		if (err)
			return err;
	}

	afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
				 IFLA_AF_SPEC);
	if (afspec)
		err = switchdev_port_br_afspec(dev, afspec,
					       switchdev_port_obj_add);

	return err;
}
EXPORT_SYMBOL_GPL(switchdev_port_bridge_setlink);

/**
 *	switchdev_port_bridge_dellink - Delete bridge port attributes
 *
 *	@dev: port device
 *	@nlh: netlink header
 *	@flags: netlink flags
 *
 *	Called for SELF on rtnl_bridge_dellink to delete bridge port
 *	attributes.
 */
int switchdev_port_bridge_dellink(struct net_device *dev,
				  struct nlmsghdr *nlh, u16 flags)
{
	struct nlattr *afspec;

	afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
				 IFLA_AF_SPEC);
	if (afspec)
		return switchdev_port_br_afspec(dev, afspec,
						switchdev_port_obj_del);

	return 0;
}
EXPORT_SYMBOL_GPL(switchdev_port_bridge_dellink);

/**
 *	switchdev_port_fdb_add - Add FDB (MAC/VLAN) entry to port
 *
 *	@ndm: netlink hdr
 *	@tb: netlink attributes
 *	@dev: port device
 *	@addr: MAC address to add
 *	@vid: VLAN to add
 *	@nlm_flags: netlink flags passed in (NLM_F_*)
 *
 *	Add FDB entry to switch device.
 */
int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
			   struct net_device *dev, const unsigned char *addr,
			   u16 vid, u16 nlm_flags)
{
	struct switchdev_obj obj = {
		.id = SWITCHDEV_OBJ_PORT_FDB,
		.u.fdb = {
			.addr = addr,
			.vid = vid,
		},
	};

	return switchdev_port_obj_add(dev, &obj);
}
EXPORT_SYMBOL_GPL(switchdev_port_fdb_add);

/**
 *	switchdev_port_fdb_del - Delete FDB (MAC/VLAN) entry from port
 *
 *	@ndm: netlink hdr
 *	@tb: netlink attributes
 *	@dev: port device
 *	@addr: MAC address to delete
 *	@vid: VLAN to delete
 *
 *	Delete FDB entry from switch device.
 */
int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
			   struct net_device *dev, const unsigned char *addr,
			   u16 vid)
{
	struct switchdev_obj obj = {
		.id = SWITCHDEV_OBJ_PORT_FDB,
		.u.fdb = {
			.addr = addr,
			.vid = vid,
		},
	};

	return switchdev_port_obj_del(dev, &obj);
}
EXPORT_SYMBOL_GPL(switchdev_port_fdb_del);

struct switchdev_fdb_dump {
	struct switchdev_obj obj;
	struct sk_buff *skb;
	struct netlink_callback *cb;
	int idx;
};

static int switchdev_port_fdb_dump_cb(struct net_device *dev,
				      struct switchdev_obj *obj)
{
	struct switchdev_fdb_dump *dump =
		container_of(obj, struct switchdev_fdb_dump, obj);
	u32 portid = NETLINK_CB(dump->cb->skb).portid;
	u32 seq = dump->cb->nlh->nlmsg_seq;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	if (dump->idx < dump->cb->args[0])
		goto skip;

	nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH,
			sizeof(*ndm), NLM_F_MULTI);
	if (!nlh)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family  = AF_BRIDGE;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags   = NTF_SELF;
	ndm->ndm_type    = 0;
	ndm->ndm_ifindex = dev->ifindex;
	ndm->ndm_state   = obj->u.fdb.ndm_state;

	if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, obj->u.fdb.addr))
		goto nla_put_failure;

	if (obj->u.fdb.vid && nla_put_u16(dump->skb, NDA_VLAN, obj->u.fdb.vid))
		goto nla_put_failure;

	nlmsg_end(dump->skb, nlh);

skip:
	dump->idx++;
	return 0;

nla_put_failure:
	nlmsg_cancel(dump->skb, nlh);
	return -EMSGSIZE;
}

/**
 *	switchdev_port_fdb_dump - Dump port FDB (MAC/VLAN) entries
 *
 *	@skb: netlink skb
 *	@cb: netlink callback
 *	@dev: port device
 *	@filter_dev: filter device
 *	@idx: index to start dumping at
 *
 *	Dump FDB entries from switch device.
 */
int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
			    struct net_device *dev,
			    struct net_device *filter_dev, int idx)
{
	struct switchdev_fdb_dump dump = {
		.obj = {
			.id = SWITCHDEV_OBJ_PORT_FDB,
			.cb = switchdev_port_fdb_dump_cb,
		},
		.skb = skb,
		.cb = cb,
		.idx = idx,
	};

	switchdev_port_obj_dump(dev, &dump.obj);
	return dump.idx;
}
EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump);
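
/* Example: as with the bridge link helpers above, these FDB helpers
 * match the ndo_fdb_* signatures and can be plugged into the driver's
 * netdev ops directly (example_netdev_ops is a stand-in):
 *
 *	static const struct net_device_ops example_netdev_ops = {
 *		...
 *		.ndo_fdb_add	= switchdev_port_fdb_add,
 *		.ndo_fdb_del	= switchdev_port_fdb_del,
 *		.ndo_fdb_dump	= switchdev_port_fdb_dump,
 *	};
 */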

static struct net_device *switchdev_get_lowest_dev(struct net_device *dev)
{
	const struct switchdev_ops *ops = dev->switchdev_ops;
	struct net_device *lower_dev;
	struct net_device *port_dev;
	struct list_head *iter;

	/* Recursively search down until we find a sw port dev.
	 * (A sw port dev supports switchdev_port_attr_get).
	 */

	if (ops && ops->switchdev_port_attr_get)
		return dev;

	netdev_for_each_lower_dev(dev, lower_dev, iter) {
		port_dev = switchdev_get_lowest_dev(lower_dev);
		if (port_dev)
			return port_dev;
	}

	return NULL;
}

static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi)
{
	struct switchdev_attr attr = {
		.id = SWITCHDEV_ATTR_PORT_PARENT_ID,
	};
	struct switchdev_attr prev_attr;
	struct net_device *dev = NULL;
	int nhsel;

	/* For this route, all nexthop devs must be on the same switch. */

	for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
		const struct fib_nh *nh = &fi->fib_nh[nhsel];

		if (!nh->nh_dev)
			return NULL;

		dev = switchdev_get_lowest_dev(nh->nh_dev);
		if (!dev)
			return NULL;

		if (switchdev_port_attr_get(dev, &attr))
			return NULL;

		if (nhsel > 0 &&
		    !netdev_phys_item_id_same(&prev_attr.u.ppid, &attr.u.ppid))
			return NULL;

		prev_attr = attr;
	}

	return dev;
}

/**
 *	switchdev_fib_ipv4_add - Add/modify switch IPv4 route entry
 *
 *	@dst: route's IPv4 destination address
 *	@dst_len: destination address length (prefix length)
 *	@fi: route FIB info structure
 *	@tos: route TOS
 *	@type: route type
 *	@nlflags: netlink flags passed in (NLM_F_*)
 *	@tb_id: route table ID
 *
 *	Add/modify switch IPv4 route entry.
 */
int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
			   u8 tos, u8 type, u32 nlflags, u32 tb_id)
{
	struct switchdev_obj fib_obj = {
		.id = SWITCHDEV_OBJ_IPV4_FIB,
		.u.ipv4_fib = {
			.dst = dst,
			.dst_len = dst_len,
			.fi = fi,
			.tos = tos,
			.type = type,
			.nlflags = nlflags,
			.tb_id = tb_id,
		},
	};
	struct net_device *dev;
	int err = 0;

	/* Don't offload route if using custom ip rules or if
	 * IPv4 FIB offloading has been disabled completely.
	 */

#ifdef CONFIG_IP_MULTIPLE_TABLES
	if (fi->fib_net->ipv4.fib_has_custom_rules)
		return 0;
#endif

	if (fi->fib_net->ipv4.fib_offload_disabled)
		return 0;

	dev = switchdev_get_dev_by_nhs(fi);
	if (!dev)
		return 0;

	err = switchdev_port_obj_add(dev, &fib_obj);
	if (!err)
		fi->fib_flags |= RTNH_F_OFFLOAD;

	return err == -EOPNOTSUPP ? 0 : err;
}
EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_add);
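
/* Example (hedged sketch): how a driver's switchdev_port_obj_add op
 * might consume the FIB object built above.  example_fib_reserve and
 * example_fib_program are hypothetical driver functions.
 *
 *	case SWITCHDEV_OBJ_IPV4_FIB: {
 *		struct switchdev_obj_ipv4_fib *fib = &obj->u.ipv4_fib;
 *
 *		if (trans->ph_prepare)
 *			return example_fib_reserve(dev, fib);
 *		return example_fib_program(dev, fib->dst, fib->dst_len,
 *					   fib->fi, fib->tb_id);
 *	}
 */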

/**
 *	switchdev_fib_ipv4_del - Delete IPv4 route entry from switch
 *
 *	@dst: route's IPv4 destination address
 *	@dst_len: destination address length (prefix length)
 *	@fi: route FIB info structure
 *	@tos: route TOS
 *	@type: route type
 *	@tb_id: route table ID
 *
 *	Delete IPv4 route entry from switch device.
 */
int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
			   u8 tos, u8 type, u32 tb_id)
{
	struct switchdev_obj fib_obj = {
		.id = SWITCHDEV_OBJ_IPV4_FIB,
		.u.ipv4_fib = {
			.dst = dst,
			.dst_len = dst_len,
			.fi = fi,
			.tos = tos,
			.type = type,
			.nlflags = 0,
			.tb_id = tb_id,
		},
	};
	struct net_device *dev;
	int err = 0;

	if (!(fi->fib_flags & RTNH_F_OFFLOAD))
		return 0;

	dev = switchdev_get_dev_by_nhs(fi);
	if (!dev)
		return 0;

	err = switchdev_port_obj_del(dev, &fib_obj);
	if (!err)
		fi->fib_flags &= ~RTNH_F_OFFLOAD;

	return err == -EOPNOTSUPP ? 0 : err;
}
EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_del);

/**
 *	switchdev_fib_ipv4_abort - Abort an IPv4 FIB operation
 *
 *	@fi: route FIB info structure
 */
void switchdev_fib_ipv4_abort(struct fib_info *fi)
{
	/* There was a problem installing this route to the offload
	 * device.  For now, until we come up with more refined
	 * policy handling, abruptly end IPv4 fib offloading for
	 * the entire net by flushing offload device(s) of all
	 * IPv4 routes, and mark IPv4 fib offloading broken from
	 * this point forward.
	 */

	fib_flush_external(fi->fib_net);
	fi->fib_net->ipv4.fib_offload_disabled = true;
}
EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort);

static bool switchdev_port_same_parent_id(struct net_device *a,
					  struct net_device *b)
{
	struct switchdev_attr a_attr = {
		.id = SWITCHDEV_ATTR_PORT_PARENT_ID,
		.flags = SWITCHDEV_F_NO_RECURSE,
	};
	struct switchdev_attr b_attr = {
		.id = SWITCHDEV_ATTR_PORT_PARENT_ID,
		.flags = SWITCHDEV_F_NO_RECURSE,
	};

	if (switchdev_port_attr_get(a, &a_attr) ||
	    switchdev_port_attr_get(b, &b_attr))
		return false;

	return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid);
}

static u32 switchdev_port_fwd_mark_get(struct net_device *dev,
				       struct net_device *group_dev)
{
	struct net_device *lower_dev;
	struct list_head *iter;

	netdev_for_each_lower_dev(group_dev, lower_dev, iter) {
		if (lower_dev == dev)
			continue;
		if (switchdev_port_same_parent_id(dev, lower_dev))
			return lower_dev->offload_fwd_mark;
		return switchdev_port_fwd_mark_get(dev, lower_dev);
	}

	return dev->ifindex;
}

static void switchdev_port_fwd_mark_reset(struct net_device *group_dev,
					  u32 old_mark, u32 *reset_mark)
{
	struct net_device *lower_dev;
	struct list_head *iter;

	netdev_for_each_lower_dev(group_dev, lower_dev, iter) {
		if (lower_dev->offload_fwd_mark == old_mark) {
			if (!*reset_mark)
				*reset_mark = lower_dev->ifindex;
			lower_dev->offload_fwd_mark = *reset_mark;
		}
		switchdev_port_fwd_mark_reset(lower_dev, old_mark, reset_mark);
	}
}

/**
 *	switchdev_port_fwd_mark_set - Set port offload forwarding mark
 *
 *	@dev: port device
 *	@group_dev: containing device
 *	@joining: true if dev is joining group; false if leaving group
 *
 *	An ungrouped port's offload mark is just its ifindex.  A grouped
 *	port's (member of a bridge, for example) offload mark is the ifindex
 *	of one of the ports in the group with the same parent (switch) ID.
 *	Ports on the same device in the same group will have the same mark.
 *
 *	Example:
 *
 *		br0		ifindex=9
 *		  sw1p1		ifindex=2	mark=2
 *		  sw1p2		ifindex=3	mark=2
 *		  sw2p1		ifindex=4	mark=5
 *		  sw2p2		ifindex=5	mark=5
 *
 *	If sw2p2 leaves the bridge, we'll have:
 *
 *		br0		ifindex=9
 *		  sw1p1		ifindex=2	mark=2
 *		  sw1p2		ifindex=3	mark=2
 *		  sw2p1		ifindex=4	mark=4
 *		sw2p2		ifindex=5	mark=5
 */
void switchdev_port_fwd_mark_set(struct net_device *dev,
				 struct net_device *group_dev,
				 bool joining)
{
	u32 mark = dev->ifindex;
	u32 reset_mark = 0;

	if (group_dev && joining) {
		mark = switchdev_port_fwd_mark_get(dev, group_dev);
	} else if (group_dev && !joining) {
		if (dev->offload_fwd_mark == mark)
			/* Ohoh, this port was the mark reference port,
			 * but it's leaving the group, so reset the
			 * mark for the remaining ports in the group.
			 */
			switchdev_port_fwd_mark_reset(group_dev, mark,
						      &reset_mark);
	}

	dev->offload_fwd_mark = mark;
}
EXPORT_SYMBOL_GPL(switchdev_port_fwd_mark_set);
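
/* Example (hedged sketch): the mark set above is consumed on the RX
 * path.  A driver tags packets its hardware has already forwarded so
 * the kernel will not forward them again out of ports carrying the
 * same mark; example_port and hw_already_forwarded are hypothetical.
 *
 *	static void example_rx(struct example_port *port, struct sk_buff *skb)
 *	{
 *		skb->protocol = eth_type_trans(skb, port->dev);
 *		if (port->hw_already_forwarded)
 *			skb->offload_fwd_mark = port->dev->offload_fwd_mark;
 *		netif_receive_skb(skb);
 *	}
 */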
1220