/*
 * net/switchdev/switchdev.c - Switch device API
 * Copyright (c) 2014-2015 Jiri Pirko <[email protected]>
 * Copyright (c) 2014-2015 Scott Feldman <[email protected]>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/notifier.h>
#include <linux/netdevice.h>
#include <linux/if_bridge.h>
#include <linux/list.h>
#include <net/ip_fib.h>
#include <net/switchdev.h>

/**
 *	switchdev_trans_item_enqueue - Enqueue data item to transaction queue
 *
 *	@trans: transaction
 *	@data: pointer to data being queued
 *	@destructor: data destructor
 *	@tritem: transaction item being queued
 *
 *	Enqueue data item to transaction queue. tritem is typically placed in
 *	the container pointed at by the data pointer. The destructor is called
 *	on transaction abort and after a successful commit phase if the
 *	caller did not dequeue the item before.
 */
void switchdev_trans_item_enqueue(struct switchdev_trans *trans,
				  void *data, void (*destructor)(void const *),
				  struct switchdev_trans_item *tritem)
{
	tritem->data = data;
	tritem->destructor = destructor;
	list_add_tail(&tritem->list, &trans->item_list);
}
EXPORT_SYMBOL_GPL(switchdev_trans_item_enqueue);
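
/* Example usage (sketch; the foo_* struct and function are hypothetical):
 * in the prepare phase a driver can allocate a per-transaction object and
 * queue it so it is freed automatically on transaction abort.  kfree()
 * matches the destructor signature:
 *
 *	struct foo_vlan {
 *		struct switchdev_trans_item tritem;
 *		u16 vid;
 *	};
 *
 *	static int foo_vlan_prepare(struct switchdev_trans *trans, u16 vid)
 *	{
 *		struct foo_vlan *v = kzalloc(sizeof(*v), GFP_KERNEL);
 *
 *		if (!v)
 *			return -ENOMEM;
 *		v->vid = vid;
 *		switchdev_trans_item_enqueue(trans, v, kfree, &v->tritem);
 *		return 0;
 *	}
 */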

static struct switchdev_trans_item *
__switchdev_trans_item_dequeue(struct switchdev_trans *trans)
{
	struct switchdev_trans_item *tritem;

	if (list_empty(&trans->item_list))
		return NULL;
	tritem = list_first_entry(&trans->item_list,
				  struct switchdev_trans_item, list);
	list_del(&tritem->list);
	return tritem;
}

/**
 *	switchdev_trans_item_dequeue - Dequeue data item from transaction queue
 *
 *	@trans: transaction
 */
void *switchdev_trans_item_dequeue(struct switchdev_trans *trans)
{
	struct switchdev_trans_item *tritem;

	tritem = __switchdev_trans_item_dequeue(trans);
	BUG_ON(!tritem);
	return tritem->data;
}
EXPORT_SYMBOL_GPL(switchdev_trans_item_dequeue);
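
/* Example usage (sketch; foo_* names continue the hypothetical driver above,
 * and foo_hw_write_vlan() is a hypothetical hardware accessor): in the
 * commit phase the driver dequeues the item it queued during prepare, which
 * transfers ownership back to the driver so the destructor is not called:
 *
 *	static void foo_vlan_commit(struct switchdev_trans *trans)
 *	{
 *		struct foo_vlan *v = switchdev_trans_item_dequeue(trans);
 *
 *		foo_hw_write_vlan(v->vid);
 *		kfree(v);
 *	}
 */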

static void switchdev_trans_init(struct switchdev_trans *trans)
{
	INIT_LIST_HEAD(&trans->item_list);
}

static void switchdev_trans_items_destroy(struct switchdev_trans *trans)
{
	struct switchdev_trans_item *tritem;

	while ((tritem = __switchdev_trans_item_dequeue(trans)))
		tritem->destructor(tritem->data);
}

static void switchdev_trans_items_warn_destroy(struct net_device *dev,
					       struct switchdev_trans *trans)
{
	WARN(!list_empty(&trans->item_list), "%s: transaction item queue is not empty.\n",
	     dev->name);
	switchdev_trans_items_destroy(trans);
}

/**
 *	switchdev_port_attr_get - Get port attribute
 *
 *	@dev: port device
 *	@attr: attribute to get
 */
int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr)
{
	const struct switchdev_ops *ops = dev->switchdev_ops;
	struct net_device *lower_dev;
	struct list_head *iter;
	struct switchdev_attr first = {
		.id = SWITCHDEV_ATTR_ID_UNDEFINED
	};
	int err = -EOPNOTSUPP;

	if (ops && ops->switchdev_port_attr_get)
		return ops->switchdev_port_attr_get(dev, attr);

	if (attr->flags & SWITCHDEV_F_NO_RECURSE)
		return err;

	/* Switch device port(s) may be stacked under
	 * bond/team/vlan dev, so recurse down to get attr on
	 * each port.  Return -ENODATA if attr values don't
	 * compare across ports.
	 */

	netdev_for_each_lower_dev(dev, lower_dev, iter) {
		err = switchdev_port_attr_get(lower_dev, attr);
		if (err)
			break;
		if (first.id == SWITCHDEV_ATTR_ID_UNDEFINED)
			first = *attr;
		else if (memcmp(&first, attr, sizeof(*attr)))
			return -ENODATA;
	}

	return err;
}
EXPORT_SYMBOL_GPL(switchdev_port_attr_get);
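
/* Example usage (sketch): callers fill in the attribute ID (and any flags)
 * and let the helper recurse through stacked devices; e.g. fetching the
 * parent (switch) ID of a port:
 *
 *	struct switchdev_attr attr = {
 *		.id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
 *	};
 *	int err = switchdev_port_attr_get(dev, &attr);
 *
 * On success attr.u.ppid holds the switch ID; see
 * switchdev_get_dev_by_nhs() below for an in-file caller.
 */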

static int __switchdev_port_attr_set(struct net_device *dev,
				     struct switchdev_attr *attr,
				     struct switchdev_trans *trans)
{
	const struct switchdev_ops *ops = dev->switchdev_ops;
	struct net_device *lower_dev;
	struct list_head *iter;
	int err = -EOPNOTSUPP;

	if (ops && ops->switchdev_port_attr_set)
		return ops->switchdev_port_attr_set(dev, attr, trans);

	if (attr->flags & SWITCHDEV_F_NO_RECURSE)
		return err;

	/* Switch device port(s) may be stacked under
	 * bond/team/vlan dev, so recurse down to set attr on
	 * each port.
	 */

	netdev_for_each_lower_dev(dev, lower_dev, iter) {
		err = __switchdev_port_attr_set(lower_dev, attr, trans);
		if (err)
			break;
	}

	return err;
}

struct switchdev_attr_set_work {
	struct work_struct work;
	struct net_device *dev;
	struct switchdev_attr attr;
};

static void switchdev_port_attr_set_work(struct work_struct *work)
{
	struct switchdev_attr_set_work *asw =
		container_of(work, struct switchdev_attr_set_work, work);
	int err;

	rtnl_lock();
	err = switchdev_port_attr_set(asw->dev, &asw->attr);
	if (err && err != -EOPNOTSUPP)
		netdev_err(asw->dev, "failed (err=%d) to set attribute (id=%d)\n",
			   err, asw->attr.id);
	rtnl_unlock();

	dev_put(asw->dev);
	kfree(work);
}

static int switchdev_port_attr_set_defer(struct net_device *dev,
					 struct switchdev_attr *attr)
{
	struct switchdev_attr_set_work *asw;

	asw = kmalloc(sizeof(*asw), GFP_ATOMIC);
	if (!asw)
		return -ENOMEM;

	INIT_WORK(&asw->work, switchdev_port_attr_set_work);

	dev_hold(dev);
	asw->dev = dev;
	memcpy(&asw->attr, attr, sizeof(asw->attr));

	schedule_work(&asw->work);

	return 0;
}

/**
 *	switchdev_port_attr_set - Set port attribute
 *
 *	@dev: port device
 *	@attr: attribute to set
 *
 *	Use a 2-phase prepare-commit transaction model to ensure
 *	system is not left in a partially updated state due to
 *	failure from driver/device.
 */
int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr)
{
	struct switchdev_trans trans;
	int err;

	if (!rtnl_is_locked()) {
		/* Running prepare-commit transaction across stacked
		 * devices requires nothing moves, so if rtnl_lock is
		 * not held, schedule a worker thread to hold rtnl_lock
		 * while setting attr.
		 */

		return switchdev_port_attr_set_defer(dev, attr);
	}

	switchdev_trans_init(&trans);

	/* Phase I: prepare for attr set. Driver/device should fail
	 * here if there are going to be issues in the commit phase,
	 * such as lack of resources or support.  The driver/device
	 * should reserve resources needed for the commit phase here,
	 * but should not commit the attr.
	 */

	trans.ph_prepare = true;
	err = __switchdev_port_attr_set(dev, attr, &trans);
	if (err) {
		/* Prepare phase failed: abort the transaction.  Any
		 * resources reserved in the prepare phase are
		 * released.
		 */

		if (err != -EOPNOTSUPP)
			switchdev_trans_items_destroy(&trans);

		return err;
	}

	/* Phase II: commit attr set.  This cannot fail as a fault
	 * of driver/device.  If it does, it's a bug in the driver/device
	 * because the driver said everything was OK in phase I.
	 */

	trans.ph_prepare = false;
	err = __switchdev_port_attr_set(dev, attr, &trans);
	WARN(err, "%s: Commit of attribute (id=%d) failed.\n",
	     dev->name, attr->id);
	switchdev_trans_items_warn_destroy(dev, &trans);

	return err;
}
EXPORT_SYMBOL_GPL(switchdev_port_attr_set);
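
/* Driver-side sketch of the two-phase model: the same ops callback runs
 * twice and branches on trans->ph_prepare.  The foo_* names are
 * hypothetical:
 *
 *	static int foo_port_attr_set(struct net_device *dev,
 *				     struct switchdev_attr *attr,
 *				     struct switchdev_trans *trans)
 *	{
 *		switch (attr->id) {
 *		case SWITCHDEV_ATTR_ID_PORT_STP_STATE:
 *			if (trans->ph_prepare)
 *				return foo_stp_check_resources(dev);
 *			return foo_stp_write_state(dev, attr->u.stp_state);
 *		default:
 *			return -EOPNOTSUPP;
 *		}
 *	}
 */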

static int __switchdev_port_obj_add(struct net_device *dev,
				    const struct switchdev_obj *obj,
				    struct switchdev_trans *trans)
{
	const struct switchdev_ops *ops = dev->switchdev_ops;
	struct net_device *lower_dev;
	struct list_head *iter;
	int err = -EOPNOTSUPP;

	if (ops && ops->switchdev_port_obj_add)
		return ops->switchdev_port_obj_add(dev, obj, trans);

	/* Switch device port(s) may be stacked under
	 * bond/team/vlan dev, so recurse down to add object on
	 * each port.
	 */

	netdev_for_each_lower_dev(dev, lower_dev, iter) {
		err = __switchdev_port_obj_add(lower_dev, obj, trans);
		if (err)
			break;
	}

	return err;
}

/**
 *	switchdev_port_obj_add - Add port object
 *
 *	@dev: port device
 *	@obj: object to add
 *
 *	Use a 2-phase prepare-commit transaction model to ensure
 *	system is not left in a partially updated state due to
 *	failure from driver/device.
 *
 *	rtnl_lock must be held.
 */
int switchdev_port_obj_add(struct net_device *dev,
			   const struct switchdev_obj *obj)
{
	struct switchdev_trans trans;
	int err;

	ASSERT_RTNL();

	switchdev_trans_init(&trans);

	/* Phase I: prepare for obj add. Driver/device should fail
	 * here if there are going to be issues in the commit phase,
	 * such as lack of resources or support.  The driver/device
	 * should reserve resources needed for the commit phase here,
	 * but should not commit the obj.
	 */

	trans.ph_prepare = true;
	err = __switchdev_port_obj_add(dev, obj, &trans);
	if (err) {
		/* Prepare phase failed: abort the transaction.  Any
		 * resources reserved in the prepare phase are
		 * released.
		 */

		if (err != -EOPNOTSUPP)
			switchdev_trans_items_destroy(&trans);

		return err;
	}

	/* Phase II: commit obj add.  This cannot fail as a fault
	 * of driver/device.  If it does, it's a bug in the driver/device
	 * because the driver said everything was OK in phase I.
	 */

	trans.ph_prepare = false;
	err = __switchdev_port_obj_add(dev, obj, &trans);
	WARN(err, "%s: Commit of object (id=%d) failed.\n", dev->name, obj->id);
	switchdev_trans_items_warn_destroy(dev, &trans);

	return err;
}
EXPORT_SYMBOL_GPL(switchdev_port_obj_add);
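
/* Example usage (sketch): adding an untagged VLAN range to a port under
 * rtnl_lock.  The embedded obj member carries the object ID; the wrapper
 * struct carries the type-specific payload:
 *
 *	struct switchdev_obj_port_vlan vlan = {
 *		.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
 *		.flags = BRIDGE_VLAN_INFO_UNTAGGED,
 *		.vid_begin = 10,
 *		.vid_end = 20,
 *	};
 *
 *	err = switchdev_port_obj_add(dev, &vlan.obj);
 */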

/**
 *	switchdev_port_obj_del - Delete port object
 *
 *	@dev: port device
 *	@obj: object to delete
 */
int switchdev_port_obj_del(struct net_device *dev,
			   const struct switchdev_obj *obj)
{
	const struct switchdev_ops *ops = dev->switchdev_ops;
	struct net_device *lower_dev;
	struct list_head *iter;
	int err = -EOPNOTSUPP;

	if (ops && ops->switchdev_port_obj_del)
		return ops->switchdev_port_obj_del(dev, obj);

	/* Switch device port(s) may be stacked under
	 * bond/team/vlan dev, so recurse down to delete object on
	 * each port.
	 */

	netdev_for_each_lower_dev(dev, lower_dev, iter) {
		err = switchdev_port_obj_del(lower_dev, obj);
		if (err)
			break;
	}

	return err;
}
EXPORT_SYMBOL_GPL(switchdev_port_obj_del);

/**
 *	switchdev_port_obj_dump - Dump port objects
 *
 *	@dev: port device
 *	@obj: object to dump
 *	@cb: function to call with a filled object
 */
int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj,
			    switchdev_obj_dump_cb_t *cb)
{
	const struct switchdev_ops *ops = dev->switchdev_ops;
	struct net_device *lower_dev;
	struct list_head *iter;
	int err = -EOPNOTSUPP;

	if (ops && ops->switchdev_port_obj_dump)
		return ops->switchdev_port_obj_dump(dev, obj, cb);

	/* Switch device port(s) may be stacked under
	 * bond/team/vlan dev, so recurse down to dump objects on
	 * first port at bottom of stack.
	 */

	netdev_for_each_lower_dev(dev, lower_dev, iter) {
		err = switchdev_port_obj_dump(lower_dev, obj, cb);
		break;
	}

	return err;
}
EXPORT_SYMBOL_GPL(switchdev_port_obj_dump);

static DEFINE_MUTEX(switchdev_mutex);
static RAW_NOTIFIER_HEAD(switchdev_notif_chain);

/**
 *	register_switchdev_notifier - Register notifier
 *	@nb: notifier_block
 *
 *	Register switch device notifier. This should be used by code
 *	which needs to monitor events happening on a particular device.
 *	Return values are same as for atomic_notifier_chain_register().
 */
int register_switchdev_notifier(struct notifier_block *nb)
{
	int err;

	mutex_lock(&switchdev_mutex);
	err = raw_notifier_chain_register(&switchdev_notif_chain, nb);
	mutex_unlock(&switchdev_mutex);
	return err;
}
EXPORT_SYMBOL_GPL(register_switchdev_notifier);
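
/* Example usage (sketch; foo_* names are hypothetical): a listener
 * registers a notifier_block whose callback receives the event value and
 * the switchdev_notifier_info passed to call_switchdev_notifiers():
 *
 *	static int foo_switchdev_event(struct notifier_block *nb,
 *				       unsigned long event, void *ptr)
 *	{
 *		struct switchdev_notifier_info *info = ptr;
 *
 *		... handle event for info->dev ...
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block foo_switchdev_nb = {
 *		.notifier_call = foo_switchdev_event,
 *	};
 *
 *	err = register_switchdev_notifier(&foo_switchdev_nb);
 */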

/**
 *	unregister_switchdev_notifier - Unregister notifier
 *	@nb: notifier_block
 *
 *	Unregister switch device notifier.
 *	Return values are same as for atomic_notifier_chain_unregister().
 */
int unregister_switchdev_notifier(struct notifier_block *nb)
{
	int err;

	mutex_lock(&switchdev_mutex);
	err = raw_notifier_chain_unregister(&switchdev_notif_chain, nb);
	mutex_unlock(&switchdev_mutex);
	return err;
}
EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);

/**
 *	call_switchdev_notifiers - Call notifiers
 *	@val: value passed unmodified to notifier function
 *	@dev: port device
 *	@info: notifier information data
 *
 *	Call all network notifier blocks. This should be called by a driver
 *	when it needs to propagate a hardware event.
 *	Return values are same as for atomic_notifier_call_chain().
 */
int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
			     struct switchdev_notifier_info *info)
{
	int err;

	info->dev = dev;
	mutex_lock(&switchdev_mutex);
	err = raw_notifier_call_chain(&switchdev_notif_chain, val, info);
	mutex_unlock(&switchdev_mutex);
	return err;
}
EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
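
/* Driver-side sketch: propagating a hardware-learned FDB entry to
 * listeners.  struct switchdev_notifier_fdb_info (net/switchdev.h) embeds
 * the generic info struct; addr and vid below are hypothetical locals:
 *
 *	struct switchdev_notifier_fdb_info fdb_info;
 *
 *	fdb_info.addr = addr;
 *	fdb_info.vid = vid;
 *	call_switchdev_notifiers(SWITCHDEV_FDB_ADD, dev, &fdb_info.info);
 */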

struct switchdev_vlan_dump {
	struct switchdev_obj_port_vlan vlan;
	struct sk_buff *skb;
	u32 filter_mask;
	u16 flags;
	u16 begin;
	u16 end;
};

static int switchdev_port_vlan_dump_put(struct switchdev_vlan_dump *dump)
{
	struct bridge_vlan_info vinfo;

	vinfo.flags = dump->flags;

	if (dump->begin == 0 && dump->end == 0) {
		return 0;
	} else if (dump->begin == dump->end) {
		vinfo.vid = dump->begin;
		if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
			    sizeof(vinfo), &vinfo))
			return -EMSGSIZE;
	} else {
		vinfo.vid = dump->begin;
		vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_BEGIN;
		if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
			    sizeof(vinfo), &vinfo))
			return -EMSGSIZE;
		vinfo.vid = dump->end;
		vinfo.flags &= ~BRIDGE_VLAN_INFO_RANGE_BEGIN;
		vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_END;
		if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
			    sizeof(vinfo), &vinfo))
			return -EMSGSIZE;
	}

	return 0;
}

static int switchdev_port_vlan_dump_cb(struct switchdev_obj *obj)
{
	struct switchdev_obj_port_vlan *vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);
	struct switchdev_vlan_dump *dump =
		container_of(vlan, struct switchdev_vlan_dump, vlan);
	int err = 0;

	if (vlan->vid_begin > vlan->vid_end)
		return -EINVAL;

	if (dump->filter_mask & RTEXT_FILTER_BRVLAN) {
		dump->flags = vlan->flags;
		for (dump->begin = dump->end = vlan->vid_begin;
		     dump->begin <= vlan->vid_end;
		     dump->begin++, dump->end++) {
			err = switchdev_port_vlan_dump_put(dump);
			if (err)
				return err;
		}
	} else if (dump->filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) {
		if (dump->begin > vlan->vid_begin &&
		    dump->begin >= vlan->vid_end) {
			if ((dump->begin - 1) == vlan->vid_end &&
			    dump->flags == vlan->flags) {
				/* prepend */
				dump->begin = vlan->vid_begin;
			} else {
				err = switchdev_port_vlan_dump_put(dump);
				dump->flags = vlan->flags;
				dump->begin = vlan->vid_begin;
				dump->end = vlan->vid_end;
			}
		} else if (dump->end <= vlan->vid_begin &&
			   dump->end < vlan->vid_end) {
			if ((dump->end + 1) == vlan->vid_begin &&
			    dump->flags == vlan->flags) {
				/* append */
				dump->end = vlan->vid_end;
			} else {
				err = switchdev_port_vlan_dump_put(dump);
				dump->flags = vlan->flags;
				dump->begin = vlan->vid_begin;
				dump->end = vlan->vid_end;
			}
		} else {
			err = -EINVAL;
		}
	}

	return err;
}

static int switchdev_port_vlan_fill(struct sk_buff *skb, struct net_device *dev,
				    u32 filter_mask)
{
	struct switchdev_vlan_dump dump = {
		.vlan.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
		.skb = skb,
		.filter_mask = filter_mask,
	};
	int err = 0;

	if ((filter_mask & RTEXT_FILTER_BRVLAN) ||
	    (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) {
		err = switchdev_port_obj_dump(dev, &dump.vlan.obj,
					      switchdev_port_vlan_dump_cb);
		if (err)
			goto err_out;
		if (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)
			/* last one */
			err = switchdev_port_vlan_dump_put(&dump);
	}

err_out:
	return err == -EOPNOTSUPP ? 0 : err;
}

/**
 *	switchdev_port_bridge_getlink - Get bridge port attributes
 *
 *	@skb: netlink skb
 *	@pid: netlink port ID
 *	@seq: netlink sequence number
 *	@dev: port device
 *	@filter_mask: filter mask passed in
 *	@nlflags: netlink flags passed in
 *
 *	Called for SELF on rtnl_bridge_getlink to get bridge port
 *	attributes.
 */
int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
				  struct net_device *dev, u32 filter_mask,
				  int nlflags)
{
	struct switchdev_attr attr = {
		.id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS,
	};
	u16 mode = BRIDGE_MODE_UNDEF;
	u32 mask = BR_LEARNING | BR_LEARNING_SYNC;
	int err;

	err = switchdev_port_attr_get(dev, &attr);
	if (err && err != -EOPNOTSUPP)
		return err;

	return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode,
				       attr.u.brport_flags, mask, nlflags,
				       filter_mask, switchdev_port_vlan_fill);
}
EXPORT_SYMBOL_GPL(switchdev_port_bridge_getlink);

static int switchdev_port_br_setflag(struct net_device *dev,
				     struct nlattr *nlattr,
				     unsigned long brport_flag)
{
	struct switchdev_attr attr = {
		.id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS,
	};
	u8 flag = nla_get_u8(nlattr);
	int err;

	err = switchdev_port_attr_get(dev, &attr);
	if (err)
		return err;

	if (flag)
		attr.u.brport_flags |= brport_flag;
	else
		attr.u.brport_flags &= ~brport_flag;

	return switchdev_port_attr_set(dev, &attr);
}

static const struct nla_policy
switchdev_port_bridge_policy[IFLA_BRPORT_MAX + 1] = {
	[IFLA_BRPORT_STATE]		= { .type = NLA_U8 },
	[IFLA_BRPORT_COST]		= { .type = NLA_U32 },
	[IFLA_BRPORT_PRIORITY]		= { .type = NLA_U16 },
	[IFLA_BRPORT_MODE]		= { .type = NLA_U8 },
	[IFLA_BRPORT_GUARD]		= { .type = NLA_U8 },
	[IFLA_BRPORT_PROTECT]		= { .type = NLA_U8 },
	[IFLA_BRPORT_FAST_LEAVE]	= { .type = NLA_U8 },
	[IFLA_BRPORT_LEARNING]		= { .type = NLA_U8 },
	[IFLA_BRPORT_LEARNING_SYNC]	= { .type = NLA_U8 },
	[IFLA_BRPORT_UNICAST_FLOOD]	= { .type = NLA_U8 },
};

static int switchdev_port_br_setlink_protinfo(struct net_device *dev,
					      struct nlattr *protinfo)
{
	struct nlattr *attr;
	int rem;
	int err;

	err = nla_validate_nested(protinfo, IFLA_BRPORT_MAX,
				  switchdev_port_bridge_policy);
	if (err)
		return err;

	nla_for_each_nested(attr, protinfo, rem) {
		switch (nla_type(attr)) {
		case IFLA_BRPORT_LEARNING:
			err = switchdev_port_br_setflag(dev, attr,
							BR_LEARNING);
			break;
		case IFLA_BRPORT_LEARNING_SYNC:
			err = switchdev_port_br_setflag(dev, attr,
							BR_LEARNING_SYNC);
			break;
		default:
			err = -EOPNOTSUPP;
			break;
		}
		if (err)
			return err;
	}

	return 0;
}

static int switchdev_port_br_afspec(struct net_device *dev,
				    struct nlattr *afspec,
				    int (*f)(struct net_device *dev,
					     const struct switchdev_obj *obj))
{
	struct nlattr *attr;
	struct bridge_vlan_info *vinfo;
	struct switchdev_obj_port_vlan vlan = {
		.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
	};
	int rem;
	int err;

	nla_for_each_nested(attr, afspec, rem) {
		if (nla_type(attr) != IFLA_BRIDGE_VLAN_INFO)
			continue;
		if (nla_len(attr) != sizeof(struct bridge_vlan_info))
			return -EINVAL;
		vinfo = nla_data(attr);
		vlan.flags = vinfo->flags;
		if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) {
			if (vlan.vid_begin)
				return -EINVAL;
			vlan.vid_begin = vinfo->vid;
		} else if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END) {
			if (!vlan.vid_begin)
				return -EINVAL;
			vlan.vid_end = vinfo->vid;
			if (vlan.vid_end <= vlan.vid_begin)
				return -EINVAL;
			err = f(dev, &vlan.obj);
			if (err)
				return err;
			memset(&vlan, 0, sizeof(vlan));
		} else {
			if (vlan.vid_begin)
				return -EINVAL;
			vlan.vid_begin = vinfo->vid;
			vlan.vid_end = vinfo->vid;
			err = f(dev, &vlan.obj);
			if (err)
				return err;
			memset(&vlan, 0, sizeof(vlan));
		}
	}

	return 0;
}

/**
 *	switchdev_port_bridge_setlink - Set bridge port attributes
 *
 *	@dev: port device
 *	@nlh: netlink header
 *	@flags: netlink flags
 *
 *	Called for SELF on rtnl_bridge_setlink to set bridge port
 *	attributes.
 */
int switchdev_port_bridge_setlink(struct net_device *dev,
				  struct nlmsghdr *nlh, u16 flags)
{
	struct nlattr *protinfo;
	struct nlattr *afspec;
	int err = 0;

	protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
				   IFLA_PROTINFO);
	if (protinfo) {
		err = switchdev_port_br_setlink_protinfo(dev, protinfo);
		if (err)
			return err;
	}

	afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
				 IFLA_AF_SPEC);
	if (afspec)
		err = switchdev_port_br_afspec(dev, afspec,
					       switchdev_port_obj_add);

	return err;
}
EXPORT_SYMBOL_GPL(switchdev_port_bridge_setlink);

/**
 *	switchdev_port_bridge_dellink - Delete bridge port attributes
 *
 *	@dev: port device
 *	@nlh: netlink header
 *	@flags: netlink flags
 *
 *	Called for SELF on rtnl_bridge_dellink to delete bridge port
 *	attributes.
 */
int switchdev_port_bridge_dellink(struct net_device *dev,
				  struct nlmsghdr *nlh, u16 flags)
{
	struct nlattr *afspec;

	afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
				 IFLA_AF_SPEC);
	if (afspec)
		return switchdev_port_br_afspec(dev, afspec,
						switchdev_port_obj_del);

	return 0;
}
EXPORT_SYMBOL_GPL(switchdev_port_bridge_dellink);
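
/* Wiring sketch: a driver typically plugs these helpers directly into its
 * net_device_ops so that "bridge ... self" requests reach the switch port
 * (foo_netdev_ops is hypothetical):
 *
 *	static const struct net_device_ops foo_netdev_ops = {
 *		...
 *		.ndo_bridge_getlink	= switchdev_port_bridge_getlink,
 *		.ndo_bridge_setlink	= switchdev_port_bridge_setlink,
 *		.ndo_bridge_dellink	= switchdev_port_bridge_dellink,
 *	};
 */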

/**
 *	switchdev_port_fdb_add - Add FDB (MAC/VLAN) entry to port
 *
 *	@ndm: netlink hdr
 *	@tb: netlink attributes
 *	@dev: port device
 *	@addr: MAC address to add
 *	@vid: VLAN to add
 *	@nlm_flags: netlink flags passed in (NLM_F_*)
 *
 *	Add FDB entry to switch device.
 */
int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
			   struct net_device *dev, const unsigned char *addr,
			   u16 vid, u16 nlm_flags)
{
	struct switchdev_obj_port_fdb fdb = {
		.obj.id = SWITCHDEV_OBJ_ID_PORT_FDB,
		.addr = addr,
		.vid = vid,
	};

	return switchdev_port_obj_add(dev, &fdb.obj);
}
EXPORT_SYMBOL_GPL(switchdev_port_fdb_add);

/**
 *	switchdev_port_fdb_del - Delete FDB (MAC/VLAN) entry from port
 *
 *	@ndm: netlink hdr
 *	@tb: netlink attributes
 *	@dev: port device
 *	@addr: MAC address to delete
 *	@vid: VLAN to delete
 *
 *	Delete FDB entry from switch device.
 */
int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
			   struct net_device *dev, const unsigned char *addr,
			   u16 vid)
{
	struct switchdev_obj_port_fdb fdb = {
		.obj.id = SWITCHDEV_OBJ_ID_PORT_FDB,
		.addr = addr,
		.vid = vid,
	};

	return switchdev_port_obj_del(dev, &fdb.obj);
}
EXPORT_SYMBOL_GPL(switchdev_port_fdb_del);

struct switchdev_fdb_dump {
	struct switchdev_obj_port_fdb fdb;
	struct net_device *dev;
	struct sk_buff *skb;
	struct netlink_callback *cb;
	int idx;
};

static int switchdev_port_fdb_dump_cb(struct switchdev_obj *obj)
{
	struct switchdev_obj_port_fdb *fdb = SWITCHDEV_OBJ_PORT_FDB(obj);
	struct switchdev_fdb_dump *dump =
		container_of(fdb, struct switchdev_fdb_dump, fdb);
	u32 portid = NETLINK_CB(dump->cb->skb).portid;
	u32 seq = dump->cb->nlh->nlmsg_seq;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	if (dump->idx < dump->cb->args[0])
		goto skip;

	nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH,
			sizeof(*ndm), NLM_F_MULTI);
	if (!nlh)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family  = AF_BRIDGE;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags   = NTF_SELF;
	ndm->ndm_type    = 0;
	ndm->ndm_ifindex = dump->dev->ifindex;
	ndm->ndm_state   = fdb->ndm_state;

	if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, fdb->addr))
		goto nla_put_failure;

	if (fdb->vid && nla_put_u16(dump->skb, NDA_VLAN, fdb->vid))
		goto nla_put_failure;

	nlmsg_end(dump->skb, nlh);

skip:
	dump->idx++;
	return 0;

nla_put_failure:
	nlmsg_cancel(dump->skb, nlh);
	return -EMSGSIZE;
}

/**
 *	switchdev_port_fdb_dump - Dump port FDB (MAC/VLAN) entries
 *
 *	@skb: netlink skb
 *	@cb: netlink callback
 *	@dev: port device
 *	@filter_dev: filter device
 *	@idx: index to start dumping at
 *
 *	Dump FDB entries from switch device.
 */
int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
			    struct net_device *dev,
			    struct net_device *filter_dev, int idx)
{
	struct switchdev_fdb_dump dump = {
		.fdb.obj.id = SWITCHDEV_OBJ_ID_PORT_FDB,
		.dev = dev,
		.skb = skb,
		.cb = cb,
		.idx = idx,
	};

	switchdev_port_obj_dump(dev, &dump.fdb.obj, switchdev_port_fdb_dump_cb);
	return dump.idx;
}
EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump);
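
/* Wiring sketch, as with the bridge helpers above but for the FDB ops:
 *
 *	static const struct net_device_ops foo_netdev_ops = {
 *		...
 *		.ndo_fdb_add		= switchdev_port_fdb_add,
 *		.ndo_fdb_del		= switchdev_port_fdb_del,
 *		.ndo_fdb_dump		= switchdev_port_fdb_dump,
 *	};
 */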

static struct net_device *switchdev_get_lowest_dev(struct net_device *dev)
{
	const struct switchdev_ops *ops = dev->switchdev_ops;
	struct net_device *lower_dev;
	struct net_device *port_dev;
	struct list_head *iter;

	/* Recursively search down until we find a sw port dev.
	 * (A sw port dev supports switchdev_port_attr_get).
	 */

	if (ops && ops->switchdev_port_attr_get)
		return dev;

	netdev_for_each_lower_dev(dev, lower_dev, iter) {
		port_dev = switchdev_get_lowest_dev(lower_dev);
		if (port_dev)
			return port_dev;
	}

	return NULL;
}

static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi)
{
	struct switchdev_attr attr = {
		.id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
	};
	struct switchdev_attr prev_attr;
	struct net_device *dev = NULL;
	int nhsel;

	/* For this route, all nexthop devs must be on the same switch. */

	for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
		const struct fib_nh *nh = &fi->fib_nh[nhsel];

		if (!nh->nh_dev)
			return NULL;

		dev = switchdev_get_lowest_dev(nh->nh_dev);
		if (!dev)
			return NULL;

		if (switchdev_port_attr_get(dev, &attr))
			return NULL;

		if (nhsel > 0 &&
		    !netdev_phys_item_id_same(&prev_attr.u.ppid, &attr.u.ppid))
			return NULL;

		prev_attr = attr;
	}

	return dev;
}

/**
 *	switchdev_fib_ipv4_add - Add/modify switch IPv4 route entry
 *
 *	@dst: route's IPv4 destination address
 *	@dst_len: destination address length (prefix length)
 *	@fi: route FIB info structure
 *	@tos: route TOS
 *	@type: route type
 *	@nlflags: netlink flags passed in (NLM_F_*)
 *	@tb_id: route table ID
 *
 *	Add/modify switch IPv4 route entry.
 */
int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
			   u8 tos, u8 type, u32 nlflags, u32 tb_id)
{
	struct switchdev_obj_ipv4_fib ipv4_fib = {
		.obj.id = SWITCHDEV_OBJ_ID_IPV4_FIB,
		.dst = dst,
		.dst_len = dst_len,
		.fi = fi,
		.tos = tos,
		.type = type,
		.nlflags = nlflags,
		.tb_id = tb_id,
	};
	struct net_device *dev;
	int err = 0;

	/* Don't offload route if using custom ip rules or if
	 * IPv4 FIB offloading has been disabled completely.
	 */

#ifdef CONFIG_IP_MULTIPLE_TABLES
	if (fi->fib_net->ipv4.fib_has_custom_rules)
		return 0;
#endif

	if (fi->fib_net->ipv4.fib_offload_disabled)
		return 0;

	dev = switchdev_get_dev_by_nhs(fi);
	if (!dev)
		return 0;

	err = switchdev_port_obj_add(dev, &ipv4_fib.obj);
	if (!err)
		fi->fib_flags |= RTNH_F_OFFLOAD;

	return err == -EOPNOTSUPP ? 0 : err;
}
EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_add);
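
/* Caller sketch: the IPv4 FIB code invokes this as routes are installed,
 * roughly as below (abbreviated; cfg and tb are FIB-insert-path locals),
 * falling back to switchdev_fib_ipv4_abort() on failure:
 *
 *	err = switchdev_fib_ipv4_add(key, plen, fi, tos, cfg->fc_type,
 *				     cfg->fc_nlflags, tb->tb_id);
 *	if (err)
 *		switchdev_fib_ipv4_abort(fi);
 */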

/**
 *	switchdev_fib_ipv4_del - Delete IPv4 route entry from switch
 *
 *	@dst: route's IPv4 destination address
 *	@dst_len: destination address length (prefix length)
 *	@fi: route FIB info structure
 *	@tos: route TOS
 *	@type: route type
 *	@tb_id: route table ID
 *
 *	Delete IPv4 route entry from switch device.
 */
int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
			   u8 tos, u8 type, u32 tb_id)
{
	struct switchdev_obj_ipv4_fib ipv4_fib = {
		.obj.id = SWITCHDEV_OBJ_ID_IPV4_FIB,
		.dst = dst,
		.dst_len = dst_len,
		.fi = fi,
		.tos = tos,
		.type = type,
		.nlflags = 0,
		.tb_id = tb_id,
	};
	struct net_device *dev;
	int err = 0;

	if (!(fi->fib_flags & RTNH_F_OFFLOAD))
		return 0;

	dev = switchdev_get_dev_by_nhs(fi);
	if (!dev)
		return 0;

	err = switchdev_port_obj_del(dev, &ipv4_fib.obj);
	if (!err)
		fi->fib_flags &= ~RTNH_F_OFFLOAD;

	return err == -EOPNOTSUPP ? 0 : err;
}
EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_del);

/**
 *	switchdev_fib_ipv4_abort - Abort an IPv4 FIB operation
 *
 *	@fi: route FIB info structure
 */
void switchdev_fib_ipv4_abort(struct fib_info *fi)
{
	/* There was a problem installing this route to the offload
	 * device.  For now, until we come up with more refined
	 * policy handling, abruptly end IPv4 fib offloading for
	 * the entire net by flushing offload device(s) of all
	 * IPv4 routes, and mark IPv4 fib offloading broken from
	 * this point forward.
	 */

	fib_flush_external(fi->fib_net);
	fi->fib_net->ipv4.fib_offload_disabled = true;
}
EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort);

static bool switchdev_port_same_parent_id(struct net_device *a,
					  struct net_device *b)
{
	struct switchdev_attr a_attr = {
		.id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
		.flags = SWITCHDEV_F_NO_RECURSE,
	};
	struct switchdev_attr b_attr = {
		.id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
		.flags = SWITCHDEV_F_NO_RECURSE,
	};

	if (switchdev_port_attr_get(a, &a_attr) ||
	    switchdev_port_attr_get(b, &b_attr))
		return false;

	return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid);
}

static u32 switchdev_port_fwd_mark_get(struct net_device *dev,
				       struct net_device *group_dev)
{
	struct net_device *lower_dev;
	struct list_head *iter;

	netdev_for_each_lower_dev(group_dev, lower_dev, iter) {
		if (lower_dev == dev)
			continue;
		if (switchdev_port_same_parent_id(dev, lower_dev))
			return lower_dev->offload_fwd_mark;
		return switchdev_port_fwd_mark_get(dev, lower_dev);
	}

	return dev->ifindex;
}

static void switchdev_port_fwd_mark_reset(struct net_device *group_dev,
					  u32 old_mark, u32 *reset_mark)
{
	struct net_device *lower_dev;
	struct list_head *iter;

	netdev_for_each_lower_dev(group_dev, lower_dev, iter) {
		if (lower_dev->offload_fwd_mark == old_mark) {
			if (!*reset_mark)
				*reset_mark = lower_dev->ifindex;
			lower_dev->offload_fwd_mark = *reset_mark;
		}
		switchdev_port_fwd_mark_reset(lower_dev, old_mark, reset_mark);
	}
}

/**
 *	switchdev_port_fwd_mark_set - Set port offload forwarding mark
 *
 *	@dev: port device
 *	@group_dev: containing device
 *	@joining: true if dev is joining group; false if leaving group
 *
 *	An ungrouped port's offload mark is just its ifindex.  A grouped
 *	port's (member of a bridge, for example) offload mark is the ifindex
 *	of one of the ports in the group with the same parent (switch) ID.
 *	Ports on the same device in the same group will have the same mark.
 *
 *	Example:
 *
 *		br0		ifindex=9
 *		  sw1p1		ifindex=2	mark=2
 *		  sw1p2		ifindex=3	mark=2
 *		  sw2p1		ifindex=4	mark=5
 *		  sw2p2		ifindex=5	mark=5
 *
 *	If sw2p2 leaves the bridge, we'll have:
 *
 *		br0		ifindex=9
 *		  sw1p1		ifindex=2	mark=2
 *		  sw1p2		ifindex=3	mark=2
 *		  sw2p1		ifindex=4	mark=4
 *		sw2p2		ifindex=5	mark=5
 */
void switchdev_port_fwd_mark_set(struct net_device *dev,
				 struct net_device *group_dev,
				 bool joining)
{
	u32 mark = dev->ifindex;
	u32 reset_mark = 0;

	if (group_dev && joining) {
		mark = switchdev_port_fwd_mark_get(dev, group_dev);
	} else if (group_dev && !joining) {
		if (dev->offload_fwd_mark == mark)
			/* Ohoh, this port was the mark reference port,
			 * but it's leaving the group, so reset the
			 * mark for the remaining ports in the group.
			 */
			switchdev_port_fwd_mark_reset(group_dev, mark,
						      &reset_mark);
	}

	dev->offload_fwd_mark = mark;
}
EXPORT_SYMBOL_GPL(switchdev_port_fwd_mark_set);
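
/* Caller sketch: the bridge invokes this as ports join or leave, e.g. on
 * join (p and br are bridge-side locals):
 *
 *	switchdev_port_fwd_mark_set(p->dev, br->dev, true);
 *
 * and with joining=false on leave, resetting the mark for the remaining
 * members when the leaving port was the mark reference port.
 */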
1213