/*
 * net/switchdev/switchdev.c - Switch device API
 * Copyright (c) 2014-2015 Jiri Pirko <[email protected]>
 * Copyright (c) 2014-2015 Scott Feldman <[email protected]>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/notifier.h>
#include <linux/netdevice.h>
#include <linux/if_bridge.h>
#include <linux/list.h>
#include <net/ip_fib.h>
#include <net/switchdev.h>

/**
 * switchdev_trans_item_enqueue - Enqueue data item to transaction queue
 *
 * @trans: transaction
 * @data: pointer to data being queued
 * @destructor: data destructor
 * @tritem: transaction item being queued
 *
 * Enqueue data item to transaction queue. tritem is typically placed in
 * container pointed at by data pointer. Destructor is called on
 * transaction abort and after successful commit phase in case
 * the caller did not dequeue the item before.
 */
void switchdev_trans_item_enqueue(struct switchdev_trans *trans,
				  void *data, void (*destructor)(void const *),
				  struct switchdev_trans_item *tritem)
{
	tritem->data = data;
	tritem->destructor = destructor;
	list_add_tail(&tritem->list, &trans->item_list);
}
EXPORT_SYMBOL_GPL(switchdev_trans_item_enqueue);

static struct switchdev_trans_item *
__switchdev_trans_item_dequeue(struct switchdev_trans *trans)
{
	struct switchdev_trans_item *tritem;

	if (list_empty(&trans->item_list))
		return NULL;
	tritem = list_first_entry(&trans->item_list,
				  struct switchdev_trans_item, list);
	list_del(&tritem->list);
	return tritem;
}

/**
 * switchdev_trans_item_dequeue - Dequeue data item from transaction queue
 *
 * @trans: transaction
 */
void *switchdev_trans_item_dequeue(struct switchdev_trans *trans)
{
	struct switchdev_trans_item *tritem;

	tritem = __switchdev_trans_item_dequeue(trans);
	BUG_ON(!tritem);
	return tritem->data;
}
EXPORT_SYMBOL_GPL(switchdev_trans_item_dequeue);

static void switchdev_trans_init(struct switchdev_trans *trans)
{
	INIT_LIST_HEAD(&trans->item_list);
}

static void switchdev_trans_items_destroy(struct switchdev_trans *trans)
{
	struct switchdev_trans_item *tritem;

	while ((tritem = __switchdev_trans_item_dequeue(trans)))
		tritem->destructor(tritem->data);
}

static void switchdev_trans_items_warn_destroy(struct net_device *dev,
					       struct switchdev_trans *trans)
{
	WARN(!list_empty(&trans->item_list), "%s: transaction item queue is not empty.\n",
	     dev->name);
	switchdev_trans_items_destroy(trans);
}

/**
 * switchdev_port_attr_get - Get port attribute
 *
 * @dev: port device
 * @attr: attribute to get
 */
int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr)
{
	const struct switchdev_ops *ops = dev->switchdev_ops;
	struct net_device *lower_dev;
	struct list_head *iter;
	struct switchdev_attr first = {
		.id = SWITCHDEV_ATTR_ID_UNDEFINED
	};
	int err = -EOPNOTSUPP;

	if (ops && ops->switchdev_port_attr_get)
		return ops->switchdev_port_attr_get(dev, attr);

	if (attr->flags & SWITCHDEV_F_NO_RECURSE)
		return err;

	/* Switch device port(s) may be stacked under
	 * bond/team/vlan dev, so recurse down to get attr on
	 * each port. Return -ENODATA if attr values don't
	 * compare across ports.
	 */

	netdev_for_each_lower_dev(dev, lower_dev, iter) {
		err = switchdev_port_attr_get(lower_dev, attr);
		if (err)
			break;
		if (first.id == SWITCHDEV_ATTR_ID_UNDEFINED)
			first = *attr;
		else if (memcmp(&first, attr, sizeof(*attr)))
			return -ENODATA;
	}

	return err;
}
EXPORT_SYMBOL_GPL(switchdev_port_attr_get);
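
/* Example (illustrative sketch, not part of this file): a driver
 * typically answers SWITCHDEV_ATTR_ID_PORT_PARENT_ID from its
 * switchdev_port_attr_get op, so stacked devices and the FIB code can
 * tell which ports share one physical switch. The "foo" names below
 * are hypothetical:
 *
 *	static int foo_port_attr_get(struct net_device *dev,
 *				     struct switchdev_attr *attr)
 *	{
 *		struct foo_port *port = netdev_priv(dev);
 *
 *		switch (attr->id) {
 *		case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
 *			attr->u.ppid.id_len = sizeof(port->switch_id);
 *			memcpy(attr->u.ppid.id, &port->switch_id,
 *			       attr->u.ppid.id_len);
 *			return 0;
 *		default:
 *			return -EOPNOTSUPP;
 *		}
 *	}
 */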

static int __switchdev_port_attr_set(struct net_device *dev,
				     struct switchdev_attr *attr,
				     struct switchdev_trans *trans)
{
	const struct switchdev_ops *ops = dev->switchdev_ops;
	struct net_device *lower_dev;
	struct list_head *iter;
	int err = -EOPNOTSUPP;

	if (ops && ops->switchdev_port_attr_set)
		return ops->switchdev_port_attr_set(dev, attr, trans);

	if (attr->flags & SWITCHDEV_F_NO_RECURSE)
		return err;

	/* Switch device port(s) may be stacked under
	 * bond/team/vlan dev, so recurse down to set attr on
	 * each port.
	 */

	netdev_for_each_lower_dev(dev, lower_dev, iter) {
		err = __switchdev_port_attr_set(lower_dev, attr, trans);
		if (err)
			break;
	}

	return err;
}

struct switchdev_attr_set_work {
	struct work_struct work;
	struct net_device *dev;
	struct switchdev_attr attr;
};

static void switchdev_port_attr_set_work(struct work_struct *work)
{
	struct switchdev_attr_set_work *asw =
		container_of(work, struct switchdev_attr_set_work, work);
	int err;

	rtnl_lock();
	err = switchdev_port_attr_set(asw->dev, &asw->attr);
	if (err && err != -EOPNOTSUPP)
		netdev_err(asw->dev, "failed (err=%d) to set attribute (id=%d)\n",
			   err, asw->attr.id);
	rtnl_unlock();

	dev_put(asw->dev);
	kfree(work);
}

static int switchdev_port_attr_set_defer(struct net_device *dev,
					 struct switchdev_attr *attr)
{
	struct switchdev_attr_set_work *asw;

	asw = kmalloc(sizeof(*asw), GFP_ATOMIC);
	if (!asw)
		return -ENOMEM;

	INIT_WORK(&asw->work, switchdev_port_attr_set_work);

	dev_hold(dev);
	asw->dev = dev;
	memcpy(&asw->attr, attr, sizeof(asw->attr));

	schedule_work(&asw->work);

	return 0;
}

/**
 * switchdev_port_attr_set - Set port attribute
 *
 * @dev: port device
 * @attr: attribute to set
 *
 * Use a 2-phase prepare-commit transaction model to ensure
 * system is not left in a partially updated state due to
 * failure from driver/device.
 */
int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr)
{
	struct switchdev_trans trans;
	int err;

	if (!rtnl_is_locked()) {
		/* Running prepare-commit transaction across stacked
		 * devices requires nothing moves, so if rtnl_lock is
		 * not held, schedule a worker thread to hold rtnl_lock
		 * while setting attr.
		 */

		return switchdev_port_attr_set_defer(dev, attr);
	}

	switchdev_trans_init(&trans);

	/* Phase I: prepare for attr set. Driver/device should fail
	 * here if there are going to be issues in the commit phase,
	 * such as lack of resources or support. The driver/device
	 * should reserve resources needed for the commit phase here,
	 * but should not commit the attr.
	 */

	trans.ph_prepare = true;
	err = __switchdev_port_attr_set(dev, attr, &trans);
	if (err) {
		/* Prepare phase failed: abort the transaction. Any
		 * resources reserved in the prepare phase are
		 * released.
		 */

		if (err != -EOPNOTSUPP)
			switchdev_trans_items_destroy(&trans);

		return err;
	}

	/* Phase II: commit attr set. This cannot fail as a fault
	 * of driver/device. If it does, it's a bug in the driver/device
	 * because the driver said everything was OK in phase I.
	 */

	trans.ph_prepare = false;
	err = __switchdev_port_attr_set(dev, attr, &trans);
	WARN(err, "%s: Commit of attribute (id=%d) failed.\n",
	     dev->name, attr->id);
	switchdev_trans_items_warn_destroy(dev, &trans);

	return err;
}
EXPORT_SYMBOL_GPL(switchdev_port_attr_set);
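
/* Example (illustrative sketch, not part of this file): a driver's
 * switchdev_port_attr_set op sees the same attr twice, once with
 * trans->ph_prepare set and once clear. The "foo" helpers are
 * hypothetical:
 *
 *	static int foo_port_attr_set(struct net_device *dev,
 *				     struct switchdev_attr *attr,
 *				     struct switchdev_trans *trans)
 *	{
 *		switch (attr->id) {
 *		case SWITCHDEV_ATTR_ID_PORT_STP_STATE:
 *			if (switchdev_trans_ph_prepare(trans))
 *				return foo_check_stp_resources(dev);
 *			return foo_write_stp_state(dev, attr->u.stp_state);
 *		default:
 *			return -EOPNOTSUPP;
 *		}
 *	}
 */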

static int __switchdev_port_obj_add(struct net_device *dev,
				    const struct switchdev_obj *obj,
				    struct switchdev_trans *trans)
{
	const struct switchdev_ops *ops = dev->switchdev_ops;
	struct net_device *lower_dev;
	struct list_head *iter;
	int err = -EOPNOTSUPP;

	if (ops && ops->switchdev_port_obj_add)
		return ops->switchdev_port_obj_add(dev, obj, trans);

	/* Switch device port(s) may be stacked under
	 * bond/team/vlan dev, so recurse down to add object on
	 * each port.
	 */

	netdev_for_each_lower_dev(dev, lower_dev, iter) {
		err = __switchdev_port_obj_add(lower_dev, obj, trans);
		if (err)
			break;
	}

	return err;
}

/**
 * switchdev_port_obj_add - Add port object
 *
 * @dev: port device
 * @obj: object to add
 *
 * Use a 2-phase prepare-commit transaction model to ensure
 * system is not left in a partially updated state due to
 * failure from driver/device.
 *
 * rtnl_lock must be held.
 */
int switchdev_port_obj_add(struct net_device *dev,
			   const struct switchdev_obj *obj)
{
	struct switchdev_trans trans;
	int err;

	ASSERT_RTNL();

	switchdev_trans_init(&trans);

	/* Phase I: prepare for obj add. Driver/device should fail
	 * here if there are going to be issues in the commit phase,
	 * such as lack of resources or support. The driver/device
	 * should reserve resources needed for the commit phase here,
	 * but should not commit the obj.
	 */

	trans.ph_prepare = true;
	err = __switchdev_port_obj_add(dev, obj, &trans);
	if (err) {
		/* Prepare phase failed: abort the transaction. Any
		 * resources reserved in the prepare phase are
		 * released.
		 */

		if (err != -EOPNOTSUPP)
			switchdev_trans_items_destroy(&trans);

		return err;
	}

	/* Phase II: commit obj add. This cannot fail as a fault
	 * of driver/device. If it does, it's a bug in the driver/device
	 * because the driver said everything was OK in phase I.
	 */

	trans.ph_prepare = false;
	err = __switchdev_port_obj_add(dev, obj, &trans);
	WARN(err, "%s: Commit of object (id=%d) failed.\n", dev->name, obj->id);
	switchdev_trans_items_warn_destroy(dev, &trans);

	return err;
}
EXPORT_SYMBOL_GPL(switchdev_port_obj_add);
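
/* Example (illustrative sketch, not part of this file): a caller
 * holding rtnl_lock can program VLAN 100, untagged PVID, on a port
 * like so:
 *
 *	struct switchdev_obj_port_vlan vlan = {
 *		.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
 *		.flags = BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED,
 *		.vid_begin = 100,
 *		.vid_end = 100,
 *	};
 *	int err = switchdev_port_obj_add(dev, &vlan.obj);
 */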

/**
 * switchdev_port_obj_del - Delete port object
 *
 * @dev: port device
 * @obj: object to delete
 */
int switchdev_port_obj_del(struct net_device *dev,
			   const struct switchdev_obj *obj)
{
	const struct switchdev_ops *ops = dev->switchdev_ops;
	struct net_device *lower_dev;
	struct list_head *iter;
	int err = -EOPNOTSUPP;

	if (ops && ops->switchdev_port_obj_del)
		return ops->switchdev_port_obj_del(dev, obj);

	/* Switch device port(s) may be stacked under
	 * bond/team/vlan dev, so recurse down to delete object on
	 * each port.
	 */

	netdev_for_each_lower_dev(dev, lower_dev, iter) {
		err = switchdev_port_obj_del(lower_dev, obj);
		if (err)
			break;
	}

	return err;
}
EXPORT_SYMBOL_GPL(switchdev_port_obj_del);

/**
 * switchdev_port_obj_dump - Dump port objects
 *
 * @dev: port device
 * @obj: object to dump
 * @cb: function to call with a filled object
 */
int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj,
			    switchdev_obj_dump_cb_t *cb)
{
	const struct switchdev_ops *ops = dev->switchdev_ops;
	struct net_device *lower_dev;
	struct list_head *iter;
	int err = -EOPNOTSUPP;

	if (ops && ops->switchdev_port_obj_dump)
		return ops->switchdev_port_obj_dump(dev, obj, cb);

	/* Switch device port(s) may be stacked under
	 * bond/team/vlan dev, so recurse down to dump objects on
	 * first port at bottom of stack.
	 */

	netdev_for_each_lower_dev(dev, lower_dev, iter) {
		err = switchdev_port_obj_dump(lower_dev, obj, cb);
		break;
	}

	return err;
}
EXPORT_SYMBOL_GPL(switchdev_port_obj_dump);

static DEFINE_MUTEX(switchdev_mutex);
static RAW_NOTIFIER_HEAD(switchdev_notif_chain);

/**
 * register_switchdev_notifier - Register notifier
 * @nb: notifier_block
 *
 * Register switch device notifier. This should be used by code
 * which needs to monitor events happening in a particular device.
 * Return values are same as for atomic_notifier_chain_register().
 */
int register_switchdev_notifier(struct notifier_block *nb)
{
	int err;

	mutex_lock(&switchdev_mutex);
	err = raw_notifier_chain_register(&switchdev_notif_chain, nb);
	mutex_unlock(&switchdev_mutex);
	return err;
}
EXPORT_SYMBOL_GPL(register_switchdev_notifier);

/**
 * unregister_switchdev_notifier - Unregister notifier
 * @nb: notifier_block
 *
 * Unregister switch device notifier.
 * Return values are same as for atomic_notifier_chain_unregister().
 */
int unregister_switchdev_notifier(struct notifier_block *nb)
{
	int err;

	mutex_lock(&switchdev_mutex);
	err = raw_notifier_chain_unregister(&switchdev_notif_chain, nb);
	mutex_unlock(&switchdev_mutex);
	return err;
}
EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);
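
/* Example (illustrative sketch, not part of this file): a consumer
 * such as a bridge can watch for hardware-learned FDB entries by
 * registering a notifier block. The "foo" names are hypothetical:
 *
 *	static int foo_switchdev_event(struct notifier_block *nb,
 *				       unsigned long event, void *ptr)
 *	{
 *		struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
 *		struct switchdev_notifier_fdb_info *fdb_info = ptr;
 *
 *		switch (event) {
 *		case SWITCHDEV_FDB_ADD:
 *			foo_fdb_offloaded(dev, fdb_info->addr, fdb_info->vid);
 *			break;
 *		}
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block foo_switchdev_nb = {
 *		.notifier_call = foo_switchdev_event,
 *	};
 *
 *	err = register_switchdev_notifier(&foo_switchdev_nb);
 */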

/**
 * call_switchdev_notifiers - Call notifiers
 * @val: value passed unmodified to notifier function
 * @dev: port device
 * @info: notifier information data
 *
 * Call all network notifier blocks. This should be called by a driver
 * when it needs to propagate a hardware event.
 * Return values are same as for atomic_notifier_call_chain().
 */
int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
			     struct switchdev_notifier_info *info)
{
	int err;

	info->dev = dev;
	mutex_lock(&switchdev_mutex);
	err = raw_notifier_call_chain(&switchdev_notif_chain, val, info);
	mutex_unlock(&switchdev_mutex);
	return err;
}
EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
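
/* Example (illustrative sketch, not part of this file): a driver that
 * learned a MAC address in hardware might propagate it like so:
 *
 *	struct switchdev_notifier_fdb_info info;
 *
 *	info.addr = addr;
 *	info.vid = vid;
 *	call_switchdev_notifiers(SWITCHDEV_FDB_ADD, dev, &info.info);
 */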

struct switchdev_vlan_dump {
	struct switchdev_obj_port_vlan vlan;
	struct sk_buff *skb;
	u32 filter_mask;
	u16 flags;
	u16 begin;
	u16 end;
};

static int switchdev_port_vlan_dump_put(struct switchdev_vlan_dump *dump)
{
	struct bridge_vlan_info vinfo;

	vinfo.flags = dump->flags;

	if (dump->begin == 0 && dump->end == 0) {
		return 0;
	} else if (dump->begin == dump->end) {
		vinfo.vid = dump->begin;
		if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
			    sizeof(vinfo), &vinfo))
			return -EMSGSIZE;
	} else {
		vinfo.vid = dump->begin;
		vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_BEGIN;
		if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
			    sizeof(vinfo), &vinfo))
			return -EMSGSIZE;
		vinfo.vid = dump->end;
		vinfo.flags &= ~BRIDGE_VLAN_INFO_RANGE_BEGIN;
		vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_END;
		if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
			    sizeof(vinfo), &vinfo))
			return -EMSGSIZE;
	}

	return 0;
}

static int switchdev_port_vlan_dump_cb(struct switchdev_obj *obj)
{
	struct switchdev_obj_port_vlan *vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);
	struct switchdev_vlan_dump *dump =
		container_of(vlan, struct switchdev_vlan_dump, vlan);
	int err = 0;

	if (vlan->vid_begin > vlan->vid_end)
		return -EINVAL;

	if (dump->filter_mask & RTEXT_FILTER_BRVLAN) {
		dump->flags = vlan->flags;
		for (dump->begin = dump->end = vlan->vid_begin;
		     dump->begin <= vlan->vid_end;
		     dump->begin++, dump->end++) {
			err = switchdev_port_vlan_dump_put(dump);
			if (err)
				return err;
		}
	} else if (dump->filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) {
		if (dump->begin > vlan->vid_begin &&
		    dump->begin >= vlan->vid_end) {
			if ((dump->begin - 1) == vlan->vid_end &&
			    dump->flags == vlan->flags) {
				/* prepend */
				dump->begin = vlan->vid_begin;
			} else {
				err = switchdev_port_vlan_dump_put(dump);
				dump->flags = vlan->flags;
				dump->begin = vlan->vid_begin;
				dump->end = vlan->vid_end;
			}
		} else if (dump->end <= vlan->vid_begin &&
			   dump->end < vlan->vid_end) {
			if ((dump->end + 1) == vlan->vid_begin &&
			    dump->flags == vlan->flags) {
				/* append */
				dump->end = vlan->vid_end;
			} else {
				err = switchdev_port_vlan_dump_put(dump);
				dump->flags = vlan->flags;
				dump->begin = vlan->vid_begin;
				dump->end = vlan->vid_end;
			}
		} else {
			err = -EINVAL;
		}
	}

	return err;
}

static int switchdev_port_vlan_fill(struct sk_buff *skb, struct net_device *dev,
				    u32 filter_mask)
{
	struct switchdev_vlan_dump dump = {
		.vlan.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
		.skb = skb,
		.filter_mask = filter_mask,
	};
	int err = 0;

	if ((filter_mask & RTEXT_FILTER_BRVLAN) ||
	    (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) {
		err = switchdev_port_obj_dump(dev, &dump.vlan.obj,
					      switchdev_port_vlan_dump_cb);
		if (err)
			goto err_out;
		if (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)
			/* last one */
			err = switchdev_port_vlan_dump_put(&dump);
	}

err_out:
	return err == -EOPNOTSUPP ? 0 : err;
}

/**
 * switchdev_port_bridge_getlink - Get bridge port attributes
 *
 * @dev: port device
 *
 * Called for SELF on rtnl_bridge_getlink to get bridge port
 * attributes.
 */
int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
				  struct net_device *dev, u32 filter_mask,
				  int nlflags)
{
	struct switchdev_attr attr = {
		.id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS,
	};
	u16 mode = BRIDGE_MODE_UNDEF;
	u32 mask = BR_LEARNING | BR_LEARNING_SYNC;
	int err;

	err = switchdev_port_attr_get(dev, &attr);
	if (err && err != -EOPNOTSUPP)
		return err;

	return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode,
				       attr.u.brport_flags, mask, nlflags,
				       filter_mask, switchdev_port_vlan_fill);
}
EXPORT_SYMBOL_GPL(switchdev_port_bridge_getlink);

static int switchdev_port_br_setflag(struct net_device *dev,
				     struct nlattr *nlattr,
				     unsigned long brport_flag)
{
	struct switchdev_attr attr = {
		.id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS,
	};
	u8 flag = nla_get_u8(nlattr);
	int err;

	err = switchdev_port_attr_get(dev, &attr);
	if (err)
		return err;

	if (flag)
		attr.u.brport_flags |= brport_flag;
	else
		attr.u.brport_flags &= ~brport_flag;

	return switchdev_port_attr_set(dev, &attr);
}

static const struct nla_policy
switchdev_port_bridge_policy[IFLA_BRPORT_MAX + 1] = {
	[IFLA_BRPORT_STATE]		= { .type = NLA_U8 },
	[IFLA_BRPORT_COST]		= { .type = NLA_U32 },
	[IFLA_BRPORT_PRIORITY]		= { .type = NLA_U16 },
	[IFLA_BRPORT_MODE]		= { .type = NLA_U8 },
	[IFLA_BRPORT_GUARD]		= { .type = NLA_U8 },
	[IFLA_BRPORT_PROTECT]		= { .type = NLA_U8 },
	[IFLA_BRPORT_FAST_LEAVE]	= { .type = NLA_U8 },
	[IFLA_BRPORT_LEARNING]		= { .type = NLA_U8 },
	[IFLA_BRPORT_LEARNING_SYNC]	= { .type = NLA_U8 },
	[IFLA_BRPORT_UNICAST_FLOOD]	= { .type = NLA_U8 },
};

static int switchdev_port_br_setlink_protinfo(struct net_device *dev,
					      struct nlattr *protinfo)
{
	struct nlattr *attr;
	int rem;
	int err;

	err = nla_validate_nested(protinfo, IFLA_BRPORT_MAX,
				  switchdev_port_bridge_policy);
	if (err)
		return err;

	nla_for_each_nested(attr, protinfo, rem) {
		switch (nla_type(attr)) {
		case IFLA_BRPORT_LEARNING:
			err = switchdev_port_br_setflag(dev, attr,
							BR_LEARNING);
			break;
		case IFLA_BRPORT_LEARNING_SYNC:
			err = switchdev_port_br_setflag(dev, attr,
							BR_LEARNING_SYNC);
			break;
		default:
			err = -EOPNOTSUPP;
			break;
		}
		if (err)
			return err;
	}

	return 0;
}

static int switchdev_port_br_afspec(struct net_device *dev,
				    struct nlattr *afspec,
				    int (*f)(struct net_device *dev,
					     const struct switchdev_obj *obj))
{
	struct nlattr *attr;
	struct bridge_vlan_info *vinfo;
	struct switchdev_obj_port_vlan vlan = {
		.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
	};
	int rem;
	int err;

	nla_for_each_nested(attr, afspec, rem) {
		if (nla_type(attr) != IFLA_BRIDGE_VLAN_INFO)
			continue;
		if (nla_len(attr) != sizeof(struct bridge_vlan_info))
			return -EINVAL;
		vinfo = nla_data(attr);
		vlan.flags = vinfo->flags;
		if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) {
			if (vlan.vid_begin)
				return -EINVAL;
			vlan.vid_begin = vinfo->vid;
		} else if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END) {
			if (!vlan.vid_begin)
				return -EINVAL;
			vlan.vid_end = vinfo->vid;
			if (vlan.vid_end <= vlan.vid_begin)
				return -EINVAL;
			err = f(dev, &vlan.obj);
			if (err)
				return err;
			memset(&vlan, 0, sizeof(vlan));
		} else {
			if (vlan.vid_begin)
				return -EINVAL;
			vlan.vid_begin = vinfo->vid;
			vlan.vid_end = vinfo->vid;
			err = f(dev, &vlan.obj);
			if (err)
				return err;
			memset(&vlan, 0, sizeof(vlan));
		}
	}

	return 0;
}

/**
 * switchdev_port_bridge_setlink - Set bridge port attributes
 *
 * @dev: port device
 * @nlh: netlink header
 * @flags: netlink flags
 *
 * Called for SELF on rtnl_bridge_setlink to set bridge port
 * attributes.
 */
int switchdev_port_bridge_setlink(struct net_device *dev,
				  struct nlmsghdr *nlh, u16 flags)
{
	struct nlattr *protinfo;
	struct nlattr *afspec;
	int err = 0;

	protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
				   IFLA_PROTINFO);
	if (protinfo) {
		err = switchdev_port_br_setlink_protinfo(dev, protinfo);
		if (err)
			return err;
	}

	afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
				 IFLA_AF_SPEC);
	if (afspec)
		err = switchdev_port_br_afspec(dev, afspec,
					       switchdev_port_obj_add);

	return err;
}
EXPORT_SYMBOL_GPL(switchdev_port_bridge_setlink);

/**
 * switchdev_port_bridge_dellink - Del bridge port attributes
 *
 * @dev: port device
 * @nlh: netlink header
 * @flags: netlink flags
 *
 * Called for SELF on rtnl_bridge_dellink to delete bridge port
 * attributes.
 */
int switchdev_port_bridge_dellink(struct net_device *dev,
				  struct nlmsghdr *nlh, u16 flags)
{
	struct nlattr *afspec;

	afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
				 IFLA_AF_SPEC);
	if (afspec)
		return switchdev_port_br_afspec(dev, afspec,
						switchdev_port_obj_del);

	return 0;
}
EXPORT_SYMBOL_GPL(switchdev_port_bridge_dellink);
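
/* Example (illustrative sketch, not part of this file): a switchdev
 * driver typically wires these helpers straight into its
 * net_device_ops so that "bridge link"/"bridge vlan" requests with
 * the self flag reach the device:
 *
 *	static const struct net_device_ops foo_port_netdev_ops = {
 *		...
 *		.ndo_bridge_getlink	= switchdev_port_bridge_getlink,
 *		.ndo_bridge_setlink	= switchdev_port_bridge_setlink,
 *		.ndo_bridge_dellink	= switchdev_port_bridge_dellink,
 *	};
 */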

/**
 * switchdev_port_fdb_add - Add FDB (MAC/VLAN) entry to port
 *
 * @ndm: netlink hdr
 * @tb: netlink attributes
 * @dev: port device
 * @addr: MAC address to add
 * @vid: VLAN to add
 * @nlm_flags: netlink flags passed in (NLM_F_*)
 *
 * Add FDB entry to switch device.
 */
int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
			   struct net_device *dev, const unsigned char *addr,
			   u16 vid, u16 nlm_flags)
{
	struct switchdev_obj_port_fdb fdb = {
		.obj.id = SWITCHDEV_OBJ_ID_PORT_FDB,
		.addr = addr,
		.vid = vid,
	};

	return switchdev_port_obj_add(dev, &fdb.obj);
}
EXPORT_SYMBOL_GPL(switchdev_port_fdb_add);

/**
 * switchdev_port_fdb_del - Delete FDB (MAC/VLAN) entry from port
 *
 * @ndm: netlink hdr
 * @tb: netlink attributes
 * @dev: port device
 * @addr: MAC address to delete
 * @vid: VLAN to delete
 *
 * Delete FDB entry from switch device.
 */
int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
			   struct net_device *dev, const unsigned char *addr,
			   u16 vid)
{
	struct switchdev_obj_port_fdb fdb = {
		.obj.id = SWITCHDEV_OBJ_ID_PORT_FDB,
		.addr = addr,
		.vid = vid,
	};

	return switchdev_port_obj_del(dev, &fdb.obj);
}
EXPORT_SYMBOL_GPL(switchdev_port_fdb_del);

struct switchdev_fdb_dump {
	struct switchdev_obj_port_fdb fdb;
	struct net_device *dev;
	struct sk_buff *skb;
	struct netlink_callback *cb;
	int idx;
};

static int switchdev_port_fdb_dump_cb(struct switchdev_obj *obj)
{
	struct switchdev_obj_port_fdb *fdb = SWITCHDEV_OBJ_PORT_FDB(obj);
	struct switchdev_fdb_dump *dump =
		container_of(fdb, struct switchdev_fdb_dump, fdb);
	u32 portid = NETLINK_CB(dump->cb->skb).portid;
	u32 seq = dump->cb->nlh->nlmsg_seq;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	if (dump->idx < dump->cb->args[0])
		goto skip;

	nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH,
			sizeof(*ndm), NLM_F_MULTI);
	if (!nlh)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family  = AF_BRIDGE;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags   = NTF_SELF;
	ndm->ndm_type    = 0;
	ndm->ndm_ifindex = dump->dev->ifindex;
	ndm->ndm_state   = fdb->ndm_state;

	if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, fdb->addr))
		goto nla_put_failure;

	if (fdb->vid && nla_put_u16(dump->skb, NDA_VLAN, fdb->vid))
		goto nla_put_failure;

	nlmsg_end(dump->skb, nlh);

skip:
	dump->idx++;
	return 0;

nla_put_failure:
	nlmsg_cancel(dump->skb, nlh);
	return -EMSGSIZE;
}

/**
 * switchdev_port_fdb_dump - Dump port FDB (MAC/VLAN) entries
 *
 * @skb: netlink skb
 * @cb: netlink callback
 * @dev: port device
 * @filter_dev: filter device
 * @idx: index to start dumping from
 *
 * Dump FDB entries from switch device.
 */
int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
			    struct net_device *dev,
			    struct net_device *filter_dev, int idx)
{
	struct switchdev_fdb_dump dump = {
		.fdb.obj.id = SWITCHDEV_OBJ_ID_PORT_FDB,
		.dev = dev,
		.skb = skb,
		.cb = cb,
		.idx = idx,
	};

	switchdev_port_obj_dump(dev, &dump.fdb.obj, switchdev_port_fdb_dump_cb);
	return dump.idx;
}
EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump);
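
/* Example (illustrative sketch, not part of this file): as with the
 * bridge link helpers above, the FDB helpers are meant to back the
 * corresponding net_device_ops of a switchdev driver:
 *
 *	static const struct net_device_ops foo_port_netdev_ops = {
 *		...
 *		.ndo_fdb_add		= switchdev_port_fdb_add,
 *		.ndo_fdb_del		= switchdev_port_fdb_del,
 *		.ndo_fdb_dump		= switchdev_port_fdb_dump,
 *	};
 */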

static struct net_device *switchdev_get_lowest_dev(struct net_device *dev)
{
	const struct switchdev_ops *ops = dev->switchdev_ops;
	struct net_device *lower_dev;
	struct net_device *port_dev;
	struct list_head *iter;

	/* Recursively search down until we find a sw port dev.
	 * (A sw port dev supports switchdev_port_attr_get).
	 */

	if (ops && ops->switchdev_port_attr_get)
		return dev;

	netdev_for_each_lower_dev(dev, lower_dev, iter) {
		port_dev = switchdev_get_lowest_dev(lower_dev);
		if (port_dev)
			return port_dev;
	}

	return NULL;
}

static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi)
{
	struct switchdev_attr attr = {
		.id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
	};
	struct switchdev_attr prev_attr;
	struct net_device *dev = NULL;
	int nhsel;

	/* For this route, all nexthop devs must be on the same switch. */

	for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
		const struct fib_nh *nh = &fi->fib_nh[nhsel];

		if (!nh->nh_dev)
			return NULL;

		dev = switchdev_get_lowest_dev(nh->nh_dev);
		if (!dev)
			return NULL;

		if (switchdev_port_attr_get(dev, &attr))
			return NULL;

		if (nhsel > 0 &&
		    !netdev_phys_item_id_same(&prev_attr.u.ppid, &attr.u.ppid))
			return NULL;

		prev_attr = attr;
	}

	return dev;
}

/**
 * switchdev_fib_ipv4_add - Add/modify switch IPv4 route entry
 *
 * @dst: route's IPv4 destination address
 * @dst_len: destination address length (prefix length)
 * @fi: route FIB info structure
 * @tos: route TOS
 * @type: route type
 * @nlflags: netlink flags passed in (NLM_F_*)
 * @tb_id: route table ID
 *
 * Add/modify switch IPv4 route entry.
 */
int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
			   u8 tos, u8 type, u32 nlflags, u32 tb_id)
{
	struct switchdev_obj_ipv4_fib ipv4_fib = {
		.obj.id = SWITCHDEV_OBJ_ID_IPV4_FIB,
		.dst = dst,
		.dst_len = dst_len,
		.fi = fi,
		.tos = tos,
		.type = type,
		.nlflags = nlflags,
		.tb_id = tb_id,
	};
	struct net_device *dev;
	int err = 0;

	/* Don't offload route if using custom ip rules or if
	 * IPv4 FIB offloading has been disabled completely.
	 */

#ifdef CONFIG_IP_MULTIPLE_TABLES
	if (fi->fib_net->ipv4.fib_has_custom_rules)
		return 0;
#endif

	if (fi->fib_net->ipv4.fib_offload_disabled)
		return 0;

	dev = switchdev_get_dev_by_nhs(fi);
	if (!dev)
		return 0;

	err = switchdev_port_obj_add(dev, &ipv4_fib.obj);
	if (!err)
		fi->fib_flags |= RTNH_F_OFFLOAD;

	return err == -EOPNOTSUPP ? 0 : err;
}
EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_add);

/**
 * switchdev_fib_ipv4_del - Delete IPv4 route entry from switch
 *
 * @dst: route's IPv4 destination address
 * @dst_len: destination address length (prefix length)
 * @fi: route FIB info structure
 * @tos: route TOS
 * @type: route type
 * @tb_id: route table ID
 *
 * Delete IPv4 route entry from switch device.
 */
int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
			   u8 tos, u8 type, u32 tb_id)
{
	struct switchdev_obj_ipv4_fib ipv4_fib = {
		.obj.id = SWITCHDEV_OBJ_ID_IPV4_FIB,
		.dst = dst,
		.dst_len = dst_len,
		.fi = fi,
		.tos = tos,
		.type = type,
		.nlflags = 0,
		.tb_id = tb_id,
	};
	struct net_device *dev;
	int err = 0;

	if (!(fi->fib_flags & RTNH_F_OFFLOAD))
		return 0;

	dev = switchdev_get_dev_by_nhs(fi);
	if (!dev)
		return 0;

	err = switchdev_port_obj_del(dev, &ipv4_fib.obj);
	if (!err)
		fi->fib_flags &= ~RTNH_F_OFFLOAD;

	return err == -EOPNOTSUPP ? 0 : err;
}
EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_del);
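
/* Example (illustrative sketch, not part of this file): the IPv4 FIB
 * code is expected to pair these helpers roughly like so, falling
 * back to software-only routing via the abort helper below if the
 * offload fails:
 *
 *	err = switchdev_fib_ipv4_add(key, plen, fi, tos, cfg->fc_type,
 *				     cfg->fc_nlflags, tb->tb_id);
 *	if (err)
 *		switchdev_fib_ipv4_abort(fi);
 */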

/**
 * switchdev_fib_ipv4_abort - Abort an IPv4 FIB operation
 *
 * @fi: route FIB info structure
 */
void switchdev_fib_ipv4_abort(struct fib_info *fi)
{
	/* There was a problem installing this route to the offload
	 * device. For now, until we come up with more refined
	 * policy handling, abruptly end IPv4 fib offloading for
	 * the entire net by flushing offload device(s) of all
	 * IPv4 routes, and mark IPv4 fib offloading broken from
	 * this point forward.
	 */

	fib_flush_external(fi->fib_net);
	fi->fib_net->ipv4.fib_offload_disabled = true;
}
EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort);

static bool switchdev_port_same_parent_id(struct net_device *a,
					  struct net_device *b)
{
	struct switchdev_attr a_attr = {
		.id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
		.flags = SWITCHDEV_F_NO_RECURSE,
	};
	struct switchdev_attr b_attr = {
		.id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
		.flags = SWITCHDEV_F_NO_RECURSE,
	};

	if (switchdev_port_attr_get(a, &a_attr) ||
	    switchdev_port_attr_get(b, &b_attr))
		return false;

	return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid);
}

static u32 switchdev_port_fwd_mark_get(struct net_device *dev,
				       struct net_device *group_dev)
{
	struct net_device *lower_dev;
	struct list_head *iter;

	netdev_for_each_lower_dev(group_dev, lower_dev, iter) {
		if (lower_dev == dev)
			continue;
		if (switchdev_port_same_parent_id(dev, lower_dev))
			return lower_dev->offload_fwd_mark;
		return switchdev_port_fwd_mark_get(dev, lower_dev);
	}

	return dev->ifindex;
}

static void switchdev_port_fwd_mark_reset(struct net_device *group_dev,
					  u32 old_mark, u32 *reset_mark)
{
	struct net_device *lower_dev;
	struct list_head *iter;

	netdev_for_each_lower_dev(group_dev, lower_dev, iter) {
		if (lower_dev->offload_fwd_mark == old_mark) {
			if (!*reset_mark)
				*reset_mark = lower_dev->ifindex;
			lower_dev->offload_fwd_mark = *reset_mark;
		}
		switchdev_port_fwd_mark_reset(lower_dev, old_mark, reset_mark);
	}
}

/**
 * switchdev_port_fwd_mark_set - Set port offload forwarding mark
 *
 * @dev: port device
 * @group_dev: containing device
 * @joining: true if dev is joining group; false if leaving group
 *
 * An ungrouped port's offload mark is just its ifindex. A grouped
 * port's (member of a bridge, for example) offload mark is the ifindex
 * of one of the ports in the group with the same parent (switch) ID.
 * Ports on the same device in the same group will have the same mark.
 *
 * Example:
 *
 *	br0 ifindex=9
 *		sw1p1 ifindex=2 mark=2
 *		sw1p2 ifindex=3 mark=2
 *		sw2p1 ifindex=4 mark=5
 *		sw2p2 ifindex=5 mark=5
 *
 * If sw2p2 leaves the bridge, we'll have:
 *
 *	br0 ifindex=9
 *		sw1p1 ifindex=2 mark=2
 *		sw1p2 ifindex=3 mark=2
 *		sw2p1 ifindex=4 mark=4
 *	sw2p2 ifindex=5 mark=5
 */
void switchdev_port_fwd_mark_set(struct net_device *dev,
				 struct net_device *group_dev,
				 bool joining)
{
	u32 mark = dev->ifindex;
	u32 reset_mark = 0;

	if (group_dev && joining) {
		mark = switchdev_port_fwd_mark_get(dev, group_dev);
	} else if (group_dev && !joining) {
		if (dev->offload_fwd_mark == mark)
			/* Ohoh, this port was the mark reference port,
			 * but it's leaving the group, so reset the
			 * mark for the remaining ports in the group.
			 */
			switchdev_port_fwd_mark_reset(group_dev, mark,
						      &reset_mark);
	}

	dev->offload_fwd_mark = mark;
}
EXPORT_SYMBOL_GPL(switchdev_port_fwd_mark_set);
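
/* Example (illustrative sketch, not part of this file): a driver
 * would typically call this from its bridge join/leave paths, e.g.
 * when handling NETDEV_CHANGEUPPER, so that packets already forwarded
 * in hardware between ports carrying the same mark are not forwarded
 * again in software:
 *
 *	on join:
 *		switchdev_port_fwd_mark_set(port_dev, bridge_dev, true);
 *	on leave:
 *		switchdev_port_fwd_mark_set(port_dev, bridge_dev, false);
 */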