/*
 * net/switchdev/switchdev.c - Switch device API
 * Copyright (c) 2014-2015 Jiri Pirko <[email protected]>
 * Copyright (c) 2014-2015 Scott Feldman <[email protected]>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/notifier.h>
#include <linux/netdevice.h>
#include <linux/if_bridge.h>
#include <linux/list.h>
#include <net/ip_fib.h>
#include <net/switchdev.h>

/**
 * switchdev_trans_item_enqueue - Enqueue data item to transaction queue
 *
 * @trans: transaction
 * @data: pointer to data being queued
 * @destructor: data destructor
 * @tritem: transaction item being queued
 *
 * Enqueue data item to transaction queue. tritem is typically placed in
 * the container pointed at by the data pointer. Destructor is called on
 * transaction abort and after successful commit phase in case
 * the caller did not dequeue the item before.
 */
void switchdev_trans_item_enqueue(struct switchdev_trans *trans,
                                  void *data, void (*destructor)(void const *),
                                  struct switchdev_trans_item *tritem)
{
        tritem->data = data;
        tritem->destructor = destructor;
        list_add_tail(&tritem->list, &trans->item_list);
}
EXPORT_SYMBOL_GPL(switchdev_trans_item_enqueue);

static struct switchdev_trans_item *
__switchdev_trans_item_dequeue(struct switchdev_trans *trans)
{
        struct switchdev_trans_item *tritem;

        if (list_empty(&trans->item_list))
                return NULL;
        tritem = list_first_entry(&trans->item_list,
                                  struct switchdev_trans_item, list);
        list_del(&tritem->list);
        return tritem;
}

/**
 * switchdev_trans_item_dequeue - Dequeue data item from transaction queue
 *
 * @trans: transaction
 */
void *switchdev_trans_item_dequeue(struct switchdev_trans *trans)
{
        struct switchdev_trans_item *tritem;

        tritem = __switchdev_trans_item_dequeue(trans);
        BUG_ON(!tritem);
        return tritem->data;
}
EXPORT_SYMBOL_GPL(switchdev_trans_item_dequeue);
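/* Illustrative sketch, not part of the original file: how a driver is
 * expected to use the transaction item queue above. The prepare phase
 * pre-allocates everything that could fail and enqueues it; the commit
 * phase dequeues the reserved state and programs the hardware. The
 * foo_* names are hypothetical.
 *
 *      struct foo_vlan_state {
 *              struct switchdev_trans_item tritem;
 *              u16 vid;
 *      };
 *
 *      static int foo_port_vlan_add(struct net_device *dev, u16 vid,
 *                                   struct switchdev_trans *trans)
 *      {
 *              struct foo_vlan_state *state;
 *              int err;
 *
 *              if (trans->ph_prepare) {
 *                      // Phase I: reserve anything that could fail.
 *                      state = kzalloc(sizeof(*state), GFP_KERNEL);
 *                      if (!state)
 *                              return -ENOMEM;
 *                      state->vid = vid;
 *                      switchdev_trans_item_enqueue(trans, state, kfree,
 *                                                   &state->tritem);
 *                      return 0;
 *              }
 *
 *              // Phase II: claim the reserved state and program hw.
 *              state = switchdev_trans_item_dequeue(trans);
 *              err = foo_hw_write_vlan(dev, state->vid);
 *              kfree(state);   // dequeued items are owned by the caller
 *              return err;
 *      }
 */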
static void switchdev_trans_init(struct switchdev_trans *trans)
{
        INIT_LIST_HEAD(&trans->item_list);
}

static void switchdev_trans_items_destroy(struct switchdev_trans *trans)
{
        struct switchdev_trans_item *tritem;

        while ((tritem = __switchdev_trans_item_dequeue(trans)))
                tritem->destructor(tritem->data);
}

static void switchdev_trans_items_warn_destroy(struct net_device *dev,
                                               struct switchdev_trans *trans)
{
        WARN(!list_empty(&trans->item_list), "%s: transaction item queue is not empty.\n",
             dev->name);
        switchdev_trans_items_destroy(trans);
}

/**
 * switchdev_port_attr_get - Get port attribute
 *
 * @dev: port device
 * @attr: attribute to get
 */
int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr)
{
        const struct switchdev_ops *ops = dev->switchdev_ops;
        struct net_device *lower_dev;
        struct list_head *iter;
        struct switchdev_attr first = {
                .id = SWITCHDEV_ATTR_UNDEFINED
        };
        int err = -EOPNOTSUPP;

        if (ops && ops->switchdev_port_attr_get)
                return ops->switchdev_port_attr_get(dev, attr);

        if (attr->flags & SWITCHDEV_F_NO_RECURSE)
                return err;

        /* Switch device port(s) may be stacked under
         * bond/team/vlan dev, so recurse down to get attr on
         * each port. Return -ENODATA if attr values don't
         * compare across ports.
         */

        netdev_for_each_lower_dev(dev, lower_dev, iter) {
                err = switchdev_port_attr_get(lower_dev, attr);
                if (err)
                        break;
                if (first.id == SWITCHDEV_ATTR_UNDEFINED)
                        first = *attr;
                else if (memcmp(&first, attr, sizeof(*attr)))
                        return -ENODATA;
        }

        return err;
}
EXPORT_SYMBOL_GPL(switchdev_port_attr_get);

static int __switchdev_port_attr_set(struct net_device *dev,
                                     struct switchdev_attr *attr,
                                     struct switchdev_trans *trans)
{
        const struct switchdev_ops *ops = dev->switchdev_ops;
        struct net_device *lower_dev;
        struct list_head *iter;
        int err = -EOPNOTSUPP;

        if (ops && ops->switchdev_port_attr_set)
                return ops->switchdev_port_attr_set(dev, attr, trans);

        if (attr->flags & SWITCHDEV_F_NO_RECURSE)
                return err;

        /* Switch device port(s) may be stacked under
         * bond/team/vlan dev, so recurse down to set attr on
         * each port.
         */

        netdev_for_each_lower_dev(dev, lower_dev, iter) {
                err = __switchdev_port_attr_set(lower_dev, attr, trans);
                if (err)
                        break;
        }

        return err;
}

struct switchdev_attr_set_work {
        struct work_struct work;
        struct net_device *dev;
        struct switchdev_attr attr;
};

static void switchdev_port_attr_set_work(struct work_struct *work)
{
        struct switchdev_attr_set_work *asw =
                container_of(work, struct switchdev_attr_set_work, work);
        int err;

        rtnl_lock();
        err = switchdev_port_attr_set(asw->dev, &asw->attr);
        if (err && err != -EOPNOTSUPP)
                netdev_err(asw->dev, "failed (err=%d) to set attribute (id=%d)\n",
                           err, asw->attr.id);
        rtnl_unlock();

        dev_put(asw->dev);
        kfree(work);
}

static int switchdev_port_attr_set_defer(struct net_device *dev,
                                         struct switchdev_attr *attr)
{
        struct switchdev_attr_set_work *asw;

        asw = kmalloc(sizeof(*asw), GFP_ATOMIC);
        if (!asw)
                return -ENOMEM;

        INIT_WORK(&asw->work, switchdev_port_attr_set_work);

        dev_hold(dev);
        asw->dev = dev;
        memcpy(&asw->attr, attr, sizeof(asw->attr));

        schedule_work(&asw->work);

        return 0;
}

/**
 * switchdev_port_attr_set - Set port attribute
 *
 * @dev: port device
 * @attr: attribute to set
 *
 * Use a 2-phase prepare-commit transaction model to ensure
 * system is not left in a partially updated state due to
 * failure from driver/device.
 */
int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr)
{
        struct switchdev_trans trans;
        int err;

        if (!rtnl_is_locked()) {
                /* Running prepare-commit transaction across stacked
                 * devices requires nothing moves, so if rtnl_lock is
                 * not held, schedule a worker thread to hold rtnl_lock
                 * while setting attr.
                 */

                return switchdev_port_attr_set_defer(dev, attr);
        }

        switchdev_trans_init(&trans);

        /* Phase I: prepare for attr set. Driver/device should fail
         * here if there are going to be issues in the commit phase,
         * such as lack of resources or support. The driver/device
         * should reserve resources needed for the commit phase here,
         * but should not commit the attr.
         */

        trans.ph_prepare = true;
        err = __switchdev_port_attr_set(dev, attr, &trans);
        if (err) {
                /* Prepare phase failed: abort the transaction. Any
                 * resources reserved in the prepare phase are
                 * released.
                 */

                if (err != -EOPNOTSUPP)
                        switchdev_trans_items_destroy(&trans);

                return err;
        }

        /* Phase II: commit attr set. This cannot fail as a fault
         * of driver/device. If it does, it's a bug in the driver/device
         * because the driver said everything was OK in phase I.
         */

        trans.ph_prepare = false;
        err = __switchdev_port_attr_set(dev, attr, &trans);
        WARN(err, "%s: Commit of attribute (id=%d) failed.\n",
             dev->name, attr->id);
        switchdev_trans_items_warn_destroy(dev, &trans);

        return err;
}
EXPORT_SYMBOL_GPL(switchdev_port_attr_set);
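/* Illustrative sketch, not part of the original file: the driver side of
 * the prepare-commit model. The driver's switchdev_port_attr_set op is
 * called twice with the same attr, first with trans->ph_prepare true
 * (validate and reserve only), then false (program the hardware). The
 * foo_* helpers are hypothetical.
 *
 *      static int foo_port_attr_set(struct net_device *dev,
 *                                   struct switchdev_attr *attr,
 *                                   struct switchdev_trans *trans)
 *      {
 *              switch (attr->id) {
 *              case SWITCHDEV_ATTR_PORT_STP_STATE:
 *                      if (trans->ph_prepare)
 *                              // Phase I: check support, reserve resources.
 *                              return foo_stp_state_check(dev,
 *                                                         attr->u.stp_state);
 *                      // Phase II: must not fail for reasons checked above.
 *                      return foo_stp_state_write(dev, attr->u.stp_state);
 *              default:
 *                      return -EOPNOTSUPP;
 *              }
 *      }
 *
 * which the driver publishes via dev->switchdev_ops:
 *
 *      static const struct switchdev_ops foo_switchdev_ops = {
 *              .switchdev_port_attr_set = foo_port_attr_set,
 *      };
 */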
static int __switchdev_port_obj_add(struct net_device *dev,
                                    struct switchdev_obj *obj,
                                    struct switchdev_trans *trans)
{
        const struct switchdev_ops *ops = dev->switchdev_ops;
        struct net_device *lower_dev;
        struct list_head *iter;
        int err = -EOPNOTSUPP;

        if (ops && ops->switchdev_port_obj_add)
                return ops->switchdev_port_obj_add(dev, obj, trans);

        /* Switch device port(s) may be stacked under
         * bond/team/vlan dev, so recurse down to add object on
         * each port.
         */

        netdev_for_each_lower_dev(dev, lower_dev, iter) {
                err = __switchdev_port_obj_add(lower_dev, obj, trans);
                if (err)
                        break;
        }

        return err;
}

/**
 * switchdev_port_obj_add - Add port object
 *
 * @dev: port device
 * @obj: object to add
 *
 * Use a 2-phase prepare-commit transaction model to ensure
 * system is not left in a partially updated state due to
 * failure from driver/device.
 *
 * rtnl_lock must be held.
 */
int switchdev_port_obj_add(struct net_device *dev, struct switchdev_obj *obj)
{
        struct switchdev_trans trans;
        int err;

        ASSERT_RTNL();

        switchdev_trans_init(&trans);

        /* Phase I: prepare for obj add. Driver/device should fail
         * here if there are going to be issues in the commit phase,
         * such as lack of resources or support. The driver/device
         * should reserve resources needed for the commit phase here,
         * but should not commit the obj.
         */

        trans.ph_prepare = true;
        err = __switchdev_port_obj_add(dev, obj, &trans);
        if (err) {
                /* Prepare phase failed: abort the transaction. Any
                 * resources reserved in the prepare phase are
                 * released.
                 */

                if (err != -EOPNOTSUPP)
                        switchdev_trans_items_destroy(&trans);

                return err;
        }

        /* Phase II: commit obj add. This cannot fail as a fault
         * of driver/device. If it does, it's a bug in the driver/device
         * because the driver said everything was OK in phase I.
         */

        trans.ph_prepare = false;
        err = __switchdev_port_obj_add(dev, obj, &trans);
        WARN(err, "%s: Commit of object (id=%d) failed.\n", dev->name, obj->id);
        switchdev_trans_items_warn_destroy(dev, &trans);

        return err;
}
EXPORT_SYMBOL_GPL(switchdev_port_obj_add);
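/* Illustrative sketch, not part of the original file: a caller adding a
 * VLAN object to a port, which is essentially what the bridge afspec
 * handling later in this file does. The values are arbitrary.
 *
 *      struct switchdev_obj obj = {
 *              .id = SWITCHDEV_OBJ_PORT_VLAN,
 *              .u.vlan = {
 *                      .flags = BRIDGE_VLAN_INFO_UNTAGGED,
 *                      .vid_begin = 100,
 *                      .vid_end = 110,
 *              },
 *      };
 *      int err;
 *
 *      // rtnl_lock must be held; both phases run before this returns.
 *      err = switchdev_port_obj_add(dev, &obj);
 */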
/**
 * switchdev_port_obj_del - Delete port object
 *
 * @dev: port device
 * @obj: object to delete
 */
int switchdev_port_obj_del(struct net_device *dev, struct switchdev_obj *obj)
{
        const struct switchdev_ops *ops = dev->switchdev_ops;
        struct net_device *lower_dev;
        struct list_head *iter;
        int err = -EOPNOTSUPP;

        if (ops && ops->switchdev_port_obj_del)
                return ops->switchdev_port_obj_del(dev, obj);

        /* Switch device port(s) may be stacked under
         * bond/team/vlan dev, so recurse down to delete object on
         * each port.
         */

        netdev_for_each_lower_dev(dev, lower_dev, iter) {
                err = switchdev_port_obj_del(lower_dev, obj);
                if (err)
                        break;
        }

        return err;
}
EXPORT_SYMBOL_GPL(switchdev_port_obj_del);

/**
 * switchdev_port_obj_dump - Dump port objects
 *
 * @dev: port device
 * @obj: object to dump
 */
int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj)
{
        const struct switchdev_ops *ops = dev->switchdev_ops;
        struct net_device *lower_dev;
        struct list_head *iter;
        int err = -EOPNOTSUPP;

        if (ops && ops->switchdev_port_obj_dump)
                return ops->switchdev_port_obj_dump(dev, obj);

        /* Switch device port(s) may be stacked under
         * bond/team/vlan dev, so recurse down to dump objects on
         * first port at bottom of stack.
         */

        netdev_for_each_lower_dev(dev, lower_dev, iter) {
                err = switchdev_port_obj_dump(lower_dev, obj);
                break;
        }

        return err;
}
EXPORT_SYMBOL_GPL(switchdev_port_obj_dump);

static DEFINE_MUTEX(switchdev_mutex);
static RAW_NOTIFIER_HEAD(switchdev_notif_chain);

/**
 * register_switchdev_notifier - Register notifier
 * @nb: notifier_block
 *
 * Register switch device notifier. This should be used by code
 * which needs to monitor events happening in a particular device.
 * Return values are same as for atomic_notifier_chain_register().
 */
int register_switchdev_notifier(struct notifier_block *nb)
{
        int err;

        mutex_lock(&switchdev_mutex);
        err = raw_notifier_chain_register(&switchdev_notif_chain, nb);
        mutex_unlock(&switchdev_mutex);
        return err;
}
EXPORT_SYMBOL_GPL(register_switchdev_notifier);

/**
 * unregister_switchdev_notifier - Unregister notifier
 * @nb: notifier_block
 *
 * Unregister switch device notifier.
 * Return values are same as for atomic_notifier_chain_unregister().
 */
int unregister_switchdev_notifier(struct notifier_block *nb)
{
        int err;

        mutex_lock(&switchdev_mutex);
        err = raw_notifier_chain_unregister(&switchdev_notif_chain, nb);
        mutex_unlock(&switchdev_mutex);
        return err;
}
EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);

/**
 * call_switchdev_notifiers - Call notifiers
 * @val: value passed unmodified to notifier function
 * @dev: port device
 * @info: notifier information data
 *
 * Call all network notifier blocks. This should be called by driver
 * when it needs to propagate a hardware event.
 * Return values are same as for atomic_notifier_call_chain().
 */
int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
                             struct switchdev_notifier_info *info)
{
        int err;

        info->dev = dev;
        mutex_lock(&switchdev_mutex);
        err = raw_notifier_call_chain(&switchdev_notif_chain, val, info);
        mutex_unlock(&switchdev_mutex);
        return err;
}
EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
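/* Illustrative sketch, not part of the original file: the two halves of
 * the notifier chain. A driver reports a hardware-learned FDB entry with
 * call_switchdev_notifiers(), and a listener (e.g. the bridge) receives
 * it through a registered notifier_block. The foo_* names are
 * hypothetical; SWITCHDEV_FDB_ADD and switchdev_notifier_fdb_info come
 * from net/switchdev.h.
 *
 *      // Driver side: propagate a hw-learned FDB entry.
 *      struct switchdev_notifier_fdb_info fdb_info = {
 *              .addr = mac,
 *              .vid = vid,
 *      };
 *
 *      call_switchdev_notifiers(SWITCHDEV_FDB_ADD, dev, &fdb_info.info);
 *
 *      // Listener side:
 *      static int foo_switchdev_event(struct notifier_block *nb,
 *                                     unsigned long event, void *ptr)
 *      {
 *              struct switchdev_notifier_fdb_info *fdb_info = ptr;
 *
 *              switch (event) {
 *              case SWITCHDEV_FDB_ADD:
 *                      // update software FDB from fdb_info
 *                      break;
 *              }
 *              return NOTIFY_DONE;
 *      }
 *
 *      static struct notifier_block foo_switchdev_nb = {
 *              .notifier_call = foo_switchdev_event,
 *      };
 *
 *      register_switchdev_notifier(&foo_switchdev_nb);
 */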
struct switchdev_vlan_dump {
        struct switchdev_obj obj;
        struct sk_buff *skb;
        u32 filter_mask;
        u16 flags;
        u16 begin;
        u16 end;
};

static int switchdev_port_vlan_dump_put(struct net_device *dev,
                                        struct switchdev_vlan_dump *dump)
{
        struct bridge_vlan_info vinfo;

        vinfo.flags = dump->flags;

        if (dump->begin == 0 && dump->end == 0) {
                return 0;
        } else if (dump->begin == dump->end) {
                vinfo.vid = dump->begin;
                if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
                            sizeof(vinfo), &vinfo))
                        return -EMSGSIZE;
        } else {
                vinfo.vid = dump->begin;
                vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_BEGIN;
                if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
                            sizeof(vinfo), &vinfo))
                        return -EMSGSIZE;
                vinfo.vid = dump->end;
                vinfo.flags &= ~BRIDGE_VLAN_INFO_RANGE_BEGIN;
                vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_END;
                if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO,
                            sizeof(vinfo), &vinfo))
                        return -EMSGSIZE;
        }

        return 0;
}

static int switchdev_port_vlan_dump_cb(struct net_device *dev,
                                       struct switchdev_obj *obj)
{
        struct switchdev_vlan_dump *dump =
                container_of(obj, struct switchdev_vlan_dump, obj);
        struct switchdev_obj_vlan *vlan = &dump->obj.u.vlan;
        int err = 0;

        if (vlan->vid_begin > vlan->vid_end)
                return -EINVAL;

        if (dump->filter_mask & RTEXT_FILTER_BRVLAN) {
                dump->flags = vlan->flags;
                for (dump->begin = dump->end = vlan->vid_begin;
                     dump->begin <= vlan->vid_end;
                     dump->begin++, dump->end++) {
                        err = switchdev_port_vlan_dump_put(dev, dump);
                        if (err)
                                return err;
                }
        } else if (dump->filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) {
                if (dump->begin > vlan->vid_begin &&
                    dump->begin >= vlan->vid_end) {
                        if ((dump->begin - 1) == vlan->vid_end &&
                            dump->flags == vlan->flags) {
                                /* prepend */
                                dump->begin = vlan->vid_begin;
                        } else {
                                err = switchdev_port_vlan_dump_put(dev, dump);
                                dump->flags = vlan->flags;
                                dump->begin = vlan->vid_begin;
                                dump->end = vlan->vid_end;
                        }
                } else if (dump->end <= vlan->vid_begin &&
                           dump->end < vlan->vid_end) {
                        if ((dump->end + 1) == vlan->vid_begin &&
                            dump->flags == vlan->flags) {
                                /* append */
                                dump->end = vlan->vid_end;
                        } else {
                                err = switchdev_port_vlan_dump_put(dev, dump);
                                dump->flags = vlan->flags;
                                dump->begin = vlan->vid_begin;
                                dump->end = vlan->vid_end;
                        }
                } else {
                        err = -EINVAL;
                }
        }

        return err;
}

static int switchdev_port_vlan_fill(struct sk_buff *skb, struct net_device *dev,
                                    u32 filter_mask)
{
        struct switchdev_vlan_dump dump = {
                .obj = {
                        .id = SWITCHDEV_OBJ_PORT_VLAN,
                        .cb = switchdev_port_vlan_dump_cb,
                },
                .skb = skb,
                .filter_mask = filter_mask,
        };
        int err = 0;

        if ((filter_mask & RTEXT_FILTER_BRVLAN) ||
            (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) {
                err = switchdev_port_obj_dump(dev, &dump.obj);
                if (err)
                        goto err_out;
                if (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)
                        /* last one */
                        err = switchdev_port_vlan_dump_put(dev, &dump);
        }

err_out:
        return err == -EOPNOTSUPP ? 0 : err;
}
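/* Worked example, not part of the original file (and assuming the driver
 * dumps VIDs in ascending order): with RTEXT_FILTER_BRVLAN_COMPRESSED,
 * consecutive VIDs carrying identical flags are merged before being
 * emitted:
 *
 *      callback vid 10..10  ->  pending range [10,10]
 *      callback vid 11..11  ->  append, pending range [10,11]
 *      callback vid 12..12  ->  append, pending range [10,12]
 *      callback vid 20..20  ->  not adjacent: emit [10,12], pending [20,20]
 *      switchdev_port_vlan_fill()  ->  emits the last pending range [20,20]
 *
 * so the netlink message carries a RANGE_BEGIN/RANGE_END pair of
 * IFLA_BRIDGE_VLAN_INFO attrs for 10-12 plus a single attr for 20.
 */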
/**
 * switchdev_port_bridge_getlink - Get bridge port attributes
 *
 * @dev: port device
 *
 * Called for SELF on rtnl_bridge_getlink to get bridge port
 * attributes.
 */
int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
                                  struct net_device *dev, u32 filter_mask,
                                  int nlflags)
{
        struct switchdev_attr attr = {
                .id = SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS,
        };
        u16 mode = BRIDGE_MODE_UNDEF;
        u32 mask = BR_LEARNING | BR_LEARNING_SYNC;
        int err;

        err = switchdev_port_attr_get(dev, &attr);
        if (err && err != -EOPNOTSUPP)
                return err;

        return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode,
                                       attr.u.brport_flags, mask, nlflags,
                                       filter_mask, switchdev_port_vlan_fill);
}
EXPORT_SYMBOL_GPL(switchdev_port_bridge_getlink);

static int switchdev_port_br_setflag(struct net_device *dev,
                                     struct nlattr *nlattr,
                                     unsigned long brport_flag)
{
        struct switchdev_attr attr = {
                .id = SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS,
        };
        u8 flag = nla_get_u8(nlattr);
        int err;

        err = switchdev_port_attr_get(dev, &attr);
        if (err)
                return err;

        if (flag)
                attr.u.brport_flags |= brport_flag;
        else
                attr.u.brport_flags &= ~brport_flag;

        return switchdev_port_attr_set(dev, &attr);
}

static const struct nla_policy
switchdev_port_bridge_policy[IFLA_BRPORT_MAX + 1] = {
        [IFLA_BRPORT_STATE]             = { .type = NLA_U8 },
        [IFLA_BRPORT_COST]              = { .type = NLA_U32 },
        [IFLA_BRPORT_PRIORITY]          = { .type = NLA_U16 },
        [IFLA_BRPORT_MODE]              = { .type = NLA_U8 },
        [IFLA_BRPORT_GUARD]             = { .type = NLA_U8 },
        [IFLA_BRPORT_PROTECT]           = { .type = NLA_U8 },
        [IFLA_BRPORT_FAST_LEAVE]        = { .type = NLA_U8 },
        [IFLA_BRPORT_LEARNING]          = { .type = NLA_U8 },
        [IFLA_BRPORT_LEARNING_SYNC]     = { .type = NLA_U8 },
        [IFLA_BRPORT_UNICAST_FLOOD]     = { .type = NLA_U8 },
};

static int switchdev_port_br_setlink_protinfo(struct net_device *dev,
                                              struct nlattr *protinfo)
{
        struct nlattr *attr;
        int rem;
        int err;

        err = nla_validate_nested(protinfo, IFLA_BRPORT_MAX,
                                  switchdev_port_bridge_policy);
        if (err)
                return err;

        nla_for_each_nested(attr, protinfo, rem) {
                switch (nla_type(attr)) {
                case IFLA_BRPORT_LEARNING:
                        err = switchdev_port_br_setflag(dev, attr,
                                                        BR_LEARNING);
                        break;
                case IFLA_BRPORT_LEARNING_SYNC:
                        err = switchdev_port_br_setflag(dev, attr,
                                                        BR_LEARNING_SYNC);
                        break;
                default:
                        err = -EOPNOTSUPP;
                        break;
                }
                if (err)
                        return err;
        }

        return 0;
}

static int switchdev_port_br_afspec(struct net_device *dev,
                                    struct nlattr *afspec,
                                    int (*f)(struct net_device *dev,
                                             struct switchdev_obj *obj))
{
        struct nlattr *attr;
        struct bridge_vlan_info *vinfo;
        struct switchdev_obj obj = {
                .id = SWITCHDEV_OBJ_PORT_VLAN,
        };
        struct switchdev_obj_vlan *vlan = &obj.u.vlan;
        int rem;
        int err;

        nla_for_each_nested(attr, afspec, rem) {
                if (nla_type(attr) != IFLA_BRIDGE_VLAN_INFO)
                        continue;
                if (nla_len(attr) != sizeof(struct bridge_vlan_info))
                        return -EINVAL;
                vinfo = nla_data(attr);
                vlan->flags = vinfo->flags;
                if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) {
                        if (vlan->vid_begin)
                                return -EINVAL;
                        vlan->vid_begin = vinfo->vid;
                } else if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END) {
                        if (!vlan->vid_begin)
                                return -EINVAL;
                        vlan->vid_end = vinfo->vid;
                        if (vlan->vid_end <= vlan->vid_begin)
                                return -EINVAL;
                        err = f(dev, &obj);
                        if (err)
                                return err;
                        memset(vlan, 0, sizeof(*vlan));
                } else {
                        if (vlan->vid_begin)
                                return -EINVAL;
                        vlan->vid_begin = vinfo->vid;
                        vlan->vid_end = vinfo->vid;
                        err = f(dev, &obj);
                        if (err)
                                return err;
                        memset(vlan, 0, sizeof(*vlan));
                }
        }

        return 0;
}

/**
 * switchdev_port_bridge_setlink - Set bridge port attributes
 *
 * @dev: port device
 * @nlh: netlink header
 * @flags: netlink flags
 *
 * Called for SELF on rtnl_bridge_setlink to set bridge port
 * attributes.
 */
int switchdev_port_bridge_setlink(struct net_device *dev,
                                  struct nlmsghdr *nlh, u16 flags)
{
        struct nlattr *protinfo;
        struct nlattr *afspec;
        int err = 0;

        protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
                                   IFLA_PROTINFO);
        if (protinfo) {
                err = switchdev_port_br_setlink_protinfo(dev, protinfo);
                if (err)
                        return err;
        }

        afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
                                 IFLA_AF_SPEC);
        if (afspec)
                err = switchdev_port_br_afspec(dev, afspec,
                                               switchdev_port_obj_add);

        return err;
}
EXPORT_SYMBOL_GPL(switchdev_port_bridge_setlink);
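/* Illustrative sketch, not part of the original file: what the
 * IFLA_AF_SPEC parsing above consumes. A VLAN range arrives as two
 * IFLA_BRIDGE_VLAN_INFO attrs, flagged as begin and end:
 *
 *      struct bridge_vlan_info vinfo_begin = {
 *              .flags = BRIDGE_VLAN_INFO_RANGE_BEGIN,
 *              .vid = 100,
 *      };
 *      struct bridge_vlan_info vinfo_end = {
 *              .flags = BRIDGE_VLAN_INFO_RANGE_END,
 *              .vid = 110,
 *      };
 *
 * switchdev_port_br_afspec() folds the pair into one
 * SWITCHDEV_OBJ_PORT_VLAN object with vid_begin=100 and vid_end=110,
 * then calls f() (switchdev_port_obj_add or switchdev_port_obj_del)
 * exactly once for the whole range.
 */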
/**
 * switchdev_port_bridge_dellink - Delete bridge port attributes
 *
 * @dev: port device
 * @nlh: netlink header
 * @flags: netlink flags
 *
 * Called for SELF on rtnl_bridge_dellink to delete bridge port
 * attributes.
 */
int switchdev_port_bridge_dellink(struct net_device *dev,
                                  struct nlmsghdr *nlh, u16 flags)
{
        struct nlattr *afspec;

        afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
                                 IFLA_AF_SPEC);
        if (afspec)
                return switchdev_port_br_afspec(dev, afspec,
                                                switchdev_port_obj_del);

        return 0;
}
EXPORT_SYMBOL_GPL(switchdev_port_bridge_dellink);

/**
 * switchdev_port_fdb_add - Add FDB (MAC/VLAN) entry to port
 *
 * @ndm: netlink hdr
 * @tb: netlink attributes
 * @dev: port device
 * @addr: MAC address to add
 * @vid: VLAN to add
 * @nlm_flags: netlink flags passed in (NLM_F_*)
 *
 * Add FDB entry to switch device.
 */
int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
                           struct net_device *dev, const unsigned char *addr,
                           u16 vid, u16 nlm_flags)
{
        struct switchdev_obj obj = {
                .id = SWITCHDEV_OBJ_PORT_FDB,
                .u.fdb = {
                        .addr = addr,
                        .vid = vid,
                },
        };

        return switchdev_port_obj_add(dev, &obj);
}
EXPORT_SYMBOL_GPL(switchdev_port_fdb_add);

/**
 * switchdev_port_fdb_del - Delete FDB (MAC/VLAN) entry from port
 *
 * @ndm: netlink hdr
 * @tb: netlink attributes
 * @dev: port device
 * @addr: MAC address to delete
 * @vid: VLAN to delete
 *
 * Delete FDB entry from switch device.
 */
int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
                           struct net_device *dev, const unsigned char *addr,
                           u16 vid)
{
        struct switchdev_obj obj = {
                .id = SWITCHDEV_OBJ_PORT_FDB,
                .u.fdb = {
                        .addr = addr,
                        .vid = vid,
                },
        };

        return switchdev_port_obj_del(dev, &obj);
}
EXPORT_SYMBOL_GPL(switchdev_port_fdb_del);
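/* Illustrative sketch, not part of the original file: these helpers are
 * designed to be plugged straight into a port driver's net_device_ops so
 * that SELF netlink requests land in switchdev (the rocker driver wires
 * them up this way). The foo_* name is hypothetical.
 *
 *      static const struct net_device_ops foo_port_netdev_ops = {
 *              ...
 *              .ndo_bridge_getlink     = switchdev_port_bridge_getlink,
 *              .ndo_bridge_setlink     = switchdev_port_bridge_setlink,
 *              .ndo_bridge_dellink     = switchdev_port_bridge_dellink,
 *              .ndo_fdb_add            = switchdev_port_fdb_add,
 *              .ndo_fdb_del            = switchdev_port_fdb_del,
 *              .ndo_fdb_dump           = switchdev_port_fdb_dump,
 *      };
 */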
struct switchdev_fdb_dump {
        struct switchdev_obj obj;
        struct sk_buff *skb;
        struct netlink_callback *cb;
        int idx;
};

static int switchdev_port_fdb_dump_cb(struct net_device *dev,
                                      struct switchdev_obj *obj)
{
        struct switchdev_fdb_dump *dump =
                container_of(obj, struct switchdev_fdb_dump, obj);
        u32 portid = NETLINK_CB(dump->cb->skb).portid;
        u32 seq = dump->cb->nlh->nlmsg_seq;
        struct nlmsghdr *nlh;
        struct ndmsg *ndm;

        if (dump->idx < dump->cb->args[0])
                goto skip;

        nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH,
                        sizeof(*ndm), NLM_F_MULTI);
        if (!nlh)
                return -EMSGSIZE;

        ndm = nlmsg_data(nlh);
        ndm->ndm_family = AF_BRIDGE;
        ndm->ndm_pad1 = 0;
        ndm->ndm_pad2 = 0;
        ndm->ndm_flags = NTF_SELF;
        ndm->ndm_type = 0;
        ndm->ndm_ifindex = dev->ifindex;
        ndm->ndm_state = obj->u.fdb.ndm_state;

        if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, obj->u.fdb.addr))
                goto nla_put_failure;

        if (obj->u.fdb.vid && nla_put_u16(dump->skb, NDA_VLAN, obj->u.fdb.vid))
                goto nla_put_failure;

        nlmsg_end(dump->skb, nlh);

skip:
        dump->idx++;
        return 0;

nla_put_failure:
        nlmsg_cancel(dump->skb, nlh);
        return -EMSGSIZE;
}

/**
 * switchdev_port_fdb_dump - Dump port FDB (MAC/VLAN) entries
 *
 * @skb: netlink skb
 * @cb: netlink callback
 * @dev: port device
 * @filter_dev: filter device
 * @idx: index to start dumping at
 *
 * Dump FDB entries from switch device.
 */
int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
                            struct net_device *dev,
                            struct net_device *filter_dev, int idx)
{
        struct switchdev_fdb_dump dump = {
                .obj = {
                        .id = SWITCHDEV_OBJ_PORT_FDB,
                        .cb = switchdev_port_fdb_dump_cb,
                },
                .skb = skb,
                .cb = cb,
                .idx = idx,
        };

        switchdev_port_obj_dump(dev, &dump.obj);
        return dump.idx;
}
EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump);

static struct net_device *switchdev_get_lowest_dev(struct net_device *dev)
{
        const struct switchdev_ops *ops = dev->switchdev_ops;
        struct net_device *lower_dev;
        struct net_device *port_dev;
        struct list_head *iter;

        /* Recursively search down until we find a sw port dev.
         * (A sw port dev supports switchdev_port_attr_get).
         */

        if (ops && ops->switchdev_port_attr_get)
                return dev;

        netdev_for_each_lower_dev(dev, lower_dev, iter) {
                port_dev = switchdev_get_lowest_dev(lower_dev);
                if (port_dev)
                        return port_dev;
        }

        return NULL;
}

static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi)
{
        struct switchdev_attr attr = {
                .id = SWITCHDEV_ATTR_PORT_PARENT_ID,
        };
        struct switchdev_attr prev_attr;
        struct net_device *dev = NULL;
        int nhsel;

        /* For this route, all nexthop devs must be on the same switch. */

        for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
                const struct fib_nh *nh = &fi->fib_nh[nhsel];

                if (!nh->nh_dev)
                        return NULL;

                dev = switchdev_get_lowest_dev(nh->nh_dev);
                if (!dev)
                        return NULL;

                if (switchdev_port_attr_get(dev, &attr))
                        return NULL;

                if (nhsel > 0 &&
                    !netdev_phys_item_id_same(&prev_attr.u.ppid, &attr.u.ppid))
                        return NULL;

                prev_attr = attr;
        }

        return dev;
}
*/ 974 975 for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { 976 const struct fib_nh *nh = &fi->fib_nh[nhsel]; 977 978 if (!nh->nh_dev) 979 return NULL; 980 981 dev = switchdev_get_lowest_dev(nh->nh_dev); 982 if (!dev) 983 return NULL; 984 985 if (switchdev_port_attr_get(dev, &attr)) 986 return NULL; 987 988 if (nhsel > 0 && 989 !netdev_phys_item_id_same(&prev_attr.u.ppid, &attr.u.ppid)) 990 return NULL; 991 992 prev_attr = attr; 993 } 994 995 return dev; 996 } 997 998 /** 999 * switchdev_fib_ipv4_add - Add/modify switch IPv4 route entry 1000 * 1001 * @dst: route's IPv4 destination address 1002 * @dst_len: destination address length (prefix length) 1003 * @fi: route FIB info structure 1004 * @tos: route TOS 1005 * @type: route type 1006 * @nlflags: netlink flags passed in (NLM_F_*) 1007 * @tb_id: route table ID 1008 * 1009 * Add/modify switch IPv4 route entry. 1010 */ 1011 int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, 1012 u8 tos, u8 type, u32 nlflags, u32 tb_id) 1013 { 1014 struct switchdev_obj fib_obj = { 1015 .id = SWITCHDEV_OBJ_IPV4_FIB, 1016 .u.ipv4_fib = { 1017 .dst = dst, 1018 .dst_len = dst_len, 1019 .fi = fi, 1020 .tos = tos, 1021 .type = type, 1022 .nlflags = nlflags, 1023 .tb_id = tb_id, 1024 }, 1025 }; 1026 struct net_device *dev; 1027 int err = 0; 1028 1029 /* Don't offload route if using custom ip rules or if 1030 * IPv4 FIB offloading has been disabled completely. 1031 */ 1032 1033 #ifdef CONFIG_IP_MULTIPLE_TABLES 1034 if (fi->fib_net->ipv4.fib_has_custom_rules) 1035 return 0; 1036 #endif 1037 1038 if (fi->fib_net->ipv4.fib_offload_disabled) 1039 return 0; 1040 1041 dev = switchdev_get_dev_by_nhs(fi); 1042 if (!dev) 1043 return 0; 1044 1045 err = switchdev_port_obj_add(dev, &fib_obj); 1046 if (!err) 1047 fi->fib_flags |= RTNH_F_OFFLOAD; 1048 1049 return err == -EOPNOTSUPP ? 0 : err; 1050 } 1051 EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_add); 1052 1053 /** 1054 * switchdev_fib_ipv4_del - Delete IPv4 route entry from switch 1055 * 1056 * @dst: route's IPv4 destination address 1057 * @dst_len: destination address length (prefix length) 1058 * @fi: route FIB info structure 1059 * @tos: route TOS 1060 * @type: route type 1061 * @tb_id: route table ID 1062 * 1063 * Delete IPv4 route entry from switch device. 1064 */ 1065 int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, 1066 u8 tos, u8 type, u32 tb_id) 1067 { 1068 struct switchdev_obj fib_obj = { 1069 .id = SWITCHDEV_OBJ_IPV4_FIB, 1070 .u.ipv4_fib = { 1071 .dst = dst, 1072 .dst_len = dst_len, 1073 .fi = fi, 1074 .tos = tos, 1075 .type = type, 1076 .nlflags = 0, 1077 .tb_id = tb_id, 1078 }, 1079 }; 1080 struct net_device *dev; 1081 int err = 0; 1082 1083 if (!(fi->fib_flags & RTNH_F_OFFLOAD)) 1084 return 0; 1085 1086 dev = switchdev_get_dev_by_nhs(fi); 1087 if (!dev) 1088 return 0; 1089 1090 err = switchdev_port_obj_del(dev, &fib_obj); 1091 if (!err) 1092 fi->fib_flags &= ~RTNH_F_OFFLOAD; 1093 1094 return err == -EOPNOTSUPP ? 0 : err; 1095 } 1096 EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_del); 1097 1098 /** 1099 * switchdev_fib_ipv4_abort - Abort an IPv4 FIB operation 1100 * 1101 * @fi: route FIB info structure 1102 */ 1103 void switchdev_fib_ipv4_abort(struct fib_info *fi) 1104 { 1105 /* There was a problem installing this route to the offload 1106 * device. 
static bool switchdev_port_same_parent_id(struct net_device *a,
                                          struct net_device *b)
{
        struct switchdev_attr a_attr = {
                .id = SWITCHDEV_ATTR_PORT_PARENT_ID,
                .flags = SWITCHDEV_F_NO_RECURSE,
        };
        struct switchdev_attr b_attr = {
                .id = SWITCHDEV_ATTR_PORT_PARENT_ID,
                .flags = SWITCHDEV_F_NO_RECURSE,
        };

        if (switchdev_port_attr_get(a, &a_attr) ||
            switchdev_port_attr_get(b, &b_attr))
                return false;

        return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid);
}

static u32 switchdev_port_fwd_mark_get(struct net_device *dev,
                                       struct net_device *group_dev)
{
        struct net_device *lower_dev;
        struct list_head *iter;

        netdev_for_each_lower_dev(group_dev, lower_dev, iter) {
                if (lower_dev == dev)
                        continue;
                if (switchdev_port_same_parent_id(dev, lower_dev))
                        return lower_dev->offload_fwd_mark;
                return switchdev_port_fwd_mark_get(dev, lower_dev);
        }

        return dev->ifindex;
}

static void switchdev_port_fwd_mark_reset(struct net_device *group_dev,
                                          u32 old_mark, u32 *reset_mark)
{
        struct net_device *lower_dev;
        struct list_head *iter;

        netdev_for_each_lower_dev(group_dev, lower_dev, iter) {
                if (lower_dev->offload_fwd_mark == old_mark) {
                        if (!*reset_mark)
                                *reset_mark = lower_dev->ifindex;
                        lower_dev->offload_fwd_mark = *reset_mark;
                }
                switchdev_port_fwd_mark_reset(lower_dev, old_mark, reset_mark);
        }
}
/**
 * switchdev_port_fwd_mark_set - Set port offload forwarding mark
 *
 * @dev: port device
 * @group_dev: containing device
 * @joining: true if dev is joining group; false if leaving group
 *
 * An ungrouped port's offload mark is just its ifindex. A grouped
 * port's (member of a bridge, for example) offload mark is the ifindex
 * of one of the ports in the group with the same parent (switch) ID.
 * Ports on the same device in the same group will have the same mark.
 *
 * Example:
 *
 *      br0     ifindex=9
 *        sw1p1 ifindex=2 mark=2
 *        sw1p2 ifindex=3 mark=2
 *        sw2p1 ifindex=4 mark=5
 *        sw2p2 ifindex=5 mark=5
 *
 * If sw2p2 leaves the bridge, we'll have:
 *
 *      br0     ifindex=9
 *        sw1p1 ifindex=2 mark=2
 *        sw1p2 ifindex=3 mark=2
 *        sw2p1 ifindex=4 mark=4
 *      sw2p2   ifindex=5 mark=5
 */
void switchdev_port_fwd_mark_set(struct net_device *dev,
                                 struct net_device *group_dev,
                                 bool joining)
{
        u32 mark = dev->ifindex;
        u32 reset_mark = 0;

        if (group_dev && joining) {
                mark = switchdev_port_fwd_mark_get(dev, group_dev);
        } else if (group_dev && !joining) {
                if (dev->offload_fwd_mark == mark)
                        /* Ohoh, this port was the mark reference port,
                         * but it's leaving the group, so reset the
                         * mark for the remaining ports in the group.
                         */
                        switchdev_port_fwd_mark_reset(group_dev, mark,
                                                      &reset_mark);
        }

        dev->offload_fwd_mark = mark;
}
EXPORT_SYMBOL_GPL(switchdev_port_fwd_mark_set);