// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright(c) 2010-2014 Intel Corporation.
 */

#include <linux/version.h>
#include <linux/module.h>
#include <linux/miscdevice.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/pci.h>
#include <linux/kthread.h>
#include <linux/rwsem.h>
#include <linux/mutex.h>
#include <linux/nsproxy.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

#include <exec-env/rte_kni_common.h>

#include "compat.h"
#include "kni_dev.h"

MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("Kernel Module for managing kni devices");

#define KNI_RX_LOOP_NUM 1000

#define KNI_MAX_DEVICES 32

extern const struct pci_device_id ixgbe_pci_tbl[];
extern const struct pci_device_id igb_pci_tbl[];

/* loopback mode */
static char *lo_mode;

/* Kernel thread mode */
static char *kthread_mode;
static uint32_t multiple_kthread_on;

/* Default carrier state for created KNI network interfaces */
static char *carrier;
uint32_t dflt_carrier;

#define KNI_DEV_IN_USE_BIT_NUM 0 /* Bit number for device in use */

static int kni_net_id;

struct kni_net {
	unsigned long device_in_use; /* device in use flag */
	struct mutex kni_kthread_lock;
	struct task_struct *kni_kthread;
	struct rw_semaphore kni_list_lock;
	struct list_head kni_list_head;
};

static int __net_init
kni_init_net(struct net *net)
{
#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
	struct kni_net *knet = net_generic(net, kni_net_id);

	memset(knet, 0, sizeof(*knet));
#else
	struct kni_net *knet;
	int ret;

	knet = kzalloc(sizeof(struct kni_net), GFP_KERNEL);
	if (!knet)
		return -ENOMEM;
#endif

	/* Clear the bit of device in use */
	clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use);

	mutex_init(&knet->kni_kthread_lock);

	init_rwsem(&knet->kni_list_lock);
	INIT_LIST_HEAD(&knet->kni_list_head);

#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
	return 0;
#else
	ret = net_assign_generic(net, kni_net_id, knet);
	if (ret < 0)
		kfree(knet);

	return ret;
#endif
}

static void __net_exit
kni_exit_net(struct net *net)
{
	struct kni_net *knet __maybe_unused;

	knet = net_generic(net, kni_net_id);
	mutex_destroy(&knet->kni_kthread_lock);

#ifndef HAVE_SIMPLIFIED_PERNET_OPERATIONS
	kfree(knet);
#endif
}

static struct pernet_operations kni_net_ops = {
	.init = kni_init_net,
	.exit = kni_exit_net,
#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
	.id   = &kni_net_id,
	.size = sizeof(struct kni_net),
#endif
};

static int
kni_thread_single(void *data)
{
	struct kni_net *knet = data;
	int j;
	struct kni_dev *dev;

	while (!kthread_should_stop()) {
		down_read(&knet->kni_list_lock);
		for (j = 0; j < KNI_RX_LOOP_NUM; j++) {
			list_for_each_entry(dev, &knet->kni_list_head, list) {
				kni_net_rx(dev);
				kni_net_poll_resp(dev);
			}
		}
		up_read(&knet->kni_list_lock);
#ifdef RTE_KNI_PREEMPT_DEFAULT
		/* reschedule out for a while */
		schedule_timeout_interruptible(
			usecs_to_jiffies(KNI_KTHREAD_RESCHEDULE_INTERVAL));
#endif
	}

	return 0;
}
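/*
 * Editorial note: with the default kthread_mode=single, the lone
 * "kni_single" thread above services every KNI device in the netns,
 * so a single core polls all interfaces. The KNI_RX_LOOP_NUM inner
 * iterations amortize the cost of taking kni_list_lock over many RX
 * polls before the thread reschedules. Illustrative load line (module
 * name rte_kni.ko assumed, as in DPDK builds):
 *
 *   # insmod rte_kni.ko kthread_mode=single
 */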
static int
kni_thread_multiple(void *param)
{
	int j;
	struct kni_dev *dev = param;

	while (!kthread_should_stop()) {
		for (j = 0; j < KNI_RX_LOOP_NUM; j++) {
			kni_net_rx(dev);
			kni_net_poll_resp(dev);
		}
#ifdef RTE_KNI_PREEMPT_DEFAULT
		schedule_timeout_interruptible(
			usecs_to_jiffies(KNI_KTHREAD_RESCHEDULE_INTERVAL));
#endif
	}

	return 0;
}

static int
kni_open(struct inode *inode, struct file *file)
{
	struct net *net = current->nsproxy->net_ns;
	struct kni_net *knet = net_generic(net, kni_net_id);

	/* kni device can be opened by one user only per netns */
	if (test_and_set_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use))
		return -EBUSY;

	file->private_data = get_net(net);
	pr_debug("/dev/kni opened\n");

	return 0;
}

static int
kni_dev_remove(struct kni_dev *dev)
{
	if (!dev)
		return -ENODEV;

#ifdef RTE_KNI_KMOD_ETHTOOL
	if (dev->pci_dev) {
		if (pci_match_id(ixgbe_pci_tbl, dev->pci_dev))
			ixgbe_kni_remove(dev->pci_dev);
		else if (pci_match_id(igb_pci_tbl, dev->pci_dev))
			igb_kni_remove(dev->pci_dev);
	}
#endif

	if (dev->net_dev) {
		unregister_netdev(dev->net_dev);
		free_netdev(dev->net_dev);
	}

	kni_net_release_fifo_phy(dev);

	return 0;
}

static int
kni_release(struct inode *inode, struct file *file)
{
	struct net *net = file->private_data;
	struct kni_net *knet = net_generic(net, kni_net_id);
	struct kni_dev *dev, *n;

	/* Stop kernel thread for single mode */
	if (multiple_kthread_on == 0) {
		mutex_lock(&knet->kni_kthread_lock);
		/* Stop kernel thread */
		if (knet->kni_kthread != NULL) {
			kthread_stop(knet->kni_kthread);
			knet->kni_kthread = NULL;
		}
		mutex_unlock(&knet->kni_kthread_lock);
	}

	down_write(&knet->kni_list_lock);
	list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) {
		/* Stop kernel thread for multiple mode */
		if (multiple_kthread_on && dev->pthread != NULL) {
			kthread_stop(dev->pthread);
			dev->pthread = NULL;
		}

		kni_dev_remove(dev);
		list_del(&dev->list);
	}
	up_write(&knet->kni_list_lock);

	/* Clear the bit of device in use */
	clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use);

	put_net(net);
	pr_debug("/dev/kni closed\n");

	return 0;
}

static int
kni_check_param(struct kni_dev *kni, struct rte_kni_device_info *dev)
{
	if (!kni || !dev)
		return -1;

	/* Check if the network name is already in use */
	if (!strncmp(kni->name, dev->name, RTE_KNI_NAMESIZE)) {
		pr_err("KNI name %s duplicated\n", dev->name);
		return -1;
	}

	return 0;
}

static int
kni_run_thread(struct kni_net *knet, struct kni_dev *kni, uint8_t force_bind)
{
	/*
	 * Create a new kernel thread for multiple mode, set its core
	 * affinity, and finally wake it up.
	 */
	if (multiple_kthread_on) {
		kni->pthread = kthread_create(kni_thread_multiple,
			(void *)kni, "kni_%s", kni->name);
		if (IS_ERR(kni->pthread)) {
			kni_dev_remove(kni);
			return -ECANCELED;
		}

		if (force_bind)
			kthread_bind(kni->pthread, kni->core_id);
		wake_up_process(kni->pthread);
	} else {
		mutex_lock(&knet->kni_kthread_lock);

		if (knet->kni_kthread == NULL) {
			knet->kni_kthread = kthread_create(kni_thread_single,
				(void *)knet, "kni_single");
			if (IS_ERR(knet->kni_kthread)) {
				mutex_unlock(&knet->kni_kthread_lock);
				kni_dev_remove(kni);
				return -ECANCELED;
			}

			if (force_bind)
				kthread_bind(knet->kni_kthread, kni->core_id);
			wake_up_process(knet->kni_kthread);
		}

		mutex_unlock(&knet->kni_kthread_lock);
	}

	return 0;
}
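/*
 * Editorial note on the thread model chosen above: in "multiple" mode
 * each interface gets its own "kni_<name>" kthread, optionally pinned
 * with kthread_bind() when userspace sets dev_info.force_bind; in
 * "single" mode the first created interface spawns the shared
 * "kni_single" thread and later interfaces reuse it.
 */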
static int
kni_ioctl_create(struct net *net, uint32_t ioctl_num,
		unsigned long ioctl_param)
{
	struct kni_net *knet = net_generic(net, kni_net_id);
	int ret;
	struct rte_kni_device_info dev_info;
	struct net_device *net_dev = NULL;
	struct kni_dev *kni, *dev, *n;
#ifdef RTE_KNI_KMOD_ETHTOOL
	struct pci_dev *found_pci = NULL;
	struct net_device *lad_dev = NULL;
	struct pci_dev *pci = NULL;
#endif

	pr_info("Creating kni...\n");
	/* Check the buffer size, to avoid warning */
	if (_IOC_SIZE(ioctl_num) > sizeof(dev_info))
		return -EINVAL;

	/* Copy kni info from user space */
	if (copy_from_user(&dev_info, (void __user *)ioctl_param,
			sizeof(dev_info)))
		return -EFAULT;

	/* Check if the name is null-terminated */
	if (strnlen(dev_info.name, sizeof(dev_info.name)) ==
			sizeof(dev_info.name)) {
		pr_err("kni.name not zero-terminated\n");
		return -EINVAL;
	}

	/* Check if the cpu core id is valid for binding */
	if (dev_info.force_bind && !cpu_online(dev_info.core_id)) {
		pr_err("cpu %u is not online\n", dev_info.core_id);
		return -EINVAL;
	}

	/* Check if a device with this name has already been created */
	down_read(&knet->kni_list_lock);
	list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) {
		if (kni_check_param(dev, &dev_info) < 0) {
			up_read(&knet->kni_list_lock);
			return -EINVAL;
		}
	}
	up_read(&knet->kni_list_lock);

	net_dev = alloc_netdev(sizeof(struct kni_dev), dev_info.name,
#ifdef NET_NAME_USER
							NET_NAME_USER,
#endif
							kni_net_init);
	if (net_dev == NULL) {
		pr_err("error allocating device \"%s\"\n", dev_info.name);
		return -EBUSY;
	}

	dev_net_set(net_dev, net);

	kni = netdev_priv(net_dev);

	kni->net_dev = net_dev;
	kni->group_id = dev_info.group_id;
	kni->core_id = dev_info.core_id;
	strncpy(kni->name, dev_info.name, RTE_KNI_NAMESIZE);
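	/*
	 * Editorial note: the translation below assumes the FIFOs live in
	 * memory covered by the kernel's linear mapping, since
	 * phys_to_virt() performs no page-table walk; DPDK's
	 * hugepage-backed allocations satisfy this assumption.
	 */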
	/* Translate user space info into kernel space info */
	kni->tx_q = phys_to_virt(dev_info.tx_phys);
	kni->rx_q = phys_to_virt(dev_info.rx_phys);
	kni->alloc_q = phys_to_virt(dev_info.alloc_phys);
	kni->free_q = phys_to_virt(dev_info.free_phys);

	kni->req_q = phys_to_virt(dev_info.req_phys);
	kni->resp_q = phys_to_virt(dev_info.resp_phys);
	kni->sync_va = dev_info.sync_va;
	kni->sync_kva = phys_to_virt(dev_info.sync_phys);

	kni->mbuf_size = dev_info.mbuf_size;

	pr_debug("tx_phys: 0x%016llx, tx_q addr: 0x%p\n",
		(unsigned long long)dev_info.tx_phys, kni->tx_q);
	pr_debug("rx_phys: 0x%016llx, rx_q addr: 0x%p\n",
		(unsigned long long)dev_info.rx_phys, kni->rx_q);
	pr_debug("alloc_phys: 0x%016llx, alloc_q addr: 0x%p\n",
		(unsigned long long)dev_info.alloc_phys, kni->alloc_q);
	pr_debug("free_phys: 0x%016llx, free_q addr: 0x%p\n",
		(unsigned long long)dev_info.free_phys, kni->free_q);
	pr_debug("req_phys: 0x%016llx, req_q addr: 0x%p\n",
		(unsigned long long)dev_info.req_phys, kni->req_q);
	pr_debug("resp_phys: 0x%016llx, resp_q addr: 0x%p\n",
		(unsigned long long)dev_info.resp_phys, kni->resp_q);
	pr_debug("mbuf_size: %u\n", kni->mbuf_size);

	pr_debug("PCI: %02x:%02x.%02x %04x:%04x\n",
					dev_info.bus,
					dev_info.devid,
					dev_info.function,
					dev_info.vendor_id,
					dev_info.device_id);
#ifdef RTE_KNI_KMOD_ETHTOOL
	pci = pci_get_device(dev_info.vendor_id, dev_info.device_id, NULL);

	/* Support Ethtool */
	while (pci) {
		pr_debug("pci_bus: %02x:%02x:%02x\n",
				pci->bus->number,
				PCI_SLOT(pci->devfn),
				PCI_FUNC(pci->devfn));

		if ((pci->bus->number == dev_info.bus) &&
			(PCI_SLOT(pci->devfn) == dev_info.devid) &&
			(PCI_FUNC(pci->devfn) == dev_info.function)) {
			found_pci = pci;

			if (pci_match_id(ixgbe_pci_tbl, found_pci))
				ret = ixgbe_kni_probe(found_pci, &lad_dev);
			else if (pci_match_id(igb_pci_tbl, found_pci))
				ret = igb_kni_probe(found_pci, &lad_dev);
			else
				ret = -1;

			pr_debug("PCI found: pci=0x%p, lad_dev=0x%p\n",
							pci, lad_dev);
			if (ret == 0) {
				kni->lad_dev = lad_dev;
				kni_set_ethtool_ops(kni->net_dev);
			} else {
				pr_err("Device not supported by ethtool\n");
				kni->lad_dev = NULL;
			}

			kni->pci_dev = found_pci;
			kni->device_id = dev_info.device_id;
			break;
		}
		pci = pci_get_device(dev_info.vendor_id,
				dev_info.device_id, pci);
	}
	if (pci)
		pci_dev_put(pci);
#endif

	if (kni->lad_dev) {
		ether_addr_copy(net_dev->dev_addr, kni->lad_dev->dev_addr);
	} else {
		/* Use the MAC address provided by user space, if valid */
		if (is_valid_ether_addr(dev_info.mac_addr))
			memcpy(net_dev->dev_addr, dev_info.mac_addr, ETH_ALEN);
		else
			/*
			 * Generate a random MAC address (eth_random_addr()
			 * is the newer kernel helper for this).
			 */
			random_ether_addr(net_dev->dev_addr);
	}

	if (dev_info.mtu)
		net_dev->mtu = dev_info.mtu;

	ret = register_netdev(net_dev);
	if (ret) {
		pr_err("error %i registering device \"%s\"\n",
			ret, dev_info.name);
		kni->net_dev = NULL;
		kni_dev_remove(kni);
		free_netdev(net_dev);
		return -ENODEV;
	}

	netif_carrier_off(net_dev);

	ret = kni_run_thread(knet, kni, dev_info.force_bind);
	if (ret != 0)
		return ret;

	down_write(&knet->kni_list_lock);
	list_add(&kni->list, &knet->kni_list_head);
	up_write(&knet->kni_list_lock);

	return 0;
}
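/*
 * Illustrative userspace call sequence (normally issued by DPDK's
 * librte_kni; a sketch of the ABI handled above, not part of this
 * module):
 *
 *   int fd = open("/dev/kni", O_RDWR);
 *   struct rte_kni_device_info info = {0};
 *   // fill in info.name, the FIFO physical addresses, mbuf_size, ...
 *   ioctl(fd, RTE_KNI_IOCTL_CREATE, &info);   // handled by kni_ioctl_create()
 *   ioctl(fd, RTE_KNI_IOCTL_RELEASE, &info);  // handled by kni_ioctl_release()
 *   close(fd);                                // kni_release() removes leftovers
 */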
static int
kni_ioctl_release(struct net *net, uint32_t ioctl_num,
		unsigned long ioctl_param)
{
	struct kni_net *knet = net_generic(net, kni_net_id);
	int ret = -EINVAL;
	struct kni_dev *dev, *n;
	struct rte_kni_device_info dev_info;

	if (_IOC_SIZE(ioctl_num) > sizeof(dev_info))
		return -EINVAL;

	if (copy_from_user(&dev_info, (void __user *)ioctl_param,
			sizeof(dev_info)))
		return -EFAULT;

	/* Release the network device according to its name */
	if (strlen(dev_info.name) == 0)
		return -EINVAL;

	down_write(&knet->kni_list_lock);
	list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) {
		if (strncmp(dev->name, dev_info.name, RTE_KNI_NAMESIZE) != 0)
			continue;

		if (multiple_kthread_on && dev->pthread != NULL) {
			kthread_stop(dev->pthread);
			dev->pthread = NULL;
		}

		kni_dev_remove(dev);
		list_del(&dev->list);
		ret = 0;
		break;
	}
	up_write(&knet->kni_list_lock);
	pr_info("%s release kni named %s\n",
		(ret == 0 ? "Successfully" : "Unsuccessfully"), dev_info.name);

	return ret;
}
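/*
 * Dispatch note (editorial): only the command number (_IOC_NR) is
 * compared below, so the direction and size bits encoded in the ioctl
 * request do not affect routing; each handler re-validates the payload
 * size itself via _IOC_SIZE().
 */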
"Successfully" : "Unsuccessfully"), dev_info.name); 520 521 return ret; 522 } 523 524 static int 525 kni_ioctl(struct inode *inode, uint32_t ioctl_num, unsigned long ioctl_param) 526 { 527 int ret = -EINVAL; 528 struct net *net = current->nsproxy->net_ns; 529 530 pr_debug("IOCTL num=0x%0x param=0x%0lx\n", ioctl_num, ioctl_param); 531 532 /* 533 * Switch according to the ioctl called 534 */ 535 switch (_IOC_NR(ioctl_num)) { 536 case _IOC_NR(RTE_KNI_IOCTL_TEST): 537 /* For test only, not used */ 538 break; 539 case _IOC_NR(RTE_KNI_IOCTL_CREATE): 540 ret = kni_ioctl_create(net, ioctl_num, ioctl_param); 541 break; 542 case _IOC_NR(RTE_KNI_IOCTL_RELEASE): 543 ret = kni_ioctl_release(net, ioctl_num, ioctl_param); 544 break; 545 default: 546 pr_debug("IOCTL default\n"); 547 break; 548 } 549 550 return ret; 551 } 552 553 static int 554 kni_compat_ioctl(struct inode *inode, uint32_t ioctl_num, 555 unsigned long ioctl_param) 556 { 557 /* 32 bits app on 64 bits OS to be supported later */ 558 pr_debug("Not implemented.\n"); 559 560 return -EINVAL; 561 } 562 563 static const struct file_operations kni_fops = { 564 .owner = THIS_MODULE, 565 .open = kni_open, 566 .release = kni_release, 567 .unlocked_ioctl = (void *)kni_ioctl, 568 .compat_ioctl = (void *)kni_compat_ioctl, 569 }; 570 571 static struct miscdevice kni_misc = { 572 .minor = MISC_DYNAMIC_MINOR, 573 .name = KNI_DEVICE, 574 .fops = &kni_fops, 575 }; 576 577 static int __init 578 kni_parse_kthread_mode(void) 579 { 580 if (!kthread_mode) 581 return 0; 582 583 if (strcmp(kthread_mode, "single") == 0) 584 return 0; 585 else if (strcmp(kthread_mode, "multiple") == 0) 586 multiple_kthread_on = 1; 587 else 588 return -1; 589 590 return 0; 591 } 592 593 static int __init 594 kni_parse_carrier_state(void) 595 { 596 if (!carrier) { 597 dflt_carrier = 0; 598 return 0; 599 } 600 601 if (strcmp(carrier, "off") == 0) 602 dflt_carrier = 0; 603 else if (strcmp(carrier, "on") == 0) 604 dflt_carrier = 1; 605 else 606 return -1; 607 608 return 0; 609 } 610 611 static int __init 612 kni_init(void) 613 { 614 int rc; 615 616 if (kni_parse_kthread_mode() < 0) { 617 pr_err("Invalid parameter for kthread_mode\n"); 618 return -EINVAL; 619 } 620 621 if (multiple_kthread_on == 0) 622 pr_debug("Single kernel thread for all KNI devices\n"); 623 else 624 pr_debug("Multiple kernel thread mode enabled\n"); 625 626 if (kni_parse_carrier_state() < 0) { 627 pr_err("Invalid parameter for carrier\n"); 628 return -EINVAL; 629 } 630 631 if (dflt_carrier == 0) 632 pr_debug("Default carrier state set to off.\n"); 633 else 634 pr_debug("Default carrier state set to on.\n"); 635 636 #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS 637 rc = register_pernet_subsys(&kni_net_ops); 638 #else 639 rc = register_pernet_gen_subsys(&kni_net_id, &kni_net_ops); 640 #endif 641 if (rc) 642 return -EPERM; 643 644 rc = misc_register(&kni_misc); 645 if (rc != 0) { 646 pr_err("Misc registration failed\n"); 647 goto out; 648 } 649 650 /* Configure the lo mode according to the input parameter */ 651 kni_net_config_lo_mode(lo_mode); 652 653 return 0; 654 655 out: 656 #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS 657 unregister_pernet_subsys(&kni_net_ops); 658 #else 659 unregister_pernet_gen_subsys(kni_net_id, &kni_net_ops); 660 #endif 661 return rc; 662 } 663 664 static void __exit 665 kni_exit(void) 666 { 667 misc_deregister(&kni_misc); 668 #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS 669 unregister_pernet_subsys(&kni_net_ops); 670 #else 671 unregister_pernet_gen_subsys(kni_net_id, &kni_net_ops); 672 #endif 673 } 674 675 
module_init(kni_init);
module_exit(kni_exit);

module_param(lo_mode, charp, 0644);
MODULE_PARM_DESC(lo_mode,
"KNI loopback mode (default=lo_mode_none):\n"
"\t\tlo_mode_none Kernel loopback disabled\n"
"\t\tlo_mode_fifo Enable kernel loopback with fifo\n"
"\t\tlo_mode_fifo_skb Enable kernel loopback with fifo and skb buffer\n"
"\t\t"
);

module_param(kthread_mode, charp, 0644);
MODULE_PARM_DESC(kthread_mode,
"Kernel thread mode (default=single):\n"
"\t\tsingle   Single kernel thread mode enabled.\n"
"\t\tmultiple Multiple kernel thread mode enabled.\n"
"\t\t"
);

module_param(carrier, charp, 0644);
MODULE_PARM_DESC(carrier,
"Default carrier state for KNI interface (default=off):\n"
"\t\toff Interfaces will be created with carrier state set to off.\n"
"\t\ton  Interfaces will be created with carrier state set to on.\n"
"\t\t"
);
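/*
 * Editorial note: because the parameters above are registered with mode
 * 0644, they stay readable after load (path assumes the module is named
 * rte_kni):
 *
 *   $ cat /sys/module/rte_kni/parameters/kthread_mode
 */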