1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright(c) 2010-2014 Intel Corporation. 4 */ 5 6 #include <linux/version.h> 7 #include <linux/module.h> 8 #include <linux/miscdevice.h> 9 #include <linux/netdevice.h> 10 #include <linux/etherdevice.h> 11 #include <linux/pci.h> 12 #include <linux/kthread.h> 13 #include <linux/rwsem.h> 14 #include <linux/mutex.h> 15 #include <linux/nsproxy.h> 16 #include <net/net_namespace.h> 17 #include <net/netns/generic.h> 18 19 #include <rte_kni_common.h> 20 21 #include "compat.h" 22 #include "kni_dev.h" 23 24 MODULE_VERSION(KNI_VERSION); 25 MODULE_LICENSE("Dual BSD/GPL"); 26 MODULE_AUTHOR("Intel Corporation"); 27 MODULE_DESCRIPTION("Kernel Module for managing kni devices"); 28 29 #define KNI_RX_LOOP_NUM 1000 30 31 #define KNI_MAX_DEVICES 32 32 33 /* loopback mode */ 34 static char *lo_mode; 35 36 /* Kernel thread mode */ 37 static char *kthread_mode; 38 static uint32_t multiple_kthread_on; 39 40 /* Default carrier state for created KNI network interfaces */ 41 static char *carrier; 42 uint32_t kni_dflt_carrier; 43 44 /* Request processing support for bifurcated drivers. */ 45 static char *enable_bifurcated; 46 uint32_t bifurcated_support; 47 48 /* KNI thread scheduling interval */ 49 static long min_scheduling_interval = 100; /* us */ 50 static long max_scheduling_interval = 200; /* us */ 51 52 #define KNI_DEV_IN_USE_BIT_NUM 0 /* Bit number for device in use */ 53 54 static int kni_net_id; 55 56 struct kni_net { 57 unsigned long device_in_use; /* device in use flag */ 58 struct mutex kni_kthread_lock; 59 struct task_struct *kni_kthread; 60 struct rw_semaphore kni_list_lock; 61 struct list_head kni_list_head; 62 }; 63 64 static int __net_init 65 kni_init_net(struct net *net) 66 { 67 #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS 68 struct kni_net *knet = net_generic(net, kni_net_id); 69 70 memset(knet, 0, sizeof(*knet)); 71 #else 72 struct kni_net *knet; 73 int ret; 74 75 knet = kzalloc(sizeof(struct kni_net), GFP_KERNEL); 76 if (!knet) { 77 ret = -ENOMEM; 78 return ret; 79 } 80 #endif 81 82 /* Clear the bit of device in use */ 83 clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use); 84 85 mutex_init(&knet->kni_kthread_lock); 86 87 init_rwsem(&knet->kni_list_lock); 88 INIT_LIST_HEAD(&knet->kni_list_head); 89 90 #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS 91 return 0; 92 #else 93 ret = net_assign_generic(net, kni_net_id, knet); 94 if (ret < 0) 95 kfree(knet); 96 97 return ret; 98 #endif 99 } 100 101 static void __net_exit 102 kni_exit_net(struct net *net) 103 { 104 struct kni_net *knet __maybe_unused; 105 106 knet = net_generic(net, kni_net_id); 107 mutex_destroy(&knet->kni_kthread_lock); 108 109 #ifndef HAVE_SIMPLIFIED_PERNET_OPERATIONS 110 kfree(knet); 111 #endif 112 } 113 114 static struct pernet_operations kni_net_ops = { 115 .init = kni_init_net, 116 .exit = kni_exit_net, 117 #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS 118 .id = &kni_net_id, 119 .size = sizeof(struct kni_net), 120 #endif 121 }; 122 123 static int 124 kni_thread_single(void *data) 125 { 126 struct kni_net *knet = data; 127 int j; 128 struct kni_dev *dev; 129 130 while (!kthread_should_stop()) { 131 down_read(&knet->kni_list_lock); 132 for (j = 0; j < KNI_RX_LOOP_NUM; j++) { 133 list_for_each_entry(dev, &knet->kni_list_head, list) { 134 kni_net_rx(dev); 135 kni_net_poll_resp(dev); 136 } 137 } 138 up_read(&knet->kni_list_lock); 139 /* reschedule out for a while */ 140 usleep_range(min_scheduling_interval, max_scheduling_interval); 141 } 142 143 return 0; 144 } 145 146 static int 147 kni_thread_multiple(void *param) 148 { 149 int j; 150 struct kni_dev *dev = param; 151 152 while (!kthread_should_stop()) { 153 for (j = 0; j < KNI_RX_LOOP_NUM; j++) { 154 kni_net_rx(dev); 155 kni_net_poll_resp(dev); 156 } 157 usleep_range(min_scheduling_interval, max_scheduling_interval); 158 } 159 160 return 0; 161 } 162 163 static int 164 kni_open(struct inode *inode, struct file *file) 165 { 166 struct net *net = current->nsproxy->net_ns; 167 struct kni_net *knet = net_generic(net, kni_net_id); 168 169 /* kni device can be opened by one user only per netns */ 170 if (test_and_set_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use)) 171 return -EBUSY; 172 173 file->private_data = get_net(net); 174 pr_debug("/dev/kni opened\n"); 175 176 return 0; 177 } 178 179 static int 180 kni_dev_remove(struct kni_dev *dev) 181 { 182 if (!dev) 183 return -ENODEV; 184 185 /* 186 * The memory of kni device is allocated and released together 187 * with net device. Release mbuf before freeing net device. 188 */ 189 kni_net_release_fifo_phy(dev); 190 191 if (dev->net_dev) { 192 unregister_netdev(dev->net_dev); 193 free_netdev(dev->net_dev); 194 } 195 196 return 0; 197 } 198 199 static int 200 kni_release(struct inode *inode, struct file *file) 201 { 202 struct net *net = file->private_data; 203 struct kni_net *knet = net_generic(net, kni_net_id); 204 struct kni_dev *dev, *n; 205 206 /* Stop kernel thread for single mode */ 207 if (multiple_kthread_on == 0) { 208 mutex_lock(&knet->kni_kthread_lock); 209 /* Stop kernel thread */ 210 if (knet->kni_kthread != NULL) { 211 kthread_stop(knet->kni_kthread); 212 knet->kni_kthread = NULL; 213 } 214 mutex_unlock(&knet->kni_kthread_lock); 215 } 216 217 down_write(&knet->kni_list_lock); 218 list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) { 219 /* Stop kernel thread for multiple mode */ 220 if (multiple_kthread_on && dev->pthread != NULL) { 221 kthread_stop(dev->pthread); 222 dev->pthread = NULL; 223 } 224 225 list_del(&dev->list); 226 kni_dev_remove(dev); 227 } 228 up_write(&knet->kni_list_lock); 229 230 /* Clear the bit of device in use */ 231 clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use); 232 233 put_net(net); 234 pr_debug("/dev/kni closed\n"); 235 236 return 0; 237 } 238 239 static int 240 kni_check_param(struct kni_dev *kni, struct rte_kni_device_info *dev) 241 { 242 if (!kni || !dev) 243 return -1; 244 245 /* Check if network name has been used */ 246 if (!strncmp(kni->name, dev->name, RTE_KNI_NAMESIZE)) { 247 pr_err("KNI name %s duplicated\n", dev->name); 248 return -1; 249 } 250 251 return 0; 252 } 253 254 static int 255 kni_run_thread(struct kni_net *knet, struct kni_dev *kni, uint8_t force_bind) 256 { 257 /** 258 * Create a new kernel thread for multiple mode, set its core affinity, 259 * and finally wake it up. 260 */ 261 if (multiple_kthread_on) { 262 kni->pthread = kthread_create(kni_thread_multiple, 263 (void *)kni, "kni_%s", kni->name); 264 if (IS_ERR(kni->pthread)) { 265 kni_dev_remove(kni); 266 return -ECANCELED; 267 } 268 269 if (force_bind) 270 kthread_bind(kni->pthread, kni->core_id); 271 wake_up_process(kni->pthread); 272 } else { 273 mutex_lock(&knet->kni_kthread_lock); 274 275 if (knet->kni_kthread == NULL) { 276 knet->kni_kthread = kthread_create(kni_thread_single, 277 (void *)knet, "kni_single"); 278 if (IS_ERR(knet->kni_kthread)) { 279 mutex_unlock(&knet->kni_kthread_lock); 280 kni_dev_remove(kni); 281 return -ECANCELED; 282 } 283 284 if (force_bind) 285 kthread_bind(knet->kni_kthread, kni->core_id); 286 wake_up_process(knet->kni_kthread); 287 } 288 289 mutex_unlock(&knet->kni_kthread_lock); 290 } 291 292 return 0; 293 } 294 295 static int 296 kni_ioctl_create(struct net *net, uint32_t ioctl_num, 297 unsigned long ioctl_param) 298 { 299 struct kni_net *knet = net_generic(net, kni_net_id); 300 int ret; 301 struct rte_kni_device_info dev_info; 302 struct net_device *net_dev = NULL; 303 struct kni_dev *kni, *dev, *n; 304 305 pr_info("Creating kni...\n"); 306 /* Check the buffer size, to avoid warning */ 307 if (_IOC_SIZE(ioctl_num) > sizeof(dev_info)) 308 return -EINVAL; 309 310 /* Copy kni info from user space */ 311 if (copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info))) 312 return -EFAULT; 313 314 /* Check if name is zero-ended */ 315 if (strnlen(dev_info.name, sizeof(dev_info.name)) == sizeof(dev_info.name)) { 316 pr_err("kni.name not zero-terminated"); 317 return -EINVAL; 318 } 319 320 /** 321 * Check if the cpu core id is valid for binding. 322 */ 323 if (dev_info.force_bind && !cpu_online(dev_info.core_id)) { 324 pr_err("cpu %u is not online\n", dev_info.core_id); 325 return -EINVAL; 326 } 327 328 /* Check if it has been created */ 329 down_read(&knet->kni_list_lock); 330 list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) { 331 if (kni_check_param(dev, &dev_info) < 0) { 332 up_read(&knet->kni_list_lock); 333 return -EINVAL; 334 } 335 } 336 up_read(&knet->kni_list_lock); 337 338 net_dev = alloc_netdev(sizeof(struct kni_dev), dev_info.name, 339 #ifdef NET_NAME_USER 340 NET_NAME_USER, 341 #endif 342 kni_net_init); 343 if (net_dev == NULL) { 344 pr_err("error allocating device \"%s\"\n", dev_info.name); 345 return -EBUSY; 346 } 347 348 dev_net_set(net_dev, net); 349 350 kni = netdev_priv(net_dev); 351 352 kni->net_dev = net_dev; 353 kni->core_id = dev_info.core_id; 354 strncpy(kni->name, dev_info.name, RTE_KNI_NAMESIZE); 355 356 /* Translate user space info into kernel space info */ 357 if (dev_info.iova_mode) { 358 #ifdef HAVE_IOVA_TO_KVA_MAPPING_SUPPORT 359 kni->tx_q = iova_to_kva(current, dev_info.tx_phys); 360 kni->rx_q = iova_to_kva(current, dev_info.rx_phys); 361 kni->alloc_q = iova_to_kva(current, dev_info.alloc_phys); 362 kni->free_q = iova_to_kva(current, dev_info.free_phys); 363 364 kni->req_q = iova_to_kva(current, dev_info.req_phys); 365 kni->resp_q = iova_to_kva(current, dev_info.resp_phys); 366 kni->sync_va = dev_info.sync_va; 367 kni->sync_kva = iova_to_kva(current, dev_info.sync_phys); 368 kni->usr_tsk = current; 369 kni->iova_mode = 1; 370 #else 371 pr_err("KNI module does not support IOVA to VA translation\n"); 372 return -EINVAL; 373 #endif 374 } else { 375 376 kni->tx_q = phys_to_virt(dev_info.tx_phys); 377 kni->rx_q = phys_to_virt(dev_info.rx_phys); 378 kni->alloc_q = phys_to_virt(dev_info.alloc_phys); 379 kni->free_q = phys_to_virt(dev_info.free_phys); 380 381 kni->req_q = phys_to_virt(dev_info.req_phys); 382 kni->resp_q = phys_to_virt(dev_info.resp_phys); 383 kni->sync_va = dev_info.sync_va; 384 kni->sync_kva = phys_to_virt(dev_info.sync_phys); 385 kni->iova_mode = 0; 386 } 387 388 kni->mbuf_size = dev_info.mbuf_size; 389 390 pr_debug("tx_phys: 0x%016llx, tx_q addr: 0x%p\n", 391 (unsigned long long) dev_info.tx_phys, kni->tx_q); 392 pr_debug("rx_phys: 0x%016llx, rx_q addr: 0x%p\n", 393 (unsigned long long) dev_info.rx_phys, kni->rx_q); 394 pr_debug("alloc_phys: 0x%016llx, alloc_q addr: 0x%p\n", 395 (unsigned long long) dev_info.alloc_phys, kni->alloc_q); 396 pr_debug("free_phys: 0x%016llx, free_q addr: 0x%p\n", 397 (unsigned long long) dev_info.free_phys, kni->free_q); 398 pr_debug("req_phys: 0x%016llx, req_q addr: 0x%p\n", 399 (unsigned long long) dev_info.req_phys, kni->req_q); 400 pr_debug("resp_phys: 0x%016llx, resp_q addr: 0x%p\n", 401 (unsigned long long) dev_info.resp_phys, kni->resp_q); 402 pr_debug("mbuf_size: %u\n", kni->mbuf_size); 403 404 /* if user has provided a valid mac address */ 405 if (is_valid_ether_addr(dev_info.mac_addr)) 406 memcpy(net_dev->dev_addr, dev_info.mac_addr, ETH_ALEN); 407 else 408 /* Generate random MAC address. */ 409 eth_random_addr(net_dev->dev_addr); 410 411 if (dev_info.mtu) 412 net_dev->mtu = dev_info.mtu; 413 #ifdef HAVE_MAX_MTU_PARAM 414 net_dev->max_mtu = net_dev->mtu; 415 416 if (dev_info.min_mtu) 417 net_dev->min_mtu = dev_info.min_mtu; 418 419 if (dev_info.max_mtu) 420 net_dev->max_mtu = dev_info.max_mtu; 421 #endif 422 423 ret = register_netdev(net_dev); 424 if (ret) { 425 pr_err("error %i registering device \"%s\"\n", 426 ret, dev_info.name); 427 kni->net_dev = NULL; 428 kni_dev_remove(kni); 429 free_netdev(net_dev); 430 return -ENODEV; 431 } 432 433 netif_carrier_off(net_dev); 434 435 ret = kni_run_thread(knet, kni, dev_info.force_bind); 436 if (ret != 0) 437 return ret; 438 439 down_write(&knet->kni_list_lock); 440 list_add(&kni->list, &knet->kni_list_head); 441 up_write(&knet->kni_list_lock); 442 443 return 0; 444 } 445 446 static int 447 kni_ioctl_release(struct net *net, uint32_t ioctl_num, 448 unsigned long ioctl_param) 449 { 450 struct kni_net *knet = net_generic(net, kni_net_id); 451 int ret = -EINVAL; 452 struct kni_dev *dev, *n; 453 struct rte_kni_device_info dev_info; 454 455 if (_IOC_SIZE(ioctl_num) > sizeof(dev_info)) 456 return -EINVAL; 457 458 if (copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info))) 459 return -EFAULT; 460 461 /* Release the network device according to its name */ 462 if (strlen(dev_info.name) == 0) 463 return -EINVAL; 464 465 down_write(&knet->kni_list_lock); 466 list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) { 467 if (strncmp(dev->name, dev_info.name, RTE_KNI_NAMESIZE) != 0) 468 continue; 469 470 if (multiple_kthread_on && dev->pthread != NULL) { 471 kthread_stop(dev->pthread); 472 dev->pthread = NULL; 473 } 474 475 list_del(&dev->list); 476 kni_dev_remove(dev); 477 ret = 0; 478 break; 479 } 480 up_write(&knet->kni_list_lock); 481 pr_info("%s release kni named %s\n", 482 (ret == 0 ? "Successfully" : "Unsuccessfully"), dev_info.name); 483 484 return ret; 485 } 486 487 static long 488 kni_ioctl(struct file *file, unsigned int ioctl_num, unsigned long ioctl_param) 489 { 490 long ret = -EINVAL; 491 struct net *net = current->nsproxy->net_ns; 492 493 pr_debug("IOCTL num=0x%0x param=0x%0lx\n", ioctl_num, ioctl_param); 494 495 /* 496 * Switch according to the ioctl called 497 */ 498 switch (_IOC_NR(ioctl_num)) { 499 case _IOC_NR(RTE_KNI_IOCTL_TEST): 500 /* For test only, not used */ 501 break; 502 case _IOC_NR(RTE_KNI_IOCTL_CREATE): 503 ret = kni_ioctl_create(net, ioctl_num, ioctl_param); 504 break; 505 case _IOC_NR(RTE_KNI_IOCTL_RELEASE): 506 ret = kni_ioctl_release(net, ioctl_num, ioctl_param); 507 break; 508 default: 509 pr_debug("IOCTL default\n"); 510 break; 511 } 512 513 return ret; 514 } 515 516 static long 517 kni_compat_ioctl(struct file *file, unsigned int ioctl_num, 518 unsigned long ioctl_param) 519 { 520 /* 32 bits app on 64 bits OS to be supported later */ 521 pr_debug("Not implemented.\n"); 522 523 return -EINVAL; 524 } 525 526 static const struct file_operations kni_fops = { 527 .owner = THIS_MODULE, 528 .open = kni_open, 529 .release = kni_release, 530 .unlocked_ioctl = kni_ioctl, 531 .compat_ioctl = kni_compat_ioctl, 532 }; 533 534 static struct miscdevice kni_misc = { 535 .minor = MISC_DYNAMIC_MINOR, 536 .name = KNI_DEVICE, 537 .fops = &kni_fops, 538 }; 539 540 static int __init 541 kni_parse_kthread_mode(void) 542 { 543 if (!kthread_mode) 544 return 0; 545 546 if (strcmp(kthread_mode, "single") == 0) 547 return 0; 548 else if (strcmp(kthread_mode, "multiple") == 0) 549 multiple_kthread_on = 1; 550 else 551 return -1; 552 553 return 0; 554 } 555 556 static int __init 557 kni_parse_carrier_state(void) 558 { 559 if (!carrier) { 560 kni_dflt_carrier = 0; 561 return 0; 562 } 563 564 if (strcmp(carrier, "off") == 0) 565 kni_dflt_carrier = 0; 566 else if (strcmp(carrier, "on") == 0) 567 kni_dflt_carrier = 1; 568 else 569 return -1; 570 571 return 0; 572 } 573 574 static int __init 575 kni_parse_bifurcated_support(void) 576 { 577 if (!enable_bifurcated) { 578 bifurcated_support = 0; 579 return 0; 580 } 581 582 if (strcmp(enable_bifurcated, "on") == 0) 583 bifurcated_support = 1; 584 else 585 return -1; 586 587 return 0; 588 } 589 590 static int __init 591 kni_init(void) 592 { 593 int rc; 594 595 if (kni_parse_kthread_mode() < 0) { 596 pr_err("Invalid parameter for kthread_mode\n"); 597 return -EINVAL; 598 } 599 600 if (multiple_kthread_on == 0) 601 pr_debug("Single kernel thread for all KNI devices\n"); 602 else 603 pr_debug("Multiple kernel thread mode enabled\n"); 604 605 if (kni_parse_carrier_state() < 0) { 606 pr_err("Invalid parameter for carrier\n"); 607 return -EINVAL; 608 } 609 610 if (kni_dflt_carrier == 0) 611 pr_debug("Default carrier state set to off.\n"); 612 else 613 pr_debug("Default carrier state set to on.\n"); 614 615 if (kni_parse_bifurcated_support() < 0) { 616 pr_err("Invalid parameter for bifurcated support\n"); 617 return -EINVAL; 618 } 619 if (bifurcated_support == 1) 620 pr_debug("bifurcated support is enabled.\n"); 621 622 if (min_scheduling_interval < 0 || max_scheduling_interval < 0 || 623 min_scheduling_interval > KNI_KTHREAD_MAX_RESCHEDULE_INTERVAL || 624 max_scheduling_interval > KNI_KTHREAD_MAX_RESCHEDULE_INTERVAL || 625 min_scheduling_interval >= max_scheduling_interval) { 626 pr_err("Invalid parameters for scheduling interval\n"); 627 return -EINVAL; 628 } 629 630 #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS 631 rc = register_pernet_subsys(&kni_net_ops); 632 #else 633 rc = register_pernet_gen_subsys(&kni_net_id, &kni_net_ops); 634 #endif 635 if (rc) 636 return -EPERM; 637 638 rc = misc_register(&kni_misc); 639 if (rc != 0) { 640 pr_err("Misc registration failed\n"); 641 goto out; 642 } 643 644 /* Configure the lo mode according to the input parameter */ 645 kni_net_config_lo_mode(lo_mode); 646 647 return 0; 648 649 out: 650 #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS 651 unregister_pernet_subsys(&kni_net_ops); 652 #else 653 unregister_pernet_gen_subsys(kni_net_id, &kni_net_ops); 654 #endif 655 return rc; 656 } 657 658 static void __exit 659 kni_exit(void) 660 { 661 misc_deregister(&kni_misc); 662 #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS 663 unregister_pernet_subsys(&kni_net_ops); 664 #else 665 unregister_pernet_gen_subsys(kni_net_id, &kni_net_ops); 666 #endif 667 } 668 669 module_init(kni_init); 670 module_exit(kni_exit); 671 672 module_param(lo_mode, charp, 0644); 673 MODULE_PARM_DESC(lo_mode, 674 "KNI loopback mode (default=lo_mode_none):\n" 675 "\t\tlo_mode_none Kernel loopback disabled\n" 676 "\t\tlo_mode_fifo Enable kernel loopback with fifo\n" 677 "\t\tlo_mode_fifo_skb Enable kernel loopback with fifo and skb buffer\n" 678 "\t\t" 679 ); 680 681 module_param(kthread_mode, charp, 0644); 682 MODULE_PARM_DESC(kthread_mode, 683 "Kernel thread mode (default=single):\n" 684 "\t\tsingle Single kernel thread mode enabled.\n" 685 "\t\tmultiple Multiple kernel thread mode enabled.\n" 686 "\t\t" 687 ); 688 689 module_param(carrier, charp, 0644); 690 MODULE_PARM_DESC(carrier, 691 "Default carrier state for KNI interface (default=off):\n" 692 "\t\toff Interfaces will be created with carrier state set to off.\n" 693 "\t\ton Interfaces will be created with carrier state set to on.\n" 694 "\t\t" 695 ); 696 697 module_param(enable_bifurcated, charp, 0644); 698 MODULE_PARM_DESC(enable_bifurcated, 699 "Enable request processing support for bifurcated drivers, " 700 "which means releasing rtnl_lock before calling userspace callback and " 701 "supporting async requests (default=off):\n" 702 "\t\ton Enable request processing support for bifurcated drivers.\n" 703 "\t\t" 704 ); 705 706 module_param(min_scheduling_interval, long, 0644); 707 MODULE_PARM_DESC(min_scheduling_interval, 708 "KNI thread min scheduling interval (default=100 microseconds)" 709 ); 710 711 module_param(max_scheduling_interval, long, 0644); 712 MODULE_PARM_DESC(max_scheduling_interval, 713 "KNI thread max scheduling interval (default=200 microseconds)" 714 ); 715