1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright(c) 2010-2014 Intel Corporation. 4 */ 5 6 #include <linux/version.h> 7 #include <linux/module.h> 8 #include <linux/miscdevice.h> 9 #include <linux/netdevice.h> 10 #include <linux/etherdevice.h> 11 #include <linux/pci.h> 12 #include <linux/kthread.h> 13 #include <linux/rwsem.h> 14 #include <linux/mutex.h> 15 #include <linux/nsproxy.h> 16 #include <net/net_namespace.h> 17 #include <net/netns/generic.h> 18 19 #include <rte_kni_common.h> 20 21 #include "compat.h" 22 #include "kni_dev.h" 23 24 MODULE_VERSION(KNI_VERSION); 25 MODULE_LICENSE("Dual BSD/GPL"); 26 MODULE_AUTHOR("Intel Corporation"); 27 MODULE_DESCRIPTION("Kernel Module for managing kni devices"); 28 29 #define KNI_RX_LOOP_NUM 1000 30 31 #define KNI_MAX_DEVICES 32 32 33 /* loopback mode */ 34 static char *lo_mode; 35 36 /* Kernel thread mode */ 37 static char *kthread_mode; 38 static uint32_t multiple_kthread_on; 39 40 /* Default carrier state for created KNI network interfaces */ 41 static char *carrier; 42 uint32_t kni_dflt_carrier; 43 44 /* Request processing support for bifurcated drivers. */ 45 static char *enable_bifurcated; 46 uint32_t bifurcated_support; 47 48 /* KNI thread scheduling interval */ 49 static long min_scheduling_interval = 100; /* us */ 50 static long max_scheduling_interval = 200; /* us */ 51 52 #define KNI_DEV_IN_USE_BIT_NUM 0 /* Bit number for device in use */ 53 54 static int kni_net_id; 55 56 struct kni_net { 57 unsigned long device_in_use; /* device in use flag */ 58 struct mutex kni_kthread_lock; 59 struct task_struct *kni_kthread; 60 struct rw_semaphore kni_list_lock; 61 struct list_head kni_list_head; 62 }; 63 64 static int __net_init 65 kni_init_net(struct net *net) 66 { 67 #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS 68 struct kni_net *knet = net_generic(net, kni_net_id); 69 70 memset(knet, 0, sizeof(*knet)); 71 #else 72 struct kni_net *knet; 73 int ret; 74 75 knet = kzalloc(sizeof(struct kni_net), GFP_KERNEL); 76 if (!knet) { 77 ret = -ENOMEM; 78 return ret; 79 } 80 #endif 81 82 /* Clear the bit of device in use */ 83 clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use); 84 85 mutex_init(&knet->kni_kthread_lock); 86 87 init_rwsem(&knet->kni_list_lock); 88 INIT_LIST_HEAD(&knet->kni_list_head); 89 90 #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS 91 return 0; 92 #else 93 ret = net_assign_generic(net, kni_net_id, knet); 94 if (ret < 0) 95 kfree(knet); 96 97 return ret; 98 #endif 99 } 100 101 static void __net_exit 102 kni_exit_net(struct net *net) 103 { 104 struct kni_net *knet __maybe_unused; 105 106 knet = net_generic(net, kni_net_id); 107 mutex_destroy(&knet->kni_kthread_lock); 108 109 #ifndef HAVE_SIMPLIFIED_PERNET_OPERATIONS 110 kfree(knet); 111 #endif 112 } 113 114 static struct pernet_operations kni_net_ops = { 115 .init = kni_init_net, 116 .exit = kni_exit_net, 117 #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS 118 .id = &kni_net_id, 119 .size = sizeof(struct kni_net), 120 #endif 121 }; 122 123 static int 124 kni_thread_single(void *data) 125 { 126 struct kni_net *knet = data; 127 int j; 128 struct kni_dev *dev; 129 130 while (!kthread_should_stop()) { 131 down_read(&knet->kni_list_lock); 132 for (j = 0; j < KNI_RX_LOOP_NUM; j++) { 133 list_for_each_entry(dev, &knet->kni_list_head, list) { 134 kni_net_rx(dev); 135 kni_net_poll_resp(dev); 136 } 137 } 138 up_read(&knet->kni_list_lock); 139 /* reschedule out for a while */ 140 usleep_range(min_scheduling_interval, max_scheduling_interval); 141 } 142 143 return 0; 144 } 145 146 static int 147 kni_thread_multiple(void *param) 148 { 149 int j; 150 struct kni_dev *dev = param; 151 152 while (!kthread_should_stop()) { 153 for (j = 0; j < KNI_RX_LOOP_NUM; j++) { 154 kni_net_rx(dev); 155 kni_net_poll_resp(dev); 156 } 157 usleep_range(min_scheduling_interval, max_scheduling_interval); 158 } 159 160 return 0; 161 } 162 163 static int 164 kni_open(struct inode *inode, struct file *file) 165 { 166 struct net *net = current->nsproxy->net_ns; 167 struct kni_net *knet = net_generic(net, kni_net_id); 168 169 /* kni device can be opened by one user only per netns */ 170 if (test_and_set_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use)) 171 return -EBUSY; 172 173 file->private_data = get_net(net); 174 pr_debug("/dev/kni opened\n"); 175 176 return 0; 177 } 178 179 static int 180 kni_dev_remove(struct kni_dev *dev) 181 { 182 if (!dev) 183 return -ENODEV; 184 185 if (dev->net_dev) { 186 unregister_netdev(dev->net_dev); 187 free_netdev(dev->net_dev); 188 } 189 190 kni_net_release_fifo_phy(dev); 191 192 return 0; 193 } 194 195 static int 196 kni_release(struct inode *inode, struct file *file) 197 { 198 struct net *net = file->private_data; 199 struct kni_net *knet = net_generic(net, kni_net_id); 200 struct kni_dev *dev, *n; 201 202 /* Stop kernel thread for single mode */ 203 if (multiple_kthread_on == 0) { 204 mutex_lock(&knet->kni_kthread_lock); 205 /* Stop kernel thread */ 206 if (knet->kni_kthread != NULL) { 207 kthread_stop(knet->kni_kthread); 208 knet->kni_kthread = NULL; 209 } 210 mutex_unlock(&knet->kni_kthread_lock); 211 } 212 213 down_write(&knet->kni_list_lock); 214 list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) { 215 /* Stop kernel thread for multiple mode */ 216 if (multiple_kthread_on && dev->pthread != NULL) { 217 kthread_stop(dev->pthread); 218 dev->pthread = NULL; 219 } 220 221 kni_dev_remove(dev); 222 list_del(&dev->list); 223 } 224 up_write(&knet->kni_list_lock); 225 226 /* Clear the bit of device in use */ 227 clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use); 228 229 put_net(net); 230 pr_debug("/dev/kni closed\n"); 231 232 return 0; 233 } 234 235 static int 236 kni_check_param(struct kni_dev *kni, struct rte_kni_device_info *dev) 237 { 238 if (!kni || !dev) 239 return -1; 240 241 /* Check if network name has been used */ 242 if (!strncmp(kni->name, dev->name, RTE_KNI_NAMESIZE)) { 243 pr_err("KNI name %s duplicated\n", dev->name); 244 return -1; 245 } 246 247 return 0; 248 } 249 250 static int 251 kni_run_thread(struct kni_net *knet, struct kni_dev *kni, uint8_t force_bind) 252 { 253 /** 254 * Create a new kernel thread for multiple mode, set its core affinity, 255 * and finally wake it up. 256 */ 257 if (multiple_kthread_on) { 258 kni->pthread = kthread_create(kni_thread_multiple, 259 (void *)kni, "kni_%s", kni->name); 260 if (IS_ERR(kni->pthread)) { 261 kni_dev_remove(kni); 262 return -ECANCELED; 263 } 264 265 if (force_bind) 266 kthread_bind(kni->pthread, kni->core_id); 267 wake_up_process(kni->pthread); 268 } else { 269 mutex_lock(&knet->kni_kthread_lock); 270 271 if (knet->kni_kthread == NULL) { 272 knet->kni_kthread = kthread_create(kni_thread_single, 273 (void *)knet, "kni_single"); 274 if (IS_ERR(knet->kni_kthread)) { 275 mutex_unlock(&knet->kni_kthread_lock); 276 kni_dev_remove(kni); 277 return -ECANCELED; 278 } 279 280 if (force_bind) 281 kthread_bind(knet->kni_kthread, kni->core_id); 282 wake_up_process(knet->kni_kthread); 283 } 284 285 mutex_unlock(&knet->kni_kthread_lock); 286 } 287 288 return 0; 289 } 290 291 static int 292 kni_ioctl_create(struct net *net, uint32_t ioctl_num, 293 unsigned long ioctl_param) 294 { 295 struct kni_net *knet = net_generic(net, kni_net_id); 296 int ret; 297 struct rte_kni_device_info dev_info; 298 struct net_device *net_dev = NULL; 299 struct kni_dev *kni, *dev, *n; 300 301 pr_info("Creating kni...\n"); 302 /* Check the buffer size, to avoid warning */ 303 if (_IOC_SIZE(ioctl_num) > sizeof(dev_info)) 304 return -EINVAL; 305 306 /* Copy kni info from user space */ 307 if (copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info))) 308 return -EFAULT; 309 310 /* Check if name is zero-ended */ 311 if (strnlen(dev_info.name, sizeof(dev_info.name)) == sizeof(dev_info.name)) { 312 pr_err("kni.name not zero-terminated"); 313 return -EINVAL; 314 } 315 316 /** 317 * Check if the cpu core id is valid for binding. 318 */ 319 if (dev_info.force_bind && !cpu_online(dev_info.core_id)) { 320 pr_err("cpu %u is not online\n", dev_info.core_id); 321 return -EINVAL; 322 } 323 324 /* Check if it has been created */ 325 down_read(&knet->kni_list_lock); 326 list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) { 327 if (kni_check_param(dev, &dev_info) < 0) { 328 up_read(&knet->kni_list_lock); 329 return -EINVAL; 330 } 331 } 332 up_read(&knet->kni_list_lock); 333 334 net_dev = alloc_netdev(sizeof(struct kni_dev), dev_info.name, 335 #ifdef NET_NAME_USER 336 NET_NAME_USER, 337 #endif 338 kni_net_init); 339 if (net_dev == NULL) { 340 pr_err("error allocating device \"%s\"\n", dev_info.name); 341 return -EBUSY; 342 } 343 344 dev_net_set(net_dev, net); 345 346 kni = netdev_priv(net_dev); 347 348 kni->net_dev = net_dev; 349 kni->core_id = dev_info.core_id; 350 strncpy(kni->name, dev_info.name, RTE_KNI_NAMESIZE); 351 352 /* Translate user space info into kernel space info */ 353 if (dev_info.iova_mode) { 354 #ifdef HAVE_IOVA_TO_KVA_MAPPING_SUPPORT 355 kni->tx_q = iova_to_kva(current, dev_info.tx_phys); 356 kni->rx_q = iova_to_kva(current, dev_info.rx_phys); 357 kni->alloc_q = iova_to_kva(current, dev_info.alloc_phys); 358 kni->free_q = iova_to_kva(current, dev_info.free_phys); 359 360 kni->req_q = iova_to_kva(current, dev_info.req_phys); 361 kni->resp_q = iova_to_kva(current, dev_info.resp_phys); 362 kni->sync_va = dev_info.sync_va; 363 kni->sync_kva = iova_to_kva(current, dev_info.sync_phys); 364 kni->usr_tsk = current; 365 kni->iova_mode = 1; 366 #else 367 pr_err("KNI module does not support IOVA to VA translation\n"); 368 return -EINVAL; 369 #endif 370 } else { 371 372 kni->tx_q = phys_to_virt(dev_info.tx_phys); 373 kni->rx_q = phys_to_virt(dev_info.rx_phys); 374 kni->alloc_q = phys_to_virt(dev_info.alloc_phys); 375 kni->free_q = phys_to_virt(dev_info.free_phys); 376 377 kni->req_q = phys_to_virt(dev_info.req_phys); 378 kni->resp_q = phys_to_virt(dev_info.resp_phys); 379 kni->sync_va = dev_info.sync_va; 380 kni->sync_kva = phys_to_virt(dev_info.sync_phys); 381 kni->iova_mode = 0; 382 } 383 384 kni->mbuf_size = dev_info.mbuf_size; 385 386 pr_debug("tx_phys: 0x%016llx, tx_q addr: 0x%p\n", 387 (unsigned long long) dev_info.tx_phys, kni->tx_q); 388 pr_debug("rx_phys: 0x%016llx, rx_q addr: 0x%p\n", 389 (unsigned long long) dev_info.rx_phys, kni->rx_q); 390 pr_debug("alloc_phys: 0x%016llx, alloc_q addr: 0x%p\n", 391 (unsigned long long) dev_info.alloc_phys, kni->alloc_q); 392 pr_debug("free_phys: 0x%016llx, free_q addr: 0x%p\n", 393 (unsigned long long) dev_info.free_phys, kni->free_q); 394 pr_debug("req_phys: 0x%016llx, req_q addr: 0x%p\n", 395 (unsigned long long) dev_info.req_phys, kni->req_q); 396 pr_debug("resp_phys: 0x%016llx, resp_q addr: 0x%p\n", 397 (unsigned long long) dev_info.resp_phys, kni->resp_q); 398 pr_debug("mbuf_size: %u\n", kni->mbuf_size); 399 400 /* if user has provided a valid mac address */ 401 if (is_valid_ether_addr(dev_info.mac_addr)) 402 memcpy(net_dev->dev_addr, dev_info.mac_addr, ETH_ALEN); 403 else 404 /* Generate random MAC address. */ 405 eth_random_addr(net_dev->dev_addr); 406 407 if (dev_info.mtu) 408 net_dev->mtu = dev_info.mtu; 409 #ifdef HAVE_MAX_MTU_PARAM 410 net_dev->max_mtu = net_dev->mtu; 411 412 if (dev_info.min_mtu) 413 net_dev->min_mtu = dev_info.min_mtu; 414 415 if (dev_info.max_mtu) 416 net_dev->max_mtu = dev_info.max_mtu; 417 #endif 418 419 ret = register_netdev(net_dev); 420 if (ret) { 421 pr_err("error %i registering device \"%s\"\n", 422 ret, dev_info.name); 423 kni->net_dev = NULL; 424 kni_dev_remove(kni); 425 free_netdev(net_dev); 426 return -ENODEV; 427 } 428 429 netif_carrier_off(net_dev); 430 431 ret = kni_run_thread(knet, kni, dev_info.force_bind); 432 if (ret != 0) 433 return ret; 434 435 down_write(&knet->kni_list_lock); 436 list_add(&kni->list, &knet->kni_list_head); 437 up_write(&knet->kni_list_lock); 438 439 return 0; 440 } 441 442 static int 443 kni_ioctl_release(struct net *net, uint32_t ioctl_num, 444 unsigned long ioctl_param) 445 { 446 struct kni_net *knet = net_generic(net, kni_net_id); 447 int ret = -EINVAL; 448 struct kni_dev *dev, *n; 449 struct rte_kni_device_info dev_info; 450 451 if (_IOC_SIZE(ioctl_num) > sizeof(dev_info)) 452 return -EINVAL; 453 454 if (copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info))) 455 return -EFAULT; 456 457 /* Release the network device according to its name */ 458 if (strlen(dev_info.name) == 0) 459 return -EINVAL; 460 461 down_write(&knet->kni_list_lock); 462 list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) { 463 if (strncmp(dev->name, dev_info.name, RTE_KNI_NAMESIZE) != 0) 464 continue; 465 466 if (multiple_kthread_on && dev->pthread != NULL) { 467 kthread_stop(dev->pthread); 468 dev->pthread = NULL; 469 } 470 471 kni_dev_remove(dev); 472 list_del(&dev->list); 473 ret = 0; 474 break; 475 } 476 up_write(&knet->kni_list_lock); 477 pr_info("%s release kni named %s\n", 478 (ret == 0 ? "Successfully" : "Unsuccessfully"), dev_info.name); 479 480 return ret; 481 } 482 483 static long 484 kni_ioctl(struct file *file, unsigned int ioctl_num, unsigned long ioctl_param) 485 { 486 long ret = -EINVAL; 487 struct net *net = current->nsproxy->net_ns; 488 489 pr_debug("IOCTL num=0x%0x param=0x%0lx\n", ioctl_num, ioctl_param); 490 491 /* 492 * Switch according to the ioctl called 493 */ 494 switch (_IOC_NR(ioctl_num)) { 495 case _IOC_NR(RTE_KNI_IOCTL_TEST): 496 /* For test only, not used */ 497 break; 498 case _IOC_NR(RTE_KNI_IOCTL_CREATE): 499 ret = kni_ioctl_create(net, ioctl_num, ioctl_param); 500 break; 501 case _IOC_NR(RTE_KNI_IOCTL_RELEASE): 502 ret = kni_ioctl_release(net, ioctl_num, ioctl_param); 503 break; 504 default: 505 pr_debug("IOCTL default\n"); 506 break; 507 } 508 509 return ret; 510 } 511 512 static long 513 kni_compat_ioctl(struct file *file, unsigned int ioctl_num, 514 unsigned long ioctl_param) 515 { 516 /* 32 bits app on 64 bits OS to be supported later */ 517 pr_debug("Not implemented.\n"); 518 519 return -EINVAL; 520 } 521 522 static const struct file_operations kni_fops = { 523 .owner = THIS_MODULE, 524 .open = kni_open, 525 .release = kni_release, 526 .unlocked_ioctl = kni_ioctl, 527 .compat_ioctl = kni_compat_ioctl, 528 }; 529 530 static struct miscdevice kni_misc = { 531 .minor = MISC_DYNAMIC_MINOR, 532 .name = KNI_DEVICE, 533 .fops = &kni_fops, 534 }; 535 536 static int __init 537 kni_parse_kthread_mode(void) 538 { 539 if (!kthread_mode) 540 return 0; 541 542 if (strcmp(kthread_mode, "single") == 0) 543 return 0; 544 else if (strcmp(kthread_mode, "multiple") == 0) 545 multiple_kthread_on = 1; 546 else 547 return -1; 548 549 return 0; 550 } 551 552 static int __init 553 kni_parse_carrier_state(void) 554 { 555 if (!carrier) { 556 kni_dflt_carrier = 0; 557 return 0; 558 } 559 560 if (strcmp(carrier, "off") == 0) 561 kni_dflt_carrier = 0; 562 else if (strcmp(carrier, "on") == 0) 563 kni_dflt_carrier = 1; 564 else 565 return -1; 566 567 return 0; 568 } 569 570 static int __init 571 kni_parse_bifurcated_support(void) 572 { 573 if (!enable_bifurcated) { 574 bifurcated_support = 0; 575 return 0; 576 } 577 578 if (strcmp(enable_bifurcated, "on") == 0) 579 bifurcated_support = 1; 580 else 581 return -1; 582 583 return 0; 584 } 585 586 static int __init 587 kni_init(void) 588 { 589 int rc; 590 591 if (kni_parse_kthread_mode() < 0) { 592 pr_err("Invalid parameter for kthread_mode\n"); 593 return -EINVAL; 594 } 595 596 if (multiple_kthread_on == 0) 597 pr_debug("Single kernel thread for all KNI devices\n"); 598 else 599 pr_debug("Multiple kernel thread mode enabled\n"); 600 601 if (kni_parse_carrier_state() < 0) { 602 pr_err("Invalid parameter for carrier\n"); 603 return -EINVAL; 604 } 605 606 if (kni_dflt_carrier == 0) 607 pr_debug("Default carrier state set to off.\n"); 608 else 609 pr_debug("Default carrier state set to on.\n"); 610 611 if (kni_parse_bifurcated_support() < 0) { 612 pr_err("Invalid parameter for bifurcated support\n"); 613 return -EINVAL; 614 } 615 if (bifurcated_support == 1) 616 pr_debug("bifurcated support is enabled.\n"); 617 618 if (min_scheduling_interval < 0 || max_scheduling_interval < 0 || 619 min_scheduling_interval > KNI_KTHREAD_MAX_RESCHEDULE_INTERVAL || 620 max_scheduling_interval > KNI_KTHREAD_MAX_RESCHEDULE_INTERVAL || 621 min_scheduling_interval >= max_scheduling_interval) { 622 pr_err("Invalid parameters for scheduling interval\n"); 623 return -EINVAL; 624 } 625 626 #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS 627 rc = register_pernet_subsys(&kni_net_ops); 628 #else 629 rc = register_pernet_gen_subsys(&kni_net_id, &kni_net_ops); 630 #endif 631 if (rc) 632 return -EPERM; 633 634 rc = misc_register(&kni_misc); 635 if (rc != 0) { 636 pr_err("Misc registration failed\n"); 637 goto out; 638 } 639 640 /* Configure the lo mode according to the input parameter */ 641 kni_net_config_lo_mode(lo_mode); 642 643 return 0; 644 645 out: 646 #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS 647 unregister_pernet_subsys(&kni_net_ops); 648 #else 649 unregister_pernet_gen_subsys(kni_net_id, &kni_net_ops); 650 #endif 651 return rc; 652 } 653 654 static void __exit 655 kni_exit(void) 656 { 657 misc_deregister(&kni_misc); 658 #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS 659 unregister_pernet_subsys(&kni_net_ops); 660 #else 661 unregister_pernet_gen_subsys(kni_net_id, &kni_net_ops); 662 #endif 663 } 664 665 module_init(kni_init); 666 module_exit(kni_exit); 667 668 module_param(lo_mode, charp, 0644); 669 MODULE_PARM_DESC(lo_mode, 670 "KNI loopback mode (default=lo_mode_none):\n" 671 "\t\tlo_mode_none Kernel loopback disabled\n" 672 "\t\tlo_mode_fifo Enable kernel loopback with fifo\n" 673 "\t\tlo_mode_fifo_skb Enable kernel loopback with fifo and skb buffer\n" 674 "\t\t" 675 ); 676 677 module_param(kthread_mode, charp, 0644); 678 MODULE_PARM_DESC(kthread_mode, 679 "Kernel thread mode (default=single):\n" 680 "\t\tsingle Single kernel thread mode enabled.\n" 681 "\t\tmultiple Multiple kernel thread mode enabled.\n" 682 "\t\t" 683 ); 684 685 module_param(carrier, charp, 0644); 686 MODULE_PARM_DESC(carrier, 687 "Default carrier state for KNI interface (default=off):\n" 688 "\t\toff Interfaces will be created with carrier state set to off.\n" 689 "\t\ton Interfaces will be created with carrier state set to on.\n" 690 "\t\t" 691 ); 692 693 module_param(enable_bifurcated, charp, 0644); 694 MODULE_PARM_DESC(enable_bifurcated, 695 "Enable request processing support for bifurcated drivers, " 696 "which means releasing rtnl_lock before calling userspace callback and " 697 "supporting async requests (default=off):\n" 698 "\t\ton Enable request processing support for bifurcated drivers.\n" 699 "\t\t" 700 ); 701 702 module_param(min_scheduling_interval, long, 0644); 703 MODULE_PARM_DESC(min_scheduling_interval, 704 "KNI thread min scheduling interval (default=100 microseconds)" 705 ); 706 707 module_param(max_scheduling_interval, long, 0644); 708 MODULE_PARM_DESC(max_scheduling_interval, 709 "KNI thread max scheduling interval (default=200 microseconds)" 710 ); 711