1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright(c) 2010-2014 Intel Corporation.
4 */
5
6 #include <linux/version.h>
7 #include <linux/module.h>
8 #include <linux/miscdevice.h>
9 #include <linux/netdevice.h>
10 #include <linux/etherdevice.h>
11 #include <linux/pci.h>
12 #include <linux/kthread.h>
13 #include <linux/rwsem.h>
14 #include <linux/mutex.h>
15 #include <linux/nsproxy.h>
16 #include <net/net_namespace.h>
17 #include <net/netns/generic.h>
18
19 #include <rte_kni_common.h>
20
21 #include "compat.h"
22 #include "kni_dev.h"
23
24 MODULE_VERSION(KNI_VERSION);
25 MODULE_LICENSE("Dual BSD/GPL");
26 MODULE_AUTHOR("Intel Corporation");
27 MODULE_DESCRIPTION("Kernel Module for managing kni devices");
28
29 #define KNI_RX_LOOP_NUM 1000
30
31 #define KNI_MAX_DEVICES 32
32
33 /* loopback mode */
34 static char *lo_mode;
35
36 /* Kernel thread mode */
37 static char *kthread_mode;
38 static uint32_t multiple_kthread_on;
39
40 /* Default carrier state for created KNI network interfaces */
41 static char *carrier;
42 uint32_t kni_dflt_carrier;
43
44 #define KNI_DEV_IN_USE_BIT_NUM 0 /* Bit number for device in use */
45
46 static int kni_net_id;
47
48 struct kni_net {
49 unsigned long device_in_use; /* device in use flag */
50 struct mutex kni_kthread_lock;
51 struct task_struct *kni_kthread;
52 struct rw_semaphore kni_list_lock;
53 struct list_head kni_list_head;
54 };
55
56 static int __net_init
kni_init_net(struct net * net)57 kni_init_net(struct net *net)
58 {
59 #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
60 struct kni_net *knet = net_generic(net, kni_net_id);
61
62 memset(knet, 0, sizeof(*knet));
63 #else
64 struct kni_net *knet;
65 int ret;
66
67 knet = kzalloc(sizeof(struct kni_net), GFP_KERNEL);
68 if (!knet) {
69 ret = -ENOMEM;
70 return ret;
71 }
72 #endif
73
74 /* Clear the bit of device in use */
75 clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use);
76
77 mutex_init(&knet->kni_kthread_lock);
78
79 init_rwsem(&knet->kni_list_lock);
80 INIT_LIST_HEAD(&knet->kni_list_head);
81
82 #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
83 return 0;
84 #else
85 ret = net_assign_generic(net, kni_net_id, knet);
86 if (ret < 0)
87 kfree(knet);
88
89 return ret;
90 #endif
91 }
92
93 static void __net_exit
kni_exit_net(struct net * net)94 kni_exit_net(struct net *net)
95 {
96 struct kni_net *knet __maybe_unused;
97
98 knet = net_generic(net, kni_net_id);
99 mutex_destroy(&knet->kni_kthread_lock);
100
101 #ifndef HAVE_SIMPLIFIED_PERNET_OPERATIONS
102 kfree(knet);
103 #endif
104 }
105
106 static struct pernet_operations kni_net_ops = {
107 .init = kni_init_net,
108 .exit = kni_exit_net,
109 #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
110 .id = &kni_net_id,
111 .size = sizeof(struct kni_net),
112 #endif
113 };
114
115 static int
kni_thread_single(void * data)116 kni_thread_single(void *data)
117 {
118 struct kni_net *knet = data;
119 int j;
120 struct kni_dev *dev;
121
122 while (!kthread_should_stop()) {
123 down_read(&knet->kni_list_lock);
124 for (j = 0; j < KNI_RX_LOOP_NUM; j++) {
125 list_for_each_entry(dev, &knet->kni_list_head, list) {
126 kni_net_rx(dev);
127 kni_net_poll_resp(dev);
128 }
129 }
130 up_read(&knet->kni_list_lock);
131 #ifdef RTE_KNI_PREEMPT_DEFAULT
132 /* reschedule out for a while */
133 schedule_timeout_interruptible(
134 usecs_to_jiffies(KNI_KTHREAD_RESCHEDULE_INTERVAL));
135 #endif
136 }
137
138 return 0;
139 }
140
141 static int
kni_thread_multiple(void * param)142 kni_thread_multiple(void *param)
143 {
144 int j;
145 struct kni_dev *dev = param;
146
147 while (!kthread_should_stop()) {
148 for (j = 0; j < KNI_RX_LOOP_NUM; j++) {
149 kni_net_rx(dev);
150 kni_net_poll_resp(dev);
151 }
152 #ifdef RTE_KNI_PREEMPT_DEFAULT
153 schedule_timeout_interruptible(
154 usecs_to_jiffies(KNI_KTHREAD_RESCHEDULE_INTERVAL));
155 #endif
156 }
157
158 return 0;
159 }
160
161 static int
kni_open(struct inode * inode,struct file * file)162 kni_open(struct inode *inode, struct file *file)
163 {
164 struct net *net = current->nsproxy->net_ns;
165 struct kni_net *knet = net_generic(net, kni_net_id);
166
167 /* kni device can be opened by one user only per netns */
168 if (test_and_set_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use))
169 return -EBUSY;
170
171 file->private_data = get_net(net);
172 pr_debug("/dev/kni opened\n");
173
174 return 0;
175 }
176
177 static int
kni_dev_remove(struct kni_dev * dev)178 kni_dev_remove(struct kni_dev *dev)
179 {
180 if (!dev)
181 return -ENODEV;
182
183 if (dev->net_dev) {
184 unregister_netdev(dev->net_dev);
185 free_netdev(dev->net_dev);
186 }
187
188 kni_net_release_fifo_phy(dev);
189
190 return 0;
191 }
192
193 static int
kni_release(struct inode * inode,struct file * file)194 kni_release(struct inode *inode, struct file *file)
195 {
196 struct net *net = file->private_data;
197 struct kni_net *knet = net_generic(net, kni_net_id);
198 struct kni_dev *dev, *n;
199
200 /* Stop kernel thread for single mode */
201 if (multiple_kthread_on == 0) {
202 mutex_lock(&knet->kni_kthread_lock);
203 /* Stop kernel thread */
204 if (knet->kni_kthread != NULL) {
205 kthread_stop(knet->kni_kthread);
206 knet->kni_kthread = NULL;
207 }
208 mutex_unlock(&knet->kni_kthread_lock);
209 }
210
211 down_write(&knet->kni_list_lock);
212 list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) {
213 /* Stop kernel thread for multiple mode */
214 if (multiple_kthread_on && dev->pthread != NULL) {
215 kthread_stop(dev->pthread);
216 dev->pthread = NULL;
217 }
218
219 kni_dev_remove(dev);
220 list_del(&dev->list);
221 }
222 up_write(&knet->kni_list_lock);
223
224 /* Clear the bit of device in use */
225 clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use);
226
227 put_net(net);
228 pr_debug("/dev/kni closed\n");
229
230 return 0;
231 }
232
233 static int
kni_check_param(struct kni_dev * kni,struct rte_kni_device_info * dev)234 kni_check_param(struct kni_dev *kni, struct rte_kni_device_info *dev)
235 {
236 if (!kni || !dev)
237 return -1;
238
239 /* Check if network name has been used */
240 if (!strncmp(kni->name, dev->name, RTE_KNI_NAMESIZE)) {
241 pr_err("KNI name %s duplicated\n", dev->name);
242 return -1;
243 }
244
245 return 0;
246 }
247
248 static int
kni_run_thread(struct kni_net * knet,struct kni_dev * kni,uint8_t force_bind)249 kni_run_thread(struct kni_net *knet, struct kni_dev *kni, uint8_t force_bind)
250 {
251 /**
252 * Create a new kernel thread for multiple mode, set its core affinity,
253 * and finally wake it up.
254 */
255 if (multiple_kthread_on) {
256 kni->pthread = kthread_create(kni_thread_multiple,
257 (void *)kni, "kni_%s", kni->name);
258 if (IS_ERR(kni->pthread)) {
259 kni_dev_remove(kni);
260 return -ECANCELED;
261 }
262
263 if (force_bind)
264 kthread_bind(kni->pthread, kni->core_id);
265 wake_up_process(kni->pthread);
266 } else {
267 mutex_lock(&knet->kni_kthread_lock);
268
269 if (knet->kni_kthread == NULL) {
270 knet->kni_kthread = kthread_create(kni_thread_single,
271 (void *)knet, "kni_single");
272 if (IS_ERR(knet->kni_kthread)) {
273 mutex_unlock(&knet->kni_kthread_lock);
274 kni_dev_remove(kni);
275 return -ECANCELED;
276 }
277
278 if (force_bind)
279 kthread_bind(knet->kni_kthread, kni->core_id);
280 wake_up_process(knet->kni_kthread);
281 }
282
283 mutex_unlock(&knet->kni_kthread_lock);
284 }
285
286 return 0;
287 }
288
289 static int
kni_ioctl_create(struct net * net,uint32_t ioctl_num,unsigned long ioctl_param)290 kni_ioctl_create(struct net *net, uint32_t ioctl_num,
291 unsigned long ioctl_param)
292 {
293 struct kni_net *knet = net_generic(net, kni_net_id);
294 int ret;
295 struct rte_kni_device_info dev_info;
296 struct net_device *net_dev = NULL;
297 struct kni_dev *kni, *dev, *n;
298
299 pr_info("Creating kni...\n");
300 /* Check the buffer size, to avoid warning */
301 if (_IOC_SIZE(ioctl_num) > sizeof(dev_info))
302 return -EINVAL;
303
304 /* Copy kni info from user space */
305 if (copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info)))
306 return -EFAULT;
307
308 /* Check if name is zero-ended */
309 if (strnlen(dev_info.name, sizeof(dev_info.name)) == sizeof(dev_info.name)) {
310 pr_err("kni.name not zero-terminated");
311 return -EINVAL;
312 }
313
314 /**
315 * Check if the cpu core id is valid for binding.
316 */
317 if (dev_info.force_bind && !cpu_online(dev_info.core_id)) {
318 pr_err("cpu %u is not online\n", dev_info.core_id);
319 return -EINVAL;
320 }
321
322 /* Check if it has been created */
323 down_read(&knet->kni_list_lock);
324 list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) {
325 if (kni_check_param(dev, &dev_info) < 0) {
326 up_read(&knet->kni_list_lock);
327 return -EINVAL;
328 }
329 }
330 up_read(&knet->kni_list_lock);
331
332 net_dev = alloc_netdev(sizeof(struct kni_dev), dev_info.name,
333 #ifdef NET_NAME_USER
334 NET_NAME_USER,
335 #endif
336 kni_net_init);
337 if (net_dev == NULL) {
338 pr_err("error allocating device \"%s\"\n", dev_info.name);
339 return -EBUSY;
340 }
341
342 dev_net_set(net_dev, net);
343
344 kni = netdev_priv(net_dev);
345
346 kni->net_dev = net_dev;
347 kni->core_id = dev_info.core_id;
348 strncpy(kni->name, dev_info.name, RTE_KNI_NAMESIZE);
349
350 /* Translate user space info into kernel space info */
351 if (dev_info.iova_mode) {
352 #ifdef HAVE_IOVA_TO_KVA_MAPPING_SUPPORT
353 kni->tx_q = iova_to_kva(current, dev_info.tx_phys);
354 kni->rx_q = iova_to_kva(current, dev_info.rx_phys);
355 kni->alloc_q = iova_to_kva(current, dev_info.alloc_phys);
356 kni->free_q = iova_to_kva(current, dev_info.free_phys);
357
358 kni->req_q = iova_to_kva(current, dev_info.req_phys);
359 kni->resp_q = iova_to_kva(current, dev_info.resp_phys);
360 kni->sync_va = dev_info.sync_va;
361 kni->sync_kva = iova_to_kva(current, dev_info.sync_phys);
362 kni->usr_tsk = current;
363 kni->iova_mode = 1;
364 #else
365 pr_err("KNI module does not support IOVA to VA translation\n");
366 return -EINVAL;
367 #endif
368 } else {
369
370 kni->tx_q = phys_to_virt(dev_info.tx_phys);
371 kni->rx_q = phys_to_virt(dev_info.rx_phys);
372 kni->alloc_q = phys_to_virt(dev_info.alloc_phys);
373 kni->free_q = phys_to_virt(dev_info.free_phys);
374
375 kni->req_q = phys_to_virt(dev_info.req_phys);
376 kni->resp_q = phys_to_virt(dev_info.resp_phys);
377 kni->sync_va = dev_info.sync_va;
378 kni->sync_kva = phys_to_virt(dev_info.sync_phys);
379 kni->iova_mode = 0;
380 }
381
382 kni->mbuf_size = dev_info.mbuf_size;
383
384 pr_debug("tx_phys: 0x%016llx, tx_q addr: 0x%p\n",
385 (unsigned long long) dev_info.tx_phys, kni->tx_q);
386 pr_debug("rx_phys: 0x%016llx, rx_q addr: 0x%p\n",
387 (unsigned long long) dev_info.rx_phys, kni->rx_q);
388 pr_debug("alloc_phys: 0x%016llx, alloc_q addr: 0x%p\n",
389 (unsigned long long) dev_info.alloc_phys, kni->alloc_q);
390 pr_debug("free_phys: 0x%016llx, free_q addr: 0x%p\n",
391 (unsigned long long) dev_info.free_phys, kni->free_q);
392 pr_debug("req_phys: 0x%016llx, req_q addr: 0x%p\n",
393 (unsigned long long) dev_info.req_phys, kni->req_q);
394 pr_debug("resp_phys: 0x%016llx, resp_q addr: 0x%p\n",
395 (unsigned long long) dev_info.resp_phys, kni->resp_q);
396 pr_debug("mbuf_size: %u\n", kni->mbuf_size);
397
398 /* if user has provided a valid mac address */
399 if (is_valid_ether_addr(dev_info.mac_addr))
400 memcpy(net_dev->dev_addr, dev_info.mac_addr, ETH_ALEN);
401 else
402 /*
403 * Generate random mac address. eth_random_addr() is the
404 * newer version of generating mac address in kernel.
405 */
406 random_ether_addr(net_dev->dev_addr);
407
408 if (dev_info.mtu)
409 net_dev->mtu = dev_info.mtu;
410 #ifdef HAVE_MAX_MTU_PARAM
411 net_dev->max_mtu = net_dev->mtu;
412
413 if (dev_info.min_mtu)
414 net_dev->min_mtu = dev_info.min_mtu;
415
416 if (dev_info.max_mtu)
417 net_dev->max_mtu = dev_info.max_mtu;
418 #endif
419
420 ret = register_netdev(net_dev);
421 if (ret) {
422 pr_err("error %i registering device \"%s\"\n",
423 ret, dev_info.name);
424 kni->net_dev = NULL;
425 kni_dev_remove(kni);
426 free_netdev(net_dev);
427 return -ENODEV;
428 }
429
430 netif_carrier_off(net_dev);
431
432 ret = kni_run_thread(knet, kni, dev_info.force_bind);
433 if (ret != 0)
434 return ret;
435
436 down_write(&knet->kni_list_lock);
437 list_add(&kni->list, &knet->kni_list_head);
438 up_write(&knet->kni_list_lock);
439
440 return 0;
441 }
442
443 static int
kni_ioctl_release(struct net * net,uint32_t ioctl_num,unsigned long ioctl_param)444 kni_ioctl_release(struct net *net, uint32_t ioctl_num,
445 unsigned long ioctl_param)
446 {
447 struct kni_net *knet = net_generic(net, kni_net_id);
448 int ret = -EINVAL;
449 struct kni_dev *dev, *n;
450 struct rte_kni_device_info dev_info;
451
452 if (_IOC_SIZE(ioctl_num) > sizeof(dev_info))
453 return -EINVAL;
454
455 if (copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info)))
456 return -EFAULT;
457
458 /* Release the network device according to its name */
459 if (strlen(dev_info.name) == 0)
460 return -EINVAL;
461
462 down_write(&knet->kni_list_lock);
463 list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) {
464 if (strncmp(dev->name, dev_info.name, RTE_KNI_NAMESIZE) != 0)
465 continue;
466
467 if (multiple_kthread_on && dev->pthread != NULL) {
468 kthread_stop(dev->pthread);
469 dev->pthread = NULL;
470 }
471
472 kni_dev_remove(dev);
473 list_del(&dev->list);
474 ret = 0;
475 break;
476 }
477 up_write(&knet->kni_list_lock);
478 pr_info("%s release kni named %s\n",
479 (ret == 0 ? "Successfully" : "Unsuccessfully"), dev_info.name);
480
481 return ret;
482 }
483
484 static int
kni_ioctl(struct inode * inode,uint32_t ioctl_num,unsigned long ioctl_param)485 kni_ioctl(struct inode *inode, uint32_t ioctl_num, unsigned long ioctl_param)
486 {
487 int ret = -EINVAL;
488 struct net *net = current->nsproxy->net_ns;
489
490 pr_debug("IOCTL num=0x%0x param=0x%0lx\n", ioctl_num, ioctl_param);
491
492 /*
493 * Switch according to the ioctl called
494 */
495 switch (_IOC_NR(ioctl_num)) {
496 case _IOC_NR(RTE_KNI_IOCTL_TEST):
497 /* For test only, not used */
498 break;
499 case _IOC_NR(RTE_KNI_IOCTL_CREATE):
500 ret = kni_ioctl_create(net, ioctl_num, ioctl_param);
501 break;
502 case _IOC_NR(RTE_KNI_IOCTL_RELEASE):
503 ret = kni_ioctl_release(net, ioctl_num, ioctl_param);
504 break;
505 default:
506 pr_debug("IOCTL default\n");
507 break;
508 }
509
510 return ret;
511 }
512
513 static int
kni_compat_ioctl(struct inode * inode,uint32_t ioctl_num,unsigned long ioctl_param)514 kni_compat_ioctl(struct inode *inode, uint32_t ioctl_num,
515 unsigned long ioctl_param)
516 {
517 /* 32 bits app on 64 bits OS to be supported later */
518 pr_debug("Not implemented.\n");
519
520 return -EINVAL;
521 }
522
523 static const struct file_operations kni_fops = {
524 .owner = THIS_MODULE,
525 .open = kni_open,
526 .release = kni_release,
527 .unlocked_ioctl = (void *)kni_ioctl,
528 .compat_ioctl = (void *)kni_compat_ioctl,
529 };
530
531 static struct miscdevice kni_misc = {
532 .minor = MISC_DYNAMIC_MINOR,
533 .name = KNI_DEVICE,
534 .fops = &kni_fops,
535 };
536
537 static int __init
kni_parse_kthread_mode(void)538 kni_parse_kthread_mode(void)
539 {
540 if (!kthread_mode)
541 return 0;
542
543 if (strcmp(kthread_mode, "single") == 0)
544 return 0;
545 else if (strcmp(kthread_mode, "multiple") == 0)
546 multiple_kthread_on = 1;
547 else
548 return -1;
549
550 return 0;
551 }
552
553 static int __init
kni_parse_carrier_state(void)554 kni_parse_carrier_state(void)
555 {
556 if (!carrier) {
557 kni_dflt_carrier = 0;
558 return 0;
559 }
560
561 if (strcmp(carrier, "off") == 0)
562 kni_dflt_carrier = 0;
563 else if (strcmp(carrier, "on") == 0)
564 kni_dflt_carrier = 1;
565 else
566 return -1;
567
568 return 0;
569 }
570
571 static int __init
kni_init(void)572 kni_init(void)
573 {
574 int rc;
575
576 if (kni_parse_kthread_mode() < 0) {
577 pr_err("Invalid parameter for kthread_mode\n");
578 return -EINVAL;
579 }
580
581 if (multiple_kthread_on == 0)
582 pr_debug("Single kernel thread for all KNI devices\n");
583 else
584 pr_debug("Multiple kernel thread mode enabled\n");
585
586 if (kni_parse_carrier_state() < 0) {
587 pr_err("Invalid parameter for carrier\n");
588 return -EINVAL;
589 }
590
591 if (kni_dflt_carrier == 0)
592 pr_debug("Default carrier state set to off.\n");
593 else
594 pr_debug("Default carrier state set to on.\n");
595
596 #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
597 rc = register_pernet_subsys(&kni_net_ops);
598 #else
599 rc = register_pernet_gen_subsys(&kni_net_id, &kni_net_ops);
600 #endif
601 if (rc)
602 return -EPERM;
603
604 rc = misc_register(&kni_misc);
605 if (rc != 0) {
606 pr_err("Misc registration failed\n");
607 goto out;
608 }
609
610 /* Configure the lo mode according to the input parameter */
611 kni_net_config_lo_mode(lo_mode);
612
613 return 0;
614
615 out:
616 #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
617 unregister_pernet_subsys(&kni_net_ops);
618 #else
619 unregister_pernet_gen_subsys(kni_net_id, &kni_net_ops);
620 #endif
621 return rc;
622 }
623
624 static void __exit
kni_exit(void)625 kni_exit(void)
626 {
627 misc_deregister(&kni_misc);
628 #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
629 unregister_pernet_subsys(&kni_net_ops);
630 #else
631 unregister_pernet_gen_subsys(kni_net_id, &kni_net_ops);
632 #endif
633 }
634
635 module_init(kni_init);
636 module_exit(kni_exit);
637
638 module_param(lo_mode, charp, 0644);
639 MODULE_PARM_DESC(lo_mode,
640 "KNI loopback mode (default=lo_mode_none):\n"
641 "\t\tlo_mode_none Kernel loopback disabled\n"
642 "\t\tlo_mode_fifo Enable kernel loopback with fifo\n"
643 "\t\tlo_mode_fifo_skb Enable kernel loopback with fifo and skb buffer\n"
644 "\t\t"
645 );
646
647 module_param(kthread_mode, charp, 0644);
648 MODULE_PARM_DESC(kthread_mode,
649 "Kernel thread mode (default=single):\n"
650 "\t\tsingle Single kernel thread mode enabled.\n"
651 "\t\tmultiple Multiple kernel thread mode enabled.\n"
652 "\t\t"
653 );
654
655 module_param(carrier, charp, 0644);
656 MODULE_PARM_DESC(carrier,
657 "Default carrier state for KNI interface (default=off):\n"
658 "\t\toff Interfaces will be created with carrier state set to off.\n"
659 "\t\ton Interfaces will be created with carrier state set to on.\n"
660 "\t\t"
661 );
662