/* Source: f-stack/lib/ff_dpdk_kni.c (revision 2317ada5) */
/*
 * Copyright (C) 2017-2021 THL A29 Limited, a Tencent company.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *   list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *   this list of conditions and the following disclaimer in the documentation
 *   and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */
26 
27 #include <stdlib.h>
28 #include <arpa/inet.h>
29 #include <netinet/icmp6.h>
30 
31 #include <rte_config.h>
32 #include <rte_ether.h>
33 #include <rte_bus_pci.h>
34 #include <rte_ethdev.h>
35 #include <rte_kni.h>
36 #include <rte_malloc.h>
37 #include <rte_ring.h>
38 #include <rte_ip.h>
39 #include <rte_tcp.h>
40 #include <rte_udp.h>
41 
42 #include "ff_dpdk_kni.h"
43 #include "ff_config.h"
44 
/*
 * Ethernet framing overhead, in octets. Both values are added to the port
 * MTU when the KNI mbuf size is computed in ff_kni_alloc().
 */

/* Size of the Ethernet header (destination MAC, source MAC, EtherType) */
#define KNI_ENET_HEADER_SIZE    14

/* Size of the trailing frame check sequence */
#define KNI_ENET_FCS_SIZE       4
51 
/*
 * Single-bit helpers for the port bitmaps.
 *
 * 'n' is the byte value to operate on and 'm' is the bit position (0..7),
 * where position 0 selects the most significant bit (see magic_bits).
 * The macros yield the resulting value; they do not modify 'n'.
 *
 * Every argument is parenthesized so that compound expressions such as
 * clear_bit(a | b, m) are evaluated as a whole instead of being torn apart
 * by operator precedence.
 */
#define set_bit(n, m)   ((n) | magic_bits[(m)])
#define clear_bit(n, m) ((n) & (~magic_bits[(m)]))
#define get_bit(n, m)   ((n) & magic_bits[(m)])

/* Mask for bit position i of a byte, counted from the most significant bit */
static const int magic_bits[8] = {
    0x80, 0x40, 0x20, 0x10,
    0x8, 0x4, 0x2, 0x1
};

/*
 * One bit per 16-bit port value; a set bit marks a destination port whose
 * traffic is classified as FILTER_KNI. Both bitmaps are allocated and filled
 * by ff_kni_init().
 */
static unsigned char *udp_port_bitmap = NULL;
static unsigned char *tcp_port_bitmap = NULL;
63 
/* Per-port KNI handle together with its packet counters */
struct kni_interface_stats {
    /* KNI device created for the port by rte_kni_alloc() */
    struct rte_kni *kni;

    /* packets coming from the NIC side that were handed to the KNI device */
    uint64_t rx_packets;

    /* packets coming from the NIC side that the KNI device did not accept */
    uint64_t rx_dropped;

    /* packets read from the KNI device and transmitted on the NIC */
    uint64_t tx_packets;

    /* packets read from the KNI device that the NIC tx queue did not accept */
    uint64_t tx_dropped;
};

/* Per-port rings holding packets destined for the KNI, indexed by port id */
struct rte_ring **kni_rp;

/* Per-port KNI state, indexed by port id (allocated by the primary process) */
struct kni_interface_stats **kni_stat;
83 
/*
 * Mark a port in a port bitmap.
 *
 * 'port' is given in host byte order and converted with htons() before it is
 * used as the bit index, so the bitmap is indexed by the network-order value.
 */
static void
set_bitmap(uint16_t port, unsigned char *bitmap)
{
    const uint16_t net_port = htons(port);
    unsigned char *slot = &bitmap[net_port / 8];

    *slot = set_bit(*slot, net_port % 8);
}
91 
/*
 * Test whether a port is marked in a port bitmap.
 *
 * 'port' is used as the bit index as-is (no byte-order conversion is done
 * here). Returns 1 when the bit is set, 0 otherwise.
 */
static int
get_bitmap(uint16_t port, unsigned char *bitmap)
{
    const unsigned char byte = bitmap[port / 8];

    if (get_bit(byte, port % 8) > 0)
        return 1;

    return 0;
}
98 
/*
 * Parse a port list and mark every listed port in 'port_bitmap'.
 *
 * 'p' is a comma separated list whose items are either a single port ("80")
 * or an inclusive range ("1000-1010"). A NULL list is ignored.
 *
 * Numbers are parsed with strtol() and the range bounds are computed once
 * per item. Values outside 0..65535 are clamped/skipped instead of being
 * truncated to an unrelated 16-bit port, and an oversized upper bound can no
 * longer overflow the loop counter. An item without digits parses as 0, as
 * before.
 */
static void
kni_set_bitmap(const char *p, unsigned char *port_bitmap)
{
    const char *head, *tail, *tail_num;
    long first, last, port;

    if (!p)
        return;

    head = p;
    while (1) {
        tail = strstr(head, ",");
        tail_num = strstr(head, "-");

        first = strtol(head, NULL, 10);
        /* a '-' belongs to this item only if it precedes the next ',' */
        if (tail_num && (!tail || tail_num < tail - 1))
            last = strtol(tail_num + 1, NULL, 10);
        else
            last = first;

        /* keep the range inside the valid port space */
        if (first < 0)
            first = 0;
        if (last > UINT16_MAX)
            last = UINT16_MAX;

        for (port = first; port <= last; ++port)
            set_bitmap((uint16_t)port, port_bitmap);

        if (!tail)
            break;

        head = tail + 1;
    }
}
125 
/*
 * KNI callback for an MTU change request (installed as ops.change_mtu in
 * ff_kni_alloc()). Changing the MTU is not supported: the request is
 * accepted and ignored, and 0 is always returned.
 */
static int
kni_change_mtu(uint16_t port_id, unsigned new_mtu)
{
    (void)port_id;
    (void)new_mtu;

    return 0;
}
132 
/*
 * KNI callback for bringing the interface up or down (installed as
 * ops.config_network_if in ff_kni_alloc()).
 *
 * The link is switched with rte_eth_dev_set_link_up()/_down(). When the
 * driver reports -ENOTSUP the port is stopped instead, and restarted if the
 * request was "up".
 *
 * Returns -EINVAL for an invalid port id, otherwise the result of the last
 * ethdev call (0 when the port was merely stopped).
 */
static int
kni_config_network_interface(uint16_t port_id, uint8_t if_up)
{
    const char *state = if_up ? "up" : "down";
    int ret;

    if (!rte_eth_dev_is_valid_port(port_id)) {
        printf("Invalid port id %d\n", port_id);
        return -EINVAL;
    }

    printf("Configure network interface of %d %s\n", port_id, state);

    if (if_up)
        ret = rte_eth_dev_set_link_up(port_id);
    else
        ret = rte_eth_dev_set_link_down(port_id);

    if (ret == -ENOTSUP) {
        /* no link control in the driver: stop, and start again for "up" */
        rte_eth_dev_stop(port_id);
        ret = if_up ? rte_eth_dev_start(port_id) : 0;
    }

    if (ret < 0)
        printf("Failed to Configure network interface of %d %s\n",
            port_id, state);

    return ret;
}
168 
169 static void
print_ethaddr(const char * name,struct rte_ether_addr * mac_addr)170 print_ethaddr(const char *name, struct rte_ether_addr *mac_addr)
171 {
172     char buf[RTE_ETHER_ADDR_FMT_SIZE];
173     rte_ether_format_addr(buf, RTE_ETHER_ADDR_FMT_SIZE, mac_addr);
174     printf("\t%s%s\n", name, buf);
175 }
176 
177 
/*
 * KNI callback for a MAC address change request (installed as
 * ops.config_mac_address in ff_kni_alloc()).
 *
 * Logs the requested address and installs it as the port's default MAC.
 * Returns -EINVAL for an invalid port id, otherwise the result of
 * rte_eth_dev_default_mac_addr_set().
 */
static int
kni_config_mac_address(uint16_t port_id, uint8_t mac_addr[])
{
    struct rte_ether_addr *addr = (struct rte_ether_addr *)mac_addr;
    int ret;

    if (!rte_eth_dev_is_valid_port(port_id)) {
        printf("Invalid port id %d\n", port_id);
        return -EINVAL;
    }

    print_ethaddr("Address:", addr);

    ret = rte_eth_dev_default_mac_addr_set(port_id, addr);
    if (ret < 0)
        printf("Failed to config mac_addr for port %d\n", port_id);

    return ret;
}
198 
199 static int
kni_process_tx(uint16_t port_id,uint16_t queue_id,struct rte_mbuf ** pkts_burst,unsigned count)200 kni_process_tx(uint16_t port_id, uint16_t queue_id,
201     struct rte_mbuf **pkts_burst, unsigned count)
202 {
203     /* read packet from kni ring(phy port) and transmit to kni */
204     uint16_t nb_tx, nb_kni_tx;
205     nb_tx = rte_ring_dequeue_burst(kni_rp[port_id], (void **)pkts_burst, count, NULL);
206 
207     /* NB.
208      * if nb_tx is 0,it must call rte_kni_tx_burst
209      * must Call regularly rte_kni_tx_burst(kni, NULL, 0).
210      * detail https://embedded.communities.intel.com/thread/6668
211      */
212     nb_kni_tx = rte_kni_tx_burst(kni_stat[port_id]->kni, pkts_burst, nb_tx);
213     rte_kni_handle_request(kni_stat[port_id]->kni);
214     if(nb_kni_tx < nb_tx) {
215         uint16_t i;
216         for(i = nb_kni_tx; i < nb_tx; ++i)
217             rte_pktmbuf_free(pkts_burst[i]);
218 
219         kni_stat[port_id]->rx_dropped += (nb_tx - nb_kni_tx);
220     }
221 
222     kni_stat[port_id]->rx_packets += nb_kni_tx;
223     return 0;
224 }
225 
226 static int
kni_process_rx(uint16_t port_id,uint16_t queue_id,struct rte_mbuf ** pkts_burst,unsigned count)227 kni_process_rx(uint16_t port_id, uint16_t queue_id,
228     struct rte_mbuf **pkts_burst, unsigned count)
229 {
230     uint16_t nb_kni_rx, nb_rx;
231 
232     /* read packet from kni, and transmit to phy port */
233     nb_kni_rx = rte_kni_rx_burst(kni_stat[port_id]->kni, pkts_burst, count);
234     if (nb_kni_rx > 0) {
235         nb_rx = rte_eth_tx_burst(port_id, queue_id, pkts_burst, nb_kni_rx);
236         if (nb_rx < nb_kni_rx) {
237             uint16_t i;
238             for(i = nb_rx; i < nb_kni_rx; ++i)
239                 rte_pktmbuf_free(pkts_burst[i]);
240 
241             kni_stat[port_id]->tx_dropped += (nb_kni_rx - nb_rx);
242         }
243 
244         kni_stat[port_id]->tx_packets += nb_rx;
245     }
246     return 0;
247 }
248 
249 static enum FilterReturn
protocol_filter_l4(uint16_t port,unsigned char * bitmap)250 protocol_filter_l4(uint16_t port, unsigned char *bitmap)
251 {
252     if(get_bitmap(port, bitmap)) {
253         return FILTER_KNI;
254     }
255 
256     return FILTER_UNKNOWN;
257 }
258 
259 static enum FilterReturn
protocol_filter_tcp(const void * data,uint16_t len)260 protocol_filter_tcp(const void *data, uint16_t len)
261 {
262     if (len < sizeof(struct rte_tcp_hdr))
263         return FILTER_UNKNOWN;
264 
265     const struct rte_tcp_hdr *hdr;
266     hdr = (const struct rte_tcp_hdr *)data;
267 
268     return protocol_filter_l4(hdr->dst_port, tcp_port_bitmap);
269 }
270 
271 static enum FilterReturn
protocol_filter_udp(const void * data,uint16_t len)272 protocol_filter_udp(const void* data,uint16_t len)
273 {
274     if (len < sizeof(struct rte_udp_hdr))
275         return FILTER_UNKNOWN;
276 
277     const struct rte_udp_hdr *hdr;
278     hdr = (const struct rte_udp_hdr *)data;
279 
280     return protocol_filter_l4(hdr->dst_port, udp_port_bitmap);
281 }
282 
283 #ifdef INET6
/*
 * Extension header protocol numbers that may be missing from the system
 * headers, see
 * https://www.iana.org/assignments/ipv6-parameters/ipv6-parameters.xhtml
 */
#ifndef IPPROTO_HIP
#define IPPROTO_HIP 139
#endif

#ifndef IPPROTO_SHIM6
#define IPPROTO_SHIM6   140
#endif

#ifndef IPPROTO_MH
#define IPPROTO_MH   135
#endif

/*
 * Walk the chain of IPv6 extension headers.
 *
 * proto: in  - "next header" value taken from the fixed IPv6 header;
 *        out - protocol of the first header that is not a walkable
 *              extension header (left untouched from the point where the
 *              chain turns out to be truncated).
 * data:  first byte after the fixed IPv6 header.
 * len:   number of bytes available at 'data'.
 *
 * Returns the total size in bytes of the extension headers that were
 * skipped, or 'len' when an extension header does not leave any payload
 * inside the buffer.
 *
 * Hop-by-hop, routing, destination options, MH, HIP and SHIM6 headers carry
 * their length in units of 8 octets, not counting the first 8 octets, i.e.
 * (field + 1) * 8 bytes. AH uses 4-octet units minus 2, i.e. (field + 2) * 4
 * bytes. The fragment header is always 8 bytes. ESP is not parsed and ends
 * the walk like any other upper-layer protocol.
 */
static int
get_ipv6_hdr_len(uint8_t *proto, void *data, uint16_t len)
{
    const uint8_t *hdr = (const uint8_t *)data;
    int total = 0;

    for (;;) {
        const uint16_t remaining = len - total;
        int ext_hdr_len;

        switch (*proto) {
        case IPPROTO_HOPOPTS:   case IPPROTO_ROUTING:   case IPPROTO_DSTOPTS:
        case IPPROTO_MH:        case IPPROTO_HIP:       case IPPROTO_SHIM6:
            /* the length byte must be inside the buffer before it is read */
            if (remaining < 2)
                return len;
            ext_hdr_len = (hdr[total + 1] + 1) * 8;
            break;
        case IPPROTO_FRAGMENT:
            ext_hdr_len = 8;
            break;
        case IPPROTO_AH:
            if (remaining < 2)
                return len;
            ext_hdr_len = (hdr[total + 1] + 2) * 4;
            break;
        case IPPROTO_NONE:
        default:
            return total;
        }

        /* header reaches (or passes) the end of the buffer: nothing follows */
        if (ext_hdr_len >= remaining)
            return len;

        /* first byte of every extension header is its "next header" field */
        *proto = hdr[total];
        total += ext_hdr_len;
    }
}
333 
334 static enum FilterReturn
protocol_filter_icmp6(void * data,uint16_t len)335 protocol_filter_icmp6(void *data, uint16_t len)
336 {
337     if (len < sizeof(struct icmp6_hdr))
338         return FILTER_UNKNOWN;
339 
340     const struct icmp6_hdr *hdr;
341     hdr = (const struct icmp6_hdr *)data;
342 
343     if (hdr->icmp6_type >= ND_ROUTER_SOLICIT && hdr->icmp6_type <= ND_REDIRECT)
344         return FILTER_NDP;
345 
346     return FILTER_UNKNOWN;
347 }
348 #endif
349 
350 static enum FilterReturn
protocol_filter_ip(const void * data,uint16_t len,uint16_t eth_frame_type)351 protocol_filter_ip(const void *data, uint16_t len, uint16_t eth_frame_type)
352 {
353     uint8_t proto;
354     int hdr_len;
355     void *next;
356     uint16_t next_len;
357 
358     if (eth_frame_type == RTE_ETHER_TYPE_IPV4) {
359         if(len < sizeof(struct rte_ipv4_hdr))
360             return FILTER_UNKNOWN;
361 
362         const struct rte_ipv4_hdr *hdr = (struct rte_ipv4_hdr *)data;
363         hdr_len = (hdr->version_ihl & 0x0f) << 2;
364         if (len < hdr_len)
365             return FILTER_UNKNOWN;
366 
367         proto = hdr->next_proto_id;
368 #ifdef INET6
369     } else if(eth_frame_type == RTE_ETHER_TYPE_IPV6) {
370         if(len < sizeof(struct rte_ipv6_hdr))
371             return FILTER_UNKNOWN;
372 
373         hdr_len = sizeof(struct rte_ipv6_hdr);
374         proto = ((struct rte_ipv6_hdr *)data)->proto;
375         hdr_len += get_ipv6_hdr_len(&proto, (void *)data + hdr_len, len - hdr_len);
376 
377         if (len < hdr_len)
378             return FILTER_UNKNOWN;
379 #endif
380     } else {
381         return FILTER_UNKNOWN;
382     }
383 
384     next = (void *)data + hdr_len;
385     next_len = len - hdr_len;
386 
387     switch (proto) {
388         case IPPROTO_TCP:
389 #ifdef FF_KNI
390             if (!enable_kni)
391                 break;
392 #else
393             break;
394 #endif
395             return protocol_filter_tcp(next, next_len);
396         case IPPROTO_UDP:
397 #ifdef FF_KNI
398             if (!enable_kni)
399                 break;
400 #else
401             break;
402 #endif
403             return protocol_filter_udp(next, next_len);
404         case IPPROTO_IPIP:
405             return protocol_filter_ip(next, next_len, RTE_ETHER_TYPE_IPV4);
406 #ifdef INET6
407         case IPPROTO_IPV6:
408             return protocol_filter_ip(next, next_len, RTE_ETHER_TYPE_IPV6);
409         case IPPROTO_ICMPV6:
410             return protocol_filter_icmp6(next, next_len);
411 #endif
412     }
413 
414     return FILTER_UNKNOWN;
415 }
416 
417 enum FilterReturn
ff_kni_proto_filter(const void * data,uint16_t len,uint16_t eth_frame_type)418 ff_kni_proto_filter(const void *data, uint16_t len, uint16_t eth_frame_type)
419 {
420     return protocol_filter_ip(data, len, eth_frame_type);
421 }
422 
423 void
ff_kni_init(uint16_t nb_ports,const char * tcp_ports,const char * udp_ports)424 ff_kni_init(uint16_t nb_ports, const char *tcp_ports, const char *udp_ports)
425 {
426     if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
427         kni_stat = rte_zmalloc("kni:stat",
428             sizeof(struct kni_interface_stats *) * nb_ports,
429             RTE_CACHE_LINE_SIZE);
430         if (kni_stat == NULL)
431             rte_exit(EXIT_FAILURE, "rte_zmalloc(1 (struct netio_kni_stat *)) "
432                 "failed\n");
433 
434         rte_kni_init(nb_ports);
435     }
436 
437     uint16_t lcoreid = rte_lcore_id();
438     char name_buf[RTE_RING_NAMESIZE];
439     snprintf(name_buf, RTE_RING_NAMESIZE, "kni::ring_%d", lcoreid);
440     kni_rp = rte_zmalloc(name_buf,
441             sizeof(struct rte_ring *) * nb_ports,
442             RTE_CACHE_LINE_SIZE);
443     if (kni_rp == NULL) {
444         rte_exit(EXIT_FAILURE, "rte_zmalloc(%s (struct rte_ring*)) "
445                 "failed\n", name_buf);
446     }
447 
448     snprintf(name_buf, RTE_RING_NAMESIZE, "kni:tcp_port_bitmap_%d", lcoreid);
449     tcp_port_bitmap = rte_zmalloc("kni:tcp_port_bitmap", 8192,
450         RTE_CACHE_LINE_SIZE);
451     if (tcp_port_bitmap == NULL) {
452         rte_exit(EXIT_FAILURE, "rte_zmalloc(%s (tcp_port_bitmap)) "
453                 "failed\n", name_buf);
454     }
455 
456     snprintf(name_buf, RTE_RING_NAMESIZE, "kni:udp_port_bitmap_%d", lcoreid);
457     udp_port_bitmap = rte_zmalloc("kni:udp_port_bitmap", 8192,
458         RTE_CACHE_LINE_SIZE);
459     if (udp_port_bitmap == NULL) {
460         rte_exit(EXIT_FAILURE, "rte_zmalloc(%s (udp_port_bitmap)) "
461                 "failed\n",name_buf);
462     }
463 
464     memset(tcp_port_bitmap, 0, 8192);
465     memset(udp_port_bitmap, 0, 8192);
466 
467     kni_set_bitmap(tcp_ports, tcp_port_bitmap);
468     kni_set_bitmap(udp_ports, udp_port_bitmap);
469 }
470 
471 void
ff_kni_alloc(uint16_t port_id,unsigned socket_id,struct rte_mempool * mbuf_pool,unsigned ring_queue_size)472 ff_kni_alloc(uint16_t port_id, unsigned socket_id,
473     struct rte_mempool *mbuf_pool, unsigned ring_queue_size)
474 {
475     if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
476         struct rte_kni_conf conf;
477         struct rte_kni_ops ops;
478         struct rte_eth_dev_info dev_info;
479         const struct rte_pci_device *pci_dev;
480         const struct rte_bus *bus = NULL;
481 
482         kni_stat[port_id] = (struct kni_interface_stats*)rte_zmalloc(
483             "kni:stat_lcore",
484             sizeof(struct kni_interface_stats),
485             RTE_CACHE_LINE_SIZE);
486 
487         if (kni_stat[port_id] == NULL)
488             rte_panic("rte_zmalloc kni_interface_stats failed\n");
489 
490         /* only support one kni */
491         memset(&conf, 0, sizeof(conf));
492         snprintf(conf.name, RTE_KNI_NAMESIZE, "veth%u", port_id);
493         conf.core_id = rte_lcore_id();
494         conf.force_bind = 1;
495         conf.group_id = port_id;
496         uint16_t mtu;
497         rte_eth_dev_get_mtu(port_id, &mtu);
498         conf.mbuf_size = mtu + KNI_ENET_HEADER_SIZE + KNI_ENET_FCS_SIZE;
499 
500         memset(&dev_info, 0, sizeof(dev_info));
501         rte_eth_dev_info_get(port_id, &dev_info);
502 
503         if (dev_info.device)
504             bus = rte_bus_find_by_device(dev_info.device);
505         if (bus && !strcmp(bus->name, "pci")) {
506             pci_dev = RTE_DEV_TO_PCI(dev_info.device);
507             conf.addr = pci_dev->addr;
508             conf.id = pci_dev->id;
509         }
510 
511         /* Get the interface default mac address */
512         rte_eth_macaddr_get(port_id,
513                 (struct rte_ether_addr *)&conf.mac_addr);
514 
515         memset(&ops, 0, sizeof(ops));
516         ops.port_id = port_id;
517         ops.change_mtu = kni_change_mtu;
518         ops.config_network_if = kni_config_network_interface;
519         ops.config_mac_address = kni_config_mac_address;
520 
521         kni_stat[port_id]->kni = rte_kni_alloc(mbuf_pool, &conf, &ops);
522         if (kni_stat[port_id]->kni == NULL)
523             rte_panic("create kni on port %u failed!\n", port_id);
524         else
525             printf("create kni on port %u success!\n", port_id);
526 
527         kni_stat[port_id]->rx_packets = 0;
528         kni_stat[port_id]->rx_dropped = 0;
529         kni_stat[port_id]->tx_packets = 0;
530         kni_stat[port_id]->tx_dropped = 0;
531     }
532 
533     char ring_name[RTE_KNI_NAMESIZE];
534     snprintf((char*)ring_name, RTE_KNI_NAMESIZE, "kni_ring_%u", port_id);
535 
536     if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
537         kni_rp[port_id] = rte_ring_create(ring_name, ring_queue_size,
538             socket_id, RING_F_SC_DEQ);
539 
540         if (rte_ring_lookup(ring_name) != kni_rp[port_id])
541             rte_panic("lookup kni ring failed!\n");
542     } else {
543         kni_rp[port_id] = rte_ring_lookup(ring_name);
544     }
545 
546     if (kni_rp[port_id] == NULL)
547         rte_panic("create kni ring failed!\n");
548 
549     printf("create kni ring success, %u ring entries are now free!\n",
550         rte_ring_free_count(kni_rp[port_id]));
551 }
552 
/*
 * Run one KNI polling round for a port: first push queued packets from the
 * port's KNI ring into the KNI device, then forward packets coming out of
 * the KNI device to the NIC queue (port_id, queue_id).
 *
 * 'pkts_burst' is scratch space for up to 'count' mbuf pointers and is
 * reused by both steps.
 */
void
ff_kni_process(uint16_t port_id, uint16_t queue_id,
    struct rte_mbuf **pkts_burst, unsigned count)
{
    /* both helpers always return 0 */
    (void)kni_process_tx(port_id, queue_id, pkts_burst, count);
    (void)kni_process_rx(port_id, queue_id, pkts_burst, count);
}
560 
561 /* enqueue the packet, and own it */
562 int
ff_kni_enqueue(uint16_t port_id,struct rte_mbuf * pkt)563 ff_kni_enqueue(uint16_t port_id, struct rte_mbuf *pkt)
564 {
565     int ret = rte_ring_enqueue(kni_rp[port_id], pkt);
566     if (ret < 0)
567         rte_pktmbuf_free(pkt);
568 
569     return 0;
570 }
571 
572