1 /*
2 * Copyright (C) 2017-2021 THL A29 Limited, a Tencent company.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice, this
9 * list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright notice,
11 * this list of conditions and the following disclaimer in the documentation
12 * and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
18 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
20 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
21 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
23 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 *
25 */
26
#include <stdlib.h>
#include <string.h>

#include <arpa/inet.h>
#include <netinet/icmp6.h>
30
31 #include <rte_config.h>
32 #include <rte_ether.h>
33 #include <rte_bus_pci.h>
34 #include <rte_ethdev.h>
35 #include <rte_kni.h>
36 #include <rte_malloc.h>
37 #include <rte_ring.h>
38 #include <rte_ip.h>
39 #include <rte_tcp.h>
40 #include <rte_udp.h>
41
42 #include "ff_dpdk_kni.h"
43 #include "ff_config.h"
44
45 /* Callback for request of changing MTU */
46 /* Total octets in ethernet header */
47 #define KNI_ENET_HEADER_SIZE 14
48
49 /* Total octets in the FCS */
50 #define KNI_ENET_FCS_SIZE 4
51
52 #define set_bit(n, m) (n | magic_bits[m])
53 #define clear_bit(n, m) (n & (~magic_bits[m]))
54 #define get_bit(n, m) (n & magic_bits[m])
55
56 static const int magic_bits[8] = {
57 0x80, 0x40, 0x20, 0x10,
58 0x8, 0x4, 0x2, 0x1
59 };
60
61 static unsigned char *udp_port_bitmap = NULL;
62 static unsigned char *tcp_port_bitmap = NULL;
63
64 /* Structure type for recording kni interface specific stats */
65 struct kni_interface_stats {
66 struct rte_kni *kni;
67
68 /* number of pkts received from NIC, and sent to KNI */
69 uint64_t rx_packets;
70
71 /* number of pkts received from NIC, but failed to send to KNI */
72 uint64_t rx_dropped;
73
74 /* number of pkts received from KNI, and sent to NIC */
75 uint64_t tx_packets;
76
77 /* number of pkts received from KNI, but failed to send to NIC */
78 uint64_t tx_dropped;
79 };
80
81 struct rte_ring **kni_rp;
82 struct kni_interface_stats **kni_stat;
83
84 static void
set_bitmap(uint16_t port,unsigned char * bitmap)85 set_bitmap(uint16_t port, unsigned char *bitmap)
86 {
87 port = htons(port);
88 unsigned char *p = bitmap + port/8;
89 *p = set_bit(*p, port % 8);
90 }
91
92 static int
get_bitmap(uint16_t port,unsigned char * bitmap)93 get_bitmap(uint16_t port, unsigned char *bitmap)
94 {
95 unsigned char *p = bitmap + port/8;
96 return get_bit(*p, port % 8) > 0 ? 1 : 0;
97 }
98
99 static void
kni_set_bitmap(const char * p,unsigned char * port_bitmap)100 kni_set_bitmap(const char *p, unsigned char *port_bitmap)
101 {
102 int i;
103 const char *head, *tail, *tail_num;
104 if(!p)
105 return;
106
107 head = p;
108 while (1) {
109 tail = strstr(head, ",");
110 tail_num = strstr(head, "-");
111 if(tail_num && (!tail || tail_num < tail - 1)) {
112 for(i = atoi(head); i <= atoi(tail_num + 1); ++i) {
113 set_bitmap(i, port_bitmap);
114 }
115 } else {
116 set_bitmap(atoi(head), port_bitmap);
117 }
118
119 if(!tail)
120 break;
121
122 head = tail + 1;
123 }
124 }
125
126 /* Currently we don't support change mtu. */
127 static int
kni_change_mtu(uint16_t port_id,unsigned new_mtu)128 kni_change_mtu(uint16_t port_id, unsigned new_mtu)
129 {
130 return 0;
131 }
132
133 static int
kni_config_network_interface(uint16_t port_id,uint8_t if_up)134 kni_config_network_interface(uint16_t port_id, uint8_t if_up)
135 {
136 int ret = 0;
137
138 if (!rte_eth_dev_is_valid_port(port_id)) {
139 printf("Invalid port id %d\n", port_id);
140 return -EINVAL;
141 }
142
143 printf("Configure network interface of %d %s\n",
144 port_id, if_up ? "up" : "down");
145
146 ret = (if_up) ?
147 rte_eth_dev_set_link_up(port_id) :
148 rte_eth_dev_set_link_down(port_id);
149
150 if(-ENOTSUP == ret) {
151 if (if_up != 0) {
152 /* Configure network interface up */
153 rte_eth_dev_stop(port_id);
154 ret = rte_eth_dev_start(port_id);
155 } else {
156 /* Configure network interface down */
157 rte_eth_dev_stop(port_id);
158 ret = 0;
159 }
160 }
161
162 if (ret < 0)
163 printf("Failed to Configure network interface of %d %s\n",
164 port_id, if_up ? "up" : "down");
165
166 return ret;
167 }
168
169 static void
print_ethaddr(const char * name,struct rte_ether_addr * mac_addr)170 print_ethaddr(const char *name, struct rte_ether_addr *mac_addr)
171 {
172 char buf[RTE_ETHER_ADDR_FMT_SIZE];
173 rte_ether_format_addr(buf, RTE_ETHER_ADDR_FMT_SIZE, mac_addr);
174 printf("\t%s%s\n", name, buf);
175 }
176
177
178 /* Callback for request of configuring mac address */
179 static int
kni_config_mac_address(uint16_t port_id,uint8_t mac_addr[])180 kni_config_mac_address(uint16_t port_id, uint8_t mac_addr[])
181 {
182 int ret = 0;
183
184 if (!rte_eth_dev_is_valid_port(port_id)) {
185 printf("Invalid port id %d\n", port_id);
186 return -EINVAL;
187 }
188
189 print_ethaddr("Address:", (struct rte_ether_addr *)mac_addr);
190
191 ret = rte_eth_dev_default_mac_addr_set(port_id,
192 (struct rte_ether_addr *)mac_addr);
193 if (ret < 0)
194 printf("Failed to config mac_addr for port %d\n", port_id);
195
196 return ret;
197 }
198
199 static int
kni_process_tx(uint16_t port_id,uint16_t queue_id,struct rte_mbuf ** pkts_burst,unsigned count)200 kni_process_tx(uint16_t port_id, uint16_t queue_id,
201 struct rte_mbuf **pkts_burst, unsigned count)
202 {
203 /* read packet from kni ring(phy port) and transmit to kni */
204 uint16_t nb_tx, nb_kni_tx;
205 nb_tx = rte_ring_dequeue_burst(kni_rp[port_id], (void **)pkts_burst, count, NULL);
206
207 /* NB.
208 * if nb_tx is 0,it must call rte_kni_tx_burst
209 * must Call regularly rte_kni_tx_burst(kni, NULL, 0).
210 * detail https://embedded.communities.intel.com/thread/6668
211 */
212 nb_kni_tx = rte_kni_tx_burst(kni_stat[port_id]->kni, pkts_burst, nb_tx);
213 rte_kni_handle_request(kni_stat[port_id]->kni);
214 if(nb_kni_tx < nb_tx) {
215 uint16_t i;
216 for(i = nb_kni_tx; i < nb_tx; ++i)
217 rte_pktmbuf_free(pkts_burst[i]);
218
219 kni_stat[port_id]->rx_dropped += (nb_tx - nb_kni_tx);
220 }
221
222 kni_stat[port_id]->rx_packets += nb_kni_tx;
223 return 0;
224 }
225
226 static int
kni_process_rx(uint16_t port_id,uint16_t queue_id,struct rte_mbuf ** pkts_burst,unsigned count)227 kni_process_rx(uint16_t port_id, uint16_t queue_id,
228 struct rte_mbuf **pkts_burst, unsigned count)
229 {
230 uint16_t nb_kni_rx, nb_rx;
231
232 /* read packet from kni, and transmit to phy port */
233 nb_kni_rx = rte_kni_rx_burst(kni_stat[port_id]->kni, pkts_burst, count);
234 if (nb_kni_rx > 0) {
235 nb_rx = rte_eth_tx_burst(port_id, queue_id, pkts_burst, nb_kni_rx);
236 if (nb_rx < nb_kni_rx) {
237 uint16_t i;
238 for(i = nb_rx; i < nb_kni_rx; ++i)
239 rte_pktmbuf_free(pkts_burst[i]);
240
241 kni_stat[port_id]->tx_dropped += (nb_kni_rx - nb_rx);
242 }
243
244 kni_stat[port_id]->tx_packets += nb_rx;
245 }
246 return 0;
247 }
248
249 static enum FilterReturn
protocol_filter_l4(uint16_t port,unsigned char * bitmap)250 protocol_filter_l4(uint16_t port, unsigned char *bitmap)
251 {
252 if(get_bitmap(port, bitmap)) {
253 return FILTER_KNI;
254 }
255
256 return FILTER_UNKNOWN;
257 }
258
259 static enum FilterReturn
protocol_filter_tcp(const void * data,uint16_t len)260 protocol_filter_tcp(const void *data, uint16_t len)
261 {
262 if (len < sizeof(struct rte_tcp_hdr))
263 return FILTER_UNKNOWN;
264
265 const struct rte_tcp_hdr *hdr;
266 hdr = (const struct rte_tcp_hdr *)data;
267
268 return protocol_filter_l4(hdr->dst_port, tcp_port_bitmap);
269 }
270
271 static enum FilterReturn
protocol_filter_udp(const void * data,uint16_t len)272 protocol_filter_udp(const void* data,uint16_t len)
273 {
274 if (len < sizeof(struct rte_udp_hdr))
275 return FILTER_UNKNOWN;
276
277 const struct rte_udp_hdr *hdr;
278 hdr = (const struct rte_udp_hdr *)data;
279
280 return protocol_filter_l4(hdr->dst_port, udp_port_bitmap);
281 }
282
283 #ifdef INET6
284 /*
285 * https://www.iana.org/assignments/ipv6-parameters/ipv6-parameters.xhtml
286 */
287 #ifndef IPPROTO_HIP
288 #define IPPROTO_HIP 139
289 #endif
290
291 #ifndef IPPROTO_SHIM6
292 #define IPPROTO_SHIM6 140
293 #endif
294
295 #ifndef IPPROTO_MH
296 #define IPPROTO_MH 135
297 #endif
298 static int
get_ipv6_hdr_len(uint8_t * proto,void * data,uint16_t len)299 get_ipv6_hdr_len(uint8_t *proto, void *data, uint16_t len)
300 {
301 int ext_hdr_len = 0;
302
303 switch (*proto) {
304 case IPPROTO_HOPOPTS: case IPPROTO_ROUTING: case IPPROTO_DSTOPTS:
305 case IPPROTO_MH: case IPPROTO_HIP: case IPPROTO_SHIM6:
306 ext_hdr_len = *((uint8_t *)data + 1) + 1;
307 break;
308 case IPPROTO_FRAGMENT:
309 ext_hdr_len = 8;
310 break;
311 case IPPROTO_AH:
312 ext_hdr_len = (*((uint8_t *)data + 1) + 2) * 4;
313 break;
314 case IPPROTO_NONE:
315 #ifdef FF_IPSEC
316 case IPPROTO_ESP:
317 //proto = *((uint8_t *)data + len - 1 - 4);
318 //ext_hdr_len = len;
319 #endif
320 default:
321 return ext_hdr_len;
322 }
323
324 if (ext_hdr_len >= len) {
325 return len;
326 }
327
328 *proto = *((uint8_t *)data);
329 ext_hdr_len += get_ipv6_hdr_len(proto, data + ext_hdr_len, len - ext_hdr_len);
330
331 return ext_hdr_len;
332 }
333
334 static enum FilterReturn
protocol_filter_icmp6(void * data,uint16_t len)335 protocol_filter_icmp6(void *data, uint16_t len)
336 {
337 if (len < sizeof(struct icmp6_hdr))
338 return FILTER_UNKNOWN;
339
340 const struct icmp6_hdr *hdr;
341 hdr = (const struct icmp6_hdr *)data;
342
343 if (hdr->icmp6_type >= ND_ROUTER_SOLICIT && hdr->icmp6_type <= ND_REDIRECT)
344 return FILTER_NDP;
345
346 return FILTER_UNKNOWN;
347 }
348 #endif
349
350 static enum FilterReturn
protocol_filter_ip(const void * data,uint16_t len,uint16_t eth_frame_type)351 protocol_filter_ip(const void *data, uint16_t len, uint16_t eth_frame_type)
352 {
353 uint8_t proto;
354 int hdr_len;
355 void *next;
356 uint16_t next_len;
357
358 if (eth_frame_type == RTE_ETHER_TYPE_IPV4) {
359 if(len < sizeof(struct rte_ipv4_hdr))
360 return FILTER_UNKNOWN;
361
362 const struct rte_ipv4_hdr *hdr = (struct rte_ipv4_hdr *)data;
363 hdr_len = (hdr->version_ihl & 0x0f) << 2;
364 if (len < hdr_len)
365 return FILTER_UNKNOWN;
366
367 proto = hdr->next_proto_id;
368 #ifdef INET6
369 } else if(eth_frame_type == RTE_ETHER_TYPE_IPV6) {
370 if(len < sizeof(struct rte_ipv6_hdr))
371 return FILTER_UNKNOWN;
372
373 hdr_len = sizeof(struct rte_ipv6_hdr);
374 proto = ((struct rte_ipv6_hdr *)data)->proto;
375 hdr_len += get_ipv6_hdr_len(&proto, (void *)data + hdr_len, len - hdr_len);
376
377 if (len < hdr_len)
378 return FILTER_UNKNOWN;
379 #endif
380 } else {
381 return FILTER_UNKNOWN;
382 }
383
384 next = (void *)data + hdr_len;
385 next_len = len - hdr_len;
386
387 switch (proto) {
388 case IPPROTO_TCP:
389 #ifdef FF_KNI
390 if (!enable_kni)
391 break;
392 #else
393 break;
394 #endif
395 return protocol_filter_tcp(next, next_len);
396 case IPPROTO_UDP:
397 #ifdef FF_KNI
398 if (!enable_kni)
399 break;
400 #else
401 break;
402 #endif
403 return protocol_filter_udp(next, next_len);
404 case IPPROTO_IPIP:
405 return protocol_filter_ip(next, next_len, RTE_ETHER_TYPE_IPV4);
406 #ifdef INET6
407 case IPPROTO_IPV6:
408 return protocol_filter_ip(next, next_len, RTE_ETHER_TYPE_IPV6);
409 case IPPROTO_ICMPV6:
410 return protocol_filter_icmp6(next, next_len);
411 #endif
412 }
413
414 return FILTER_UNKNOWN;
415 }
416
417 enum FilterReturn
ff_kni_proto_filter(const void * data,uint16_t len,uint16_t eth_frame_type)418 ff_kni_proto_filter(const void *data, uint16_t len, uint16_t eth_frame_type)
419 {
420 return protocol_filter_ip(data, len, eth_frame_type);
421 }
422
423 void
ff_kni_init(uint16_t nb_ports,const char * tcp_ports,const char * udp_ports)424 ff_kni_init(uint16_t nb_ports, const char *tcp_ports, const char *udp_ports)
425 {
426 if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
427 kni_stat = rte_zmalloc("kni:stat",
428 sizeof(struct kni_interface_stats *) * nb_ports,
429 RTE_CACHE_LINE_SIZE);
430 if (kni_stat == NULL)
431 rte_exit(EXIT_FAILURE, "rte_zmalloc(1 (struct netio_kni_stat *)) "
432 "failed\n");
433
434 rte_kni_init(nb_ports);
435 }
436
437 uint16_t lcoreid = rte_lcore_id();
438 char name_buf[RTE_RING_NAMESIZE];
439 snprintf(name_buf, RTE_RING_NAMESIZE, "kni::ring_%d", lcoreid);
440 kni_rp = rte_zmalloc(name_buf,
441 sizeof(struct rte_ring *) * nb_ports,
442 RTE_CACHE_LINE_SIZE);
443 if (kni_rp == NULL) {
444 rte_exit(EXIT_FAILURE, "rte_zmalloc(%s (struct rte_ring*)) "
445 "failed\n", name_buf);
446 }
447
448 snprintf(name_buf, RTE_RING_NAMESIZE, "kni:tcp_port_bitmap_%d", lcoreid);
449 tcp_port_bitmap = rte_zmalloc("kni:tcp_port_bitmap", 8192,
450 RTE_CACHE_LINE_SIZE);
451 if (tcp_port_bitmap == NULL) {
452 rte_exit(EXIT_FAILURE, "rte_zmalloc(%s (tcp_port_bitmap)) "
453 "failed\n", name_buf);
454 }
455
456 snprintf(name_buf, RTE_RING_NAMESIZE, "kni:udp_port_bitmap_%d", lcoreid);
457 udp_port_bitmap = rte_zmalloc("kni:udp_port_bitmap", 8192,
458 RTE_CACHE_LINE_SIZE);
459 if (udp_port_bitmap == NULL) {
460 rte_exit(EXIT_FAILURE, "rte_zmalloc(%s (udp_port_bitmap)) "
461 "failed\n",name_buf);
462 }
463
464 memset(tcp_port_bitmap, 0, 8192);
465 memset(udp_port_bitmap, 0, 8192);
466
467 kni_set_bitmap(tcp_ports, tcp_port_bitmap);
468 kni_set_bitmap(udp_ports, udp_port_bitmap);
469 }
470
471 void
ff_kni_alloc(uint16_t port_id,unsigned socket_id,struct rte_mempool * mbuf_pool,unsigned ring_queue_size)472 ff_kni_alloc(uint16_t port_id, unsigned socket_id,
473 struct rte_mempool *mbuf_pool, unsigned ring_queue_size)
474 {
475 if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
476 struct rte_kni_conf conf;
477 struct rte_kni_ops ops;
478 struct rte_eth_dev_info dev_info;
479 const struct rte_pci_device *pci_dev;
480 const struct rte_bus *bus = NULL;
481
482 kni_stat[port_id] = (struct kni_interface_stats*)rte_zmalloc(
483 "kni:stat_lcore",
484 sizeof(struct kni_interface_stats),
485 RTE_CACHE_LINE_SIZE);
486
487 if (kni_stat[port_id] == NULL)
488 rte_panic("rte_zmalloc kni_interface_stats failed\n");
489
490 /* only support one kni */
491 memset(&conf, 0, sizeof(conf));
492 snprintf(conf.name, RTE_KNI_NAMESIZE, "veth%u", port_id);
493 conf.core_id = rte_lcore_id();
494 conf.force_bind = 1;
495 conf.group_id = port_id;
496 uint16_t mtu;
497 rte_eth_dev_get_mtu(port_id, &mtu);
498 conf.mbuf_size = mtu + KNI_ENET_HEADER_SIZE + KNI_ENET_FCS_SIZE;
499
500 memset(&dev_info, 0, sizeof(dev_info));
501 rte_eth_dev_info_get(port_id, &dev_info);
502
503 if (dev_info.device)
504 bus = rte_bus_find_by_device(dev_info.device);
505 if (bus && !strcmp(bus->name, "pci")) {
506 pci_dev = RTE_DEV_TO_PCI(dev_info.device);
507 conf.addr = pci_dev->addr;
508 conf.id = pci_dev->id;
509 }
510
511 /* Get the interface default mac address */
512 rte_eth_macaddr_get(port_id,
513 (struct rte_ether_addr *)&conf.mac_addr);
514
515 memset(&ops, 0, sizeof(ops));
516 ops.port_id = port_id;
517 ops.change_mtu = kni_change_mtu;
518 ops.config_network_if = kni_config_network_interface;
519 ops.config_mac_address = kni_config_mac_address;
520
521 kni_stat[port_id]->kni = rte_kni_alloc(mbuf_pool, &conf, &ops);
522 if (kni_stat[port_id]->kni == NULL)
523 rte_panic("create kni on port %u failed!\n", port_id);
524 else
525 printf("create kni on port %u success!\n", port_id);
526
527 kni_stat[port_id]->rx_packets = 0;
528 kni_stat[port_id]->rx_dropped = 0;
529 kni_stat[port_id]->tx_packets = 0;
530 kni_stat[port_id]->tx_dropped = 0;
531 }
532
533 char ring_name[RTE_KNI_NAMESIZE];
534 snprintf((char*)ring_name, RTE_KNI_NAMESIZE, "kni_ring_%u", port_id);
535
536 if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
537 kni_rp[port_id] = rte_ring_create(ring_name, ring_queue_size,
538 socket_id, RING_F_SC_DEQ);
539
540 if (rte_ring_lookup(ring_name) != kni_rp[port_id])
541 rte_panic("lookup kni ring failed!\n");
542 } else {
543 kni_rp[port_id] = rte_ring_lookup(ring_name);
544 }
545
546 if (kni_rp[port_id] == NULL)
547 rte_panic("create kni ring failed!\n");
548
549 printf("create kni ring success, %u ring entries are now free!\n",
550 rte_ring_free_count(kni_rp[port_id]));
551 }
552
553 void
ff_kni_process(uint16_t port_id,uint16_t queue_id,struct rte_mbuf ** pkts_burst,unsigned count)554 ff_kni_process(uint16_t port_id, uint16_t queue_id,
555 struct rte_mbuf **pkts_burst, unsigned count)
556 {
557 kni_process_tx(port_id, queue_id, pkts_burst, count);
558 kni_process_rx(port_id, queue_id, pkts_burst, count);
559 }
560
561 /* enqueue the packet, and own it */
562 int
ff_kni_enqueue(uint16_t port_id,struct rte_mbuf * pkt)563 ff_kni_enqueue(uint16_t port_id, struct rte_mbuf *pkt)
564 {
565 int ret = rte_ring_enqueue(kni_rp[port_id], pkt);
566 if (ret < 0)
567 rte_pktmbuf_free(pkt);
568
569 return 0;
570 }
571
572