1 /* 2 * Copyright (C) 2017 THL A29 Limited, a Tencent company. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are met: 7 * 8 * 1. Redistributions of source code must retain the above copyright notice, this 9 * list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright notice, 11 * this list of conditions and the following disclaimer in the documentation 12 * and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 16 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 18 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 19 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 20 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 21 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 23 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 * 25 */ 26 27 #include <stdlib.h> 28 #include <arpa/inet.h> 29 #include <netinet/icmp6.h> 30 31 #include <rte_config.h> 32 #include <rte_ether.h> 33 #include <rte_bus_pci.h> 34 #include <rte_ethdev.h> 35 #include <rte_kni.h> 36 #include <rte_malloc.h> 37 #include <rte_ring.h> 38 #include <rte_ip.h> 39 #include <rte_tcp.h> 40 #include <rte_udp.h> 41 42 #include "ff_dpdk_kni.h" 43 #include "ff_config.h" 44 45 /* Callback for request of changing MTU */ 46 /* Total octets in ethernet header */ 47 #define KNI_ENET_HEADER_SIZE 14 48 49 /* Total octets in the FCS */ 50 #define KNI_ENET_FCS_SIZE 4 51 52 #define set_bit(n, m) (n | magic_bits[m]) 53 #define clear_bit(n, m) (n & (~magic_bits[m])) 54 #define get_bit(n, m) (n & magic_bits[m]) 55 56 static const int magic_bits[8] = { 57 0x80, 0x40, 0x20, 0x10, 58 0x8, 0x4, 0x2, 0x1 59 }; 60 61 static unsigned char *udp_port_bitmap = NULL; 62 static unsigned char *tcp_port_bitmap = NULL; 63 64 /* Structure type for recording kni interface specific stats */ 65 struct kni_interface_stats { 66 struct rte_kni *kni; 67 68 /* number of pkts received from NIC, and sent to KNI */ 69 uint64_t rx_packets; 70 71 /* number of pkts received from NIC, but failed to send to KNI */ 72 uint64_t rx_dropped; 73 74 /* number of pkts received from KNI, and sent to NIC */ 75 uint64_t tx_packets; 76 77 /* number of pkts received from KNI, but failed to send to NIC */ 78 uint64_t tx_dropped; 79 }; 80 81 struct rte_ring **kni_rp; 82 struct kni_interface_stats **kni_stat; 83 84 static void 85 set_bitmap(uint16_t port, unsigned char *bitmap) 86 { 87 port = htons(port); 88 unsigned char *p = bitmap + port/8; 89 *p = set_bit(*p, port % 8); 90 } 91 92 static int 93 get_bitmap(uint16_t port, unsigned char *bitmap) 94 { 95 unsigned char *p = bitmap + port/8; 96 return get_bit(*p, port % 8) > 0 ? 1 : 0; 97 } 98 99 static void 100 kni_set_bitmap(const char *p, unsigned char *port_bitmap) 101 { 102 int i; 103 const char *head, *tail, *tail_num; 104 if(!p) 105 return; 106 107 head = p; 108 while (1) { 109 tail = strstr(head, ","); 110 tail_num = strstr(head, "-"); 111 if(tail_num && (!tail || tail_num < tail - 1)) { 112 for(i = atoi(head); i <= atoi(tail_num + 1); ++i) { 113 set_bitmap(i, port_bitmap); 114 } 115 } else { 116 set_bitmap(atoi(head), port_bitmap); 117 } 118 119 if(!tail) 120 break; 121 122 head = tail + 1; 123 } 124 } 125 126 /* Currently we don't support change mtu. */ 127 static int 128 kni_change_mtu(uint16_t port_id, unsigned new_mtu) 129 { 130 return 0; 131 } 132 133 static int 134 kni_config_network_interface(uint16_t port_id, uint8_t if_up) 135 { 136 int ret = 0; 137 138 if (!rte_eth_dev_is_valid_port(port_id)) { 139 printf("Invalid port id %d\n", port_id); 140 return -EINVAL; 141 } 142 143 printf("Configure network interface of %d %s\n", 144 port_id, if_up ? "up" : "down"); 145 146 ret = (if_up) ? 147 rte_eth_dev_set_link_up(port_id) : 148 rte_eth_dev_set_link_down(port_id); 149 150 if(-ENOTSUP == ret) { 151 if (if_up != 0) { 152 /* Configure network interface up */ 153 rte_eth_dev_stop(port_id); 154 ret = rte_eth_dev_start(port_id); 155 } else { 156 /* Configure network interface down */ 157 rte_eth_dev_stop(port_id); 158 ret = 0; 159 } 160 } 161 162 if (ret < 0) 163 printf("Failed to Configure network interface of %d %s\n", 164 port_id, if_up ? "up" : "down"); 165 166 return ret; 167 } 168 169 static void 170 print_ethaddr(const char *name, struct rte_ether_addr *mac_addr) 171 { 172 char buf[RTE_ETHER_ADDR_FMT_SIZE]; 173 rte_ether_format_addr(buf, RTE_ETHER_ADDR_FMT_SIZE, mac_addr); 174 printf("\t%s%s\n", name, buf); 175 } 176 177 178 /* Callback for request of configuring mac address */ 179 static int 180 kni_config_mac_address(uint16_t port_id, uint8_t mac_addr[]) 181 { 182 int ret = 0; 183 184 if (!rte_eth_dev_is_valid_port(port_id)) { 185 printf("Invalid port id %d\n", port_id); 186 return -EINVAL; 187 } 188 189 print_ethaddr("Address:", (struct rte_ether_addr *)mac_addr); 190 191 ret = rte_eth_dev_default_mac_addr_set(port_id, 192 (struct rte_ether_addr *)mac_addr); 193 if (ret < 0) 194 printf("Failed to config mac_addr for port %d\n", port_id); 195 196 return ret; 197 } 198 199 static int 200 kni_process_tx(uint16_t port_id, uint16_t queue_id, 201 struct rte_mbuf **pkts_burst, unsigned count) 202 { 203 /* read packet from kni ring(phy port) and transmit to kni */ 204 uint16_t nb_tx, nb_kni_tx; 205 nb_tx = rte_ring_dequeue_burst(kni_rp[port_id], (void **)pkts_burst, count, NULL); 206 207 /* NB. 208 * if nb_tx is 0,it must call rte_kni_tx_burst 209 * must Call regularly rte_kni_tx_burst(kni, NULL, 0). 210 * detail https://embedded.communities.intel.com/thread/6668 211 */ 212 nb_kni_tx = rte_kni_tx_burst(kni_stat[port_id]->kni, pkts_burst, nb_tx); 213 rte_kni_handle_request(kni_stat[port_id]->kni); 214 if(nb_kni_tx < nb_tx) { 215 uint16_t i; 216 for(i = nb_kni_tx; i < nb_tx; ++i) 217 rte_pktmbuf_free(pkts_burst[i]); 218 219 kni_stat[port_id]->rx_dropped += (nb_tx - nb_kni_tx); 220 } 221 222 kni_stat[port_id]->rx_packets += nb_kni_tx; 223 return 0; 224 } 225 226 static int 227 kni_process_rx(uint16_t port_id, uint16_t queue_id, 228 struct rte_mbuf **pkts_burst, unsigned count) 229 { 230 uint16_t nb_kni_rx, nb_rx; 231 232 /* read packet from kni, and transmit to phy port */ 233 nb_kni_rx = rte_kni_rx_burst(kni_stat[port_id]->kni, pkts_burst, count); 234 if (nb_kni_rx > 0) { 235 nb_rx = rte_eth_tx_burst(port_id, queue_id, pkts_burst, nb_kni_rx); 236 if (nb_rx < nb_kni_rx) { 237 uint16_t i; 238 for(i = nb_rx; i < nb_kni_rx; ++i) 239 rte_pktmbuf_free(pkts_burst[i]); 240 241 kni_stat[port_id]->tx_dropped += (nb_kni_rx - nb_rx); 242 } 243 244 kni_stat[port_id]->tx_packets += nb_rx; 245 } 246 return 0; 247 } 248 249 static enum FilterReturn 250 protocol_filter_l4(uint16_t port, unsigned char *bitmap) 251 { 252 if(get_bitmap(port, bitmap)) { 253 return FILTER_KNI; 254 } 255 256 return FILTER_UNKNOWN; 257 } 258 259 static enum FilterReturn 260 protocol_filter_tcp(const void *data, uint16_t len) 261 { 262 if (len < sizeof(struct rte_tcp_hdr)) 263 return FILTER_UNKNOWN; 264 265 const struct rte_tcp_hdr *hdr; 266 hdr = (const struct rte_tcp_hdr *)data; 267 268 return protocol_filter_l4(hdr->dst_port, tcp_port_bitmap); 269 } 270 271 static enum FilterReturn 272 protocol_filter_udp(const void* data,uint16_t len) 273 { 274 if (len < sizeof(struct rte_udp_hdr)) 275 return FILTER_UNKNOWN; 276 277 const struct rte_udp_hdr *hdr; 278 hdr = (const struct rte_udp_hdr *)data; 279 280 return protocol_filter_l4(hdr->dst_port, udp_port_bitmap); 281 } 282 283 #ifdef INET6 284 /* 285 * https://www.iana.org/assignments/ipv6-parameters/ipv6-parameters.xhtml 286 */ 287 #ifndef IPPROTO_HIP 288 #define IPPROTO_HIP 139 289 #endif 290 291 #ifndef IPPROTO_SHIM6 292 #define IPPROTO_SHIM6 140 293 #endif 294 295 #ifndef IPPROTO_MH 296 #define IPPROTO_MH 135 297 #endif 298 static int 299 get_ipv6_hdr_len(uint8_t *proto, void *data, uint16_t len) 300 { 301 int ext_hdr_len = 0; 302 303 switch (*proto) { 304 case IPPROTO_HOPOPTS: case IPPROTO_ROUTING: case IPPROTO_DSTOPTS: 305 case IPPROTO_MH: case IPPROTO_HIP: case IPPROTO_SHIM6: 306 ext_hdr_len = *((uint8_t *)data + 1) + 1; 307 break; 308 case IPPROTO_FRAGMENT: 309 ext_hdr_len = 8; 310 break; 311 case IPPROTO_AH: 312 ext_hdr_len = (*((uint8_t *)data + 1) + 2) * 4; 313 break; 314 case IPPROTO_NONE: 315 #ifdef FF_IPSEC 316 case IPPROTO_ESP: 317 //proto = *((uint8_t *)data + len - 1 - 4); 318 //ext_hdr_len = len; 319 #endif 320 default: 321 return ext_hdr_len; 322 } 323 324 if (ext_hdr_len >= len) { 325 return len; 326 } 327 328 *proto = *((uint8_t *)data); 329 ext_hdr_len += get_ipv6_hdr_len(proto, data + ext_hdr_len, len - ext_hdr_len); 330 331 return ext_hdr_len; 332 } 333 334 static enum FilterReturn 335 protocol_filter_icmp6(void *data, uint16_t len) 336 { 337 if (len < sizeof(struct icmp6_hdr)) 338 return FILTER_UNKNOWN; 339 340 const struct icmp6_hdr *hdr; 341 hdr = (const struct icmp6_hdr *)data; 342 343 if (hdr->icmp6_type >= ND_ROUTER_SOLICIT && hdr->icmp6_type <= ND_REDIRECT) 344 return FILTER_NDP; 345 346 return FILTER_UNKNOWN; 347 } 348 #endif 349 350 static enum FilterReturn 351 protocol_filter_ip(const void *data, uint16_t len, uint16_t eth_frame_type) 352 { 353 uint8_t proto; 354 int hdr_len; 355 void *next; 356 uint16_t next_len; 357 358 if (eth_frame_type == RTE_ETHER_TYPE_IPV4) { 359 if(len < sizeof(struct rte_ipv4_hdr)) 360 return FILTER_UNKNOWN; 361 362 const struct rte_ipv4_hdr *hdr = (struct rte_ipv4_hdr *)data; 363 hdr_len = (hdr->version_ihl & 0x0f) << 2; 364 if (len < hdr_len) 365 return FILTER_UNKNOWN; 366 367 proto = hdr->next_proto_id; 368 #ifdef INET6 369 } else if(eth_frame_type == RTE_ETHER_TYPE_IPV6) { 370 if(len < sizeof(struct rte_ipv6_hdr)) 371 return FILTER_UNKNOWN; 372 373 hdr_len = sizeof(struct rte_ipv6_hdr); 374 proto = ((struct rte_ipv6_hdr *)data)->proto; 375 hdr_len += get_ipv6_hdr_len(&proto, (void *)data + hdr_len, len - hdr_len); 376 377 if (len < hdr_len) 378 return FILTER_UNKNOWN; 379 #endif 380 } else { 381 return FILTER_UNKNOWN; 382 } 383 384 next = (void *)data + hdr_len; 385 next_len = len - hdr_len; 386 387 switch (proto) { 388 case IPPROTO_TCP: 389 #ifdef FF_KNI 390 if (!enable_kni) 391 break; 392 #else 393 break; 394 #endif 395 return protocol_filter_tcp(next, next_len); 396 case IPPROTO_UDP: 397 #ifdef FF_KNI 398 if (!enable_kni) 399 break; 400 #else 401 break; 402 #endif 403 return protocol_filter_udp(next, next_len); 404 case IPPROTO_IPIP: 405 return protocol_filter_ip(next, next_len, RTE_ETHER_TYPE_IPV4); 406 #ifdef INET6 407 case IPPROTO_IPV6: 408 return protocol_filter_ip(next, next_len, RTE_ETHER_TYPE_IPV6); 409 case IPPROTO_ICMPV6: 410 return protocol_filter_icmp6(next, next_len); 411 #endif 412 } 413 414 return FILTER_UNKNOWN; 415 } 416 417 enum FilterReturn 418 ff_kni_proto_filter(const void *data, uint16_t len, uint16_t eth_frame_type) 419 { 420 return protocol_filter_ip(data, len, eth_frame_type); 421 } 422 423 void 424 ff_kni_init(uint16_t nb_ports, const char *tcp_ports, const char *udp_ports) 425 { 426 if (rte_eal_process_type() == RTE_PROC_PRIMARY) { 427 kni_stat = rte_zmalloc("kni:stat", 428 sizeof(struct kni_interface_stats *) * nb_ports, 429 RTE_CACHE_LINE_SIZE); 430 if (kni_stat == NULL) 431 rte_exit(EXIT_FAILURE, "rte_zmalloc(1 (struct netio_kni_stat *)) " 432 "failed\n"); 433 434 rte_kni_init(nb_ports); 435 } 436 437 uint16_t lcoreid = rte_lcore_id(); 438 char name_buf[RTE_RING_NAMESIZE]; 439 snprintf(name_buf, RTE_RING_NAMESIZE, "kni::ring_%d", lcoreid); 440 kni_rp = rte_zmalloc(name_buf, 441 sizeof(struct rte_ring *) * nb_ports, 442 RTE_CACHE_LINE_SIZE); 443 if (kni_rp == NULL) { 444 rte_exit(EXIT_FAILURE, "rte_zmalloc(%s (struct rte_ring*)) " 445 "failed\n", name_buf); 446 } 447 448 snprintf(name_buf, RTE_RING_NAMESIZE, "kni:tcp_port_bitmap_%d", lcoreid); 449 tcp_port_bitmap = rte_zmalloc("kni:tcp_port_bitmap", 8192, 450 RTE_CACHE_LINE_SIZE); 451 if (tcp_port_bitmap == NULL) { 452 rte_exit(EXIT_FAILURE, "rte_zmalloc(%s (tcp_port_bitmap)) " 453 "failed\n", name_buf); 454 } 455 456 snprintf(name_buf, RTE_RING_NAMESIZE, "kni:udp_port_bitmap_%d", lcoreid); 457 udp_port_bitmap = rte_zmalloc("kni:udp_port_bitmap", 8192, 458 RTE_CACHE_LINE_SIZE); 459 if (udp_port_bitmap == NULL) { 460 rte_exit(EXIT_FAILURE, "rte_zmalloc(%s (udp_port_bitmap)) " 461 "failed\n",name_buf); 462 } 463 464 memset(tcp_port_bitmap, 0, 8192); 465 memset(udp_port_bitmap, 0, 8192); 466 467 kni_set_bitmap(tcp_ports, tcp_port_bitmap); 468 kni_set_bitmap(udp_ports, udp_port_bitmap); 469 } 470 471 void 472 ff_kni_alloc(uint16_t port_id, unsigned socket_id, 473 struct rte_mempool *mbuf_pool, unsigned ring_queue_size) 474 { 475 if (rte_eal_process_type() == RTE_PROC_PRIMARY) { 476 struct rte_kni_conf conf; 477 struct rte_kni_ops ops; 478 struct rte_eth_dev_info dev_info; 479 const struct rte_pci_device *pci_dev; 480 const struct rte_bus *bus = NULL; 481 482 kni_stat[port_id] = (struct kni_interface_stats*)rte_zmalloc( 483 "kni:stat_lcore", 484 sizeof(struct kni_interface_stats), 485 RTE_CACHE_LINE_SIZE); 486 487 if (kni_stat[port_id] == NULL) 488 rte_panic("rte_zmalloc kni_interface_stats failed\n"); 489 490 /* only support one kni */ 491 memset(&conf, 0, sizeof(conf)); 492 snprintf(conf.name, RTE_KNI_NAMESIZE, "veth%u", port_id); 493 conf.core_id = rte_lcore_id(); 494 conf.force_bind = 1; 495 conf.group_id = port_id; 496 uint16_t mtu; 497 rte_eth_dev_get_mtu(port_id, &mtu); 498 conf.mbuf_size = mtu + KNI_ENET_HEADER_SIZE + KNI_ENET_FCS_SIZE; 499 500 memset(&dev_info, 0, sizeof(dev_info)); 501 rte_eth_dev_info_get(port_id, &dev_info); 502 503 if (dev_info.device) 504 bus = rte_bus_find_by_device(dev_info.device); 505 if (bus && !strcmp(bus->name, "pci")) { 506 pci_dev = RTE_DEV_TO_PCI(dev_info.device); 507 conf.addr = pci_dev->addr; 508 conf.id = pci_dev->id; 509 } 510 511 /* Get the interface default mac address */ 512 rte_eth_macaddr_get(port_id, 513 (struct rte_ether_addr *)&conf.mac_addr); 514 515 memset(&ops, 0, sizeof(ops)); 516 ops.port_id = port_id; 517 ops.change_mtu = kni_change_mtu; 518 ops.config_network_if = kni_config_network_interface; 519 ops.config_mac_address = kni_config_mac_address; 520 521 kni_stat[port_id]->kni = rte_kni_alloc(mbuf_pool, &conf, &ops); 522 if (kni_stat[port_id]->kni == NULL) 523 rte_panic("create kni on port %u failed!\n", port_id); 524 else 525 printf("create kni on port %u success!\n", port_id); 526 527 kni_stat[port_id]->rx_packets = 0; 528 kni_stat[port_id]->rx_dropped = 0; 529 kni_stat[port_id]->tx_packets = 0; 530 kni_stat[port_id]->tx_dropped = 0; 531 } 532 533 char ring_name[RTE_KNI_NAMESIZE]; 534 snprintf((char*)ring_name, RTE_KNI_NAMESIZE, "kni_ring_%u", port_id); 535 536 if (rte_eal_process_type() == RTE_PROC_PRIMARY) { 537 kni_rp[port_id] = rte_ring_create(ring_name, ring_queue_size, 538 socket_id, RING_F_SC_DEQ); 539 540 if (rte_ring_lookup(ring_name) != kni_rp[port_id]) 541 rte_panic("lookup kni ring failed!\n"); 542 } else { 543 kni_rp[port_id] = rte_ring_lookup(ring_name); 544 } 545 546 if (kni_rp[port_id] == NULL) 547 rte_panic("create kni ring failed!\n"); 548 549 printf("create kni ring success, %u ring entries are now free!\n", 550 rte_ring_free_count(kni_rp[port_id])); 551 } 552 553 void 554 ff_kni_process(uint16_t port_id, uint16_t queue_id, 555 struct rte_mbuf **pkts_burst, unsigned count) 556 { 557 kni_process_tx(port_id, queue_id, pkts_burst, count); 558 kni_process_rx(port_id, queue_id, pkts_burst, count); 559 } 560 561 /* enqueue the packet, and own it */ 562 int 563 ff_kni_enqueue(uint16_t port_id, struct rte_mbuf *pkt) 564 { 565 int ret = rte_ring_enqueue(kni_rp[port_id], pkt); 566 if (ret < 0) 567 rte_pktmbuf_free(pkt); 568 569 return 0; 570 } 571 572