/*
 * Copyright (C) 2017 THL A29 Limited, a Tencent company.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 * 25 */ 26 27 #include <stdlib.h> 28 #include <arpa/inet.h> 29 30 #include <rte_config.h> 31 #include <rte_ether.h> 32 #include <rte_bus_pci.h> 33 #include <rte_ethdev.h> 34 #include <rte_kni.h> 35 #include <rte_malloc.h> 36 #include <rte_ring.h> 37 #include <rte_ip.h> 38 #include <rte_tcp.h> 39 #include <rte_udp.h> 40 41 #include "ff_dpdk_kni.h" 42 #include "ff_config.h" 43 44 /* Callback for request of changing MTU */ 45 /* Total octets in ethernet header */ 46 #define KNI_ENET_HEADER_SIZE 14 47 48 /* Total octets in the FCS */ 49 #define KNI_ENET_FCS_SIZE 4 50 51 #define set_bit(n, m) (n | magic_bits[m]) 52 #define clear_bit(n, m) (n & (~magic_bits[m])) 53 #define get_bit(n, m) (n & magic_bits[m]) 54 55 static const int magic_bits[8] = { 56 0x80, 0x40, 0x20, 0x10, 57 0x8, 0x4, 0x2, 0x1 58 }; 59 60 static unsigned char *udp_port_bitmap = NULL; 61 static unsigned char *tcp_port_bitmap = NULL; 62 63 /* Structure type for recording kni interface specific stats */ 64 struct kni_interface_stats { 65 struct rte_kni *kni; 66 67 /* number of pkts received from NIC, and sent to KNI */ 68 uint64_t rx_packets; 69 70 /* number of pkts received from NIC, but failed to send to KNI */ 71 uint64_t rx_dropped; 72 73 /* number of pkts received from KNI, and sent to NIC */ 74 uint64_t tx_packets; 75 76 /* number of pkts received from KNI, but failed to send to NIC */ 77 uint64_t tx_dropped; 78 }; 79 80 struct rte_ring **kni_rp; 81 struct kni_interface_stats **kni_stat; 82 83 static void 84 set_bitmap(uint16_t port, unsigned char *bitmap) 85 { 86 port = htons(port); 87 unsigned char *p = bitmap + port/8; 88 *p = set_bit(*p, port % 8); 89 } 90 91 static int 92 get_bitmap(uint16_t port, unsigned char *bitmap) 93 { 94 unsigned char *p = bitmap + port/8; 95 return get_bit(*p, port % 8) > 0 ? 
1 : 0; 96 } 97 98 static void 99 kni_set_bitmap(const char *p, unsigned char *port_bitmap) 100 { 101 int i; 102 const char *head, *tail, *tail_num; 103 if(!p) 104 return; 105 106 head = p; 107 while (1) { 108 tail = strstr(head, ","); 109 tail_num = strstr(head, "-"); 110 if(tail_num && (!tail || tail_num < tail - 1)) { 111 for(i = atoi(head); i <= atoi(tail_num + 1); ++i) { 112 set_bitmap(i, port_bitmap); 113 } 114 } else { 115 set_bitmap(atoi(head), port_bitmap); 116 } 117 118 if(!tail) 119 break; 120 121 head = tail + 1; 122 } 123 } 124 125 /* Currently we don't support change mtu. */ 126 static int 127 kni_change_mtu(uint16_t port_id, unsigned new_mtu) 128 { 129 return 0; 130 } 131 132 static int 133 kni_config_network_interface(uint16_t port_id, uint8_t if_up) 134 { 135 int ret = 0; 136 137 if (port_id >= rte_eth_dev_count() || port_id >= RTE_MAX_ETHPORTS) { 138 printf("Invalid port id %d\n", port_id); 139 return -EINVAL; 140 } 141 142 printf("Configure network interface of %d %s\n", 143 port_id, if_up ? "up" : "down"); 144 145 ret = (if_up) ? 146 rte_eth_dev_set_link_up(port_id) : 147 rte_eth_dev_set_link_down(port_id); 148 149 if(-ENOTSUP == ret) { 150 if (if_up != 0) { 151 /* Configure network interface up */ 152 rte_eth_dev_stop(port_id); 153 ret = rte_eth_dev_start(port_id); 154 } else { 155 /* Configure network interface down */ 156 rte_eth_dev_stop(port_id); 157 ret = 0; 158 } 159 } 160 161 if (ret < 0) 162 printf("Failed to Configure network interface of %d %s\n", 163 port_id, if_up ? "up" : "down"); 164 165 return ret; 166 } 167 168 static int 169 kni_process_tx(uint16_t port_id, uint16_t queue_id, 170 struct rte_mbuf **pkts_burst, unsigned count) 171 { 172 /* read packet from kni ring(phy port) and transmit to kni */ 173 uint16_t nb_tx, nb_kni_tx; 174 nb_tx = rte_ring_dequeue_burst(kni_rp[port_id], (void **)pkts_burst, count, NULL); 175 176 /* NB. 
177 * if nb_tx is 0,it must call rte_kni_tx_burst 178 * must Call regularly rte_kni_tx_burst(kni, NULL, 0). 179 * detail https://embedded.communities.intel.com/thread/6668 180 */ 181 nb_kni_tx = rte_kni_tx_burst(kni_stat[port_id]->kni, pkts_burst, nb_tx); 182 rte_kni_handle_request(kni_stat[port_id]->kni); 183 if(nb_kni_tx < nb_tx) { 184 uint16_t i; 185 for(i = nb_kni_tx; i < nb_tx; ++i) 186 rte_pktmbuf_free(pkts_burst[i]); 187 188 kni_stat[port_id]->rx_dropped += (nb_tx - nb_kni_tx); 189 } 190 191 kni_stat[port_id]->rx_packets += nb_kni_tx; 192 return 0; 193 } 194 195 static int 196 kni_process_rx(uint16_t port_id, uint16_t queue_id, 197 struct rte_mbuf **pkts_burst, unsigned count) 198 { 199 uint16_t nb_kni_rx, nb_rx; 200 201 /* read packet from kni, and transmit to phy port */ 202 nb_kni_rx = rte_kni_rx_burst(kni_stat[port_id]->kni, pkts_burst, count); 203 if (nb_kni_rx > 0) { 204 nb_rx = rte_eth_tx_burst(port_id, queue_id, pkts_burst, nb_kni_rx); 205 if (nb_rx < nb_kni_rx) { 206 uint16_t i; 207 for(i = nb_rx; i < nb_kni_rx; ++i) 208 rte_pktmbuf_free(pkts_burst[i]); 209 210 kni_stat[port_id]->tx_dropped += (nb_kni_rx - nb_rx); 211 } 212 213 kni_stat[port_id]->tx_packets += nb_rx; 214 } 215 return 0; 216 } 217 218 static enum FilterReturn 219 protocol_filter_l4(uint16_t port, unsigned char *bitmap) 220 { 221 if(get_bitmap(port, bitmap)) { 222 return FILTER_KNI; 223 } 224 225 return FILTER_UNKNOWN; 226 } 227 228 static enum FilterReturn 229 protocol_filter_tcp(const void *data, uint16_t len) 230 { 231 if (len < sizeof(struct tcp_hdr)) 232 return FILTER_UNKNOWN; 233 234 const struct tcp_hdr *hdr; 235 hdr = (const struct tcp_hdr *)data; 236 237 return protocol_filter_l4(hdr->dst_port, tcp_port_bitmap); 238 } 239 240 static enum FilterReturn 241 protocol_filter_udp(const void* data,uint16_t len) 242 { 243 if (len < sizeof(struct udp_hdr)) 244 return FILTER_UNKNOWN; 245 246 const struct udp_hdr *hdr; 247 hdr = (const struct udp_hdr *)data; 248 249 return 
protocol_filter_l4(hdr->dst_port, udp_port_bitmap); 250 } 251 252 static enum FilterReturn 253 protocol_filter_ip(const void *data, uint16_t len) 254 { 255 if(len < sizeof(struct ipv4_hdr)) 256 return FILTER_UNKNOWN; 257 258 const struct ipv4_hdr *hdr; 259 hdr = (const struct ipv4_hdr *)data; 260 261 int hdr_len = (hdr->version_ihl & 0x0f) << 2; 262 if (len < hdr_len) 263 return FILTER_UNKNOWN; 264 265 void *next = (void *)data + hdr_len; 266 uint16_t next_len = len - hdr_len; 267 268 switch (hdr->next_proto_id) { 269 case IPPROTO_TCP: 270 return protocol_filter_tcp(next, next_len); 271 case IPPROTO_UDP: 272 return protocol_filter_udp(next, next_len); 273 case IPPROTO_IPIP: 274 return protocol_filter_ip(next, next_len); 275 } 276 277 return FILTER_UNKNOWN; 278 } 279 280 enum FilterReturn 281 ff_kni_proto_filter(const void *data, uint16_t len) 282 { 283 return protocol_filter_ip(data, len); 284 } 285 286 void 287 ff_kni_init(uint16_t nb_ports, const char *tcp_ports, const char *udp_ports) 288 { 289 if (rte_eal_process_type() == RTE_PROC_PRIMARY) { 290 kni_stat = rte_zmalloc("kni:stat", 291 sizeof(struct kni_interface_stats *) * nb_ports, 292 RTE_CACHE_LINE_SIZE); 293 if (kni_stat == NULL) 294 rte_exit(EXIT_FAILURE, "rte_zmalloc(1 (struct netio_kni_stat *)) " 295 "failed\n"); 296 297 rte_kni_init(nb_ports); 298 } 299 300 uint16_t lcoreid = rte_lcore_id(); 301 char name_buf[RTE_RING_NAMESIZE]; 302 snprintf(name_buf, RTE_RING_NAMESIZE, "kni::ring_%d", lcoreid); 303 kni_rp = rte_zmalloc(name_buf, 304 sizeof(struct rte_ring *) * nb_ports, 305 RTE_CACHE_LINE_SIZE); 306 if (kni_rp == NULL) { 307 rte_exit(EXIT_FAILURE, "rte_zmalloc(%s (struct rte_ring*)) " 308 "failed\n", name_buf); 309 } 310 311 snprintf(name_buf, RTE_RING_NAMESIZE, "kni:tcp_port_bitmap_%d", lcoreid); 312 tcp_port_bitmap = rte_zmalloc("kni:tcp_port_bitmap", 8192, 313 RTE_CACHE_LINE_SIZE); 314 if (tcp_port_bitmap == NULL) { 315 rte_exit(EXIT_FAILURE, "rte_zmalloc(%s (tcp_port_bitmap)) " 316 "failed\n", 
name_buf); 317 } 318 319 snprintf(name_buf, RTE_RING_NAMESIZE, "kni:udp_port_bitmap_%d", lcoreid); 320 udp_port_bitmap = rte_zmalloc("kni:udp_port_bitmap", 8192, 321 RTE_CACHE_LINE_SIZE); 322 if (udp_port_bitmap == NULL) { 323 rte_exit(EXIT_FAILURE, "rte_zmalloc(%s (udp_port_bitmap)) " 324 "failed\n",name_buf); 325 } 326 327 memset(tcp_port_bitmap, 0, 8192); 328 memset(udp_port_bitmap, 0, 8192); 329 330 kni_set_bitmap(tcp_ports, tcp_port_bitmap); 331 kni_set_bitmap(udp_ports, udp_port_bitmap); 332 } 333 334 void 335 ff_kni_alloc(uint16_t port_id, unsigned socket_id, 336 struct rte_mempool *mbuf_pool, unsigned ring_queue_size) 337 { 338 if (rte_eal_process_type() == RTE_PROC_PRIMARY) { 339 struct rte_kni_conf conf; 340 struct rte_kni_ops ops; 341 struct rte_eth_dev_info dev_info; 342 343 kni_stat[port_id] = (struct kni_interface_stats*)rte_zmalloc( 344 "kni:stat_lcore", 345 sizeof(struct kni_interface_stats), 346 RTE_CACHE_LINE_SIZE); 347 348 if (kni_stat[port_id] == NULL) 349 rte_panic("rte_zmalloc kni_interface_stats failed\n"); 350 351 /* only support one kni */ 352 memset(&conf, 0, sizeof(conf)); 353 snprintf(conf.name, RTE_KNI_NAMESIZE, "veth%u", port_id); 354 conf.core_id = rte_lcore_id(); 355 conf.force_bind = 1; 356 conf.group_id = port_id; 357 uint16_t mtu; 358 rte_eth_dev_get_mtu(port_id, &mtu); 359 conf.mbuf_size = mtu + KNI_ENET_HEADER_SIZE + KNI_ENET_FCS_SIZE; 360 361 memset(&dev_info, 0, sizeof(dev_info)); 362 rte_eth_dev_info_get(port_id, &dev_info); 363 conf.addr = dev_info.pci_dev->addr; 364 conf.id = dev_info.pci_dev->id; 365 366 memset(&ops, 0, sizeof(ops)); 367 ops.port_id = port_id; 368 ops.change_mtu = kni_change_mtu; 369 ops.config_network_if = kni_config_network_interface; 370 371 kni_stat[port_id]->kni = rte_kni_alloc(mbuf_pool, &conf, &ops); 372 if (kni_stat[port_id]->kni == NULL) 373 rte_panic("create kni on port %u failed!\n", port_id); 374 else 375 printf("create kni on port %u success!\n", port_id); 376 377 kni_stat[port_id]->rx_packets 
= 0; 378 kni_stat[port_id]->rx_dropped = 0; 379 kni_stat[port_id]->tx_packets = 0; 380 kni_stat[port_id]->tx_dropped = 0; 381 } 382 383 char ring_name[RTE_KNI_NAMESIZE]; 384 snprintf((char*)ring_name, RTE_KNI_NAMESIZE, "kni_ring_%u", port_id); 385 386 if (rte_eal_process_type() == RTE_PROC_PRIMARY) { 387 kni_rp[port_id] = rte_ring_create(ring_name, ring_queue_size, 388 socket_id, RING_F_SC_DEQ); 389 390 if (rte_ring_lookup(ring_name) != kni_rp[port_id]) 391 rte_panic("lookup kni ring failed!\n"); 392 } else { 393 kni_rp[port_id] = rte_ring_lookup(ring_name); 394 } 395 396 if (kni_rp[port_id] == NULL) 397 rte_panic("create kni ring failed!\n"); 398 399 printf("create kni ring success, %u ring entries are now free!\n", 400 rte_ring_free_count(kni_rp[port_id])); 401 } 402 403 404 void 405 ff_kni_process(uint16_t port_id, uint16_t queue_id, 406 struct rte_mbuf **pkts_burst, unsigned count) 407 { 408 kni_process_tx(port_id, queue_id, pkts_burst, count); 409 kni_process_rx(port_id, queue_id, pkts_burst, count); 410 } 411 412 /* enqueue the packet, and own it */ 413 int 414 ff_kni_enqueue(uint16_t port_id, struct rte_mbuf *pkt) 415 { 416 int ret = rte_ring_enqueue(kni_rp[port_id], pkt); 417 if (ret < 0) 418 rte_pktmbuf_free(pkt); 419 420 return 0; 421 } 422 423