/* for io_module_func def'ns */
#include "io_module.h"
/* for mtcp related def'ns */
#include "mtcp.h"
/* for errno */
#include <errno.h>
/* for close/optind */
#include <unistd.h>
/* for logging */
#include "debug.h"
/* for num_devices_* */
#include "config.h"
/* for rte_max_eth_ports */
#include <rte_common.h>
/* for rte_eth_rxconf */
#include <rte_ethdev.h>
/* for delay funcs */
#include <rte_cycles.h>
/* for ip pseudo-chksum */
#include <rte_ip.h>
#define ENABLE_STATS_IOCTL 1
#ifdef ENABLE_STATS_IOCTL
/* for open */
#include <fcntl.h>
/* for ioctl */
#include <sys/ioctl.h>
#endif /* !ENABLE_STATS_IOCTL */
/* for retrieving rte version(s) */
#include <rte_version.h>
/*----------------------------------------------------------------------------*/
/* Essential macros */
#define MAX_RX_QUEUE_PER_LCORE      MAX_CPUS
#define MAX_TX_QUEUE_PER_PORT       MAX_CPUS

#define MBUF_SIZE                   (2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)
#define NB_MBUF                     8192
#define MEMPOOL_CACHE_SIZE          256
//#define RX_IDLE_ENABLE            1
#define RX_IDLE_TIMEOUT             1   /* in micro-seconds */
#define RX_IDLE_THRESH              64

/*
 * RX and TX Prefetch, Host, and Write-back threshold values should be
 * carefully set for optimal performance. Consult the network
 * controller's datasheet and supporting DPDK documentation for guidance
 * on how these parameters should be set.
 */
#define RX_PTHRESH                  8  /**< Default value of RX prefetch threshold reg. */
#define RX_HTHRESH                  8  /**< Default value of RX host threshold reg. */
#define RX_WTHRESH                  4  /**< Default value of RX write-back threshold reg. */

/*
 * These default values are optimized for use with the Intel(R) 82599 10 GbE
 * Controller and the DPDK ixgbe PMD. Consider using other values for other
 * network controllers and/or network drivers.
 */
#define TX_PTHRESH                  36 /**< Default value of TX prefetch threshold reg. */
#define TX_HTHRESH                  0  /**< Default value of TX host threshold reg. */
#define TX_WTHRESH                  0  /**< Default value of TX write-back threshold reg. */

#define MAX_PKT_BURST               64

/*
 * Configurable number of RX/TX ring descriptors
 */
#define RTE_TEST_RX_DESC_DEFAULT    128
#define RTE_TEST_TX_DESC_DEFAULT    512

static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
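/*
 * Sizing note (a rule of thumb, not a value mandated by DPDK): NB_MBUF bounds
 * how many mbufs a per-core pool can hand out at once. Each RX queue pins up
 * to nb_rxd descriptors, every registered port keeps MAX_PKT_BURST
 * pre-allocated TX wmbufs, and up to MEMPOOL_CACHE_SIZE mbufs may sit in the
 * per-lcore cache, so NB_MBUF should comfortably exceed the sum of those.
 */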
/*----------------------------------------------------------------------------*/
/* packet memory pools for storing packet bufs */
static struct rte_mempool *pktmbuf_pool[MAX_CPUS] = {NULL};
static uint8_t cpu_qid_map[RTE_MAX_ETHPORTS][MAX_CPUS] = {{0}};

//#define DEBUG 1
#ifdef DEBUG
/* ethernet addresses of ports */
static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];
#endif

static struct rte_eth_dev_info dev_info[RTE_MAX_ETHPORTS];

static struct rte_eth_conf port_conf = {
    .rxmode = {
        .mq_mode        = ETH_MQ_RX_RSS,
        .max_rx_pkt_len = ETHER_MAX_LEN,
        .split_hdr_size = 0,
#if (RTE_VER_YEAR <= 18) && (RTE_VER_MONTH <= 02)
        .header_split   = 0, /**< Header Split disabled */
        .hw_ip_checksum = 1, /**< IP checksum offload enabled */
        .hw_vlan_filter = 0, /**< VLAN filtering disabled */
        .jumbo_frame    = 0, /**< Jumbo Frame Support disabled */
        .hw_strip_crc   = 1, /**< CRC stripped by hardware */
#else
        .offloads       = DEV_RX_OFFLOAD_CHECKSUM,
#endif
    },
    .rx_adv_conf = {
        .rss_conf = {
            .rss_key = NULL,
            .rss_hf  = ETH_RSS_TCP | ETH_RSS_UDP |
                       ETH_RSS_IP | ETH_RSS_L2_PAYLOAD
        },
    },
    .txmode = {
        .mq_mode = ETH_MQ_TX_NONE,
#if (RTE_VER_YEAR >= 18) && (RTE_VER_MONTH > 02)
        .offloads = DEV_TX_OFFLOAD_IPV4_CKSUM |
                    DEV_TX_OFFLOAD_UDP_CKSUM |
                    DEV_TX_OFFLOAD_TCP_CKSUM
#endif
    },
};

static const struct rte_eth_rxconf rx_conf = {
    .rx_thresh = {
        .pthresh = RX_PTHRESH, /* RX prefetch threshold reg */
        .hthresh = RX_HTHRESH, /* RX host threshold reg */
        .wthresh = RX_WTHRESH, /* RX write-back threshold reg */
    },
    .rx_free_thresh = 32,
};

static const struct rte_eth_txconf tx_conf = {
    .tx_thresh = {
        .pthresh = TX_PTHRESH, /* TX prefetch threshold reg */
        .hthresh = TX_HTHRESH, /* TX host threshold reg */
        .wthresh = TX_WTHRESH, /* TX write-back threshold reg */
    },
    .tx_free_thresh = 0, /* use PMD default values */
    .tx_rs_thresh   = 0, /* use PMD default values */
#if (RTE_VER_YEAR <= 18) && (RTE_VER_MONTH <= 02)
    /*
     * As the module won't restrict multi-segment or offload cases,
     * clear txq_flags by default.
     */
    .txq_flags = 0x0,
#endif
};

struct mbuf_table {
    unsigned len; /* length of queued packets */
    struct rte_mbuf *m_table[MAX_PKT_BURST];
};

struct dpdk_private_context {
    struct mbuf_table rmbufs[RTE_MAX_ETHPORTS];
    struct mbuf_table wmbufs[RTE_MAX_ETHPORTS];
    struct rte_mempool *pktmbuf_pool;
    struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
#ifdef RX_IDLE_ENABLE
    uint8_t rx_idle;
#endif
#ifdef ENABLE_STATS_IOCTL
    int fd;
#endif /* !ENABLE_STATS_IOCTL */
} __rte_cache_aligned;

#ifdef ENABLE_STATS_IOCTL
/**
 * stats struct passed on from user space to the driver
 */
struct stats_struct {
    uint64_t tx_bytes;
    uint64_t tx_pkts;
    uint64_t rx_bytes;
    uint64_t rx_pkts;
    uint8_t qid;
    uint8_t dev;
};
#endif /* !ENABLE_STATS_IOCTL */
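/*
 * With ENABLE_STATS_IOCTL, each mTCP thread periodically pushes its per-queue
 * TX/RX byte and packet counters into the dpdk-iface kernel module through an
 * ioctl() on /dev/dpdk-iface (see dpdk_send_pkts() below); 'qid' and 'dev'
 * identify the queue and port the counters belong to. How the module exposes
 * the aggregated counters is outside the scope of this file; when the device
 * node is absent (e.g. with the mlx4/mlx5 drivers) the fd stays at -1 and the
 * ioctl is simply skipped.
 */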
/*----------------------------------------------------------------------------*/
void
dpdk_init_handle(struct mtcp_thread_context *ctxt)
{
    struct dpdk_private_context *dpc;
    int i, j;
    char mempool_name[20];

    /* create and initialize private I/O module context */
    ctxt->io_private_context = calloc(1, sizeof(struct dpdk_private_context));
    if (ctxt->io_private_context == NULL) {
        TRACE_ERROR("Failed to initialize ctxt->io_private_context: "
                    "Can't allocate memory\n");
        exit(EXIT_FAILURE);
    }

    sprintf(mempool_name, "mbuf_pool-%d", ctxt->cpu);
    dpc = (struct dpdk_private_context *)ctxt->io_private_context;
    dpc->pktmbuf_pool = pktmbuf_pool[ctxt->cpu];

    /* set wmbufs correctly */
    for (j = 0; j < g_config.mos->netdev_table->num; j++) {
        /* allocate wmbufs for each registered port */
        for (i = 0; i < MAX_PKT_BURST; i++) {
            dpc->wmbufs[j].m_table[i] = rte_pktmbuf_alloc(pktmbuf_pool[ctxt->cpu]);
            if (dpc->wmbufs[j].m_table[i] == NULL) {
                TRACE_ERROR("Failed to allocate %d:wmbuf[%d] on device %d!\n",
                            ctxt->cpu, i, j);
                exit(EXIT_FAILURE);
            }
        }
        /* set mbufs queue length to 0 to begin with */
        dpc->wmbufs[j].len = 0;
    }

#ifdef ENABLE_STATS_IOCTL
    dpc->fd = open("/dev/dpdk-iface", O_RDWR);
    if (dpc->fd == -1) {
        TRACE_ERROR("Can't open /dev/dpdk-iface for context->cpu: %d! "
                    "Are you using the mlx4/mlx5 driver?\n",
                    ctxt->cpu);
    }
#endif /* !ENABLE_STATS_IOCTL */
}
/*----------------------------------------------------------------------------*/
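/*
 * A sketch of the TX path as exposed to the mTCP core (the actual caller
 * lives outside this file):
 *
 *     uint8_t *buf = dpdk_get_wptr(ctxt, nif, pktsize);  // reserve next wmbuf
 *     ... write the Ethernet/IP/TCP frame into buf ...
 *     dpdk_send_pkts(ctxt, nif);                         // flush queued frames
 *
 * dpdk_get_wptr() returns NULL once MAX_PKT_BURST frames are queued on a
 * port, so the queue must be flushed with dpdk_send_pkts() before more
 * packets can be staged.
 */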
int
dpdk_send_pkts(struct mtcp_thread_context *ctxt, int nif)
{
    struct dpdk_private_context *dpc;
    mtcp_manager_t mtcp;
    int ret;
    int qid;

    dpc = (struct dpdk_private_context *)ctxt->io_private_context;
    mtcp = ctxt->mtcp_manager;
    ret = 0;
    qid = cpu_qid_map[nif][ctxt->cpu];

    /* if the queue is unassigned, skip it */
    if (unlikely(qid == 0xFF))
        return 0;

    /* if there are packets in the queue, flush them out to the wire */
    if (dpc->wmbufs[nif].len > 0) {
        struct rte_mbuf **pkts;
#ifdef ENABLE_STATS_IOCTL
        struct stats_struct ss;
#endif /* !ENABLE_STATS_IOCTL */
        int cnt = dpc->wmbufs[nif].len;
        pkts = dpc->wmbufs[nif].m_table;
#ifdef NETSTAT
        mtcp->nstat.tx_packets[nif] += cnt;
#ifdef ENABLE_STATS_IOCTL
        if (likely(dpc->fd >= 0)) {
            ss.tx_pkts = mtcp->nstat.tx_packets[nif];
            ss.tx_bytes = mtcp->nstat.tx_bytes[nif];
            ss.rx_pkts = mtcp->nstat.rx_packets[nif];
            ss.rx_bytes = mtcp->nstat.rx_bytes[nif];
            ss.qid = ctxt->cpu;
            ss.dev = nif;
            ioctl(dpc->fd, 0, &ss);
        }
#endif /* !ENABLE_STATS_IOCTL */
#endif
        do {
            /* tx cnt # of packets */
            ret = rte_eth_tx_burst(nif, qid, pkts, cnt);
            pkts += ret;
            cnt -= ret;
            /* if not all pkts were sent, then repeat the cycle */
        } while (cnt > 0);

#ifndef SHARE_IO_BUFFER
        int i;
        /* time to allocate fresh mbufs for the queue */
        for (i = 0; i < dpc->wmbufs[nif].len; i++) {
            dpc->wmbufs[nif].m_table[i] = rte_pktmbuf_alloc(pktmbuf_pool[ctxt->cpu]);
            /* error checking */
            if (unlikely(dpc->wmbufs[nif].m_table[i] == NULL)) {
                TRACE_ERROR("Failed to allocate %d:wmbuf[%d] on device %d!\n",
                            ctxt->cpu, i, nif);
                exit(EXIT_FAILURE);
            }
        }
#endif
        /* reset the len of mbufs var after flushing the packets */
        dpc->wmbufs[nif].len = 0;
    }

    return ret;
}
/*----------------------------------------------------------------------------*/
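/*
 * dpdk_get_wptr() below implements the zero-copy TX model of this module:
 * rather than copying a finished frame into an mbuf, the caller obtains a
 * write pointer into the next pre-allocated wmbuf and builds the packet in
 * place. pkt_len/data_len are fixed to 'pktsize' up front, so the caller has
 * to know the final frame length before asking for the pointer.
 */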
uint8_t *
dpdk_get_wptr(struct mtcp_thread_context *ctxt, int nif, uint16_t pktsize)
{
    struct dpdk_private_context *dpc;
    mtcp_manager_t mtcp;
    struct rte_mbuf *m;
    uint8_t *ptr;
    int len_of_mbuf;

    dpc = (struct dpdk_private_context *) ctxt->io_private_context;
    mtcp = ctxt->mtcp_manager;

    /* sanity check */
    if (unlikely(dpc->wmbufs[nif].len == MAX_PKT_BURST))
        return NULL;

    len_of_mbuf = dpc->wmbufs[nif].len;
    m = dpc->wmbufs[nif].m_table[len_of_mbuf];

    /* retrieve the right write offset */
    ptr = (void *)rte_pktmbuf_mtod(m, struct ether_hdr *);
    m->pkt_len = m->data_len = pktsize;
    m->nb_segs = 1;
    m->next = NULL;

#ifdef NETSTAT
    mtcp->nstat.tx_bytes[nif] += pktsize + ETHER_OVR;
#endif

    /* increment the len_of_mbuf var */
    dpc->wmbufs[nif].len = len_of_mbuf + 1;

    return (uint8_t *)ptr;
}
/*----------------------------------------------------------------------------*/
void
dpdk_set_wptr(struct mtcp_thread_context *ctxt, int out_nif, int in_nif, int index)
{
    struct dpdk_private_context *dpc;
    mtcp_manager_t mtcp;
    int len_of_mbuf;

    dpc = (struct dpdk_private_context *) ctxt->io_private_context;
    mtcp = ctxt->mtcp_manager;

    /* sanity check */
    if (unlikely(dpc->wmbufs[out_nif].len == MAX_PKT_BURST))
        return;

    len_of_mbuf = dpc->wmbufs[out_nif].len;
    dpc->wmbufs[out_nif].m_table[len_of_mbuf] =
        dpc->rmbufs[in_nif].m_table[index];

    dpc->wmbufs[out_nif].m_table[len_of_mbuf]->udata64 = 0;

#ifdef NETSTAT
    mtcp->nstat.tx_bytes[out_nif] += dpc->rmbufs[in_nif].m_table[index]->pkt_len + ETHER_OVR;
#endif

    /* increment the len_of_mbuf var */
    dpc->wmbufs[out_nif].len = len_of_mbuf + 1;

    return;
}
/*----------------------------------------------------------------------------*/
static inline void
free_pkts(struct rte_mbuf **mtable, unsigned len)
{
    int i;

    /* free the packets */
    for (i = 0; i < len; i++) {
        if (mtable[i]->udata64 == 1) {
            rte_pktmbuf_free_seg(mtable[i]);
            RTE_MBUF_PREFETCH_TO_FREE(mtable[i+1]);
        }
    }
}
/*----------------------------------------------------------------------------*/
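/*
 * Ownership of received mbufs is tracked through udata64: dpdk_get_rptr()
 * tags an mbuf with 1 (locally consumed), while dpdk_set_wptr() clears it to
 * 0 when the same mbuf is re-queued for forwarding on another port.
 * free_pkts() therefore only frees the mbufs still owned by the RX path;
 * forwarded ones are released by the PMD after rte_eth_tx_burst() transmits
 * them.
 */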
int32_t
dpdk_recv_pkts(struct mtcp_thread_context *ctxt, int ifidx)
{
    struct dpdk_private_context *dpc;
    int ret;
    uint8_t qid;

    dpc = (struct dpdk_private_context *) ctxt->io_private_context;
    qid = cpu_qid_map[ifidx][ctxt->cpu];

    /* if the queue is unassigned, skip it */
    if (qid == 0xFF)
        return 0;

    if (dpc->rmbufs[ifidx].len != 0) {
        free_pkts(dpc->rmbufs[ifidx].m_table, dpc->rmbufs[ifidx].len);
        dpc->rmbufs[ifidx].len = 0;
    }

    ret = rte_eth_rx_burst((uint8_t)ifidx, qid,
                           dpc->pkts_burst, MAX_PKT_BURST);
#ifdef RX_IDLE_ENABLE
    dpc->rx_idle = (likely(ret != 0)) ? 0 : dpc->rx_idle + 1;
#endif
    dpc->rmbufs[ifidx].len = ret;

    return ret;
}
/*----------------------------------------------------------------------------*/
uint8_t *
dpdk_get_rptr(struct mtcp_thread_context *ctxt, int ifidx, int index, uint16_t *len)
{
    struct dpdk_private_context *dpc;
    struct rte_mbuf *m;
    uint8_t *pktbuf;

    dpc = (struct dpdk_private_context *) ctxt->io_private_context;

    m = dpc->pkts_burst[index];
    /* tag to check if the packet is a local or a forwarded pkt */
    m->udata64 = 1;
    /* don't enable pre-fetching... performance goes down */
    //rte_prefetch0(rte_pktmbuf_mtod(m, void *));
    *len = m->pkt_len;
    pktbuf = rte_pktmbuf_mtod(m, uint8_t *);

    /* enqueue the pkt ptr in mbuf */
    dpc->rmbufs[ifidx].m_table[index] = m;

    return pktbuf;
}
/*----------------------------------------------------------------------------*/
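/*
 * A sketch of the RX path built on the two functions above (the actual loop
 * lives in the mTCP core, not in this file):
 *
 *     int cnt = dpdk_recv_pkts(ctxt, ifidx);     // burst of up to MAX_PKT_BURST
 *     for (int idx = 0; idx < cnt; idx++) {
 *         uint16_t len;
 *         uint8_t *pkt = dpdk_get_rptr(ctxt, ifidx, idx, &len);
 *         ... process pkt[0..len) ...
 *     }
 *
 * mbufs handed out here are released on the next dpdk_recv_pkts() call for
 * the same interface, via free_pkts().
 */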
int
dpdk_get_nif(struct ifreq *ifr)
{
    int i;
    static int num_dev = -1;
    static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];

    /* get mac addr entries of 'detected' dpdk ports */
    if (num_dev < 0) {
#if (RTE_VER_YEAR <= 18) && (RTE_VER_MONTH <= 02)
        num_dev = rte_eth_dev_count();
#else
        num_dev = rte_eth_dev_count_avail();
#endif
        for (i = 0; i < num_dev; i++)
            rte_eth_macaddr_get(i, &ports_eth_addr[i]);
    }

    for (i = 0; i < num_dev; i++)
        if (!memcmp(&ifr->ifr_addr.sa_data[0], &ports_eth_addr[i], ETH_ALEN))
            return i;

    return -1;
}
/*----------------------------------------------------------------------------*/
int32_t
dpdk_select(struct mtcp_thread_context *ctxt)
{
#ifdef RX_IDLE_ENABLE
    struct dpdk_private_context *dpc;

    dpc = (struct dpdk_private_context *) ctxt->io_private_context;
    if (dpc->rx_idle > RX_IDLE_THRESH) {
        dpc->rx_idle = 0;
        usleep(RX_IDLE_TIMEOUT);
    }
#endif
    return 0;
}
/*----------------------------------------------------------------------------*/
void
dpdk_destroy_handle(struct mtcp_thread_context *ctxt)
{
    struct dpdk_private_context *dpc;
    int i;

    dpc = (struct dpdk_private_context *) ctxt->io_private_context;

    /* free wmbufs */
    for (i = 0; i < g_config.mos->netdev_table->num; i++)
        free_pkts(dpc->wmbufs[i].m_table, MAX_PKT_BURST);

#ifdef ENABLE_STATS_IOCTL
    /* close the stats fd */
    if (dpc->fd >= 0)
        close(dpc->fd);
#endif /* !ENABLE_STATS_IOCTL */

    /* free it all up */
    free(dpc);
}
/*----------------------------------------------------------------------------*/
static void
check_all_ports_link_status(uint8_t port_num, uint32_t port_mask)
{
#define CHECK_INTERVAL 100 /* 100ms */
#define MAX_CHECK_TIME 90  /* 9s (90 * 100ms) in total */

    uint8_t portid, count, all_ports_up, print_flag = 0;
    struct rte_eth_link link;

    printf("\nChecking link status");
    fflush(stdout);
    for (count = 0; count <= MAX_CHECK_TIME; count++) {
        all_ports_up = 1;
        for (portid = 0; portid < port_num; portid++) {
            if ((port_mask & (1 << portid)) == 0)
                continue;
            memset(&link, 0, sizeof(link));
            rte_eth_link_get_nowait(portid, &link);
            /* print link status if flag set */
            if (print_flag == 1) {
                if (link.link_status)
                    printf("Port %d Link Up - speed %u "
                           "Mbps - %s\n", (uint8_t)portid,
                           (unsigned)link.link_speed,
                           (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
                           ("full-duplex") : ("half-duplex"));
                else
                    printf("Port %d Link Down\n",
                           (uint8_t)portid);
                continue;
            }
            /* clear all_ports_up flag if any link is down */
            if (link.link_status == 0) {
                all_ports_up = 0;
                break;
            }
        }
        /* after finally printing all link status, get out */
        if (print_flag == 1)
            break;

        if (all_ports_up == 0) {
            printf(".");
            fflush(stdout);
            rte_delay_ms(CHECK_INTERVAL);
        }

        /* set the print_flag if all ports are up or the timeout expired */
        if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
            print_flag = 1;
            printf("done\n");
        }
    }
}
/*----------------------------------------------------------------------------*/
int32_t
dpdk_dev_ioctl(struct mtcp_thread_context *ctx, int nif, int cmd, void *argp)
{
    struct dpdk_private_context *dpc;
    struct rte_mbuf *m;
    int len_of_mbuf;
    struct iphdr *iph;
    struct tcphdr *tcph;
    RssInfo *rss_i;
    void **argpptr = (void **)argp;

    if (cmd == DRV_NAME) {
        *argpptr = (void *)dev_info->driver_name;
        return 0;
    }

    iph = (struct iphdr *)argp;
    dpc = (struct dpdk_private_context *)ctx->io_private_context;
    len_of_mbuf = dpc->wmbufs[nif].len;
    rss_i = NULL;

    switch (cmd) {
    case PKT_TX_IP_CSUM:
        m = dpc->wmbufs[nif].m_table[len_of_mbuf - 1];
        m->ol_flags = PKT_TX_IP_CKSUM | PKT_TX_IPV4;
        m->l2_len = sizeof(struct ether_hdr);
        m->l3_len = (iph->ihl << 2);
        break;
    case PKT_TX_TCP_CSUM:
        m = dpc->wmbufs[nif].m_table[len_of_mbuf - 1];
        tcph = (struct tcphdr *)((unsigned char *)iph + (iph->ihl << 2));
        m->ol_flags |= PKT_TX_TCP_CKSUM;
        tcph->check = rte_ipv4_phdr_cksum((struct ipv4_hdr *)iph, m->ol_flags);
        break;
    case PKT_RX_RSS:
        rss_i = (RssInfo *)argp;
        m = dpc->pkts_burst[rss_i->pktidx];
        rss_i->hash_value = m->hash.rss;
        break;
    default:
        goto dev_ioctl_err;
    }

    return 0;
dev_ioctl_err:
    return -1;
}
/*----------------------------------------------------------------------------*/
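/*
 * Notes on dpdk_dev_ioctl() above: PKT_TX_IP_CSUM and PKT_TX_TCP_CSUM arm the
 * NIC's checksum offloads on the most recently staged wmbuf. As DPDK
 * requires, the TCP case seeds tcph->check with the IPv4 pseudo-header
 * checksum (rte_ipv4_phdr_cksum()) so the hardware can fill in the rest,
 * while PKT_RX_RSS simply reports the RSS hash the NIC computed for a
 * received packet.
 */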
void
dpdk_load_module_upper_half(void)
{
    int cpu = g_config.mos->num_cores, ret;
    uint32_t cpumask = 0;
    char cpumaskbuf[10];
    char mem_channels[5];

    /* set the log level */
#if 0
    rte_set_log_type(RTE_LOGTYPE_PMD, 0);
    rte_set_log_type(RTE_LOGTYPE_MALLOC, 0);
    rte_set_log_type(RTE_LOGTYPE_MEMPOOL, 0);
    rte_set_log_type(RTE_LOGTYPE_RING, 0);
    rte_set_log_level(RTE_LOG_WARNING);
#else
    rte_log_set_level(RTE_LOGTYPE_PMD, 0);
    rte_log_set_level(RTE_LOGTYPE_MALLOC, 0);
    rte_log_set_level(RTE_LOGTYPE_MEMPOOL, 0);
    rte_log_set_level(RTE_LOGTYPE_RING, 0);
    rte_log_set_global_level(RTE_LOG_WARNING);
#endif
    /* build the cpu mask */
    for (ret = 0; ret < cpu; ret++)
        cpumask = (cpumask | (1 << ret));
    sprintf(cpumaskbuf, "%X", cpumask);

    /* get the number of memory channels per socket */
    if (g_config.mos->nb_mem_channels == 0) {
        TRACE_ERROR("DPDK module requires the number of memory channels "
                    "per socket as a parameter!\n");
        exit(EXIT_FAILURE);
    }
    sprintf(mem_channels, "%d", g_config.mos->nb_mem_channels);

    /* initialize the rte EAL environment first */
    char *argv[] = {"",
                    "-c",
                    cpumaskbuf,
                    "-n",
                    mem_channels,
                    "--proc-type=auto",
                    ""
    };
    const int argc = 6;

    /*
     * Re-set the getopt extern variable optind. rte_eal_init() internally
     * uses getopt(), so mtcp applications that also parse their options
     * with an `external' getopt will crash if optind is not reset to zero
     * before the call below. See getopt(3) for details.
     */
    optind = 0;

    /* initialize the dpdk eal env */
    ret = rte_eal_init(argc, argv);
    if (ret < 0)
        rte_exit(EXIT_FAILURE, "Invalid EAL args!\n");
}
/*----------------------------------------------------------------------------*/
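/*
 * For illustration (the values are an example, not a requirement): with
 * num_cores = 8 and nb_mem_channels = 4, the code above hands rte_eal_init()
 * the equivalent of the command line
 *
 *     <prog> -c FF -n 4 --proc-type=auto
 *
 * i.e. an 8-core coremask, 4 memory channels, and automatic primary/secondary
 * process detection for the multi-process setup handled in the lower half.
 */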
void
dpdk_load_module_lower_half(void)
{
    int portid, rxlcore_id, ret;
    struct rte_eth_fc_conf fc_conf; /* for Ethernet flow control settings */
    /* the RSS key: repeated 0x05 bytes keep the Toeplitz hash symmetric,
     * so both directions of a connection map to the same RX queue */
    static const uint8_t key[] = {
        0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
        0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
        0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
        0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
        0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
        0x05, 0x05
    };

    port_conf.rx_adv_conf.rss_conf.rss_key = (uint8_t *)&key;
    port_conf.rx_adv_conf.rss_conf.rss_key_len = sizeof(key);

    /* reset the cpu-to-queue-id mapping */
    memset(cpu_qid_map, 0xFF, sizeof(cpu_qid_map));

    if (!g_config.mos->multiprocess
        || (g_config.mos->multiprocess && g_config.mos->multiprocess_is_master)) {
        for (rxlcore_id = 0; rxlcore_id < g_config.mos->num_cores; rxlcore_id++) {
            char name[20];
            sprintf(name, "mbuf_pool-%d", rxlcore_id);
            /* create the mbuf pools */
            pktmbuf_pool[rxlcore_id] =
                rte_mempool_create(name, NB_MBUF,
                                   MBUF_SIZE, MEMPOOL_CACHE_SIZE,
                                   sizeof(struct rte_pktmbuf_pool_private),
                                   rte_pktmbuf_pool_init, NULL,
                                   rte_pktmbuf_init, NULL,
                                   rte_lcore_to_socket_id(rxlcore_id), 0);
            if (pktmbuf_pool[rxlcore_id] == NULL)
                rte_exit(EXIT_FAILURE, "Cannot init mbuf pool\n");
        }

        /* initialise each port */
        for (portid = 0; portid < g_config.mos->netdev_table->num; portid++) {
            int num_queue = 0, eth_idx, i, queue_id;
            for (eth_idx = 0; eth_idx < g_config.mos->netdev_table->num; eth_idx++)
                if (portid == g_config.mos->netdev_table->ent[eth_idx]->ifindex)
                    break;
            if (eth_idx == g_config.mos->netdev_table->num)
                continue;
            for (i = 0; i < sizeof(uint64_t) * 8; i++)
                if (g_config.mos->netdev_table->ent[eth_idx]->cpu_mask & (1L << i))
                    num_queue++;

            /* check port capabilities */
            rte_eth_dev_info_get(portid, &dev_info[portid]);

#if (RTE_VER_YEAR >= 18) && (RTE_VER_MONTH > 02)
            /* re-adjust rss_hf */
            port_conf.rx_adv_conf.rss_conf.rss_hf &= dev_info[portid].flow_type_rss_offloads;
#endif
            /* set 'num_queues' (used for GetRSSCPUCore() in util.c) */
            num_queues = num_queue;

            /* init port */
            printf("Initializing port %u... ", (unsigned) portid);
            fflush(stdout);
            ret = rte_eth_dev_configure(portid, num_queue, num_queue,
                                        &port_conf);
            if (ret < 0)
                rte_exit(EXIT_FAILURE, "Cannot configure device: "
                         "err=%d, port=%u\n",
                         ret, (unsigned) portid);

            /* init one RX queue per CPU */
            fflush(stdout);
#ifdef DEBUG
            rte_eth_macaddr_get(portid, &ports_eth_addr[portid]);
#endif
            queue_id = 0;
            for (rxlcore_id = 0; rxlcore_id < g_config.mos->num_cores; rxlcore_id++) {
                if (!(g_config.mos->netdev_table->ent[eth_idx]->cpu_mask & (1L << rxlcore_id)))
                    continue;
                ret = rte_eth_rx_queue_setup(portid, queue_id, nb_rxd,
                                             rte_eth_dev_socket_id(portid), &rx_conf,
                                             pktmbuf_pool[rxlcore_id]);
                if (ret < 0)
                    rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup: "
                             "err=%d, port=%u, queueid: %d\n",
                             ret, (unsigned) portid, rxlcore_id);
                cpu_qid_map[portid][rxlcore_id] = queue_id++;
            }

            /* init one TX queue on each port per CPU (this is redundant for
             * this app) */
            fflush(stdout);
            queue_id = 0;
            for (rxlcore_id = 0; rxlcore_id < g_config.mos->num_cores; rxlcore_id++) {
                if (!(g_config.mos->netdev_table->ent[eth_idx]->cpu_mask & (1L << rxlcore_id)))
                    continue;
                ret = rte_eth_tx_queue_setup(portid, queue_id++, nb_txd,
                                             rte_eth_dev_socket_id(portid), &tx_conf);
                if (ret < 0)
                    rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: "
                             "err=%d, port=%u, queueid: %d\n",
                             ret, (unsigned) portid, rxlcore_id);
            }

            /* start the device */
            ret = rte_eth_dev_start(portid);
            if (ret < 0)
                rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, port=%u\n",
                         ret, (unsigned) portid);

            printf("done: \n");
            rte_eth_promiscuous_enable(portid);

            /* retrieve current flow control settings per port */
            memset(&fc_conf, 0, sizeof(fc_conf));
            ret = rte_eth_dev_flow_ctrl_get(portid, &fc_conf);
            if (ret != 0) {
                rte_exit(EXIT_FAILURE, "Failed to get flow control info!\n");
            }

            /* and just disable the rx/tx flow control */
            fc_conf.mode = RTE_FC_NONE;
            ret = rte_eth_dev_flow_ctrl_set(portid, &fc_conf);
            if (ret != 0) {
                rte_exit(EXIT_FAILURE, "Failed to set flow control info!: errno: %d\n",
                         ret);
            }

#ifdef DEBUG
            printf("Port %u, MAC address: %02X:%02X:%02X:%02X:%02X:%02X\n\n",
                   (unsigned) portid,
                   ports_eth_addr[portid].addr_bytes[0],
                   ports_eth_addr[portid].addr_bytes[1],
                   ports_eth_addr[portid].addr_bytes[2],
                   ports_eth_addr[portid].addr_bytes[3],
                   ports_eth_addr[portid].addr_bytes[4],
                   ports_eth_addr[portid].addr_bytes[5]);
#endif
            /* only check for link status if the thread is master */
            check_all_ports_link_status(g_config.mos->netdev_table->num, 0xFFFFFFFF);
        }
    } else { /* g_config.mos->multiprocess && !g_config.mos->multiprocess_is_master */
        for (rxlcore_id = 0; rxlcore_id < g_config.mos->num_cores; rxlcore_id++) {
            char name[20];
            sprintf(name, "mbuf_pool-%d", rxlcore_id);
            /* look up the mbuf pools created by the master process */
            pktmbuf_pool[rxlcore_id] =
                rte_mempool_lookup(name);
            if (pktmbuf_pool[rxlcore_id] == NULL)
                rte_exit(EXIT_FAILURE, "Cannot init mbuf pool\n");
            for (portid = 0; portid < g_config.mos->netdev_table->num; portid++)
                cpu_qid_map[portid][rxlcore_id] = rxlcore_id;
        }
        /* set 'num_queues' (used for GetRSSCPUCore() in util.c) */
        num_queues = g_config.mos->num_cores;
    }
}
/*----------------------------------------------------------------------------*/
io_module_func dpdk_module_func = {
    .load_module_upper_half = dpdk_load_module_upper_half,
    .load_module_lower_half = dpdk_load_module_lower_half,
    .init_handle            = dpdk_init_handle,
    .link_devices           = NULL,
    .release_pkt            = NULL,
    .send_pkts              = dpdk_send_pkts,
    .get_wptr               = dpdk_get_wptr,
    .recv_pkts              = dpdk_recv_pkts,
    .get_rptr               = dpdk_get_rptr,
    .get_nif                = dpdk_get_nif,
    .select                 = dpdk_select,
    .destroy_handle         = dpdk_destroy_handle,
    .dev_ioctl              = dpdk_dev_ioctl,
    .set_wptr               = dpdk_set_wptr,
};
/*----------------------------------------------------------------------------*/