1 /* for io_module_func def'ns */ 2 #include "io_module.h" 3 /* for mtcp related def'ns */ 4 #include "mtcp.h" 5 /* for errno */ 6 #include <errno.h> 7 /* for close/optind */ 8 #include <unistd.h> 9 /* for logging */ 10 #include "debug.h" 11 /* for num_devices_* */ 12 #include "config.h" 13 /* for rte_max_eth_ports */ 14 #include <rte_common.h> 15 /* for rte_eth_rxconf */ 16 #include <rte_ethdev.h> 17 /* for delay funcs */ 18 #include <rte_cycles.h> 19 /* for ip pesudo-chksum */ 20 #include <rte_ip.h> 21 #define ENABLE_STATS_IOCTL 1 22 #ifdef ENABLE_STATS_IOCTL 23 /* for open */ 24 #include <fcntl.h> 25 /* for ioctl */ 26 #include <sys/ioctl.h> 27 #endif /* !ENABLE_STATS_IOCTL */ 28 /*----------------------------------------------------------------------------*/ 29 /* Essential macros */ 30 #define MAX_RX_QUEUE_PER_LCORE MAX_CPUS 31 #define MAX_TX_QUEUE_PER_PORT MAX_CPUS 32 33 #define MBUF_SIZE (2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM) 34 #define NB_MBUF 8192 35 #define MEMPOOL_CACHE_SIZE 256 36 //#define RX_IDLE_ENABLE 1 37 #define RX_IDLE_TIMEOUT 1 /* in micro-seconds */ 38 #define RX_IDLE_THRESH 64 39 40 /* 41 * RX and TX Prefetch, Host, and Write-back threshold values should be 42 * carefully set for optimal performance. Consult the network 43 * controller's datasheet and supporting DPDK documentation for guidance 44 * on how these parameters should be set. 45 */ 46 #define RX_PTHRESH 8 /**< Default values of RX prefetch threshold reg. */ 47 #define RX_HTHRESH 8 /**< Default values of RX host threshold reg. */ 48 #define RX_WTHRESH 4 /**< Default values of RX write-back threshold reg. */ 49 50 /* 51 * These default values are optimized for use with the Intel(R) 82599 10 GbE 52 * Controller and the DPDK ixgbe PMD. Consider using other values for other 53 * network controllers and/or network drivers. 54 */ 55 #define TX_PTHRESH 36 /**< Default values of TX prefetch threshold reg. */ 56 #define TX_HTHRESH 0 /**< Default values of TX host threshold reg. */ 57 #define TX_WTHRESH 0 /**< Default values of TX write-back threshold reg. */ 58 59 #define MAX_PKT_BURST /*32*/64/*128*//*32*/ 60 61 /* 62 * Configurable number of RX/TX ring descriptors 63 */ 64 #define RTE_TEST_RX_DESC_DEFAULT 128 65 #define RTE_TEST_TX_DESC_DEFAULT 512 66 67 static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; 68 static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; 69 /*----------------------------------------------------------------------------*/ 70 /* packet memory pools for storing packet bufs */ 71 static struct rte_mempool *pktmbuf_pool[MAX_CPUS] = {NULL}; 72 static uint8_t cpu_qid_map[RTE_MAX_ETHPORTS][MAX_CPUS] = {{0}}; 73 74 //#define DEBUG 1 75 #ifdef DEBUG 76 /* ethernet addresses of ports */ 77 static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS]; 78 #endif 79 80 static struct rte_eth_dev_info dev_info[RTE_MAX_ETHPORTS]; 81 82 static struct rte_eth_conf port_conf = { 83 .rxmode = { 84 .mq_mode = ETH_MQ_RX_RSS, 85 .max_rx_pkt_len = ETHER_MAX_LEN, 86 .split_hdr_size = 0, 87 .header_split = 0, /**< Header Split disabled */ 88 .hw_ip_checksum = 1, /**< IP checksum offload enabled */ 89 .hw_vlan_filter = 0, /**< VLAN filtering disabled */ 90 .jumbo_frame = 0, /**< Jumbo Frame Support disabled */ 91 .hw_strip_crc = 1, /**< CRC stripped by hardware */ 92 }, 93 .rx_adv_conf = { 94 .rss_conf = { 95 .rss_key = NULL, 96 .rss_hf = ETH_RSS_TCP | ETH_RSS_UDP | 97 ETH_RSS_IP | ETH_RSS_L2_PAYLOAD 98 }, 99 }, 100 .txmode = { 101 .mq_mode = ETH_MQ_TX_NONE, 102 }, 103 }; 104 105 static const struct rte_eth_rxconf rx_conf = { 106 .rx_thresh = { 107 .pthresh = RX_PTHRESH, /* RX prefetch threshold reg */ 108 .hthresh = RX_HTHRESH, /* RX host threshold reg */ 109 .wthresh = RX_WTHRESH, /* RX write-back threshold reg */ 110 }, 111 .rx_free_thresh = 32, 112 }; 113 114 static const struct rte_eth_txconf tx_conf = { 115 .tx_thresh = { 116 .pthresh = TX_PTHRESH, /* TX prefetch threshold reg */ 117 .hthresh = TX_HTHRESH, /* TX host threshold reg */ 118 .wthresh = TX_WTHRESH, /* TX write-back threshold reg */ 119 }, 120 .tx_free_thresh = 0, /* Use PMD default values */ 121 .tx_rs_thresh = 0, /* Use PMD default values */ 122 /* 123 * As the example won't handle mult-segments and offload cases, 124 * set the flag by default. 125 */ 126 .txq_flags = 0x0, 127 }; 128 129 struct mbuf_table { 130 unsigned len; /* length of queued packets */ 131 struct rte_mbuf *m_table[MAX_PKT_BURST]; 132 }; 133 134 struct dpdk_private_context { 135 struct mbuf_table rmbufs[RTE_MAX_ETHPORTS]; 136 struct mbuf_table wmbufs[RTE_MAX_ETHPORTS]; 137 struct rte_mempool *pktmbuf_pool; 138 struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 139 #ifdef RX_IDLE_ENABLE 140 uint8_t rx_idle; 141 #endif 142 #ifdef ENABLE_STATS_IOCTL 143 int fd; 144 #endif /* !ENABLE_STATS_IOCTL */ 145 } __rte_cache_aligned; 146 147 #ifdef ENABLE_STATS_IOCTL 148 /** 149 * stats struct passed on from user space to the driver 150 */ 151 struct stats_struct { 152 uint64_t tx_bytes; 153 uint64_t tx_pkts; 154 uint64_t rx_bytes; 155 uint64_t rx_pkts; 156 uint8_t qid; 157 uint8_t dev; 158 }; 159 #endif /* !ENABLE_STATS_IOCTL */ 160 /*----------------------------------------------------------------------------*/ 161 void 162 dpdk_init_handle(struct mtcp_thread_context *ctxt) 163 { 164 struct dpdk_private_context *dpc; 165 int i, j; 166 char mempool_name[20]; 167 168 /* create and initialize private I/O module context */ 169 ctxt->io_private_context = calloc(1, sizeof(struct dpdk_private_context)); 170 if (ctxt->io_private_context == NULL) { 171 TRACE_ERROR("Failed to initialize ctxt->io_private_context: " 172 "Can't allocate memory\n"); 173 exit(EXIT_FAILURE); 174 } 175 176 sprintf(mempool_name, "mbuf_pool-%d", ctxt->cpu); 177 dpc = (struct dpdk_private_context *)ctxt->io_private_context; 178 dpc->pktmbuf_pool = pktmbuf_pool[ctxt->cpu]; 179 180 /* set wmbufs correctly */ 181 for (j = 0; j < g_config.mos->netdev_table->num; j++) { 182 /* Allocate wmbufs for each registered port */ 183 for (i = 0; i < MAX_PKT_BURST; i++) { 184 dpc->wmbufs[j].m_table[i] = rte_pktmbuf_alloc(pktmbuf_pool[ctxt->cpu]); 185 if (dpc->wmbufs[j].m_table[i] == NULL) { 186 TRACE_ERROR("Failed to allocate %d:wmbuf[%d] on device %d!\n", 187 ctxt->cpu, i, j); 188 exit(EXIT_FAILURE); 189 } 190 } 191 /* set mbufs queue length to 0 to begin with */ 192 dpc->wmbufs[j].len = 0; 193 } 194 195 #ifdef ENABLE_STATS_IOCTL 196 dpc->fd = open("/dev/dpdk-iface", O_RDWR); 197 if (dpc->fd == -1) { 198 TRACE_ERROR("Can't open /dev/dpdk-iface for context->cpu: %d! " 199 "Are you using mlx4/mlx5 driver?\n", 200 ctxt->cpu); 201 } 202 #endif /* !ENABLE_STATS_IOCTL */ 203 } 204 /*----------------------------------------------------------------------------*/ 205 int 206 dpdk_send_pkts(struct mtcp_thread_context *ctxt, int nif) 207 { 208 struct dpdk_private_context *dpc; 209 mtcp_manager_t mtcp; 210 int ret; 211 int qid; 212 213 dpc = (struct dpdk_private_context *)ctxt->io_private_context; 214 mtcp = ctxt->mtcp_manager; 215 ret = 0; 216 qid = cpu_qid_map[nif][ctxt->cpu]; 217 218 /* if queue is unassigned, skip it.. */ 219 if (unlikely(qid == 0xFF)) 220 return 0; 221 222 /* if there are packets in the queue... flush them out to the wire */ 223 if (dpc->wmbufs[nif].len >/*= MAX_PKT_BURST*/ 0) { 224 struct rte_mbuf **pkts; 225 #ifdef ENABLE_STATS_IOCTL 226 struct stats_struct ss; 227 #endif /* !ENABLE_STATS_IOCTL */ 228 int cnt = dpc->wmbufs[nif].len; 229 pkts = dpc->wmbufs[nif].m_table; 230 #ifdef NETSTAT 231 mtcp->nstat.tx_packets[nif] += cnt; 232 #ifdef ENABLE_STATS_IOCTL 233 if (likely(dpc->fd) >= 0) { 234 ss.tx_pkts = mtcp->nstat.tx_packets[nif]; 235 ss.tx_bytes = mtcp->nstat.tx_bytes[nif]; 236 ss.rx_pkts = mtcp->nstat.rx_packets[nif]; 237 ss.rx_bytes = mtcp->nstat.rx_bytes[nif]; 238 ss.qid = ctxt->cpu; 239 ss.dev = nif; 240 ioctl(dpc->fd, 0, &ss); 241 } 242 #endif /* !ENABLE_STATS_IOCTL */ 243 #endif 244 do { 245 /* tx cnt # of packets */ 246 ret = rte_eth_tx_burst(nif, qid, 247 pkts, cnt); 248 pkts += ret; 249 cnt -= ret; 250 /* if not all pkts were sent... then repeat the cycle */ 251 } while (cnt > 0); 252 253 #ifndef SHARE_IO_BUFFER 254 int i; 255 /* time to allocate fresh mbufs for the queue */ 256 for (i = 0; i < dpc->wmbufs[nif].len; i++) { 257 dpc->wmbufs[nif].m_table[i] = rte_pktmbuf_alloc(pktmbuf_pool[ctxt->cpu]); 258 /* error checking */ 259 if (unlikely(dpc->wmbufs[nif].m_table[i] == NULL)) { 260 TRACE_ERROR("Failed to allocate %d:wmbuf[%d] on device %d!\n", 261 ctxt->cpu, i, nif); 262 exit(EXIT_FAILURE); 263 } 264 } 265 #endif 266 /* reset the len of mbufs var after flushing of packets */ 267 dpc->wmbufs[nif].len = 0; 268 } 269 270 return ret; 271 } 272 /*----------------------------------------------------------------------------*/ 273 uint8_t * 274 dpdk_get_wptr(struct mtcp_thread_context *ctxt, int nif, uint16_t pktsize) 275 { 276 struct dpdk_private_context *dpc; 277 mtcp_manager_t mtcp; 278 struct rte_mbuf *m; 279 uint8_t *ptr; 280 int len_of_mbuf; 281 282 dpc = (struct dpdk_private_context *) ctxt->io_private_context; 283 mtcp = ctxt->mtcp_manager; 284 285 /* sanity check */ 286 if (unlikely(dpc->wmbufs[nif].len == MAX_PKT_BURST)) 287 return NULL; 288 289 len_of_mbuf = dpc->wmbufs[nif].len; 290 m = dpc->wmbufs[nif].m_table[len_of_mbuf]; 291 292 /* retrieve the right write offset */ 293 ptr = (void *)rte_pktmbuf_mtod(m, struct ether_hdr *); 294 m->pkt_len = m->data_len = pktsize; 295 m->nb_segs = 1; 296 m->next = NULL; 297 298 #ifdef NETSTAT 299 mtcp->nstat.tx_bytes[nif] += pktsize + ETHER_OVR; 300 #endif 301 302 /* increment the len_of_mbuf var */ 303 dpc->wmbufs[nif].len = len_of_mbuf + 1; 304 305 return (uint8_t *)ptr; 306 } 307 /*----------------------------------------------------------------------------*/ 308 void 309 dpdk_set_wptr(struct mtcp_thread_context *ctxt, int out_nif, int in_nif, int index) 310 { 311 struct dpdk_private_context *dpc; 312 mtcp_manager_t mtcp; 313 int len_of_mbuf; 314 315 dpc = (struct dpdk_private_context *) ctxt->io_private_context; 316 mtcp = ctxt->mtcp_manager; 317 318 /* sanity check */ 319 if (unlikely(dpc->wmbufs[out_nif].len == MAX_PKT_BURST)) 320 return; 321 322 len_of_mbuf = dpc->wmbufs[out_nif].len; 323 dpc->wmbufs[out_nif].m_table[len_of_mbuf] = 324 dpc->rmbufs[in_nif].m_table[index]; 325 326 dpc->wmbufs[out_nif].m_table[len_of_mbuf]->udata64 = 0; 327 328 #ifdef NETSTAT 329 mtcp->nstat.tx_bytes[out_nif] += dpc->rmbufs[in_nif].m_table[index]->pkt_len + ETHER_OVR; 330 #endif 331 332 /* increment the len_of_mbuf var */ 333 dpc->wmbufs[out_nif].len = len_of_mbuf + 1; 334 335 return; 336 } 337 /*----------------------------------------------------------------------------*/ 338 static inline void 339 free_pkts(struct rte_mbuf **mtable, unsigned len) 340 { 341 int i; 342 343 /* free the freaking packets */ 344 for (i = 0; i < len; i++) { 345 if (mtable[i]->udata64 == 1) { 346 rte_pktmbuf_free_seg(mtable[i]); 347 RTE_MBUF_PREFETCH_TO_FREE(mtable[i+1]); 348 } 349 } 350 } 351 /*----------------------------------------------------------------------------*/ 352 int32_t 353 dpdk_recv_pkts(struct mtcp_thread_context *ctxt, int ifidx) 354 { 355 struct dpdk_private_context *dpc; 356 int ret; 357 uint8_t qid; 358 359 dpc = (struct dpdk_private_context *) ctxt->io_private_context; 360 qid = cpu_qid_map[ifidx][ctxt->cpu]; 361 362 /* if queue is unassigned, skip it.. */ 363 if (qid == 0xFF) 364 return 0; 365 366 if (dpc->rmbufs[ifidx].len != 0) { 367 free_pkts(dpc->rmbufs[ifidx].m_table, dpc->rmbufs[ifidx].len); 368 dpc->rmbufs[ifidx].len = 0; 369 } 370 371 ret = rte_eth_rx_burst((uint8_t)ifidx, qid, 372 dpc->pkts_burst, MAX_PKT_BURST); 373 #ifdef RX_IDLE_ENABLE 374 dpc->rx_idle = (likely(ret != 0)) ? 0 : dpc->rx_idle + 1; 375 #endif 376 dpc->rmbufs[ifidx].len = ret; 377 378 return ret; 379 } 380 /*----------------------------------------------------------------------------*/ 381 uint8_t * 382 dpdk_get_rptr(struct mtcp_thread_context *ctxt, int ifidx, int index, uint16_t *len) 383 { 384 struct dpdk_private_context *dpc; 385 struct rte_mbuf *m; 386 uint8_t *pktbuf; 387 388 dpc = (struct dpdk_private_context *) ctxt->io_private_context; 389 390 391 m = dpc->pkts_burst[index]; 392 /* tag to check if the packet is a local or a forwarded pkt */ 393 m->udata64 = 1; 394 /* don't enable pre-fetching... performance goes down */ 395 //rte_prefetch0(rte_pktmbuf_mtod(m, void *)); 396 *len = m->pkt_len; 397 pktbuf = rte_pktmbuf_mtod(m, uint8_t *); 398 399 /* enqueue the pkt ptr in mbuf */ 400 dpc->rmbufs[ifidx].m_table[index] = m; 401 402 return pktbuf; 403 } 404 /*----------------------------------------------------------------------------*/ 405 int 406 dpdk_get_nif(struct ifreq *ifr) 407 { 408 int i; 409 static int num_dev = -1; 410 static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS]; 411 /* get mac addr entries of 'detected' dpdk ports */ 412 if (num_dev < 0) { 413 num_dev = rte_eth_dev_count(); 414 for (i = 0; i < num_dev; i++) 415 rte_eth_macaddr_get(i, &ports_eth_addr[i]); 416 } 417 418 for (i = 0; i < num_dev; i++) 419 if (!memcmp(&ifr->ifr_addr.sa_data[0], &ports_eth_addr[i], ETH_ALEN)) 420 return i; 421 422 return -1; 423 } 424 /*----------------------------------------------------------------------------*/ 425 int32_t 426 dpdk_select(struct mtcp_thread_context *ctxt) 427 { 428 #ifdef RX_IDLE_ENABLE 429 struct dpdk_private_context *dpc; 430 431 dpc = (struct dpdk_private_context *) ctxt->io_private_context; 432 if (dpc->rx_idle > RX_IDLE_THRESH) { 433 dpc->rx_idle = 0; 434 usleep(RX_IDLE_TIMEOUT); 435 } 436 #endif 437 return 0; 438 } 439 /*----------------------------------------------------------------------------*/ 440 void 441 dpdk_destroy_handle(struct mtcp_thread_context *ctxt) 442 { 443 struct dpdk_private_context *dpc; 444 int i; 445 446 dpc = (struct dpdk_private_context *) ctxt->io_private_context; 447 448 /* free wmbufs */ 449 for (i = 0; i < g_config.mos->netdev_table->num; i++) 450 free_pkts(dpc->wmbufs[i].m_table, MAX_PKT_BURST); 451 452 #ifdef ENABLE_STATS_IOCTL 453 /* free fd */ 454 if (dpc->fd >= 0) 455 close(dpc->fd); 456 #endif /* !ENABLE_STATS_IOCTL */ 457 458 /* free it all up */ 459 free(dpc); 460 } 461 /*----------------------------------------------------------------------------*/ 462 static void 463 check_all_ports_link_status(uint8_t port_num, uint32_t port_mask) 464 { 465 #define CHECK_INTERVAL 100 /* 100ms */ 466 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */ 467 468 uint8_t portid, count, all_ports_up, print_flag = 0; 469 struct rte_eth_link link; 470 471 printf("\nChecking link status"); 472 fflush(stdout); 473 for (count = 0; count <= MAX_CHECK_TIME; count++) { 474 all_ports_up = 1; 475 for (portid = 0; portid < port_num; portid++) { 476 if ((port_mask & (1 << portid)) == 0) 477 continue; 478 memset(&link, 0, sizeof(link)); 479 rte_eth_link_get_nowait(portid, &link); 480 /* print link status if flag set */ 481 if (print_flag == 1) { 482 if (link.link_status) 483 printf("Port %d Link Up - speed %u " 484 "Mbps - %s\n", (uint8_t)portid, 485 (unsigned)link.link_speed, 486 (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? 487 ("full-duplex") : ("half-duplex\n")); 488 else 489 printf("Port %d Link Down\n", 490 (uint8_t)portid); 491 continue; 492 } 493 /* clear all_ports_up flag if any link down */ 494 if (link.link_status == 0) { 495 all_ports_up = 0; 496 break; 497 } 498 } 499 /* after finally printing all link status, get out */ 500 if (print_flag == 1) 501 break; 502 503 if (all_ports_up == 0) { 504 printf("."); 505 fflush(stdout); 506 rte_delay_ms(CHECK_INTERVAL); 507 } 508 509 /* set the print_flag if all ports up or timeout */ 510 if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) { 511 print_flag = 1; 512 printf("done\n"); 513 } 514 } 515 } 516 /*----------------------------------------------------------------------------*/ 517 int32_t 518 dpdk_dev_ioctl(struct mtcp_thread_context *ctx, int nif, int cmd, void *argp) 519 { 520 struct dpdk_private_context *dpc; 521 struct rte_mbuf *m; 522 int len_of_mbuf; 523 struct iphdr *iph; 524 struct tcphdr *tcph; 525 RssInfo *rss_i; 526 void **argpptr = (void **)argp; 527 528 if (cmd == DRV_NAME) { 529 *argpptr = (void *)dev_info->driver_name; 530 return 0; 531 } 532 533 iph = (struct iphdr *)argp; 534 dpc = (struct dpdk_private_context *)ctx->io_private_context; 535 len_of_mbuf = dpc->wmbufs[nif].len; 536 rss_i = NULL; 537 538 switch (cmd) { 539 case PKT_TX_IP_CSUM: 540 m = dpc->wmbufs[nif].m_table[len_of_mbuf - 1]; 541 m->ol_flags = PKT_TX_IP_CKSUM | PKT_TX_IPV4; 542 m->l2_len = sizeof(struct ether_hdr); 543 m->l3_len = (iph->ihl<<2); 544 break; 545 case PKT_TX_TCP_CSUM: 546 m = dpc->wmbufs[nif].m_table[len_of_mbuf - 1]; 547 tcph = (struct tcphdr *)((unsigned char *)iph + (iph->ihl<<2)); 548 m->ol_flags |= PKT_TX_TCP_CKSUM; 549 tcph->check = rte_ipv4_phdr_cksum((struct ipv4_hdr *)iph, m->ol_flags); 550 break; 551 case PKT_RX_RSS: 552 rss_i = (RssInfo *)argp; 553 m = dpc->pkts_burst[rss_i->pktidx]; 554 rss_i->hash_value = m->hash.rss; 555 break; 556 default: 557 goto dev_ioctl_err; 558 } 559 560 return 0; 561 dev_ioctl_err: 562 return -1; 563 } 564 /*----------------------------------------------------------------------------*/ 565 void 566 dpdk_load_module_upper_half(void) 567 { 568 int cpu = g_config.mos->num_cores, ret; 569 uint32_t cpumask = 0; 570 char cpumaskbuf[10]; 571 char mem_channels[5]; 572 573 /* set the log level */ 574 #if 0 575 rte_set_log_type(RTE_LOGTYPE_PMD, 0); 576 rte_set_log_type(RTE_LOGTYPE_MALLOC, 0); 577 rte_set_log_type(RTE_LOGTYPE_MEMPOOL, 0); 578 rte_set_log_type(RTE_LOGTYPE_RING, 0); 579 rte_set_log_level(RTE_LOG_WARNING); 580 #else 581 rte_log_set_level(RTE_LOGTYPE_PMD, 0); 582 rte_log_set_level(RTE_LOGTYPE_MALLOC, 0); 583 rte_log_set_level(RTE_LOGTYPE_MEMPOOL, 0); 584 rte_log_set_level(RTE_LOGTYPE_RING, 0); 585 rte_log_set_level(RTE_LOG_WARNING, 0); 586 #endif 587 /* get the cpu mask */ 588 for (ret = 0; ret < cpu; ret++) 589 cpumask = (cpumask | (1 << ret)); 590 sprintf(cpumaskbuf, "%X", cpumask); 591 592 /* get the mem channels per socket */ 593 if (g_config.mos->nb_mem_channels == 0) { 594 TRACE_ERROR("DPDK module requires # of memory channels " 595 "per socket parameter!\n"); 596 exit(EXIT_FAILURE); 597 } 598 sprintf(mem_channels, "%d", g_config.mos->nb_mem_channels); 599 600 /* initialize the rte env first, what a waste of implementation effort! */ 601 char *argv[] = {"", 602 "-c", 603 cpumaskbuf, 604 "-n", 605 mem_channels, 606 "--proc-type=auto", 607 "" 608 }; 609 const int argc = 6; 610 611 /* 612 * re-set getopt extern variable optind. 613 * this issue was a bitch to debug 614 * rte_eal_init() internally uses getopt() syscall 615 * mtcp applications that also use an `external' getopt 616 * will cause a violent crash if optind is not reset to zero 617 * prior to calling the func below... 618 * see man getopt(3) for more details 619 */ 620 optind = 0; 621 622 /* initialize the dpdk eal env */ 623 ret = rte_eal_init(argc, argv); 624 if (ret < 0) 625 rte_exit(EXIT_FAILURE, "Invalid EAL args!\n"); 626 627 } 628 /*----------------------------------------------------------------------------*/ 629 void 630 dpdk_load_module_lower_half(void) 631 { 632 int portid, rxlcore_id, ret; 633 struct rte_eth_fc_conf fc_conf; /* for Ethernet flow control settings */ 634 /* setting the rss key */ 635 static const uint8_t key[] = { 636 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 637 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 638 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 639 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 640 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 641 0x05, 0x05 642 }; 643 644 port_conf.rx_adv_conf.rss_conf.rss_key = (uint8_t *)&key; 645 port_conf.rx_adv_conf.rss_conf.rss_key_len = sizeof(key); 646 647 /* resetting cpu_qid mapping */ 648 memset(cpu_qid_map, 0xFF, sizeof(cpu_qid_map)); 649 650 if (!g_config.mos->multiprocess 651 || (g_config.mos->multiprocess && g_config.mos->multiprocess_is_master)) { 652 for (rxlcore_id = 0; rxlcore_id < g_config.mos->num_cores; rxlcore_id++) { 653 char name[20]; 654 sprintf(name, "mbuf_pool-%d", rxlcore_id); 655 /* create the mbuf pools */ 656 pktmbuf_pool[rxlcore_id] = 657 rte_mempool_create(name, NB_MBUF, 658 MBUF_SIZE, MEMPOOL_CACHE_SIZE, 659 sizeof(struct rte_pktmbuf_pool_private), 660 rte_pktmbuf_pool_init, NULL, 661 rte_pktmbuf_init, NULL, 662 rte_lcore_to_socket_id(rxlcore_id), 0); 663 if (pktmbuf_pool[rxlcore_id] == NULL) 664 rte_exit(EXIT_FAILURE, "Cannot init mbuf pool\n"); 665 } 666 667 /* Initialise each port */ 668 for (portid = 0; portid < g_config.mos->netdev_table->num; portid++) { 669 int num_queue = 0, eth_idx, i, queue_id; 670 for (eth_idx = 0; eth_idx < g_config.mos->netdev_table->num; eth_idx++) 671 if (portid == g_config.mos->netdev_table->ent[eth_idx]->ifindex) 672 break; 673 if (eth_idx == g_config.mos->netdev_table->num) 674 continue; 675 for (i = 0; i < sizeof(uint64_t) * 8; i++) 676 if (g_config.mos->netdev_table->ent[eth_idx]->cpu_mask & (1L << i)) 677 num_queue++; 678 679 /* set 'num_queues' (used for GetRSSCPUCore() in util.c) */ 680 num_queues = num_queue; 681 682 /* init port */ 683 printf("Initializing port %u... ", (unsigned) portid); 684 fflush(stdout); 685 ret = rte_eth_dev_configure(portid, num_queue, num_queue, 686 &port_conf); 687 if (ret < 0) 688 rte_exit(EXIT_FAILURE, "Cannot configure device:" 689 "err=%d, port=%u\n", 690 ret, (unsigned) portid); 691 692 /* init one RX queue per CPU */ 693 fflush(stdout); 694 #ifdef DEBUG 695 rte_eth_macaddr_get(portid, &ports_eth_addr[portid]); 696 #endif 697 /* check port capabilities */ 698 rte_eth_dev_info_get(portid, &dev_info[portid]); 699 700 queue_id = 0; 701 for (rxlcore_id = 0; rxlcore_id < g_config.mos->num_cores; rxlcore_id++) { 702 if (!(g_config.mos->netdev_table->ent[eth_idx]->cpu_mask & (1L << rxlcore_id))) 703 continue; 704 ret = rte_eth_rx_queue_setup(portid, queue_id, nb_rxd, 705 rte_eth_dev_socket_id(portid), &rx_conf, 706 pktmbuf_pool[rxlcore_id]); 707 if (ret < 0) 708 rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup:" 709 "err=%d, port=%u, queueid: %d\n", 710 ret, (unsigned) portid, rxlcore_id); 711 cpu_qid_map[portid][rxlcore_id] = queue_id++; 712 } 713 714 /* init one TX queue on each port per CPU (this is redundant for 715 * this app) */ 716 fflush(stdout); 717 queue_id = 0; 718 for (rxlcore_id = 0; rxlcore_id < g_config.mos->num_cores; rxlcore_id++) { 719 if (!(g_config.mos->netdev_table->ent[eth_idx]->cpu_mask & (1L << rxlcore_id))) 720 continue; 721 ret = rte_eth_tx_queue_setup(portid, queue_id++, nb_txd, 722 rte_eth_dev_socket_id(portid), &tx_conf); 723 if (ret < 0) 724 rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup:" 725 "err=%d, port=%u, queueid: %d\n", 726 ret, (unsigned) portid, rxlcore_id); 727 } 728 729 /* Start device */ 730 ret = rte_eth_dev_start(portid); 731 if (ret < 0) 732 rte_exit(EXIT_FAILURE, "rte_eth_dev_start:err=%d, port=%u\n", 733 ret, (unsigned) portid); 734 735 printf("done: \n"); 736 rte_eth_promiscuous_enable(portid); 737 738 /* retrieve current flow control settings per port */ 739 memset(&fc_conf, 0, sizeof(fc_conf)); 740 ret = rte_eth_dev_flow_ctrl_get(portid, &fc_conf); 741 if (ret != 0) { 742 rte_exit(EXIT_FAILURE, "Failed to get flow control info!\n"); 743 } 744 745 /* and just disable the rx/tx flow control */ 746 fc_conf.mode = RTE_FC_NONE; 747 ret = rte_eth_dev_flow_ctrl_set(portid, &fc_conf); 748 if (ret != 0) { 749 rte_exit(EXIT_FAILURE, "Failed to set flow control info!: errno: %d\n", 750 ret); 751 } 752 753 #ifdef DEBUG 754 printf("Port %u, MAC address: %02X:%02X:%02X:%02X:%02X:%02X\n\n", 755 (unsigned) portid, 756 ports_eth_addr[portid].addr_bytes[0], 757 ports_eth_addr[portid].addr_bytes[1], 758 ports_eth_addr[portid].addr_bytes[2], 759 ports_eth_addr[portid].addr_bytes[3], 760 ports_eth_addr[portid].addr_bytes[4], 761 ports_eth_addr[portid].addr_bytes[5]); 762 #endif 763 /* only check for link status if the thread is master */ 764 check_all_ports_link_status(g_config.mos->netdev_table->num, 0xFFFFFFFF); 765 } 766 } else { /* g_config.mos->multiprocess && !g_config.mos->multiprocess_is_master */ 767 for (rxlcore_id = 0; rxlcore_id < g_config.mos->num_cores; rxlcore_id++) { 768 char name[20]; 769 sprintf(name, "mbuf_pool-%d", rxlcore_id); 770 /* initialize the mbuf pools */ 771 pktmbuf_pool[rxlcore_id] = 772 rte_mempool_lookup(name); 773 if (pktmbuf_pool[rxlcore_id] == NULL) 774 rte_exit(EXIT_FAILURE, "Cannot init mbuf pool\n"); 775 for (portid = 0; portid < g_config.mos->netdev_table->num; portid++) 776 cpu_qid_map[portid][rxlcore_id] = rxlcore_id; 777 } 778 /* set 'num_queues' (used for GetRSSCPUCore() in util.c) */ 779 num_queues = g_config.mos->num_cores; 780 } 781 782 } 783 /*----------------------------------------------------------------------------*/ 784 io_module_func dpdk_module_func = { 785 .load_module_upper_half = dpdk_load_module_upper_half, 786 .load_module_lower_half = dpdk_load_module_lower_half, 787 .init_handle = dpdk_init_handle, 788 .link_devices = NULL, 789 .release_pkt = NULL, 790 .send_pkts = dpdk_send_pkts, 791 .get_wptr = dpdk_get_wptr, 792 .recv_pkts = dpdk_recv_pkts, 793 .get_rptr = dpdk_get_rptr, 794 .get_nif = dpdk_get_nif, 795 .select = dpdk_select, 796 .destroy_handle = dpdk_destroy_handle, 797 .dev_ioctl = dpdk_dev_ioctl, 798 .set_wptr = dpdk_set_wptr, 799 }; 800 /*----------------------------------------------------------------------------*/ 801 802