1 /* for io_module_func def'ns */ 2 #include "io_module.h" 3 /* for mtcp related def'ns */ 4 #include "mtcp.h" 5 /* for errno */ 6 #include <errno.h> 7 /* for close/optind */ 8 #include <unistd.h> 9 /* for logging */ 10 #include "debug.h" 11 /* for num_devices_* */ 12 #include "config.h" 13 /* for rte_max_eth_ports */ 14 #include <rte_common.h> 15 /* for rte_eth_rxconf */ 16 #include <rte_ethdev.h> 17 /* for delay funcs */ 18 #include <rte_cycles.h> 19 /* for ip pesudo-chksum */ 20 #include <rte_ip.h> 21 #define ENABLE_STATS_IOCTL 1 22 #ifdef ENABLE_STATS_IOCTL 23 /* for open */ 24 #include <fcntl.h> 25 /* for ioctl */ 26 #include <sys/ioctl.h> 27 #endif /* !ENABLE_STATS_IOCTL */ 28 /*----------------------------------------------------------------------------*/ 29 /* Essential macros */ 30 #define MAX_RX_QUEUE_PER_LCORE MAX_CPUS 31 #define MAX_TX_QUEUE_PER_PORT MAX_CPUS 32 33 #define MBUF_SIZE (2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM) 34 #define NB_MBUF 8192 35 #define MEMPOOL_CACHE_SIZE 256 36 //#define RX_IDLE_ENABLE 1 37 #define RX_IDLE_TIMEOUT 1 /* in micro-seconds */ 38 #define RX_IDLE_THRESH 64 39 40 /* 41 * RX and TX Prefetch, Host, and Write-back threshold values should be 42 * carefully set for optimal performance. Consult the network 43 * controller's datasheet and supporting DPDK documentation for guidance 44 * on how these parameters should be set. 45 */ 46 #define RX_PTHRESH 8 /**< Default values of RX prefetch threshold reg. */ 47 #define RX_HTHRESH 8 /**< Default values of RX host threshold reg. */ 48 #define RX_WTHRESH 4 /**< Default values of RX write-back threshold reg. */ 49 50 /* 51 * These default values are optimized for use with the Intel(R) 82599 10 GbE 52 * Controller and the DPDK ixgbe PMD. Consider using other values for other 53 * network controllers and/or network drivers. 54 */ 55 #define TX_PTHRESH 36 /**< Default values of TX prefetch threshold reg. */ 56 #define TX_HTHRESH 0 /**< Default values of TX host threshold reg. */ 57 #define TX_WTHRESH 0 /**< Default values of TX write-back threshold reg. */ 58 59 #define MAX_PKT_BURST /*32*/64/*128*//*32*/ 60 61 /* 62 * Configurable number of RX/TX ring descriptors 63 */ 64 #define RTE_TEST_RX_DESC_DEFAULT 128 65 #define RTE_TEST_TX_DESC_DEFAULT 512 66 67 static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; 68 static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; 69 /*----------------------------------------------------------------------------*/ 70 /* packet memory pools for storing packet bufs */ 71 static struct rte_mempool *pktmbuf_pool[MAX_CPUS] = {NULL}; 72 static uint8_t cpu_qid_map[RTE_MAX_ETHPORTS][MAX_CPUS] = {{0}}; 73 74 //#define DEBUG 1 75 #ifdef DEBUG 76 /* ethernet addresses of ports */ 77 static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS]; 78 #endif 79 80 static struct rte_eth_dev_info dev_info[RTE_MAX_ETHPORTS]; 81 82 static struct rte_eth_conf port_conf = { 83 .rxmode = { 84 .mq_mode = ETH_MQ_RX_RSS, 85 .max_rx_pkt_len = ETHER_MAX_LEN, 86 .split_hdr_size = 0, 87 .header_split = 0, /**< Header Split disabled */ 88 .hw_ip_checksum = 1, /**< IP checksum offload enabled */ 89 .hw_vlan_filter = 0, /**< VLAN filtering disabled */ 90 .jumbo_frame = 0, /**< Jumbo Frame Support disabled */ 91 .hw_strip_crc = 1, /**< CRC stripped by hardware */ 92 }, 93 .rx_adv_conf = { 94 .rss_conf = { 95 .rss_key = NULL, 96 .rss_hf = ETH_RSS_TCP | ETH_RSS_UDP | 97 ETH_RSS_IP | ETH_RSS_L2_PAYLOAD 98 }, 99 }, 100 .txmode = { 101 .mq_mode = ETH_MQ_TX_NONE, 102 }, 103 }; 104 105 static const struct rte_eth_rxconf rx_conf = { 106 .rx_thresh = { 107 .pthresh = RX_PTHRESH, /* RX prefetch threshold reg */ 108 .hthresh = RX_HTHRESH, /* RX host threshold reg */ 109 .wthresh = RX_WTHRESH, /* RX write-back threshold reg */ 110 }, 111 .rx_free_thresh = 32, 112 }; 113 114 static const struct rte_eth_txconf tx_conf = { 115 .tx_thresh = { 116 .pthresh = TX_PTHRESH, /* TX prefetch threshold reg */ 117 .hthresh = TX_HTHRESH, /* TX host threshold reg */ 118 .wthresh = TX_WTHRESH, /* TX write-back threshold reg */ 119 }, 120 .tx_free_thresh = 0, /* Use PMD default values */ 121 .tx_rs_thresh = 0, /* Use PMD default values */ 122 /* 123 * As the example won't handle mult-segments and offload cases, 124 * set the flag by default. 125 */ 126 .txq_flags = 0x0, 127 }; 128 129 struct mbuf_table { 130 unsigned len; /* length of queued packets */ 131 struct rte_mbuf *m_table[MAX_PKT_BURST]; 132 }; 133 134 struct dpdk_private_context { 135 struct mbuf_table rmbufs[RTE_MAX_ETHPORTS]; 136 struct mbuf_table wmbufs[RTE_MAX_ETHPORTS]; 137 struct rte_mempool *pktmbuf_pool; 138 struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 139 #ifdef RX_IDLE_ENABLE 140 uint8_t rx_idle; 141 #endif 142 #ifdef ENABLE_STATS_IOCTL 143 int fd; 144 #endif /* !ENABLE_STATS_IOCTL */ 145 } __rte_cache_aligned; 146 147 #ifdef ENABLE_STATS_IOCTL 148 /** 149 * stats struct passed on from user space to the driver 150 */ 151 struct stats_struct { 152 uint64_t tx_bytes; 153 uint64_t tx_pkts; 154 uint64_t rx_bytes; 155 uint64_t rx_pkts; 156 uint8_t qid; 157 uint8_t dev; 158 }; 159 #endif /* !ENABLE_STATS_IOCTL */ 160 /*----------------------------------------------------------------------------*/ 161 void 162 dpdk_init_handle(struct mtcp_thread_context *ctxt) 163 { 164 struct dpdk_private_context *dpc; 165 int i, j; 166 char mempool_name[20]; 167 168 /* create and initialize private I/O module context */ 169 ctxt->io_private_context = calloc(1, sizeof(struct dpdk_private_context)); 170 if (ctxt->io_private_context == NULL) { 171 TRACE_ERROR("Failed to initialize ctxt->io_private_context: " 172 "Can't allocate memory\n"); 173 exit(EXIT_FAILURE); 174 } 175 176 sprintf(mempool_name, "mbuf_pool-%d", ctxt->cpu); 177 dpc = (struct dpdk_private_context *)ctxt->io_private_context; 178 dpc->pktmbuf_pool = pktmbuf_pool[ctxt->cpu]; 179 180 /* set wmbufs correctly */ 181 for (j = 0; j < g_config.mos->netdev_table->num; j++) { 182 /* Allocate wmbufs for each registered port */ 183 for (i = 0; i < MAX_PKT_BURST; i++) { 184 dpc->wmbufs[j].m_table[i] = rte_pktmbuf_alloc(pktmbuf_pool[ctxt->cpu]); 185 if (dpc->wmbufs[j].m_table[i] == NULL) { 186 TRACE_ERROR("Failed to allocate %d:wmbuf[%d] on device %d!\n", 187 ctxt->cpu, i, j); 188 exit(EXIT_FAILURE); 189 } 190 } 191 /* set mbufs queue length to 0 to begin with */ 192 dpc->wmbufs[j].len = 0; 193 } 194 195 #ifdef ENABLE_STATS_IOCTL 196 dpc->fd = open("/dev/dpdk-iface", O_RDWR); 197 if (dpc->fd == -1) { 198 TRACE_ERROR("Can't open /dev/dpdk-iface for context->cpu: %d! " 199 "Are you using mlx4/mlx5 driver?\n", 200 ctxt->cpu); 201 } 202 #endif /* !ENABLE_STATS_IOCTL */ 203 } 204 /*----------------------------------------------------------------------------*/ 205 int 206 dpdk_send_pkts(struct mtcp_thread_context *ctxt, int nif) 207 { 208 struct dpdk_private_context *dpc; 209 mtcp_manager_t mtcp; 210 int ret; 211 int qid; 212 213 dpc = (struct dpdk_private_context *)ctxt->io_private_context; 214 mtcp = ctxt->mtcp_manager; 215 ret = 0; 216 qid = cpu_qid_map[nif][ctxt->cpu]; 217 218 /* if queue is unassigned, skip it.. */ 219 if (unlikely(qid == 0xFF)) 220 return 0; 221 222 /* if there are packets in the queue... flush them out to the wire */ 223 if (dpc->wmbufs[nif].len >/*= MAX_PKT_BURST*/ 0) { 224 struct rte_mbuf **pkts; 225 #ifdef ENABLE_STATS_IOCTL 226 struct stats_struct ss; 227 #endif /* !ENABLE_STATS_IOCTL */ 228 int cnt = dpc->wmbufs[nif].len; 229 pkts = dpc->wmbufs[nif].m_table; 230 #ifdef NETSTAT 231 mtcp->nstat.tx_packets[nif] += cnt; 232 #ifdef ENABLE_STATS_IOCTL 233 if (likely(dpc->fd) >= 0) { 234 ss.tx_pkts = mtcp->nstat.tx_packets[nif]; 235 ss.tx_bytes = mtcp->nstat.tx_bytes[nif]; 236 ss.rx_pkts = mtcp->nstat.rx_packets[nif]; 237 ss.rx_bytes = mtcp->nstat.rx_bytes[nif]; 238 ss.qid = ctxt->cpu; 239 ss.dev = nif; 240 ioctl(dpc->fd, 0, &ss); 241 } 242 #endif /* !ENABLE_STATS_IOCTL */ 243 #endif 244 do { 245 /* tx cnt # of packets */ 246 ret = rte_eth_tx_burst(nif, qid, 247 pkts, cnt); 248 pkts += ret; 249 cnt -= ret; 250 /* if not all pkts were sent... then repeat the cycle */ 251 } while (cnt > 0); 252 253 #ifndef SHARE_IO_BUFFER 254 int i; 255 /* time to allocate fresh mbufs for the queue */ 256 for (i = 0; i < dpc->wmbufs[nif].len; i++) { 257 dpc->wmbufs[nif].m_table[i] = rte_pktmbuf_alloc(pktmbuf_pool[ctxt->cpu]); 258 /* error checking */ 259 if (unlikely(dpc->wmbufs[nif].m_table[i] == NULL)) { 260 TRACE_ERROR("Failed to allocate %d:wmbuf[%d] on device %d!\n", 261 ctxt->cpu, i, nif); 262 exit(EXIT_FAILURE); 263 } 264 } 265 #endif 266 /* reset the len of mbufs var after flushing of packets */ 267 dpc->wmbufs[nif].len = 0; 268 } 269 270 return ret; 271 } 272 /*----------------------------------------------------------------------------*/ 273 uint8_t * 274 dpdk_get_wptr(struct mtcp_thread_context *ctxt, int nif, uint16_t pktsize) 275 { 276 struct dpdk_private_context *dpc; 277 mtcp_manager_t mtcp; 278 struct rte_mbuf *m; 279 uint8_t *ptr; 280 int len_of_mbuf; 281 282 dpc = (struct dpdk_private_context *) ctxt->io_private_context; 283 mtcp = ctxt->mtcp_manager; 284 285 /* sanity check */ 286 if (unlikely(dpc->wmbufs[nif].len == MAX_PKT_BURST)) 287 return NULL; 288 289 len_of_mbuf = dpc->wmbufs[nif].len; 290 m = dpc->wmbufs[nif].m_table[len_of_mbuf]; 291 292 /* retrieve the right write offset */ 293 ptr = (void *)rte_pktmbuf_mtod(m, struct ether_hdr *); 294 m->pkt_len = m->data_len = pktsize; 295 m->nb_segs = 1; 296 m->next = NULL; 297 298 #ifdef NETSTAT 299 mtcp->nstat.tx_bytes[nif] += pktsize + ETHER_OVR; 300 #endif 301 302 /* increment the len_of_mbuf var */ 303 dpc->wmbufs[nif].len = len_of_mbuf + 1; 304 305 return (uint8_t *)ptr; 306 } 307 /*----------------------------------------------------------------------------*/ 308 void 309 dpdk_set_wptr(struct mtcp_thread_context *ctxt, int out_nif, int in_nif, int index) 310 { 311 struct dpdk_private_context *dpc; 312 mtcp_manager_t mtcp; 313 int len_of_mbuf; 314 315 dpc = (struct dpdk_private_context *) ctxt->io_private_context; 316 mtcp = ctxt->mtcp_manager; 317 318 /* sanity check */ 319 if (unlikely(dpc->wmbufs[out_nif].len == MAX_PKT_BURST)) 320 return; 321 322 len_of_mbuf = dpc->wmbufs[out_nif].len; 323 dpc->wmbufs[out_nif].m_table[len_of_mbuf] = 324 dpc->rmbufs[in_nif].m_table[index]; 325 326 dpc->wmbufs[out_nif].m_table[len_of_mbuf]->udata64 = 0; 327 328 #ifdef NETSTAT 329 mtcp->nstat.tx_bytes[out_nif] += dpc->rmbufs[in_nif].m_table[index]->pkt_len + ETHER_OVR; 330 #endif 331 332 /* increment the len_of_mbuf var */ 333 dpc->wmbufs[out_nif].len = len_of_mbuf + 1; 334 335 return; 336 } 337 /*----------------------------------------------------------------------------*/ 338 static inline void 339 free_pkts(struct rte_mbuf **mtable, unsigned len) 340 { 341 int i; 342 343 /* free the freaking packets */ 344 for (i = 0; i < len; i++) { 345 if (mtable[i]->udata64 == 1) { 346 rte_pktmbuf_free_seg(mtable[i]); 347 RTE_MBUF_PREFETCH_TO_FREE(mtable[i+1]); 348 } 349 } 350 } 351 /*----------------------------------------------------------------------------*/ 352 int32_t 353 dpdk_recv_pkts(struct mtcp_thread_context *ctxt, int ifidx) 354 { 355 struct dpdk_private_context *dpc; 356 int ret; 357 uint8_t qid; 358 359 dpc = (struct dpdk_private_context *) ctxt->io_private_context; 360 qid = cpu_qid_map[ifidx][ctxt->cpu]; 361 362 /* if queue is unassigned, skip it.. */ 363 if (qid == 0xFF) 364 return 0; 365 366 if (dpc->rmbufs[ifidx].len != 0) { 367 free_pkts(dpc->rmbufs[ifidx].m_table, dpc->rmbufs[ifidx].len); 368 dpc->rmbufs[ifidx].len = 0; 369 } 370 371 ret = rte_eth_rx_burst((uint8_t)ifidx, qid, 372 dpc->pkts_burst, MAX_PKT_BURST); 373 #ifdef RX_IDLE_ENABLE 374 dpc->rx_idle = (likely(ret != 0)) ? 0 : dpc->rx_idle + 1; 375 #endif 376 dpc->rmbufs[ifidx].len = ret; 377 378 return ret; 379 } 380 /*----------------------------------------------------------------------------*/ 381 uint8_t * 382 dpdk_get_rptr(struct mtcp_thread_context *ctxt, int ifidx, int index, uint16_t *len) 383 { 384 struct dpdk_private_context *dpc; 385 struct rte_mbuf *m; 386 uint8_t *pktbuf; 387 388 dpc = (struct dpdk_private_context *) ctxt->io_private_context; 389 390 391 m = dpc->pkts_burst[index]; 392 /* tag to check if the packet is a local or a forwarded pkt */ 393 m->udata64 = 1; 394 /* don't enable pre-fetching... performance goes down */ 395 //rte_prefetch0(rte_pktmbuf_mtod(m, void *)); 396 *len = m->pkt_len; 397 pktbuf = rte_pktmbuf_mtod(m, uint8_t *); 398 399 /* enqueue the pkt ptr in mbuf */ 400 dpc->rmbufs[ifidx].m_table[index] = m; 401 402 return pktbuf; 403 } 404 /*----------------------------------------------------------------------------*/ 405 int 406 dpdk_get_nif(struct ifreq *ifr) 407 { 408 int i; 409 static int num_dev = -1; 410 static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS]; 411 /* get mac addr entries of 'detected' dpdk ports */ 412 if (num_dev < 0) { 413 num_dev = rte_eth_dev_count(); 414 for (i = 0; i < num_dev; i++) 415 rte_eth_macaddr_get(i, &ports_eth_addr[i]); 416 } 417 418 for (i = 0; i < num_dev; i++) 419 if (!memcmp(&ifr->ifr_addr.sa_data[0], &ports_eth_addr[i], ETH_ALEN)) 420 return i; 421 422 return -1; 423 } 424 /*----------------------------------------------------------------------------*/ 425 int32_t 426 dpdk_select(struct mtcp_thread_context *ctxt) 427 { 428 #ifdef RX_IDLE_ENABLE 429 struct dpdk_private_context *dpc; 430 431 dpc = (struct dpdk_private_context *) ctxt->io_private_context; 432 if (dpc->rx_idle > RX_IDLE_THRESH) { 433 dpc->rx_idle = 0; 434 usleep(RX_IDLE_TIMEOUT); 435 } 436 #endif 437 return 0; 438 } 439 /*----------------------------------------------------------------------------*/ 440 void 441 dpdk_destroy_handle(struct mtcp_thread_context *ctxt) 442 { 443 struct dpdk_private_context *dpc; 444 int i; 445 446 dpc = (struct dpdk_private_context *) ctxt->io_private_context; 447 448 /* free wmbufs */ 449 for (i = 0; i < g_config.mos->netdev_table->num; i++) 450 free_pkts(dpc->wmbufs[i].m_table, MAX_PKT_BURST); 451 452 #ifdef ENABLE_STATS_IOCTL 453 /* free fd */ 454 if (dpc->fd >= 0) 455 close(dpc->fd); 456 #endif /* !ENABLE_STATS_IOCTL */ 457 458 /* free it all up */ 459 free(dpc); 460 } 461 /*----------------------------------------------------------------------------*/ 462 static void 463 check_all_ports_link_status(uint8_t port_num, uint32_t port_mask) 464 { 465 #define CHECK_INTERVAL 100 /* 100ms */ 466 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */ 467 468 uint8_t portid, count, all_ports_up, print_flag = 0; 469 struct rte_eth_link link; 470 471 printf("\nChecking link status"); 472 fflush(stdout); 473 for (count = 0; count <= MAX_CHECK_TIME; count++) { 474 all_ports_up = 1; 475 for (portid = 0; portid < port_num; portid++) { 476 if ((port_mask & (1 << portid)) == 0) 477 continue; 478 memset(&link, 0, sizeof(link)); 479 rte_eth_link_get_nowait(portid, &link); 480 /* print link status if flag set */ 481 if (print_flag == 1) { 482 if (link.link_status) 483 printf("Port %d Link Up - speed %u " 484 "Mbps - %s\n", (uint8_t)portid, 485 (unsigned)link.link_speed, 486 (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? 487 ("full-duplex") : ("half-duplex\n")); 488 else 489 printf("Port %d Link Down\n", 490 (uint8_t)portid); 491 continue; 492 } 493 /* clear all_ports_up flag if any link down */ 494 if (link.link_status == 0) { 495 all_ports_up = 0; 496 break; 497 } 498 } 499 /* after finally printing all link status, get out */ 500 if (print_flag == 1) 501 break; 502 503 if (all_ports_up == 0) { 504 printf("."); 505 fflush(stdout); 506 rte_delay_ms(CHECK_INTERVAL); 507 } 508 509 /* set the print_flag if all ports up or timeout */ 510 if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) { 511 print_flag = 1; 512 printf("done\n"); 513 } 514 } 515 } 516 /*----------------------------------------------------------------------------*/ 517 int32_t 518 dpdk_dev_ioctl(struct mtcp_thread_context *ctx, int nif, int cmd, void *argp) 519 { 520 struct dpdk_private_context *dpc; 521 struct rte_mbuf *m; 522 int len_of_mbuf; 523 struct iphdr *iph; 524 struct tcphdr *tcph; 525 RssInfo *rss_i; 526 void **argpptr = (void **)argp; 527 528 if (cmd == DRV_NAME) { 529 *argpptr = (void *)dev_info->driver_name; 530 return 0; 531 } 532 533 iph = (struct iphdr *)argp; 534 dpc = (struct dpdk_private_context *)ctx->io_private_context; 535 len_of_mbuf = dpc->wmbufs[nif].len; 536 rss_i = NULL; 537 538 switch (cmd) { 539 case PKT_TX_IP_CSUM: 540 m = dpc->wmbufs[nif].m_table[len_of_mbuf - 1]; 541 m->ol_flags = PKT_TX_IP_CKSUM | PKT_TX_IPV4; 542 m->l2_len = sizeof(struct ether_hdr); 543 m->l3_len = (iph->ihl<<2); 544 break; 545 case PKT_TX_TCP_CSUM: 546 m = dpc->wmbufs[nif].m_table[len_of_mbuf - 1]; 547 tcph = (struct tcphdr *)((unsigned char *)iph + (iph->ihl<<2)); 548 m->ol_flags |= PKT_TX_TCP_CKSUM; 549 tcph->check = rte_ipv4_phdr_cksum((struct ipv4_hdr *)iph, m->ol_flags); 550 break; 551 case PKT_RX_RSS: 552 rss_i = (RssInfo *)argp; 553 m = dpc->pkts_burst[rss_i->pktidx]; 554 rss_i->hash_value = m->hash.rss; 555 break; 556 default: 557 goto dev_ioctl_err; 558 } 559 560 return 0; 561 dev_ioctl_err: 562 return -1; 563 } 564 /*----------------------------------------------------------------------------*/ 565 void 566 dpdk_load_module_upper_half(void) 567 { 568 int cpu = g_config.mos->num_cores, ret; 569 uint32_t cpumask = 0; 570 char cpumaskbuf[10]; 571 char mem_channels[5]; 572 573 /* set the log level */ 574 rte_set_log_type(RTE_LOGTYPE_PMD, 0); 575 rte_set_log_type(RTE_LOGTYPE_MALLOC, 0); 576 rte_set_log_type(RTE_LOGTYPE_MEMPOOL, 0); 577 rte_set_log_type(RTE_LOGTYPE_RING, 0); 578 rte_set_log_level(RTE_LOG_WARNING); 579 580 /* get the cpu mask */ 581 for (ret = 0; ret < cpu; ret++) 582 cpumask = (cpumask | (1 << ret)); 583 sprintf(cpumaskbuf, "%X", cpumask); 584 585 /* get the mem channels per socket */ 586 if (g_config.mos->nb_mem_channels == 0) { 587 TRACE_ERROR("DPDK module requires # of memory channels " 588 "per socket parameter!\n"); 589 exit(EXIT_FAILURE); 590 } 591 sprintf(mem_channels, "%d", g_config.mos->nb_mem_channels); 592 593 /* initialize the rte env first, what a waste of implementation effort! */ 594 char *argv[] = {"", 595 "-c", 596 cpumaskbuf, 597 "-n", 598 mem_channels, 599 "--proc-type=auto", 600 "" 601 }; 602 const int argc = 6; 603 604 /* 605 * re-set getopt extern variable optind. 606 * this issue was a bitch to debug 607 * rte_eal_init() internally uses getopt() syscall 608 * mtcp applications that also use an `external' getopt 609 * will cause a violent crash if optind is not reset to zero 610 * prior to calling the func below... 611 * see man getopt(3) for more details 612 */ 613 optind = 0; 614 615 /* initialize the dpdk eal env */ 616 ret = rte_eal_init(argc, argv); 617 if (ret < 0) 618 rte_exit(EXIT_FAILURE, "Invalid EAL args!\n"); 619 620 } 621 /*----------------------------------------------------------------------------*/ 622 void 623 dpdk_load_module_lower_half(void) 624 { 625 int portid, rxlcore_id, ret; 626 struct rte_eth_fc_conf fc_conf; /* for Ethernet flow control settings */ 627 /* setting the rss key */ 628 static const uint8_t key[] = { 629 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 630 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 631 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 632 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 633 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 634 0x05, 0x05 635 }; 636 637 port_conf.rx_adv_conf.rss_conf.rss_key = (uint8_t *)&key; 638 port_conf.rx_adv_conf.rss_conf.rss_key_len = sizeof(key); 639 640 /* resetting cpu_qid mapping */ 641 memset(cpu_qid_map, 0xFF, sizeof(cpu_qid_map)); 642 643 if (!g_config.mos->multiprocess 644 || (g_config.mos->multiprocess && g_config.mos->multiprocess_is_master)) { 645 for (rxlcore_id = 0; rxlcore_id < g_config.mos->num_cores; rxlcore_id++) { 646 char name[20]; 647 sprintf(name, "mbuf_pool-%d", rxlcore_id); 648 /* create the mbuf pools */ 649 pktmbuf_pool[rxlcore_id] = 650 rte_mempool_create(name, NB_MBUF, 651 MBUF_SIZE, MEMPOOL_CACHE_SIZE, 652 sizeof(struct rte_pktmbuf_pool_private), 653 rte_pktmbuf_pool_init, NULL, 654 rte_pktmbuf_init, NULL, 655 rte_lcore_to_socket_id(rxlcore_id), 0); 656 if (pktmbuf_pool[rxlcore_id] == NULL) 657 rte_exit(EXIT_FAILURE, "Cannot init mbuf pool\n"); 658 } 659 660 /* Initialise each port */ 661 for (portid = 0; portid < g_config.mos->netdev_table->num; portid++) { 662 int num_queue = 0, eth_idx, i, queue_id; 663 for (eth_idx = 0; eth_idx < g_config.mos->netdev_table->num; eth_idx++) 664 if (portid == g_config.mos->netdev_table->ent[eth_idx]->ifindex) 665 break; 666 if (eth_idx == g_config.mos->netdev_table->num) 667 continue; 668 for (i = 0; i < sizeof(uint64_t) * 8; i++) 669 if (g_config.mos->netdev_table->ent[eth_idx]->cpu_mask & (1L << i)) 670 num_queue++; 671 672 /* set 'num_queues' (used for GetRSSCPUCore() in util.c) */ 673 num_queues = num_queue; 674 675 /* init port */ 676 printf("Initializing port %u... ", (unsigned) portid); 677 fflush(stdout); 678 ret = rte_eth_dev_configure(portid, num_queue, num_queue, 679 &port_conf); 680 if (ret < 0) 681 rte_exit(EXIT_FAILURE, "Cannot configure device:" 682 "err=%d, port=%u\n", 683 ret, (unsigned) portid); 684 685 /* init one RX queue per CPU */ 686 fflush(stdout); 687 #ifdef DEBUG 688 rte_eth_macaddr_get(portid, &ports_eth_addr[portid]); 689 #endif 690 /* check port capabilities */ 691 rte_eth_dev_info_get(portid, &dev_info[portid]); 692 693 queue_id = 0; 694 for (rxlcore_id = 0; rxlcore_id < g_config.mos->num_cores; rxlcore_id++) { 695 if (!(g_config.mos->netdev_table->ent[eth_idx]->cpu_mask & (1L << rxlcore_id))) 696 continue; 697 ret = rte_eth_rx_queue_setup(portid, queue_id, nb_rxd, 698 rte_eth_dev_socket_id(portid), &rx_conf, 699 pktmbuf_pool[rxlcore_id]); 700 if (ret < 0) 701 rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup:" 702 "err=%d, port=%u, queueid: %d\n", 703 ret, (unsigned) portid, rxlcore_id); 704 cpu_qid_map[portid][rxlcore_id] = queue_id++; 705 } 706 707 /* init one TX queue on each port per CPU (this is redundant for 708 * this app) */ 709 fflush(stdout); 710 queue_id = 0; 711 for (rxlcore_id = 0; rxlcore_id < g_config.mos->num_cores; rxlcore_id++) { 712 if (!(g_config.mos->netdev_table->ent[eth_idx]->cpu_mask & (1L << rxlcore_id))) 713 continue; 714 ret = rte_eth_tx_queue_setup(portid, queue_id++, nb_txd, 715 rte_eth_dev_socket_id(portid), &tx_conf); 716 if (ret < 0) 717 rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup:" 718 "err=%d, port=%u, queueid: %d\n", 719 ret, (unsigned) portid, rxlcore_id); 720 } 721 722 /* Start device */ 723 ret = rte_eth_dev_start(portid); 724 if (ret < 0) 725 rte_exit(EXIT_FAILURE, "rte_eth_dev_start:err=%d, port=%u\n", 726 ret, (unsigned) portid); 727 728 printf("done: \n"); 729 rte_eth_promiscuous_enable(portid); 730 731 /* retrieve current flow control settings per port */ 732 memset(&fc_conf, 0, sizeof(fc_conf)); 733 ret = rte_eth_dev_flow_ctrl_get(portid, &fc_conf); 734 if (ret != 0) { 735 rte_exit(EXIT_FAILURE, "Failed to get flow control info!\n"); 736 } 737 738 /* and just disable the rx/tx flow control */ 739 fc_conf.mode = RTE_FC_NONE; 740 ret = rte_eth_dev_flow_ctrl_set(portid, &fc_conf); 741 if (ret != 0) { 742 rte_exit(EXIT_FAILURE, "Failed to set flow control info!: errno: %d\n", 743 ret); 744 } 745 746 #ifdef DEBUG 747 printf("Port %u, MAC address: %02X:%02X:%02X:%02X:%02X:%02X\n\n", 748 (unsigned) portid, 749 ports_eth_addr[portid].addr_bytes[0], 750 ports_eth_addr[portid].addr_bytes[1], 751 ports_eth_addr[portid].addr_bytes[2], 752 ports_eth_addr[portid].addr_bytes[3], 753 ports_eth_addr[portid].addr_bytes[4], 754 ports_eth_addr[portid].addr_bytes[5]); 755 #endif 756 /* only check for link status if the thread is master */ 757 check_all_ports_link_status(g_config.mos->netdev_table->num, 0xFFFFFFFF); 758 } 759 } else { /* g_config.mos->multiprocess && !g_config.mos->multiprocess_is_master */ 760 for (rxlcore_id = 0; rxlcore_id < g_config.mos->num_cores; rxlcore_id++) { 761 char name[20]; 762 sprintf(name, "mbuf_pool-%d", rxlcore_id); 763 /* initialize the mbuf pools */ 764 pktmbuf_pool[rxlcore_id] = 765 rte_mempool_lookup(name); 766 if (pktmbuf_pool[rxlcore_id] == NULL) 767 rte_exit(EXIT_FAILURE, "Cannot init mbuf pool\n"); 768 for (portid = 0; portid < g_config.mos->netdev_table->num; portid++) 769 cpu_qid_map[portid][rxlcore_id] = rxlcore_id; 770 } 771 /* set 'num_queues' (used for GetRSSCPUCore() in util.c) */ 772 num_queues = g_config.mos->num_cores; 773 } 774 775 } 776 /*----------------------------------------------------------------------------*/ 777 io_module_func dpdk_module_func = { 778 .load_module_upper_half = dpdk_load_module_upper_half, 779 .load_module_lower_half = dpdk_load_module_lower_half, 780 .init_handle = dpdk_init_handle, 781 .link_devices = NULL, 782 .release_pkt = NULL, 783 .send_pkts = dpdk_send_pkts, 784 .get_wptr = dpdk_get_wptr, 785 .recv_pkts = dpdk_recv_pkts, 786 .get_rptr = dpdk_get_rptr, 787 .get_nif = dpdk_get_nif, 788 .select = dpdk_select, 789 .destroy_handle = dpdk_destroy_handle, 790 .dev_ioctl = dpdk_dev_ioctl, 791 .set_wptr = dpdk_set_wptr, 792 }; 793 /*----------------------------------------------------------------------------*/ 794 795