/* for io_module_func def'ns */
#include "io_module.h"
/* for mtcp related def'ns */
#include "mtcp.h"
/* for errno */
#include <errno.h>
/* for logging */
#include "debug.h"
/* for num_devices_* */
#include "config.h"
/* for rte_max_eth_ports */
#include <rte_common.h>
/* for rte_eth_rxconf */
#include <rte_ethdev.h>
/* for delay funcs */
#include <rte_cycles.h>
/* for ip pseudo-chksum */
#include <rte_ip.h>
#define ENABLE_STATS_IOCTL		1
#ifdef ENABLE_STATS_IOCTL
/* for close */
#include <unistd.h>
/* for open */
#include <fcntl.h>
/* for ioctl */
#include <sys/ioctl.h>
#endif /* !ENABLE_STATS_IOCTL */
/*----------------------------------------------------------------------------*/
/* Essential macros */
#define MAX_RX_QUEUE_PER_LCORE		MAX_CPUS
#define MAX_TX_QUEUE_PER_PORT		MAX_CPUS

#define MBUF_SIZE			(2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)
#define NB_MBUF				8192
#define MEMPOOL_CACHE_SIZE		256

/*
 * RX and TX Prefetch, Host, and Write-back threshold values should be
 * carefully set for optimal performance. Consult the network
 * controller's datasheet and supporting DPDK documentation for guidance
 * on how these parameters should be set.
 */
#define RX_PTHRESH			8 /**< Default values of RX prefetch threshold reg. */
#define RX_HTHRESH			8 /**< Default values of RX host threshold reg. */
#define RX_WTHRESH			4 /**< Default values of RX write-back threshold reg. */

/*
 * These default values are optimized for use with the Intel(R) 82599 10 GbE
 * Controller and the DPDK ixgbe PMD. Consider using other values for other
 * network controllers and/or network drivers.
 */
#define TX_PTHRESH			36 /**< Default values of TX prefetch threshold reg. */
#define TX_HTHRESH			0  /**< Default values of TX host threshold reg. */
#define TX_WTHRESH			0  /**< Default values of TX write-back threshold reg. */

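/* maximum burst size: upper bound on packets handled per rte_eth_rx_burst()
 * call and on packets staged per port before a TX flush; it also sizes the
 * per-port mbuf tables below */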
#define MAX_PKT_BURST			64

/*
 * Configurable number of RX/TX ring descriptors
 */
#define RTE_TEST_RX_DESC_DEFAULT	128
#define RTE_TEST_TX_DESC_DEFAULT	512

static uint16_t nb_rxd =		RTE_TEST_RX_DESC_DEFAULT;
static uint16_t nb_txd =		RTE_TEST_TX_DESC_DEFAULT;
/*----------------------------------------------------------------------------*/
/* packet memory pools for storing packet bufs */
static struct rte_mempool *pktmbuf_pool[MAX_CPUS] = {NULL};
/* per-port map from CPU id to RX/TX queue id (0xFF = queue unassigned) */
static uint8_t cpu_qid_map[RTE_MAX_ETHPORTS][MAX_CPUS] = {{0}};

//#define DEBUG				1
#ifdef DEBUG
/* ethernet addresses of ports */
static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];
#endif

static struct rte_eth_conf port_conf = {
	.rxmode = {
		.mq_mode	=	ETH_MQ_RX_RSS,
		.max_rx_pkt_len =	ETHER_MAX_LEN,
		.split_hdr_size =	0,
		.header_split   =	0, /**< Header Split disabled */
		.hw_ip_checksum =	1, /**< IP checksum offload enabled */
		.hw_vlan_filter =	0, /**< VLAN filtering disabled */
		.jumbo_frame    =	0, /**< Jumbo Frame Support disabled */
		.hw_strip_crc   =	1, /**< CRC stripped by hardware */
	},
	.rx_adv_conf = {
		.rss_conf = {
			.rss_key =	NULL,
			.rss_hf =	ETH_RSS_TCP
		},
	},
	.txmode = {
		.mq_mode =		ETH_MQ_TX_NONE,
	},
#if 0
	.fdir_conf = {
		.mode = RTE_FDIR_MODE_PERFECT,
		.pballoc = RTE_FDIR_PBALLOC_256K,
		.status = RTE_FDIR_REPORT_STATUS_ALWAYS,
		//.flexbytes_offset = 0x6,
		.drop_queue = 127,
	},
#endif
};

static const struct rte_eth_rxconf rx_conf = {
	.rx_thresh = {
		.pthresh =		RX_PTHRESH, /* RX prefetch threshold reg */
		.hthresh =		RX_HTHRESH, /* RX host threshold reg */
		.wthresh =		RX_WTHRESH, /* RX write-back threshold reg */
	},
	.rx_free_thresh =		32,
};

static const struct rte_eth_txconf tx_conf = {
	.tx_thresh = {
		.pthresh =		TX_PTHRESH, /* TX prefetch threshold reg */
		.hthresh =		TX_HTHRESH, /* TX host threshold reg */
		.wthresh =		TX_WTHRESH, /* TX write-back threshold reg */
	},
	.tx_free_thresh =		0, /* Use PMD default values */
	.tx_rs_thresh =			0, /* Use PMD default values */
	/*
	 * As the example won't handle multi-segments and offload cases,
	 * set the flag by default.
	 */
	.txq_flags =			0x0,
};

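/* Per-thread I/O bookkeeping: mbuf_table is a fixed-size staging table, and
 * dpdk_private_context holds one TX table (wmbufs) and one RX table (rmbufs)
 * per port, the thread's packet mempool, and the burst array filled by
 * rte_eth_rx_burst(). */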
struct mbuf_table {
	unsigned len; /* length of queued packets */
	struct rte_mbuf *m_table[MAX_PKT_BURST];
};

struct dpdk_private_context {
	struct mbuf_table rmbufs[RTE_MAX_ETHPORTS];
	struct mbuf_table wmbufs[RTE_MAX_ETHPORTS];
	struct rte_mempool *pktmbuf_pool;
	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
#ifdef ENABLE_STATS_IOCTL
	int fd;
#endif /* !ENABLE_STATS_IOCTL */
} __rte_cache_aligned;

#ifdef ENABLE_STATS_IOCTL
/**
 * stats struct passed on from user space to the driver
 */
struct stats_struct {
	uint64_t tx_bytes;
	uint64_t tx_pkts;
	uint64_t rx_bytes;
	uint64_t rx_pkts;
	uint8_t qid;
	uint8_t dev;
};
#endif /* !ENABLE_STATS_IOCTL */
/*----------------------------------------------------------------------------*/
void
dpdk_init_handle(struct mtcp_thread_context *ctxt)
{
	struct dpdk_private_context *dpc;
	int i, j;
	char mempool_name[20];

	/* create and initialize private I/O module context */
	ctxt->io_private_context = calloc(1, sizeof(struct dpdk_private_context));
	if (ctxt->io_private_context == NULL) {
		TRACE_ERROR("Failed to initialize ctxt->io_private_context: "
			    "Can't allocate memory\n");
		exit(EXIT_FAILURE);
	}

	sprintf(mempool_name, "mbuf_pool-%d", ctxt->cpu);
	dpc = (struct dpdk_private_context *)ctxt->io_private_context;
	dpc->pktmbuf_pool = pktmbuf_pool[ctxt->cpu];

	/* set wmbufs correctly */
	for (j = 0; j < g_config.mos->netdev_table->num; j++) {
		/* Allocate wmbufs for each registered port */
		for (i = 0; i < MAX_PKT_BURST; i++) {
			dpc->wmbufs[j].m_table[i] = rte_pktmbuf_alloc(pktmbuf_pool[ctxt->cpu]);
			if (dpc->wmbufs[j].m_table[i] == NULL) {
				TRACE_ERROR("Failed to allocate %d:wmbuf[%d] on device %d!\n",
					    ctxt->cpu, i, j);
				exit(EXIT_FAILURE);
			}
		}
		/* set mbufs queue length to 0 to begin with */
		dpc->wmbufs[j].len = 0;
	}

#ifdef ENABLE_STATS_IOCTL
	dpc->fd = open("/dev/dpdk-iface", O_RDWR);
	if (dpc->fd == -1) {
		TRACE_ERROR("Can't open /dev/dpdk-iface for context->cpu: %d!\n",
			    ctxt->cpu);
		exit(EXIT_FAILURE);
	}
#endif /* !ENABLE_STATS_IOCTL */
}
/*----------------------------------------------------------------------------*/
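/*
 * Flush the TX mbufs staged for port `nif' on this thread's queue: loop on
 * rte_eth_tx_burst() until every staged packet is accepted by the PMD, then
 * (unless SHARE_IO_BUFFER is defined) refill the table with fresh mbufs.
 * With ENABLE_STATS_IOCTL, the per-queue counters are also pushed to the
 * dpdk-iface kernel module via ioctl().
 */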
int
dpdk_send_pkts(struct mtcp_thread_context *ctxt, int nif)
{
	struct dpdk_private_context *dpc;
	mtcp_manager_t mtcp;
	int ret;
	int qid;

	dpc = (struct dpdk_private_context *)ctxt->io_private_context;
	mtcp = ctxt->mtcp_manager;
	ret = 0;
	qid = cpu_qid_map[nif][ctxt->cpu];

	/* if queue is unassigned, skip it.. */
	if (unlikely(qid == 0xFF))
		return 0;

	/* if there are packets in the queue... flush them out to the wire */
	if (dpc->wmbufs[nif].len > 0) {
		struct rte_mbuf **pkts;
#ifdef ENABLE_STATS_IOCTL
		struct stats_struct ss;
#endif /* !ENABLE_STATS_IOCTL */
		int cnt = dpc->wmbufs[nif].len;
		pkts = dpc->wmbufs[nif].m_table;
#ifdef NETSTAT
		mtcp->nstat.tx_packets[nif] += cnt;
#ifdef ENABLE_STATS_IOCTL
		ss.tx_pkts = mtcp->nstat.tx_packets[nif];
		ss.tx_bytes = mtcp->nstat.tx_bytes[nif];
		ss.rx_pkts = mtcp->nstat.rx_packets[nif];
		ss.rx_bytes = mtcp->nstat.rx_bytes[nif];
		ss.qid = ctxt->cpu;
		ss.dev = nif;
		ioctl(dpc->fd, 0, &ss);
#endif /* !ENABLE_STATS_IOCTL */
#endif
		do {
			/* tx cnt # of packets */
			ret = rte_eth_tx_burst(nif, qid, pkts, cnt);
			pkts += ret;
			cnt -= ret;
			/* if not all pkts were sent... then repeat the cycle */
		} while (cnt > 0);

#ifndef SHARE_IO_BUFFER
		int i;
		/* time to allocate fresh mbufs for the queue */
		for (i = 0; i < dpc->wmbufs[nif].len; i++) {
			dpc->wmbufs[nif].m_table[i] = rte_pktmbuf_alloc(pktmbuf_pool[ctxt->cpu]);
			/* error checking */
			if (unlikely(dpc->wmbufs[nif].m_table[i] == NULL)) {
				TRACE_ERROR("Failed to allocate %d:wmbuf[%d] on device %d!\n",
					    ctxt->cpu, i, nif);
				exit(EXIT_FAILURE);
			}
		}
#endif
		/* reset the len of mbufs var after flushing of packets */
		dpc->wmbufs[nif].len = 0;
	}

	return ret;
}
/*----------------------------------------------------------------------------*/
uint8_t *
dpdk_get_wptr(struct mtcp_thread_context *ctxt, int nif, uint16_t pktsize)
{
	struct dpdk_private_context *dpc;
	mtcp_manager_t mtcp;
	struct rte_mbuf *m;
	uint8_t *ptr;
	int len_of_mbuf;

	dpc = (struct dpdk_private_context *) ctxt->io_private_context;
	mtcp = ctxt->mtcp_manager;

	/* sanity check */
	if (unlikely(dpc->wmbufs[nif].len == MAX_PKT_BURST))
		return NULL;

	len_of_mbuf = dpc->wmbufs[nif].len;
	m = dpc->wmbufs[nif].m_table[len_of_mbuf];

	/* retrieve the right write offset */
	ptr = (void *)rte_pktmbuf_mtod(m, struct ether_hdr *);
	m->pkt_len = m->data_len = pktsize;
	m->nb_segs = 1;
	m->next = NULL;

#ifdef NETSTAT
	mtcp->nstat.tx_bytes[nif] += pktsize + 24;
#endif

	/* increment the len_of_mbuf var */
	dpc->wmbufs[nif].len = len_of_mbuf + 1;

	return (uint8_t *)ptr;
}
/*----------------------------------------------------------------------------*/
void
dpdk_set_wptr(struct mtcp_thread_context *ctxt, int out_nif, int in_nif, int index)
{
	struct dpdk_private_context *dpc;
	mtcp_manager_t mtcp;
	int len_of_mbuf;

	dpc = (struct dpdk_private_context *) ctxt->io_private_context;
	mtcp = ctxt->mtcp_manager;

	/* sanity check */
	if (unlikely(dpc->wmbufs[out_nif].len == MAX_PKT_BURST))
		return;

	len_of_mbuf = dpc->wmbufs[out_nif].len;
	dpc->wmbufs[out_nif].m_table[len_of_mbuf] =
		dpc->rmbufs[in_nif].m_table[index];

	dpc->wmbufs[out_nif].m_table[len_of_mbuf]->udata64 = 0;

#ifdef NETSTAT
	mtcp->nstat.tx_bytes[out_nif] += dpc->rmbufs[in_nif].m_table[index]->pkt_len + 24;
#endif

	/* increment the len_of_mbuf var */
	dpc->wmbufs[out_nif].len = len_of_mbuf + 1;

	return;
}
/*----------------------------------------------------------------------------*/
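/*
 * Free the mbufs left over from the previous RX burst. dpdk_get_rptr() tags
 * every received mbuf with udata64 = 1; dpdk_set_wptr() resets the tag to 0
 * when a packet is moved to a TX queue, so forwarded packets are skipped
 * here and released by the PMD after transmission instead.
 */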
static inline void
free_pkts(struct rte_mbuf **mtable, unsigned len)
{
	int i;

	/* free the packets */
	for (i = 0; i < len; i++) {
		if (mtable[i]->udata64 == 1) {
			rte_pktmbuf_free_seg(mtable[i]);
			/* avoid prefetching past the end of the table */
			if (i + 1 < len)
				RTE_MBUF_PREFETCH_TO_FREE(mtable[i+1]);
		}
	}
}
/*----------------------------------------------------------------------------*/
int32_t
dpdk_recv_pkts(struct mtcp_thread_context *ctxt, int ifidx)
{
	struct dpdk_private_context *dpc;
	int ret;
	uint8_t qid;

	dpc = (struct dpdk_private_context *) ctxt->io_private_context;
	qid = cpu_qid_map[ifidx][ctxt->cpu];

	/* if queue is unassigned, skip it.. */
	if (qid == 0xFF)
		return 0;

	if (dpc->rmbufs[ifidx].len != 0) {
		free_pkts(dpc->rmbufs[ifidx].m_table, dpc->rmbufs[ifidx].len);
		dpc->rmbufs[ifidx].len = 0;
	}

	ret = rte_eth_rx_burst((uint8_t)ifidx, qid,
			       dpc->pkts_burst, MAX_PKT_BURST);

	dpc->rmbufs[ifidx].len = ret;

	return ret;
}
/*----------------------------------------------------------------------------*/
uint8_t *
dpdk_get_rptr(struct mtcp_thread_context *ctxt, int ifidx, int index, uint16_t *len)
{
	struct dpdk_private_context *dpc;
	struct rte_mbuf *m;
	uint8_t *pktbuf;

	dpc = (struct dpdk_private_context *) ctxt->io_private_context;

	m = dpc->pkts_burst[index];
	/* tag to check if the packet is a local or a forwarded pkt */
	m->udata64 = 1;
	/* don't enable pre-fetching... performance goes down */
	//rte_prefetch0(rte_pktmbuf_mtod(m, void *));
	*len = m->pkt_len;
	pktbuf = rte_pktmbuf_mtod(m, uint8_t *);

	/* enqueue the pkt ptr in mbuf */
	dpc->rmbufs[ifidx].m_table[index] = m;

	return pktbuf;
}
/*----------------------------------------------------------------------------*/
int
dpdk_get_nif(struct ifreq *ifr)
{
	int i;
	static int num_dev = -1;
	static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];

	/* get mac addr entries of 'detected' dpdk ports */
	if (num_dev < 0) {
		num_dev = rte_eth_dev_count();
		for (i = 0; i < num_dev; i++)
			rte_eth_macaddr_get(i, &ports_eth_addr[i]);
	}

	for (i = 0; i < num_dev; i++)
		if (!memcmp(&ifr->ifr_addr.sa_data[0], &ports_eth_addr[i], ETH_ALEN))
			return i;

	return -1;
}
/*----------------------------------------------------------------------------*/
void
dpdk_destroy_handle(struct mtcp_thread_context *ctxt)
{
	struct dpdk_private_context *dpc;
	int i;

	dpc = (struct dpdk_private_context *) ctxt->io_private_context;

	/* free wmbufs */
	for (i = 0; i < g_config.mos->netdev_table->num; i++)
		free_pkts(dpc->wmbufs[i].m_table, MAX_PKT_BURST);

#ifdef ENABLE_STATS_IOCTL
	/* free fd */
	close(dpc->fd);
#endif /* !ENABLE_STATS_IOCTL */

	/* free it all up */
	free(dpc);
}
/*----------------------------------------------------------------------------*/
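/*
 * Poll link status on every configured port until all links are up or the
 * timeout expires (MAX_CHECK_TIME polls, CHECK_INTERVAL ms apart); this
 * follows the pattern used by the standard DPDK sample applications.
 */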
static void
check_all_ports_link_status(uint8_t port_num, uint32_t port_mask)
{
#define CHECK_INTERVAL			100 /* 100ms */
#define MAX_CHECK_TIME			90  /* 9s (90 * 100ms) in total */

	uint8_t portid, count, all_ports_up, print_flag = 0;
	struct rte_eth_link link;

	printf("\nChecking link status");
	fflush(stdout);
	for (count = 0; count <= MAX_CHECK_TIME; count++) {
		all_ports_up = 1;
		for (portid = 0; portid < port_num; portid++) {
			if ((port_mask & (1 << portid)) == 0)
				continue;
			memset(&link, 0, sizeof(link));
			rte_eth_link_get_nowait(portid, &link);
			/* print link status if flag set */
			if (print_flag == 1) {
				if (link.link_status)
					printf("Port %d Link Up - speed %u "
					       "Mbps - %s\n", (uint8_t)portid,
					       (unsigned)link.link_speed,
					       (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
					       ("full-duplex") : ("half-duplex"));
				else
					printf("Port %d Link Down\n",
					       (uint8_t)portid);
				continue;
			}
			/* clear all_ports_up flag if any link down */
			if (link.link_status == 0) {
				all_ports_up = 0;
				break;
			}
		}
		/* after finally printing all link status, get out */
		if (print_flag == 1)
			break;

		if (all_ports_up == 0) {
			printf(".");
			fflush(stdout);
			rte_delay_ms(CHECK_INTERVAL);
		}

		/* set the print_flag if all ports up or timeout */
		if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
			print_flag = 1;
			printf("done\n");
		}
	}
}
/*----------------------------------------------------------------------------*/
#if 0
static void
dpdk_enable_fdir(int portid, uint8_t is_master)
{
	struct rte_fdir_masks fdir_masks;
	struct rte_fdir_filter fdir_filter;
	int ret;

	memset(&fdir_filter, 0, sizeof(struct rte_fdir_filter));
	fdir_filter.iptype = RTE_FDIR_IPTYPE_IPV4;
	fdir_filter.l4type = RTE_FDIR_L4TYPE_TCP;
	fdir_filter.ip_dst.ipv4_addr = g_config.mos->netdev_table->ent[portid]->ip_addr;

	if (is_master) {
		memset(&fdir_masks, 0, sizeof(struct rte_fdir_masks));
		fdir_masks.src_ipv4_mask = 0x0;
		fdir_masks.dst_ipv4_mask = 0xFFFFFFFF;
		fdir_masks.src_port_mask = 0x0;
		fdir_masks.dst_port_mask = 0x0;

		/*
		 * enable the following if the filter is IP-only
		 * (non-TCP, non-UDP)
		 */
		/* fdir_masks.only_ip_flow = 1; */
		rte_eth_dev_fdir_set_masks(portid, &fdir_masks);
		ret = rte_eth_dev_fdir_add_perfect_filter(portid,
							  &fdir_filter,
							  0,
							  g_config.mos->multiprocess_curr_core,
							  0);
	} else {
		ret = rte_eth_dev_fdir_update_perfect_filter(portid,
							     &fdir_filter,
							     0,
							     g_config.mos->multiprocess_curr_core,
							     0);
	}
	if (ret < 0) {
		rte_exit(EXIT_FAILURE,
			 "fdir_add_perfect_filter_t call failed!: %d\n",
			 ret);
	}
	fprintf(stderr, "Filter for device ifidx: %d added\n", portid);
}
#endif
/*----------------------------------------------------------------------------*/
int32_t
dpdk_dev_ioctl(struct mtcp_thread_context *ctx, int nif, int cmd, void *argp)
{
	struct dpdk_private_context *dpc;
	struct rte_mbuf *m;
	int len_of_mbuf;
	struct iphdr *iph;
	struct tcphdr *tcph;
	RssInfo *rss_i;

	iph = (struct iphdr *)argp;
	dpc = (struct dpdk_private_context *)ctx->io_private_context;
	len_of_mbuf = dpc->wmbufs[nif].len;
	rss_i = NULL;

	switch (cmd) {
	case PKT_TX_IP_CSUM:
		m = dpc->wmbufs[nif].m_table[len_of_mbuf - 1];
		m->ol_flags = PKT_TX_OUTER_IP_CKSUM |
			PKT_TX_IP_CKSUM | PKT_TX_IPV4;
		m->l2_len = sizeof(struct ether_hdr);
		m->l3_len = (iph->ihl<<2);
		break;
	case PKT_TX_TCP_CSUM:
		m = dpc->wmbufs[nif].m_table[len_of_mbuf - 1];
		tcph = (struct tcphdr *)((unsigned char *)iph + (iph->ihl<<2));
		m->ol_flags |= PKT_TX_TCP_CKSUM;
		tcph->check = rte_ipv4_phdr_cksum((struct ipv4_hdr *)iph, m->ol_flags);
		break;
	case PKT_RX_RSS:
		rss_i = (RssInfo *)argp;
		m = dpc->pkts_burst[rss_i->pktidx];
		rss_i->hash_value = m->hash.rss;
		break;
	default:
		goto dev_ioctl_err;
	}

	return 0;
 dev_ioctl_err:
	return -1;
}
/*----------------------------------------------------------------------------*/
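/*
 * Upper-half initialization (runs before the per-core threads are spawned):
 * builds the EAL arguments (coremask and memory-channel count) from the mOS
 * configuration, resets getopt's optind, and calls rte_eal_init().
 */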
void
dpdk_load_module_upper_half(void)
{
	int cpu = g_config.mos->num_cores, ret;
	uint32_t cpumask = 0;
	char cpumaskbuf[10];
	char mem_channels[5];

	/* set the log level */
	rte_set_log_type(RTE_LOGTYPE_PMD, 0);
	rte_set_log_type(RTE_LOGTYPE_MALLOC, 0);
	rte_set_log_type(RTE_LOGTYPE_MEMPOOL, 0);
	rte_set_log_type(RTE_LOGTYPE_RING, 0);
	rte_set_log_level(RTE_LOG_WARNING);

	/* get the cpu mask */
	for (ret = 0; ret < cpu; ret++)
		cpumask = (cpumask | (1 << ret));
	sprintf(cpumaskbuf, "%X", cpumask);

	/* get the mem channels per socket */
	if (g_config.mos->nb_mem_channels == 0) {
		TRACE_ERROR("DPDK module requires # of memory channels "
			    "per socket parameter!\n");
		exit(EXIT_FAILURE);
	}
	sprintf(mem_channels, "%d", g_config.mos->nb_mem_channels);

	/* initialize the rte env first */
	char *argv[] = {"",
			"-c",
			cpumaskbuf,
			"-n",
			mem_channels,
			"--proc-type=auto",
			""
	};
	const int argc = 6;

	/*
	 * Re-set the getopt extern variable optind. rte_eal_init() internally
	 * uses getopt(); mtcp applications that also use an `external' getopt
	 * will crash if optind is not reset to zero prior to calling the
	 * function below. See getopt(3) for more details.
	 */
	optind = 0;

	/* initialize the dpdk eal env */
	ret = rte_eal_init(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid EAL args!\n");
}
/*----------------------------------------------------------------------------*/
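/*
 * Lower-half initialization: creates (or, for secondary processes, looks up)
 * the per-core mbuf pools, configures one RX and one TX queue per core on
 * every port listed in the netdev table, starts the ports, and disables
 * Ethernet flow control. The RSS key of repeated 0x05 bytes is presumably
 * chosen so that the Toeplitz hash is symmetric, i.e. both directions of a
 * TCP connection map to the same queue.
 */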
void
dpdk_load_module_lower_half(void)
{
	int portid, rxlcore_id, ret;
	struct rte_eth_fc_conf fc_conf; /* for Ethernet flow control settings */
	/* setting the rss key */
	static const uint8_t key[] = {
		0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
		0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
		0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
		0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05
	};

	port_conf.rx_adv_conf.rss_conf.rss_key = (uint8_t *)&key;
	port_conf.rx_adv_conf.rss_conf.rss_key_len = sizeof(key);

	/* resetting cpu_qid mapping */
	memset(cpu_qid_map, 0xFF, sizeof(cpu_qid_map));

	if (!g_config.mos->multiprocess
	    || (g_config.mos->multiprocess && g_config.mos->multiprocess_is_master)) {
		for (rxlcore_id = 0; rxlcore_id < g_config.mos->num_cores; rxlcore_id++) {
			char name[20];
			sprintf(name, "mbuf_pool-%d", rxlcore_id);
			/* create the mbuf pools */
			pktmbuf_pool[rxlcore_id] =
				rte_mempool_create(name, NB_MBUF,
						   MBUF_SIZE, MEMPOOL_CACHE_SIZE,
						   sizeof(struct rte_pktmbuf_pool_private),
						   rte_pktmbuf_pool_init, NULL,
						   rte_pktmbuf_init, NULL,
						   rte_lcore_to_socket_id(rxlcore_id), 0);
			if (pktmbuf_pool[rxlcore_id] == NULL)
				rte_exit(EXIT_FAILURE, "Cannot init mbuf pool\n");
		}

		/* Initialise each port */
		for (portid = 0; portid < g_config.mos->netdev_table->num; portid++) {
			int num_queue = 0, eth_idx, i, queue_id;
			for (eth_idx = 0; eth_idx < g_config.mos->netdev_table->num; eth_idx++)
				if (portid == g_config.mos->netdev_table->ent[eth_idx]->ifindex)
					break;
			if (eth_idx == g_config.mos->netdev_table->num)
				continue;
			for (i = 0; i < sizeof(uint64_t) * 8; i++)
				if (g_config.mos->netdev_table->ent[eth_idx]->cpu_mask & (1L << i))
					num_queue++;

			/* set 'num_queues' (used for GetRSSCPUCore() in util.c) */
			num_queues = num_queue;

			/* init port */
			printf("Initializing port %u... ", (unsigned) portid);
			fflush(stdout);
			ret = rte_eth_dev_configure(portid, num_queue, num_queue,
						    &port_conf);
			if (ret < 0)
				rte_exit(EXIT_FAILURE, "Cannot configure device:"
					 "err=%d, port=%u\n",
					 ret, (unsigned) portid);

			/* init one RX queue per CPU */
			fflush(stdout);
#ifdef DEBUG
			rte_eth_macaddr_get(portid, &ports_eth_addr[portid]);
#endif
			queue_id = 0;
			for (rxlcore_id = 0; rxlcore_id < g_config.mos->num_cores; rxlcore_id++) {
				if (!(g_config.mos->netdev_table->ent[eth_idx]->cpu_mask & (1L << rxlcore_id)))
					continue;
				ret = rte_eth_rx_queue_setup(portid, queue_id, nb_rxd,
							     rte_eth_dev_socket_id(portid), &rx_conf,
							     pktmbuf_pool[rxlcore_id]);
				if (ret < 0)
					rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup:"
						 "err=%d, port=%u, queueid: %d\n",
						 ret, (unsigned) portid, rxlcore_id);
				cpu_qid_map[portid][rxlcore_id] = queue_id++;
			}

			/* init one TX queue on each port per CPU (this is redundant for
			 * this app) */
			fflush(stdout);
			queue_id = 0;
			for (rxlcore_id = 0; rxlcore_id < g_config.mos->num_cores; rxlcore_id++) {
				if (!(g_config.mos->netdev_table->ent[eth_idx]->cpu_mask & (1L << rxlcore_id)))
					continue;
				ret = rte_eth_tx_queue_setup(portid, queue_id++, nb_txd,
							     rte_eth_dev_socket_id(portid), &tx_conf);
				if (ret < 0)
					rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup:"
						 "err=%d, port=%u, queueid: %d\n",
						 ret, (unsigned) portid, rxlcore_id);
			}

			/* Start device */
			ret = rte_eth_dev_start(portid);
			if (ret < 0)
				rte_exit(EXIT_FAILURE, "rte_eth_dev_start:err=%d, port=%u\n",
					 ret, (unsigned) portid);

			printf("done: \n");
			rte_eth_promiscuous_enable(portid);

			/* retrieve current flow control settings per port */
			memset(&fc_conf, 0, sizeof(fc_conf));
			ret = rte_eth_dev_flow_ctrl_get(portid, &fc_conf);
			if (ret != 0) {
				rte_exit(EXIT_FAILURE, "Failed to get flow control info!\n");
			}

			/* and just disable the rx/tx flow control */
			fc_conf.mode = RTE_FC_NONE;
			ret = rte_eth_dev_flow_ctrl_set(portid, &fc_conf);
			if (ret != 0) {
				rte_exit(EXIT_FAILURE, "Failed to set flow control info!: errno: %d\n",
					 ret);
			}

#ifdef DEBUG
			printf("Port %u, MAC address: %02X:%02X:%02X:%02X:%02X:%02X\n\n",
			       (unsigned) portid,
			       ports_eth_addr[portid].addr_bytes[0],
			       ports_eth_addr[portid].addr_bytes[1],
			       ports_eth_addr[portid].addr_bytes[2],
			       ports_eth_addr[portid].addr_bytes[3],
			       ports_eth_addr[portid].addr_bytes[4],
			       ports_eth_addr[portid].addr_bytes[5]);
#endif
#if 0
			/* if multi-process support is enabled, then turn on FDIR */
			if (g_config.mos->multiprocess)
				dpdk_enable_fdir(portid, g_config.mos->multiprocess_is_master);
#endif
		}
	} else { /* g_config.mos->multiprocess && !g_config.mos->multiprocess_is_master */
		for (rxlcore_id = 0; rxlcore_id < g_config.mos->num_cores; rxlcore_id++) {
			char name[20];
			sprintf(name, "mbuf_pool-%d", rxlcore_id);
			/* initialize the mbuf pools */
			pktmbuf_pool[rxlcore_id] =
				rte_mempool_lookup(name);
			if (pktmbuf_pool[rxlcore_id] == NULL)
				rte_exit(EXIT_FAILURE, "Cannot init mbuf pool\n");
		}
#if 0
		for (portid = 0; portid < g_config.mos->netdev_table->num; portid++)
			dpdk_enable_fdir(portid, g_config.mos->multiprocess_is_master);
#endif
	}

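	/* wait for the links to come up before handing control back to the mOS core */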
	check_all_ports_link_status(g_config.mos->netdev_table->num, 0xFFFFFFFF);
}
/*----------------------------------------------------------------------------*/
io_module_func dpdk_module_func = {
	.load_module_upper_half	= dpdk_load_module_upper_half,
	.load_module_lower_half	= dpdk_load_module_lower_half,
	.init_handle		= dpdk_init_handle,
	.link_devices		= NULL,
	.release_pkt		= NULL,
	.send_pkts		= dpdk_send_pkts,
	.get_wptr		= dpdk_get_wptr,
	.recv_pkts		= dpdk_recv_pkts,
	.get_rptr		= dpdk_get_rptr,
	.get_nif		= dpdk_get_nif,
	.select			= NULL,
	.destroy_handle		= dpdk_destroy_handle,
	.dev_ioctl		= dpdk_dev_ioctl,
	.set_wptr		= dpdk_set_wptr,
};
/*----------------------------------------------------------------------------*/
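/*
 * dpdk_module_func is the operations table this module exports to the mOS/mTCP
 * core (the io_module_func type comes from io_module.h). How the core selects
 * it lies outside this file; in the mTCP/mOS tree it is typically wired up
 * through an io-module pointer such as current_iomodule_func, though that is
 * an assumption about the surrounding code base rather than something shown
 * here.
 */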