/* for io_module_func def'ns */
#include "io_module.h"
/* for mtcp related def'ns */
#include "mtcp.h"
/* for errno */
#include <errno.h>
/* for logging */
#include "debug.h"
/* for num_devices_* */
#include "config.h"
/* for rte_max_eth_ports */
#include <rte_common.h>
/* for rte_eth_rxconf */
#include <rte_ethdev.h>
/* for delay funcs */
#include <rte_cycles.h>
/* for ip pseudo-chksum */
#include <rte_ip.h>
#define ENABLE_STATS_IOCTL		1
#ifdef ENABLE_STATS_IOCTL
/* for close */
#include <unistd.h>
/* for open */
#include <fcntl.h>
/* for ioctl */
#include <sys/ioctl.h>
#endif /* !ENABLE_STATS_IOCTL */
/*----------------------------------------------------------------------------*/
/* Essential macros */
#define MAX_RX_QUEUE_PER_LCORE		MAX_CPUS
#define MAX_TX_QUEUE_PER_PORT		MAX_CPUS

#define MBUF_SIZE			(2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)
#define NB_MBUF				8192
#define MEMPOOL_CACHE_SIZE		256

/*
 * RX and TX Prefetch, Host, and Write-back threshold values should be
 * carefully set for optimal performance. Consult the network
 * controller's datasheet and supporting DPDK documentation for guidance
 * on how these parameters should be set.
 */
#define RX_PTHRESH			8 /**< Default values of RX prefetch threshold reg. */
#define RX_HTHRESH			8 /**< Default values of RX host threshold reg. */
#define RX_WTHRESH			4 /**< Default values of RX write-back threshold reg. */

/*
 * These default values are optimized for use with the Intel(R) 82599 10 GbE
 * Controller and the DPDK ixgbe PMD. Consider using other values for other
 * network controllers and/or network drivers.
 */
#define TX_PTHRESH			36 /**< Default values of TX prefetch threshold reg. */
#define TX_HTHRESH			0  /**< Default values of TX host threshold reg. */
#define TX_WTHRESH			0  /**< Default values of TX write-back threshold reg. */

#define MAX_PKT_BURST			64

/*
 * Configurable number of RX/TX ring descriptors
 */
#define RTE_TEST_RX_DESC_DEFAULT	128
#define RTE_TEST_TX_DESC_DEFAULT	512

static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
/*----------------------------------------------------------------------------*/
/* packet memory pools for storing packet bufs */
static struct rte_mempool *pktmbuf_pool[MAX_CPUS] = {NULL};
static uint8_t cpu_qid_map[RTE_MAX_ETHPORTS][MAX_CPUS] = {{0}};

//#define DEBUG				1
#ifdef DEBUG
/* ethernet addresses of ports */
static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];
#endif

static struct rte_eth_conf port_conf = {
	.rxmode = {
		.mq_mode	= ETH_MQ_RX_RSS,
		.max_rx_pkt_len	= ETHER_MAX_LEN,
		.split_hdr_size	= 0,
		.header_split	= 0, /**< Header Split disabled */
		.hw_ip_checksum	= 1, /**< IP checksum offload enabled */
		.hw_vlan_filter	= 0, /**< VLAN filtering disabled */
		.jumbo_frame	= 0, /**< Jumbo Frame Support disabled */
		.hw_strip_crc	= 1, /**< CRC stripped by hardware */
	},
	.rx_adv_conf = {
		.rss_conf = {
			.rss_key = NULL,
			.rss_hf = ETH_RSS_TCP
		},
	},
	.txmode = {
		.mq_mode = ETH_MQ_TX_NONE,
	},
#if 0
	.fdir_conf = {
		.mode = RTE_FDIR_MODE_PERFECT,
		.pballoc = RTE_FDIR_PBALLOC_256K,
		.status = RTE_FDIR_REPORT_STATUS_ALWAYS,
		//.flexbytes_offset = 0x6,
		.drop_queue = 127,
	},
#endif
};

static const struct rte_eth_rxconf rx_conf = {
	.rx_thresh = {
		.pthresh = RX_PTHRESH, /* RX prefetch threshold reg */
		.hthresh = RX_HTHRESH, /* RX host threshold reg */
		.wthresh = RX_WTHRESH, /* RX write-back threshold reg */
	},
	.rx_free_thresh = 32,
};

static const struct rte_eth_txconf tx_conf = {
	.tx_thresh = {
		.pthresh = TX_PTHRESH, /* TX prefetch threshold reg */
		.hthresh = TX_HTHRESH, /* TX host threshold reg */
		.wthresh = TX_WTHRESH, /* TX write-back threshold reg */
	},
	.tx_free_thresh = 0, /* Use PMD default values */
	.tx_rs_thresh = 0, /* Use PMD default values */
	/*
	 * Keep all txq flags cleared (0x0) so that multi-segment mbufs and
	 * the checksum offloads requested via ol_flags are honored.
	 */
	.txq_flags = 0x0,
};
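/*
 * Per-port burst table used by each mTCP thread: 'm_table' holds up to
 * MAX_PKT_BURST mbuf pointers and 'len' counts how many of them are
 * currently queued (pending TX in wmbufs, last RX burst in rmbufs).
 */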
struct mbuf_table {
	unsigned len; /* length of queued packets */
	struct rte_mbuf *m_table[MAX_PKT_BURST];
};

struct dpdk_private_context {
	struct mbuf_table rmbufs[RTE_MAX_ETHPORTS];
	struct mbuf_table wmbufs[RTE_MAX_ETHPORTS];
	struct rte_mempool *pktmbuf_pool;
	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
#ifdef ENABLE_STATS_IOCTL
	int fd;
#endif /* !ENABLE_STATS_IOCTL */
} __rte_cache_aligned;

#ifdef ENABLE_STATS_IOCTL
/**
 * stats struct passed on from user space to the driver
 */
struct stats_struct {
	uint64_t tx_bytes;
	uint64_t tx_pkts;
	uint64_t rx_bytes;
	uint64_t rx_pkts;
	uint8_t qid;
	uint8_t dev;
};
#endif /* !ENABLE_STATS_IOCTL */
/*----------------------------------------------------------------------------*/
void
dpdk_init_handle(struct mtcp_thread_context *ctxt)
{
	struct dpdk_private_context *dpc;
	int i, j;
	char mempool_name[20];

	/* create and initialize private I/O module context */
	ctxt->io_private_context = calloc(1, sizeof(struct dpdk_private_context));
	if (ctxt->io_private_context == NULL) {
		TRACE_ERROR("Failed to initialize ctxt->io_private_context: "
			    "Can't allocate memory\n");
		exit(EXIT_FAILURE);
	}

	sprintf(mempool_name, "mbuf_pool-%d", ctxt->cpu);
	dpc = (struct dpdk_private_context *)ctxt->io_private_context;
	dpc->pktmbuf_pool = pktmbuf_pool[ctxt->cpu];

	/* set wmbufs correctly */
	for (j = 0; j < g_config.mos->netdev_table->num; j++) {
		/* Allocate wmbufs for each registered port */
		for (i = 0; i < MAX_PKT_BURST; i++) {
			dpc->wmbufs[j].m_table[i] = rte_pktmbuf_alloc(pktmbuf_pool[ctxt->cpu]);
			if (dpc->wmbufs[j].m_table[i] == NULL) {
				TRACE_ERROR("Failed to allocate %d:wmbuf[%d] on device %d!\n",
					    ctxt->cpu, i, j);
				exit(EXIT_FAILURE);
			}
		}
		/* set mbufs queue length to 0 to begin with */
		dpc->wmbufs[j].len = 0;
	}

#ifdef ENABLE_STATS_IOCTL
	dpc->fd = open("/dev/dpdk-iface", O_RDWR);
	if (dpc->fd == -1) {
		TRACE_ERROR("Can't open /dev/dpdk-iface for context->cpu: %d!\n",
			    ctxt->cpu);
		exit(EXIT_FAILURE);
	}
#endif /* !ENABLE_STATS_IOCTL */
}
/*----------------------------------------------------------------------------*/
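/*
 * Flush all mbufs queued on device 'nif' out through this thread's TX
 * queue. rte_eth_tx_burst() is retried until every queued packet has been
 * handed to the PMD; the wmbuf table is then replenished with freshly
 * allocated mbufs (unless I/O buffers are shared) and its length reset to 0.
 */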
int
dpdk_send_pkts(struct mtcp_thread_context *ctxt, int nif)
{
	struct dpdk_private_context *dpc;
	mtcp_manager_t mtcp;
	int ret;
	int qid;

	dpc = (struct dpdk_private_context *)ctxt->io_private_context;
	mtcp = ctxt->mtcp_manager;
	ret = 0;
	qid = cpu_qid_map[nif][ctxt->cpu];

	/* if queue is unassigned, skip it.. */
	if (unlikely(qid == 0xFF))
		return 0;

	/* if there are packets in the queue... flush them out to the wire */
	if (dpc->wmbufs[nif].len > 0) {
		struct rte_mbuf **pkts;
#ifdef ENABLE_STATS_IOCTL
		struct stats_struct ss;
#endif /* !ENABLE_STATS_IOCTL */
		int cnt = dpc->wmbufs[nif].len;
		pkts = dpc->wmbufs[nif].m_table;
#ifdef NETSTAT
		mtcp->nstat.tx_packets[nif] += cnt;
#ifdef ENABLE_STATS_IOCTL
		ss.tx_pkts = mtcp->nstat.tx_packets[nif];
		ss.tx_bytes = mtcp->nstat.tx_bytes[nif];
		ss.rx_pkts = mtcp->nstat.rx_packets[nif];
		ss.rx_bytes = mtcp->nstat.rx_bytes[nif];
		ss.qid = ctxt->cpu;
		ss.dev = nif;
		ioctl(dpc->fd, 0, &ss);
#endif /* !ENABLE_STATS_IOCTL */
#endif
		do {
			/* tx cnt # of packets */
			ret = rte_eth_tx_burst(nif, qid,
					       pkts, cnt);
			pkts += ret;
			cnt -= ret;
			/* if not all pkts were sent... then repeat the cycle */
		} while (cnt > 0);

#ifndef SHARE_IO_BUFFER
		int i;
		/* time to allocate fresh mbufs for the queue */
		for (i = 0; i < dpc->wmbufs[nif].len; i++) {
			dpc->wmbufs[nif].m_table[i] = rte_pktmbuf_alloc(pktmbuf_pool[ctxt->cpu]);
			/* error checking */
			if (unlikely(dpc->wmbufs[nif].m_table[i] == NULL)) {
				TRACE_ERROR("Failed to allocate %d:wmbuf[%d] on device %d!\n",
					    ctxt->cpu, i, nif);
				exit(EXIT_FAILURE);
			}
		}
#endif
		/* reset the len of mbufs var after flushing of packets */
		dpc->wmbufs[nif].len = 0;
	}

	return ret;
}
/*----------------------------------------------------------------------------*/
uint8_t *
dpdk_get_wptr(struct mtcp_thread_context *ctxt, int nif, uint16_t pktsize)
{
	struct dpdk_private_context *dpc;
	mtcp_manager_t mtcp;
	struct rte_mbuf *m;
	uint8_t *ptr;
	int len_of_mbuf;

	dpc = (struct dpdk_private_context *) ctxt->io_private_context;
	mtcp = ctxt->mtcp_manager;

	/* sanity check */
	if (unlikely(dpc->wmbufs[nif].len == MAX_PKT_BURST))
		return NULL;

	len_of_mbuf = dpc->wmbufs[nif].len;
	m = dpc->wmbufs[nif].m_table[len_of_mbuf];

	/* retrieve the right write offset */
	ptr = (void *)rte_pktmbuf_mtod(m, struct ether_hdr *);
	m->pkt_len = m->data_len = pktsize;
	m->nb_segs = 1;
	m->next = NULL;

#ifdef NETSTAT
	/* +24 accounts for Ethernet preamble, SFD, IFG and CRC overhead */
	mtcp->nstat.tx_bytes[nif] += pktsize + 24;
#endif

	/* increment the len_of_mbuf var */
	dpc->wmbufs[nif].len = len_of_mbuf + 1;

	return (uint8_t *)ptr;
}
/*----------------------------------------------------------------------------*/
void
dpdk_set_wptr(struct mtcp_thread_context *ctxt, int out_nif, int in_nif, int index)
{
	struct dpdk_private_context *dpc;
	mtcp_manager_t mtcp;
	int len_of_mbuf;

	dpc = (struct dpdk_private_context *) ctxt->io_private_context;
	mtcp = ctxt->mtcp_manager;

	/* sanity check */
	if (unlikely(dpc->wmbufs[out_nif].len == MAX_PKT_BURST))
		return;

	len_of_mbuf = dpc->wmbufs[out_nif].len;
	dpc->wmbufs[out_nif].m_table[len_of_mbuf] =
		dpc->rmbufs[in_nif].m_table[index];

	dpc->wmbufs[out_nif].m_table[len_of_mbuf]->udata64 = 0;

#ifdef NETSTAT
	mtcp->nstat.tx_bytes[out_nif] += dpc->rmbufs[in_nif].m_table[index]->pkt_len + 24;
#endif

	/* increment the len_of_mbuf var */
	dpc->wmbufs[out_nif].len = len_of_mbuf + 1;

	return;
}
/*----------------------------------------------------------------------------*/
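/*
 * Free the received mbufs that were consumed locally (udata64 == 1, set in
 * dpdk_get_rptr()). Mbufs re-queued for forwarding have udata64 reset to 0
 * in dpdk_set_wptr() and are released by the PMD after transmission, so
 * they are skipped here.
 */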
static inline void
free_pkts(struct rte_mbuf **mtable, unsigned len)
{
	unsigned i;

	/* free the packets */
	for (i = 0; i < len; i++) {
		if (mtable[i]->udata64 == 1) {
			rte_pktmbuf_free_seg(mtable[i]);
			if (i + 1 < len)
				RTE_MBUF_PREFETCH_TO_FREE(mtable[i+1]);
		}
	}
}
/*----------------------------------------------------------------------------*/
int32_t
dpdk_recv_pkts(struct mtcp_thread_context *ctxt, int ifidx)
{
	struct dpdk_private_context *dpc;
	int ret;
	uint8_t qid;

	dpc = (struct dpdk_private_context *) ctxt->io_private_context;
	qid = cpu_qid_map[ifidx][ctxt->cpu];

	/* if queue is unassigned, skip it.. */
	if (qid == 0xFF)
		return 0;

	if (dpc->rmbufs[ifidx].len != 0) {
		free_pkts(dpc->rmbufs[ifidx].m_table, dpc->rmbufs[ifidx].len);
		dpc->rmbufs[ifidx].len = 0;
	}

	ret = rte_eth_rx_burst((uint8_t)ifidx, qid,
			       dpc->pkts_burst, MAX_PKT_BURST);

	dpc->rmbufs[ifidx].len = ret;

	return ret;
}
/*----------------------------------------------------------------------------*/
uint8_t *
dpdk_get_rptr(struct mtcp_thread_context *ctxt, int ifidx, int index, uint16_t *len)
{
	struct dpdk_private_context *dpc;
	struct rte_mbuf *m;
	uint8_t *pktbuf;

	dpc = (struct dpdk_private_context *) ctxt->io_private_context;

	m = dpc->pkts_burst[index];
	/* tag to check if the packet is a local or a forwarded pkt */
	m->udata64 = 1;
	/* don't enable pre-fetching... performance goes down */
	//rte_prefetch0(rte_pktmbuf_mtod(m, void *));
	*len = m->pkt_len;
	pktbuf = rte_pktmbuf_mtod(m, uint8_t *);

	/* enqueue the pkt ptr in mbuf */
	dpc->rmbufs[ifidx].m_table[index] = m;

	return pktbuf;
}
/*----------------------------------------------------------------------------*/
int
dpdk_get_nif(struct ifreq *ifr)
{
	int i;
	static int num_dev = -1;
	static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];

	/* get mac addr entries of 'detected' dpdk ports */
	if (num_dev < 0) {
		num_dev = rte_eth_dev_count();
		for (i = 0; i < num_dev; i++)
			rte_eth_macaddr_get(i, &ports_eth_addr[i]);
	}

	for (i = 0; i < num_dev; i++)
		if (!memcmp(&ifr->ifr_addr.sa_data[0], &ports_eth_addr[i], ETH_ALEN))
			return i;

	return -1;
}
/*----------------------------------------------------------------------------*/
void
dpdk_destroy_handle(struct mtcp_thread_context *ctxt)
{
	struct dpdk_private_context *dpc;
	int i;

	dpc = (struct dpdk_private_context *) ctxt->io_private_context;

	/* free wmbufs */
	for (i = 0; i < g_config.mos->netdev_table->num; i++)
		free_pkts(dpc->wmbufs[i].m_table, MAX_PKT_BURST);

#ifdef ENABLE_STATS_IOCTL
	/* free fd */
	close(dpc->fd);
#endif /* !ENABLE_STATS_IOCTL */

	/* free it all up */
	free(dpc);
}
/*----------------------------------------------------------------------------*/
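/*
 * Poll the link status of every port selected by 'port_mask', waiting up
 * to MAX_CHECK_TIME * CHECK_INTERVAL ms. Once all links are up (or the
 * timeout expires), the final per-port speed/duplex status is printed.
 */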
static void
check_all_ports_link_status(uint8_t port_num, uint32_t port_mask)
{
#define CHECK_INTERVAL			100 /* 100ms */
#define MAX_CHECK_TIME			90  /* 9s (90 * 100ms) in total */

	uint8_t portid, count, all_ports_up, print_flag = 0;
	struct rte_eth_link link;

	printf("\nChecking link status");
	fflush(stdout);
	for (count = 0; count <= MAX_CHECK_TIME; count++) {
		all_ports_up = 1;
		for (portid = 0; portid < port_num; portid++) {
			if ((port_mask & (1 << portid)) == 0)
				continue;
			memset(&link, 0, sizeof(link));
			rte_eth_link_get_nowait(portid, &link);
			/* print link status if flag set */
			if (print_flag == 1) {
				if (link.link_status)
					printf("Port %d Link Up - speed %u "
					       "Mbps - %s\n", (uint8_t)portid,
					       (unsigned)link.link_speed,
					       (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
					       ("full-duplex") : ("half-duplex"));
				else
					printf("Port %d Link Down\n",
					       (uint8_t)portid);
				continue;
			}
			/* clear all_ports_up flag if any link down */
			if (link.link_status == 0) {
				all_ports_up = 0;
				break;
			}
		}
		/* after finally printing all link status, get out */
		if (print_flag == 1)
			break;

		if (all_ports_up == 0) {
			printf(".");
			fflush(stdout);
			rte_delay_ms(CHECK_INTERVAL);
		}

		/* set the print_flag if all ports up or timeout */
		if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
			print_flag = 1;
			printf("done\n");
		}
	}
}
/*----------------------------------------------------------------------------*/
#if 0
static void
dpdk_enable_fdir(int portid, uint8_t is_master)
{
	struct rte_fdir_masks fdir_masks;
	struct rte_fdir_filter fdir_filter;
	int ret;

	memset(&fdir_filter, 0, sizeof(struct rte_fdir_filter));
	fdir_filter.iptype = RTE_FDIR_IPTYPE_IPV4;
	fdir_filter.l4type = RTE_FDIR_L4TYPE_TCP;
	fdir_filter.ip_dst.ipv4_addr = g_config.mos->netdev_table->ent[portid]->ip_addr;

	if (is_master) {
		memset(&fdir_masks, 0, sizeof(struct rte_fdir_masks));
		fdir_masks.src_ipv4_mask = 0x0;
		fdir_masks.dst_ipv4_mask = 0xFFFFFFFF;
		fdir_masks.src_port_mask = 0x0;
		fdir_masks.dst_port_mask = 0x0;

		/*
		 * enable the following if the filter is IP-only
		 * (non-TCP, non-UDP)
		 */
		/* fdir_masks.only_ip_flow = 1; */
		rte_eth_dev_fdir_set_masks(portid, &fdir_masks);
		ret = rte_eth_dev_fdir_add_perfect_filter(portid,
							  &fdir_filter,
							  0,
							  g_config.mos->multiprocess_curr_core,
							  0);
	} else {
		ret = rte_eth_dev_fdir_update_perfect_filter(portid,
							     &fdir_filter,
							     0,
							     g_config.mos->multiprocess_curr_core,
							     0);
	}
	if (ret < 0) {
		rte_exit(EXIT_FAILURE,
			 "fdir_add_perfect_filter_t call failed!: %d\n",
			 ret);
	}
	fprintf(stderr, "Filter for device ifidx: %d added\n", portid);
}
#endif
/*----------------------------------------------------------------------------*/
int32_t
dpdk_dev_ioctl(struct mtcp_thread_context *ctx, int nif, int cmd, void *argp)
{
	struct dpdk_private_context *dpc;
	struct rte_mbuf *m;
	int len_of_mbuf;
	struct iphdr *iph;
	struct tcphdr *tcph;
	RssInfo *rss_i;

	iph = (struct iphdr *)argp;
	dpc = (struct dpdk_private_context *)ctx->io_private_context;
	len_of_mbuf = dpc->wmbufs[nif].len;
	rss_i = NULL;

	switch (cmd) {
	case PKT_TX_IP_CSUM:
		m = dpc->wmbufs[nif].m_table[len_of_mbuf - 1];
		m->ol_flags = PKT_TX_IP_CKSUM | PKT_TX_IPV4;
		m->l2_len = sizeof(struct ether_hdr);
		m->l3_len = (iph->ihl<<2);
		break;
	case PKT_TX_TCP_CSUM:
		m = dpc->wmbufs[nif].m_table[len_of_mbuf - 1];
		tcph = (struct tcphdr *)((unsigned char *)iph + (iph->ihl<<2));
		m->ol_flags |= PKT_TX_TCP_CKSUM;
		tcph->check = rte_ipv4_phdr_cksum((struct ipv4_hdr *)iph, m->ol_flags);
		break;
	case PKT_RX_RSS:
		rss_i = (RssInfo *)argp;
		m = dpc->pkts_burst[rss_i->pktidx];
		rss_i->hash_value = m->hash.rss;
		break;
	default:
		goto dev_ioctl_err;
	}

	return 0;
dev_ioctl_err:
	return -1;
}
/*----------------------------------------------------------------------------*/
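/*
 * Upper half of module loading: disables the PMD/malloc/mempool/ring log
 * types, builds the EAL argument vector from the mOS configuration, and
 * calls rte_eal_init(). For example, with 8 configured cores and 4 memory
 * channels the constructed arguments are equivalent to:
 *
 *     -c FF -n 4 --proc-type=auto
 */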
void
dpdk_load_module_upper_half(void)
{
	int cpu = g_config.mos->num_cores, ret;
	uint32_t cpumask = 0;
	char cpumaskbuf[10];
	char mem_channels[5];

	/* set the log level */
	rte_set_log_type(RTE_LOGTYPE_PMD, 0);
	rte_set_log_type(RTE_LOGTYPE_MALLOC, 0);
	rte_set_log_type(RTE_LOGTYPE_MEMPOOL, 0);
	rte_set_log_type(RTE_LOGTYPE_RING, 0);
	rte_set_log_level(RTE_LOG_WARNING);

	/* get the cpu mask */
	for (ret = 0; ret < cpu; ret++)
		cpumask = (cpumask | (1 << ret));
	sprintf(cpumaskbuf, "%X", cpumask);

	/* get the mem channels per socket */
	if (g_config.mos->nb_mem_channels == 0) {
		TRACE_ERROR("DPDK module requires # of memory channels "
			    "per socket parameter!\n");
		exit(EXIT_FAILURE);
	}
	sprintf(mem_channels, "%d", g_config.mos->nb_mem_channels);

	/* initialize the rte env first */
	char *argv[] = {"",
			"-c",
			cpumaskbuf,
			"-n",
			mem_channels,
			"--proc-type=auto",
			""
	};
	const int argc = 6;

	/*
	 * re-set the getopt extern variable optind.
	 * this issue was painful to debug:
	 * rte_eal_init() internally uses getopt(), so mtcp applications
	 * that also use an `external' getopt will crash if optind is not
	 * reset to zero prior to calling the func below...
	 * see man getopt(3) for more details
	 */
	optind = 0;

	/* initialize the dpdk eal env */
	ret = rte_eal_init(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid EAL args!\n");
}
/*----------------------------------------------------------------------------*/
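/*
 * Lower half of module loading. The primary (or single) process creates
 * one mbuf pool per core and, for every configured port, sets up one RX/TX
 * queue pair per core listed in the port's cpu_mask, starts the port in
 * promiscuous mode, and disables Ethernet flow control. Secondary
 * processes only look up the already-created mbuf pools. Finally, link
 * status is polled until all ports report link-up.
 */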
void
dpdk_load_module_lower_half(void)
{
	int portid, rxlcore_id, ret;
	struct rte_eth_fc_conf fc_conf;	/* for Ethernet flow control settings */
	/* setting the rss key */
	static const uint8_t key[] = {
		0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
		0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
		0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
		0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05
	};

	port_conf.rx_adv_conf.rss_conf.rss_key = (uint8_t *)&key;
	port_conf.rx_adv_conf.rss_conf.rss_key_len = sizeof(key);

	/* resetting cpu_qid mapping */
	memset(cpu_qid_map, 0xFF, sizeof(cpu_qid_map));

	if (!g_config.mos->multiprocess
	    || (g_config.mos->multiprocess && g_config.mos->multiprocess_is_master)) {
		for (rxlcore_id = 0; rxlcore_id < g_config.mos->num_cores; rxlcore_id++) {
			char name[20];
			sprintf(name, "mbuf_pool-%d", rxlcore_id);
			/* create the mbuf pools */
			pktmbuf_pool[rxlcore_id] =
				rte_mempool_create(name, NB_MBUF,
						   MBUF_SIZE, MEMPOOL_CACHE_SIZE,
						   sizeof(struct rte_pktmbuf_pool_private),
						   rte_pktmbuf_pool_init, NULL,
						   rte_pktmbuf_init, NULL,
						   rte_lcore_to_socket_id(rxlcore_id), 0);
			if (pktmbuf_pool[rxlcore_id] == NULL)
				rte_exit(EXIT_FAILURE, "Cannot init mbuf pool\n");
		}

		/* Initialise each port */
		for (portid = 0; portid < g_config.mos->netdev_table->num; portid++) {
			int num_queue = 0, eth_idx, i, queue_id;
			for (eth_idx = 0; eth_idx < g_config.mos->netdev_table->num; eth_idx++)
				if (portid == g_config.mos->netdev_table->ent[eth_idx]->ifindex)
					break;
			if (eth_idx == g_config.mos->netdev_table->num)
				continue;
			for (i = 0; i < sizeof(uint64_t) * 8; i++)
				if (g_config.mos->netdev_table->ent[eth_idx]->cpu_mask & (1L << i))
					num_queue++;

			/* set 'num_queues' (used for GetRSSCPUCore() in util.c) */
			num_queues = num_queue;

			/* init port */
			printf("Initializing port %u... ", (unsigned) portid);
			fflush(stdout);
			ret = rte_eth_dev_configure(portid, num_queue, num_queue,
						    &port_conf);
			if (ret < 0)
				rte_exit(EXIT_FAILURE, "Cannot configure device:"
					 "err=%d, port=%u\n",
					 ret, (unsigned) portid);

			/* init one RX queue per CPU */
			fflush(stdout);
#ifdef DEBUG
			rte_eth_macaddr_get(portid, &ports_eth_addr[portid]);
#endif
			queue_id = 0;
			for (rxlcore_id = 0; rxlcore_id < g_config.mos->num_cores; rxlcore_id++) {
				if (!(g_config.mos->netdev_table->ent[eth_idx]->cpu_mask & (1L << rxlcore_id)))
					continue;
				ret = rte_eth_rx_queue_setup(portid, queue_id, nb_rxd,
							     rte_eth_dev_socket_id(portid), &rx_conf,
							     pktmbuf_pool[rxlcore_id]);
				if (ret < 0)
					rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup:"
						 "err=%d, port=%u, queueid: %d\n",
						 ret, (unsigned) portid, rxlcore_id);
				cpu_qid_map[portid][rxlcore_id] = queue_id++;
			}

			/* init one TX queue on each port per CPU (this is redundant for
			 * this app) */
			fflush(stdout);
			queue_id = 0;
			for (rxlcore_id = 0; rxlcore_id < g_config.mos->num_cores; rxlcore_id++) {
				if (!(g_config.mos->netdev_table->ent[eth_idx]->cpu_mask & (1L << rxlcore_id)))
					continue;
				ret = rte_eth_tx_queue_setup(portid, queue_id++, nb_txd,
							     rte_eth_dev_socket_id(portid), &tx_conf);
				if (ret < 0)
					rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup:"
						 "err=%d, port=%u, queueid: %d\n",
						 ret, (unsigned) portid, rxlcore_id);
			}

			/* Start device */
			ret = rte_eth_dev_start(portid);
			if (ret < 0)
				rte_exit(EXIT_FAILURE, "rte_eth_dev_start:err=%d, port=%u\n",
					 ret, (unsigned) portid);

			printf("done: \n");
			rte_eth_promiscuous_enable(portid);

			/* retrieve current flow control settings per port */
			memset(&fc_conf, 0, sizeof(fc_conf));
			ret = rte_eth_dev_flow_ctrl_get(portid, &fc_conf);
			if (ret != 0) {
				rte_exit(EXIT_FAILURE, "Failed to get flow control info!\n");
			}

			/* and just disable the rx/tx flow control */
			fc_conf.mode = RTE_FC_NONE;
			ret = rte_eth_dev_flow_ctrl_set(portid, &fc_conf);
			if (ret != 0) {
				rte_exit(EXIT_FAILURE, "Failed to set flow control info!: errno: %d\n",
					 ret);
			}

#ifdef DEBUG
			printf("Port %u, MAC address: %02X:%02X:%02X:%02X:%02X:%02X\n\n",
			       (unsigned) portid,
			       ports_eth_addr[portid].addr_bytes[0],
			       ports_eth_addr[portid].addr_bytes[1],
			       ports_eth_addr[portid].addr_bytes[2],
			       ports_eth_addr[portid].addr_bytes[3],
			       ports_eth_addr[portid].addr_bytes[4],
			       ports_eth_addr[portid].addr_bytes[5]);
#endif
#if 0
			/* if multi-process support is enabled, then turn on FDIR */
			if (g_config.mos->multiprocess)
				dpdk_enable_fdir(portid, g_config.mos->multiprocess_is_master);
#endif
		}
	} else { /* g_config.mos->multiprocess && !g_config.mos->multiprocess_is_master */
		for (rxlcore_id = 0; rxlcore_id < g_config.mos->num_cores; rxlcore_id++) {
			char name[20];
			sprintf(name, "mbuf_pool-%d", rxlcore_id);
			/* initialize the mbuf pools */
			pktmbuf_pool[rxlcore_id] =
				rte_mempool_lookup(name);
			if (pktmbuf_pool[rxlcore_id] == NULL)
				rte_exit(EXIT_FAILURE, "Cannot init mbuf pool\n");
		}
#if 0
		for (portid = 0; portid < g_config.mos->netdev_table->num; portid++)
			dpdk_enable_fdir(portid, g_config.mos->multiprocess_is_master);
#endif
	}

	check_all_ports_link_status(g_config.mos->netdev_table->num, 0xFFFFFFFF);
}
/*----------------------------------------------------------------------------*/
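/*
 * DPDK I/O module function table registered with the mOS core; hooks that
 * this module does not provide are left as NULL.
 */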
io_module_func dpdk_module_func = {
	.load_module_upper_half	= dpdk_load_module_upper_half,
	.load_module_lower_half	= dpdk_load_module_lower_half,
	.init_handle		= dpdk_init_handle,
	.link_devices		= NULL,
	.release_pkt		= NULL,
	.send_pkts		= dpdk_send_pkts,
	.get_wptr		= dpdk_get_wptr,
	.recv_pkts		= dpdk_recv_pkts,
	.get_rptr		= dpdk_get_rptr,
	.get_nif		= dpdk_get_nif,
	.select			= NULL,
	.destroy_handle		= dpdk_destroy_handle,
	.dev_ioctl		= dpdk_dev_ioctl,
	.set_wptr		= dpdk_set_wptr,
};
/*----------------------------------------------------------------------------*/