/* for io_module_func def'ns */
#include "io_module.h"
/* for mtcp related def'ns */
#include "mtcp.h"
/* for errno */
#include <errno.h>
/* for logging */
#include "debug.h"
/* for num_devices_* */
#include "config.h"
/* for RTE_MAX_ETHPORTS */
#include <rte_common.h>
/* for rte_eth_rxconf */
#include <rte_ethdev.h>
/* for delay funcs */
#include <rte_cycles.h>
/* for IP pseudo-header checksum */
#include <rte_ip.h>
#define ENABLE_STATS_IOCTL	1
#ifdef ENABLE_STATS_IOCTL
/* for close */
#include <unistd.h>
/* for open */
#include <fcntl.h>
/* for ioctl */
#include <sys/ioctl.h>
#endif /* ENABLE_STATS_IOCTL */
/*----------------------------------------------------------------------------*/
/* Essential macros */
#define MAX_RX_QUEUE_PER_LCORE	MAX_CPUS
#define MAX_TX_QUEUE_PER_PORT	MAX_CPUS

#define MBUF_SIZE		(2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)
#define NB_MBUF			8192
#define MEMPOOL_CACHE_SIZE	256

/*
 * RX and TX Prefetch, Host, and Write-back threshold values should be
 * carefully set for optimal performance. Consult the network
 * controller's datasheet and supporting DPDK documentation for guidance
 * on how these parameters should be set.
 */
#define RX_PTHRESH	8 /**< Default value of RX prefetch threshold reg. */
#define RX_HTHRESH	8 /**< Default value of RX host threshold reg. */
#define RX_WTHRESH	4 /**< Default value of RX write-back threshold reg. */

/*
 * These default values are optimized for use with the Intel(R) 82599 10 GbE
 * Controller and the DPDK ixgbe PMD. Consider using other values for other
 * network controllers and/or network drivers.
 */
#define TX_PTHRESH	36 /**< Default value of TX prefetch threshold reg. */
#define TX_HTHRESH	0  /**< Default value of TX host threshold reg. */
#define TX_WTHRESH	0  /**< Default value of TX write-back threshold reg. */
#define MAX_PKT_BURST		64

/*
 * Configurable number of RX/TX ring descriptors
 */
#define RTE_TEST_RX_DESC_DEFAULT	128
#define RTE_TEST_TX_DESC_DEFAULT	512

static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
/*----------------------------------------------------------------------------*/
/* packet memory pools for storing packet bufs */
static struct rte_mempool *pktmbuf_pool[MAX_CPUS] = {NULL};
/* per-port CPU-to-queue map; 0xFF marks a CPU that has no queue on a port */
static uint8_t cpu_qid_map[RTE_MAX_ETHPORTS][MAX_CPUS] = {{0}};

//#define DEBUG		1
#ifdef DEBUG
/* ethernet addresses of ports */
static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];
#endif

static struct rte_eth_conf port_conf = {
	.rxmode = {
		.mq_mode	= ETH_MQ_RX_RSS,
		.max_rx_pkt_len	= ETHER_MAX_LEN,
		.split_hdr_size	= 0,
		.header_split	= 0, /**< Header Split disabled */
		.hw_ip_checksum	= 1, /**< IP checksum offload enabled */
		.hw_vlan_filter	= 0, /**< VLAN filtering disabled */
		.jumbo_frame	= 0, /**< Jumbo Frame Support disabled */
		.hw_strip_crc	= 1, /**< CRC stripped by hardware */
	},
	.rx_adv_conf = {
		.rss_conf = {
			.rss_key = NULL,
			.rss_hf = ETH_RSS_TCP
		},
	},
	.txmode = {
		.mq_mode = ETH_MQ_TX_NONE,
	},
#if 0
	.fdir_conf = {
		.mode = RTE_FDIR_MODE_PERFECT,
		.pballoc = RTE_FDIR_PBALLOC_256K,
		.status = RTE_FDIR_REPORT_STATUS_ALWAYS,
		//.flexbytes_offset = 0x6,
		.drop_queue = 127,
	},
#endif
};

static const struct rte_eth_rxconf rx_conf = {
	.rx_thresh = {
		.pthresh = RX_PTHRESH, /* RX prefetch threshold reg */
		.hthresh = RX_HTHRESH, /* RX host threshold reg */
		.wthresh = RX_WTHRESH, /* RX write-back threshold reg */
	},
	.rx_free_thresh = 32,
};

static const struct rte_eth_txconf tx_conf = {
	.tx_thresh = {
		.pthresh = TX_PTHRESH, /* TX prefetch threshold reg */
		.hthresh = TX_HTHRESH, /* TX host threshold reg */
		.wthresh = TX_WTHRESH, /* TX write-back threshold reg */
	},
	.tx_free_thresh = 0, /* Use PMD default values */
	.tx_rs_thresh = 0,   /* Use PMD default values */
	/*
	 * Leave txq_flags clear (0x0) so that multi-segment mbufs and
	 * TX offloads (e.g., checksum) remain available on this queue.
	 */
	.txq_flags = 0x0,
};

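/*
 * Per-thread TX batching: every mTCP thread keeps up to MAX_PKT_BURST
 * pre-allocated mbufs per port in its wmbufs tables. dpdk_get_wptr() and
 * dpdk_set_wptr() append packets to the current batch, which
 * dpdk_send_pkts() later flushes to the NIC in one rte_eth_tx_burst()
 * cycle.
 */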
struct mbuf_table {
	unsigned len; /* length of queued packets */
	struct rte_mbuf *m_table[MAX_PKT_BURST];
};

struct dpdk_private_context {
	struct mbuf_table rmbufs[RTE_MAX_ETHPORTS];
	struct mbuf_table wmbufs[RTE_MAX_ETHPORTS];
	struct rte_mempool *pktmbuf_pool;
	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
#ifdef ENABLE_STATS_IOCTL
	int fd;
#endif /* ENABLE_STATS_IOCTL */
} __rte_cache_aligned;

#ifdef ENABLE_STATS_IOCTL
/**
 * stats struct passed on from user space to the driver
 */
struct stats_struct {
	uint64_t tx_bytes;
	uint64_t tx_pkts;
	uint64_t rx_bytes;
	uint64_t rx_pkts;
	uint8_t qid;
	uint8_t dev;
};
#endif /* ENABLE_STATS_IOCTL */
/*----------------------------------------------------------------------------*/
void
dpdk_init_handle(struct mtcp_thread_context *ctxt)
{
	struct dpdk_private_context *dpc;
	int i, j;

	/* create and initialize private I/O module context */
	ctxt->io_private_context = calloc(1, sizeof(struct dpdk_private_context));
	if (ctxt->io_private_context == NULL) {
		TRACE_ERROR("Failed to initialize ctxt->io_private_context: "
			    "Can't allocate memory\n");
		exit(EXIT_FAILURE);
	}

	dpc = (struct dpdk_private_context *)ctxt->io_private_context;
	dpc->pktmbuf_pool = pktmbuf_pool[ctxt->cpu];

	/* set wmbufs correctly */
	for (j = 0; j < g_config.mos->netdev_table->num; j++) {
		/* allocate wmbufs for each registered port */
		for (i = 0; i < MAX_PKT_BURST; i++) {
			dpc->wmbufs[j].m_table[i] = rte_pktmbuf_alloc(pktmbuf_pool[ctxt->cpu]);
			if (dpc->wmbufs[j].m_table[i] == NULL) {
				TRACE_ERROR("Failed to allocate %d:wmbuf[%d] on device %d!\n",
					    ctxt->cpu, i, j);
				exit(EXIT_FAILURE);
			}
		}
		/* set mbufs queue length to 0 to begin with */
		dpc->wmbufs[j].len = 0;
	}

#ifdef ENABLE_STATS_IOCTL
	dpc->fd = open("/dev/dpdk-iface", O_RDWR);
	if (dpc->fd == -1) {
		TRACE_ERROR("Can't open /dev/dpdk-iface for ctxt->cpu: %d! "
			    "Are you using the mlx4/mlx5 driver?\n",
			    ctxt->cpu);
	}
#endif /* ENABLE_STATS_IOCTL */
}
/*----------------------------------------------------------------------------*/
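/*
 * Flush the pending TX batch of port `nif' out to the wire: retry
 * rte_eth_tx_burst() until the NIC has accepted the whole batch, then
 * (unless SHARE_IO_BUFFER is set) refill the table with fresh mbufs.
 * With NETSTAT and ENABLE_STATS_IOCTL, the per-queue counters are also
 * pushed down to the dpdk-iface kernel module via ioctl().
 */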
int
dpdk_send_pkts(struct mtcp_thread_context *ctxt, int nif)
{
	struct dpdk_private_context *dpc;
	mtcp_manager_t mtcp;
	int ret;
	int qid;

	dpc = (struct dpdk_private_context *)ctxt->io_private_context;
	mtcp = ctxt->mtcp_manager;
	ret = 0;
	qid = cpu_qid_map[nif][ctxt->cpu];

	/* if the queue is unassigned, skip it.. */
	if (unlikely(qid == 0xFF))
		return 0;

	/* if there are packets in the queue... flush them out to the wire */
	if (dpc->wmbufs[nif].len > 0) {
		struct rte_mbuf **pkts;
#ifdef ENABLE_STATS_IOCTL
		struct stats_struct ss;
#endif /* ENABLE_STATS_IOCTL */
		int cnt = dpc->wmbufs[nif].len;
		pkts = dpc->wmbufs[nif].m_table;
#ifdef NETSTAT
		mtcp->nstat.tx_packets[nif] += cnt;
#ifdef ENABLE_STATS_IOCTL
		if (likely(dpc->fd >= 0)) {
			ss.tx_pkts = mtcp->nstat.tx_packets[nif];
			ss.tx_bytes = mtcp->nstat.tx_bytes[nif];
			ss.rx_pkts = mtcp->nstat.rx_packets[nif];
			ss.rx_bytes = mtcp->nstat.rx_bytes[nif];
			ss.qid = ctxt->cpu;
			ss.dev = nif;
			ioctl(dpc->fd, 0, &ss);
		}
#endif /* ENABLE_STATS_IOCTL */
#endif /* NETSTAT */
		do {
			/* tx cnt # of packets */
			ret = rte_eth_tx_burst(nif, qid, pkts, cnt);
			pkts += ret;
			cnt -= ret;
			/* if not all pkts were sent... then repeat the cycle */
		} while (cnt > 0);

#ifndef SHARE_IO_BUFFER
		int i;
		/* time to allocate fresh mbufs for the queue */
		for (i = 0; i < dpc->wmbufs[nif].len; i++) {
			dpc->wmbufs[nif].m_table[i] = rte_pktmbuf_alloc(pktmbuf_pool[ctxt->cpu]);
			/* error checking */
			if (unlikely(dpc->wmbufs[nif].m_table[i] == NULL)) {
				TRACE_ERROR("Failed to allocate %d:wmbuf[%d] on device %d!\n",
					    ctxt->cpu, i, nif);
				exit(EXIT_FAILURE);
			}
		}
#endif
		/* reset the len of mbufs var after flushing of packets */
		dpc->wmbufs[nif].len = 0;
	}

	return ret;
}
/*----------------------------------------------------------------------------*/
uint8_t *
dpdk_get_wptr(struct mtcp_thread_context *ctxt, int nif, uint16_t pktsize)
{
	struct dpdk_private_context *dpc;
	mtcp_manager_t mtcp;
	struct rte_mbuf *m;
	uint8_t *ptr;
	int len_of_mbuf;

	dpc = (struct dpdk_private_context *)ctxt->io_private_context;
	mtcp = ctxt->mtcp_manager;

	/* sanity check */
	if (unlikely(dpc->wmbufs[nif].len == MAX_PKT_BURST))
		return NULL;

	len_of_mbuf = dpc->wmbufs[nif].len;
	m = dpc->wmbufs[nif].m_table[len_of_mbuf];

	/* retrieve the right write offset */
	ptr = (void *)rte_pktmbuf_mtod(m, struct ether_hdr *);
	m->pkt_len = m->data_len = pktsize;
	m->nb_segs = 1;
	m->next = NULL;

#ifdef NETSTAT
	/* +24 accounts for Ethernet wire overhead (preamble, FCS and IFG) */
	mtcp->nstat.tx_bytes[nif] += pktsize + 24;
#endif

	/* increment the len_of_mbuf var */
	dpc->wmbufs[nif].len = len_of_mbuf + 1;

	return (uint8_t *)ptr;
}
/*----------------------------------------------------------------------------*/
void
dpdk_set_wptr(struct mtcp_thread_context *ctxt, int out_nif, int in_nif, int index)
{
	struct dpdk_private_context *dpc;
	mtcp_manager_t mtcp;
	int len_of_mbuf;

	dpc = (struct dpdk_private_context *)ctxt->io_private_context;
	mtcp = ctxt->mtcp_manager;

	/* sanity check */
	if (unlikely(dpc->wmbufs[out_nif].len == MAX_PKT_BURST))
		return;

	len_of_mbuf = dpc->wmbufs[out_nif].len;
	dpc->wmbufs[out_nif].m_table[len_of_mbuf] =
		dpc->rmbufs[in_nif].m_table[index];

	/* mark the packet as forwarded; see free_pkts() below */
	dpc->wmbufs[out_nif].m_table[len_of_mbuf]->udata64 = 0;

#ifdef NETSTAT
	mtcp->nstat.tx_bytes[out_nif] += dpc->rmbufs[in_nif].m_table[index]->pkt_len + 24;
#endif

	/* increment the len_of_mbuf var */
	dpc->wmbufs[out_nif].len = len_of_mbuf + 1;
}
/*----------------------------------------------------------------------------*/
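/*
 * RX mbuf ownership is tracked via the mbuf's udata64 field:
 * dpdk_get_rptr() tags received packets with udata64 = 1 (consumed
 * locally; freed below on the next RX cycle), while dpdk_set_wptr()
 * resets it to 0 for forwarded packets, whose mbufs are instead released
 * by the PMD after transmission.
 */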
static inline void
free_pkts(struct rte_mbuf **mtable, unsigned len)
{
	unsigned i;

	/* free the freaking packets */
	for (i = 0; i < len; i++) {
		if (mtable[i]->udata64 == 1) {
			rte_pktmbuf_free_seg(mtable[i]);
			/* prefetch the next entry only if one actually exists */
			if (i + 1 < len)
				RTE_MBUF_PREFETCH_TO_FREE(mtable[i + 1]);
		}
	}
}
/*----------------------------------------------------------------------------*/
int32_t
dpdk_recv_pkts(struct mtcp_thread_context *ctxt, int ifidx)
{
	struct dpdk_private_context *dpc;
	int ret;
	uint8_t qid;

	dpc = (struct dpdk_private_context *)ctxt->io_private_context;
	qid = cpu_qid_map[ifidx][ctxt->cpu];

	/* if the queue is unassigned, skip it.. */
	if (qid == 0xFF)
		return 0;

	/* release the mbufs that were consumed in the previous burst */
	if (dpc->rmbufs[ifidx].len != 0) {
		free_pkts(dpc->rmbufs[ifidx].m_table, dpc->rmbufs[ifidx].len);
		dpc->rmbufs[ifidx].len = 0;
	}

	ret = rte_eth_rx_burst((uint8_t)ifidx, qid,
			       dpc->pkts_burst, MAX_PKT_BURST);

	dpc->rmbufs[ifidx].len = ret;

	return ret;
}
/*----------------------------------------------------------------------------*/
uint8_t *
dpdk_get_rptr(struct mtcp_thread_context *ctxt, int ifidx, int index, uint16_t *len)
{
	struct dpdk_private_context *dpc;
	struct rte_mbuf *m;
	uint8_t *pktbuf;

	dpc = (struct dpdk_private_context *)ctxt->io_private_context;

	m = dpc->pkts_burst[index];
	/* tag to check if the packet is a local or a forwarded pkt */
	m->udata64 = 1;
	/* don't enable pre-fetching... performance goes down */
	//rte_prefetch0(rte_pktmbuf_mtod(m, void *));
	*len = m->pkt_len;
	pktbuf = rte_pktmbuf_mtod(m, uint8_t *);

	/* enqueue the pkt ptr in mbuf */
	dpc->rmbufs[ifidx].m_table[index] = m;

	return pktbuf;
}
/*----------------------------------------------------------------------------*/
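/*
 * Illustrative RX sequence (a hypothetical caller-side sketch; the real
 * call sites live in the mTCP core, which drives these hooks through
 * dpdk_module_func at the bottom of this file):
 *
 *	int i, cnt = dpdk_recv_pkts(ctxt, ifidx);
 *	for (i = 0; i < cnt; i++) {
 *		uint16_t len;
 *		uint8_t *buf = dpdk_get_rptr(ctxt, ifidx, i, &len);
 *		// parse the Ethernet frame in buf[0..len-1]
 *	}
 */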
int
dpdk_get_nif(struct ifreq *ifr)
{
	int i;
	static int num_dev = -1;
	static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];

	/* get mac addr entries of 'detected' dpdk ports */
	if (num_dev < 0) {
		num_dev = rte_eth_dev_count();
		for (i = 0; i < num_dev; i++)
			rte_eth_macaddr_get(i, &ports_eth_addr[i]);
	}

	for (i = 0; i < num_dev; i++)
		if (!memcmp(&ifr->ifr_addr.sa_data[0], &ports_eth_addr[i], ETH_ALEN))
			return i;

	return -1;
}
/*----------------------------------------------------------------------------*/
void
dpdk_destroy_handle(struct mtcp_thread_context *ctxt)
{
	struct dpdk_private_context *dpc;
	int i;

	dpc = (struct dpdk_private_context *)ctxt->io_private_context;

	/* free wmbufs */
	for (i = 0; i < g_config.mos->netdev_table->num; i++)
		free_pkts(dpc->wmbufs[i].m_table, MAX_PKT_BURST);

#ifdef ENABLE_STATS_IOCTL
	/* close the stats fd */
	if (dpc->fd >= 0)
		close(dpc->fd);
#endif /* ENABLE_STATS_IOCTL */

	/* free it all up */
	free(dpc);
}
/*----------------------------------------------------------------------------*/
static void
check_all_ports_link_status(uint8_t port_num, uint32_t port_mask)
{
#define CHECK_INTERVAL	100 /* 100ms */
#define MAX_CHECK_TIME	90  /* 9s (90 * 100ms) in total */

	uint8_t portid, count, all_ports_up, print_flag = 0;
	struct rte_eth_link link;

	printf("\nChecking link status");
	fflush(stdout);
	for (count = 0; count <= MAX_CHECK_TIME; count++) {
		all_ports_up = 1;
		for (portid = 0; portid < port_num; portid++) {
			if ((port_mask & (1 << portid)) == 0)
				continue;
			memset(&link, 0, sizeof(link));
			rte_eth_link_get_nowait(portid, &link);
			/* print link status if flag set */
			if (print_flag == 1) {
				if (link.link_status)
					printf("Port %d Link Up - speed %u "
					       "Mbps - %s\n", (uint8_t)portid,
					       (unsigned)link.link_speed,
					       (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
					       ("full-duplex") : ("half-duplex"));
				else
					printf("Port %d Link Down\n",
					       (uint8_t)portid);
				continue;
			}
			/* clear all_ports_up flag if any link down */
			if (link.link_status == 0) {
				all_ports_up = 0;
				break;
			}
		}
		/* after finally printing all link status, get out */
		if (print_flag == 1)
			break;

		if (all_ports_up == 0) {
			printf(".");
			fflush(stdout);
			rte_delay_ms(CHECK_INTERVAL);
		}

		/* set the print_flag if all ports up or timeout */
		if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
			print_flag = 1;
			printf("done\n");
		}
	}
}
/*----------------------------------------------------------------------------*/
#if 0
static void
dpdk_enable_fdir(int portid, uint8_t is_master)
{
	struct rte_fdir_masks fdir_masks;
	struct rte_fdir_filter fdir_filter;
	int ret;

	memset(&fdir_filter, 0, sizeof(struct rte_fdir_filter));
	fdir_filter.iptype = RTE_FDIR_IPTYPE_IPV4;
	fdir_filter.l4type = RTE_FDIR_L4TYPE_TCP;
	fdir_filter.ip_dst.ipv4_addr = g_config.mos->netdev_table->ent[portid]->ip_addr;

	if (is_master) {
		memset(&fdir_masks, 0, sizeof(struct rte_fdir_masks));
		fdir_masks.src_ipv4_mask = 0x0;
		fdir_masks.dst_ipv4_mask = 0xFFFFFFFF;
		fdir_masks.src_port_mask = 0x0;
		fdir_masks.dst_port_mask = 0x0;

		/*
		 * enable the following if the filter is IP-only
		 * (non-TCP, non-UDP)
		 */
		/* fdir_masks.only_ip_flow = 1; */
		rte_eth_dev_fdir_set_masks(portid, &fdir_masks);
		ret = rte_eth_dev_fdir_add_perfect_filter(portid,
							  &fdir_filter,
							  0,
							  g_config.mos->multiprocess_curr_core,
							  0);
	} else {
		ret = rte_eth_dev_fdir_update_perfect_filter(portid,
							     &fdir_filter,
							     0,
							     g_config.mos->multiprocess_curr_core,
							     0);
	}
	if (ret < 0) {
		rte_exit(EXIT_FAILURE,
			 "fdir_add_perfect_filter_t call failed!: %d\n",
			 ret);
	}
	fprintf(stderr, "Filter for device ifidx: %d added\n", portid);
}
#endif
/*----------------------------------------------------------------------------*/
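/*
 * Per-packet offload control. Note the DPDK TX checksum offload contract:
 * with PKT_TX_TCP_CKSUM set, the TCP checksum field must be pre-seeded
 * with the IPv4 pseudo-header checksum (rte_ipv4_phdr_cksum()); the NIC
 * computes the rest of the L4 checksum on transmission.
 */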
int32_t
dpdk_dev_ioctl(struct mtcp_thread_context *ctx, int nif, int cmd, void *argp)
{
	struct dpdk_private_context *dpc;
	struct rte_mbuf *m;
	int len_of_mbuf;
	struct iphdr *iph;
	struct tcphdr *tcph;
	RssInfo *rss_i;

	iph = (struct iphdr *)argp;
	dpc = (struct dpdk_private_context *)ctx->io_private_context;
	len_of_mbuf = dpc->wmbufs[nif].len;
	rss_i = NULL;

	switch (cmd) {
	case PKT_TX_IP_CSUM:
		m = dpc->wmbufs[nif].m_table[len_of_mbuf - 1];
		m->ol_flags = PKT_TX_IP_CKSUM | PKT_TX_IPV4;
		m->l2_len = sizeof(struct ether_hdr);
		m->l3_len = (iph->ihl << 2);
		break;
	case PKT_TX_TCP_CSUM:
		m = dpc->wmbufs[nif].m_table[len_of_mbuf - 1];
		tcph = (struct tcphdr *)((unsigned char *)iph + (iph->ihl << 2));
		m->ol_flags |= PKT_TX_TCP_CKSUM;
		/* seed the TCP checksum field with the pseudo-header checksum */
		tcph->check = rte_ipv4_phdr_cksum((struct ipv4_hdr *)iph, m->ol_flags);
		break;
	case PKT_RX_RSS:
		rss_i = (RssInfo *)argp;
		m = dpc->pkts_burst[rss_i->pktidx];
		rss_i->hash_value = m->hash.rss;
		break;
	default:
		goto dev_ioctl_err;
	}

	return 0;
dev_ioctl_err:
	return -1;
}
/*----------------------------------------------------------------------------*/
void
dpdk_load_module_upper_half(void)
{
	int cpu = g_config.mos->num_cores, ret;
	uint32_t cpumask = 0;
	char cpumaskbuf[10];
	char mem_channels[5];

	/* set the log level */
	rte_set_log_type(RTE_LOGTYPE_PMD, 0);
	rte_set_log_type(RTE_LOGTYPE_MALLOC, 0);
	rte_set_log_type(RTE_LOGTYPE_MEMPOOL, 0);
	rte_set_log_type(RTE_LOGTYPE_RING, 0);
	rte_set_log_level(RTE_LOG_WARNING);

	/* build the cpu mask */
	for (ret = 0; ret < cpu; ret++)
		cpumask = (cpumask | (1 << ret));
	sprintf(cpumaskbuf, "%X", cpumask);

	/* get the mem channels per socket */
	if (g_config.mos->nb_mem_channels == 0) {
		TRACE_ERROR("DPDK module requires the number of memory channels "
			    "per socket parameter!\n");
		exit(EXIT_FAILURE);
	}
	sprintf(mem_channels, "%d", g_config.mos->nb_mem_channels);

	/* initialize the rte env first, what a waste of implementation effort! */
	char *argv[] = {"",
			"-c",
			cpumaskbuf,
			"-n",
			mem_channels,
			"--proc-type=auto",
			""
	};
	const int argc = 6;

	/*
	 * reset the getopt extern variable optind.
	 * this issue was a bitch to debug:
	 * rte_eal_init() internally uses getopt(), so mtcp applications
	 * that also use an `external' getopt will crash violently if
	 * optind is not reset to zero prior to calling the func below...
	 * see man getopt(3) for more details.
	 */
	optind = 0;

	/* initialize the dpdk eal env */
	ret = rte_eal_init(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid EAL args!\n");
}
/*----------------------------------------------------------------------------*/
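/*
 * A worked example of the EAL bootstrap above (hypothetical values): with
 * num_cores = 8 and nb_mem_channels = 4, rte_eal_init() effectively
 * receives "-c FF -n 4 --proc-type=auto".
 */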
void
dpdk_load_module_lower_half(void)
{
	int portid, rxlcore_id, ret;
	struct rte_eth_fc_conf fc_conf; /* for Ethernet flow control settings */
	/* the RSS seed key (a repeated 0x05 byte pattern) */
	static const uint8_t key[] = {
		0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
		0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
		0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
		0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05
	};

	port_conf.rx_adv_conf.rss_conf.rss_key = (uint8_t *)&key;
	port_conf.rx_adv_conf.rss_conf.rss_key_len = sizeof(key);

	/* reset the cpu_qid mapping (0xFF == unassigned) */
	memset(cpu_qid_map, 0xFF, sizeof(cpu_qid_map));

	if (!g_config.mos->multiprocess
	    || (g_config.mos->multiprocess && g_config.mos->multiprocess_is_master)) {
		for (rxlcore_id = 0; rxlcore_id < g_config.mos->num_cores; rxlcore_id++) {
			char name[20];
			sprintf(name, "mbuf_pool-%d", rxlcore_id);
			/* create the mbuf pools */
			pktmbuf_pool[rxlcore_id] =
				rte_mempool_create(name, NB_MBUF,
						   MBUF_SIZE, MEMPOOL_CACHE_SIZE,
						   sizeof(struct rte_pktmbuf_pool_private),
						   rte_pktmbuf_pool_init, NULL,
						   rte_pktmbuf_init, NULL,
						   rte_lcore_to_socket_id(rxlcore_id), 0);
			if (pktmbuf_pool[rxlcore_id] == NULL)
				rte_exit(EXIT_FAILURE, "Cannot init mbuf pool\n");
		}

		/* initialize each port */
		for (portid = 0; portid < g_config.mos->netdev_table->num; portid++) {
			int num_queue = 0, eth_idx, i, queue_id;

			for (eth_idx = 0; eth_idx < g_config.mos->netdev_table->num; eth_idx++)
				if (portid == g_config.mos->netdev_table->ent[eth_idx]->ifindex)
					break;
			if (eth_idx == g_config.mos->netdev_table->num)
				continue;

			/* count the CPUs set in this port's cpu_mask */
			for (i = 0; i < sizeof(uint64_t) * 8; i++)
				if (g_config.mos->netdev_table->ent[eth_idx]->cpu_mask & (1L << i))
					num_queue++;

			/* set 'num_queues' (used for GetRSSCPUCore() in util.c) */
			num_queues = num_queue;

			/* init port */
			printf("Initializing port %u... ", (unsigned)portid);
			fflush(stdout);
			ret = rte_eth_dev_configure(portid, num_queue, num_queue,
						    &port_conf);
			if (ret < 0)
				rte_exit(EXIT_FAILURE, "Cannot configure device: "
					 "err=%d, port=%u\n",
					 ret, (unsigned)portid);

			/* init one RX queue per CPU */
			fflush(stdout);
#ifdef DEBUG
			rte_eth_macaddr_get(portid, &ports_eth_addr[portid]);
#endif
			queue_id = 0;
			for (rxlcore_id = 0; rxlcore_id < g_config.mos->num_cores; rxlcore_id++) {
				if (!(g_config.mos->netdev_table->ent[eth_idx]->cpu_mask & (1L << rxlcore_id)))
					continue;
				ret = rte_eth_rx_queue_setup(portid, queue_id, nb_rxd,
							     rte_eth_dev_socket_id(portid), &rx_conf,
							     pktmbuf_pool[rxlcore_id]);
				if (ret < 0)
					rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup: "
						 "err=%d, port=%u, queueid: %d\n",
						 ret, (unsigned)portid, queue_id);
				cpu_qid_map[portid][rxlcore_id] = queue_id++;
			}

			/* init one TX queue on each port per CPU (this is redundant for
			 * this app) */
			fflush(stdout);
			queue_id = 0;
			for (rxlcore_id = 0; rxlcore_id < g_config.mos->num_cores; rxlcore_id++) {
				if (!(g_config.mos->netdev_table->ent[eth_idx]->cpu_mask & (1L << rxlcore_id)))
					continue;
				ret = rte_eth_tx_queue_setup(portid, queue_id, nb_txd,
							     rte_eth_dev_socket_id(portid), &tx_conf);
				if (ret < 0)
					rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: "
						 "err=%d, port=%u, queueid: %d\n",
						 ret, (unsigned)portid, queue_id);
				queue_id++;
			}

			/* start device */
			ret = rte_eth_dev_start(portid);
			if (ret < 0)
				rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, port=%u\n",
					 ret, (unsigned)portid);

			printf("done: \n");
			rte_eth_promiscuous_enable(portid);

			/* retrieve current flow control settings per port */
			memset(&fc_conf, 0, sizeof(fc_conf));
			ret = rte_eth_dev_flow_ctrl_get(portid, &fc_conf);
			if (ret != 0)
				rte_exit(EXIT_FAILURE, "Failed to get flow control info!\n");

			/* and just disable the rx/tx flow control */
			fc_conf.mode = RTE_FC_NONE;
			ret = rte_eth_dev_flow_ctrl_set(portid, &fc_conf);
			if (ret != 0)
				rte_exit(EXIT_FAILURE, "Failed to set flow control info!: errno: %d\n",
					 ret);

#ifdef DEBUG
			printf("Port %u, MAC address: %02X:%02X:%02X:%02X:%02X:%02X\n\n",
			       (unsigned)portid,
			       ports_eth_addr[portid].addr_bytes[0],
			       ports_eth_addr[portid].addr_bytes[1],
			       ports_eth_addr[portid].addr_bytes[2],
			       ports_eth_addr[portid].addr_bytes[3],
			       ports_eth_addr[portid].addr_bytes[4],
			       ports_eth_addr[portid].addr_bytes[5]);
#endif
#if 0
			/* if multi-process support is enabled, then turn on FDIR */
			if (g_config.mos->multiprocess)
				dpdk_enable_fdir(portid, g_config.mos->multiprocess_is_master);
#endif
		}
	} else { /* g_config.mos->multiprocess && !g_config.mos->multiprocess_is_master */
		for (rxlcore_id = 0; rxlcore_id < g_config.mos->num_cores; rxlcore_id++) {
			char name[20];
			sprintf(name, "mbuf_pool-%d", rxlcore_id);
			/* look up the mbuf pools created by the master process */
			pktmbuf_pool[rxlcore_id] = rte_mempool_lookup(name);
			if (pktmbuf_pool[rxlcore_id] == NULL)
				rte_exit(EXIT_FAILURE, "Cannot init mbuf pool\n");
		}
#if 0
		for (portid = 0; portid < g_config.mos->netdev_table->num; portid++)
			dpdk_enable_fdir(portid, g_config.mos->multiprocess_is_master);
#endif
	}

	check_all_ports_link_status(g_config.mos->netdev_table->num, 0xFFFFFFFF);
}
/*----------------------------------------------------------------------------*/
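/*
 * The I/O module hook table registered with the mTCP core; entries left
 * NULL (link_devices, release_pkt, select) are unused by the DPDK backend.
 */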
io_module_func dpdk_module_func = {
	.load_module_upper_half = dpdk_load_module_upper_half,
	.load_module_lower_half = dpdk_load_module_lower_half,
	.init_handle = dpdk_init_handle,
	.link_devices = NULL,
	.release_pkt = NULL,
	.send_pkts = dpdk_send_pkts,
	.get_wptr = dpdk_get_wptr,
	.recv_pkts = dpdk_recv_pkts,
	.get_rptr = dpdk_get_rptr,
	.get_nif = dpdk_get_nif,
	.select = NULL,
	.destroy_handle = dpdk_destroy_handle,
	.dev_ioctl = dpdk_dev_ioctl,
	.set_wptr = dpdk_set_wptr,
};
/*----------------------------------------------------------------------------*/