1 /* for io_module_func def'ns */ 2 #include "io_module.h" 3 /* for mtcp related def'ns */ 4 #include "mtcp.h" 5 /* for errno */ 6 #include <errno.h> 7 /* for close/optind */ 8 #include <unistd.h> 9 /* for logging */ 10 #include "debug.h" 11 /* for num_devices_* */ 12 #include "config.h" 13 /* for rte_max_eth_ports */ 14 #include <rte_common.h> 15 /* for rte_eth_rxconf */ 16 #include <rte_ethdev.h> 17 /* for delay funcs */ 18 #include <rte_cycles.h> 19 /* for ip pesudo-chksum */ 20 #include <rte_ip.h> 21 #define ENABLE_STATS_IOCTL 1 22 #ifdef ENABLE_STATS_IOCTL 23 /* for open */ 24 #include <fcntl.h> 25 /* for ioctl */ 26 #include <sys/ioctl.h> 27 #endif /* !ENABLE_STATS_IOCTL */ 28 /*----------------------------------------------------------------------------*/ 29 /* Essential macros */ 30 #define MAX_RX_QUEUE_PER_LCORE MAX_CPUS 31 #define MAX_TX_QUEUE_PER_PORT MAX_CPUS 32 33 #define MBUF_SIZE (2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM) 34 #define NB_MBUF 8192 35 #define MEMPOOL_CACHE_SIZE 256 36 //#define RX_IDLE_ENABLE 1 37 #define RX_IDLE_TIMEOUT 1 /* in micro-seconds */ 38 #define RX_IDLE_THRESH 64 39 40 /* 41 * RX and TX Prefetch, Host, and Write-back threshold values should be 42 * carefully set for optimal performance. Consult the network 43 * controller's datasheet and supporting DPDK documentation for guidance 44 * on how these parameters should be set. 45 */ 46 #define RX_PTHRESH 8 /**< Default values of RX prefetch threshold reg. */ 47 #define RX_HTHRESH 8 /**< Default values of RX host threshold reg. */ 48 #define RX_WTHRESH 4 /**< Default values of RX write-back threshold reg. */ 49 50 /* 51 * These default values are optimized for use with the Intel(R) 82599 10 GbE 52 * Controller and the DPDK ixgbe PMD. Consider using other values for other 53 * network controllers and/or network drivers. 54 */ 55 #define TX_PTHRESH 36 /**< Default values of TX prefetch threshold reg. */ 56 #define TX_HTHRESH 0 /**< Default values of TX host threshold reg. */ 57 #define TX_WTHRESH 0 /**< Default values of TX write-back threshold reg. */ 58 59 #define MAX_PKT_BURST /*32*/64/*128*//*32*/ 60 61 /* 62 * Configurable number of RX/TX ring descriptors 63 */ 64 #define RTE_TEST_RX_DESC_DEFAULT 128 65 #define RTE_TEST_TX_DESC_DEFAULT 512 66 67 static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; 68 static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; 69 /*----------------------------------------------------------------------------*/ 70 /* packet memory pools for storing packet bufs */ 71 static struct rte_mempool *pktmbuf_pool[MAX_CPUS] = {NULL}; 72 static uint8_t cpu_qid_map[RTE_MAX_ETHPORTS][MAX_CPUS] = {{0}}; 73 74 //#define DEBUG 1 75 #ifdef DEBUG 76 /* ethernet addresses of ports */ 77 static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS]; 78 #endif 79 80 static struct rte_eth_conf port_conf = { 81 .rxmode = { 82 .mq_mode = ETH_MQ_RX_RSS, 83 .max_rx_pkt_len = ETHER_MAX_LEN, 84 .split_hdr_size = 0, 85 .header_split = 0, /**< Header Split disabled */ 86 .hw_ip_checksum = 1, /**< IP checksum offload enabled */ 87 .hw_vlan_filter = 0, /**< VLAN filtering disabled */ 88 .jumbo_frame = 0, /**< Jumbo Frame Support disabled */ 89 .hw_strip_crc = 1, /**< CRC stripped by hardware */ 90 }, 91 .rx_adv_conf = { 92 .rss_conf = { 93 .rss_key = NULL, 94 .rss_hf = ETH_RSS_TCP 95 }, 96 }, 97 .txmode = { 98 .mq_mode = ETH_MQ_TX_NONE, 99 }, 100 }; 101 102 static const struct rte_eth_rxconf rx_conf = { 103 .rx_thresh = { 104 .pthresh = RX_PTHRESH, /* RX prefetch threshold reg */ 105 .hthresh = RX_HTHRESH, /* RX host threshold reg */ 106 .wthresh = RX_WTHRESH, /* RX write-back threshold reg */ 107 }, 108 .rx_free_thresh = 32, 109 }; 110 111 static const struct rte_eth_txconf tx_conf = { 112 .tx_thresh = { 113 .pthresh = TX_PTHRESH, /* TX prefetch threshold reg */ 114 .hthresh = TX_HTHRESH, /* TX host threshold reg */ 115 .wthresh = TX_WTHRESH, /* TX write-back threshold reg */ 116 }, 117 .tx_free_thresh = 0, /* Use PMD default values */ 118 .tx_rs_thresh = 0, /* Use PMD default values */ 119 /* 120 * As the example won't handle mult-segments and offload cases, 121 * set the flag by default. 122 */ 123 .txq_flags = 0x0, 124 }; 125 126 struct mbuf_table { 127 unsigned len; /* length of queued packets */ 128 struct rte_mbuf *m_table[MAX_PKT_BURST]; 129 }; 130 131 struct dpdk_private_context { 132 struct mbuf_table rmbufs[RTE_MAX_ETHPORTS]; 133 struct mbuf_table wmbufs[RTE_MAX_ETHPORTS]; 134 struct rte_mempool *pktmbuf_pool; 135 struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 136 #ifdef RX_IDLE_ENABLE 137 uint8_t rx_idle; 138 #endif 139 #ifdef ENABLE_STATS_IOCTL 140 int fd; 141 #endif /* !ENABLE_STATS_IOCTL */ 142 } __rte_cache_aligned; 143 144 #ifdef ENABLE_STATS_IOCTL 145 /** 146 * stats struct passed on from user space to the driver 147 */ 148 struct stats_struct { 149 uint64_t tx_bytes; 150 uint64_t tx_pkts; 151 uint64_t rx_bytes; 152 uint64_t rx_pkts; 153 uint8_t qid; 154 uint8_t dev; 155 }; 156 #endif /* !ENABLE_STATS_IOCTL */ 157 /*----------------------------------------------------------------------------*/ 158 void 159 dpdk_init_handle(struct mtcp_thread_context *ctxt) 160 { 161 struct dpdk_private_context *dpc; 162 int i, j; 163 char mempool_name[20]; 164 165 /* create and initialize private I/O module context */ 166 ctxt->io_private_context = calloc(1, sizeof(struct dpdk_private_context)); 167 if (ctxt->io_private_context == NULL) { 168 TRACE_ERROR("Failed to initialize ctxt->io_private_context: " 169 "Can't allocate memory\n"); 170 exit(EXIT_FAILURE); 171 } 172 173 sprintf(mempool_name, "mbuf_pool-%d", ctxt->cpu); 174 dpc = (struct dpdk_private_context *)ctxt->io_private_context; 175 dpc->pktmbuf_pool = pktmbuf_pool[ctxt->cpu]; 176 177 /* set wmbufs correctly */ 178 for (j = 0; j < g_config.mos->netdev_table->num; j++) { 179 /* Allocate wmbufs for each registered port */ 180 for (i = 0; i < MAX_PKT_BURST; i++) { 181 dpc->wmbufs[j].m_table[i] = rte_pktmbuf_alloc(pktmbuf_pool[ctxt->cpu]); 182 if (dpc->wmbufs[j].m_table[i] == NULL) { 183 TRACE_ERROR("Failed to allocate %d:wmbuf[%d] on device %d!\n", 184 ctxt->cpu, i, j); 185 exit(EXIT_FAILURE); 186 } 187 } 188 /* set mbufs queue length to 0 to begin with */ 189 dpc->wmbufs[j].len = 0; 190 } 191 192 #ifdef ENABLE_STATS_IOCTL 193 dpc->fd = open("/dev/dpdk-iface", O_RDWR); 194 if (dpc->fd == -1) { 195 TRACE_ERROR("Can't open /dev/dpdk-iface for context->cpu: %d! " 196 "Are you using mlx4/mlx5 driver?\n", 197 ctxt->cpu); 198 } 199 #endif /* !ENABLE_STATS_IOCTL */ 200 } 201 /*----------------------------------------------------------------------------*/ 202 int 203 dpdk_send_pkts(struct mtcp_thread_context *ctxt, int nif) 204 { 205 struct dpdk_private_context *dpc; 206 mtcp_manager_t mtcp; 207 int ret; 208 int qid; 209 210 dpc = (struct dpdk_private_context *)ctxt->io_private_context; 211 mtcp = ctxt->mtcp_manager; 212 ret = 0; 213 qid = cpu_qid_map[nif][ctxt->cpu]; 214 215 /* if queue is unassigned, skip it.. */ 216 if (unlikely(qid == 0xFF)) 217 return 0; 218 219 /* if there are packets in the queue... flush them out to the wire */ 220 if (dpc->wmbufs[nif].len >/*= MAX_PKT_BURST*/ 0) { 221 struct rte_mbuf **pkts; 222 #ifdef ENABLE_STATS_IOCTL 223 struct stats_struct ss; 224 #endif /* !ENABLE_STATS_IOCTL */ 225 int cnt = dpc->wmbufs[nif].len; 226 pkts = dpc->wmbufs[nif].m_table; 227 #ifdef NETSTAT 228 mtcp->nstat.tx_packets[nif] += cnt; 229 #ifdef ENABLE_STATS_IOCTL 230 if (likely(dpc->fd) >= 0) { 231 ss.tx_pkts = mtcp->nstat.tx_packets[nif]; 232 ss.tx_bytes = mtcp->nstat.tx_bytes[nif]; 233 ss.rx_pkts = mtcp->nstat.rx_packets[nif]; 234 ss.rx_bytes = mtcp->nstat.rx_bytes[nif]; 235 ss.qid = ctxt->cpu; 236 ss.dev = nif; 237 ioctl(dpc->fd, 0, &ss); 238 } 239 #endif /* !ENABLE_STATS_IOCTL */ 240 #endif 241 do { 242 /* tx cnt # of packets */ 243 ret = rte_eth_tx_burst(nif, qid, 244 pkts, cnt); 245 pkts += ret; 246 cnt -= ret; 247 /* if not all pkts were sent... then repeat the cycle */ 248 } while (cnt > 0); 249 250 #ifndef SHARE_IO_BUFFER 251 int i; 252 /* time to allocate fresh mbufs for the queue */ 253 for (i = 0; i < dpc->wmbufs[nif].len; i++) { 254 dpc->wmbufs[nif].m_table[i] = rte_pktmbuf_alloc(pktmbuf_pool[ctxt->cpu]); 255 /* error checking */ 256 if (unlikely(dpc->wmbufs[nif].m_table[i] == NULL)) { 257 TRACE_ERROR("Failed to allocate %d:wmbuf[%d] on device %d!\n", 258 ctxt->cpu, i, nif); 259 exit(EXIT_FAILURE); 260 } 261 } 262 #endif 263 /* reset the len of mbufs var after flushing of packets */ 264 dpc->wmbufs[nif].len = 0; 265 } 266 267 return ret; 268 } 269 /*----------------------------------------------------------------------------*/ 270 uint8_t * 271 dpdk_get_wptr(struct mtcp_thread_context *ctxt, int nif, uint16_t pktsize) 272 { 273 struct dpdk_private_context *dpc; 274 mtcp_manager_t mtcp; 275 struct rte_mbuf *m; 276 uint8_t *ptr; 277 int len_of_mbuf; 278 279 dpc = (struct dpdk_private_context *) ctxt->io_private_context; 280 mtcp = ctxt->mtcp_manager; 281 282 /* sanity check */ 283 if (unlikely(dpc->wmbufs[nif].len == MAX_PKT_BURST)) 284 return NULL; 285 286 len_of_mbuf = dpc->wmbufs[nif].len; 287 m = dpc->wmbufs[nif].m_table[len_of_mbuf]; 288 289 /* retrieve the right write offset */ 290 ptr = (void *)rte_pktmbuf_mtod(m, struct ether_hdr *); 291 m->pkt_len = m->data_len = pktsize; 292 m->nb_segs = 1; 293 m->next = NULL; 294 295 #ifdef NETSTAT 296 mtcp->nstat.tx_bytes[nif] += pktsize + ETHER_OVR; 297 #endif 298 299 /* increment the len_of_mbuf var */ 300 dpc->wmbufs[nif].len = len_of_mbuf + 1; 301 302 return (uint8_t *)ptr; 303 } 304 /*----------------------------------------------------------------------------*/ 305 void 306 dpdk_set_wptr(struct mtcp_thread_context *ctxt, int out_nif, int in_nif, int index) 307 { 308 struct dpdk_private_context *dpc; 309 mtcp_manager_t mtcp; 310 int len_of_mbuf; 311 312 dpc = (struct dpdk_private_context *) ctxt->io_private_context; 313 mtcp = ctxt->mtcp_manager; 314 315 /* sanity check */ 316 if (unlikely(dpc->wmbufs[out_nif].len == MAX_PKT_BURST)) 317 return; 318 319 len_of_mbuf = dpc->wmbufs[out_nif].len; 320 dpc->wmbufs[out_nif].m_table[len_of_mbuf] = 321 dpc->rmbufs[in_nif].m_table[index]; 322 323 dpc->wmbufs[out_nif].m_table[len_of_mbuf]->udata64 = 0; 324 325 #ifdef NETSTAT 326 mtcp->nstat.tx_bytes[out_nif] += dpc->rmbufs[in_nif].m_table[index]->pkt_len + ETHER_OVR; 327 #endif 328 329 /* increment the len_of_mbuf var */ 330 dpc->wmbufs[out_nif].len = len_of_mbuf + 1; 331 332 return; 333 } 334 /*----------------------------------------------------------------------------*/ 335 static inline void 336 free_pkts(struct rte_mbuf **mtable, unsigned len) 337 { 338 int i; 339 340 /* free the freaking packets */ 341 for (i = 0; i < len; i++) { 342 if (mtable[i]->udata64 == 1) { 343 rte_pktmbuf_free_seg(mtable[i]); 344 RTE_MBUF_PREFETCH_TO_FREE(mtable[i+1]); 345 } 346 } 347 } 348 /*----------------------------------------------------------------------------*/ 349 int32_t 350 dpdk_recv_pkts(struct mtcp_thread_context *ctxt, int ifidx) 351 { 352 struct dpdk_private_context *dpc; 353 int ret; 354 uint8_t qid; 355 356 dpc = (struct dpdk_private_context *) ctxt->io_private_context; 357 qid = cpu_qid_map[ifidx][ctxt->cpu]; 358 359 /* if queue is unassigned, skip it.. */ 360 if (qid == 0xFF) 361 return 0; 362 363 if (dpc->rmbufs[ifidx].len != 0) { 364 free_pkts(dpc->rmbufs[ifidx].m_table, dpc->rmbufs[ifidx].len); 365 dpc->rmbufs[ifidx].len = 0; 366 } 367 368 ret = rte_eth_rx_burst((uint8_t)ifidx, qid, 369 dpc->pkts_burst, MAX_PKT_BURST); 370 #ifdef RX_IDLE_ENABLE 371 dpc->rx_idle = (likely(ret != 0)) ? 0 : dpc->rx_idle + 1; 372 #endif 373 dpc->rmbufs[ifidx].len = ret; 374 375 return ret; 376 } 377 /*----------------------------------------------------------------------------*/ 378 uint8_t * 379 dpdk_get_rptr(struct mtcp_thread_context *ctxt, int ifidx, int index, uint16_t *len) 380 { 381 struct dpdk_private_context *dpc; 382 struct rte_mbuf *m; 383 uint8_t *pktbuf; 384 385 dpc = (struct dpdk_private_context *) ctxt->io_private_context; 386 387 388 m = dpc->pkts_burst[index]; 389 /* tag to check if the packet is a local or a forwarded pkt */ 390 m->udata64 = 1; 391 /* don't enable pre-fetching... performance goes down */ 392 //rte_prefetch0(rte_pktmbuf_mtod(m, void *)); 393 *len = m->pkt_len; 394 pktbuf = rte_pktmbuf_mtod(m, uint8_t *); 395 396 /* enqueue the pkt ptr in mbuf */ 397 dpc->rmbufs[ifidx].m_table[index] = m; 398 399 return pktbuf; 400 } 401 /*----------------------------------------------------------------------------*/ 402 int 403 dpdk_get_nif(struct ifreq *ifr) 404 { 405 int i; 406 static int num_dev = -1; 407 static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS]; 408 /* get mac addr entries of 'detected' dpdk ports */ 409 if (num_dev < 0) { 410 num_dev = rte_eth_dev_count(); 411 for (i = 0; i < num_dev; i++) 412 rte_eth_macaddr_get(i, &ports_eth_addr[i]); 413 } 414 415 for (i = 0; i < num_dev; i++) 416 if (!memcmp(&ifr->ifr_addr.sa_data[0], &ports_eth_addr[i], ETH_ALEN)) 417 return i; 418 419 return -1; 420 } 421 /*----------------------------------------------------------------------------*/ 422 int32_t 423 dpdk_select(struct mtcp_thread_context *ctxt) 424 { 425 #ifdef RX_IDLE_ENABLE 426 struct dpdk_private_context *dpc; 427 428 dpc = (struct dpdk_private_context *) ctxt->io_private_context; 429 if (dpc->rx_idle > RX_IDLE_THRESH) { 430 dpc->rx_idle = 0; 431 usleep(RX_IDLE_TIMEOUT); 432 } 433 #endif 434 return 0; 435 } 436 /*----------------------------------------------------------------------------*/ 437 void 438 dpdk_destroy_handle(struct mtcp_thread_context *ctxt) 439 { 440 struct dpdk_private_context *dpc; 441 int i; 442 443 dpc = (struct dpdk_private_context *) ctxt->io_private_context; 444 445 /* free wmbufs */ 446 for (i = 0; i < g_config.mos->netdev_table->num; i++) 447 free_pkts(dpc->wmbufs[i].m_table, MAX_PKT_BURST); 448 449 #ifdef ENABLE_STATS_IOCTL 450 /* free fd */ 451 if (dpc->fd >= 0) 452 close(dpc->fd); 453 #endif /* !ENABLE_STATS_IOCTL */ 454 455 /* free it all up */ 456 free(dpc); 457 } 458 /*----------------------------------------------------------------------------*/ 459 static void 460 check_all_ports_link_status(uint8_t port_num, uint32_t port_mask) 461 { 462 #define CHECK_INTERVAL 100 /* 100ms */ 463 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */ 464 465 uint8_t portid, count, all_ports_up, print_flag = 0; 466 struct rte_eth_link link; 467 468 printf("\nChecking link status"); 469 fflush(stdout); 470 for (count = 0; count <= MAX_CHECK_TIME; count++) { 471 all_ports_up = 1; 472 for (portid = 0; portid < port_num; portid++) { 473 if ((port_mask & (1 << portid)) == 0) 474 continue; 475 memset(&link, 0, sizeof(link)); 476 rte_eth_link_get_nowait(portid, &link); 477 /* print link status if flag set */ 478 if (print_flag == 1) { 479 if (link.link_status) 480 printf("Port %d Link Up - speed %u " 481 "Mbps - %s\n", (uint8_t)portid, 482 (unsigned)link.link_speed, 483 (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? 484 ("full-duplex") : ("half-duplex\n")); 485 else 486 printf("Port %d Link Down\n", 487 (uint8_t)portid); 488 continue; 489 } 490 /* clear all_ports_up flag if any link down */ 491 if (link.link_status == 0) { 492 all_ports_up = 0; 493 break; 494 } 495 } 496 /* after finally printing all link status, get out */ 497 if (print_flag == 1) 498 break; 499 500 if (all_ports_up == 0) { 501 printf("."); 502 fflush(stdout); 503 rte_delay_ms(CHECK_INTERVAL); 504 } 505 506 /* set the print_flag if all ports up or timeout */ 507 if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) { 508 print_flag = 1; 509 printf("done\n"); 510 } 511 } 512 } 513 /*----------------------------------------------------------------------------*/ 514 int32_t 515 dpdk_dev_ioctl(struct mtcp_thread_context *ctx, int nif, int cmd, void *argp) 516 { 517 struct dpdk_private_context *dpc; 518 struct rte_mbuf *m; 519 int len_of_mbuf; 520 struct iphdr *iph; 521 struct tcphdr *tcph; 522 RssInfo *rss_i; 523 524 iph = (struct iphdr *)argp; 525 dpc = (struct dpdk_private_context *)ctx->io_private_context; 526 len_of_mbuf = dpc->wmbufs[nif].len; 527 rss_i = NULL; 528 529 switch (cmd) { 530 case PKT_TX_IP_CSUM: 531 m = dpc->wmbufs[nif].m_table[len_of_mbuf - 1]; 532 m->ol_flags = PKT_TX_IP_CKSUM | PKT_TX_IPV4; 533 m->l2_len = sizeof(struct ether_hdr); 534 m->l3_len = (iph->ihl<<2); 535 break; 536 case PKT_TX_TCP_CSUM: 537 m = dpc->wmbufs[nif].m_table[len_of_mbuf - 1]; 538 tcph = (struct tcphdr *)((unsigned char *)iph + (iph->ihl<<2)); 539 m->ol_flags |= PKT_TX_TCP_CKSUM; 540 tcph->check = rte_ipv4_phdr_cksum((struct ipv4_hdr *)iph, m->ol_flags); 541 break; 542 case PKT_RX_RSS: 543 rss_i = (RssInfo *)argp; 544 m = dpc->pkts_burst[rss_i->pktidx]; 545 rss_i->hash_value = m->hash.rss; 546 break; 547 default: 548 goto dev_ioctl_err; 549 } 550 551 return 0; 552 dev_ioctl_err: 553 return -1; 554 } 555 /*----------------------------------------------------------------------------*/ 556 void 557 dpdk_load_module_upper_half(void) 558 { 559 int cpu = g_config.mos->num_cores, ret; 560 uint32_t cpumask = 0; 561 char cpumaskbuf[10]; 562 char mem_channels[5]; 563 564 /* set the log level */ 565 rte_set_log_type(RTE_LOGTYPE_PMD, 0); 566 rte_set_log_type(RTE_LOGTYPE_MALLOC, 0); 567 rte_set_log_type(RTE_LOGTYPE_MEMPOOL, 0); 568 rte_set_log_type(RTE_LOGTYPE_RING, 0); 569 rte_set_log_level(RTE_LOG_WARNING); 570 571 /* get the cpu mask */ 572 for (ret = 0; ret < cpu; ret++) 573 cpumask = (cpumask | (1 << ret)); 574 sprintf(cpumaskbuf, "%X", cpumask); 575 576 /* get the mem channels per socket */ 577 if (g_config.mos->nb_mem_channels == 0) { 578 TRACE_ERROR("DPDK module requires # of memory channels " 579 "per socket parameter!\n"); 580 exit(EXIT_FAILURE); 581 } 582 sprintf(mem_channels, "%d", g_config.mos->nb_mem_channels); 583 584 /* initialize the rte env first, what a waste of implementation effort! */ 585 char *argv[] = {"", 586 "-c", 587 cpumaskbuf, 588 "-n", 589 mem_channels, 590 "--proc-type=auto", 591 "" 592 }; 593 const int argc = 6; 594 595 /* 596 * re-set getopt extern variable optind. 597 * this issue was a bitch to debug 598 * rte_eal_init() internally uses getopt() syscall 599 * mtcp applications that also use an `external' getopt 600 * will cause a violent crash if optind is not reset to zero 601 * prior to calling the func below... 602 * see man getopt(3) for more details 603 */ 604 optind = 0; 605 606 /* initialize the dpdk eal env */ 607 ret = rte_eal_init(argc, argv); 608 if (ret < 0) 609 rte_exit(EXIT_FAILURE, "Invalid EAL args!\n"); 610 611 } 612 /*----------------------------------------------------------------------------*/ 613 void 614 dpdk_load_module_lower_half(void) 615 { 616 int portid, rxlcore_id, ret; 617 struct rte_eth_fc_conf fc_conf; /* for Ethernet flow control settings */ 618 /* setting the rss key */ 619 static const uint8_t key[] = { 620 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 621 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 622 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 623 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05 624 }; 625 626 port_conf.rx_adv_conf.rss_conf.rss_key = (uint8_t *)&key; 627 port_conf.rx_adv_conf.rss_conf.rss_key_len = sizeof(key); 628 629 /* resetting cpu_qid mapping */ 630 memset(cpu_qid_map, 0xFF, sizeof(cpu_qid_map)); 631 632 if (!g_config.mos->multiprocess 633 || (g_config.mos->multiprocess && g_config.mos->multiprocess_is_master)) { 634 for (rxlcore_id = 0; rxlcore_id < g_config.mos->num_cores; rxlcore_id++) { 635 char name[20]; 636 sprintf(name, "mbuf_pool-%d", rxlcore_id); 637 /* create the mbuf pools */ 638 pktmbuf_pool[rxlcore_id] = 639 rte_mempool_create(name, NB_MBUF, 640 MBUF_SIZE, MEMPOOL_CACHE_SIZE, 641 sizeof(struct rte_pktmbuf_pool_private), 642 rte_pktmbuf_pool_init, NULL, 643 rte_pktmbuf_init, NULL, 644 rte_lcore_to_socket_id(rxlcore_id), 0); 645 if (pktmbuf_pool[rxlcore_id] == NULL) 646 rte_exit(EXIT_FAILURE, "Cannot init mbuf pool\n"); 647 } 648 649 /* Initialise each port */ 650 for (portid = 0; portid < g_config.mos->netdev_table->num; portid++) { 651 int num_queue = 0, eth_idx, i, queue_id; 652 for (eth_idx = 0; eth_idx < g_config.mos->netdev_table->num; eth_idx++) 653 if (portid == g_config.mos->netdev_table->ent[eth_idx]->ifindex) 654 break; 655 if (eth_idx == g_config.mos->netdev_table->num) 656 continue; 657 for (i = 0; i < sizeof(uint64_t) * 8; i++) 658 if (g_config.mos->netdev_table->ent[eth_idx]->cpu_mask & (1L << i)) 659 num_queue++; 660 661 /* set 'num_queues' (used for GetRSSCPUCore() in util.c) */ 662 num_queues = num_queue; 663 664 /* init port */ 665 printf("Initializing port %u... ", (unsigned) portid); 666 fflush(stdout); 667 ret = rte_eth_dev_configure(portid, num_queue, num_queue, 668 &port_conf); 669 if (ret < 0) 670 rte_exit(EXIT_FAILURE, "Cannot configure device:" 671 "err=%d, port=%u\n", 672 ret, (unsigned) portid); 673 674 /* init one RX queue per CPU */ 675 fflush(stdout); 676 #ifdef DEBUG 677 rte_eth_macaddr_get(portid, &ports_eth_addr[portid]); 678 #endif 679 queue_id = 0; 680 for (rxlcore_id = 0; rxlcore_id < g_config.mos->num_cores; rxlcore_id++) { 681 if (!(g_config.mos->netdev_table->ent[eth_idx]->cpu_mask & (1L << rxlcore_id))) 682 continue; 683 ret = rte_eth_rx_queue_setup(portid, queue_id, nb_rxd, 684 rte_eth_dev_socket_id(portid), &rx_conf, 685 pktmbuf_pool[rxlcore_id]); 686 if (ret < 0) 687 rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup:" 688 "err=%d, port=%u, queueid: %d\n", 689 ret, (unsigned) portid, rxlcore_id); 690 cpu_qid_map[portid][rxlcore_id] = queue_id++; 691 } 692 693 /* init one TX queue on each port per CPU (this is redundant for 694 * this app) */ 695 fflush(stdout); 696 queue_id = 0; 697 for (rxlcore_id = 0; rxlcore_id < g_config.mos->num_cores; rxlcore_id++) { 698 if (!(g_config.mos->netdev_table->ent[eth_idx]->cpu_mask & (1L << rxlcore_id))) 699 continue; 700 ret = rte_eth_tx_queue_setup(portid, queue_id++, nb_txd, 701 rte_eth_dev_socket_id(portid), &tx_conf); 702 if (ret < 0) 703 rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup:" 704 "err=%d, port=%u, queueid: %d\n", 705 ret, (unsigned) portid, rxlcore_id); 706 } 707 708 /* Start device */ 709 ret = rte_eth_dev_start(portid); 710 if (ret < 0) 711 rte_exit(EXIT_FAILURE, "rte_eth_dev_start:err=%d, port=%u\n", 712 ret, (unsigned) portid); 713 714 printf("done: \n"); 715 rte_eth_promiscuous_enable(portid); 716 717 /* retrieve current flow control settings per port */ 718 memset(&fc_conf, 0, sizeof(fc_conf)); 719 ret = rte_eth_dev_flow_ctrl_get(portid, &fc_conf); 720 if (ret != 0) { 721 rte_exit(EXIT_FAILURE, "Failed to get flow control info!\n"); 722 } 723 724 /* and just disable the rx/tx flow control */ 725 fc_conf.mode = RTE_FC_NONE; 726 ret = rte_eth_dev_flow_ctrl_set(portid, &fc_conf); 727 if (ret != 0) { 728 rte_exit(EXIT_FAILURE, "Failed to set flow control info!: errno: %d\n", 729 ret); 730 } 731 732 #ifdef DEBUG 733 printf("Port %u, MAC address: %02X:%02X:%02X:%02X:%02X:%02X\n\n", 734 (unsigned) portid, 735 ports_eth_addr[portid].addr_bytes[0], 736 ports_eth_addr[portid].addr_bytes[1], 737 ports_eth_addr[portid].addr_bytes[2], 738 ports_eth_addr[portid].addr_bytes[3], 739 ports_eth_addr[portid].addr_bytes[4], 740 ports_eth_addr[portid].addr_bytes[5]); 741 #endif 742 /* only check for link status if the thread is master */ 743 check_all_ports_link_status(g_config.mos->netdev_table->num, 0xFFFFFFFF); 744 } 745 } else { /* g_config.mos->multiprocess && !g_config.mos->multiprocess_is_master */ 746 for (rxlcore_id = 0; rxlcore_id < g_config.mos->num_cores; rxlcore_id++) { 747 char name[20]; 748 sprintf(name, "mbuf_pool-%d", rxlcore_id); 749 /* initialize the mbuf pools */ 750 pktmbuf_pool[rxlcore_id] = 751 rte_mempool_lookup(name); 752 if (pktmbuf_pool[rxlcore_id] == NULL) 753 rte_exit(EXIT_FAILURE, "Cannot init mbuf pool\n"); 754 for (portid = 0; portid < g_config.mos->netdev_table->num; portid++) 755 cpu_qid_map[portid][rxlcore_id] = rxlcore_id; 756 } 757 /* set 'num_queues' (used for GetRSSCPUCore() in util.c) */ 758 num_queues = g_config.mos->num_cores; 759 } 760 761 } 762 /*----------------------------------------------------------------------------*/ 763 io_module_func dpdk_module_func = { 764 .load_module_upper_half = dpdk_load_module_upper_half, 765 .load_module_lower_half = dpdk_load_module_lower_half, 766 .init_handle = dpdk_init_handle, 767 .link_devices = NULL, 768 .release_pkt = NULL, 769 .send_pkts = dpdk_send_pkts, 770 .get_wptr = dpdk_get_wptr, 771 .recv_pkts = dpdk_recv_pkts, 772 .get_rptr = dpdk_get_rptr, 773 .get_nif = dpdk_get_nif, 774 .select = dpdk_select, 775 .destroy_handle = dpdk_destroy_handle, 776 .dev_ioctl = dpdk_dev_ioctl, 777 .set_wptr = dpdk_set_wptr, 778 }; 779 /*----------------------------------------------------------------------------*/ 780 781