1 /* for io_module_func def'ns */ 2 #include "io_module.h" 3 /* for mtcp related def'ns */ 4 #include "mtcp.h" 5 /* for errno */ 6 #include <errno.h> 7 /* for close/optind */ 8 #include <unistd.h> 9 /* for logging */ 10 #include "debug.h" 11 /* for num_devices_* */ 12 #include "config.h" 13 /* for rte_max_eth_ports */ 14 #include <rte_common.h> 15 /* for rte_eth_rxconf */ 16 #include <rte_ethdev.h> 17 /* for delay funcs */ 18 #include <rte_cycles.h> 19 /* for ip pesudo-chksum */ 20 #include <rte_ip.h> 21 #define ENABLE_STATS_IOCTL 1 22 #ifdef ENABLE_STATS_IOCTL 23 /* for open */ 24 #include <fcntl.h> 25 /* for ioctl */ 26 #include <sys/ioctl.h> 27 #endif /* !ENABLE_STATS_IOCTL */ 28 /*----------------------------------------------------------------------------*/ 29 /* Essential macros */ 30 #define MAX_RX_QUEUE_PER_LCORE MAX_CPUS 31 #define MAX_TX_QUEUE_PER_PORT MAX_CPUS 32 33 #define MBUF_SIZE (2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM) 34 #define NB_MBUF 8192 35 #define MEMPOOL_CACHE_SIZE 256 36 //#define RX_IDLE_ENABLE 1 37 #define RX_IDLE_TIMEOUT 1 /* in micro-seconds */ 38 #define RX_IDLE_THRESH 64 39 40 /* 41 * RX and TX Prefetch, Host, and Write-back threshold values should be 42 * carefully set for optimal performance. Consult the network 43 * controller's datasheet and supporting DPDK documentation for guidance 44 * on how these parameters should be set. 45 */ 46 #define RX_PTHRESH 8 /**< Default values of RX prefetch threshold reg. */ 47 #define RX_HTHRESH 8 /**< Default values of RX host threshold reg. */ 48 #define RX_WTHRESH 4 /**< Default values of RX write-back threshold reg. */ 49 50 /* 51 * These default values are optimized for use with the Intel(R) 82599 10 GbE 52 * Controller and the DPDK ixgbe PMD. Consider using other values for other 53 * network controllers and/or network drivers. 54 */ 55 #define TX_PTHRESH 36 /**< Default values of TX prefetch threshold reg. */ 56 #define TX_HTHRESH 0 /**< Default values of TX host threshold reg. */ 57 #define TX_WTHRESH 0 /**< Default values of TX write-back threshold reg. */ 58 59 #define MAX_PKT_BURST /*32*/64/*128*//*32*/ 60 61 /* 62 * Configurable number of RX/TX ring descriptors 63 */ 64 #define RTE_TEST_RX_DESC_DEFAULT 128 65 #define RTE_TEST_TX_DESC_DEFAULT 512 66 67 static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT; 68 static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT; 69 /*----------------------------------------------------------------------------*/ 70 /* packet memory pools for storing packet bufs */ 71 static struct rte_mempool *pktmbuf_pool[MAX_CPUS] = {NULL}; 72 static uint8_t cpu_qid_map[RTE_MAX_ETHPORTS][MAX_CPUS] = {{0}}; 73 74 //#define DEBUG 1 75 #ifdef DEBUG 76 /* ethernet addresses of ports */ 77 static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS]; 78 #endif 79 80 static struct rte_eth_conf port_conf = { 81 .rxmode = { 82 .mq_mode = ETH_MQ_RX_RSS, 83 .max_rx_pkt_len = ETHER_MAX_LEN, 84 .split_hdr_size = 0, 85 .header_split = 0, /**< Header Split disabled */ 86 .hw_ip_checksum = 1, /**< IP checksum offload enabled */ 87 .hw_vlan_filter = 0, /**< VLAN filtering disabled */ 88 .jumbo_frame = 0, /**< Jumbo Frame Support disabled */ 89 .hw_strip_crc = 1, /**< CRC stripped by hardware */ 90 }, 91 .rx_adv_conf = { 92 .rss_conf = { 93 .rss_key = NULL, 94 .rss_hf = ETH_RSS_TCP | ETH_RSS_UDP | 95 ETH_RSS_IP | ETH_RSS_L2_PAYLOAD 96 }, 97 }, 98 .txmode = { 99 .mq_mode = ETH_MQ_TX_NONE, 100 }, 101 }; 102 103 static const struct rte_eth_rxconf rx_conf = { 104 .rx_thresh = { 105 .pthresh = RX_PTHRESH, /* RX prefetch threshold reg */ 106 .hthresh = RX_HTHRESH, /* RX host threshold reg */ 107 .wthresh = RX_WTHRESH, /* RX write-back threshold reg */ 108 }, 109 .rx_free_thresh = 32, 110 }; 111 112 static const struct rte_eth_txconf tx_conf = { 113 .tx_thresh = { 114 .pthresh = TX_PTHRESH, /* TX prefetch threshold reg */ 115 .hthresh = TX_HTHRESH, /* TX host threshold reg */ 116 .wthresh = TX_WTHRESH, /* TX write-back threshold reg */ 117 }, 118 .tx_free_thresh = 0, /* Use PMD default values */ 119 .tx_rs_thresh = 0, /* Use PMD default values */ 120 /* 121 * As the example won't handle mult-segments and offload cases, 122 * set the flag by default. 123 */ 124 .txq_flags = 0x0, 125 }; 126 127 struct mbuf_table { 128 unsigned len; /* length of queued packets */ 129 struct rte_mbuf *m_table[MAX_PKT_BURST]; 130 }; 131 132 struct dpdk_private_context { 133 struct mbuf_table rmbufs[RTE_MAX_ETHPORTS]; 134 struct mbuf_table wmbufs[RTE_MAX_ETHPORTS]; 135 struct rte_mempool *pktmbuf_pool; 136 struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 137 #ifdef RX_IDLE_ENABLE 138 uint8_t rx_idle; 139 #endif 140 #ifdef ENABLE_STATS_IOCTL 141 int fd; 142 #endif /* !ENABLE_STATS_IOCTL */ 143 } __rte_cache_aligned; 144 145 #ifdef ENABLE_STATS_IOCTL 146 /** 147 * stats struct passed on from user space to the driver 148 */ 149 struct stats_struct { 150 uint64_t tx_bytes; 151 uint64_t tx_pkts; 152 uint64_t rx_bytes; 153 uint64_t rx_pkts; 154 uint8_t qid; 155 uint8_t dev; 156 }; 157 #endif /* !ENABLE_STATS_IOCTL */ 158 /*----------------------------------------------------------------------------*/ 159 void 160 dpdk_init_handle(struct mtcp_thread_context *ctxt) 161 { 162 struct dpdk_private_context *dpc; 163 int i, j; 164 char mempool_name[20]; 165 166 /* create and initialize private I/O module context */ 167 ctxt->io_private_context = calloc(1, sizeof(struct dpdk_private_context)); 168 if (ctxt->io_private_context == NULL) { 169 TRACE_ERROR("Failed to initialize ctxt->io_private_context: " 170 "Can't allocate memory\n"); 171 exit(EXIT_FAILURE); 172 } 173 174 sprintf(mempool_name, "mbuf_pool-%d", ctxt->cpu); 175 dpc = (struct dpdk_private_context *)ctxt->io_private_context; 176 dpc->pktmbuf_pool = pktmbuf_pool[ctxt->cpu]; 177 178 /* set wmbufs correctly */ 179 for (j = 0; j < g_config.mos->netdev_table->num; j++) { 180 /* Allocate wmbufs for each registered port */ 181 for (i = 0; i < MAX_PKT_BURST; i++) { 182 dpc->wmbufs[j].m_table[i] = rte_pktmbuf_alloc(pktmbuf_pool[ctxt->cpu]); 183 if (dpc->wmbufs[j].m_table[i] == NULL) { 184 TRACE_ERROR("Failed to allocate %d:wmbuf[%d] on device %d!\n", 185 ctxt->cpu, i, j); 186 exit(EXIT_FAILURE); 187 } 188 } 189 /* set mbufs queue length to 0 to begin with */ 190 dpc->wmbufs[j].len = 0; 191 } 192 193 #ifdef ENABLE_STATS_IOCTL 194 dpc->fd = open("/dev/dpdk-iface", O_RDWR); 195 if (dpc->fd == -1) { 196 TRACE_ERROR("Can't open /dev/dpdk-iface for context->cpu: %d! " 197 "Are you using mlx4/mlx5 driver?\n", 198 ctxt->cpu); 199 } 200 #endif /* !ENABLE_STATS_IOCTL */ 201 } 202 /*----------------------------------------------------------------------------*/ 203 int 204 dpdk_send_pkts(struct mtcp_thread_context *ctxt, int nif) 205 { 206 struct dpdk_private_context *dpc; 207 mtcp_manager_t mtcp; 208 int ret; 209 int qid; 210 211 dpc = (struct dpdk_private_context *)ctxt->io_private_context; 212 mtcp = ctxt->mtcp_manager; 213 ret = 0; 214 qid = cpu_qid_map[nif][ctxt->cpu]; 215 216 /* if queue is unassigned, skip it.. */ 217 if (unlikely(qid == 0xFF)) 218 return 0; 219 220 /* if there are packets in the queue... flush them out to the wire */ 221 if (dpc->wmbufs[nif].len >/*= MAX_PKT_BURST*/ 0) { 222 struct rte_mbuf **pkts; 223 #ifdef ENABLE_STATS_IOCTL 224 struct stats_struct ss; 225 #endif /* !ENABLE_STATS_IOCTL */ 226 int cnt = dpc->wmbufs[nif].len; 227 pkts = dpc->wmbufs[nif].m_table; 228 #ifdef NETSTAT 229 mtcp->nstat.tx_packets[nif] += cnt; 230 #ifdef ENABLE_STATS_IOCTL 231 if (likely(dpc->fd) >= 0) { 232 ss.tx_pkts = mtcp->nstat.tx_packets[nif]; 233 ss.tx_bytes = mtcp->nstat.tx_bytes[nif]; 234 ss.rx_pkts = mtcp->nstat.rx_packets[nif]; 235 ss.rx_bytes = mtcp->nstat.rx_bytes[nif]; 236 ss.qid = ctxt->cpu; 237 ss.dev = nif; 238 ioctl(dpc->fd, 0, &ss); 239 } 240 #endif /* !ENABLE_STATS_IOCTL */ 241 #endif 242 do { 243 /* tx cnt # of packets */ 244 ret = rte_eth_tx_burst(nif, qid, 245 pkts, cnt); 246 pkts += ret; 247 cnt -= ret; 248 /* if not all pkts were sent... then repeat the cycle */ 249 } while (cnt > 0); 250 251 #ifndef SHARE_IO_BUFFER 252 int i; 253 /* time to allocate fresh mbufs for the queue */ 254 for (i = 0; i < dpc->wmbufs[nif].len; i++) { 255 dpc->wmbufs[nif].m_table[i] = rte_pktmbuf_alloc(pktmbuf_pool[ctxt->cpu]); 256 /* error checking */ 257 if (unlikely(dpc->wmbufs[nif].m_table[i] == NULL)) { 258 TRACE_ERROR("Failed to allocate %d:wmbuf[%d] on device %d!\n", 259 ctxt->cpu, i, nif); 260 exit(EXIT_FAILURE); 261 } 262 } 263 #endif 264 /* reset the len of mbufs var after flushing of packets */ 265 dpc->wmbufs[nif].len = 0; 266 } 267 268 return ret; 269 } 270 /*----------------------------------------------------------------------------*/ 271 uint8_t * 272 dpdk_get_wptr(struct mtcp_thread_context *ctxt, int nif, uint16_t pktsize) 273 { 274 struct dpdk_private_context *dpc; 275 mtcp_manager_t mtcp; 276 struct rte_mbuf *m; 277 uint8_t *ptr; 278 int len_of_mbuf; 279 280 dpc = (struct dpdk_private_context *) ctxt->io_private_context; 281 mtcp = ctxt->mtcp_manager; 282 283 /* sanity check */ 284 if (unlikely(dpc->wmbufs[nif].len == MAX_PKT_BURST)) 285 return NULL; 286 287 len_of_mbuf = dpc->wmbufs[nif].len; 288 m = dpc->wmbufs[nif].m_table[len_of_mbuf]; 289 290 /* retrieve the right write offset */ 291 ptr = (void *)rte_pktmbuf_mtod(m, struct ether_hdr *); 292 m->pkt_len = m->data_len = pktsize; 293 m->nb_segs = 1; 294 m->next = NULL; 295 296 #ifdef NETSTAT 297 mtcp->nstat.tx_bytes[nif] += pktsize + ETHER_OVR; 298 #endif 299 300 /* increment the len_of_mbuf var */ 301 dpc->wmbufs[nif].len = len_of_mbuf + 1; 302 303 return (uint8_t *)ptr; 304 } 305 /*----------------------------------------------------------------------------*/ 306 void 307 dpdk_set_wptr(struct mtcp_thread_context *ctxt, int out_nif, int in_nif, int index) 308 { 309 struct dpdk_private_context *dpc; 310 mtcp_manager_t mtcp; 311 int len_of_mbuf; 312 313 dpc = (struct dpdk_private_context *) ctxt->io_private_context; 314 mtcp = ctxt->mtcp_manager; 315 316 /* sanity check */ 317 if (unlikely(dpc->wmbufs[out_nif].len == MAX_PKT_BURST)) 318 return; 319 320 len_of_mbuf = dpc->wmbufs[out_nif].len; 321 dpc->wmbufs[out_nif].m_table[len_of_mbuf] = 322 dpc->rmbufs[in_nif].m_table[index]; 323 324 dpc->wmbufs[out_nif].m_table[len_of_mbuf]->udata64 = 0; 325 326 #ifdef NETSTAT 327 mtcp->nstat.tx_bytes[out_nif] += dpc->rmbufs[in_nif].m_table[index]->pkt_len + ETHER_OVR; 328 #endif 329 330 /* increment the len_of_mbuf var */ 331 dpc->wmbufs[out_nif].len = len_of_mbuf + 1; 332 333 return; 334 } 335 /*----------------------------------------------------------------------------*/ 336 static inline void 337 free_pkts(struct rte_mbuf **mtable, unsigned len) 338 { 339 int i; 340 341 /* free the freaking packets */ 342 for (i = 0; i < len; i++) { 343 if (mtable[i]->udata64 == 1) { 344 rte_pktmbuf_free_seg(mtable[i]); 345 RTE_MBUF_PREFETCH_TO_FREE(mtable[i+1]); 346 } 347 } 348 } 349 /*----------------------------------------------------------------------------*/ 350 int32_t 351 dpdk_recv_pkts(struct mtcp_thread_context *ctxt, int ifidx) 352 { 353 struct dpdk_private_context *dpc; 354 int ret; 355 uint8_t qid; 356 357 dpc = (struct dpdk_private_context *) ctxt->io_private_context; 358 qid = cpu_qid_map[ifidx][ctxt->cpu]; 359 360 /* if queue is unassigned, skip it.. */ 361 if (qid == 0xFF) 362 return 0; 363 364 if (dpc->rmbufs[ifidx].len != 0) { 365 free_pkts(dpc->rmbufs[ifidx].m_table, dpc->rmbufs[ifidx].len); 366 dpc->rmbufs[ifidx].len = 0; 367 } 368 369 ret = rte_eth_rx_burst((uint8_t)ifidx, qid, 370 dpc->pkts_burst, MAX_PKT_BURST); 371 #ifdef RX_IDLE_ENABLE 372 dpc->rx_idle = (likely(ret != 0)) ? 0 : dpc->rx_idle + 1; 373 #endif 374 dpc->rmbufs[ifidx].len = ret; 375 376 return ret; 377 } 378 /*----------------------------------------------------------------------------*/ 379 uint8_t * 380 dpdk_get_rptr(struct mtcp_thread_context *ctxt, int ifidx, int index, uint16_t *len) 381 { 382 struct dpdk_private_context *dpc; 383 struct rte_mbuf *m; 384 uint8_t *pktbuf; 385 386 dpc = (struct dpdk_private_context *) ctxt->io_private_context; 387 388 389 m = dpc->pkts_burst[index]; 390 /* tag to check if the packet is a local or a forwarded pkt */ 391 m->udata64 = 1; 392 /* don't enable pre-fetching... performance goes down */ 393 //rte_prefetch0(rte_pktmbuf_mtod(m, void *)); 394 *len = m->pkt_len; 395 pktbuf = rte_pktmbuf_mtod(m, uint8_t *); 396 397 /* enqueue the pkt ptr in mbuf */ 398 dpc->rmbufs[ifidx].m_table[index] = m; 399 400 return pktbuf; 401 } 402 /*----------------------------------------------------------------------------*/ 403 int 404 dpdk_get_nif(struct ifreq *ifr) 405 { 406 int i; 407 static int num_dev = -1; 408 static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS]; 409 /* get mac addr entries of 'detected' dpdk ports */ 410 if (num_dev < 0) { 411 num_dev = rte_eth_dev_count(); 412 for (i = 0; i < num_dev; i++) 413 rte_eth_macaddr_get(i, &ports_eth_addr[i]); 414 } 415 416 for (i = 0; i < num_dev; i++) 417 if (!memcmp(&ifr->ifr_addr.sa_data[0], &ports_eth_addr[i], ETH_ALEN)) 418 return i; 419 420 return -1; 421 } 422 /*----------------------------------------------------------------------------*/ 423 int32_t 424 dpdk_select(struct mtcp_thread_context *ctxt) 425 { 426 #ifdef RX_IDLE_ENABLE 427 struct dpdk_private_context *dpc; 428 429 dpc = (struct dpdk_private_context *) ctxt->io_private_context; 430 if (dpc->rx_idle > RX_IDLE_THRESH) { 431 dpc->rx_idle = 0; 432 usleep(RX_IDLE_TIMEOUT); 433 } 434 #endif 435 return 0; 436 } 437 /*----------------------------------------------------------------------------*/ 438 void 439 dpdk_destroy_handle(struct mtcp_thread_context *ctxt) 440 { 441 struct dpdk_private_context *dpc; 442 int i; 443 444 dpc = (struct dpdk_private_context *) ctxt->io_private_context; 445 446 /* free wmbufs */ 447 for (i = 0; i < g_config.mos->netdev_table->num; i++) 448 free_pkts(dpc->wmbufs[i].m_table, MAX_PKT_BURST); 449 450 #ifdef ENABLE_STATS_IOCTL 451 /* free fd */ 452 if (dpc->fd >= 0) 453 close(dpc->fd); 454 #endif /* !ENABLE_STATS_IOCTL */ 455 456 /* free it all up */ 457 free(dpc); 458 } 459 /*----------------------------------------------------------------------------*/ 460 static void 461 check_all_ports_link_status(uint8_t port_num, uint32_t port_mask) 462 { 463 #define CHECK_INTERVAL 100 /* 100ms */ 464 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */ 465 466 uint8_t portid, count, all_ports_up, print_flag = 0; 467 struct rte_eth_link link; 468 469 printf("\nChecking link status"); 470 fflush(stdout); 471 for (count = 0; count <= MAX_CHECK_TIME; count++) { 472 all_ports_up = 1; 473 for (portid = 0; portid < port_num; portid++) { 474 if ((port_mask & (1 << portid)) == 0) 475 continue; 476 memset(&link, 0, sizeof(link)); 477 rte_eth_link_get_nowait(portid, &link); 478 /* print link status if flag set */ 479 if (print_flag == 1) { 480 if (link.link_status) 481 printf("Port %d Link Up - speed %u " 482 "Mbps - %s\n", (uint8_t)portid, 483 (unsigned)link.link_speed, 484 (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? 485 ("full-duplex") : ("half-duplex\n")); 486 else 487 printf("Port %d Link Down\n", 488 (uint8_t)portid); 489 continue; 490 } 491 /* clear all_ports_up flag if any link down */ 492 if (link.link_status == 0) { 493 all_ports_up = 0; 494 break; 495 } 496 } 497 /* after finally printing all link status, get out */ 498 if (print_flag == 1) 499 break; 500 501 if (all_ports_up == 0) { 502 printf("."); 503 fflush(stdout); 504 rte_delay_ms(CHECK_INTERVAL); 505 } 506 507 /* set the print_flag if all ports up or timeout */ 508 if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) { 509 print_flag = 1; 510 printf("done\n"); 511 } 512 } 513 } 514 /*----------------------------------------------------------------------------*/ 515 int32_t 516 dpdk_dev_ioctl(struct mtcp_thread_context *ctx, int nif, int cmd, void *argp) 517 { 518 struct dpdk_private_context *dpc; 519 struct rte_mbuf *m; 520 int len_of_mbuf; 521 struct iphdr *iph; 522 struct tcphdr *tcph; 523 RssInfo *rss_i; 524 525 iph = (struct iphdr *)argp; 526 dpc = (struct dpdk_private_context *)ctx->io_private_context; 527 len_of_mbuf = dpc->wmbufs[nif].len; 528 rss_i = NULL; 529 530 switch (cmd) { 531 case PKT_TX_IP_CSUM: 532 m = dpc->wmbufs[nif].m_table[len_of_mbuf - 1]; 533 m->ol_flags = PKT_TX_IP_CKSUM | PKT_TX_IPV4; 534 m->l2_len = sizeof(struct ether_hdr); 535 m->l3_len = (iph->ihl<<2); 536 break; 537 case PKT_TX_TCP_CSUM: 538 m = dpc->wmbufs[nif].m_table[len_of_mbuf - 1]; 539 tcph = (struct tcphdr *)((unsigned char *)iph + (iph->ihl<<2)); 540 m->ol_flags |= PKT_TX_TCP_CKSUM; 541 tcph->check = rte_ipv4_phdr_cksum((struct ipv4_hdr *)iph, m->ol_flags); 542 break; 543 case PKT_RX_RSS: 544 rss_i = (RssInfo *)argp; 545 m = dpc->pkts_burst[rss_i->pktidx]; 546 rss_i->hash_value = m->hash.rss; 547 break; 548 default: 549 goto dev_ioctl_err; 550 } 551 552 return 0; 553 dev_ioctl_err: 554 return -1; 555 } 556 /*----------------------------------------------------------------------------*/ 557 void 558 dpdk_load_module_upper_half(void) 559 { 560 int cpu = g_config.mos->num_cores, ret; 561 uint32_t cpumask = 0; 562 char cpumaskbuf[10]; 563 char mem_channels[5]; 564 565 /* set the log level */ 566 rte_set_log_type(RTE_LOGTYPE_PMD, 0); 567 rte_set_log_type(RTE_LOGTYPE_MALLOC, 0); 568 rte_set_log_type(RTE_LOGTYPE_MEMPOOL, 0); 569 rte_set_log_type(RTE_LOGTYPE_RING, 0); 570 rte_set_log_level(RTE_LOG_WARNING); 571 572 /* get the cpu mask */ 573 for (ret = 0; ret < cpu; ret++) 574 cpumask = (cpumask | (1 << ret)); 575 sprintf(cpumaskbuf, "%X", cpumask); 576 577 /* get the mem channels per socket */ 578 if (g_config.mos->nb_mem_channels == 0) { 579 TRACE_ERROR("DPDK module requires # of memory channels " 580 "per socket parameter!\n"); 581 exit(EXIT_FAILURE); 582 } 583 sprintf(mem_channels, "%d", g_config.mos->nb_mem_channels); 584 585 /* initialize the rte env first, what a waste of implementation effort! */ 586 char *argv[] = {"", 587 "-c", 588 cpumaskbuf, 589 "-n", 590 mem_channels, 591 "--proc-type=auto", 592 "" 593 }; 594 const int argc = 6; 595 596 /* 597 * re-set getopt extern variable optind. 598 * this issue was a bitch to debug 599 * rte_eal_init() internally uses getopt() syscall 600 * mtcp applications that also use an `external' getopt 601 * will cause a violent crash if optind is not reset to zero 602 * prior to calling the func below... 603 * see man getopt(3) for more details 604 */ 605 optind = 0; 606 607 /* initialize the dpdk eal env */ 608 ret = rte_eal_init(argc, argv); 609 if (ret < 0) 610 rte_exit(EXIT_FAILURE, "Invalid EAL args!\n"); 611 612 } 613 /*----------------------------------------------------------------------------*/ 614 void 615 dpdk_load_module_lower_half(void) 616 { 617 int portid, rxlcore_id, ret; 618 struct rte_eth_fc_conf fc_conf; /* for Ethernet flow control settings */ 619 /* setting the rss key */ 620 static const uint8_t key[] = { 621 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 622 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 623 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 624 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05 625 }; 626 627 port_conf.rx_adv_conf.rss_conf.rss_key = (uint8_t *)&key; 628 port_conf.rx_adv_conf.rss_conf.rss_key_len = sizeof(key); 629 630 /* resetting cpu_qid mapping */ 631 memset(cpu_qid_map, 0xFF, sizeof(cpu_qid_map)); 632 633 if (!g_config.mos->multiprocess 634 || (g_config.mos->multiprocess && g_config.mos->multiprocess_is_master)) { 635 for (rxlcore_id = 0; rxlcore_id < g_config.mos->num_cores; rxlcore_id++) { 636 char name[20]; 637 sprintf(name, "mbuf_pool-%d", rxlcore_id); 638 /* create the mbuf pools */ 639 pktmbuf_pool[rxlcore_id] = 640 rte_mempool_create(name, NB_MBUF, 641 MBUF_SIZE, MEMPOOL_CACHE_SIZE, 642 sizeof(struct rte_pktmbuf_pool_private), 643 rte_pktmbuf_pool_init, NULL, 644 rte_pktmbuf_init, NULL, 645 rte_lcore_to_socket_id(rxlcore_id), 0); 646 if (pktmbuf_pool[rxlcore_id] == NULL) 647 rte_exit(EXIT_FAILURE, "Cannot init mbuf pool\n"); 648 } 649 650 /* Initialise each port */ 651 for (portid = 0; portid < g_config.mos->netdev_table->num; portid++) { 652 int num_queue = 0, eth_idx, i, queue_id; 653 for (eth_idx = 0; eth_idx < g_config.mos->netdev_table->num; eth_idx++) 654 if (portid == g_config.mos->netdev_table->ent[eth_idx]->ifindex) 655 break; 656 if (eth_idx == g_config.mos->netdev_table->num) 657 continue; 658 for (i = 0; i < sizeof(uint64_t) * 8; i++) 659 if (g_config.mos->netdev_table->ent[eth_idx]->cpu_mask & (1L << i)) 660 num_queue++; 661 662 /* set 'num_queues' (used for GetRSSCPUCore() in util.c) */ 663 num_queues = num_queue; 664 665 /* init port */ 666 printf("Initializing port %u... ", (unsigned) portid); 667 fflush(stdout); 668 ret = rte_eth_dev_configure(portid, num_queue, num_queue, 669 &port_conf); 670 if (ret < 0) 671 rte_exit(EXIT_FAILURE, "Cannot configure device:" 672 "err=%d, port=%u\n", 673 ret, (unsigned) portid); 674 675 /* init one RX queue per CPU */ 676 fflush(stdout); 677 #ifdef DEBUG 678 rte_eth_macaddr_get(portid, &ports_eth_addr[portid]); 679 #endif 680 queue_id = 0; 681 for (rxlcore_id = 0; rxlcore_id < g_config.mos->num_cores; rxlcore_id++) { 682 if (!(g_config.mos->netdev_table->ent[eth_idx]->cpu_mask & (1L << rxlcore_id))) 683 continue; 684 ret = rte_eth_rx_queue_setup(portid, queue_id, nb_rxd, 685 rte_eth_dev_socket_id(portid), &rx_conf, 686 pktmbuf_pool[rxlcore_id]); 687 if (ret < 0) 688 rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup:" 689 "err=%d, port=%u, queueid: %d\n", 690 ret, (unsigned) portid, rxlcore_id); 691 cpu_qid_map[portid][rxlcore_id] = queue_id++; 692 } 693 694 /* init one TX queue on each port per CPU (this is redundant for 695 * this app) */ 696 fflush(stdout); 697 queue_id = 0; 698 for (rxlcore_id = 0; rxlcore_id < g_config.mos->num_cores; rxlcore_id++) { 699 if (!(g_config.mos->netdev_table->ent[eth_idx]->cpu_mask & (1L << rxlcore_id))) 700 continue; 701 ret = rte_eth_tx_queue_setup(portid, queue_id++, nb_txd, 702 rte_eth_dev_socket_id(portid), &tx_conf); 703 if (ret < 0) 704 rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup:" 705 "err=%d, port=%u, queueid: %d\n", 706 ret, (unsigned) portid, rxlcore_id); 707 } 708 709 /* Start device */ 710 ret = rte_eth_dev_start(portid); 711 if (ret < 0) 712 rte_exit(EXIT_FAILURE, "rte_eth_dev_start:err=%d, port=%u\n", 713 ret, (unsigned) portid); 714 715 printf("done: \n"); 716 rte_eth_promiscuous_enable(portid); 717 718 /* retrieve current flow control settings per port */ 719 memset(&fc_conf, 0, sizeof(fc_conf)); 720 ret = rte_eth_dev_flow_ctrl_get(portid, &fc_conf); 721 if (ret != 0) { 722 rte_exit(EXIT_FAILURE, "Failed to get flow control info!\n"); 723 } 724 725 /* and just disable the rx/tx flow control */ 726 fc_conf.mode = RTE_FC_NONE; 727 ret = rte_eth_dev_flow_ctrl_set(portid, &fc_conf); 728 if (ret != 0) { 729 rte_exit(EXIT_FAILURE, "Failed to set flow control info!: errno: %d\n", 730 ret); 731 } 732 733 #ifdef DEBUG 734 printf("Port %u, MAC address: %02X:%02X:%02X:%02X:%02X:%02X\n\n", 735 (unsigned) portid, 736 ports_eth_addr[portid].addr_bytes[0], 737 ports_eth_addr[portid].addr_bytes[1], 738 ports_eth_addr[portid].addr_bytes[2], 739 ports_eth_addr[portid].addr_bytes[3], 740 ports_eth_addr[portid].addr_bytes[4], 741 ports_eth_addr[portid].addr_bytes[5]); 742 #endif 743 /* only check for link status if the thread is master */ 744 check_all_ports_link_status(g_config.mos->netdev_table->num, 0xFFFFFFFF); 745 } 746 } else { /* g_config.mos->multiprocess && !g_config.mos->multiprocess_is_master */ 747 for (rxlcore_id = 0; rxlcore_id < g_config.mos->num_cores; rxlcore_id++) { 748 char name[20]; 749 sprintf(name, "mbuf_pool-%d", rxlcore_id); 750 /* initialize the mbuf pools */ 751 pktmbuf_pool[rxlcore_id] = 752 rte_mempool_lookup(name); 753 if (pktmbuf_pool[rxlcore_id] == NULL) 754 rte_exit(EXIT_FAILURE, "Cannot init mbuf pool\n"); 755 for (portid = 0; portid < g_config.mos->netdev_table->num; portid++) 756 cpu_qid_map[portid][rxlcore_id] = rxlcore_id; 757 } 758 /* set 'num_queues' (used for GetRSSCPUCore() in util.c) */ 759 num_queues = g_config.mos->num_cores; 760 } 761 762 } 763 /*----------------------------------------------------------------------------*/ 764 io_module_func dpdk_module_func = { 765 .load_module_upper_half = dpdk_load_module_upper_half, 766 .load_module_lower_half = dpdk_load_module_lower_half, 767 .init_handle = dpdk_init_handle, 768 .link_devices = NULL, 769 .release_pkt = NULL, 770 .send_pkts = dpdk_send_pkts, 771 .get_wptr = dpdk_get_wptr, 772 .recv_pkts = dpdk_recv_pkts, 773 .get_rptr = dpdk_get_rptr, 774 .get_nif = dpdk_get_nif, 775 .select = dpdk_select, 776 .destroy_handle = dpdk_destroy_handle, 777 .dev_ioctl = dpdk_dev_ioctl, 778 .set_wptr = dpdk_set_wptr, 779 }; 780 /*----------------------------------------------------------------------------*/ 781 782