/* for io_module_func def'ns */
#include "io_module.h"
/* for mtcp related def'ns */
#include "mtcp.h"
/* for errno */
#include <errno.h>
/* for logging */
#include "debug.h"
/* for num_devices_* */
#include "config.h"
/* for rte_max_eth_ports */
#include <rte_common.h>
/* for rte_eth_rxconf */
#include <rte_ethdev.h>
/* for delay funcs */
#include <rte_cycles.h>
/* for ip pseudo-chksum */
#include <rte_ip.h>
#define ENABLE_STATS_IOCTL		1
#ifdef ENABLE_STATS_IOCTL
/* for close */
#include <unistd.h>
/* for open */
#include <fcntl.h>
/* for ioctl */
#include <sys/ioctl.h>
#endif /* !ENABLE_STATS_IOCTL */
/*----------------------------------------------------------------------------*/
/* Essential macros */
#define MAX_RX_QUEUE_PER_LCORE		MAX_CPUS
#define MAX_TX_QUEUE_PER_PORT		MAX_CPUS

#define MBUF_SIZE			(2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)
#define NB_MBUF				8192
#define MEMPOOL_CACHE_SIZE		256
//#define RX_IDLE_ENABLE		1
#define RX_IDLE_TIMEOUT			1	/* in micro-seconds */
#define RX_IDLE_THRESH			64

/*
 * RX and TX Prefetch, Host, and Write-back threshold values should be
 * carefully set for optimal performance. Consult the network
 * controller's datasheet and supporting DPDK documentation for guidance
 * on how these parameters should be set.
 */
#define RX_PTHRESH			8 /**< Default values of RX prefetch threshold reg. */
#define RX_HTHRESH			8 /**< Default values of RX host threshold reg. */
#define RX_WTHRESH			4 /**< Default values of RX write-back threshold reg. */

/*
 * These default values are optimized for use with the Intel(R) 82599 10 GbE
 * Controller and the DPDK ixgbe PMD. Consider using other values for other
 * network controllers and/or network drivers.
 */
#define TX_PTHRESH			36 /**< Default values of TX prefetch threshold reg. */
#define TX_HTHRESH			0  /**< Default values of TX host threshold reg. */
#define TX_WTHRESH			0  /**< Default values of TX write-back threshold reg. */

#define MAX_PKT_BURST			64

/*
 * Configurable number of RX/TX ring descriptors
 */
#define RTE_TEST_RX_DESC_DEFAULT	128
#define RTE_TEST_TX_DESC_DEFAULT	512

static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
/*----------------------------------------------------------------------------*/
/* packet memory pools for storing packet bufs */
static struct rte_mempool *pktmbuf_pool[MAX_CPUS] = {NULL};
static uint8_t cpu_qid_map[RTE_MAX_ETHPORTS][MAX_CPUS] = {{0}};

//#define DEBUG				1
#ifdef DEBUG
/* ethernet addresses of ports */
static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];
#endif

static struct rte_eth_conf port_conf = {
	.rxmode = {
		.mq_mode	= ETH_MQ_RX_RSS,
		.max_rx_pkt_len	= ETHER_MAX_LEN,
		.split_hdr_size	= 0,
		.header_split	= 0, /**< Header Split disabled */
		.hw_ip_checksum	= 1, /**< IP checksum offload enabled */
		.hw_vlan_filter	= 0, /**< VLAN filtering disabled */
		.jumbo_frame	= 0, /**< Jumbo Frame Support disabled */
		.hw_strip_crc	= 1, /**< CRC stripped by hardware */
	},
	.rx_adv_conf = {
		.rss_conf = {
			.rss_key = NULL,
			.rss_hf = ETH_RSS_TCP
		},
	},
	.txmode = {
		.mq_mode = ETH_MQ_TX_NONE,
	},
#if 0
	.fdir_conf = {
		.mode = RTE_FDIR_MODE_PERFECT,
		.pballoc = RTE_FDIR_PBALLOC_256K,
		.status = RTE_FDIR_REPORT_STATUS_ALWAYS,
		//.flexbytes_offset = 0x6,
		.drop_queue = 127,
	},
#endif
};

static const struct rte_eth_rxconf rx_conf = {
	.rx_thresh = {
		.pthresh = RX_PTHRESH, /* RX prefetch threshold reg */
		.hthresh = RX_HTHRESH, /* RX host threshold reg */
		.wthresh = RX_WTHRESH, /* RX write-back threshold reg */
	},
	.rx_free_thresh = 32,
};

static const struct rte_eth_txconf tx_conf = {
	.tx_thresh = {
		.pthresh = TX_PTHRESH, /* TX prefetch threshold reg */
		.hthresh = TX_HTHRESH, /* TX host threshold reg */
		.wthresh = TX_WTHRESH, /* TX write-back threshold reg */
	},
	.tx_free_thresh = 0, /* Use PMD default values */
	.tx_rs_thresh = 0, /* Use PMD default values */
	/*
	 * As the example won't handle multi-segments and offload cases,
	 * set the flag by default.
	 */
	.txq_flags = 0x0,
};

struct mbuf_table {
	unsigned len; /* length of queued packets */
	struct rte_mbuf *m_table[MAX_PKT_BURST];
};

struct dpdk_private_context {
	struct mbuf_table rmbufs[RTE_MAX_ETHPORTS];
	struct mbuf_table wmbufs[RTE_MAX_ETHPORTS];
	struct rte_mempool *pktmbuf_pool;
	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
#ifdef RX_IDLE_ENABLE
	uint8_t rx_idle;
#endif
#ifdef ENABLE_STATS_IOCTL
	int fd;
#endif /* !ENABLE_STATS_IOCTL */
} __rte_cache_aligned;

#ifdef ENABLE_STATS_IOCTL
/**
 * stats struct passed on from user space to the driver
 */
struct stats_struct {
	uint64_t tx_bytes;
	uint64_t tx_pkts;
	uint64_t rx_bytes;
	uint64_t rx_pkts;
	uint8_t qid;
	uint8_t dev;
};
#endif /* !ENABLE_STATS_IOCTL */
/*----------------------------------------------------------------------------*/
void
dpdk_init_handle(struct mtcp_thread_context *ctxt)
{
	struct dpdk_private_context *dpc;
	int i, j;
	char mempool_name[20];

	/* create and initialize private I/O module context */
	ctxt->io_private_context = calloc(1, sizeof(struct dpdk_private_context));
	if (ctxt->io_private_context == NULL) {
		TRACE_ERROR("Failed to initialize ctxt->io_private_context: "
			    "Can't allocate memory\n");
		exit(EXIT_FAILURE);
	}

	sprintf(mempool_name, "mbuf_pool-%d", ctxt->cpu);
	dpc = (struct dpdk_private_context *)ctxt->io_private_context;
	dpc->pktmbuf_pool = pktmbuf_pool[ctxt->cpu];

	/* set wmbufs correctly */
	for (j = 0; j < g_config.mos->netdev_table->num; j++) {
		/* Allocate wmbufs for each registered port */
		for (i = 0; i < MAX_PKT_BURST; i++) {
			dpc->wmbufs[j].m_table[i] = rte_pktmbuf_alloc(pktmbuf_pool[ctxt->cpu]);
			if (dpc->wmbufs[j].m_table[i] == NULL) {
				TRACE_ERROR("Failed to allocate %d:wmbuf[%d] on device %d!\n",
					    ctxt->cpu, i, j);
				exit(EXIT_FAILURE);
			}
		}
		/* set mbufs queue length to 0 to begin with */
		dpc->wmbufs[j].len = 0;
	}

#ifdef ENABLE_STATS_IOCTL
	dpc->fd = open("/dev/dpdk-iface", O_RDWR);
	if (dpc->fd == -1) {
		TRACE_ERROR("Can't open /dev/dpdk-iface for context->cpu: %d! "
			    "Are you using mlx4/mlx5 driver?\n",
			    ctxt->cpu);
	}
#endif /* !ENABLE_STATS_IOCTL */
}
/*----------------------------------------------------------------------------*/
int
dpdk_send_pkts(struct mtcp_thread_context *ctxt, int nif)
{
	struct dpdk_private_context *dpc;
	mtcp_manager_t mtcp;
	int ret;
	int qid;

	dpc = (struct dpdk_private_context *)ctxt->io_private_context;
	mtcp = ctxt->mtcp_manager;
	ret = 0;
	qid = cpu_qid_map[nif][ctxt->cpu];

	/* if queue is unassigned, skip it.. */
	if (unlikely(qid == 0xFF))
		return 0;

	/* if there are packets in the queue... flush them out to the wire */
	if (dpc->wmbufs[nif].len > 0) {
		struct rte_mbuf **pkts;
#ifdef ENABLE_STATS_IOCTL
		struct stats_struct ss;
#endif /* !ENABLE_STATS_IOCTL */
		int cnt = dpc->wmbufs[nif].len;
		pkts = dpc->wmbufs[nif].m_table;
#ifdef NETSTAT
		mtcp->nstat.tx_packets[nif] += cnt;
#ifdef ENABLE_STATS_IOCTL
		if (likely(dpc->fd >= 0)) {
			ss.tx_pkts = mtcp->nstat.tx_packets[nif];
			ss.tx_bytes = mtcp->nstat.tx_bytes[nif];
			ss.rx_pkts = mtcp->nstat.rx_packets[nif];
			ss.rx_bytes = mtcp->nstat.rx_bytes[nif];
			ss.qid = ctxt->cpu;
			ss.dev = nif;
			ioctl(dpc->fd, 0, &ss);
		}
#endif /* !ENABLE_STATS_IOCTL */
#endif
		do {
			/* tx cnt # of packets */
			ret = rte_eth_tx_burst(nif, qid, pkts, cnt);
			pkts += ret;
			cnt -= ret;
			/* if not all pkts were sent... then repeat the cycle */
		} while (cnt > 0);

#ifndef SHARE_IO_BUFFER
		int i;
		/* time to allocate fresh mbufs for the queue */
		for (i = 0; i < dpc->wmbufs[nif].len; i++) {
			dpc->wmbufs[nif].m_table[i] = rte_pktmbuf_alloc(pktmbuf_pool[ctxt->cpu]);
			/* error checking */
			if (unlikely(dpc->wmbufs[nif].m_table[i] == NULL)) {
				TRACE_ERROR("Failed to allocate %d:wmbuf[%d] on device %d!\n",
					    ctxt->cpu, i, nif);
				exit(EXIT_FAILURE);
			}
		}
#endif
		/* reset the len of mbufs var after flushing of packets */
		dpc->wmbufs[nif].len = 0;
	}

	return ret;
}
/*----------------------------------------------------------------------------*/
uint8_t *
dpdk_get_wptr(struct mtcp_thread_context *ctxt, int nif, uint16_t pktsize)
{
	struct dpdk_private_context *dpc;
	mtcp_manager_t mtcp;
	struct rte_mbuf *m;
	uint8_t *ptr;
	int len_of_mbuf;

	dpc = (struct dpdk_private_context *) ctxt->io_private_context;
	mtcp = ctxt->mtcp_manager;

	/* sanity check */
	if (unlikely(dpc->wmbufs[nif].len == MAX_PKT_BURST))
		return NULL;

	len_of_mbuf = dpc->wmbufs[nif].len;
	m = dpc->wmbufs[nif].m_table[len_of_mbuf];

	/* retrieve the right write offset */
	ptr = (void *)rte_pktmbuf_mtod(m, struct ether_hdr *);
	m->pkt_len = m->data_len = pktsize;
	m->nb_segs = 1;
	m->next = NULL;

#ifdef NETSTAT
	mtcp->nstat.tx_bytes[nif] += pktsize + 24;
#endif

	/* increment the len_of_mbuf var */
	dpc->wmbufs[nif].len = len_of_mbuf + 1;

	return (uint8_t *)ptr;
}
/*----------------------------------------------------------------------------*/
void
dpdk_set_wptr(struct mtcp_thread_context *ctxt, int out_nif, int in_nif, int index)
{
	struct dpdk_private_context *dpc;
	mtcp_manager_t mtcp;
	int len_of_mbuf;

	dpc = (struct dpdk_private_context *) ctxt->io_private_context;
	mtcp = ctxt->mtcp_manager;

	/* sanity check */
	if (unlikely(dpc->wmbufs[out_nif].len == MAX_PKT_BURST))
		return;

	len_of_mbuf = dpc->wmbufs[out_nif].len;
	dpc->wmbufs[out_nif].m_table[len_of_mbuf] =
		dpc->rmbufs[in_nif].m_table[index];

	dpc->wmbufs[out_nif].m_table[len_of_mbuf]->udata64 = 0;

#ifdef NETSTAT
	mtcp->nstat.tx_bytes[out_nif] += dpc->rmbufs[in_nif].m_table[index]->pkt_len + 24;
#endif

	/* increment the len_of_mbuf var */
	dpc->wmbufs[out_nif].len = len_of_mbuf + 1;

	return;
}
/*----------------------------------------------------------------------------*/
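/*
 * Ownership convention for received mbufs: dpdk_get_rptr() tags each mbuf
 * with udata64 = 1 (owned by the RX path). If a packet is re-queued for
 * forwarding, dpdk_set_wptr() resets the tag to 0 and the TX path takes
 * ownership. free_pkts() below therefore only releases mbufs still tagged 1,
 * i.e. packets that were consumed locally and never re-enqueued.
 */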
static inline void
free_pkts(struct rte_mbuf **mtable, unsigned len)
{
	int i;

	/* free the freaking packets */
	for (i = 0; i < len; i++) {
		if (mtable[i]->udata64 == 1) {
			rte_pktmbuf_free_seg(mtable[i]);
			RTE_MBUF_PREFETCH_TO_FREE(mtable[i+1]);
		}
	}
}
/*----------------------------------------------------------------------------*/
int32_t
dpdk_recv_pkts(struct mtcp_thread_context *ctxt, int ifidx)
{
	struct dpdk_private_context *dpc;
	int ret;
	uint8_t qid;

	dpc = (struct dpdk_private_context *) ctxt->io_private_context;
	qid = cpu_qid_map[ifidx][ctxt->cpu];

	/* if queue is unassigned, skip it.. */
	if (qid == 0xFF)
		return 0;

	if (dpc->rmbufs[ifidx].len != 0) {
		free_pkts(dpc->rmbufs[ifidx].m_table, dpc->rmbufs[ifidx].len);
		dpc->rmbufs[ifidx].len = 0;
	}

	ret = rte_eth_rx_burst((uint8_t)ifidx, qid,
			       dpc->pkts_burst, MAX_PKT_BURST);
#ifdef RX_IDLE_ENABLE
	dpc->rx_idle = (likely(ret != 0)) ? 0 : dpc->rx_idle + 1;
#endif
	dpc->rmbufs[ifidx].len = ret;

	return ret;
}
/*----------------------------------------------------------------------------*/
uint8_t *
dpdk_get_rptr(struct mtcp_thread_context *ctxt, int ifidx, int index, uint16_t *len)
{
	struct dpdk_private_context *dpc;
	struct rte_mbuf *m;
	uint8_t *pktbuf;

	dpc = (struct dpdk_private_context *) ctxt->io_private_context;

	m = dpc->pkts_burst[index];
	/* tag to check if the packet is a local or a forwarded pkt */
	m->udata64 = 1;
	/* don't enable pre-fetching... performance goes down */
	//rte_prefetch0(rte_pktmbuf_mtod(m, void *));
	*len = m->pkt_len;
	pktbuf = rte_pktmbuf_mtod(m, uint8_t *);

	/* enqueue the pkt ptr in mbuf */
	dpc->rmbufs[ifidx].m_table[index] = m;

	return pktbuf;
}
/*----------------------------------------------------------------------------*/
int
dpdk_get_nif(struct ifreq *ifr)
{
	int i;
	static int num_dev = -1;
	static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];

	/* get mac addr entries of 'detected' dpdk ports */
	if (num_dev < 0) {
		num_dev = rte_eth_dev_count();
		for (i = 0; i < num_dev; i++)
			rte_eth_macaddr_get(i, &ports_eth_addr[i]);
	}

	for (i = 0; i < num_dev; i++)
		if (!memcmp(&ifr->ifr_addr.sa_data[0], &ports_eth_addr[i], ETH_ALEN))
			return i;

	return -1;
}
/*----------------------------------------------------------------------------*/
int32_t
dpdk_select(struct mtcp_thread_context *ctxt)
{
#ifdef RX_IDLE_ENABLE
	struct dpdk_private_context *dpc;

	dpc = (struct dpdk_private_context *) ctxt->io_private_context;
	if (dpc->rx_idle > RX_IDLE_THRESH) {
		dpc->rx_idle = 0;
		usleep(RX_IDLE_TIMEOUT);
	}
#endif
	return 0;
}
/*----------------------------------------------------------------------------*/
void
dpdk_destroy_handle(struct mtcp_thread_context *ctxt)
{
	struct dpdk_private_context *dpc;
	int i;

	dpc = (struct dpdk_private_context *) ctxt->io_private_context;

	/* free wmbufs */
	for (i = 0; i < g_config.mos->netdev_table->num; i++)
		free_pkts(dpc->wmbufs[i].m_table, MAX_PKT_BURST);

#ifdef ENABLE_STATS_IOCTL
	/* free fd */
	if (dpc->fd >= 0)
		close(dpc->fd);
#endif /* !ENABLE_STATS_IOCTL */

	/* free it all up */
	free(dpc);
}
/*----------------------------------------------------------------------------*/
static void
check_all_ports_link_status(uint8_t port_num, uint32_t port_mask)
{
#define CHECK_INTERVAL			100 /* 100ms */
#define MAX_CHECK_TIME			90  /* 9s (90 * 100ms) in total */

	uint8_t portid, count, all_ports_up, print_flag = 0;
	struct rte_eth_link link;

	printf("\nChecking link status");
	fflush(stdout);
	for (count = 0; count <= MAX_CHECK_TIME; count++) {
		all_ports_up = 1;
		for (portid = 0; portid < port_num; portid++) {
			if ((port_mask & (1 << portid)) == 0)
				continue;
			memset(&link, 0, sizeof(link));
			rte_eth_link_get_nowait(portid, &link);
			/* print link status if flag set */
			if (print_flag == 1) {
				if (link.link_status)
					printf("Port %d Link Up - speed %u "
					       "Mbps - %s\n", (uint8_t)portid,
					       (unsigned)link.link_speed,
					       (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
					       ("full-duplex") : ("half-duplex"));
				else
					printf("Port %d Link Down\n",
					       (uint8_t)portid);
				continue;
			}
			/* clear all_ports_up flag if any link down */
			if (link.link_status == 0) {
				all_ports_up = 0;
				break;
			}
		}
		/* after finally printing all link status, get out */
		if (print_flag == 1)
			break;

		if (all_ports_up == 0) {
			printf(".");
			fflush(stdout);
			rte_delay_ms(CHECK_INTERVAL);
		}

		/* set the print_flag if all ports up or timeout */
		if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
			print_flag = 1;
			printf("done\n");
		}
	}
}
/*----------------------------------------------------------------------------*/
#if 0
static void
dpdk_enable_fdir(int portid, uint8_t is_master)
{
	struct rte_fdir_masks fdir_masks;
	struct rte_fdir_filter fdir_filter;
	int ret;

	memset(&fdir_filter, 0, sizeof(struct rte_fdir_filter));
	fdir_filter.iptype = RTE_FDIR_IPTYPE_IPV4;
	fdir_filter.l4type = RTE_FDIR_L4TYPE_TCP;
	fdir_filter.ip_dst.ipv4_addr = g_config.mos->netdev_table->ent[portid]->ip_addr;

	if (is_master) {
		memset(&fdir_masks, 0, sizeof(struct rte_fdir_masks));
		fdir_masks.src_ipv4_mask = 0x0;
		fdir_masks.dst_ipv4_mask = 0xFFFFFFFF;
		fdir_masks.src_port_mask = 0x0;
		fdir_masks.dst_port_mask = 0x0;

		/*
		 * enable the following if the filter is IP-only
		 * (non-TCP, non-UDP)
		 */
		/* fdir_masks.only_ip_flow = 1; */
		rte_eth_dev_fdir_set_masks(portid, &fdir_masks);
		ret = rte_eth_dev_fdir_add_perfect_filter(portid,
							   &fdir_filter,
							   0,
							   g_config.mos->multiprocess_curr_core,
							   0);
	} else {
		ret = rte_eth_dev_fdir_update_perfect_filter(portid,
							      &fdir_filter,
							      0,
							      g_config.mos->multiprocess_curr_core,
							      0);
	}
	if (ret < 0) {
		rte_exit(EXIT_FAILURE,
			 "fdir_add_perfect_filter_t call failed!: %d\n",
			 ret);
	}
	fprintf(stderr, "Filter for device ifidx: %d added\n", portid);
}
#endif
/*----------------------------------------------------------------------------*/
int32_t
dpdk_dev_ioctl(struct mtcp_thread_context *ctx, int nif, int cmd, void *argp)
{
	struct dpdk_private_context *dpc;
	struct rte_mbuf *m;
	int len_of_mbuf;
	struct iphdr *iph;
	struct tcphdr *tcph;
	RssInfo *rss_i;

	iph = (struct iphdr *)argp;
	dpc = (struct dpdk_private_context *)ctx->io_private_context;
	len_of_mbuf = dpc->wmbufs[nif].len;
	rss_i = NULL;

	switch (cmd) {
	case PKT_TX_IP_CSUM:
		m = dpc->wmbufs[nif].m_table[len_of_mbuf - 1];
		m->ol_flags = PKT_TX_IP_CKSUM | PKT_TX_IPV4;
		m->l2_len = sizeof(struct ether_hdr);
		m->l3_len = (iph->ihl<<2);
		break;
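	/*
	 * For hardware TCP checksum offload, DPDK expects the TCP checksum
	 * field to be pre-filled with the IPv4 pseudo-header checksum; the
	 * NIC computes the rest. The case below does exactly that via
	 * rte_ipv4_phdr_cksum() before requesting PKT_TX_TCP_CKSUM.
	 */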
	case PKT_TX_TCP_CSUM:
		m = dpc->wmbufs[nif].m_table[len_of_mbuf - 1];
		tcph = (struct tcphdr *)((unsigned char *)iph + (iph->ihl<<2));
		m->ol_flags |= PKT_TX_TCP_CKSUM;
		tcph->check = rte_ipv4_phdr_cksum((struct ipv4_hdr *)iph, m->ol_flags);
		break;
	case PKT_RX_RSS:
		rss_i = (RssInfo *)argp;
		m = dpc->pkts_burst[rss_i->pktidx];
		rss_i->hash_value = m->hash.rss;
		break;
	default:
		goto dev_ioctl_err;
	}

	return 0;
 dev_ioctl_err:
	return -1;
}
/*----------------------------------------------------------------------------*/
void
dpdk_load_module_upper_half(void)
{
	int cpu = g_config.mos->num_cores, ret;
	uint32_t cpumask = 0;
	char cpumaskbuf[10];
	char mem_channels[5];

	/* set the log level */
	rte_set_log_type(RTE_LOGTYPE_PMD, 0);
	rte_set_log_type(RTE_LOGTYPE_MALLOC, 0);
	rte_set_log_type(RTE_LOGTYPE_MEMPOOL, 0);
	rte_set_log_type(RTE_LOGTYPE_RING, 0);
	rte_set_log_level(RTE_LOG_WARNING);

	/* get the cpu mask */
	for (ret = 0; ret < cpu; ret++)
		cpumask = (cpumask | (1 << ret));
	sprintf(cpumaskbuf, "%X", cpumask);

	/* get the mem channels per socket */
	if (g_config.mos->nb_mem_channels == 0) {
		TRACE_ERROR("DPDK module requires # of memory channels "
			    "per socket parameter!\n");
		exit(EXIT_FAILURE);
	}
	sprintf(mem_channels, "%d", g_config.mos->nb_mem_channels);

	/* initialize the rte env first, what a waste of implementation effort! */
	char *argv[] = {"",
			"-c",
			cpumaskbuf,
			"-n",
			mem_channels,
			"--proc-type=auto",
			""
	};
	const int argc = 6;
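	/*
	 * For example, with num_cores = 8 and nb_mem_channels = 4 (values
	 * chosen purely for illustration), the EAL is initialized as if
	 * invoked with "-c FF -n 4 --proc-type=auto".
	 */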

	/*
	 * re-set getopt extern variable optind.
	 * this issue was a bitch to debug
	 * rte_eal_init() internally uses getopt();
	 * mtcp applications that also use an `external' getopt
	 * will cause a violent crash if optind is not reset to zero
	 * prior to calling the func below...
	 * see man getopt(3) for more details
	 */
	optind = 0;

	/* initialize the dpdk eal env */
	ret = rte_eal_init(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid EAL args!\n");

}
/*----------------------------------------------------------------------------*/
void
dpdk_load_module_lower_half(void)
{
	int portid, rxlcore_id, ret;
	struct rte_eth_fc_conf fc_conf;	/* for Ethernet flow control settings */
	/* setting the rss key */
	static const uint8_t key[] = {
		0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
		0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
		0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
		0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05
	};

	port_conf.rx_adv_conf.rss_conf.rss_key = (uint8_t *)&key;
	port_conf.rx_adv_conf.rss_conf.rss_key_len = sizeof(key);

	/* resetting cpu_qid mapping */
	memset(cpu_qid_map, 0xFF, sizeof(cpu_qid_map));

	if (!g_config.mos->multiprocess
	    || (g_config.mos->multiprocess && g_config.mos->multiprocess_is_master)) {
		for (rxlcore_id = 0; rxlcore_id < g_config.mos->num_cores; rxlcore_id++) {
			char name[20];
			sprintf(name, "mbuf_pool-%d", rxlcore_id);
			/* create the mbuf pools */
			pktmbuf_pool[rxlcore_id] =
				rte_mempool_create(name, NB_MBUF,
						   MBUF_SIZE, MEMPOOL_CACHE_SIZE,
						   sizeof(struct rte_pktmbuf_pool_private),
						   rte_pktmbuf_pool_init, NULL,
						   rte_pktmbuf_init, NULL,
						   rte_lcore_to_socket_id(rxlcore_id), 0);
			if (pktmbuf_pool[rxlcore_id] == NULL)
				rte_exit(EXIT_FAILURE, "Cannot init mbuf pool\n");
		}

		/* Initialise each port */
		for (portid = 0; portid < g_config.mos->netdev_table->num; portid++) {
			int num_queue = 0, eth_idx, i, queue_id;
			for (eth_idx = 0; eth_idx < g_config.mos->netdev_table->num; eth_idx++)
				if (portid == g_config.mos->netdev_table->ent[eth_idx]->ifindex)
					break;
			if (eth_idx == g_config.mos->netdev_table->num)
				continue;
			for (i = 0; i < sizeof(uint64_t) * 8; i++)
				if (g_config.mos->netdev_table->ent[eth_idx]->cpu_mask & (1L << i))
					num_queue++;

			/* set 'num_queues' (used for GetRSSCPUCore() in util.c) */
			num_queues = num_queue;

			/* init port */
			printf("Initializing port %u... ", (unsigned) portid);
			fflush(stdout);
			ret = rte_eth_dev_configure(portid, num_queue, num_queue,
						    &port_conf);
			if (ret < 0)
				rte_exit(EXIT_FAILURE, "Cannot configure device:"
					 "err=%d, port=%u\n",
					 ret, (unsigned) portid);

			/* init one RX queue per CPU */
			fflush(stdout);
#ifdef DEBUG
			rte_eth_macaddr_get(portid, &ports_eth_addr[portid]);
#endif
			queue_id = 0;
			for (rxlcore_id = 0; rxlcore_id < g_config.mos->num_cores; rxlcore_id++) {
				if (!(g_config.mos->netdev_table->ent[eth_idx]->cpu_mask & (1L << rxlcore_id)))
					continue;
				ret = rte_eth_rx_queue_setup(portid, queue_id, nb_rxd,
							     rte_eth_dev_socket_id(portid), &rx_conf,
							     pktmbuf_pool[rxlcore_id]);
				if (ret < 0)
					rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup:"
						 "err=%d, port=%u, queueid: %d\n",
						 ret, (unsigned) portid, rxlcore_id);
				cpu_qid_map[portid][rxlcore_id] = queue_id++;
			}

			/* init one TX queue on each port per CPU (this is redundant for
			 * this app) */
			fflush(stdout);
			queue_id = 0;
			for (rxlcore_id = 0; rxlcore_id < g_config.mos->num_cores; rxlcore_id++) {
				if (!(g_config.mos->netdev_table->ent[eth_idx]->cpu_mask & (1L << rxlcore_id)))
					continue;
				ret = rte_eth_tx_queue_setup(portid, queue_id++, nb_txd,
							     rte_eth_dev_socket_id(portid), &tx_conf);
				if (ret < 0)
					rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup:"
						 "err=%d, port=%u, queueid: %d\n",
						 ret, (unsigned) portid, rxlcore_id);
			}

			/* Start device */
			ret = rte_eth_dev_start(portid);
			if (ret < 0)
				rte_exit(EXIT_FAILURE, "rte_eth_dev_start:err=%d, port=%u\n",
					 ret, (unsigned) portid);

			printf("done: \n");
			rte_eth_promiscuous_enable(portid);

			/* retrieve current flow control settings per port */
			memset(&fc_conf, 0, sizeof(fc_conf));
			ret = rte_eth_dev_flow_ctrl_get(portid, &fc_conf);
			if (ret != 0) {
				rte_exit(EXIT_FAILURE, "Failed to get flow control info!\n");
			}

			/* and just disable the rx/tx flow control */
			fc_conf.mode = RTE_FC_NONE;
			ret = rte_eth_dev_flow_ctrl_set(portid, &fc_conf);
			if (ret != 0) {
				rte_exit(EXIT_FAILURE, "Failed to set flow control info!: errno: %d\n",
					 ret);
			}

#ifdef DEBUG
			printf("Port %u, MAC address: %02X:%02X:%02X:%02X:%02X:%02X\n\n",
			       (unsigned) portid,
			       ports_eth_addr[portid].addr_bytes[0],
			       ports_eth_addr[portid].addr_bytes[1],
			       ports_eth_addr[portid].addr_bytes[2],
			       ports_eth_addr[portid].addr_bytes[3],
			       ports_eth_addr[portid].addr_bytes[4],
			       ports_eth_addr[portid].addr_bytes[5]);
#endif
#if 0
			/* if multi-process support is enabled, then turn on FDIR */
			if (g_config.mos->multiprocess)
				dpdk_enable_fdir(portid, g_config.mos->multiprocess_is_master);
#endif
		}
	} else { /* g_config.mos->multiprocess && !g_config.mos->multiprocess_is_master */
		for (rxlcore_id = 0; rxlcore_id < g_config.mos->num_cores; rxlcore_id++) {
			char name[20];
			sprintf(name, "mbuf_pool-%d", rxlcore_id);
			/* initialize the mbuf pools */
			pktmbuf_pool[rxlcore_id] =
				rte_mempool_lookup(name);
			if (pktmbuf_pool[rxlcore_id] == NULL)
				rte_exit(EXIT_FAILURE, "Cannot init mbuf pool\n");
		}
#if 0
		for (portid = 0; portid < g_config.mos->netdev_table->num; portid++)
			dpdk_enable_fdir(portid, g_config.mos->multiprocess_is_master);
#endif
	}

	check_all_ports_link_status(g_config.mos->netdev_table->num, 0xFFFFFFFF);
}
/*----------------------------------------------------------------------------*/
io_module_func dpdk_module_func = {
	.load_module_upper_half = dpdk_load_module_upper_half,
	.load_module_lower_half = dpdk_load_module_lower_half,
	.init_handle		= dpdk_init_handle,
	.link_devices		= NULL,
	.release_pkt		= NULL,
	.send_pkts		= dpdk_send_pkts,
	.get_wptr		= dpdk_get_wptr,
	.recv_pkts		= dpdk_recv_pkts,
	.get_rptr		= dpdk_get_rptr,
	.get_nif		= dpdk_get_nif,
	.select			= dpdk_select,
	.destroy_handle		= dpdk_destroy_handle,
	.dev_ioctl		= dpdk_dev_ioctl,
	.set_wptr		= dpdk_set_wptr,
};
/*----------------------------------------------------------------------------*/