/* for io_module_func def'ns */
#include "io_module.h"
/* for mtcp related def'ns */
#include "mtcp.h"
/* for errno */
#include <errno.h>
/* for close/optind */
#include <unistd.h>
/* for logging */
#include "debug.h"
/* for num_devices_* */
#include "config.h"
/* for rte_max_eth_ports */
#include <rte_common.h>
/* for rte_eth_rxconf */
#include <rte_ethdev.h>
/* for delay funcs */
#include <rte_cycles.h>
/* for IP pseudo-header checksum */
#include <rte_ip.h>
#define ENABLE_STATS_IOCTL 1
#ifdef ENABLE_STATS_IOCTL
/* for open */
#include <fcntl.h>
/* for ioctl */
#include <sys/ioctl.h>
#endif /* ENABLE_STATS_IOCTL */
/* for retrieving rte version(s) */
#include <rte_version.h>
/*----------------------------------------------------------------------------*/
/* Essential macros */
#define MAX_RX_QUEUE_PER_LCORE MAX_CPUS
#define MAX_TX_QUEUE_PER_PORT MAX_CPUS

#define MBUF_SIZE (2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)
#define NB_MBUF 8192
#define MEMPOOL_CACHE_SIZE 256
//#define RX_IDLE_ENABLE 1
#define RX_IDLE_TIMEOUT 1 /* in micro-seconds */
#define RX_IDLE_THRESH 64
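/*
 * RX_IDLE_*: when RX_IDLE_ENABLE is defined, dpdk_recv_pkts() counts
 * consecutive empty polls in dpc->rx_idle; once the count exceeds
 * RX_IDLE_THRESH, dpdk_select() sleeps for RX_IDLE_TIMEOUT microseconds
 * to back off the busy-poll loop (see dpdk_select() below).
 */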

/*
 * RX and TX Prefetch, Host, and Write-back threshold values should be
 * carefully set for optimal performance. Consult the network
 * controller's datasheet and supporting DPDK documentation for guidance
 * on how these parameters should be set.
 */
#define RX_PTHRESH 8 /**< Default values of RX prefetch threshold reg. */
#define RX_HTHRESH 8 /**< Default values of RX host threshold reg. */
#define RX_WTHRESH 4 /**< Default values of RX write-back threshold reg. */

/*
 * These default values are optimized for use with the Intel(R) 82599 10 GbE
 * Controller and the DPDK ixgbe PMD. Consider using other values for other
 * network controllers and/or network drivers.
 */
#define TX_PTHRESH 36 /**< Default values of TX prefetch threshold reg. */
#define TX_HTHRESH 0 /**< Default values of TX host threshold reg. */
#define TX_WTHRESH 0 /**< Default values of TX write-back threshold reg. */

#define MAX_PKT_BURST 64

/*
 * Configurable number of RX/TX ring descriptors
 */
#define RTE_TEST_RX_DESC_DEFAULT 128
#define RTE_TEST_TX_DESC_DEFAULT 512

static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
/*----------------------------------------------------------------------------*/
/* packet memory pools for storing packet bufs */
static struct rte_mempool *pktmbuf_pool[MAX_CPUS] = {NULL};
static uint8_t cpu_qid_map[RTE_MAX_ETHPORTS][MAX_CPUS] = {{0}};
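/*
 * cpu_qid_map[port][cpu] holds the RX/TX queue id that a given mtcp
 * thread (cpu) uses on a given port. It is reset to 0xFF (unassigned)
 * in dpdk_load_module_lower_half(); the RX/TX paths skip a port when
 * the map entry for the calling thread is still 0xFF.
 */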

//#define DEBUG 1
#ifdef DEBUG
/* ethernet addresses of ports */
static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];
#endif

static struct rte_eth_dev_info dev_info[RTE_MAX_ETHPORTS];

static struct rte_eth_conf port_conf = {
	.rxmode = {
		.mq_mode = ETH_MQ_RX_RSS,
		.max_rx_pkt_len = ETHER_MAX_LEN,
		.split_hdr_size = 0,
#if RTE_VERSION < RTE_VERSION_NUM(18, 5, 0, 0)
		.header_split = 0,   /**< Header Split disabled */
		.hw_ip_checksum = 1, /**< IP checksum offload enabled */
		.hw_vlan_filter = 0, /**< VLAN filtering disabled */
		.jumbo_frame = 0,    /**< Jumbo Frame Support disabled */
		.hw_strip_crc = 1,   /**< CRC stripped by hardware */
#else
		.offloads = DEV_RX_OFFLOAD_CHECKSUM,
#endif
	},
	.rx_adv_conf = {
		.rss_conf = {
			.rss_key = NULL,
			.rss_hf = ETH_RSS_TCP | ETH_RSS_UDP |
				  ETH_RSS_IP | ETH_RSS_L2_PAYLOAD
		},
	},
	.txmode = {
		.mq_mode = ETH_MQ_TX_NONE,
#if RTE_VERSION >= RTE_VERSION_NUM(18, 2, 0, 0)
		.offloads = DEV_TX_OFFLOAD_IPV4_CKSUM |
			    DEV_TX_OFFLOAD_UDP_CKSUM |
			    DEV_TX_OFFLOAD_TCP_CKSUM
#endif
	},
};
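/*
 * Note: the requested rss_hf bits are advisory; on DPDK >= 18.02 they are
 * masked against the device's reported flow_type_rss_offloads before
 * rte_eth_dev_configure() is called (see dpdk_load_module_lower_half()).
 */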

static const struct rte_eth_rxconf rx_conf = {
	.rx_thresh = {
		.pthresh = RX_PTHRESH, /* RX prefetch threshold reg */
		.hthresh = RX_HTHRESH, /* RX host threshold reg */
		.wthresh = RX_WTHRESH, /* RX write-back threshold reg */
	},
	.rx_free_thresh = 32,
};

static const struct rte_eth_txconf tx_conf = {
	.tx_thresh = {
		.pthresh = TX_PTHRESH, /* TX prefetch threshold reg */
		.hthresh = TX_HTHRESH, /* TX host threshold reg */
		.wthresh = TX_WTHRESH, /* TX write-back threshold reg */
	},
	.tx_free_thresh = 0, /* Use PMD default values */
	.tx_rs_thresh = 0, /* Use PMD default values */
#if RTE_VERSION < RTE_VERSION_NUM(18, 5, 0, 0)
	/*
	 * Keep txq_flags cleared so that multi-segment mbufs and TX
	 * offloads (e.g. checksum) remain available on this queue.
	 */
	.txq_flags = 0x0,
#endif
};

struct mbuf_table {
	unsigned len; /* length of queued packets */
	struct rte_mbuf *m_table[MAX_PKT_BURST];
};

struct dpdk_private_context {
	struct mbuf_table rmbufs[RTE_MAX_ETHPORTS];
	struct mbuf_table wmbufs[RTE_MAX_ETHPORTS];
	struct rte_mempool *pktmbuf_pool;
	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
#ifdef RX_IDLE_ENABLE
	uint8_t rx_idle;
#endif
#ifdef ENABLE_STATS_IOCTL
	int fd;
#endif /* ENABLE_STATS_IOCTL */
} __rte_cache_aligned;
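/*
 * Per-mtcp-thread I/O state: rmbufs[port] tracks the mbufs of the most
 * recent RX burst (so they can be freed on the next call), wmbufs[port]
 * stages outgoing mbufs until dpdk_send_pkts() flushes them, and
 * pkts_burst is the scratch array handed to rte_eth_rx_burst().
 */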

#ifdef ENABLE_STATS_IOCTL
/**
 * stats struct passed on from user space to the driver
 */
struct stats_struct {
	uint64_t tx_bytes;
	uint64_t tx_pkts;
	uint64_t rx_bytes;
	uint64_t rx_pkts;
	uint8_t qid;
	uint8_t dev;
};
#endif /* ENABLE_STATS_IOCTL */
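/*
 * The TX path fills one stats_struct per (queue, port) pair and pushes it
 * to the kernel helper via ioctl(dpc->fd, 0, &ss); see dpdk_send_pkts().
 * A minimal sketch of one update, assuming /dev/dpdk-iface was opened
 * successfully in dpdk_init_handle():
 *
 *	struct stats_struct ss = {
 *		.tx_pkts  = mtcp->nstat.tx_packets[nif],
 *		.tx_bytes = mtcp->nstat.tx_bytes[nif],
 *		.rx_pkts  = mtcp->nstat.rx_packets[nif],
 *		.rx_bytes = mtcp->nstat.rx_bytes[nif],
 *		.qid = ctxt->cpu, .dev = nif,
 *	};
 *	ioctl(dpc->fd, 0, &ss);
 */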
/*----------------------------------------------------------------------------*/
void
dpdk_init_handle(struct mtcp_thread_context *ctxt)
{
	struct dpdk_private_context *dpc;
	int i, j;
	char mempool_name[20];

	/* create and initialize private I/O module context */
	ctxt->io_private_context = calloc(1, sizeof(struct dpdk_private_context));
	if (ctxt->io_private_context == NULL) {
		TRACE_ERROR("Failed to initialize ctxt->io_private_context: "
			    "Can't allocate memory\n");
		exit(EXIT_FAILURE);
	}

	sprintf(mempool_name, "mbuf_pool-%d", ctxt->cpu);
	dpc = (struct dpdk_private_context *)ctxt->io_private_context;
	dpc->pktmbuf_pool = pktmbuf_pool[ctxt->cpu];

	/* set wmbufs correctly */
	for (j = 0; j < g_config.mos->netdev_table->num; j++) {
		/* allocate wmbufs for each registered port */
		for (i = 0; i < MAX_PKT_BURST; i++) {
			dpc->wmbufs[j].m_table[i] = rte_pktmbuf_alloc(pktmbuf_pool[ctxt->cpu]);
			if (dpc->wmbufs[j].m_table[i] == NULL) {
				TRACE_ERROR("Failed to allocate %d:wmbuf[%d] on device %d!\n",
					    ctxt->cpu, i, j);
				exit(EXIT_FAILURE);
			}
		}
		/* set mbufs queue length to 0 to begin with */
		dpc->wmbufs[j].len = 0;
	}

#ifdef ENABLE_STATS_IOCTL
	dpc->fd = open("/dev/dpdk-iface", O_RDWR);
	if (dpc->fd == -1) {
		TRACE_ERROR("Can't open /dev/dpdk-iface for ctxt->cpu: %d! "
			    "Are you using the mlx4/mlx5 driver?\n",
			    ctxt->cpu);
	}
#endif /* ENABLE_STATS_IOCTL */
}
/*----------------------------------------------------------------------------*/
int
dpdk_send_pkts(struct mtcp_thread_context *ctxt, int nif)
{
	struct dpdk_private_context *dpc;
	mtcp_manager_t mtcp;
	int ret;
	int qid;

	dpc = (struct dpdk_private_context *)ctxt->io_private_context;
	mtcp = ctxt->mtcp_manager;
	ret = 0;
	qid = cpu_qid_map[nif][ctxt->cpu];

	/* if the queue is unassigned, skip it */
	if (unlikely(qid == 0xFF))
		return 0;

	/* if there are packets in the queue... flush them out to the wire */
	if (dpc->wmbufs[nif].len > 0) {
		struct rte_mbuf **pkts;
#ifdef ENABLE_STATS_IOCTL
		struct stats_struct ss;
#endif /* ENABLE_STATS_IOCTL */
		int cnt = dpc->wmbufs[nif].len;
		pkts = dpc->wmbufs[nif].m_table;
#ifdef NETSTAT
		mtcp->nstat.tx_packets[nif] += cnt;
#ifdef ENABLE_STATS_IOCTL
		if (likely(dpc->fd >= 0)) {
			ss.tx_pkts = mtcp->nstat.tx_packets[nif];
			ss.tx_bytes = mtcp->nstat.tx_bytes[nif];
			ss.rx_pkts = mtcp->nstat.rx_packets[nif];
			ss.rx_bytes = mtcp->nstat.rx_bytes[nif];
			ss.qid = ctxt->cpu;
			ss.dev = nif;
			ioctl(dpc->fd, 0, &ss);
		}
#endif /* ENABLE_STATS_IOCTL */
#endif
		do {
			/* transmit cnt packets */
			ret = rte_eth_tx_burst(nif, qid, pkts, cnt);
			pkts += ret;
			cnt -= ret;
			/* if not all pkts were sent... then repeat the cycle */
		} while (cnt > 0);

#ifndef SHARE_IO_BUFFER
		int i;
		/* time to allocate fresh mbufs for the queue */
		for (i = 0; i < dpc->wmbufs[nif].len; i++) {
			dpc->wmbufs[nif].m_table[i] = rte_pktmbuf_alloc(pktmbuf_pool[ctxt->cpu]);
			/* error checking */
			if (unlikely(dpc->wmbufs[nif].m_table[i] == NULL)) {
				TRACE_ERROR("Failed to allocate %d:wmbuf[%d] on device %d!\n",
					    ctxt->cpu, i, nif);
				exit(EXIT_FAILURE);
			}
		}
#endif
		/* reset the len of mbufs var after flushing of packets */
		dpc->wmbufs[nif].len = 0;
	}

	return ret;
}
/*----------------------------------------------------------------------------*/
uint8_t *
dpdk_get_wptr(struct mtcp_thread_context *ctxt, int nif, uint16_t pktsize)
{
	struct dpdk_private_context *dpc;
	mtcp_manager_t mtcp;
	struct rte_mbuf *m;
	uint8_t *ptr;
	int len_of_mbuf;

	dpc = (struct dpdk_private_context *) ctxt->io_private_context;
	mtcp = ctxt->mtcp_manager;

	/* sanity check */
	if (unlikely(dpc->wmbufs[nif].len == MAX_PKT_BURST))
		return NULL;

	len_of_mbuf = dpc->wmbufs[nif].len;
	m = dpc->wmbufs[nif].m_table[len_of_mbuf];

	/* retrieve the right write offset */
	ptr = (void *)rte_pktmbuf_mtod(m, struct ether_hdr *);
	m->pkt_len = m->data_len = pktsize;
	m->nb_segs = 1;
	m->next = NULL;

#ifdef NETSTAT
	mtcp->nstat.tx_bytes[nif] += pktsize + ETHER_OVR;
#endif

	/* increment the len_of_mbuf var */
	dpc->wmbufs[nif].len = len_of_mbuf + 1;

	return (uint8_t *)ptr;
}
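/*
 * Typical TX flow through this module (a sketch; the real call sites
 * live in the mtcp core): reserve a frame slot, write the packet, flush.
 *
 *	uint8_t *buf = dpdk_get_wptr(ctxt, nif, pktsize);
 *	if (buf != NULL) {
 *		memcpy(buf, frame, pktsize);   // build the Ethernet frame
 *		dpdk_send_pkts(ctxt, nif);     // flush the staged wmbufs
 *	}
 *
 * In practice the core batches up to MAX_PKT_BURST frames per port before
 * flushing; dpdk_get_wptr() returning NULL signals a full batch.
 */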
/*----------------------------------------------------------------------------*/
void
dpdk_set_wptr(struct mtcp_thread_context *ctxt, int out_nif, int in_nif, int index)
{
	struct dpdk_private_context *dpc;
	mtcp_manager_t mtcp;
	int len_of_mbuf;

	dpc = (struct dpdk_private_context *) ctxt->io_private_context;
	mtcp = ctxt->mtcp_manager;

	/* sanity check */
	if (unlikely(dpc->wmbufs[out_nif].len == MAX_PKT_BURST))
		return;

	len_of_mbuf = dpc->wmbufs[out_nif].len;
	dpc->wmbufs[out_nif].m_table[len_of_mbuf] =
		dpc->rmbufs[in_nif].m_table[index];

	/* mark the mbuf as forwarded (udata64 = 0) so that free_pkts()
	 * skips it; the TX path now owns this mbuf */
	dpc->wmbufs[out_nif].m_table[len_of_mbuf]->udata64 = 0;

#ifdef NETSTAT
	mtcp->nstat.tx_bytes[out_nif] += dpc->rmbufs[in_nif].m_table[index]->pkt_len + ETHER_OVR;
#endif

	/* increment the len_of_mbuf var */
	dpc->wmbufs[out_nif].len = len_of_mbuf + 1;

	return;
}
/*----------------------------------------------------------------------------*/
static inline void
free_pkts(struct rte_mbuf **mtable, unsigned len)
{
	unsigned i;

	/* free only the packets consumed locally (udata64 == 1);
	 * forwarded mbufs are owned by the TX path (see dpdk_set_wptr()) */
	for (i = 0; i < len; i++) {
		if (mtable[i]->udata64 == 1) {
			rte_pktmbuf_free_seg(mtable[i]);
			if (i + 1 < len)
				RTE_MBUF_PREFETCH_TO_FREE(mtable[i + 1]);
		}
	}
}
/*----------------------------------------------------------------------------*/
int32_t
dpdk_recv_pkts(struct mtcp_thread_context *ctxt, int ifidx)
{
	struct dpdk_private_context *dpc;
	int ret;
	uint8_t qid;

	dpc = (struct dpdk_private_context *) ctxt->io_private_context;
	qid = cpu_qid_map[ifidx][ctxt->cpu];

	/* if the queue is unassigned, skip it */
	if (qid == 0xFF)
		return 0;

	/* free the mbufs of the previous burst before fetching a new one */
	if (dpc->rmbufs[ifidx].len != 0) {
		free_pkts(dpc->rmbufs[ifidx].m_table, dpc->rmbufs[ifidx].len);
		dpc->rmbufs[ifidx].len = 0;
	}

	ret = rte_eth_rx_burst((uint8_t)ifidx, qid,
			       dpc->pkts_burst, MAX_PKT_BURST);
#ifdef RX_IDLE_ENABLE
	dpc->rx_idle = (likely(ret != 0)) ? 0 : dpc->rx_idle + 1;
#endif
	dpc->rmbufs[ifidx].len = ret;

	return ret;
}
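/*
 * Typical RX flow (a sketch; the real poll loop lives in the mtcp core):
 *
 *	int cnt = dpdk_recv_pkts(ctxt, ifidx);
 *	for (int idx = 0; idx < cnt; idx++) {
 *		uint16_t len;
 *		uint8_t *pkt = dpdk_get_rptr(ctxt, ifidx, idx, &len);
 *		// ... process the Ethernet frame at pkt[0..len) ...
 *	}
 *
 * The mbufs behind these pointers stay valid until the next
 * dpdk_recv_pkts() call on the same interface frees the burst.
 */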
/*----------------------------------------------------------------------------*/
uint8_t *
dpdk_get_rptr(struct mtcp_thread_context *ctxt, int ifidx, int index, uint16_t *len)
{
	struct dpdk_private_context *dpc;
	struct rte_mbuf *m;
	uint8_t *pktbuf;

	dpc = (struct dpdk_private_context *) ctxt->io_private_context;

	m = dpc->pkts_burst[index];
	/* tag to check if the packet is a local or a forwarded pkt */
	m->udata64 = 1;
	/* don't enable pre-fetching... performance goes down */
	//rte_prefetch0(rte_pktmbuf_mtod(m, void *));
	*len = m->pkt_len;
	pktbuf = rte_pktmbuf_mtod(m, uint8_t *);

	/* enqueue the pkt ptr in mbuf */
	dpc->rmbufs[ifidx].m_table[index] = m;

	return pktbuf;
}
/*----------------------------------------------------------------------------*/
int
dpdk_get_nif(struct ifreq *ifr)
{
	int i;
	static int num_dev = -1;
	static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];

	/* on first call, cache the MAC addresses of all detected DPDK ports */
	if (num_dev < 0) {
#if RTE_VERSION < RTE_VERSION_NUM(18, 5, 0, 0)
		num_dev = rte_eth_dev_count();
#else
		num_dev = rte_eth_dev_count_avail();
#endif
		for (i = 0; i < num_dev; i++)
			rte_eth_macaddr_get(i, &ports_eth_addr[i]);
	}

	/* map the MAC address in ifr to a DPDK port id, or -1 if absent */
	for (i = 0; i < num_dev; i++)
		if (!memcmp(&ifr->ifr_addr.sa_data[0], &ports_eth_addr[i], ETH_ALEN))
			return i;

	return -1;
}
/*----------------------------------------------------------------------------*/
int32_t
dpdk_select(struct mtcp_thread_context *ctxt)
{
#ifdef RX_IDLE_ENABLE
	struct dpdk_private_context *dpc;

	dpc = (struct dpdk_private_context *) ctxt->io_private_context;
	if (dpc->rx_idle > RX_IDLE_THRESH) {
		dpc->rx_idle = 0;
		usleep(RX_IDLE_TIMEOUT);
	}
#endif
	return 0;
}
/*----------------------------------------------------------------------------*/
void
dpdk_destroy_handle(struct mtcp_thread_context *ctxt)
{
	struct dpdk_private_context *dpc;
	int i;

	dpc = (struct dpdk_private_context *) ctxt->io_private_context;

	/* free wmbufs */
	for (i = 0; i < g_config.mos->netdev_table->num; i++)
		free_pkts(dpc->wmbufs[i].m_table, MAX_PKT_BURST);

#ifdef ENABLE_STATS_IOCTL
	/* close the stats device fd */
	if (dpc->fd >= 0)
		close(dpc->fd);
#endif /* ENABLE_STATS_IOCTL */

	/* free it all up */
	free(dpc);
}
/*----------------------------------------------------------------------------*/
static void
check_all_ports_link_status(uint8_t port_num, uint32_t port_mask)
{
#define CHECK_INTERVAL 100 /* 100ms */
#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */

	uint8_t portid, count, all_ports_up, print_flag = 0;
	struct rte_eth_link link;

	printf("\nChecking link status");
	fflush(stdout);
	for (count = 0; count <= MAX_CHECK_TIME; count++) {
		all_ports_up = 1;
		for (portid = 0; portid < port_num; portid++) {
			if ((port_mask & (1 << portid)) == 0)
				continue;
			memset(&link, 0, sizeof(link));
			rte_eth_link_get_nowait(portid, &link);
			/* print link status if flag set */
			if (print_flag == 1) {
				if (link.link_status)
					printf("Port %d Link Up - speed %u "
					       "Mbps - %s\n", (uint8_t)portid,
					       (unsigned)link.link_speed,
					       (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
					       ("full-duplex") : ("half-duplex"));
				else
					printf("Port %d Link Down\n",
					       (uint8_t)portid);
				continue;
			}
			/* clear all_ports_up flag if any link is down */
			if (link.link_status == 0) {
				all_ports_up = 0;
				break;
			}
		}
		/* after finally printing all link status, get out */
		if (print_flag == 1)
			break;

		if (all_ports_up == 0) {
			printf(".");
			fflush(stdout);
			rte_delay_ms(CHECK_INTERVAL);
		}

		/* set the print_flag if all ports are up or we timed out */
		if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
			print_flag = 1;
			printf("done\n");
		}
	}
}
534 int32_t
dpdk_dev_ioctl(struct mtcp_thread_context * ctx,int nif,int cmd,void * argp)535 dpdk_dev_ioctl(struct mtcp_thread_context *ctx, int nif, int cmd, void *argp)
536 {
537 struct dpdk_private_context *dpc;
538 struct rte_mbuf *m;
539 int len_of_mbuf;
540 struct iphdr *iph;
541 struct tcphdr *tcph;
542 RssInfo *rss_i;
543 void **argpptr = (void **)argp;
544
545 if (cmd == DRV_NAME) {
546 *argpptr = (void *)dev_info->driver_name;
547 return 0;
548 }
549
550 iph = (struct iphdr *)argp;
551 dpc = (struct dpdk_private_context *)ctx->io_private_context;
552 len_of_mbuf = dpc->wmbufs[nif].len;
553 rss_i = NULL;
554
555 switch (cmd) {
556 case PKT_TX_IP_CSUM:
557 m = dpc->wmbufs[nif].m_table[len_of_mbuf - 1];
558 m->ol_flags = PKT_TX_IP_CKSUM | PKT_TX_IPV4;
559 m->l2_len = sizeof(struct ether_hdr);
560 m->l3_len = (iph->ihl<<2);
561 break;
562 case PKT_TX_TCP_CSUM:
563 m = dpc->wmbufs[nif].m_table[len_of_mbuf - 1];
564 tcph = (struct tcphdr *)((unsigned char *)iph + (iph->ihl<<2));
565 m->ol_flags |= PKT_TX_TCP_CKSUM;
566 tcph->check = rte_ipv4_phdr_cksum((struct ipv4_hdr *)iph, m->ol_flags);
567 break;
568 case PKT_RX_RSS:
569 rss_i = (RssInfo *)argp;
570 m = dpc->pkts_burst[rss_i->pktidx];
571 rss_i->hash_value = m->hash.rss;
572 break;
573 default:
574 goto dev_ioctl_err;
575 }
576
577 return 0;
578 dev_ioctl_err:
579 return -1;
580 }
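/*
 * Sketch of how a caller requests TX checksum offload for the packet most
 * recently staged with dpdk_get_wptr() (assumed usage; the actual call
 * sites are in the mtcp core):
 *
 *	struct iphdr *iph = ...;   // points into the staged frame
 *	dpdk_dev_ioctl(ctx, nif, PKT_TX_IP_CSUM, iph);   // IP header csum
 *	dpdk_dev_ioctl(ctx, nif, PKT_TX_TCP_CSUM, iph);  // TCP csum; seeds
 *				// tcph->check with the IPv4 pseudo-header csum
 */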
/*----------------------------------------------------------------------------*/
void
dpdk_load_module_upper_half(void)
{
	int cpu = g_config.mos->num_cores, ret;
	uint32_t cpumask = 0;
	char cpumaskbuf[10];
	char mem_channels[5];

	/* set the log level */
#if RTE_VERSION < RTE_VERSION_NUM(17, 5, 0, 0)
	rte_set_log_type(RTE_LOGTYPE_PMD, 0);
	rte_set_log_type(RTE_LOGTYPE_MALLOC, 0);
	rte_set_log_type(RTE_LOGTYPE_MEMPOOL, 0);
	rte_set_log_type(RTE_LOGTYPE_RING, 0);
	rte_set_log_level(RTE_LOG_WARNING);
#else
	rte_log_set_level(RTE_LOGTYPE_PMD, 0);
	rte_log_set_level(RTE_LOGTYPE_MALLOC, 0);
	rte_log_set_level(RTE_LOGTYPE_MEMPOOL, 0);
	rte_log_set_level(RTE_LOGTYPE_RING, 0);
	/* note: rte_log_set_global_level() requires DPDK >= 17.08 */
	rte_log_set_global_level(RTE_LOG_WARNING);
#endif
	/* build the cpu mask for the EAL */
	for (ret = 0; ret < cpu; ret++)
		cpumask = (cpumask | (1 << ret));
	sprintf(cpumaskbuf, "%X", cpumask);

	/* get the mem channels per socket */
	if (g_config.mos->nb_mem_channels == 0) {
		TRACE_ERROR("DPDK module requires the number of memory channels "
			    "per socket parameter!\n");
		exit(EXIT_FAILURE);
	}
	sprintf(mem_channels, "%d", g_config.mos->nb_mem_channels);

	/* initialize the rte env first */
	char *argv[] = {"",
			"-c",
			cpumaskbuf,
			"-n",
			mem_channels,
			"--proc-type=auto",
			""
	};
	const int argc = 6;

	/*
	 * Reset the getopt extern variable optind. rte_eal_init()
	 * internally uses getopt(); mtcp applications that also use
	 * getopt would crash if optind were not reset to zero before
	 * the call below. See getopt(3) for details.
	 */
	optind = 0;

	/* initialize the dpdk eal env */
	ret = rte_eal_init(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid EAL args!\n");
}
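/*
 * With 8 cores and 4 memory channels configured, the rte_eal_init() call
 * above is equivalent to launching with:
 *
 *	<app> -c FF -n 4 --proc-type=auto
 *
 * --proc-type=auto lets the same binary run as either the primary process
 * (which creates the mbuf pools) or a secondary one (which looks them up;
 * see the multiprocess branch in dpdk_load_module_lower_half()).
 */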
/*----------------------------------------------------------------------------*/
void
dpdk_load_module_lower_half(void)
{
	int portid, rxlcore_id, ret;
	struct rte_eth_fc_conf fc_conf; /* for Ethernet flow control settings */
	/* setting the rss key */
	static const uint8_t key[] = {
		0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
		0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
		0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
		0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
		0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
		0x05, 0x05
	};
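	/*
	 * A repeated-byte key makes the Toeplitz RSS hash symmetric: both
	 * directions of a TCP/UDP flow (src/dst swapped) hash to the same
	 * value and thus land on the same RX queue/core, which a per-core
	 * TCP stack needs. (This is our reading of the choice; any key
	 * whose 16-bit words are all identical has this property.)
	 */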

	port_conf.rx_adv_conf.rss_conf.rss_key = (uint8_t *)key;
	port_conf.rx_adv_conf.rss_conf.rss_key_len = sizeof(key);

	/* reset the cpu_qid mapping to 'unassigned' (0xFF) */
	memset(cpu_qid_map, 0xFF, sizeof(cpu_qid_map));

	if (!g_config.mos->multiprocess
	    || (g_config.mos->multiprocess && g_config.mos->multiprocess_is_master)) {
		for (rxlcore_id = 0; rxlcore_id < g_config.mos->num_cores; rxlcore_id++) {
			char name[20];
			sprintf(name, "mbuf_pool-%d", rxlcore_id);
			/* create the mbuf pools */
			pktmbuf_pool[rxlcore_id] =
				rte_mempool_create(name, NB_MBUF,
						   MBUF_SIZE, MEMPOOL_CACHE_SIZE,
						   sizeof(struct rte_pktmbuf_pool_private),
						   rte_pktmbuf_pool_init, NULL,
						   rte_pktmbuf_init, NULL,
						   rte_lcore_to_socket_id(rxlcore_id), 0);
			if (pktmbuf_pool[rxlcore_id] == NULL)
				rte_exit(EXIT_FAILURE, "Cannot init mbuf pool\n");
		}

		/* initialize each port */
		for (portid = 0; portid < g_config.mos->netdev_table->num; portid++) {
			int num_queue = 0, eth_idx, i, queue_id;

			/* find the netdev table entry for this port */
			for (eth_idx = 0; eth_idx < g_config.mos->netdev_table->num; eth_idx++)
				if (portid == g_config.mos->netdev_table->ent[eth_idx]->ifindex)
					break;
			if (eth_idx == g_config.mos->netdev_table->num)
				continue;

			/* one queue per CPU set in the port's cpu_mask */
			for (i = 0; i < sizeof(uint64_t) * 8; i++)
				if (g_config.mos->netdev_table->ent[eth_idx]->cpu_mask & (1L << i))
					num_queue++;

			/* check port capabilities */
			rte_eth_dev_info_get(portid, &dev_info[portid]);

#if RTE_VERSION >= RTE_VERSION_NUM(18, 2, 0, 0)
			/* re-adjust rss_hf to what the device supports */
			port_conf.rx_adv_conf.rss_conf.rss_hf &= dev_info[portid].flow_type_rss_offloads;
#endif
			/* set 'num_queues' (used for GetRSSCPUCore() in util.c) */
			num_queues = num_queue;

			/* init port */
			printf("Initializing port %u... ", (unsigned) portid);
			fflush(stdout);
			ret = rte_eth_dev_configure(portid, num_queue, num_queue,
						    &port_conf);
			if (ret < 0)
				rte_exit(EXIT_FAILURE, "Cannot configure device: "
					 "err=%d, port=%u\n",
					 ret, (unsigned) portid);

			/* init one RX queue per CPU */
			fflush(stdout);
#ifdef DEBUG
			rte_eth_macaddr_get(portid, &ports_eth_addr[portid]);
#endif
			queue_id = 0;
			for (rxlcore_id = 0; rxlcore_id < g_config.mos->num_cores; rxlcore_id++) {
				if (!(g_config.mos->netdev_table->ent[eth_idx]->cpu_mask & (1L << rxlcore_id)))
					continue;
				ret = rte_eth_rx_queue_setup(portid, queue_id, nb_rxd,
							     rte_eth_dev_socket_id(portid), &rx_conf,
							     pktmbuf_pool[rxlcore_id]);
				if (ret < 0)
					rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup: "
						 "err=%d, port=%u, queueid: %d\n",
						 ret, (unsigned) portid, rxlcore_id);
				cpu_qid_map[portid][rxlcore_id] = queue_id++;
			}

			/* init one TX queue on each port per CPU (this is redundant for
			 * this app) */
			fflush(stdout);
			queue_id = 0;
			for (rxlcore_id = 0; rxlcore_id < g_config.mos->num_cores; rxlcore_id++) {
				if (!(g_config.mos->netdev_table->ent[eth_idx]->cpu_mask & (1L << rxlcore_id)))
					continue;
				ret = rte_eth_tx_queue_setup(portid, queue_id++, nb_txd,
							     rte_eth_dev_socket_id(portid), &tx_conf);
				if (ret < 0)
					rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: "
						 "err=%d, port=%u, queueid: %d\n",
						 ret, (unsigned) portid, rxlcore_id);
			}

			/* start device */
			ret = rte_eth_dev_start(portid);
			if (ret < 0)
				rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, port=%u\n",
					 ret, (unsigned) portid);

			printf("done.\n");
			rte_eth_promiscuous_enable(portid);

			/* retrieve current flow control settings per port */
			memset(&fc_conf, 0, sizeof(fc_conf));
			ret = rte_eth_dev_flow_ctrl_get(portid, &fc_conf);
			if (ret != 0)
				rte_exit(EXIT_FAILURE, "Failed to get flow control info!\n");

			/* and just disable the rx/tx flow control */
			fc_conf.mode = RTE_FC_NONE;
			ret = rte_eth_dev_flow_ctrl_set(portid, &fc_conf);
			if (ret != 0)
				rte_exit(EXIT_FAILURE, "Failed to set flow control info!: errno: %d\n",
					 ret);

#ifdef DEBUG
			printf("Port %u, MAC address: %02X:%02X:%02X:%02X:%02X:%02X\n\n",
			       (unsigned) portid,
			       ports_eth_addr[portid].addr_bytes[0],
			       ports_eth_addr[portid].addr_bytes[1],
			       ports_eth_addr[portid].addr_bytes[2],
			       ports_eth_addr[portid].addr_bytes[3],
			       ports_eth_addr[portid].addr_bytes[4],
			       ports_eth_addr[portid].addr_bytes[5]);
#endif
			/* only check for link status if the thread is master */
			check_all_ports_link_status(g_config.mos->netdev_table->num, 0xFFFFFFFF);
		}
	} else { /* g_config.mos->multiprocess && !g_config.mos->multiprocess_is_master */
		for (rxlcore_id = 0; rxlcore_id < g_config.mos->num_cores; rxlcore_id++) {
			char name[20];
			sprintf(name, "mbuf_pool-%d", rxlcore_id);
			/* look up the mbuf pools created by the master process */
			pktmbuf_pool[rxlcore_id] =
				rte_mempool_lookup(name);
			if (pktmbuf_pool[rxlcore_id] == NULL)
				rte_exit(EXIT_FAILURE, "Cannot find mbuf pool\n");
			for (portid = 0; portid < g_config.mos->netdev_table->num; portid++)
				cpu_qid_map[portid][rxlcore_id] = rxlcore_id;
		}
		/* set 'num_queues' (used for GetRSSCPUCore() in util.c) */
		num_queues = g_config.mos->num_cores;
	}
}
/*----------------------------------------------------------------------------*/
io_module_func dpdk_module_func = {
	.load_module_upper_half = dpdk_load_module_upper_half,
	.load_module_lower_half = dpdk_load_module_lower_half,
	.init_handle = dpdk_init_handle,
	.link_devices = NULL,
	.release_pkt = NULL,
	.send_pkts = dpdk_send_pkts,
	.get_wptr = dpdk_get_wptr,
	.recv_pkts = dpdk_recv_pkts,
	.get_rptr = dpdk_get_rptr,
	.get_nif = dpdk_get_nif,
	.select = dpdk_select,
	.destroy_handle = dpdk_destroy_handle,
	.dev_ioctl = dpdk_dev_ioctl,
	.set_wptr = dpdk_set_wptr,
};
/*----------------------------------------------------------------------------*/
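/*
 * dpdk_module_func is the DPDK instance of the io_module_func dispatch
 * table declared in io_module.h; the mtcp core selects an I/O module at
 * startup and calls the backend only through these pointers, roughly
 * (a sketch, assuming the core keeps the active table in an `iom` pointer):
 *
 *	iom->init_handle(ctxt);
 *	while (running) {
 *		int cnt = iom->recv_pkts(ctxt, ifidx);
 *		// ... process cnt packets via iom->get_rptr() ...
 *		iom->send_pkts(ctxt, ifidx);
 *	}
 */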