/* for io_module_func def'ns */
#include "io_module.h"
/* for mtcp related def'ns */
#include "mtcp.h"
/* for errno */
#include <errno.h>
/* for close/optind */
#include <unistd.h>
/* for logging */
#include "debug.h"
/* for num_devices_* */
#include "config.h"
/* for rte_max_eth_ports */
#include <rte_common.h>
/* for rte_eth_rxconf */
#include <rte_ethdev.h>
/* for delay funcs */
#include <rte_cycles.h>
/* for ip pseudo-chksum */
#include <rte_ip.h>
#define ENABLE_STATS_IOCTL		1
#ifdef ENABLE_STATS_IOCTL
/* for open */
#include <fcntl.h>
/* for ioctl */
#include <sys/ioctl.h>
#endif /* !ENABLE_STATS_IOCTL */
/* for retrieving rte version(s) */
#include <rte_version.h>
/*----------------------------------------------------------------------------*/
/* Essential macros */
#define MAX_RX_QUEUE_PER_LCORE		MAX_CPUS
#define MAX_TX_QUEUE_PER_PORT		MAX_CPUS

#define MBUF_SIZE 			(2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)
#define NB_MBUF				8192
#define MEMPOOL_CACHE_SIZE		256
//#define RX_IDLE_ENABLE			1
#define RX_IDLE_TIMEOUT			1	/* in micro-seconds */
#define RX_IDLE_THRESH			64
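/*
 * With RX_IDLE_ENABLE set, each per-core context counts consecutive empty
 * RX polls in rx_idle; once the count crosses RX_IDLE_THRESH, dpdk_select()
 * sleeps for RX_IDLE_TIMEOUT microseconds so an idle core does not spin at
 * 100% CPU (see dpdk_recv_pkts() and dpdk_select() below).
 */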

/*
 * RX and TX Prefetch, Host, and Write-back threshold values should be
 * carefully set for optimal performance. Consult the network
 * controller's datasheet and supporting DPDK documentation for guidance
 * on how these parameters should be set.
 */
#define RX_PTHRESH 			8 /**< Default values of RX prefetch threshold reg. */
#define RX_HTHRESH 			8 /**< Default values of RX host threshold reg. */
#define RX_WTHRESH 			4 /**< Default values of RX write-back threshold reg. */

/*
 * These default values are optimized for use with the Intel(R) 82599 10 GbE
 * Controller and the DPDK ixgbe PMD. Consider using other values for other
 * network controllers and/or network drivers.
 */
#define TX_PTHRESH 			36 /**< Default values of TX prefetch threshold reg. */
#define TX_HTHRESH			0  /**< Default values of TX host threshold reg. */
#define TX_WTHRESH			0  /**< Default values of TX write-back threshold reg. */

#define MAX_PKT_BURST			64

/*
 * Configurable number of RX/TX ring descriptors
 */
#define RTE_TEST_RX_DESC_DEFAULT	128
#define RTE_TEST_TX_DESC_DEFAULT	512

static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
/*----------------------------------------------------------------------------*/
/* packet memory pools for storing packet bufs */
static struct rte_mempool *pktmbuf_pool[MAX_CPUS] = {NULL};
static uint8_t cpu_qid_map[RTE_MAX_ETHPORTS][MAX_CPUS] = {{0}};
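/*
 * cpu_qid_map[port][cpu] records which RX/TX queue of a port a given core
 * owns; 0xFF marks "unassigned" (set in dpdk_load_module_lower_half()) and
 * makes the RX/TX paths skip that port/core pair.
 */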

//#define DEBUG				1
#ifdef DEBUG
/* ethernet addresses of ports */
static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];
#endif

static struct rte_eth_dev_info dev_info[RTE_MAX_ETHPORTS];

static struct rte_eth_conf port_conf = {
	.rxmode = {
		.mq_mode	= 	ETH_MQ_RX_RSS,
		.max_rx_pkt_len = 	ETHER_MAX_LEN,
		.split_hdr_size = 	0,
#if RTE_VERSION < RTE_VERSION_NUM(18, 5, 0, 0)
		.header_split   = 	0, /**< Header Split disabled */
		.hw_ip_checksum = 	1, /**< IP checksum offload enabled */
		.hw_vlan_filter = 	0, /**< VLAN filtering disabled */
		.jumbo_frame    = 	0, /**< Jumbo Frame Support disabled */
		.hw_strip_crc   = 	1, /**< CRC stripped by hardware */
#else
		.offloads	=	DEV_RX_OFFLOAD_CHECKSUM,
#endif
	},
	.rx_adv_conf = {
		.rss_conf = {
			.rss_key = 	NULL,
			.rss_hf = 	ETH_RSS_TCP | ETH_RSS_UDP |
					ETH_RSS_IP | ETH_RSS_L2_PAYLOAD
		},
	},
	.txmode = {
		.mq_mode = 		ETH_MQ_TX_NONE,
#if RTE_VERSION >= RTE_VERSION_NUM(18, 2, 0, 0)
		.offloads	=	DEV_TX_OFFLOAD_IPV4_CKSUM |
					DEV_TX_OFFLOAD_UDP_CKSUM |
					DEV_TX_OFFLOAD_TCP_CKSUM
#endif
	},
};

static const struct rte_eth_rxconf rx_conf = {
	.rx_thresh = {
		.pthresh = 		RX_PTHRESH, /* RX prefetch threshold reg */
		.hthresh = 		RX_HTHRESH, /* RX host threshold reg */
		.wthresh = 		RX_WTHRESH, /* RX write-back threshold reg */
	},
	.rx_free_thresh = 		32,
};

static const struct rte_eth_txconf tx_conf = {
	.tx_thresh = {
		.pthresh = 		TX_PTHRESH, /* TX prefetch threshold reg */
		.hthresh = 		TX_HTHRESH, /* TX host threshold reg */
		.wthresh = 		TX_WTHRESH, /* TX write-back threshold reg */
	},
	.tx_free_thresh = 		0, /* Use PMD default values */
	.tx_rs_thresh = 		0, /* Use PMD default values */
#if RTE_VERSION < RTE_VERSION_NUM(18, 5, 0, 0)
	/*
	 * As the example won't handle multi-segment and offload cases,
	 * set the flag by default.
	 */
	.txq_flags = 			0x0,
#endif
};

struct mbuf_table {
	unsigned len; /* length of queued packets */
	struct rte_mbuf *m_table[MAX_PKT_BURST];
};

struct dpdk_private_context {
	struct mbuf_table rmbufs[RTE_MAX_ETHPORTS];
	struct mbuf_table wmbufs[RTE_MAX_ETHPORTS];
	struct rte_mempool *pktmbuf_pool;
	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
#ifdef RX_IDLE_ENABLE
	uint8_t rx_idle;
#endif
#ifdef ENABLE_STATS_IOCTL
	int fd;
#endif /* !ENABLE_STATS_IOCTL */
} __rte_cache_aligned;

#ifdef ENABLE_STATS_IOCTL
/**
 * stats struct passed on from user space to the driver
 */
struct stats_struct {
	uint64_t tx_bytes;
	uint64_t tx_pkts;
	uint64_t rx_bytes;
	uint64_t rx_pkts;
	uint8_t qid;
	uint8_t dev;
};
#endif /* !ENABLE_STATS_IOCTL */
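/*
 * A minimal sketch of how these counters reach the dpdk-iface kernel
 * module, assuming (as dpdk_send_pkts() below does) that ioctl command 0
 * on /dev/dpdk-iface means "update stats" for the queue/port in qid/dev:
 *
 *	struct stats_struct ss = { .tx_pkts = 1, .qid = 0, .dev = 0 };
 *	int fd = open("/dev/dpdk-iface", O_RDWR);
 *	if (fd >= 0) {
 *		ioctl(fd, 0, &ss);
 *		close(fd);
 *	}
 */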
/*----------------------------------------------------------------------------*/
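/*
 * Per-thread initialization: bind the thread context to its per-core
 * mbuf pool and pre-allocate one TX burst's worth of wmbufs for every
 * registered port.
 */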
void
dpdk_init_handle(struct mtcp_thread_context *ctxt)
{
	struct dpdk_private_context *dpc;
	int i, j;
	char mempool_name[20];

	/* create and initialize private I/O module context */
	ctxt->io_private_context = calloc(1, sizeof(struct dpdk_private_context));
	if (ctxt->io_private_context == NULL) {
		TRACE_ERROR("Failed to initialize ctxt->io_private_context: "
			    "Can't allocate memory\n");
		exit(EXIT_FAILURE);
	}

	sprintf(mempool_name, "mbuf_pool-%d", ctxt->cpu);
	dpc = (struct dpdk_private_context *)ctxt->io_private_context;
	dpc->pktmbuf_pool = pktmbuf_pool[ctxt->cpu];

	/* set wmbufs correctly */
	for (j = 0; j < g_config.mos->netdev_table->num; j++) {
		/* Allocate wmbufs for each registered port */
		for (i = 0; i < MAX_PKT_BURST; i++) {
			dpc->wmbufs[j].m_table[i] = rte_pktmbuf_alloc(pktmbuf_pool[ctxt->cpu]);
			if (dpc->wmbufs[j].m_table[i] == NULL) {
				TRACE_ERROR("Failed to allocate %d:wmbuf[%d] on device %d!\n",
					    ctxt->cpu, i, j);
				exit(EXIT_FAILURE);
			}
		}
		/* set mbufs queue length to 0 to begin with */
		dpc->wmbufs[j].len = 0;
	}

#ifdef ENABLE_STATS_IOCTL
	dpc->fd = open("/dev/dpdk-iface", O_RDWR);
	if (dpc->fd == -1) {
		TRACE_ERROR("Can't open /dev/dpdk-iface for context->cpu: %d! "
			    "Are you using mlx4/mlx5 driver?\n",
			    ctxt->cpu);
	}
#endif /* !ENABLE_STATS_IOCTL */
}
/*----------------------------------------------------------------------------*/
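/*
 * Flush the packets queued on port nif to the wire, retrying
 * rte_eth_tx_burst() until the whole burst has been accepted, then refill
 * the write-mbuf table with freshly allocated mbufs. Returns the count
 * returned by the last rte_eth_tx_burst() call.
 */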
int
dpdk_send_pkts(struct mtcp_thread_context *ctxt, int nif)
{
	struct dpdk_private_context *dpc;
	mtcp_manager_t mtcp;
	int ret;
	int qid;

	dpc = (struct dpdk_private_context *)ctxt->io_private_context;
	mtcp = ctxt->mtcp_manager;
	ret = 0;
	qid = cpu_qid_map[nif][ctxt->cpu];

	/* if queue is unassigned, skip it.. */
	if (unlikely(qid == 0xFF))
		return 0;

	/* if there are packets in the queue... flush them out to the wire */
	if (dpc->wmbufs[nif].len > 0) {
		struct rte_mbuf **pkts;
#ifdef ENABLE_STATS_IOCTL
		struct stats_struct ss;
#endif /* !ENABLE_STATS_IOCTL */
		int cnt = dpc->wmbufs[nif].len;
		pkts = dpc->wmbufs[nif].m_table;
#ifdef NETSTAT
		mtcp->nstat.tx_packets[nif] += cnt;
#ifdef ENABLE_STATS_IOCTL
		if (likely(dpc->fd >= 0)) {
			ss.tx_pkts = mtcp->nstat.tx_packets[nif];
			ss.tx_bytes = mtcp->nstat.tx_bytes[nif];
			ss.rx_pkts = mtcp->nstat.rx_packets[nif];
			ss.rx_bytes = mtcp->nstat.rx_bytes[nif];
			ss.qid = ctxt->cpu;
			ss.dev = nif;
			ioctl(dpc->fd, 0, &ss);
		}
#endif /* !ENABLE_STATS_IOCTL */
#endif
		do {
			/* tx cnt # of packets */
			ret = rte_eth_tx_burst(nif, qid,
					       pkts, cnt);
			pkts += ret;
			cnt -= ret;
			/* if not all pkts were sent... then repeat the cycle */
		} while (cnt > 0);

#ifndef SHARE_IO_BUFFER
		int i;
		/* time to allocate fresh mbufs for the queue */
		for (i = 0; i < dpc->wmbufs[nif].len; i++) {
			dpc->wmbufs[nif].m_table[i] = rte_pktmbuf_alloc(pktmbuf_pool[ctxt->cpu]);
			/* error checking */
			if (unlikely(dpc->wmbufs[nif].m_table[i] == NULL)) {
				TRACE_ERROR("Failed to allocate %d:wmbuf[%d] on device %d!\n",
					    ctxt->cpu, i, nif);
				exit(EXIT_FAILURE);
			}
		}
#endif
		/* reset the len of mbufs var after flushing of packets */
		dpc->wmbufs[nif].len = 0;
	}

	return ret;
}
/*----------------------------------------------------------------------------*/
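/*
 * Reserve the next write mbuf on port nif, size it to pktsize, and return
 * a pointer to its data area (the start of the Ethernet header), or NULL
 * if the current TX burst is already full.
 */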
uint8_t *
dpdk_get_wptr(struct mtcp_thread_context *ctxt, int nif, uint16_t pktsize)
{
	struct dpdk_private_context *dpc;
	mtcp_manager_t mtcp;
	struct rte_mbuf *m;
	uint8_t *ptr;
	int len_of_mbuf;

	dpc = (struct dpdk_private_context *) ctxt->io_private_context;
	mtcp = ctxt->mtcp_manager;

	/* sanity check */
	if (unlikely(dpc->wmbufs[nif].len == MAX_PKT_BURST))
		return NULL;

	len_of_mbuf = dpc->wmbufs[nif].len;
	m = dpc->wmbufs[nif].m_table[len_of_mbuf];

	/* retrieve the right write offset */
	ptr = (void *)rte_pktmbuf_mtod(m, struct ether_hdr *);
	m->pkt_len = m->data_len = pktsize;
	m->nb_segs = 1;
	m->next = NULL;

#ifdef NETSTAT
	mtcp->nstat.tx_bytes[nif] += pktsize + ETHER_OVR;
#endif

	/* increment the len_of_mbuf var */
	dpc->wmbufs[nif].len = len_of_mbuf + 1;

	return (uint8_t *)ptr;
}
/*----------------------------------------------------------------------------*/
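/*
 * Zero-copy forward: enqueue the mbuf received at (in_nif, index) for
 * transmission on out_nif. udata64 is cleared so that free_pkts() will
 * not free a buffer whose ownership has moved to the TX path.
 */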
void
dpdk_set_wptr(struct mtcp_thread_context *ctxt, int out_nif, int in_nif, int index)
{
	struct dpdk_private_context *dpc;
	mtcp_manager_t mtcp;
	int len_of_mbuf;

	dpc = (struct dpdk_private_context *) ctxt->io_private_context;
	mtcp = ctxt->mtcp_manager;

	/* sanity check */
	if (unlikely(dpc->wmbufs[out_nif].len == MAX_PKT_BURST))
		return;

	len_of_mbuf = dpc->wmbufs[out_nif].len;
	dpc->wmbufs[out_nif].m_table[len_of_mbuf] =
		dpc->rmbufs[in_nif].m_table[index];

	dpc->wmbufs[out_nif].m_table[len_of_mbuf]->udata64 = 0;

#ifdef NETSTAT
	mtcp->nstat.tx_bytes[out_nif] += dpc->rmbufs[in_nif].m_table[index]->pkt_len + ETHER_OVR;
#endif

	/* increment the len_of_mbuf var */
	dpc->wmbufs[out_nif].len = len_of_mbuf + 1;

	return;
}
/*----------------------------------------------------------------------------*/
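/*
 * Free the mbufs of the last RX burst that are still owned by the stack
 * (tagged udata64 == 1 in dpdk_get_rptr()); mbufs handed to the TX path
 * by dpdk_set_wptr() are skipped.
 */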
static inline void
free_pkts(struct rte_mbuf **mtable, unsigned len)
{
	unsigned i;

	/* free the freaking packets */
	for (i = 0; i < len; i++) {
		if (mtable[i]->udata64 == 1) {
			rte_pktmbuf_free_seg(mtable[i]);
			if (i + 1 < len)
				RTE_MBUF_PREFETCH_TO_FREE(mtable[i + 1]);
		}
	}
}
/*----------------------------------------------------------------------------*/
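/*
 * Poll RX queue qid of port ifidx: release the mbufs of the previous
 * burst, then read up to MAX_PKT_BURST new packets into pkts_burst.
 * Returns the number of packets received.
 */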
int32_t
dpdk_recv_pkts(struct mtcp_thread_context *ctxt, int ifidx)
{
	struct dpdk_private_context *dpc;
	int ret;
	uint8_t qid;

	dpc = (struct dpdk_private_context *) ctxt->io_private_context;
	qid = cpu_qid_map[ifidx][ctxt->cpu];

	/* if queue is unassigned, skip it.. */
	if (qid == 0xFF)
		return 0;

	if (dpc->rmbufs[ifidx].len != 0) {
		free_pkts(dpc->rmbufs[ifidx].m_table, dpc->rmbufs[ifidx].len);
		dpc->rmbufs[ifidx].len = 0;
	}

	ret = rte_eth_rx_burst((uint8_t)ifidx, qid,
			       dpc->pkts_burst, MAX_PKT_BURST);
#ifdef RX_IDLE_ENABLE
	dpc->rx_idle = (likely(ret != 0)) ? 0 : dpc->rx_idle + 1;
#endif
	dpc->rmbufs[ifidx].len = ret;

	return ret;
}
/*----------------------------------------------------------------------------*/
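/*
 * Return a pointer to (and the length of) the index-th packet of the
 * current RX burst, tagging the mbuf as locally consumed so the next
 * dpdk_recv_pkts() call frees it.
 */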
uint8_t *
dpdk_get_rptr(struct mtcp_thread_context *ctxt, int ifidx, int index, uint16_t *len)
{
	struct dpdk_private_context *dpc;
	struct rte_mbuf *m;
	uint8_t *pktbuf;

	dpc = (struct dpdk_private_context *) ctxt->io_private_context;

	m = dpc->pkts_burst[index];
	/* tag to check if the packet is a local or a forwarded pkt */
	m->udata64 = 1;
	/* don't enable pre-fetching... performance goes down */
	//rte_prefetch0(rte_pktmbuf_mtod(m, void *));
	*len = m->pkt_len;
	pktbuf = rte_pktmbuf_mtod(m, uint8_t *);

	/* enqueue the pkt ptr in mbuf */
	dpc->rmbufs[ifidx].m_table[index] = m;

	return pktbuf;
}
/*----------------------------------------------------------------------------*/
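/*
 * Resolve an interface request to a DPDK port id by MAC address; the
 * port MAC table is cached on first use. Returns -1 if no port matches.
 */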
int
dpdk_get_nif(struct ifreq *ifr)
{
	int i;
	static int num_dev = -1;
	static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];

	/* get mac addr entries of 'detected' dpdk ports */
	if (num_dev < 0) {
#if RTE_VERSION < RTE_VERSION_NUM(18, 5, 0, 0)
		num_dev = rte_eth_dev_count();
#else
		num_dev = rte_eth_dev_count_avail();
#endif
		for (i = 0; i < num_dev; i++)
			rte_eth_macaddr_get(i, &ports_eth_addr[i]);
	}

	for (i = 0; i < num_dev; i++)
		if (!memcmp(&ifr->ifr_addr.sa_data[0], &ports_eth_addr[i], ETH_ALEN))
			return i;

	return -1;
}
/*----------------------------------------------------------------------------*/
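/*
 * Backoff hook for the per-core event loop: with RX_IDLE_ENABLE, sleep
 * briefly once RX_IDLE_THRESH consecutive polls came back empty;
 * otherwise this is a no-op.
 */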
int32_t
dpdk_select(struct mtcp_thread_context *ctxt)
{
#ifdef RX_IDLE_ENABLE
	struct dpdk_private_context *dpc;

	dpc = (struct dpdk_private_context *) ctxt->io_private_context;
	if (dpc->rx_idle > RX_IDLE_THRESH) {
		dpc->rx_idle = 0;
		usleep(RX_IDLE_TIMEOUT);
	}
#endif
	return 0;
}
/*----------------------------------------------------------------------------*/
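/*
 * Per-thread teardown: hand each port's wmbufs to free_pkts(), close the
 * stats fd (if open), and free the private context.
 */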
void
dpdk_destroy_handle(struct mtcp_thread_context *ctxt)
{
	struct dpdk_private_context *dpc;
	int i;

	dpc = (struct dpdk_private_context *) ctxt->io_private_context;

	/* free wmbufs */
	for (i = 0; i < g_config.mos->netdev_table->num; i++)
		free_pkts(dpc->wmbufs[i].m_table, MAX_PKT_BURST);

#ifdef ENABLE_STATS_IOCTL
	/* free fd */
	if (dpc->fd >= 0)
		close(dpc->fd);
#endif /* !ENABLE_STATS_IOCTL */

	/* free it all up */
	free(dpc);
}
/*----------------------------------------------------------------------------*/
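/*
 * As in the standard DPDK examples: poll the masked ports every 100 ms
 * for up to ~9 s, then print the final per-port link status.
 */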
static void
check_all_ports_link_status(uint8_t port_num, uint32_t port_mask)
{
#define CHECK_INTERVAL 			100 /* 100ms */
#define MAX_CHECK_TIME 			90 /* 9s (90 * 100ms) in total */

	uint8_t portid, count, all_ports_up, print_flag = 0;
	struct rte_eth_link link;

	printf("\nChecking link status");
	fflush(stdout);
	for (count = 0; count <= MAX_CHECK_TIME; count++) {
		all_ports_up = 1;
		for (portid = 0; portid < port_num; portid++) {
			if ((port_mask & (1 << portid)) == 0)
				continue;
			memset(&link, 0, sizeof(link));
			rte_eth_link_get_nowait(portid, &link);
			/* print link status if flag set */
			if (print_flag == 1) {
				if (link.link_status)
					printf("Port %d Link Up - speed %u "
					       "Mbps - %s\n", (uint8_t)portid,
					       (unsigned)link.link_speed,
					       (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
					       ("full-duplex") : ("half-duplex"));
				else
					printf("Port %d Link Down\n",
					       (uint8_t)portid);
				continue;
			}
			/* clear all_ports_up flag if any link down */
			if (link.link_status == 0) {
				all_ports_up = 0;
				break;
			}
		}
		/* after finally printing all link status, get out */
		if (print_flag == 1)
			break;

		if (all_ports_up == 0) {
			printf(".");
			fflush(stdout);
			rte_delay_ms(CHECK_INTERVAL);
		}

		/* set the print_flag if all ports up or timeout */
		if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
			print_flag = 1;
			printf("done\n");
		}
	}
}
/*----------------------------------------------------------------------------*/
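/*
 * Driver-level control interface for the stack: request IP/TCP checksum
 * offload on the most recently queued TX packet, fetch a received
 * packet's RSS hash, or report the PMD driver name. Returns 0 on
 * success, -1 on an unknown command.
 */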
int32_t
dpdk_dev_ioctl(struct mtcp_thread_context *ctx, int nif, int cmd, void *argp)
{
	struct dpdk_private_context *dpc;
	struct rte_mbuf *m;
	int len_of_mbuf;
	struct iphdr *iph;
	struct tcphdr *tcph;
	RssInfo *rss_i;
	void **argpptr = (void **)argp;

	if (cmd == DRV_NAME) {
		*argpptr = (void *)dev_info[nif].driver_name;
		return 0;
	}

	iph = (struct iphdr *)argp;
	dpc = (struct dpdk_private_context *)ctx->io_private_context;
	len_of_mbuf = dpc->wmbufs[nif].len;
	rss_i = NULL;

	switch (cmd) {
	case PKT_TX_IP_CSUM:
		m = dpc->wmbufs[nif].m_table[len_of_mbuf - 1];
		m->ol_flags = PKT_TX_IP_CKSUM | PKT_TX_IPV4;
		m->l2_len = sizeof(struct ether_hdr);
		m->l3_len = (iph->ihl<<2);
		break;
	case PKT_TX_TCP_CSUM:
		m = dpc->wmbufs[nif].m_table[len_of_mbuf - 1];
		tcph = (struct tcphdr *)((unsigned char *)iph + (iph->ihl<<2));
		m->ol_flags |= PKT_TX_TCP_CKSUM;
		tcph->check = rte_ipv4_phdr_cksum((struct ipv4_hdr *)iph, m->ol_flags);
		break;
	case PKT_RX_RSS:
		rss_i = (RssInfo *)argp;
		m = dpc->pkts_burst[rss_i->pktidx];
		rss_i->hash_value = m->hash.rss;
		break;
	default:
		goto dev_ioctl_err;
	}

	return 0;
 dev_ioctl_err:
	return -1;
}
/*----------------------------------------------------------------------------*/
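/*
 * First-stage module load: quiet DPDK logging, build the EAL argument
 * vector (core mask, memory channels, auto proc type) from the mOS
 * config, and initialize the EAL. For a 4-core, 4-channel config this
 * amounts to launching with (hypothetical values):
 *
 *	-c F -n 4 --proc-type=auto
 */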
void
dpdk_load_module_upper_half(void)
{
	int cpu = g_config.mos->num_cores, ret;
	uint32_t cpumask = 0;
	char cpumaskbuf[10];
	char mem_channels[5];

	/* set the log level */
#if 0
	rte_set_log_type(RTE_LOGTYPE_PMD, 0);
	rte_set_log_type(RTE_LOGTYPE_MALLOC, 0);
	rte_set_log_type(RTE_LOGTYPE_MEMPOOL, 0);
	rte_set_log_type(RTE_LOGTYPE_RING, 0);
	rte_set_log_level(RTE_LOG_WARNING);
#else
	rte_log_set_level(RTE_LOGTYPE_PMD, 0);
	rte_log_set_level(RTE_LOGTYPE_MALLOC, 0);
	rte_log_set_level(RTE_LOGTYPE_MEMPOOL, 0);
	rte_log_set_level(RTE_LOGTYPE_RING, 0);
	rte_log_set_global_level(RTE_LOG_WARNING);
#endif
	/* get the cpu mask */
	for (ret = 0; ret < cpu; ret++)
		cpumask = (cpumask | (1 << ret));
	sprintf(cpumaskbuf, "%X", cpumask);

	/* get the mem channels per socket */
	if (g_config.mos->nb_mem_channels == 0) {
		TRACE_ERROR("DPDK module requires # of memory channels "
				"per socket parameter!\n");
		exit(EXIT_FAILURE);
	}
	sprintf(mem_channels, "%d", g_config.mos->nb_mem_channels);

	/* initialize the rte env first, what a waste of implementation effort! */
	char *argv[] = {"",
			"-c",
			cpumaskbuf,
			"-n",
			mem_channels,
			"--proc-type=auto",
			""
	};
	const int argc = 6;

	/*
	 * re-set the getopt extern variable optind.
	 * this issue was a bitch to debug:
	 * rte_eal_init() internally calls getopt(), and mtcp applications
	 * that also use an `external' getopt will crash violently if
	 * optind is not reset to zero prior to calling the func below.
	 * see man getopt(3) for more details.
	 */
	optind = 0;

	/* initialize the dpdk eal env */
	ret = rte_eal_init(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid EAL args!\n");
}
/*----------------------------------------------------------------------------*/
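/*
 * Second-stage module load (after EAL init): create (master process) or
 * look up (secondary process) the per-core mbuf pools, then configure
 * every registered port with one RX/TX queue per core in its cpu_mask,
 * start the port, and disable flow control.
 */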
void
dpdk_load_module_lower_half(void)
{
	int portid, rxlcore_id, ret;
	struct rte_eth_fc_conf fc_conf;	/* for Ethernet flow control settings */
	/* setting the rss key */
	static const uint8_t key[] = {
		0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
		0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
		0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
		0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
		0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
		0x05, 0x05
	};

	port_conf.rx_adv_conf.rss_conf.rss_key = (uint8_t *)&key;
	port_conf.rx_adv_conf.rss_conf.rss_key_len = sizeof(key);

	/* resetting cpu_qid mapping */
	memset(cpu_qid_map, 0xFF, sizeof(cpu_qid_map));

	if (!g_config.mos->multiprocess
			|| (g_config.mos->multiprocess && g_config.mos->multiprocess_is_master)) {
		for (rxlcore_id = 0; rxlcore_id < g_config.mos->num_cores; rxlcore_id++) {
			char name[20];
			sprintf(name, "mbuf_pool-%d", rxlcore_id);
			/* create the mbuf pools */
			pktmbuf_pool[rxlcore_id] =
				rte_mempool_create(name, NB_MBUF,
						   MBUF_SIZE, MEMPOOL_CACHE_SIZE,
						   sizeof(struct rte_pktmbuf_pool_private),
						   rte_pktmbuf_pool_init, NULL,
						   rte_pktmbuf_init, NULL,
						   rte_lcore_to_socket_id(rxlcore_id), 0);
			if (pktmbuf_pool[rxlcore_id] == NULL)
				rte_exit(EXIT_FAILURE, "Cannot init mbuf pool\n");
		}

		/* Initialise each port */
		for (portid = 0; portid < g_config.mos->netdev_table->num; portid++) {
			int num_queue = 0, eth_idx, i, queue_id;
			for (eth_idx = 0; eth_idx < g_config.mos->netdev_table->num; eth_idx++)
				if (portid == g_config.mos->netdev_table->ent[eth_idx]->ifindex)
					break;
			if (eth_idx == g_config.mos->netdev_table->num)
				continue;
			for (i = 0; i < sizeof(uint64_t) * 8; i++)
				if (g_config.mos->netdev_table->ent[eth_idx]->cpu_mask & (1L << i))
					num_queue++;

			/* check port capabilities */
			rte_eth_dev_info_get(portid, &dev_info[portid]);

#if RTE_VERSION >= RTE_VERSION_NUM(18, 2, 0, 0)
			/* re-adjust rss_hf */
			port_conf.rx_adv_conf.rss_conf.rss_hf &= dev_info[portid].flow_type_rss_offloads;
#endif
			/* set 'num_queues' (used for GetRSSCPUCore() in util.c) */
			num_queues = num_queue;

			/* init port */
			printf("Initializing port %u... ", (unsigned) portid);
			fflush(stdout);
			ret = rte_eth_dev_configure(portid, num_queue, num_queue,
						    &port_conf);
			if (ret < 0)
				rte_exit(EXIT_FAILURE, "Cannot configure device:"
					 "err=%d, port=%u\n",
					 ret, (unsigned) portid);

			/* init one RX queue per CPU */
			fflush(stdout);
#ifdef DEBUG
			rte_eth_macaddr_get(portid, &ports_eth_addr[portid]);
#endif
			queue_id = 0;
			for (rxlcore_id = 0; rxlcore_id < g_config.mos->num_cores; rxlcore_id++) {
				if (!(g_config.mos->netdev_table->ent[eth_idx]->cpu_mask & (1L << rxlcore_id)))
					continue;
				ret = rte_eth_rx_queue_setup(portid, queue_id, nb_rxd,
						rte_eth_dev_socket_id(portid), &rx_conf,
						pktmbuf_pool[rxlcore_id]);
				if (ret < 0)
					rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup:"
						 "err=%d, port=%u, queueid: %d\n",
						 ret, (unsigned) portid, rxlcore_id);
				cpu_qid_map[portid][rxlcore_id] = queue_id++;
			}

			/* init one TX queue on each port per CPU (this is redundant for
			 * this app) */
			fflush(stdout);
			queue_id = 0;
			for (rxlcore_id = 0; rxlcore_id < g_config.mos->num_cores; rxlcore_id++) {
				if (!(g_config.mos->netdev_table->ent[eth_idx]->cpu_mask & (1L << rxlcore_id)))
					continue;
				ret = rte_eth_tx_queue_setup(portid, queue_id++, nb_txd,
						rte_eth_dev_socket_id(portid), &tx_conf);
				if (ret < 0)
					rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup:"
						 "err=%d, port=%u, queueid: %d\n",
						 ret, (unsigned) portid, rxlcore_id);
			}

			/* Start device */
			ret = rte_eth_dev_start(portid);
			if (ret < 0)
				rte_exit(EXIT_FAILURE, "rte_eth_dev_start:err=%d, port=%u\n",
					 ret, (unsigned) portid);

			printf("done: \n");
			rte_eth_promiscuous_enable(portid);

			/* retrieve current flow control settings per port */
			memset(&fc_conf, 0, sizeof(fc_conf));
			ret = rte_eth_dev_flow_ctrl_get(portid, &fc_conf);
			if (ret != 0) {
				rte_exit(EXIT_FAILURE, "Failed to get flow control info!\n");
			}

			/* and just disable the rx/tx flow control */
			fc_conf.mode = RTE_FC_NONE;
			ret = rte_eth_dev_flow_ctrl_set(portid, &fc_conf);
			if (ret != 0) {
				rte_exit(EXIT_FAILURE, "Failed to set flow control info!: errno: %d\n",
					 ret);
			}

#ifdef DEBUG
			printf("Port %u, MAC address: %02X:%02X:%02X:%02X:%02X:%02X\n\n",
			       (unsigned) portid,
			       ports_eth_addr[portid].addr_bytes[0],
			       ports_eth_addr[portid].addr_bytes[1],
			       ports_eth_addr[portid].addr_bytes[2],
			       ports_eth_addr[portid].addr_bytes[3],
			       ports_eth_addr[portid].addr_bytes[4],
			       ports_eth_addr[portid].addr_bytes[5]);
#endif
			/* only check for link status if the thread is master */
			check_all_ports_link_status(g_config.mos->netdev_table->num, 0xFFFFFFFF);
		}
	} else { /* g_config.mos->multiprocess && !g_config.mos->multiprocess_is_master */
		for (rxlcore_id = 0; rxlcore_id < g_config.mos->num_cores; rxlcore_id++) {
			char name[20];
			sprintf(name, "mbuf_pool-%d", rxlcore_id);
			/* initialize the mbuf pools */
			pktmbuf_pool[rxlcore_id] =
				rte_mempool_lookup(name);
			if (pktmbuf_pool[rxlcore_id] == NULL)
				rte_exit(EXIT_FAILURE, "Cannot init mbuf pool\n");
			for (portid = 0; portid < g_config.mos->netdev_table->num; portid++)
				cpu_qid_map[portid][rxlcore_id] = rxlcore_id;
		}
		/* set 'num_queues' (used for GetRSSCPUCore() in util.c) */
		num_queues = g_config.mos->num_cores;
	}
}
/*----------------------------------------------------------------------------*/
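/*
 * I/O module hook-up: mOS dispatches packet I/O through this function
 * table (see io_module.h). Entries left NULL are features this module
 * does not provide.
 */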
io_module_func dpdk_module_func = {
	.load_module_upper_half	= dpdk_load_module_upper_half,
	.load_module_lower_half	= dpdk_load_module_lower_half,
	.init_handle		= dpdk_init_handle,
	.link_devices		= NULL,
	.release_pkt		= NULL,
	.send_pkts		= dpdk_send_pkts,
	.get_wptr		= dpdk_get_wptr,
	.recv_pkts		= dpdk_recv_pkts,
	.get_rptr		= dpdk_get_rptr,
	.get_nif		= dpdk_get_nif,
	.select			= dpdk_select,
	.destroy_handle		= dpdk_destroy_handle,
	.dev_ioctl		= dpdk_dev_ioctl,
	.set_wptr		= dpdk_set_wptr,
};
/*----------------------------------------------------------------------------*/