/* for io_module_func def'ns */
#include "io_module.h"
/* for mtcp related def'ns */
#include "mtcp.h"
/* for errno */
#include <errno.h>
/* for close/optind */
#include <unistd.h>
/* for logging */
#include "debug.h"
/* for num_devices_* */
#include "config.h"
/* for rte_max_eth_ports */
#include <rte_common.h>
/* for rte_eth_rxconf */
#include <rte_ethdev.h>
/* for delay funcs */
#include <rte_cycles.h>
/* for ip pseudo-chksum */
#include <rte_ip.h>
#define ENABLE_STATS_IOCTL		1
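/*
 * With ENABLE_STATS_IOCTL defined, each thread pushes its TX/RX counters
 * to the dpdk-iface kernel module through ioctl() calls on /dev/dpdk-iface
 * (opened in dpdk_init_handle() below). Disable it on setups without that
 * device node, e.g. when running over the mlx4/mlx5 drivers.
 */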
#ifdef ENABLE_STATS_IOCTL
/* for open */
#include <fcntl.h>
/* for ioctl */
#include <sys/ioctl.h>
#endif /* !ENABLE_STATS_IOCTL */
/*----------------------------------------------------------------------------*/
/* Essential macros */
#define MAX_RX_QUEUE_PER_LCORE		MAX_CPUS
#define MAX_TX_QUEUE_PER_PORT		MAX_CPUS

#define MBUF_SIZE 			(2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)
#define NB_MBUF				8192
#define MEMPOOL_CACHE_SIZE		256
//#define RX_IDLE_ENABLE			1
#define RX_IDLE_TIMEOUT			1	/* in micro-seconds */
#define RX_IDLE_THRESH			64
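/*
 * RX idle backoff: when RX_IDLE_ENABLE is defined and a thread sees more
 * than RX_IDLE_THRESH consecutive empty polls, dpdk_select() puts it to
 * sleep for RX_IDLE_TIMEOUT micro-seconds instead of busy-polling.
 */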

/*
 * RX and TX Prefetch, Host, and Write-back threshold values should be
 * carefully set for optimal performance. Consult the network
 * controller's datasheet and supporting DPDK documentation for guidance
 * on how these parameters should be set.
 */
#define RX_PTHRESH 			8 /**< Default values of RX prefetch threshold reg. */
#define RX_HTHRESH 			8 /**< Default values of RX host threshold reg. */
#define RX_WTHRESH 			4 /**< Default values of RX write-back threshold reg. */

/*
 * These default values are optimized for use with the Intel(R) 82599 10 GbE
 * Controller and the DPDK ixgbe PMD. Consider using other values for other
 * network controllers and/or network drivers.
 */
#define TX_PTHRESH 			36 /**< Default values of TX prefetch threshold reg. */
#define TX_HTHRESH			0  /**< Default values of TX host threshold reg. */
#define TX_WTHRESH			0  /**< Default values of TX write-back threshold reg. */

#define MAX_PKT_BURST			64

/*
 * Configurable number of RX/TX ring descriptors
 */
#define RTE_TEST_RX_DESC_DEFAULT	128
#define RTE_TEST_TX_DESC_DEFAULT	512

static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
/*----------------------------------------------------------------------------*/
/* packet memory pools for storing packet bufs */
static struct rte_mempool *pktmbuf_pool[MAX_CPUS] = {NULL};
static uint8_t cpu_qid_map[RTE_MAX_ETHPORTS][MAX_CPUS] = {{0}};
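/*
 * cpu_qid_map[port][cpu] records which RX/TX queue id the given core owns
 * on the given port; 0xFF marks an unassigned (port, core) pair and makes
 * dpdk_send_pkts()/dpdk_recv_pkts() skip that queue.
 */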

//#define DEBUG				1
#ifdef DEBUG
/* ethernet addresses of ports */
static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];
#endif

static struct rte_eth_dev_info dev_info[RTE_MAX_ETHPORTS];

static struct rte_eth_conf port_conf = {
	.rxmode = {
		.mq_mode	= 	ETH_MQ_RX_RSS,
		.max_rx_pkt_len = 	ETHER_MAX_LEN,
		.split_hdr_size = 	0,
		.header_split   = 	0, /**< Header Split disabled */
		.hw_ip_checksum = 	1, /**< IP checksum offload enabled */
		.hw_vlan_filter = 	0, /**< VLAN filtering disabled */
		.jumbo_frame    = 	0, /**< Jumbo Frame Support disabled */
		.hw_strip_crc   = 	1, /**< CRC stripped by hardware */
	},
	.rx_adv_conf = {
		.rss_conf = {
			.rss_key = 	NULL,
			.rss_hf = 	ETH_RSS_TCP | ETH_RSS_UDP |
					ETH_RSS_IP | ETH_RSS_L2_PAYLOAD
		},
	},
	.txmode = {
		.mq_mode = 		ETH_MQ_TX_NONE,
	},
};

static const struct rte_eth_rxconf rx_conf = {
	.rx_thresh = {
		.pthresh = 		RX_PTHRESH, /* RX prefetch threshold reg */
		.hthresh = 		RX_HTHRESH, /* RX host threshold reg */
		.wthresh = 		RX_WTHRESH, /* RX write-back threshold reg */
	},
	.rx_free_thresh = 		32,
};

static const struct rte_eth_txconf tx_conf = {
	.tx_thresh = {
		.pthresh = 		TX_PTHRESH, /* TX prefetch threshold reg */
		.hthresh = 		TX_HTHRESH, /* TX host threshold reg */
		.wthresh = 		TX_WTHRESH, /* TX write-back threshold reg */
	},
	.tx_free_thresh = 		0, /* Use PMD default values */
	.tx_rs_thresh = 		0, /* Use PMD default values */
	/*
	 * Keep txq_flags at 0 so the PMD retains multi-segment and offload
	 * support (the TX checksum offloads below depend on this).
	 */
	.txq_flags = 			0x0,
};

struct mbuf_table {
	unsigned len; /* length of queued packets */
	struct rte_mbuf *m_table[MAX_PKT_BURST];
};

struct dpdk_private_context {
	struct mbuf_table rmbufs[RTE_MAX_ETHPORTS];
	struct mbuf_table wmbufs[RTE_MAX_ETHPORTS];
	struct rte_mempool *pktmbuf_pool;
	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
#ifdef RX_IDLE_ENABLE
	uint8_t rx_idle;
#endif
#ifdef ENABLE_STATS_IOCTL
	int fd;
#endif /* !ENABLE_STATS_IOCTL */
} __rte_cache_aligned;
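/*
 * Per-thread I/O state: allocated in dpdk_init_handle(), hung off
 * mtcp_thread_context->io_private_context, and torn down in
 * dpdk_destroy_handle(). rmbufs/wmbufs queue one RX/TX burst per port.
 */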

#ifdef ENABLE_STATS_IOCTL
/**
 * stats struct passed on from user space to the driver
 */
struct stats_struct {
	uint64_t tx_bytes;
	uint64_t tx_pkts;
	uint64_t rx_bytes;
	uint64_t rx_pkts;
	uint8_t qid;
	uint8_t dev;
};
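/*
 * The struct is handed to the dpdk-iface driver with ioctl request code 0
 * (see dpdk_send_pkts() below), e.g.:
 *
 *	struct stats_struct ss = { .qid = ctxt->cpu, .dev = nif };
 *	ioctl(dpc->fd, 0, &ss);
 */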
#endif /* !ENABLE_STATS_IOCTL */
/*----------------------------------------------------------------------------*/
void
dpdk_init_handle(struct mtcp_thread_context *ctxt)
{
	struct dpdk_private_context *dpc;
	int i, j;
	char mempool_name[20];

	/* create and initialize private I/O module context */
	ctxt->io_private_context = calloc(1, sizeof(struct dpdk_private_context));
	if (ctxt->io_private_context == NULL) {
		TRACE_ERROR("Failed to initialize ctxt->io_private_context: "
			    "Can't allocate memory\n");
		exit(EXIT_FAILURE);
	}

	sprintf(mempool_name, "mbuf_pool-%d", ctxt->cpu);
	dpc = (struct dpdk_private_context *)ctxt->io_private_context;
	dpc->pktmbuf_pool = pktmbuf_pool[ctxt->cpu];

	/* set wmbufs correctly */
	for (j = 0; j < g_config.mos->netdev_table->num; j++) {
		/* Allocate wmbufs for each registered port */
		for (i = 0; i < MAX_PKT_BURST; i++) {
			dpc->wmbufs[j].m_table[i] = rte_pktmbuf_alloc(pktmbuf_pool[ctxt->cpu]);
			if (dpc->wmbufs[j].m_table[i] == NULL) {
				TRACE_ERROR("Failed to allocate %d:wmbuf[%d] on device %d!\n",
					    ctxt->cpu, i, j);
				exit(EXIT_FAILURE);
			}
		}
		/* set mbufs queue length to 0 to begin with */
		dpc->wmbufs[j].len = 0;
	}

#ifdef ENABLE_STATS_IOCTL
	dpc->fd = open("/dev/dpdk-iface", O_RDWR);
	if (dpc->fd == -1) {
		TRACE_ERROR("Can't open /dev/dpdk-iface for context->cpu: %d! "
			    "Are you using the mlx4/mlx5 driver?\n",
			    ctxt->cpu);
	}
#endif /* !ENABLE_STATS_IOCTL */
}
/*----------------------------------------------------------------------------*/
int
dpdk_send_pkts(struct mtcp_thread_context *ctxt, int nif)
{
	struct dpdk_private_context *dpc;
	mtcp_manager_t mtcp;
	int ret;
	int qid;

	dpc = (struct dpdk_private_context *)ctxt->io_private_context;
	mtcp = ctxt->mtcp_manager;
	ret = 0;
	qid = cpu_qid_map[nif][ctxt->cpu];

	/* if the queue is unassigned, skip it */
	if (unlikely(qid == 0xFF))
		return 0;

	/* if there are packets in the queue, flush them out to the wire */
	if (dpc->wmbufs[nif].len > 0) {
		struct rte_mbuf **pkts;
#ifdef ENABLE_STATS_IOCTL
		struct stats_struct ss;
#endif /* !ENABLE_STATS_IOCTL */
		int cnt = dpc->wmbufs[nif].len;
		pkts = dpc->wmbufs[nif].m_table;
#ifdef NETSTAT
		mtcp->nstat.tx_packets[nif] += cnt;
#ifdef ENABLE_STATS_IOCTL
		if (likely(dpc->fd >= 0)) {
			ss.tx_pkts = mtcp->nstat.tx_packets[nif];
			ss.tx_bytes = mtcp->nstat.tx_bytes[nif];
			ss.rx_pkts = mtcp->nstat.rx_packets[nif];
			ss.rx_bytes = mtcp->nstat.rx_bytes[nif];
			ss.qid = ctxt->cpu;
			ss.dev = nif;
			ioctl(dpc->fd, 0, &ss);
		}
#endif /* !ENABLE_STATS_IOCTL */
#endif
		do {
			/* tx cnt # of packets */
			ret = rte_eth_tx_burst(nif, qid,
					       pkts, cnt);
			pkts += ret;
			cnt -= ret;
			/* if not all pkts were sent, repeat the cycle */
		} while (cnt > 0);

#ifndef SHARE_IO_BUFFER
		int i;
		/* time to allocate fresh mbufs for the queue */
		for (i = 0; i < dpc->wmbufs[nif].len; i++) {
			dpc->wmbufs[nif].m_table[i] = rte_pktmbuf_alloc(pktmbuf_pool[ctxt->cpu]);
			/* error checking */
			if (unlikely(dpc->wmbufs[nif].m_table[i] == NULL)) {
				TRACE_ERROR("Failed to allocate %d:wmbuf[%d] on device %d!\n",
					    ctxt->cpu, i, nif);
				exit(EXIT_FAILURE);
			}
		}
#endif
		/* reset the mbuf queue length after the packets are flushed */
		dpc->wmbufs[nif].len = 0;
	}

	return ret;
}
/*----------------------------------------------------------------------------*/
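/*
 * Typical TX path through this module (sketch; `frame' and `pktlen' are
 * illustrative names, not identifiers from this file):
 *
 *	uint8_t *buf = dpdk_get_wptr(ctxt, nif, pktlen); // reserve next wmbuf
 *	memcpy(buf, frame, pktlen);                      // write the frame
 *	dpdk_send_pkts(ctxt, nif);                       // flush the burst
 */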
uint8_t *
dpdk_get_wptr(struct mtcp_thread_context *ctxt, int nif, uint16_t pktsize)
{
	struct dpdk_private_context *dpc;
	mtcp_manager_t mtcp;
	struct rte_mbuf *m;
	uint8_t *ptr;
	int len_of_mbuf;

	dpc = (struct dpdk_private_context *) ctxt->io_private_context;
	mtcp = ctxt->mtcp_manager;

	/* sanity check */
	if (unlikely(dpc->wmbufs[nif].len == MAX_PKT_BURST))
		return NULL;

	len_of_mbuf = dpc->wmbufs[nif].len;
	m = dpc->wmbufs[nif].m_table[len_of_mbuf];

	/* retrieve the right write offset */
	ptr = rte_pktmbuf_mtod(m, uint8_t *);
	m->pkt_len = m->data_len = pktsize;
	m->nb_segs = 1;
	m->next = NULL;

#ifdef NETSTAT
	mtcp->nstat.tx_bytes[nif] += pktsize + ETHER_OVR;
#endif

	/* increment the len_of_mbuf var */
	dpc->wmbufs[nif].len = len_of_mbuf + 1;

	return ptr;
}
/*----------------------------------------------------------------------------*/
void
dpdk_set_wptr(struct mtcp_thread_context *ctxt, int out_nif, int in_nif, int index)
{
	struct dpdk_private_context *dpc;
	mtcp_manager_t mtcp;
	int len_of_mbuf;

	dpc = (struct dpdk_private_context *) ctxt->io_private_context;
	mtcp = ctxt->mtcp_manager;

	/* sanity check */
	if (unlikely(dpc->wmbufs[out_nif].len == MAX_PKT_BURST))
		return;

	len_of_mbuf = dpc->wmbufs[out_nif].len;
	dpc->wmbufs[out_nif].m_table[len_of_mbuf] =
		dpc->rmbufs[in_nif].m_table[index];

	dpc->wmbufs[out_nif].m_table[len_of_mbuf]->udata64 = 0;

#ifdef NETSTAT
	mtcp->nstat.tx_bytes[out_nif] += dpc->rmbufs[in_nif].m_table[index]->pkt_len + ETHER_OVR;
#endif

	/* increment the len_of_mbuf var */
	dpc->wmbufs[out_nif].len = len_of_mbuf + 1;

	return;
}
/*----------------------------------------------------------------------------*/
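/*
 * Ownership convention for received mbufs: dpdk_get_rptr() tags an mbuf
 * with udata64 = 1 (consumed locally; freed below on the next RX pass),
 * while dpdk_set_wptr() resets it to 0 when the mbuf is re-queued for
 * forwarding, in which case the PMD frees it after transmission.
 */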
static inline void
free_pkts(struct rte_mbuf **mtable, unsigned len)
{
	unsigned i;

	/* free the packets that were consumed locally */
	for (i = 0; i < len; i++) {
		if (mtable[i]->udata64 == 1) {
			rte_pktmbuf_free_seg(mtable[i]);
			if (i + 1 < len)
				RTE_MBUF_PREFETCH_TO_FREE(mtable[i+1]);
		}
	}
}
/*----------------------------------------------------------------------------*/
int32_t
dpdk_recv_pkts(struct mtcp_thread_context *ctxt, int ifidx)
{
	struct dpdk_private_context *dpc;
	int ret;
	uint8_t qid;

	dpc = (struct dpdk_private_context *) ctxt->io_private_context;
	qid = cpu_qid_map[ifidx][ctxt->cpu];

	/* if the queue is unassigned, skip it */
	if (qid == 0xFF)
		return 0;

	if (dpc->rmbufs[ifidx].len != 0) {
		free_pkts(dpc->rmbufs[ifidx].m_table, dpc->rmbufs[ifidx].len);
		dpc->rmbufs[ifidx].len = 0;
	}

	ret = rte_eth_rx_burst((uint8_t)ifidx, qid,
			       dpc->pkts_burst, MAX_PKT_BURST);
#ifdef RX_IDLE_ENABLE
	dpc->rx_idle = (likely(ret != 0)) ? 0 : dpc->rx_idle + 1;
#endif
	dpc->rmbufs[ifidx].len = ret;

	return ret;
}
/*----------------------------------------------------------------------------*/
uint8_t *
dpdk_get_rptr(struct mtcp_thread_context *ctxt, int ifidx, int index, uint16_t *len)
{
	struct dpdk_private_context *dpc;
	struct rte_mbuf *m;
	uint8_t *pktbuf;

	dpc = (struct dpdk_private_context *) ctxt->io_private_context;

	m = dpc->pkts_burst[index];
	/* tag to check if the packet is a local or a forwarded pkt */
	m->udata64 = 1;
	/* don't enable pre-fetching... performance goes down */
	//rte_prefetch0(rte_pktmbuf_mtod(m, void *));
	*len = m->pkt_len;
	pktbuf = rte_pktmbuf_mtod(m, uint8_t *);

	/* enqueue the pkt ptr in mbuf */
	dpc->rmbufs[ifidx].m_table[index] = m;

	return pktbuf;
}
/*----------------------------------------------------------------------------*/
int
dpdk_get_nif(struct ifreq *ifr)
{
	int i;
	static int num_dev = -1;
	static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];
	/* get mac addr entries of 'detected' dpdk ports */
	if (num_dev < 0) {
		num_dev = rte_eth_dev_count();
		for (i = 0; i < num_dev; i++)
			rte_eth_macaddr_get(i, &ports_eth_addr[i]);
	}

	for (i = 0; i < num_dev; i++)
		if (!memcmp(&ifr->ifr_addr.sa_data[0], &ports_eth_addr[i], ETH_ALEN))
			return i;

	return -1;
}
/*----------------------------------------------------------------------------*/
int32_t
dpdk_select(struct mtcp_thread_context *ctxt)
{
#ifdef RX_IDLE_ENABLE
	struct dpdk_private_context *dpc;

	dpc = (struct dpdk_private_context *) ctxt->io_private_context;
	if (dpc->rx_idle > RX_IDLE_THRESH) {
		dpc->rx_idle = 0;
		usleep(RX_IDLE_TIMEOUT);
	}
#endif
	return 0;
}
/*----------------------------------------------------------------------------*/
void
dpdk_destroy_handle(struct mtcp_thread_context *ctxt)
{
	struct dpdk_private_context *dpc;
	int i;

	dpc = (struct dpdk_private_context *) ctxt->io_private_context;

	/* free wmbufs */
	for (i = 0; i < g_config.mos->netdev_table->num; i++)
		free_pkts(dpc->wmbufs[i].m_table, MAX_PKT_BURST);

#ifdef ENABLE_STATS_IOCTL
	/* close the stats fd */
	if (dpc->fd >= 0)
		close(dpc->fd);
#endif /* !ENABLE_STATS_IOCTL */

	/* free it all up */
	free(dpc);
}
/*----------------------------------------------------------------------------*/
static void
check_all_ports_link_status(uint8_t port_num, uint32_t port_mask)
{
#define CHECK_INTERVAL 			100 /* 100ms */
#define MAX_CHECK_TIME 			90 /* 9s (90 * 100ms) in total */

	uint8_t portid, count, all_ports_up, print_flag = 0;
	struct rte_eth_link link;

	printf("\nChecking link status");
	fflush(stdout);
	for (count = 0; count <= MAX_CHECK_TIME; count++) {
		all_ports_up = 1;
		for (portid = 0; portid < port_num; portid++) {
			if ((port_mask & (1 << portid)) == 0)
				continue;
			memset(&link, 0, sizeof(link));
			rte_eth_link_get_nowait(portid, &link);
			/* print link status if flag set */
			if (print_flag == 1) {
				if (link.link_status)
					printf("Port %d Link Up - speed %u "
					       "Mbps - %s\n", (uint8_t)portid,
					       (unsigned)link.link_speed,
					       (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
					       ("full-duplex") : ("half-duplex"));
				else
					printf("Port %d Link Down\n",
						(uint8_t)portid);
				continue;
			}
			/* clear all_ports_up flag if any link down */
			if (link.link_status == 0) {
				all_ports_up = 0;
				break;
			}
		}
		/* after finally printing all link statuses, get out */
		if (print_flag == 1)
			break;

		if (all_ports_up == 0) {
			printf(".");
			fflush(stdout);
			rte_delay_ms(CHECK_INTERVAL);
		}

		/* set the print_flag if all ports are up or the check timed out */
		if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
			print_flag = 1;
			printf("done\n");
		}
	}
}
/*----------------------------------------------------------------------------*/
int32_t
dpdk_dev_ioctl(struct mtcp_thread_context *ctx, int nif, int cmd, void *argp)
{
	struct dpdk_private_context *dpc;
	struct rte_mbuf *m;
	int len_of_mbuf;
	struct iphdr *iph;
	struct tcphdr *tcph;
	RssInfo *rss_i;
	void **argpptr = (void **)argp;

	if (cmd == DRV_NAME) {
		*argpptr = (void *)dev_info[nif].driver_name;
		return 0;
	}

	iph = (struct iphdr *)argp;
	dpc = (struct dpdk_private_context *)ctx->io_private_context;
	len_of_mbuf = dpc->wmbufs[nif].len;
	rss_i = NULL;

	switch (cmd) {
	case PKT_TX_IP_CSUM:
		m = dpc->wmbufs[nif].m_table[len_of_mbuf - 1];
		m->ol_flags = PKT_TX_IP_CKSUM | PKT_TX_IPV4;
		m->l2_len = sizeof(struct ether_hdr);
		m->l3_len = (iph->ihl<<2);
		break;
	case PKT_TX_TCP_CSUM:
		m = dpc->wmbufs[nif].m_table[len_of_mbuf - 1];
		tcph = (struct tcphdr *)((unsigned char *)iph + (iph->ihl<<2));
		m->ol_flags |= PKT_TX_TCP_CKSUM;
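		/*
		 * With PKT_TX_TCP_CKSUM set, the NIC expects the TCP checksum
		 * field to be pre-seeded with the IPv4 pseudo-header checksum;
		 * rte_ipv4_phdr_cksum() below computes exactly that.
		 */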
		tcph->check = rte_ipv4_phdr_cksum((struct ipv4_hdr *)iph, m->ol_flags);
		break;
	case PKT_RX_RSS:
		rss_i = (RssInfo *)argp;
		m = dpc->pkts_burst[rss_i->pktidx];
		rss_i->hash_value = m->hash.rss;
		break;
	default:
		goto dev_ioctl_err;
	}

	return 0;
 dev_ioctl_err:
	return -1;
}
/*----------------------------------------------------------------------------*/
void
dpdk_load_module_upper_half(void)
{
	int cpu = g_config.mos->num_cores, ret;
	uint32_t cpumask = 0;
	char cpumaskbuf[10];
	char mem_channels[5];

	/* set the log level */
#if 0
	rte_set_log_type(RTE_LOGTYPE_PMD, 0);
	rte_set_log_type(RTE_LOGTYPE_MALLOC, 0);
	rte_set_log_type(RTE_LOGTYPE_MEMPOOL, 0);
	rte_set_log_type(RTE_LOGTYPE_RING, 0);
	rte_set_log_level(RTE_LOG_WARNING);
#else
	rte_log_set_level(RTE_LOGTYPE_PMD, 0);
	rte_log_set_level(RTE_LOGTYPE_MALLOC, 0);
	rte_log_set_level(RTE_LOGTYPE_MEMPOOL, 0);
	rte_log_set_level(RTE_LOGTYPE_RING, 0);
	rte_log_set_global_level(RTE_LOG_WARNING);
#endif
	/* build the cpu mask */
	for (ret = 0; ret < cpu; ret++)
		cpumask = (cpumask | (1 << ret));
	sprintf(cpumaskbuf, "%X", cpumask);

	/* get the mem channels per socket */
	if (g_config.mos->nb_mem_channels == 0) {
		TRACE_ERROR("DPDK module requires the number of memory channels "
				"per socket parameter!\n");
		exit(EXIT_FAILURE);
	}
	sprintf(mem_channels, "%d", g_config.mos->nb_mem_channels);

	/* initialize the rte EAL environment */
	char *argv[] = {"",
			"-c",
			cpumaskbuf,
			"-n",
			mem_channels,
			"--proc-type=auto",
			""
	};
	const int argc = 6;
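	/*
	 * argv carries a trailing "" sentinel; argc (6) counts only the
	 * program-name placeholder plus the five real EAL arguments that
	 * rte_eal_init() should parse.
	 */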

	/*
	 * Re-set the getopt extern variable optind before calling
	 * rte_eal_init(): EAL parses its arguments with the getopt(3)
	 * library call, so mtcp applications that have already used an
	 * `external' getopt will crash inside rte_eal_init() unless optind
	 * is reset to zero first. See man getopt(3) for details.
	 */
	optind = 0;

	/* initialize the dpdk eal env */
	ret = rte_eal_init(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid EAL args!\n");
}
/*----------------------------------------------------------------------------*/
void
dpdk_load_module_lower_half(void)
{
	int portid, rxlcore_id, ret;
	struct rte_eth_fc_conf fc_conf;	/* for Ethernet flow control settings */
	/* setting the rss key */
	static const uint8_t key[] = {
		0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
		0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
		0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
		0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
		0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
		0x05, 0x05
	};

	port_conf.rx_adv_conf.rss_conf.rss_key = (uint8_t *)key;
	port_conf.rx_adv_conf.rss_conf.rss_key_len = sizeof(key);
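	/*
	 * The repeated-0x05 key is a commonly used "symmetric RSS" key: it
	 * makes both directions of a TCP/UDP flow hash to the same value,
	 * so each connection is pinned to a single core. Treat this as the
	 * likely rationale for the choice, not a NIC-documented guarantee.
	 */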

	/* reset the cpu_qid mapping */
	memset(cpu_qid_map, 0xFF, sizeof(cpu_qid_map));

	if (!g_config.mos->multiprocess
			|| (g_config.mos->multiprocess && g_config.mos->multiprocess_is_master)) {
		for (rxlcore_id = 0; rxlcore_id < g_config.mos->num_cores; rxlcore_id++) {
			char name[20];
			sprintf(name, "mbuf_pool-%d", rxlcore_id);
			/* create the mbuf pools */
			pktmbuf_pool[rxlcore_id] =
				rte_mempool_create(name, NB_MBUF,
						   MBUF_SIZE, MEMPOOL_CACHE_SIZE,
						   sizeof(struct rte_pktmbuf_pool_private),
						   rte_pktmbuf_pool_init, NULL,
						   rte_pktmbuf_init, NULL,
						   rte_lcore_to_socket_id(rxlcore_id), 0);
			if (pktmbuf_pool[rxlcore_id] == NULL)
				rte_exit(EXIT_FAILURE, "Cannot init mbuf pool\n");
		}

		/* Initialize each port */
		for (portid = 0; portid < g_config.mos->netdev_table->num; portid++) {
			int num_queue = 0, eth_idx, i, queue_id;
			for (eth_idx = 0; eth_idx < g_config.mos->netdev_table->num; eth_idx++)
				if (portid == g_config.mos->netdev_table->ent[eth_idx]->ifindex)
					break;
			if (eth_idx == g_config.mos->netdev_table->num)
				continue;
			for (i = 0; i < sizeof(uint64_t) * 8; i++)
				if (g_config.mos->netdev_table->ent[eth_idx]->cpu_mask & (1L << i))
					num_queue++;

			/* set 'num_queues' (used for GetRSSCPUCore() in util.c) */
			num_queues = num_queue;

			/* init port */
			printf("Initializing port %u... ", (unsigned) portid);
			fflush(stdout);
			ret = rte_eth_dev_configure(portid, num_queue, num_queue,
						    &port_conf);
			if (ret < 0)
				rte_exit(EXIT_FAILURE, "Cannot configure device: "
					 "err=%d, port=%u\n",
					 ret, (unsigned) portid);

			/* init one RX queue per CPU */
			fflush(stdout);
#ifdef DEBUG
			rte_eth_macaddr_get(portid, &ports_eth_addr[portid]);
#endif
			/* check port capabilities */
			rte_eth_dev_info_get(portid, &dev_info[portid]);

			queue_id = 0;
			for (rxlcore_id = 0; rxlcore_id < g_config.mos->num_cores; rxlcore_id++) {
				if (!(g_config.mos->netdev_table->ent[eth_idx]->cpu_mask & (1L << rxlcore_id)))
					continue;
				ret = rte_eth_rx_queue_setup(portid, queue_id, nb_rxd,
						rte_eth_dev_socket_id(portid), &rx_conf,
						pktmbuf_pool[rxlcore_id]);
				if (ret < 0)
					rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup: "
						 "err=%d, port=%u, queueid: %d\n",
						 ret, (unsigned) portid, queue_id);
				cpu_qid_map[portid][rxlcore_id] = queue_id++;
			}

			/* init one TX queue on each port per CPU (this is redundant for
			 * this app) */
			fflush(stdout);
			queue_id = 0;
			for (rxlcore_id = 0; rxlcore_id < g_config.mos->num_cores; rxlcore_id++) {
				if (!(g_config.mos->netdev_table->ent[eth_idx]->cpu_mask & (1L << rxlcore_id)))
					continue;
				ret = rte_eth_tx_queue_setup(portid, queue_id, nb_txd,
						rte_eth_dev_socket_id(portid), &tx_conf);
				if (ret < 0)
					rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: "
						 "err=%d, port=%u, queueid: %d\n",
						 ret, (unsigned) portid, queue_id);
				queue_id++;
			}

			/* Start device */
			ret = rte_eth_dev_start(portid);
			if (ret < 0)
				rte_exit(EXIT_FAILURE, "rte_eth_dev_start:err=%d, port=%u\n",
					 ret, (unsigned) portid);

			printf("done\n");
			rte_eth_promiscuous_enable(portid);

			/* retrieve current flow control settings per port */
			memset(&fc_conf, 0, sizeof(fc_conf));
			ret = rte_eth_dev_flow_ctrl_get(portid, &fc_conf);
			if (ret != 0) {
				rte_exit(EXIT_FAILURE, "Failed to get flow control info!\n");
			}

			/* and just disable the rx/tx flow control */
			fc_conf.mode = RTE_FC_NONE;
			ret = rte_eth_dev_flow_ctrl_set(portid, &fc_conf);
			if (ret != 0) {
				rte_exit(EXIT_FAILURE, "Failed to set flow control info!: errno: %d\n",
					 ret);
			}

#ifdef DEBUG
			printf("Port %u, MAC address: %02X:%02X:%02X:%02X:%02X:%02X\n\n",
					(unsigned) portid,
					ports_eth_addr[portid].addr_bytes[0],
					ports_eth_addr[portid].addr_bytes[1],
					ports_eth_addr[portid].addr_bytes[2],
					ports_eth_addr[portid].addr_bytes[3],
					ports_eth_addr[portid].addr_bytes[4],
					ports_eth_addr[portid].addr_bytes[5]);
#endif
			/* only the master process checks the link status */
			check_all_ports_link_status(g_config.mos->netdev_table->num, 0xFFFFFFFF);
		}
	} else { /* g_config.mos->multiprocess && !g_config.mos->multiprocess_is_master */
		for (rxlcore_id = 0; rxlcore_id < g_config.mos->num_cores; rxlcore_id++) {
			char name[20];
			sprintf(name, "mbuf_pool-%d", rxlcore_id);
			/* look up the mbuf pools created by the master process */
			pktmbuf_pool[rxlcore_id] =
				rte_mempool_lookup(name);
			if (pktmbuf_pool[rxlcore_id] == NULL)
				rte_exit(EXIT_FAILURE, "Cannot find mbuf pool\n");
			for (portid = 0; portid < g_config.mos->netdev_table->num; portid++)
				cpu_qid_map[portid][rxlcore_id] = rxlcore_id;
		}
		/* set 'num_queues' (used for GetRSSCPUCore() in util.c) */
		num_queues = g_config.mos->num_cores;
	}
}
/*----------------------------------------------------------------------------*/
io_module_func dpdk_module_func = {
	.load_module_upper_half	   = dpdk_load_module_upper_half,
	.load_module_lower_half	   = dpdk_load_module_lower_half,
	.init_handle		   = dpdk_init_handle,
	.link_devices		   = NULL,
	.release_pkt		   = NULL,
	.send_pkts		   = dpdk_send_pkts,
	.get_wptr		   = dpdk_get_wptr,
	.recv_pkts		   = dpdk_recv_pkts,
	.get_rptr		   = dpdk_get_rptr,
	.get_nif		   = dpdk_get_nif,
	.select			   = dpdk_select,
	.destroy_handle		   = dpdk_destroy_handle,
	.dev_ioctl		   = dpdk_dev_ioctl,
	.set_wptr		   = dpdk_set_wptr,
};
/*----------------------------------------------------------------------------*/