1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #define _GNU_SOURCE
35 
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <stdint.h>
39 #include <inttypes.h>
40 #include <sys/types.h>
41 #include <string.h>
42 #include <sys/queue.h>
43 #include <stdarg.h>
44 #include <errno.h>
45 #include <getopt.h>
46 
47 #include <rte_common.h>
48 #include <rte_vect.h>
49 #include <rte_byteorder.h>
50 #include <rte_log.h>
51 #include <rte_memory.h>
52 #include <rte_memcpy.h>
53 #include <rte_eal.h>
54 #include <rte_launch.h>
55 #include <rte_atomic.h>
56 #include <rte_cycles.h>
57 #include <rte_prefetch.h>
58 #include <rte_lcore.h>
59 #include <rte_per_lcore.h>
60 #include <rte_branch_prediction.h>
61 #include <rte_interrupts.h>
62 #include <rte_random.h>
63 #include <rte_debug.h>
64 #include <rte_ether.h>
65 #include <rte_ethdev.h>
66 #include <rte_ring.h>
67 #include <rte_mempool.h>
68 #include <rte_mbuf.h>
69 #include <rte_ip.h>
70 #include <rte_tcp.h>
71 #include <rte_udp.h>
72 #include <rte_string_fns.h>
73 #include <rte_pause.h>
74 
75 #include <cmdline_parse.h>
76 #include <cmdline_parse_etheraddr.h>
77 
78 #include <lthread_api.h>
79 
80 #define APP_LOOKUP_EXACT_MATCH          0
81 #define APP_LOOKUP_LPM                  1
82 #define DO_RFC_1812_CHECKS
83 
/* Enable CPU load stats: 0 = off, 1 = on */
85 #define APP_CPU_LOAD                 1
86 
87 #ifndef APP_LOOKUP_METHOD
88 #define APP_LOOKUP_METHOD             APP_LOOKUP_LPM
89 #endif
90 
91 #ifndef __GLIBC__ /* sched_getcpu() is glibc specific */
92 #define sched_getcpu() rte_lcore_id()
93 #endif
94 
95 static int
96 check_ptype(int portid)
97 {
98 	int i, ret;
99 	int ipv4 = 0, ipv6 = 0;
100 
101 	ret = rte_eth_dev_get_supported_ptypes(portid, RTE_PTYPE_L3_MASK, NULL,
102 			0);
103 	if (ret <= 0)
104 		return 0;
105 
106 	uint32_t ptypes[ret];
107 
108 	ret = rte_eth_dev_get_supported_ptypes(portid, RTE_PTYPE_L3_MASK,
109 			ptypes, ret);
110 	for (i = 0; i < ret; ++i) {
111 		if (ptypes[i] & RTE_PTYPE_L3_IPV4)
112 			ipv4 = 1;
113 		if (ptypes[i] & RTE_PTYPE_L3_IPV6)
114 			ipv6 = 1;
115 	}
116 
117 	if (ipv4 && ipv6)
118 		return 1;
119 
120 	return 0;
121 }
122 
123 static inline void
124 parse_ptype(struct rte_mbuf *m)
125 {
126 	struct ether_hdr *eth_hdr;
127 	uint32_t packet_type = RTE_PTYPE_UNKNOWN;
128 	uint16_t ether_type;
129 
130 	eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
131 	ether_type = eth_hdr->ether_type;
132 	if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
133 		packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
134 	else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv6))
135 		packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
136 
137 	m->packet_type = packet_type;
138 }
139 
140 static uint16_t
141 cb_parse_ptype(__rte_unused uint16_t port, __rte_unused uint16_t queue,
142 		struct rte_mbuf *pkts[], uint16_t nb_pkts,
143 		__rte_unused uint16_t max_pkts, __rte_unused void *user_param)
144 {
145 	unsigned int i;
146 
147 	for (i = 0; i < nb_pkts; i++)
148 		parse_ptype(pkts[i]);
149 
150 	return nb_pkts;
151 }
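/*
 * If check_ptype() reports that the PMD cannot classify both IPv4 and IPv6
 * L3 headers in hardware, the application is expected to fall back to the
 * software parser above. A minimal sketch of how such a callback is
 * typically registered (assuming the port and queue are already configured):
 *
 *	rte_eth_add_rx_callback(portid, queueid, cb_parse_ptype, NULL);
 */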
152 
153 /*
 *  When set to zero, the simple forwarding path is enabled.
 *  When set to one, the optimized forwarding path is enabled.
 *  Note that the LPM-optimized path uses SSE4.1 instructions.
157  */
158 #define ENABLE_MULTI_BUFFER_OPTIMIZE	1
159 
160 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
161 #include <rte_hash.h>
162 #elif (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
163 #include <rte_lpm.h>
164 #include <rte_lpm6.h>
165 #else
166 #error "APP_LOOKUP_METHOD set to incorrect value"
167 #endif
168 
169 #define RTE_LOGTYPE_L3FWD RTE_LOGTYPE_USER1
170 
171 #define MAX_JUMBO_PKT_LEN  9600
172 
173 #define IPV6_ADDR_LEN 16
174 
175 #define MEMPOOL_CACHE_SIZE 256
176 
177 /*
 * This expression is used to calculate the number of mbufs needed, depending on
 * user input, taking into account memory for the RX and TX hardware rings, the
 * cache per lcore and the mbuf table per port per lcore. RTE_MAX is used to
 * ensure that NB_MBUF never goes below a minimum value of 8192.
182  */
183 
184 #define NB_MBUF RTE_MAX(\
185 		(nb_ports*nb_rx_queue*nb_rxd +      \
186 		nb_ports*nb_lcores*MAX_PKT_BURST +  \
187 		nb_ports*n_tx_queue*nb_txd +        \
188 		nb_lcores*MEMPOOL_CACHE_SIZE),      \
189 		(unsigned)8192)
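/*
 * Illustrative sizing only, with hypothetical values: for nb_ports = 2,
 * nb_rx_queue = 3, nb_rxd = nb_txd = 128, n_tx_queue = 4 and nb_lcores = 4,
 * the sum is 2*3*128 + 2*4*32 + 2*4*128 + 4*256 = 3072, so RTE_MAX clamps
 * NB_MBUF up to the 8192 floor.
 */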
190 
191 #define MAX_PKT_BURST     32
192 #define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */
193 
194 /*
195  * Try to avoid TX buffering if we have at least MAX_TX_BURST packets to send.
196  */
197 #define	MAX_TX_BURST  (MAX_PKT_BURST / 2)
198 #define BURST_SIZE    MAX_TX_BURST
199 
200 #define NB_SOCKETS 8
201 
/* Configure how many packets ahead to prefetch when reading packets */
203 #define PREFETCH_OFFSET	3
204 
205 /* Used to mark destination port as 'invalid'. */
206 #define	BAD_PORT	((uint16_t)-1)
207 
208 #define FWDSTEP	4
209 
210 /*
211  * Configurable number of RX/TX ring descriptors
212  */
213 #define RTE_TEST_RX_DESC_DEFAULT 128
214 #define RTE_TEST_TX_DESC_DEFAULT 128
215 static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
216 static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
217 
218 /* ethernet addresses of ports */
219 static uint64_t dest_eth_addr[RTE_MAX_ETHPORTS];
220 static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];
221 
222 static xmm_t val_eth[RTE_MAX_ETHPORTS];
223 
224 /* replace first 12B of the ethernet header. */
225 #define	MASK_ETH 0x3f
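/*
 * 0x3f selects the low six 16-bit lanes in _mm_blend_epi16(), i.e. the
 * destination MAC (6 B) and source MAC (6 B) are taken from val_eth[] while
 * the ether_type and the bytes that follow are kept from the original frame.
 */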
226 
227 /* mask of enabled ports */
228 static uint32_t enabled_port_mask;
static int promiscuous_on; /**< Promiscuous mode is off by default. */
230 static int numa_on = 1;    /**< NUMA is enabled by default. */
231 static int parse_ptype_on;
232 
233 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
static int ipv6;           /**< IPv6 lookup is disabled by default. */
235 #endif
236 
237 #if (APP_CPU_LOAD == 1)
238 
239 #define MAX_CPU RTE_MAX_LCORE
#define CPU_LOAD_TIMEOUT_US (5 * 1000 * 1000)  /**< Stats collection period: 5 s */
241 
242 #define CPU_PROCESS     0
243 #define CPU_POLL        1
244 #define MAX_CPU_COUNTER 2
245 
246 struct cpu_load {
247 	uint16_t       n_cpu;
248 	uint64_t       counter;
249 	uint64_t       hits[MAX_CPU_COUNTER][MAX_CPU];
250 } __rte_cache_aligned;
251 
252 static struct cpu_load cpu_load;
253 static int cpu_load_lcore_id = -1;
254 
255 #define SET_CPU_BUSY(thread, counter) \
256 		thread->conf.busy[counter] = 1
257 
258 #define SET_CPU_IDLE(thread, counter) \
259 		thread->conf.busy[counter] = 0
260 
261 #define IS_CPU_BUSY(thread, counter) \
262 		(thread->conf.busy[counter] > 0)
263 
264 #else
265 
266 #define SET_CPU_BUSY(thread, counter)
267 #define SET_CPU_IDLE(thread, counter)
268 #define IS_CPU_BUSY(thread, counter) 0
269 
270 #endif
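/*
 * A minimal sketch of how these markers are intended to wrap the hot
 * sections of an rx thread (illustrative only; rx_conf, portid, queueid and
 * pkts_burst are assumed to exist in the caller):
 *
 *	SET_CPU_BUSY(rx_conf, CPU_POLL);
 *	nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, MAX_PKT_BURST);
 *	SET_CPU_IDLE(rx_conf, CPU_POLL);
 *
 * The load collector can then periodically sample the busy[] flags and
 * accumulate them into cpu_load.hits[].
 */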
271 
272 struct mbuf_table {
273 	uint16_t len;
274 	struct rte_mbuf *m_table[MAX_PKT_BURST];
275 };
276 
277 struct lcore_rx_queue {
278 	uint16_t port_id;
279 	uint8_t queue_id;
280 } __rte_cache_aligned;
281 
282 #define MAX_RX_QUEUE_PER_LCORE 16
283 #define MAX_TX_QUEUE_PER_PORT  RTE_MAX_ETHPORTS
284 #define MAX_RX_QUEUE_PER_PORT  128
285 
286 #define MAX_LCORE_PARAMS       1024
287 struct rx_thread_params {
288 	uint16_t port_id;
289 	uint8_t queue_id;
290 	uint8_t lcore_id;
291 	uint8_t thread_id;
292 } __rte_cache_aligned;
293 
294 static struct rx_thread_params rx_thread_params_array[MAX_LCORE_PARAMS];
295 static struct rx_thread_params rx_thread_params_array_default[] = {
296 	{0, 0, 2, 0},
297 	{0, 1, 2, 1},
298 	{0, 2, 2, 2},
299 	{1, 0, 2, 3},
300 	{1, 1, 2, 4},
301 	{1, 2, 2, 5},
302 	{2, 0, 2, 6},
303 	{3, 0, 3, 7},
304 	{3, 1, 3, 8},
305 };
306 
307 static struct rx_thread_params *rx_thread_params =
308 		rx_thread_params_array_default;
309 static uint16_t nb_rx_thread_params = RTE_DIM(rx_thread_params_array_default);
310 
311 struct tx_thread_params {
312 	uint8_t lcore_id;
313 	uint8_t thread_id;
314 } __rte_cache_aligned;
315 
316 static struct tx_thread_params tx_thread_params_array[MAX_LCORE_PARAMS];
317 static struct tx_thread_params tx_thread_params_array_default[] = {
318 	{4, 0},
319 	{5, 1},
320 	{6, 2},
321 	{7, 3},
322 	{8, 4},
323 	{9, 5},
324 	{10, 6},
325 	{11, 7},
326 	{12, 8},
327 };
328 
329 static struct tx_thread_params *tx_thread_params =
330 		tx_thread_params_array_default;
331 static uint16_t nb_tx_thread_params = RTE_DIM(tx_thread_params_array_default);
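/*
 * The default tables above can be overridden from the command line. The
 * mapping is expressed roughly as --rx="(port,queue,lcore,thread)[,...]" and
 * --tx="(lcore,thread)[,...]"; for example --rx="(0,0,2,0)" --tx="(4,0)"
 * reproduces the first entry of each default array (option syntax shown for
 * illustration only; see the argument parsing code for the exact format).
 */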
332 
333 static struct rte_eth_conf port_conf = {
334 	.rxmode = {
335 		.mq_mode = ETH_MQ_RX_RSS,
336 		.max_rx_pkt_len = ETHER_MAX_LEN,
337 		.split_hdr_size = 0,
338 		.header_split   = 0, /**< Header Split disabled */
339 		.hw_ip_checksum = 1, /**< IP checksum offload enabled */
340 		.hw_vlan_filter = 0, /**< VLAN filtering disabled */
341 		.jumbo_frame    = 0, /**< Jumbo Frame Support disabled */
342 		.hw_strip_crc   = 1, /**< CRC stripped by hardware */
343 	},
344 	.rx_adv_conf = {
345 		.rss_conf = {
346 			.rss_key = NULL,
347 			.rss_hf = ETH_RSS_TCP,
348 		},
349 	},
350 	.txmode = {
351 		.mq_mode = ETH_MQ_TX_NONE,
352 	},
353 };
354 
355 static struct rte_mempool *pktmbuf_pool[NB_SOCKETS];
356 
357 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
358 
359 #include <rte_hash_crc.h>
360 #define DEFAULT_HASH_FUNC       rte_hash_crc
361 
362 struct ipv4_5tuple {
363 	uint32_t ip_dst;
364 	uint32_t ip_src;
365 	uint16_t port_dst;
366 	uint16_t port_src;
367 	uint8_t  proto;
368 } __attribute__((__packed__));
369 
370 union ipv4_5tuple_host {
371 	struct {
372 		uint8_t  pad0;
373 		uint8_t  proto;
374 		uint16_t pad1;
375 		uint32_t ip_src;
376 		uint32_t ip_dst;
377 		uint16_t port_src;
378 		uint16_t port_dst;
379 	};
380 	__m128i xmm;
381 };
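/*
 * The padding mirrors the IPv4 header layout: pad0 lines up with
 * time_to_live and pad1 with hdr_checksum, so a single 16-byte load starting
 * at offsetof(struct ipv4_hdr, time_to_live) covers proto, both addresses
 * and the first four bytes of the L4 header (the ports); mask0 later clears
 * the pad fields.
 */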
382 
383 #define XMM_NUM_IN_IPV6_5TUPLE 3
384 
385 struct ipv6_5tuple {
386 	uint8_t  ip_dst[IPV6_ADDR_LEN];
387 	uint8_t  ip_src[IPV6_ADDR_LEN];
388 	uint16_t port_dst;
389 	uint16_t port_src;
390 	uint8_t  proto;
391 } __attribute__((__packed__));
392 
393 union ipv6_5tuple_host {
394 	struct {
395 		uint16_t pad0;
396 		uint8_t  proto;
397 		uint8_t  pad1;
398 		uint8_t  ip_src[IPV6_ADDR_LEN];
399 		uint8_t  ip_dst[IPV6_ADDR_LEN];
400 		uint16_t port_src;
401 		uint16_t port_dst;
402 		uint64_t reserve;
403 	};
404 	__m128i xmm[XMM_NUM_IN_IPV6_5TUPLE];
405 };
406 
407 struct ipv4_l3fwd_route {
408 	struct ipv4_5tuple key;
409 	uint8_t if_out;
410 };
411 
412 struct ipv6_l3fwd_route {
413 	struct ipv6_5tuple key;
414 	uint8_t if_out;
415 };
416 
417 static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = {
418 	{{IPv4(101, 0, 0, 0), IPv4(100, 10, 0, 1),  101, 11, IPPROTO_TCP}, 0},
419 	{{IPv4(201, 0, 0, 0), IPv4(200, 20, 0, 1),  102, 12, IPPROTO_TCP}, 1},
420 	{{IPv4(111, 0, 0, 0), IPv4(100, 30, 0, 1),  101, 11, IPPROTO_TCP}, 2},
421 	{{IPv4(211, 0, 0, 0), IPv4(200, 40, 0, 1),  102, 12, IPPROTO_TCP}, 3},
422 };
423 
424 static struct ipv6_l3fwd_route ipv6_l3fwd_route_array[] = {
425 	{{
426 	{0xfe, 0x80, 0, 0, 0, 0, 0, 0, 0x02, 0x1e, 0x67, 0xff, 0xfe, 0, 0, 0},
427 	{0xfe, 0x80, 0, 0, 0, 0, 0, 0, 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38,
428 			0x05},
429 	101, 11, IPPROTO_TCP}, 0},
430 
431 	{{
432 	{0xfe, 0x90, 0, 0, 0, 0, 0, 0, 0x02, 0x1e, 0x67, 0xff, 0xfe, 0, 0, 0},
433 	{0xfe, 0x90, 0, 0, 0, 0, 0, 0, 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38,
434 			0x05},
435 	102, 12, IPPROTO_TCP}, 1},
436 
437 	{{
438 	{0xfe, 0xa0, 0, 0, 0, 0, 0, 0, 0x02, 0x1e, 0x67, 0xff, 0xfe, 0, 0, 0},
439 	{0xfe, 0xa0, 0, 0, 0, 0, 0, 0, 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38,
440 			0x05},
441 	101, 11, IPPROTO_TCP}, 2},
442 
443 	{{
444 	{0xfe, 0xb0, 0, 0, 0, 0, 0, 0, 0x02, 0x1e, 0x67, 0xff, 0xfe, 0, 0, 0},
445 	{0xfe, 0xb0, 0, 0, 0, 0, 0, 0, 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38,
446 			0x05},
447 	102, 12, IPPROTO_TCP}, 3},
448 };
449 
450 typedef struct rte_hash lookup_struct_t;
451 static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS];
452 static lookup_struct_t *ipv6_l3fwd_lookup_struct[NB_SOCKETS];
453 
454 #ifdef RTE_ARCH_X86_64
455 /* default to 4 million hash entries (approx) */
456 #define L3FWD_HASH_ENTRIES (1024*1024*4)
457 #else
/* 32-bit has less address space for hugepage memory, so limit to 1M entries */
459 #define L3FWD_HASH_ENTRIES (1024*1024*1)
460 #endif
461 #define HASH_ENTRY_NUMBER_DEFAULT 4
462 
463 static uint32_t hash_entry_number = HASH_ENTRY_NUMBER_DEFAULT;
464 
465 static inline uint32_t
466 ipv4_hash_crc(const void *data, __rte_unused uint32_t data_len,
467 		uint32_t init_val)
468 {
469 	const union ipv4_5tuple_host *k;
470 	uint32_t t;
471 	const uint32_t *p;
472 
473 	k = data;
474 	t = k->proto;
475 	p = (const uint32_t *)&k->port_src;
476 
477 	init_val = rte_hash_crc_4byte(t, init_val);
478 	init_val = rte_hash_crc_4byte(k->ip_src, init_val);
479 	init_val = rte_hash_crc_4byte(k->ip_dst, init_val);
480 	init_val = rte_hash_crc_4byte(*p, init_val);
481 	return init_val;
482 }
483 
484 static inline uint32_t
485 ipv6_hash_crc(const void *data, __rte_unused uint32_t data_len,
486 		uint32_t init_val)
487 {
488 	const union ipv6_5tuple_host *k;
489 	uint32_t t;
490 	const uint32_t *p;
491 	const uint32_t *ip_src0, *ip_src1, *ip_src2, *ip_src3;
492 	const uint32_t *ip_dst0, *ip_dst1, *ip_dst2, *ip_dst3;
493 
494 	k = data;
495 	t = k->proto;
496 	p = (const uint32_t *)&k->port_src;
497 
498 	ip_src0 = (const uint32_t *) k->ip_src;
499 	ip_src1 = (const uint32_t *)(k->ip_src + 4);
500 	ip_src2 = (const uint32_t *)(k->ip_src + 8);
501 	ip_src3 = (const uint32_t *)(k->ip_src + 12);
502 	ip_dst0 = (const uint32_t *) k->ip_dst;
503 	ip_dst1 = (const uint32_t *)(k->ip_dst + 4);
504 	ip_dst2 = (const uint32_t *)(k->ip_dst + 8);
505 	ip_dst3 = (const uint32_t *)(k->ip_dst + 12);
506 	init_val = rte_hash_crc_4byte(t, init_val);
507 	init_val = rte_hash_crc_4byte(*ip_src0, init_val);
508 	init_val = rte_hash_crc_4byte(*ip_src1, init_val);
509 	init_val = rte_hash_crc_4byte(*ip_src2, init_val);
510 	init_val = rte_hash_crc_4byte(*ip_src3, init_val);
511 	init_val = rte_hash_crc_4byte(*ip_dst0, init_val);
512 	init_val = rte_hash_crc_4byte(*ip_dst1, init_val);
513 	init_val = rte_hash_crc_4byte(*ip_dst2, init_val);
514 	init_val = rte_hash_crc_4byte(*ip_dst3, init_val);
515 	init_val = rte_hash_crc_4byte(*p, init_val);
516 	return init_val;
517 }
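/*
 * These CRC32-based functions are meant to be plugged into the hash_func
 * field of struct rte_hash_parameters when the exact-match tables are
 * created. A sketch for an IPv4 table on socket 0 (names and values are
 * illustrative):
 *
 *	struct rte_hash_parameters params = {
 *		.name = "ipv4_l3fwd_hash_0",
 *		.entries = L3FWD_HASH_ENTRIES,
 *		.key_len = sizeof(union ipv4_5tuple_host),
 *		.hash_func = ipv4_hash_crc,
 *		.hash_func_init_val = 0,
 *		.socket_id = 0,
 *	};
 *	ipv4_l3fwd_lookup_struct[0] = rte_hash_create(&params);
 */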
518 
519 #define IPV4_L3FWD_NUM_ROUTES RTE_DIM(ipv4_l3fwd_route_array)
520 #define IPV6_L3FWD_NUM_ROUTES RTE_DIM(ipv6_l3fwd_route_array)
521 
522 static uint8_t ipv4_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned;
523 static uint8_t ipv6_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned;
524 
525 #endif
526 
527 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
528 struct ipv4_l3fwd_route {
529 	uint32_t ip;
530 	uint8_t  depth;
531 	uint8_t  if_out;
532 };
533 
534 struct ipv6_l3fwd_route {
535 	uint8_t ip[16];
536 	uint8_t depth;
537 	uint8_t if_out;
538 };
539 
540 static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = {
541 	{IPv4(1, 1, 1, 0), 24, 0},
542 	{IPv4(2, 1, 1, 0), 24, 1},
543 	{IPv4(3, 1, 1, 0), 24, 2},
544 	{IPv4(4, 1, 1, 0), 24, 3},
545 	{IPv4(5, 1, 1, 0), 24, 4},
546 	{IPv4(6, 1, 1, 0), 24, 5},
547 	{IPv4(7, 1, 1, 0), 24, 6},
548 	{IPv4(8, 1, 1, 0), 24, 7},
549 };
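/*
 * With the /24 depths above, a packet whose destination address is, for
 * example, 2.1.1.37 matches the 2.1.1.0/24 entry and is forwarded out
 * interface 1; destinations outside these prefixes miss the LPM lookup and
 * are sent back out the port they arrived on.
 */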
550 
551 static struct ipv6_l3fwd_route ipv6_l3fwd_route_array[] = {
552 	{{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 0},
553 	{{2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 1},
554 	{{3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 2},
555 	{{4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 3},
556 	{{5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 4},
557 	{{6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 5},
558 	{{7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 6},
559 	{{8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 7},
560 };
561 
562 #define IPV4_L3FWD_NUM_ROUTES RTE_DIM(ipv4_l3fwd_route_array)
563 #define IPV6_L3FWD_NUM_ROUTES RTE_DIM(ipv6_l3fwd_route_array)
564 
565 #define IPV4_L3FWD_LPM_MAX_RULES         1024
566 #define IPV6_L3FWD_LPM_MAX_RULES         1024
567 #define IPV6_L3FWD_LPM_NUMBER_TBL8S (1 << 16)
568 
569 typedef struct rte_lpm lookup_struct_t;
570 typedef struct rte_lpm6 lookup6_struct_t;
571 static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS];
572 static lookup6_struct_t *ipv6_l3fwd_lookup_struct[NB_SOCKETS];
573 #endif
574 
575 struct lcore_conf {
576 	lookup_struct_t *ipv4_lookup_struct;
577 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
578 	lookup6_struct_t *ipv6_lookup_struct;
579 #else
580 	lookup_struct_t *ipv6_lookup_struct;
581 #endif
582 	void *data;
583 } __rte_cache_aligned;
584 
585 static struct lcore_conf lcore_conf[RTE_MAX_LCORE];
586 RTE_DEFINE_PER_LCORE(struct lcore_conf *, lcore_conf);
587 
588 #define MAX_RX_QUEUE_PER_THREAD 16
589 #define MAX_TX_PORT_PER_THREAD  RTE_MAX_ETHPORTS
590 #define MAX_TX_QUEUE_PER_PORT   RTE_MAX_ETHPORTS
591 #define MAX_RX_QUEUE_PER_PORT   128
592 
593 #define MAX_RX_THREAD 1024
594 #define MAX_TX_THREAD 1024
595 #define MAX_THREAD    (MAX_RX_THREAD + MAX_TX_THREAD)
596 
/**
 * Producer and consumer thread configuration.
 */
static int lthreads_on = 1; /**< Use lthreads for processing. */
601 
602 rte_atomic16_t rx_counter;  /**< Number of spawned rx threads */
603 rte_atomic16_t tx_counter;  /**< Number of spawned tx threads */
604 
605 struct thread_conf {
606 	uint16_t lcore_id;      /**< Initial lcore for rx thread */
607 	uint16_t cpu_id;        /**< Cpu id for cpu load stats counter */
608 	uint16_t thread_id;     /**< Thread ID */
609 
610 #if (APP_CPU_LOAD > 0)
611 	int busy[MAX_CPU_COUNTER];
612 #endif
613 };
614 
615 struct thread_rx_conf {
616 	struct thread_conf conf;
617 
618 	uint16_t n_rx_queue;
619 	struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE];
620 
621 	uint16_t n_ring;        /**< Number of output rings */
622 	struct rte_ring *ring[RTE_MAX_LCORE];
623 	struct lthread_cond *ready[RTE_MAX_LCORE];
624 
625 #if (APP_CPU_LOAD > 0)
626 	int busy[MAX_CPU_COUNTER];
627 #endif
628 } __rte_cache_aligned;
629 
630 uint16_t n_rx_thread;
631 struct thread_rx_conf rx_thread[MAX_RX_THREAD];
632 
633 struct thread_tx_conf {
634 	struct thread_conf conf;
635 
636 	uint16_t tx_queue_id[RTE_MAX_LCORE];
637 	struct mbuf_table tx_mbufs[RTE_MAX_LCORE];
638 
639 	struct rte_ring *ring;
640 	struct lthread_cond **ready;
641 
642 } __rte_cache_aligned;
643 
644 uint16_t n_tx_thread;
645 struct thread_tx_conf tx_thread[MAX_TX_THREAD];
646 
647 /* Send burst of packets on an output interface */
648 static inline int
649 send_burst(struct thread_tx_conf *qconf, uint16_t n, uint16_t port)
650 {
651 	struct rte_mbuf **m_table;
652 	int ret;
653 	uint16_t queueid;
654 
655 	queueid = qconf->tx_queue_id[port];
656 	m_table = (struct rte_mbuf **)qconf->tx_mbufs[port].m_table;
657 
658 	ret = rte_eth_tx_burst(port, queueid, m_table, n);
659 	if (unlikely(ret < n)) {
660 		do {
661 			rte_pktmbuf_free(m_table[ret]);
662 		} while (++ret < n);
663 	}
664 
665 	return 0;
666 }
667 
668 /* Enqueue a single packet, and send burst if queue is filled */
669 static inline int
670 send_single_packet(struct rte_mbuf *m, uint16_t port)
671 {
672 	uint16_t len;
673 	struct thread_tx_conf *qconf;
674 
675 	if (lthreads_on)
676 		qconf = (struct thread_tx_conf *)lthread_get_data();
677 	else
678 		qconf = (struct thread_tx_conf *)RTE_PER_LCORE(lcore_conf)->data;
679 
680 	len = qconf->tx_mbufs[port].len;
681 	qconf->tx_mbufs[port].m_table[len] = m;
682 	len++;
683 
684 	/* enough pkts to be sent */
685 	if (unlikely(len == MAX_PKT_BURST)) {
686 		send_burst(qconf, MAX_PKT_BURST, port);
687 		len = 0;
688 	}
689 
690 	qconf->tx_mbufs[port].len = len;
691 	return 0;
692 }
693 
694 #if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && \
695 	(ENABLE_MULTI_BUFFER_OPTIMIZE == 1))
696 static __rte_always_inline void
697 send_packetsx4(uint16_t port,
698 	struct rte_mbuf *m[], uint32_t num)
699 {
700 	uint32_t len, j, n;
701 	struct thread_tx_conf *qconf;
702 
703 	if (lthreads_on)
704 		qconf = (struct thread_tx_conf *)lthread_get_data();
705 	else
706 		qconf = (struct thread_tx_conf *)RTE_PER_LCORE(lcore_conf)->data;
707 
708 	len = qconf->tx_mbufs[port].len;
709 
	/*
	 * If the TX buffer for that queue is empty and we have enough packets,
	 * then send them straight away.
	 */
714 	if (num >= MAX_TX_BURST && len == 0) {
715 		n = rte_eth_tx_burst(port, qconf->tx_queue_id[port], m, num);
716 		if (unlikely(n < num)) {
717 			do {
718 				rte_pktmbuf_free(m[n]);
719 			} while (++n < num);
720 		}
721 		return;
722 	}
723 
724 	/*
725 	 * Put packets into TX buffer for that queue.
726 	 */
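	/*
	 * The switch wrapped around the while loop below is a Duff's-device
	 * style unrolled copy: execution jumps to case (n % FWDSTEP) to handle
	 * the remainder first, and each further pass of the loop copies
	 * FWDSTEP mbufs. For example, with n = 6 the first pass enters at
	 * case 2 and copies two mbufs, and the second pass copies the
	 * remaining four.
	 */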
727 
728 	n = len + num;
729 	n = (n > MAX_PKT_BURST) ? MAX_PKT_BURST - len : num;
730 
731 	j = 0;
732 	switch (n % FWDSTEP) {
733 	while (j < n) {
734 	case 0:
735 		qconf->tx_mbufs[port].m_table[len + j] = m[j];
736 		j++;
737 		/* fall-through */
738 	case 3:
739 		qconf->tx_mbufs[port].m_table[len + j] = m[j];
740 		j++;
741 		/* fall-through */
742 	case 2:
743 		qconf->tx_mbufs[port].m_table[len + j] = m[j];
744 		j++;
745 		/* fall-through */
746 	case 1:
747 		qconf->tx_mbufs[port].m_table[len + j] = m[j];
748 		j++;
749 	}
750 	}
751 
752 	len += n;
753 
754 	/* enough pkts to be sent */
755 	if (unlikely(len == MAX_PKT_BURST)) {
756 
757 		send_burst(qconf, MAX_PKT_BURST, port);
758 
759 		/* copy rest of the packets into the TX buffer. */
760 		len = num - n;
761 		j = 0;
762 		switch (len % FWDSTEP) {
763 		while (j < len) {
764 		case 0:
765 			qconf->tx_mbufs[port].m_table[j] = m[n + j];
766 			j++;
767 			/* fall-through */
768 		case 3:
769 			qconf->tx_mbufs[port].m_table[j] = m[n + j];
770 			j++;
771 			/* fall-through */
772 		case 2:
773 			qconf->tx_mbufs[port].m_table[j] = m[n + j];
774 			j++;
775 			/* fall-through */
776 		case 1:
777 			qconf->tx_mbufs[port].m_table[j] = m[n + j];
778 			j++;
779 		}
780 		}
781 	}
782 
783 	qconf->tx_mbufs[port].len = len;
784 }
785 #endif /* APP_LOOKUP_LPM */
786 
787 #ifdef DO_RFC_1812_CHECKS
788 static inline int
789 is_valid_ipv4_pkt(struct ipv4_hdr *pkt, uint32_t link_len)
790 {
791 	/* From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2 */
792 	/*
793 	 * 1. The packet length reported by the Link Layer must be large
794 	 * enough to hold the minimum length legal IP datagram (20 bytes).
795 	 */
796 	if (link_len < sizeof(struct ipv4_hdr))
797 		return -1;
798 
799 	/* 2. The IP checksum must be correct. */
800 	/* this is checked in H/W */
801 
802 	/*
803 	 * 3. The IP version number must be 4. If the version number is not 4
804 	 * then the packet may be another version of IP, such as IPng or
805 	 * ST-II.
806 	 */
807 	if (((pkt->version_ihl) >> 4) != 4)
808 		return -3;
809 	/*
810 	 * 4. The IP header length field must be large enough to hold the
811 	 * minimum length legal IP datagram (20 bytes = 5 words).
812 	 */
813 	if ((pkt->version_ihl & 0xf) < 5)
814 		return -4;
815 
816 	/*
817 	 * 5. The IP total length field must be large enough to hold the IP
818 	 * datagram header, whose length is specified in the IP header length
819 	 * field.
820 	 */
821 	if (rte_cpu_to_be_16(pkt->total_length) < sizeof(struct ipv4_hdr))
822 		return -5;
823 
824 	return 0;
825 }
826 #endif
827 
828 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
829 
830 static __m128i mask0;
831 static __m128i mask1;
832 static __m128i mask2;
833 static inline uint16_t
834 get_ipv4_dst_port(void *ipv4_hdr, uint16_t portid,
835 		lookup_struct_t *ipv4_l3fwd_lookup_struct)
836 {
837 	int ret = 0;
838 	union ipv4_5tuple_host key;
839 
840 	ipv4_hdr = (uint8_t *)ipv4_hdr + offsetof(struct ipv4_hdr, time_to_live);
841 	__m128i data = _mm_loadu_si128((__m128i *)(ipv4_hdr));
842 	/* Get 5 tuple: dst port, src port, dst IP address, src IP address and
843 	   protocol */
844 	key.xmm = _mm_and_si128(data, mask0);
845 	/* Find destination port */
846 	ret = rte_hash_lookup(ipv4_l3fwd_lookup_struct, (const void *)&key);
847 	return ((ret < 0) ? portid : ipv4_l3fwd_out_if[ret]);
848 }
849 
850 static inline uint16_t
851 get_ipv6_dst_port(void *ipv6_hdr, uint16_t portid,
852 		lookup_struct_t *ipv6_l3fwd_lookup_struct)
853 {
854 	int ret = 0;
855 	union ipv6_5tuple_host key;
856 
857 	ipv6_hdr = (uint8_t *)ipv6_hdr + offsetof(struct ipv6_hdr, payload_len);
858 	__m128i data0 = _mm_loadu_si128((__m128i *)(ipv6_hdr));
859 	__m128i data1 = _mm_loadu_si128((__m128i *)(((uint8_t *)ipv6_hdr) +
860 			sizeof(__m128i)));
861 	__m128i data2 = _mm_loadu_si128((__m128i *)(((uint8_t *)ipv6_hdr) +
862 			sizeof(__m128i) + sizeof(__m128i)));
863 	/* Get part of 5 tuple: src IP address lower 96 bits and protocol */
864 	key.xmm[0] = _mm_and_si128(data0, mask1);
865 	/* Get part of 5 tuple: dst IP address lower 96 bits and src IP address
866 	   higher 32 bits */
867 	key.xmm[1] = data1;
868 	/* Get part of 5 tuple: dst port and src port and dst IP address higher
869 	   32 bits */
870 	key.xmm[2] = _mm_and_si128(data2, mask2);
871 
872 	/* Find destination port */
873 	ret = rte_hash_lookup(ipv6_l3fwd_lookup_struct, (const void *)&key);
874 	return ((ret < 0) ? portid : ipv6_l3fwd_out_if[ret]);
875 }
876 #endif
877 
878 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
879 
880 static inline uint16_t
881 get_ipv4_dst_port(void *ipv4_hdr, uint16_t portid,
882 		lookup_struct_t *ipv4_l3fwd_lookup_struct)
883 {
884 	uint32_t next_hop;
885 
886 	return ((rte_lpm_lookup(ipv4_l3fwd_lookup_struct,
887 		rte_be_to_cpu_32(((struct ipv4_hdr *)ipv4_hdr)->dst_addr),
888 		&next_hop) == 0) ? next_hop : portid);
889 }
890 
891 static inline uint16_t
892 get_ipv6_dst_port(void *ipv6_hdr,  uint16_t portid,
893 		lookup6_struct_t *ipv6_l3fwd_lookup_struct)
894 {
895 	uint32_t next_hop;
896 
897 	return ((rte_lpm6_lookup(ipv6_l3fwd_lookup_struct,
898 			((struct ipv6_hdr *)ipv6_hdr)->dst_addr, &next_hop) == 0) ?
899 			next_hop : portid);
900 }
901 #endif
902 
903 static inline void l3fwd_simple_forward(struct rte_mbuf *m, uint16_t portid)
904 		__attribute__((unused));
905 
906 #if ((APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) && \
907 	(ENABLE_MULTI_BUFFER_OPTIMIZE == 1))
908 
909 #define MASK_ALL_PKTS   0xff
910 #define EXCLUDE_1ST_PKT 0xfe
911 #define EXCLUDE_2ND_PKT 0xfd
912 #define EXCLUDE_3RD_PKT 0xfb
913 #define EXCLUDE_4TH_PKT 0xf7
914 #define EXCLUDE_5TH_PKT 0xef
915 #define EXCLUDE_6TH_PKT 0xdf
916 #define EXCLUDE_7TH_PKT 0xbf
917 #define EXCLUDE_8TH_PKT 0x7f
918 
919 static inline void
920 simple_ipv4_fwd_8pkts(struct rte_mbuf *m[8], uint16_t portid)
921 {
922 	struct ether_hdr *eth_hdr[8];
923 	struct ipv4_hdr *ipv4_hdr[8];
924 	uint16_t dst_port[8];
925 	int32_t ret[8];
926 	union ipv4_5tuple_host key[8];
927 	__m128i data[8];
928 
929 	eth_hdr[0] = rte_pktmbuf_mtod(m[0], struct ether_hdr *);
930 	eth_hdr[1] = rte_pktmbuf_mtod(m[1], struct ether_hdr *);
931 	eth_hdr[2] = rte_pktmbuf_mtod(m[2], struct ether_hdr *);
932 	eth_hdr[3] = rte_pktmbuf_mtod(m[3], struct ether_hdr *);
933 	eth_hdr[4] = rte_pktmbuf_mtod(m[4], struct ether_hdr *);
934 	eth_hdr[5] = rte_pktmbuf_mtod(m[5], struct ether_hdr *);
935 	eth_hdr[6] = rte_pktmbuf_mtod(m[6], struct ether_hdr *);
936 	eth_hdr[7] = rte_pktmbuf_mtod(m[7], struct ether_hdr *);
937 
938 	/* Handle IPv4 headers.*/
939 	ipv4_hdr[0] = rte_pktmbuf_mtod_offset(m[0], struct ipv4_hdr *,
940 			sizeof(struct ether_hdr));
941 	ipv4_hdr[1] = rte_pktmbuf_mtod_offset(m[1], struct ipv4_hdr *,
942 			sizeof(struct ether_hdr));
943 	ipv4_hdr[2] = rte_pktmbuf_mtod_offset(m[2], struct ipv4_hdr *,
944 			sizeof(struct ether_hdr));
945 	ipv4_hdr[3] = rte_pktmbuf_mtod_offset(m[3], struct ipv4_hdr *,
946 			sizeof(struct ether_hdr));
947 	ipv4_hdr[4] = rte_pktmbuf_mtod_offset(m[4], struct ipv4_hdr *,
948 			sizeof(struct ether_hdr));
949 	ipv4_hdr[5] = rte_pktmbuf_mtod_offset(m[5], struct ipv4_hdr *,
950 			sizeof(struct ether_hdr));
951 	ipv4_hdr[6] = rte_pktmbuf_mtod_offset(m[6], struct ipv4_hdr *,
952 			sizeof(struct ether_hdr));
953 	ipv4_hdr[7] = rte_pktmbuf_mtod_offset(m[7], struct ipv4_hdr *,
954 			sizeof(struct ether_hdr));
955 
956 #ifdef DO_RFC_1812_CHECKS
957 	/* Check to make sure the packet is valid (RFC1812) */
958 	uint8_t valid_mask = MASK_ALL_PKTS;
959 
960 	if (is_valid_ipv4_pkt(ipv4_hdr[0], m[0]->pkt_len) < 0) {
961 		rte_pktmbuf_free(m[0]);
962 		valid_mask &= EXCLUDE_1ST_PKT;
963 	}
964 	if (is_valid_ipv4_pkt(ipv4_hdr[1], m[1]->pkt_len) < 0) {
965 		rte_pktmbuf_free(m[1]);
966 		valid_mask &= EXCLUDE_2ND_PKT;
967 	}
968 	if (is_valid_ipv4_pkt(ipv4_hdr[2], m[2]->pkt_len) < 0) {
969 		rte_pktmbuf_free(m[2]);
970 		valid_mask &= EXCLUDE_3RD_PKT;
971 	}
972 	if (is_valid_ipv4_pkt(ipv4_hdr[3], m[3]->pkt_len) < 0) {
973 		rte_pktmbuf_free(m[3]);
974 		valid_mask &= EXCLUDE_4TH_PKT;
975 	}
976 	if (is_valid_ipv4_pkt(ipv4_hdr[4], m[4]->pkt_len) < 0) {
977 		rte_pktmbuf_free(m[4]);
978 		valid_mask &= EXCLUDE_5TH_PKT;
979 	}
980 	if (is_valid_ipv4_pkt(ipv4_hdr[5], m[5]->pkt_len) < 0) {
981 		rte_pktmbuf_free(m[5]);
982 		valid_mask &= EXCLUDE_6TH_PKT;
983 	}
984 	if (is_valid_ipv4_pkt(ipv4_hdr[6], m[6]->pkt_len) < 0) {
985 		rte_pktmbuf_free(m[6]);
986 		valid_mask &= EXCLUDE_7TH_PKT;
987 	}
988 	if (is_valid_ipv4_pkt(ipv4_hdr[7], m[7]->pkt_len) < 0) {
989 		rte_pktmbuf_free(m[7]);
990 		valid_mask &= EXCLUDE_8TH_PKT;
991 	}
992 	if (unlikely(valid_mask != MASK_ALL_PKTS)) {
993 		if (valid_mask == 0)
994 			return;
995 
996 		uint8_t i = 0;
997 
		for (i = 0; i < 8; i++)
			if ((0x1 << i) & valid_mask)
				l3fwd_simple_forward(m[i], portid);

		/* All eight mbufs have now been freed or forwarded, so they
		 * must not reach the bulk lookup/send path below. */
		return;
	}
1002 #endif /* End of #ifdef DO_RFC_1812_CHECKS */
1003 
1004 	data[0] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[0], __m128i *,
1005 			sizeof(struct ether_hdr) +
1006 			offsetof(struct ipv4_hdr, time_to_live)));
1007 	data[1] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[1], __m128i *,
1008 			sizeof(struct ether_hdr) +
1009 			offsetof(struct ipv4_hdr, time_to_live)));
1010 	data[2] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[2], __m128i *,
1011 			sizeof(struct ether_hdr) +
1012 			offsetof(struct ipv4_hdr, time_to_live)));
1013 	data[3] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[3], __m128i *,
1014 			sizeof(struct ether_hdr) +
1015 			offsetof(struct ipv4_hdr, time_to_live)));
1016 	data[4] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[4], __m128i *,
1017 			sizeof(struct ether_hdr) +
1018 			offsetof(struct ipv4_hdr, time_to_live)));
1019 	data[5] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[5], __m128i *,
1020 			sizeof(struct ether_hdr) +
1021 			offsetof(struct ipv4_hdr, time_to_live)));
1022 	data[6] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[6], __m128i *,
1023 			sizeof(struct ether_hdr) +
1024 			offsetof(struct ipv4_hdr, time_to_live)));
1025 	data[7] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[7], __m128i *,
1026 			sizeof(struct ether_hdr) +
1027 			offsetof(struct ipv4_hdr, time_to_live)));
1028 
1029 	key[0].xmm = _mm_and_si128(data[0], mask0);
1030 	key[1].xmm = _mm_and_si128(data[1], mask0);
1031 	key[2].xmm = _mm_and_si128(data[2], mask0);
1032 	key[3].xmm = _mm_and_si128(data[3], mask0);
1033 	key[4].xmm = _mm_and_si128(data[4], mask0);
1034 	key[5].xmm = _mm_and_si128(data[5], mask0);
1035 	key[6].xmm = _mm_and_si128(data[6], mask0);
1036 	key[7].xmm = _mm_and_si128(data[7], mask0);
1037 
1038 	const void *key_array[8] = {&key[0], &key[1], &key[2], &key[3],
1039 			&key[4], &key[5], &key[6], &key[7]};
1040 
1041 	rte_hash_lookup_bulk(RTE_PER_LCORE(lcore_conf)->ipv4_lookup_struct,
1042 			&key_array[0], 8, ret);
1043 	dst_port[0] = ((ret[0] < 0) ? portid : ipv4_l3fwd_out_if[ret[0]]);
1044 	dst_port[1] = ((ret[1] < 0) ? portid : ipv4_l3fwd_out_if[ret[1]]);
1045 	dst_port[2] = ((ret[2] < 0) ? portid : ipv4_l3fwd_out_if[ret[2]]);
1046 	dst_port[3] = ((ret[3] < 0) ? portid : ipv4_l3fwd_out_if[ret[3]]);
1047 	dst_port[4] = ((ret[4] < 0) ? portid : ipv4_l3fwd_out_if[ret[4]]);
1048 	dst_port[5] = ((ret[5] < 0) ? portid : ipv4_l3fwd_out_if[ret[5]]);
1049 	dst_port[6] = ((ret[6] < 0) ? portid : ipv4_l3fwd_out_if[ret[6]]);
1050 	dst_port[7] = ((ret[7] < 0) ? portid : ipv4_l3fwd_out_if[ret[7]]);
1051 
1052 	if (dst_port[0] >= RTE_MAX_ETHPORTS ||
1053 			(enabled_port_mask & 1 << dst_port[0]) == 0)
1054 		dst_port[0] = portid;
1055 	if (dst_port[1] >= RTE_MAX_ETHPORTS ||
1056 			(enabled_port_mask & 1 << dst_port[1]) == 0)
1057 		dst_port[1] = portid;
1058 	if (dst_port[2] >= RTE_MAX_ETHPORTS ||
1059 			(enabled_port_mask & 1 << dst_port[2]) == 0)
1060 		dst_port[2] = portid;
1061 	if (dst_port[3] >= RTE_MAX_ETHPORTS ||
1062 			(enabled_port_mask & 1 << dst_port[3]) == 0)
1063 		dst_port[3] = portid;
1064 	if (dst_port[4] >= RTE_MAX_ETHPORTS ||
1065 			(enabled_port_mask & 1 << dst_port[4]) == 0)
1066 		dst_port[4] = portid;
1067 	if (dst_port[5] >= RTE_MAX_ETHPORTS ||
1068 			(enabled_port_mask & 1 << dst_port[5]) == 0)
1069 		dst_port[5] = portid;
1070 	if (dst_port[6] >= RTE_MAX_ETHPORTS ||
1071 			(enabled_port_mask & 1 << dst_port[6]) == 0)
1072 		dst_port[6] = portid;
1073 	if (dst_port[7] >= RTE_MAX_ETHPORTS ||
1074 			(enabled_port_mask & 1 << dst_port[7]) == 0)
1075 		dst_port[7] = portid;
1076 
1077 #ifdef DO_RFC_1812_CHECKS
1078 	/* Update time to live and header checksum */
1079 	--(ipv4_hdr[0]->time_to_live);
1080 	--(ipv4_hdr[1]->time_to_live);
1081 	--(ipv4_hdr[2]->time_to_live);
1082 	--(ipv4_hdr[3]->time_to_live);
1083 	++(ipv4_hdr[0]->hdr_checksum);
1084 	++(ipv4_hdr[1]->hdr_checksum);
1085 	++(ipv4_hdr[2]->hdr_checksum);
1086 	++(ipv4_hdr[3]->hdr_checksum);
1087 	--(ipv4_hdr[4]->time_to_live);
1088 	--(ipv4_hdr[5]->time_to_live);
1089 	--(ipv4_hdr[6]->time_to_live);
1090 	--(ipv4_hdr[7]->time_to_live);
1091 	++(ipv4_hdr[4]->hdr_checksum);
1092 	++(ipv4_hdr[5]->hdr_checksum);
1093 	++(ipv4_hdr[6]->hdr_checksum);
1094 	++(ipv4_hdr[7]->hdr_checksum);
1095 #endif
1096 
1097 	/* dst addr */
1098 	*(uint64_t *)&eth_hdr[0]->d_addr = dest_eth_addr[dst_port[0]];
1099 	*(uint64_t *)&eth_hdr[1]->d_addr = dest_eth_addr[dst_port[1]];
1100 	*(uint64_t *)&eth_hdr[2]->d_addr = dest_eth_addr[dst_port[2]];
1101 	*(uint64_t *)&eth_hdr[3]->d_addr = dest_eth_addr[dst_port[3]];
1102 	*(uint64_t *)&eth_hdr[4]->d_addr = dest_eth_addr[dst_port[4]];
1103 	*(uint64_t *)&eth_hdr[5]->d_addr = dest_eth_addr[dst_port[5]];
1104 	*(uint64_t *)&eth_hdr[6]->d_addr = dest_eth_addr[dst_port[6]];
1105 	*(uint64_t *)&eth_hdr[7]->d_addr = dest_eth_addr[dst_port[7]];
1106 
1107 	/* src addr */
1108 	ether_addr_copy(&ports_eth_addr[dst_port[0]], &eth_hdr[0]->s_addr);
1109 	ether_addr_copy(&ports_eth_addr[dst_port[1]], &eth_hdr[1]->s_addr);
1110 	ether_addr_copy(&ports_eth_addr[dst_port[2]], &eth_hdr[2]->s_addr);
1111 	ether_addr_copy(&ports_eth_addr[dst_port[3]], &eth_hdr[3]->s_addr);
1112 	ether_addr_copy(&ports_eth_addr[dst_port[4]], &eth_hdr[4]->s_addr);
1113 	ether_addr_copy(&ports_eth_addr[dst_port[5]], &eth_hdr[5]->s_addr);
1114 	ether_addr_copy(&ports_eth_addr[dst_port[6]], &eth_hdr[6]->s_addr);
1115 	ether_addr_copy(&ports_eth_addr[dst_port[7]], &eth_hdr[7]->s_addr);
1116 
1117 	send_single_packet(m[0], (uint8_t)dst_port[0]);
1118 	send_single_packet(m[1], (uint8_t)dst_port[1]);
1119 	send_single_packet(m[2], (uint8_t)dst_port[2]);
1120 	send_single_packet(m[3], (uint8_t)dst_port[3]);
1121 	send_single_packet(m[4], (uint8_t)dst_port[4]);
1122 	send_single_packet(m[5], (uint8_t)dst_port[5]);
1123 	send_single_packet(m[6], (uint8_t)dst_port[6]);
1124 	send_single_packet(m[7], (uint8_t)dst_port[7]);
1125 
1126 }
1127 
1128 static inline void get_ipv6_5tuple(struct rte_mbuf *m0, __m128i mask0,
1129 		__m128i mask1, union ipv6_5tuple_host *key)
1130 {
1131 	__m128i tmpdata0 = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m0,
1132 			__m128i *, sizeof(struct ether_hdr) +
1133 			offsetof(struct ipv6_hdr, payload_len)));
1134 	__m128i tmpdata1 = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m0,
1135 			__m128i *, sizeof(struct ether_hdr) +
1136 			offsetof(struct ipv6_hdr, payload_len) + sizeof(__m128i)));
1137 	__m128i tmpdata2 = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m0,
1138 			__m128i *, sizeof(struct ether_hdr) +
1139 			offsetof(struct ipv6_hdr, payload_len) + sizeof(__m128i) +
1140 			sizeof(__m128i)));
1141 	key->xmm[0] = _mm_and_si128(tmpdata0, mask0);
1142 	key->xmm[1] = tmpdata1;
1143 	key->xmm[2] = _mm_and_si128(tmpdata2, mask1);
1144 }
1145 
1146 static inline void
1147 simple_ipv6_fwd_8pkts(struct rte_mbuf *m[8], uint16_t portid)
1148 {
1149 	int32_t ret[8];
1150 	uint16_t dst_port[8];
1151 	struct ether_hdr *eth_hdr[8];
1152 	union ipv6_5tuple_host key[8];
1153 
1154 	__attribute__((unused)) struct ipv6_hdr *ipv6_hdr[8];
1155 
1156 	eth_hdr[0] = rte_pktmbuf_mtod(m[0], struct ether_hdr *);
1157 	eth_hdr[1] = rte_pktmbuf_mtod(m[1], struct ether_hdr *);
1158 	eth_hdr[2] = rte_pktmbuf_mtod(m[2], struct ether_hdr *);
1159 	eth_hdr[3] = rte_pktmbuf_mtod(m[3], struct ether_hdr *);
1160 	eth_hdr[4] = rte_pktmbuf_mtod(m[4], struct ether_hdr *);
1161 	eth_hdr[5] = rte_pktmbuf_mtod(m[5], struct ether_hdr *);
1162 	eth_hdr[6] = rte_pktmbuf_mtod(m[6], struct ether_hdr *);
1163 	eth_hdr[7] = rte_pktmbuf_mtod(m[7], struct ether_hdr *);
1164 
1165 	/* Handle IPv6 headers.*/
1166 	ipv6_hdr[0] = rte_pktmbuf_mtod_offset(m[0], struct ipv6_hdr *,
1167 			sizeof(struct ether_hdr));
1168 	ipv6_hdr[1] = rte_pktmbuf_mtod_offset(m[1], struct ipv6_hdr *,
1169 			sizeof(struct ether_hdr));
1170 	ipv6_hdr[2] = rte_pktmbuf_mtod_offset(m[2], struct ipv6_hdr *,
1171 			sizeof(struct ether_hdr));
1172 	ipv6_hdr[3] = rte_pktmbuf_mtod_offset(m[3], struct ipv6_hdr *,
1173 			sizeof(struct ether_hdr));
1174 	ipv6_hdr[4] = rte_pktmbuf_mtod_offset(m[4], struct ipv6_hdr *,
1175 			sizeof(struct ether_hdr));
1176 	ipv6_hdr[5] = rte_pktmbuf_mtod_offset(m[5], struct ipv6_hdr *,
1177 			sizeof(struct ether_hdr));
1178 	ipv6_hdr[6] = rte_pktmbuf_mtod_offset(m[6], struct ipv6_hdr *,
1179 			sizeof(struct ether_hdr));
1180 	ipv6_hdr[7] = rte_pktmbuf_mtod_offset(m[7], struct ipv6_hdr *,
1181 			sizeof(struct ether_hdr));
1182 
1183 	get_ipv6_5tuple(m[0], mask1, mask2, &key[0]);
1184 	get_ipv6_5tuple(m[1], mask1, mask2, &key[1]);
1185 	get_ipv6_5tuple(m[2], mask1, mask2, &key[2]);
1186 	get_ipv6_5tuple(m[3], mask1, mask2, &key[3]);
1187 	get_ipv6_5tuple(m[4], mask1, mask2, &key[4]);
1188 	get_ipv6_5tuple(m[5], mask1, mask2, &key[5]);
1189 	get_ipv6_5tuple(m[6], mask1, mask2, &key[6]);
1190 	get_ipv6_5tuple(m[7], mask1, mask2, &key[7]);
1191 
1192 	const void *key_array[8] = {&key[0], &key[1], &key[2], &key[3],
1193 			&key[4], &key[5], &key[6], &key[7]};
1194 
1195 	rte_hash_lookup_bulk(RTE_PER_LCORE(lcore_conf)->ipv6_lookup_struct,
			&key_array[0], 8, ret);
1197 	dst_port[0] = ((ret[0] < 0) ? portid : ipv6_l3fwd_out_if[ret[0]]);
1198 	dst_port[1] = ((ret[1] < 0) ? portid : ipv6_l3fwd_out_if[ret[1]]);
1199 	dst_port[2] = ((ret[2] < 0) ? portid : ipv6_l3fwd_out_if[ret[2]]);
1200 	dst_port[3] = ((ret[3] < 0) ? portid : ipv6_l3fwd_out_if[ret[3]]);
1201 	dst_port[4] = ((ret[4] < 0) ? portid : ipv6_l3fwd_out_if[ret[4]]);
1202 	dst_port[5] = ((ret[5] < 0) ? portid : ipv6_l3fwd_out_if[ret[5]]);
1203 	dst_port[6] = ((ret[6] < 0) ? portid : ipv6_l3fwd_out_if[ret[6]]);
1204 	dst_port[7] = ((ret[7] < 0) ? portid : ipv6_l3fwd_out_if[ret[7]]);
1205 
1206 	if (dst_port[0] >= RTE_MAX_ETHPORTS ||
1207 			(enabled_port_mask & 1 << dst_port[0]) == 0)
1208 		dst_port[0] = portid;
1209 	if (dst_port[1] >= RTE_MAX_ETHPORTS ||
1210 			(enabled_port_mask & 1 << dst_port[1]) == 0)
1211 		dst_port[1] = portid;
1212 	if (dst_port[2] >= RTE_MAX_ETHPORTS ||
1213 			(enabled_port_mask & 1 << dst_port[2]) == 0)
1214 		dst_port[2] = portid;
1215 	if (dst_port[3] >= RTE_MAX_ETHPORTS ||
1216 			(enabled_port_mask & 1 << dst_port[3]) == 0)
1217 		dst_port[3] = portid;
1218 	if (dst_port[4] >= RTE_MAX_ETHPORTS ||
1219 			(enabled_port_mask & 1 << dst_port[4]) == 0)
1220 		dst_port[4] = portid;
1221 	if (dst_port[5] >= RTE_MAX_ETHPORTS ||
1222 			(enabled_port_mask & 1 << dst_port[5]) == 0)
1223 		dst_port[5] = portid;
1224 	if (dst_port[6] >= RTE_MAX_ETHPORTS ||
1225 			(enabled_port_mask & 1 << dst_port[6]) == 0)
1226 		dst_port[6] = portid;
1227 	if (dst_port[7] >= RTE_MAX_ETHPORTS ||
1228 			(enabled_port_mask & 1 << dst_port[7]) == 0)
1229 		dst_port[7] = portid;
1230 
1231 	/* dst addr */
1232 	*(uint64_t *)&eth_hdr[0]->d_addr = dest_eth_addr[dst_port[0]];
1233 	*(uint64_t *)&eth_hdr[1]->d_addr = dest_eth_addr[dst_port[1]];
1234 	*(uint64_t *)&eth_hdr[2]->d_addr = dest_eth_addr[dst_port[2]];
1235 	*(uint64_t *)&eth_hdr[3]->d_addr = dest_eth_addr[dst_port[3]];
1236 	*(uint64_t *)&eth_hdr[4]->d_addr = dest_eth_addr[dst_port[4]];
1237 	*(uint64_t *)&eth_hdr[5]->d_addr = dest_eth_addr[dst_port[5]];
1238 	*(uint64_t *)&eth_hdr[6]->d_addr = dest_eth_addr[dst_port[6]];
1239 	*(uint64_t *)&eth_hdr[7]->d_addr = dest_eth_addr[dst_port[7]];
1240 
1241 	/* src addr */
1242 	ether_addr_copy(&ports_eth_addr[dst_port[0]], &eth_hdr[0]->s_addr);
1243 	ether_addr_copy(&ports_eth_addr[dst_port[1]], &eth_hdr[1]->s_addr);
1244 	ether_addr_copy(&ports_eth_addr[dst_port[2]], &eth_hdr[2]->s_addr);
1245 	ether_addr_copy(&ports_eth_addr[dst_port[3]], &eth_hdr[3]->s_addr);
1246 	ether_addr_copy(&ports_eth_addr[dst_port[4]], &eth_hdr[4]->s_addr);
1247 	ether_addr_copy(&ports_eth_addr[dst_port[5]], &eth_hdr[5]->s_addr);
1248 	ether_addr_copy(&ports_eth_addr[dst_port[6]], &eth_hdr[6]->s_addr);
1249 	ether_addr_copy(&ports_eth_addr[dst_port[7]], &eth_hdr[7]->s_addr);
1250 
1251 	send_single_packet(m[0], dst_port[0]);
1252 	send_single_packet(m[1], dst_port[1]);
1253 	send_single_packet(m[2], dst_port[2]);
1254 	send_single_packet(m[3], dst_port[3]);
1255 	send_single_packet(m[4], dst_port[4]);
1256 	send_single_packet(m[5], dst_port[5]);
1257 	send_single_packet(m[6], dst_port[6]);
1258 	send_single_packet(m[7], dst_port[7]);
1259 
1260 }
1261 #endif /* APP_LOOKUP_METHOD */
1262 
1263 static __rte_always_inline void
1264 l3fwd_simple_forward(struct rte_mbuf *m, uint16_t portid)
1265 {
1266 	struct ether_hdr *eth_hdr;
1267 	struct ipv4_hdr *ipv4_hdr;
1268 	uint16_t dst_port;
1269 
1270 	eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
1271 
1272 	if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) {
1273 		/* Handle IPv4 headers.*/
1274 		ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *,
1275 				sizeof(struct ether_hdr));
1276 
1277 #ifdef DO_RFC_1812_CHECKS
1278 		/* Check to make sure the packet is valid (RFC1812) */
1279 		if (is_valid_ipv4_pkt(ipv4_hdr, m->pkt_len) < 0) {
1280 			rte_pktmbuf_free(m);
1281 			return;
1282 		}
1283 #endif
1284 
1285 		 dst_port = get_ipv4_dst_port(ipv4_hdr, portid,
1286 			RTE_PER_LCORE(lcore_conf)->ipv4_lookup_struct);
1287 		if (dst_port >= RTE_MAX_ETHPORTS ||
1288 				(enabled_port_mask & 1 << dst_port) == 0)
1289 			dst_port = portid;
1290 
1291 #ifdef DO_RFC_1812_CHECKS
1292 		/* Update time to live and header checksum */
1293 		--(ipv4_hdr->time_to_live);
1294 		++(ipv4_hdr->hdr_checksum);
1295 #endif
1296 		/* dst addr */
1297 		*(uint64_t *)&eth_hdr->d_addr = dest_eth_addr[dst_port];
1298 
1299 		/* src addr */
1300 		ether_addr_copy(&ports_eth_addr[dst_port], &eth_hdr->s_addr);
1301 
1302 		send_single_packet(m, dst_port);
1303 	} else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) {
1304 		/* Handle IPv6 headers.*/
1305 		struct ipv6_hdr *ipv6_hdr;
1306 
1307 		ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *,
1308 				sizeof(struct ether_hdr));
1309 
1310 		dst_port = get_ipv6_dst_port(ipv6_hdr, portid,
1311 				RTE_PER_LCORE(lcore_conf)->ipv6_lookup_struct);
1312 
1313 		if (dst_port >= RTE_MAX_ETHPORTS ||
1314 				(enabled_port_mask & 1 << dst_port) == 0)
1315 			dst_port = portid;
1316 
1317 		/* dst addr */
1318 		*(uint64_t *)&eth_hdr->d_addr = dest_eth_addr[dst_port];
1319 
1320 		/* src addr */
1321 		ether_addr_copy(&ports_eth_addr[dst_port], &eth_hdr->s_addr);
1322 
1323 		send_single_packet(m, dst_port);
1324 	} else
1325 		/* Free the mbuf that contains non-IPV4/IPV6 packet */
1326 		rte_pktmbuf_free(m);
1327 }
1328 
1329 #if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && \
1330 	(ENABLE_MULTI_BUFFER_OPTIMIZE == 1))
1331 #ifdef DO_RFC_1812_CHECKS
1332 
1333 #define	IPV4_MIN_VER_IHL	0x45
1334 #define	IPV4_MAX_VER_IHL	0x4f
1335 #define	IPV4_MAX_VER_IHL_DIFF	(IPV4_MAX_VER_IHL - IPV4_MIN_VER_IHL)
1336 
1337 /* Minimum value of IPV4 total length (20B) in network byte order. */
1338 #define	IPV4_MIN_LEN_BE	(sizeof(struct ipv4_hdr) << 8)
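/*
 * 20 << 8 == 0x1400, which is how the 16-bit value 20 (0x0014) reads when
 * its big-endian bytes are interpreted as a host integer on a little-endian
 * CPU, so total_length can be range-checked without byte-swapping it first.
 */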
1339 
1340 /*
1341  * From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2:
1342  * - The IP version number must be 4.
1343  * - The IP header length field must be large enough to hold the
1344  *    minimum length legal IP datagram (20 bytes = 5 words).
1345  * - The IP total length field must be large enough to hold the IP
1346  *   datagram header, whose length is specified in the IP header length
1347  *   field.
 * If we encounter an invalid IPv4 packet, its destination port is set to
 * the BAD_PORT value.
1350  */
1351 static __rte_always_inline void
1352 rfc1812_process(struct ipv4_hdr *ipv4_hdr, uint16_t *dp, uint32_t ptype)
1353 {
1354 	uint8_t ihl;
1355 
1356 	if (RTE_ETH_IS_IPV4_HDR(ptype)) {
1357 		ihl = ipv4_hdr->version_ihl - IPV4_MIN_VER_IHL;
1358 
1359 		ipv4_hdr->time_to_live--;
1360 		ipv4_hdr->hdr_checksum++;
1361 
1362 		if (ihl > IPV4_MAX_VER_IHL_DIFF ||
1363 				((uint8_t)ipv4_hdr->total_length == 0 &&
1364 				ipv4_hdr->total_length < IPV4_MIN_LEN_BE)) {
1365 			dp[0] = BAD_PORT;
1366 		}
1367 	}
1368 }
1369 
1370 #else
1371 #define	rfc1812_process(mb, dp, ptype)	do { } while (0)
1372 #endif /* DO_RFC_1812_CHECKS */
1373 #endif /* APP_LOOKUP_LPM && ENABLE_MULTI_BUFFER_OPTIMIZE */
1374 
1375 
1376 #if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && \
1377 	(ENABLE_MULTI_BUFFER_OPTIMIZE == 1))
1378 
1379 static __rte_always_inline uint16_t
1380 get_dst_port(struct rte_mbuf *pkt, uint32_t dst_ipv4, uint16_t portid)
1381 {
1382 	uint32_t next_hop;
1383 	struct ipv6_hdr *ipv6_hdr;
1384 	struct ether_hdr *eth_hdr;
1385 
1386 	if (RTE_ETH_IS_IPV4_HDR(pkt->packet_type)) {
1387 		return (uint16_t) ((rte_lpm_lookup(
1388 				RTE_PER_LCORE(lcore_conf)->ipv4_lookup_struct, dst_ipv4,
1389 				&next_hop) == 0) ? next_hop : portid);
1390 
1391 	} else if (RTE_ETH_IS_IPV6_HDR(pkt->packet_type)) {
1392 
1393 		eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
1394 		ipv6_hdr = (struct ipv6_hdr *)(eth_hdr + 1);
1395 
1396 		return (uint16_t) ((rte_lpm6_lookup(
1397 				RTE_PER_LCORE(lcore_conf)->ipv6_lookup_struct,
1398 				ipv6_hdr->dst_addr, &next_hop) == 0) ?
1399 				next_hop : portid);
1400 
1401 	}
1402 
1403 	return portid;
1404 }
1405 
1406 static inline void
1407 process_packet(struct rte_mbuf *pkt, uint16_t *dst_port, uint16_t portid)
1408 {
1409 	struct ether_hdr *eth_hdr;
1410 	struct ipv4_hdr *ipv4_hdr;
1411 	uint32_t dst_ipv4;
1412 	uint16_t dp;
1413 	__m128i te, ve;
1414 
1415 	eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
1416 	ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1);
1417 
1418 	dst_ipv4 = ipv4_hdr->dst_addr;
1419 	dst_ipv4 = rte_be_to_cpu_32(dst_ipv4);
1420 	dp = get_dst_port(pkt, dst_ipv4, portid);
1421 
1422 	te = _mm_load_si128((__m128i *)eth_hdr);
1423 	ve = val_eth[dp];
1424 
1425 	dst_port[0] = dp;
1426 	rfc1812_process(ipv4_hdr, dst_port, pkt->packet_type);
1427 
1428 	te =  _mm_blend_epi16(te, ve, MASK_ETH);
1429 	_mm_store_si128((__m128i *)eth_hdr, te);
1430 }
1431 
1432 /*
1433  * Read packet_type and destination IPV4 addresses from 4 mbufs.
1434  */
1435 static inline void
1436 processx4_step1(struct rte_mbuf *pkt[FWDSTEP],
1437 		__m128i *dip,
1438 		uint32_t *ipv4_flag)
1439 {
1440 	struct ipv4_hdr *ipv4_hdr;
1441 	struct ether_hdr *eth_hdr;
1442 	uint32_t x0, x1, x2, x3;
1443 
1444 	eth_hdr = rte_pktmbuf_mtod(pkt[0], struct ether_hdr *);
1445 	ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1);
1446 	x0 = ipv4_hdr->dst_addr;
1447 	ipv4_flag[0] = pkt[0]->packet_type & RTE_PTYPE_L3_IPV4;
1448 
1449 	eth_hdr = rte_pktmbuf_mtod(pkt[1], struct ether_hdr *);
1450 	ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1);
1451 	x1 = ipv4_hdr->dst_addr;
1452 	ipv4_flag[0] &= pkt[1]->packet_type;
1453 
1454 	eth_hdr = rte_pktmbuf_mtod(pkt[2], struct ether_hdr *);
1455 	ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1);
1456 	x2 = ipv4_hdr->dst_addr;
1457 	ipv4_flag[0] &= pkt[2]->packet_type;
1458 
1459 	eth_hdr = rte_pktmbuf_mtod(pkt[3], struct ether_hdr *);
1460 	ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1);
1461 	x3 = ipv4_hdr->dst_addr;
1462 	ipv4_flag[0] &= pkt[3]->packet_type;
1463 
1464 	dip[0] = _mm_set_epi32(x3, x2, x1, x0);
1465 }
1466 
1467 /*
1468  * Lookup into LPM for destination port.
1469  * If lookup fails, use incoming port (portid) as destination port.
1470  */
1471 static inline void
1472 processx4_step2(__m128i dip,
1473 		uint32_t ipv4_flag,
1474 		uint16_t portid,
1475 		struct rte_mbuf *pkt[FWDSTEP],
1476 		uint16_t dprt[FWDSTEP])
1477 {
1478 	rte_xmm_t dst;
1479 	const __m128i bswap_mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11,
1480 			4, 5, 6, 7, 0, 1, 2, 3);
1481 
1482 	/* Byte swap 4 IPV4 addresses. */
1483 	dip = _mm_shuffle_epi8(dip, bswap_mask);
1484 
1485 	/* if all 4 packets are IPV4. */
1486 	if (likely(ipv4_flag)) {
1487 		rte_lpm_lookupx4(RTE_PER_LCORE(lcore_conf)->ipv4_lookup_struct, dip,
1488 				dst.u32, portid);
1489 
		/* get rid of the unused upper 16 bits of each dport. */
1491 		dst.x = _mm_packs_epi32(dst.x, dst.x);
1492 		*(uint64_t *)dprt = dst.u64[0];
1493 	} else {
1494 		dst.x = dip;
1495 		dprt[0] = get_dst_port(pkt[0], dst.u32[0], portid);
1496 		dprt[1] = get_dst_port(pkt[1], dst.u32[1], portid);
1497 		dprt[2] = get_dst_port(pkt[2], dst.u32[2], portid);
1498 		dprt[3] = get_dst_port(pkt[3], dst.u32[3], portid);
1499 	}
1500 }
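/*
 * Note on the vector branch above: rte_lpm_lookupx4() takes a default value
 * (portid here) that is written into hop entries whose lookup misses, so no
 * separate failure handling is needed on that path.
 */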
1501 
1502 /*
1503  * Update source and destination MAC addresses in the ethernet header.
1504  * Perform RFC1812 checks and updates for IPV4 packets.
1505  */
1506 static inline void
1507 processx4_step3(struct rte_mbuf *pkt[FWDSTEP], uint16_t dst_port[FWDSTEP])
1508 {
1509 	__m128i te[FWDSTEP];
1510 	__m128i ve[FWDSTEP];
1511 	__m128i *p[FWDSTEP];
1512 
1513 	p[0] = rte_pktmbuf_mtod(pkt[0], __m128i *);
1514 	p[1] = rte_pktmbuf_mtod(pkt[1], __m128i *);
1515 	p[2] = rte_pktmbuf_mtod(pkt[2], __m128i *);
1516 	p[3] = rte_pktmbuf_mtod(pkt[3], __m128i *);
1517 
1518 	ve[0] = val_eth[dst_port[0]];
1519 	te[0] = _mm_load_si128(p[0]);
1520 
1521 	ve[1] = val_eth[dst_port[1]];
1522 	te[1] = _mm_load_si128(p[1]);
1523 
1524 	ve[2] = val_eth[dst_port[2]];
1525 	te[2] = _mm_load_si128(p[2]);
1526 
1527 	ve[3] = val_eth[dst_port[3]];
1528 	te[3] = _mm_load_si128(p[3]);
1529 
	/* Update the first 12 bytes, keep the remaining bytes intact. */
1531 	te[0] =  _mm_blend_epi16(te[0], ve[0], MASK_ETH);
1532 	te[1] =  _mm_blend_epi16(te[1], ve[1], MASK_ETH);
1533 	te[2] =  _mm_blend_epi16(te[2], ve[2], MASK_ETH);
1534 	te[3] =  _mm_blend_epi16(te[3], ve[3], MASK_ETH);
1535 
1536 	_mm_store_si128(p[0], te[0]);
1537 	_mm_store_si128(p[1], te[1]);
1538 	_mm_store_si128(p[2], te[2]);
1539 	_mm_store_si128(p[3], te[3]);
1540 
1541 	rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[0] + 1),
1542 			&dst_port[0], pkt[0]->packet_type);
1543 	rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[1] + 1),
1544 			&dst_port[1], pkt[1]->packet_type);
1545 	rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[2] + 1),
1546 			&dst_port[2], pkt[2]->packet_type);
1547 	rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[3] + 1),
1548 			&dst_port[3], pkt[3]->packet_type);
1549 }
1550 
1551 /*
 * We group consecutive packets with the same destination port into one burst.
 * To avoid extra latency this is done together with some other packet
 * processing, but after we have made a final decision about the packet's
 * destination. To do this we maintain:
 * pnum - array of the number of consecutive packets with the same dest port
 * for each packet in the input burst.
 * lp - pointer to the last updated element in pnum.
 * dlp - dest port value that lp corresponds to.
1560  */
1561 
1562 #define	GRPSZ	(1 << FWDSTEP)
1563 #define	GRPMSK	(GRPSZ - 1)
1564 
1565 #define GROUP_PORT_STEP(dlp, dcp, lp, pn, idx)	do { \
1566 	if (likely((dlp) == (dcp)[(idx)])) {         \
1567 		(lp)[0]++;                           \
1568 	} else {                                     \
1569 		(dlp) = (dcp)[idx];                  \
1570 		(lp) = (pn) + (idx);                 \
1571 		(lp)[0] = 1;                         \
1572 	}                                            \
1573 } while (0)
1574 
1575 /*
1576  * Group consecutive packets with the same destination port in bursts of 4.
 * Suppose we have an array of destination ports:
 * dst_port[] = {a, b, c, d, e, ... }
 * dp1 should contain <a, b, c, d>, dp2 <b, c, d, e>.
 * We do 4 comparisons at once and the result is a 4-bit mask.
 * This mask is used as an index into a prebuilt array of pnum values.
1582  */
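/*
 * A small worked example with illustrative values: for
 * dst_port[] = {1, 1, 2, 2, 3, ...} we get dp1 = <1, 1, 2, 2> and
 * dp2 = <1, 2, 2, 3>; the packed comparison yields the mask v = 0x5
 * (binary 0101), and gptbl[5].pnum = 0x0001000200010002 encodes the
 * per-packet run lengths <2, 1, 2, 1> for this group of four.
 */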
1583 static inline uint16_t *
1584 port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, __m128i dp1, __m128i dp2)
1585 {
1586 	static const struct {
		uint64_t pnum; /* prebuilt 4 values for pnum[]. */
		int32_t  idx;  /* index of the new last updated element. */
		uint16_t lpv;  /* value to add to the last updated element. */
1590 	} gptbl[GRPSZ] = {
1591 	{
1592 		/* 0: a != b, b != c, c != d, d != e */
1593 		.pnum = UINT64_C(0x0001000100010001),
1594 		.idx = 4,
1595 		.lpv = 0,
1596 	},
1597 	{
1598 		/* 1: a == b, b != c, c != d, d != e */
1599 		.pnum = UINT64_C(0x0001000100010002),
1600 		.idx = 4,
1601 		.lpv = 1,
1602 	},
1603 	{
1604 		/* 2: a != b, b == c, c != d, d != e */
1605 		.pnum = UINT64_C(0x0001000100020001),
1606 		.idx = 4,
1607 		.lpv = 0,
1608 	},
1609 	{
1610 		/* 3: a == b, b == c, c != d, d != e */
1611 		.pnum = UINT64_C(0x0001000100020003),
1612 		.idx = 4,
1613 		.lpv = 2,
1614 	},
1615 	{
1616 		/* 4: a != b, b != c, c == d, d != e */
1617 		.pnum = UINT64_C(0x0001000200010001),
1618 		.idx = 4,
1619 		.lpv = 0,
1620 	},
1621 	{
1622 		/* 5: a == b, b != c, c == d, d != e */
1623 		.pnum = UINT64_C(0x0001000200010002),
1624 		.idx = 4,
1625 		.lpv = 1,
1626 	},
1627 	{
1628 		/* 6: a != b, b == c, c == d, d != e */
1629 		.pnum = UINT64_C(0x0001000200030001),
1630 		.idx = 4,
1631 		.lpv = 0,
1632 	},
1633 	{
1634 		/* 7: a == b, b == c, c == d, d != e */
1635 		.pnum = UINT64_C(0x0001000200030004),
1636 		.idx = 4,
1637 		.lpv = 3,
1638 	},
1639 	{
1640 		/* 8: a != b, b != c, c != d, d == e */
1641 		.pnum = UINT64_C(0x0002000100010001),
1642 		.idx = 3,
1643 		.lpv = 0,
1644 	},
1645 	{
1646 		/* 9: a == b, b != c, c != d, d == e */
1647 		.pnum = UINT64_C(0x0002000100010002),
1648 		.idx = 3,
1649 		.lpv = 1,
1650 	},
1651 	{
1652 		/* 0xa: a != b, b == c, c != d, d == e */
1653 		.pnum = UINT64_C(0x0002000100020001),
1654 		.idx = 3,
1655 		.lpv = 0,
1656 	},
1657 	{
1658 		/* 0xb: a == b, b == c, c != d, d == e */
1659 		.pnum = UINT64_C(0x0002000100020003),
1660 		.idx = 3,
1661 		.lpv = 2,
1662 	},
1663 	{
1664 		/* 0xc: a != b, b != c, c == d, d == e */
1665 		.pnum = UINT64_C(0x0002000300010001),
1666 		.idx = 2,
1667 		.lpv = 0,
1668 	},
1669 	{
1670 		/* 0xd: a == b, b != c, c == d, d == e */
1671 		.pnum = UINT64_C(0x0002000300010002),
1672 		.idx = 2,
1673 		.lpv = 1,
1674 	},
1675 	{
1676 		/* 0xe: a != b, b == c, c == d, d == e */
1677 		.pnum = UINT64_C(0x0002000300040001),
1678 		.idx = 1,
1679 		.lpv = 0,
1680 	},
1681 	{
1682 		/* 0xf: a == b, b == c, c == d, d == e */
1683 		.pnum = UINT64_C(0x0002000300040005),
1684 		.idx = 0,
1685 		.lpv = 4,
1686 	},
1687 	};
1688 
1689 	union {
1690 		uint16_t u16[FWDSTEP + 1];
1691 		uint64_t u64;
1692 	} *pnum = (void *)pn;
1693 
1694 	int32_t v;
1695 
1696 	dp1 = _mm_cmpeq_epi16(dp1, dp2);
1697 	dp1 = _mm_unpacklo_epi16(dp1, dp1);
1698 	v = _mm_movemask_ps((__m128)dp1);
1699 
1700 	/* update last port counter. */
1701 	lp[0] += gptbl[v].lpv;
1702 
1703 	/* if dest port value has changed. */
1704 	if (v != GRPMSK) {
1705 		pnum->u64 = gptbl[v].pnum;
1706 		pnum->u16[FWDSTEP] = 1;
1707 		lp = pnum->u16 + gptbl[v].idx;
1708 	}
1709 
1710 	return lp;
1711 }
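
/*
 * For example, if a == b and d == e but b != c and c != d, the comparison
 * mask is v = 0x9 (bit 0 <-> a == b, bit 3 <-> d == e), so gptbl[9]
 * pre-fills this window's pnum slots with {2, 1, 1, 2} and leaves lp
 * pointing at the slot of d, whose group (d, e) may still grow on the
 * next call.
 */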
1712 
1713 #endif /* APP_LOOKUP_METHOD */
1714 
1715 static void
1716 process_burst(struct rte_mbuf *pkts_burst[MAX_PKT_BURST], int nb_rx,
1717 		uint16_t portid)
1718 {
1719 
1720 	int j;
1721 
1722 #if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && \
1723 	(ENABLE_MULTI_BUFFER_OPTIMIZE == 1))
1724 	int32_t k;
1725 	uint16_t dlp;
1726 	uint16_t *lp;
1727 	uint16_t dst_port[MAX_PKT_BURST];
1728 	__m128i dip[MAX_PKT_BURST / FWDSTEP];
1729 	uint32_t ipv4_flag[MAX_PKT_BURST / FWDSTEP];
1730 	uint16_t pnum[MAX_PKT_BURST + 1];
1731 #endif
1732 
1733 
1734 #if (ENABLE_MULTI_BUFFER_OPTIMIZE == 1)
1735 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
1736 	{
1737 		/*
1738 		 * Send nb_rx - nb_rx%8 packets
1739 		 * in groups of 8.
1740 		 */
1741 		int32_t n = RTE_ALIGN_FLOOR(nb_rx, 8);
1742 
1743 		for (j = 0; j < n; j += 8) {
1744 			uint32_t pkt_type =
1745 				pkts_burst[j]->packet_type &
1746 				pkts_burst[j+1]->packet_type &
1747 				pkts_burst[j+2]->packet_type &
1748 				pkts_burst[j+3]->packet_type &
1749 				pkts_burst[j+4]->packet_type &
1750 				pkts_burst[j+5]->packet_type &
1751 				pkts_burst[j+6]->packet_type &
1752 				pkts_burst[j+7]->packet_type;
1753 			if (pkt_type & RTE_PTYPE_L3_IPV4) {
1754 				simple_ipv4_fwd_8pkts(&pkts_burst[j], portid);
1755 			} else if (pkt_type &
1756 				RTE_PTYPE_L3_IPV6) {
1757 				simple_ipv6_fwd_8pkts(&pkts_burst[j], portid);
1758 			} else {
1759 				l3fwd_simple_forward(pkts_burst[j], portid);
1760 				l3fwd_simple_forward(pkts_burst[j+1], portid);
1761 				l3fwd_simple_forward(pkts_burst[j+2], portid);
1762 				l3fwd_simple_forward(pkts_burst[j+3], portid);
1763 				l3fwd_simple_forward(pkts_burst[j+4], portid);
1764 				l3fwd_simple_forward(pkts_burst[j+5], portid);
1765 				l3fwd_simple_forward(pkts_burst[j+6], portid);
1766 				l3fwd_simple_forward(pkts_burst[j+7], portid);
1767 			}
1768 		}
1769 		for (; j < nb_rx ; j++)
1770 			l3fwd_simple_forward(pkts_burst[j], portid);
1771 	}
1772 #elif (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
1773 
1774 	k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
1775 	for (j = 0; j != k; j += FWDSTEP)
1776 		processx4_step1(&pkts_burst[j], &dip[j / FWDSTEP],
1777 				&ipv4_flag[j / FWDSTEP]);
1778 
1779 	k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
1780 	for (j = 0; j != k; j += FWDSTEP)
1781 		processx4_step2(dip[j / FWDSTEP], ipv4_flag[j / FWDSTEP],
1782 				portid, &pkts_burst[j], &dst_port[j]);
1783 
1784 	/*
1785 	 * Finish packet processing and group consecutive
1786 	 * packets with the same destination port.
1787 	 */
1788 	k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
1789 	if (k != 0) {
1790 		__m128i dp1, dp2;
1791 
1792 		lp = pnum;
1793 		lp[0] = 1;
1794 
1795 		processx4_step3(pkts_burst, dst_port);
1796 
1797 		/* dp1: <d[0], d[1], d[2], d[3], ... > */
1798 		dp1 = _mm_loadu_si128((__m128i *)dst_port);
1799 
1800 		for (j = FWDSTEP; j != k; j += FWDSTEP) {
1801 			processx4_step3(&pkts_burst[j], &dst_port[j]);
1802 
1803 			/*
1804 			 * dp2:
1805 			 * <d[j-3], d[j-2], d[j-1], d[j], ... >
1806 			 */
1807 			dp2 = _mm_loadu_si128(
1808 					(__m128i *)&dst_port[j - FWDSTEP + 1]);
1809 			lp  = port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2);
1810 
1811 			/*
1812 			 * dp1:
1813 			 * <d[j], d[j+1], d[j+2], d[j+3], ... >
1814 			 */
1815 			dp1 = _mm_srli_si128(dp2, (FWDSTEP - 1) *
1816 					sizeof(dst_port[0]));
1817 		}
1818 
1819 		/*
1820 		 * dp2: <d[j-3], d[j-2], d[j-1], d[j-1], ... >
1821 		 */
1822 		dp2 = _mm_shufflelo_epi16(dp1, 0xf9);
1823 		lp  = port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2);
1824 
1825 		/*
1826 		 * remove values added by the last repeated
1827 		 * dst port.
1828 		 */
1829 		lp[0]--;
1830 		dlp = dst_port[j - 1];
1831 	} else {
1832 		/* set dlp and lp to the never used values. */
1833 		dlp = BAD_PORT - 1;
1834 		lp = pnum + MAX_PKT_BURST;
1835 	}
1836 
1837 	/* Process up to last 3 packets one by one. */
1838 	switch (nb_rx % FWDSTEP) {
1839 	case 3:
1840 		process_packet(pkts_burst[j], dst_port + j, portid);
1841 		GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j);
1842 		j++;
1843 		/* fall-through */
1844 	case 2:
1845 		process_packet(pkts_burst[j], dst_port + j, portid);
1846 		GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j);
1847 		j++;
1848 		/* fall-through */
1849 	case 1:
1850 		process_packet(pkts_burst[j], dst_port + j, portid);
1851 		GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j);
1852 		j++;
1853 	}
1854 
1855 	/*
1856 	 * Send packets out through their destination ports.
1857 	 * Consecutive packets with the same destination port
1858 	 * are already grouped together.
1859 	 * If the destination port for a packet equals BAD_PORT,
1860 	 * free the packet without sending it out.
1861 	 */
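	/*
	 * For example, a 3-packet burst with dst_port = {1, 1, 3} (handled
	 * entirely by the scalar tail above when FWDSTEP is 4) ends up with
	 * pnum[0] = 2 and pnum[2] = 1, so the loop below issues
	 * send_packetsx4(1, pkts_burst, 2) followed by
	 * send_packetsx4(3, pkts_burst + 2, 1).
	 */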
1862 	for (j = 0; j < nb_rx; j += k) {
1863 
1864 		int32_t m;
1865 		uint16_t pn;
1866 
1867 		pn = dst_port[j];
1868 		k = pnum[j];
1869 
1870 		if (likely(pn != BAD_PORT))
1871 			send_packetsx4(pn, pkts_burst + j, k);
1872 		else
1873 			for (m = j; m != j + k; m++)
1874 				rte_pktmbuf_free(pkts_burst[m]);
1875 
1876 	}
1877 
1878 #endif /* APP_LOOKUP_METHOD */
1879 #else /* ENABLE_MULTI_BUFFER_OPTIMIZE == 0 */
1880 
1881 	/* Prefetch first packets */
1882 	for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++)
1883 		rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j], void *));
1884 
1885 	/* Prefetch and forward already prefetched packets */
1886 	for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) {
1887 		rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[
1888 				j + PREFETCH_OFFSET], void *));
1889 		l3fwd_simple_forward(pkts_burst[j], portid);
1890 	}
1891 
1892 	/* Forward remaining prefetched packets */
1893 	for (; j < nb_rx; j++)
1894 		l3fwd_simple_forward(pkts_burst[j], portid);
1895 
1896 #endif /* ENABLE_MULTI_BUFFER_OPTIMIZE */
1897 
1898 }
1899 
1900 #if (APP_CPU_LOAD > 0)
1901 
1902 /*
1903  * CPU-load stats collector
1904  */
1905 static int
1906 cpu_load_collector(__rte_unused void *arg) {
1907 	unsigned i, j, k;
1908 	uint64_t hits;
1909 	uint64_t prev_tsc, diff_tsc, cur_tsc;
1910 	uint64_t total[MAX_CPU] = { 0 };
1911 	unsigned min_cpu = MAX_CPU;
1912 	unsigned max_cpu = 0;
1913 	unsigned cpu_id;
1914 	int busy_total = 0;
1915 	int busy_flag = 0;
1916 
1917 	unsigned int n_thread_per_cpu[MAX_CPU] = { 0 };
1918 	struct thread_conf *thread_per_cpu[MAX_CPU][MAX_THREAD];
1919 
1920 	struct thread_conf *thread_conf;
1921 
1922 	const uint64_t interval_tsc = (rte_get_tsc_hz() + US_PER_S - 1) /
1923 		US_PER_S * CPU_LOAD_TIMEOUT_US;
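	/*
	 * The expression above rounds the TSC rate up to whole cycles per
	 * microsecond before scaling: e.g. with a 2 GHz TSC the stats are
	 * refreshed every 2000 * CPU_LOAD_TIMEOUT_US cycles.
	 */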
1924 
1925 	prev_tsc = 0;
1926 	/*
1927 	 * Wait for all threads
1928 	 */
1929 
1930 	printf("Waiting for %d rx threads and %d tx threads\n", n_rx_thread,
1931 			n_tx_thread);
1932 
1933 	while (rte_atomic16_read(&rx_counter) < n_rx_thread)
1934 		rte_pause();
1935 
1936 	while (rte_atomic16_read(&tx_counter) < n_tx_thread)
1937 		rte_pause();
1938 
1939 	for (i = 0; i < n_rx_thread; i++) {
1940 
1941 		thread_conf = &rx_thread[i].conf;
1942 		cpu_id = thread_conf->cpu_id;
1943 		thread_per_cpu[cpu_id][n_thread_per_cpu[cpu_id]++] = thread_conf;
1944 
1945 		if (cpu_id > max_cpu)
1946 			max_cpu = cpu_id;
1947 		if (cpu_id < min_cpu)
1948 			min_cpu = cpu_id;
1949 	}
1950 	for (i = 0; i < n_tx_thread; i++) {
1951 
1952 		thread_conf = &tx_thread[i].conf;
1953 		cpu_id = thread_conf->cpu_id;
1954 		thread_per_cpu[cpu_id][n_thread_per_cpu[cpu_id]++] = thread_conf;
1955 
1956 		if (thread_conf->cpu_id > max_cpu)
1957 			max_cpu = thread_conf->cpu_id;
1958 		if (thread_conf->cpu_id < min_cpu)
1959 			min_cpu = thread_conf->cpu_id;
1960 	}
1961 
1962 	while (1) {
1963 
1964 		cpu_load.counter++;
1965 		for (i = min_cpu; i <= max_cpu; i++) {
1966 			for (j = 0; j < MAX_CPU_COUNTER; j++) {
1967 				for (k = 0; k < n_thread_per_cpu[i]; k++)
1968 					if (thread_per_cpu[i][k]->busy[j]) {
1969 						busy_flag = 1;
1970 						break;
1971 					}
1972 				if (busy_flag) {
1973 					cpu_load.hits[j][i]++;
1974 					busy_total = 1;
1975 					busy_flag = 0;
1976 				}
1977 			}
1978 
1979 			if (busy_total) {
1980 				total[i]++;
1981 				busy_total = 0;
1982 			}
1983 		}
1984 
1985 		cur_tsc = rte_rdtsc();
1986 
1987 		diff_tsc = cur_tsc - prev_tsc;
1988 		if (unlikely(diff_tsc > interval_tsc)) {
1989 
1990 			printf("\033c");
1991 
1992 			printf("Cpu usage for %d rx threads and %d tx threads:\n\n",
1993 					n_rx_thread, n_tx_thread);
1994 
1995 			printf("cpu#     proc%%  poll%%  overhead%%\n\n");
1996 
1997 			for (i = min_cpu; i <= max_cpu; i++) {
1998 				hits = 0;
1999 				printf("CPU %d:", i);
2000 				for (j = 0; j < MAX_CPU_COUNTER; j++) {
2001 					printf("%7" PRIu64 "",
2002 							cpu_load.hits[j][i] * 100 / cpu_load.counter);
2003 					hits += cpu_load.hits[j][i];
2004 					cpu_load.hits[j][i] = 0;
2005 				}
2006 				printf("%7" PRIu64 "\n",
2007 						100 - total[i] * 100 / cpu_load.counter);
2008 				total[i] = 0;
2009 			}
2010 			cpu_load.counter = 0;
2011 
2012 			prev_tsc = cur_tsc;
2013 		}
2014 
2015 	}
2016 }
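
/*
 * The figures printed above are duty cycles over one reporting interval:
 * proc% and poll% are cpu_load.hits[counter][cpu] * 100 / cpu_load.counter,
 * and overhead% is 100 minus the share of samples in which any counter on
 * that cpu was busy. For example, a cpu whose threads were busy processing
 * in 600 of 1000 samples shows 60 in the proc% column.
 */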
2017 #endif /* APP_CPU_LOAD */
2018 
2019 /*
2020  * Null processing lthread loop
2021  *
2022  * This loop is used to start an empty scheduler on an lcore.
2023  */
2024 static void *
2025 lthread_null(__rte_unused void *args)
2026 {
2027 	int lcore_id = rte_lcore_id();
2028 
2029 	RTE_LOG(INFO, L3FWD, "Starting scheduler on lcore %d.\n", lcore_id);
2030 	lthread_exit(NULL);
2031 	return NULL;
2032 }
2033 
2034 /* main processing loop */
2035 static void *
2036 lthread_tx_per_ring(void *dummy)
2037 {
2038 	int nb_rx;
2039 	uint16_t portid;
2040 	struct rte_ring *ring;
2041 	struct thread_tx_conf *tx_conf;
2042 	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
2043 	struct lthread_cond *ready;
2044 
2045 	tx_conf = (struct thread_tx_conf *)dummy;
2046 	ring = tx_conf->ring;
2047 	ready = *tx_conf->ready;
2048 
2049 	lthread_set_data((void *)tx_conf);
2050 
2051 	/*
2052 	 * Move this lthread to lcore
2053 	 */
2054 	lthread_set_affinity(tx_conf->conf.lcore_id);
2055 
2056 	RTE_LOG(INFO, L3FWD, "entering main tx loop on lcore %u\n", rte_lcore_id());
2057 
2058 	nb_rx = 0;
2059 	rte_atomic16_inc(&tx_counter);
2060 	while (1) {
2061 
2062 		/*
2063 		 * Read packet from ring
2064 		 */
2065 		SET_CPU_BUSY(tx_conf, CPU_POLL);
2066 		nb_rx = rte_ring_sc_dequeue_burst(ring, (void **)pkts_burst,
2067 				MAX_PKT_BURST, NULL);
2068 		SET_CPU_IDLE(tx_conf, CPU_POLL);
2069 
2070 		if (nb_rx > 0) {
2071 			SET_CPU_BUSY(tx_conf, CPU_PROCESS);
2072 			portid = pkts_burst[0]->port;
2073 			process_burst(pkts_burst, nb_rx, portid);
2074 			SET_CPU_IDLE(tx_conf, CPU_PROCESS);
2075 			lthread_yield();
2076 		} else
2077 			lthread_cond_wait(ready, 0);
2078 
2079 	}
2080 	return NULL;
2081 }
2082 
2083 /*
2084  * Main tx-lthreads spawner lthread.
2085  *
2086  * This lthread is used to spawn one new lthread per ring from producers.
2087  *
2088  */
2089 static void *
2090 lthread_tx(void *args)
2091 {
2092 	struct lthread *lt;
2093 
2094 	unsigned lcore_id;
2095 	uint16_t portid;
2096 	struct thread_tx_conf *tx_conf;
2097 
2098 	tx_conf = (struct thread_tx_conf *)args;
2099 	lthread_set_data((void *)tx_conf);
2100 
2101 	/*
2102 	 * Move this lthread to the selected lcore
2103 	 */
2104 	lthread_set_affinity(tx_conf->conf.lcore_id);
2105 
2106 	/*
2107 	 * Spawn tx readers (one per input ring)
2108 	 */
2109 	lthread_create(&lt, tx_conf->conf.lcore_id, lthread_tx_per_ring,
2110 			(void *)tx_conf);
2111 
2112 	lcore_id = rte_lcore_id();
2113 
2114 	RTE_LOG(INFO, L3FWD, "Entering Tx main loop on lcore %u\n", lcore_id);
2115 
2116 	tx_conf->conf.cpu_id = sched_getcpu();
2117 	while (1) {
2118 
2119 		lthread_sleep(BURST_TX_DRAIN_US * 1000);
2120 
2121 		/*
2122 		 * TX burst queue drain
2123 		 */
2124 		for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) {
2125 			if (tx_conf->tx_mbufs[portid].len == 0)
2126 				continue;
2127 			SET_CPU_BUSY(tx_conf, CPU_PROCESS);
2128 			send_burst(tx_conf, tx_conf->tx_mbufs[portid].len, portid);
2129 			SET_CPU_IDLE(tx_conf, CPU_PROCESS);
2130 			tx_conf->tx_mbufs[portid].len = 0;
2131 		}
2132 
2133 	}
2134 	return NULL;
2135 }
2136 
2137 static void *
2138 lthread_rx(void *dummy)
2139 {
2140 	int ret;
2141 	uint16_t nb_rx;
2142 	int i;
2143 	uint16_t portid;
2144 	uint8_t queueid;
2145 	int worker_id;
2146 	int len[RTE_MAX_LCORE] = { 0 };
2147 	int old_len, new_len;
2148 	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
2149 	struct thread_rx_conf *rx_conf;
2150 
2151 	rx_conf = (struct thread_rx_conf *)dummy;
2152 	lthread_set_data((void *)rx_conf);
2153 
2154 	/*
2155 	 * Move this lthread to lcore
2156 	 */
2157 	lthread_set_affinity(rx_conf->conf.lcore_id);
2158 
2159 	if (rx_conf->n_rx_queue == 0) {
2160 		RTE_LOG(INFO, L3FWD, "lcore %u has nothing to do\n", rte_lcore_id());
2161 		return NULL;
2162 	}
2163 
2164 	RTE_LOG(INFO, L3FWD, "Entering main Rx loop on lcore %u\n", rte_lcore_id());
2165 
2166 	for (i = 0; i < rx_conf->n_rx_queue; i++) {
2167 
2168 		portid = rx_conf->rx_queue_list[i].port_id;
2169 		queueid = rx_conf->rx_queue_list[i].queue_id;
2170 		RTE_LOG(INFO, L3FWD,
2171 			" -- lcoreid=%u portid=%u rxqueueid=%hhu\n",
2172 				rte_lcore_id(), portid, queueid);
2173 	}
2174 
2175 	/*
2176 	 * Init all condition variables (one per rx thread)
2177 	 */
2178 	for (i = 0; i < rx_conf->n_rx_queue; i++)
2179 		lthread_cond_init(NULL, &rx_conf->ready[i], NULL);
2180 
2181 	worker_id = 0;
2182 
2183 	rx_conf->conf.cpu_id = sched_getcpu();
2184 	rte_atomic16_inc(&rx_counter);
2185 	while (1) {
2186 
2187 		/*
2188 		 * Read packet from RX queues
2189 		 */
2190 		for (i = 0; i < rx_conf->n_rx_queue; ++i) {
2191 			portid = rx_conf->rx_queue_list[i].port_id;
2192 			queueid = rx_conf->rx_queue_list[i].queue_id;
2193 
2194 			SET_CPU_BUSY(rx_conf, CPU_POLL);
2195 			nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst,
2196 				MAX_PKT_BURST);
2197 			SET_CPU_IDLE(rx_conf, CPU_POLL);
2198 
2199 			if (nb_rx != 0) {
2200 				worker_id = (worker_id + 1) % rx_conf->n_ring;
2201 				old_len = len[worker_id];
2202 
2203 				SET_CPU_BUSY(rx_conf, CPU_PROCESS);
2204 				ret = rte_ring_sp_enqueue_burst(
2205 						rx_conf->ring[worker_id],
2206 						(void **) pkts_burst,
2207 						nb_rx, NULL);
2208 
2209 				new_len = old_len + ret;
2210 
2211 				if (new_len >= BURST_SIZE) {
2212 					lthread_cond_signal(rx_conf->ready[worker_id]);
2213 					new_len = 0;
2214 				}
2215 
2216 				len[worker_id] = new_len;
2217 
2218 				if (unlikely(ret < nb_rx)) {
2219 					uint32_t k;
2220 
2221 					for (k = ret; k < nb_rx; k++) {
2222 						struct rte_mbuf *m = pkts_burst[k];
2223 
2224 						rte_pktmbuf_free(m);
2225 					}
2226 				}
2227 				SET_CPU_IDLE(rx_conf, CPU_PROCESS);
2228 			}
2229 
2230 			lthread_yield();
2231 		}
2232 	}
2233 	return NULL;
2234 }
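
/*
 * Illustrative sketch of the rx/tx handshake used above (BURST_SIZE is the
 * wake-up threshold defined elsewhere in this file): the rx lthread is the
 * single producer on each ring, the tx lthread is the single consumer, and
 * the condition variable only bounds how often the consumer is woken:
 *
 *   rx: n = rte_ring_sp_enqueue_burst(ring, (void **)bufs, nb_rx, NULL);
 *       if (accumulated count >= BURST_SIZE)
 *               lthread_cond_signal(ready);
 *
 *   tx: n = rte_ring_sc_dequeue_burst(ring, (void **)bufs, MAX_PKT_BURST, NULL);
 *       if (n == 0)
 *               lthread_cond_wait(ready, 0);
 */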
2235 
2236 /*
2237  * Start scheduler with initial lthread on lcore
2238  *
2239  * This lthread loop spawns all rx and tx lthreads on the master lcore.
2240  */
2241 
2242 static void *
2243 lthread_spawner(__rte_unused void *arg)
2244 {
2245 	struct lthread *lt[MAX_THREAD];
2246 	int i;
2247 	int n_thread = 0;
2248 
2249 	printf("Entering lthread_spawner\n");
2250 
2251 	/*
2252 	 * Create producers (rx threads) on default lcore
2253 	 */
2254 	for (i = 0; i < n_rx_thread; i++) {
2255 		rx_thread[i].conf.thread_id = i;
2256 		lthread_create(&lt[n_thread], -1, lthread_rx,
2257 				(void *)&rx_thread[i]);
2258 		n_thread++;
2259 	}
2260 
2261 	/*
2262 	 * Wait for all producers. Since some producers may be started on the same
2263 	 * scheduler as this lthread, yielding (sleeping) is required to let them
2264 	 * run and to prevent a deadlock here.
2265 	 */
2266 	while (rte_atomic16_read(&rx_counter) < n_rx_thread)
2267 		lthread_sleep(100000);
2268 
2269 	/*
2270 	 * Create consumers (tx threads) on default lcore_id
2271 	 */
2272 	for (i = 0; i < n_tx_thread; i++) {
2273 		tx_thread[i].conf.thread_id = i;
2274 		lthread_create(&lt[n_thread], -1, lthread_tx,
2275 				(void *)&tx_thread[i]);
2276 		n_thread++;
2277 	}
2278 
2279 	/*
2280 	 * Wait for all threads to finish
2281 	 */
2282 	for (i = 0; i < n_thread; i++)
2283 		lthread_join(lt[i], NULL);
2284 
2285 	return NULL;
2286 }
2287 
2288 /*
2289  * Start master scheduler with initial lthread spawning rx and tx lthreads
2290  * (main_lthread_master).
2291  */
2292 static int
2293 lthread_master_spawner(__rte_unused void *arg) {
2294 	struct lthread *lt;
2295 	int lcore_id = rte_lcore_id();
2296 
2297 	RTE_PER_LCORE(lcore_conf) = &lcore_conf[lcore_id];
2298 	lthread_create(&lt, -1, lthread_spawner, NULL);
2299 	lthread_run();
2300 
2301 	return 0;
2302 }
2303 
2304 /*
2305  * Start scheduler on lcore.
2306  */
2307 static int
2308 sched_spawner(__rte_unused void *arg) {
2309 	struct lthread *lt;
2310 	int lcore_id = rte_lcore_id();
2311 
2312 #if (APP_CPU_LOAD)
2313 	if (lcore_id == cpu_load_lcore_id) {
2314 		cpu_load_collector(arg);
2315 		return 0;
2316 	}
2317 #endif /* APP_CPU_LOAD */
2318 
2319 	RTE_PER_LCORE(lcore_conf) = &lcore_conf[lcore_id];
2320 	lthread_create(&lt, -1, lthread_null, NULL);
2321 	lthread_run();
2322 
2323 	return 0;
2324 }
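
/*
 * For reference, a minimal sketch of how these entry points are typically
 * wired up from main() (the exact launch sequence may differ from this
 * sketch): with lthreads enabled every worker lcore runs a scheduler and
 * the master lcore runs the spawner, otherwise each lcore runs the plain
 * pthread loop.
 *
 *   if (lthreads_on) {
 *           rte_eal_mp_remote_launch(sched_spawner, NULL, SKIP_MASTER);
 *           lthread_master_spawner(NULL);
 *   } else {
 *           rte_eal_mp_remote_launch(pthread_run, NULL, CALL_MASTER);
 *   }
 */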
2325 
2326 /* main processing loop */
2327 static int
2328 pthread_tx(void *dummy)
2329 {
2330 	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
2331 	uint64_t prev_tsc, diff_tsc, cur_tsc;
2332 	int nb_rx;
2333 	uint16_t portid;
2334 	struct thread_tx_conf *tx_conf;
2335 
2336 	const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) /
2337 		US_PER_S * BURST_TX_DRAIN_US;
2338 
2339 	prev_tsc = 0;
2340 
2341 	tx_conf = (struct thread_tx_conf *)dummy;
2342 
2343 	RTE_LOG(INFO, L3FWD, "Entering main Tx loop on lcore %u\n", rte_lcore_id());
2344 
2345 	tx_conf->conf.cpu_id = sched_getcpu();
2346 	rte_atomic16_inc(&tx_counter);
2347 	while (1) {
2348 
2349 		cur_tsc = rte_rdtsc();
2350 
2351 		/*
2352 		 * TX burst queue drain
2353 		 */
2354 		diff_tsc = cur_tsc - prev_tsc;
2355 		if (unlikely(diff_tsc > drain_tsc)) {
2356 
2357 			/*
2358 			 * This could be optimized (use queueid instead of
2359 			 * portid), but it is not called so often
2360 			 */
2361 			SET_CPU_BUSY(tx_conf, CPU_PROCESS);
2362 			for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) {
2363 				if (tx_conf->tx_mbufs[portid].len == 0)
2364 					continue;
2365 				send_burst(tx_conf, tx_conf->tx_mbufs[portid].len, portid);
2366 				tx_conf->tx_mbufs[portid].len = 0;
2367 			}
2368 			SET_CPU_IDLE(tx_conf, CPU_PROCESS);
2369 
2370 			prev_tsc = cur_tsc;
2371 		}
2372 
2373 		/*
2374 		 * Read packet from ring
2375 		 */
2376 		SET_CPU_BUSY(tx_conf, CPU_POLL);
2377 		nb_rx = rte_ring_sc_dequeue_burst(tx_conf->ring,
2378 				(void **)pkts_burst, MAX_PKT_BURST, NULL);
2379 		SET_CPU_IDLE(tx_conf, CPU_POLL);
2380 
2381 		if (unlikely(nb_rx == 0)) {
2382 			sched_yield();
2383 			continue;
2384 		}
2385 
2386 		SET_CPU_BUSY(tx_conf, CPU_PROCESS);
2387 		portid = pkts_burst[0]->port;
2388 		process_burst(pkts_burst, nb_rx, portid);
2389 		SET_CPU_IDLE(tx_conf, CPU_PROCESS);
2390 
2391 	}
2392 }
2393 
2394 static int
2395 pthread_rx(void *dummy)
2396 {
2397 	int i;
2398 	int worker_id;
2399 	uint32_t n;
2400 	uint32_t nb_rx;
2401 	unsigned lcore_id;
2402 	uint8_t queueid;
2403 	uint16_t portid;
2404 	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
2405 
2406 	struct thread_rx_conf *rx_conf;
2407 
2408 	lcore_id = rte_lcore_id();
2409 	rx_conf = (struct thread_rx_conf *)dummy;
2410 
2411 	if (rx_conf->n_rx_queue == 0) {
2412 		RTE_LOG(INFO, L3FWD, "lcore %u has nothing to do\n", lcore_id);
2413 		return 0;
2414 	}
2415 
2416 	RTE_LOG(INFO, L3FWD, "entering main rx loop on lcore %u\n", lcore_id);
2417 
2418 	for (i = 0; i < rx_conf->n_rx_queue; i++) {
2419 
2420 		portid = rx_conf->rx_queue_list[i].port_id;
2421 		queueid = rx_conf->rx_queue_list[i].queue_id;
2422 		RTE_LOG(INFO, L3FWD,
2423 			" -- lcoreid=%u portid=%u rxqueueid=%hhu\n",
2424 				lcore_id, portid, queueid);
2425 	}
2426 
2427 	worker_id = 0;
2428 	rx_conf->conf.cpu_id = sched_getcpu();
2429 	rte_atomic16_inc(&rx_counter);
2430 	while (1) {
2431 
2432 		/*
2433 		 * Read packet from RX queues
2434 		 */
2435 		for (i = 0; i < rx_conf->n_rx_queue; ++i) {
2436 			portid = rx_conf->rx_queue_list[i].port_id;
2437 			queueid = rx_conf->rx_queue_list[i].queue_id;
2438 
2439 			SET_CPU_BUSY(rx_conf, CPU_POLL);
2440 			nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst,
2441 				MAX_PKT_BURST);
2442 			SET_CPU_IDLE(rx_conf, CPU_POLL);
2443 
2444 			if (nb_rx == 0) {
2445 				sched_yield();
2446 				continue;
2447 			}
2448 
2449 			SET_CPU_BUSY(rx_conf, CPU_PROCESS);
2450 			worker_id = (worker_id + 1) % rx_conf->n_ring;
2451 			n = rte_ring_sp_enqueue_burst(rx_conf->ring[worker_id],
2452 					(void **)pkts_burst, nb_rx, NULL);
2453 
2454 			if (unlikely(n != nb_rx)) {
2455 				uint32_t k;
2456 
2457 				for (k = n; k < nb_rx; k++) {
2458 					struct rte_mbuf *m = pkts_burst[k];
2459 
2460 					rte_pktmbuf_free(m);
2461 				}
2462 			}
2463 
2464 			SET_CPU_IDLE(rx_conf, CPU_PROCESS);
2465 
2466 		}
2467 	}
2468 }
2469 
2470 /*
2471  * P-Thread spawner.
2472  */
2473 static int
2474 pthread_run(__rte_unused void *arg) {
2475 	int lcore_id = rte_lcore_id();
2476 	int i;
2477 
2478 	for (i = 0; i < n_rx_thread; i++)
2479 		if (rx_thread[i].conf.lcore_id == lcore_id) {
2480 			printf("Start rx thread on %d...\n", lcore_id);
2481 			RTE_PER_LCORE(lcore_conf) = &lcore_conf[lcore_id];
2482 			RTE_PER_LCORE(lcore_conf)->data = (void *)&rx_thread[i];
2483 			pthread_rx((void *)&rx_thread[i]);
2484 			return 0;
2485 		}
2486 
2487 	for (i = 0; i < n_tx_thread; i++)
2488 		if (tx_thread[i].conf.lcore_id == lcore_id) {
2489 			printf("Start tx thread on %d...\n", lcore_id);
2490 			RTE_PER_LCORE(lcore_conf) = &lcore_conf[lcore_id];
2491 			RTE_PER_LCORE(lcore_conf)->data = (void *)&tx_thread[i];
2492 			pthread_tx((void *)&tx_thread[i]);
2493 			return 0;
2494 		}
2495 
2496 #if (APP_CPU_LOAD)
2497 	if (lcore_id == cpu_load_lcore_id)
2498 		cpu_load_collector(arg);
2499 #endif /* APP_CPU_LOAD */
2500 
2501 	return 0;
2502 }
2503 
2504 static int
2505 check_lcore_params(void)
2506 {
2507 	uint8_t queue, lcore;
2508 	uint16_t i;
2509 	int socketid;
2510 
2511 	for (i = 0; i < nb_rx_thread_params; ++i) {
2512 		queue = rx_thread_params[i].queue_id;
2513 		if (queue >= MAX_RX_QUEUE_PER_PORT) {
2514 			printf("invalid queue number: %hhu\n", queue);
2515 			return -1;
2516 		}
2517 		lcore = rx_thread_params[i].lcore_id;
2518 		if (!rte_lcore_is_enabled(lcore)) {
2519 			printf("error: lcore %hhu is not enabled in lcore mask\n", lcore);
2520 			return -1;
2521 		}
2522 		socketid = rte_lcore_to_socket_id(lcore);
2523 		if ((socketid != 0) && (numa_on == 0))
2524 			printf("warning: lcore %hhu is on socket %d with numa off\n",
2525 				lcore, socketid);
2526 	}
2527 	return 0;
2528 }
2529 
2530 static int
2531 check_port_config(const unsigned nb_ports)
2532 {
2533 	unsigned portid;
2534 	uint16_t i;
2535 
2536 	for (i = 0; i < nb_rx_thread_params; ++i) {
2537 		portid = rx_thread_params[i].port_id;
2538 		if ((enabled_port_mask & (1 << portid)) == 0) {
2539 			printf("port %u is not enabled in port mask\n", portid);
2540 			return -1;
2541 		}
2542 		if (portid >= nb_ports) {
2543 			printf("port %u is not present on the board\n", portid);
2544 			return -1;
2545 		}
2546 	}
2547 	return 0;
2548 }
2549 
2550 static uint8_t
2551 get_port_n_rx_queues(const uint16_t port)
2552 {
2553 	int queue = -1;
2554 	uint16_t i;
2555 
2556 	for (i = 0; i < nb_rx_thread_params; ++i)
2557 		if (rx_thread_params[i].port_id == port &&
2558 				rx_thread_params[i].queue_id > queue)
2559 			queue = rx_thread_params[i].queue_id;
2560 
2561 	return (uint8_t)(++queue);
2562 }
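
/*
 * For example, if the --rx configuration contains (0,0,...) and (0,1,...)
 * for port 0, the highest queue id seen is 1 and this returns 2, i.e. the
 * number of rx queues that must be set up on that port.
 */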
2563 
2564 static int
2565 init_rx_rings(void)
2566 {
2567 	unsigned socket_io;
2568 	struct thread_rx_conf *rx_conf;
2569 	struct thread_tx_conf *tx_conf;
2570 	unsigned rx_thread_id, tx_thread_id;
2571 	char name[256];
2572 	struct rte_ring *ring = NULL;
2573 
2574 	for (tx_thread_id = 0; tx_thread_id < n_tx_thread; tx_thread_id++) {
2575 
2576 		tx_conf = &tx_thread[tx_thread_id];
2577 
2578 		printf("Connecting tx-thread %d with rx-thread %d\n", tx_thread_id,
2579 				tx_conf->conf.thread_id);
2580 
2581 		rx_thread_id = tx_conf->conf.thread_id;
2582 		if (rx_thread_id >= n_rx_thread) {
2583 			printf("connection from tx-thread %u to rx-thread %u fails "
2584 					"(rx-thread not defined)\n", tx_thread_id, rx_thread_id);
2585 			return -1;
2586 		}
2587 
2588 		rx_conf = &rx_thread[rx_thread_id];
2589 		socket_io = rte_lcore_to_socket_id(rx_conf->conf.lcore_id);
2590 
2591 		snprintf(name, sizeof(name), "app_ring_s%u_rx%u_tx%u",
2592 				socket_io, rx_thread_id, tx_thread_id);
2593 
2594 		ring = rte_ring_create(name, 1024 * 4, socket_io,
2595 				RING_F_SP_ENQ | RING_F_SC_DEQ);
2596 
2597 		if (ring == NULL) {
2598 			rte_panic("Cannot create ring to connect rx-thread %u "
2599 					"with tx-thread %u\n", rx_thread_id, tx_thread_id);
2600 		}
2601 
2602 		rx_conf->ring[rx_conf->n_ring] = ring;
2603 
2604 		tx_conf->ring = ring;
2605 		tx_conf->ready = &rx_conf->ready[rx_conf->n_ring];
2606 
2607 		rx_conf->n_ring++;
2608 	}
2609 	return 0;
2610 }
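
/*
 * For example, with --rx="(0,0,1,0)" and --tx="(2,0)" the single tx-thread
 * references rx-thread 0, so one SP/SC ring named "app_ring_s<S>_rx0_tx0"
 * is created on the socket <S> of lcore 1 and wired into both
 * rx_thread[0].ring[0] and tx_thread[0].ring.
 */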
2611 
2612 static int
2613 init_rx_queues(void)
2614 {
2615 	uint16_t i, nb_rx_queue;
2616 	uint8_t thread;
2617 
2618 	n_rx_thread = 0;
2619 
2620 	for (i = 0; i < nb_rx_thread_params; ++i) {
2621 		thread = rx_thread_params[i].thread_id;
2622 		nb_rx_queue = rx_thread[thread].n_rx_queue;
2623 
2624 		if (nb_rx_queue >= MAX_RX_QUEUE_PER_LCORE) {
2625 			printf("error: too many queues (%u) for thread: %u\n",
2626 				(unsigned)nb_rx_queue + 1, (unsigned)thread);
2627 			return -1;
2628 		}
2629 
2630 		rx_thread[thread].conf.thread_id = thread;
2631 		rx_thread[thread].conf.lcore_id = rx_thread_params[i].lcore_id;
2632 		rx_thread[thread].rx_queue_list[nb_rx_queue].port_id =
2633 			rx_thread_params[i].port_id;
2634 		rx_thread[thread].rx_queue_list[nb_rx_queue].queue_id =
2635 			rx_thread_params[i].queue_id;
2636 		rx_thread[thread].n_rx_queue++;
2637 
2638 		if (thread >= n_rx_thread)
2639 			n_rx_thread = thread + 1;
2640 
2641 	}
2642 	return 0;
2643 }
2644 
2645 static int
2646 init_tx_threads(void)
2647 {
2648 	int i;
2649 
2650 	n_tx_thread = 0;
2651 	for (i = 0; i < nb_tx_thread_params; ++i) {
2652 		tx_thread[n_tx_thread].conf.thread_id = tx_thread_params[i].thread_id;
2653 		tx_thread[n_tx_thread].conf.lcore_id = tx_thread_params[i].lcore_id;
2654 		n_tx_thread++;
2655 	}
2656 	return 0;
2657 }
2658 
2659 /* display usage */
2660 static void
2661 print_usage(const char *prgname)
2662 {
2663 	printf("%s [EAL options] -- -p PORTMASK -P"
2664 		"  [--rx (port,queue,lcore,thread)[,(port,queue,lcore,thread)]]"
2665 		"  [--tx (lcore,thread)[,(lcore,thread)]]"
2666 		"  [--enable-jumbo [--max-pkt-len PKTLEN]]\n"
2667 		"  [--parse-ptype]\n\n"
2668 		"  -p PORTMASK: hexadecimal bitmask of ports to configure\n"
2669 		"  -P : enable promiscuous mode\n"
2670 		"  --rx (port,queue,lcore,thread): rx queues configuration\n"
2671 		"  --tx (lcore,thread): tx threads configuration\n"
2672 		"  --stat-lcore LCORE: use lcore for stat collector\n"
2673 		"  --eth-dest=X,MM:MM:MM:MM:MM:MM: optional, ethernet destination for port X\n"
2674 		"  --no-numa: optional, disable numa awareness\n"
2675 		"  --ipv6: optional, specify it if running ipv6 packets\n"
2676 		"  --enable-jumbo: enable jumbo frames;"
2677 		" use --max-pkt-len to set the max packet length in decimal (64-9600)\n"
2678 		"  --hash-entry-num: specify the number of hash entries in hexadecimal to set up\n"
2679 		"  --no-lthreads: turn off lthread model\n"
2680 		"  --parse-ptype: set to use software to analyze packet type\n\n",
2681 		prgname);
2682 }
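
/*
 * One possible invocation (binary name and EAL options before "--" are
 * shown only as an example):
 *
 *   ./l3fwd-thread -c ff -n 2 -- -P -p 3 \
 *           --rx="(0,0,0,0)(1,0,1,1)" --tx="(2,0)(3,1)"
 *
 * i.e. ports 0 and 1 enabled and promiscuous, queue 0 of port 0 polled by
 * rx-thread 0 on lcore 0, queue 0 of port 1 by rx-thread 1 on lcore 1, and
 * tx-threads 0 and 1 placed on lcores 2 and 3.
 */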
2683 
2684 static int parse_max_pkt_len(const char *pktlen)
2685 {
2686 	char *end = NULL;
2687 	unsigned long len;
2688 
2689 	/* parse decimal string */
2690 	len = strtoul(pktlen, &end, 10);
2691 	if ((pktlen[0] == '\0') || (end == NULL) || (*end != '\0'))
2692 		return -1;
2693 
2694 	if (len == 0)
2695 		return -1;
2696 
2697 	return len;
2698 }
2699 
2700 static int
2701 parse_portmask(const char *portmask)
2702 {
2703 	char *end = NULL;
2704 	unsigned long pm;
2705 
2706 	/* parse hexadecimal string */
2707 	pm = strtoul(portmask, &end, 16);
2708 	if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
2709 		return -1;
2710 
2711 	if (pm == 0)
2712 		return -1;
2713 
2714 	return pm;
2715 }
2716 
2717 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
2718 static int
2719 parse_hash_entry_number(const char *hash_entry_num)
2720 {
2721 	char *end = NULL;
2722 	unsigned long hash_en;
2723 
2724 	/* parse hexadecimal string */
2725 	hash_en = strtoul(hash_entry_num, &end, 16);
2726 	if ((hash_entry_num[0] == '\0') || (end == NULL) || (*end != '\0'))
2727 		return -1;
2728 
2729 	if (hash_en == 0)
2730 		return -1;
2731 
2732 	return hash_en;
2733 }
2734 #endif
2735 
2736 static int
2737 parse_rx_config(const char *q_arg)
2738 {
2739 	char s[256];
2740 	const char *p, *p0 = q_arg;
2741 	char *end;
2742 	enum fieldnames {
2743 		FLD_PORT = 0,
2744 		FLD_QUEUE,
2745 		FLD_LCORE,
2746 		FLD_THREAD,
2747 		_NUM_FLD
2748 	};
2749 	unsigned long int_fld[_NUM_FLD];
2750 	char *str_fld[_NUM_FLD];
2751 	int i;
2752 	unsigned size;
2753 
2754 	nb_rx_thread_params = 0;
2755 
2756 	while ((p = strchr(p0, '(')) != NULL) {
2757 		++p;
2758 		p0 = strchr(p, ')');
2759 		if (p0 == NULL)
2760 			return -1;
2761 
2762 		size = p0 - p;
2763 		if (size >= sizeof(s))
2764 			return -1;
2765 
2766 		snprintf(s, sizeof(s), "%.*s", size, p);
2767 		if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') != _NUM_FLD)
2768 			return -1;
2769 		for (i = 0; i < _NUM_FLD; i++) {
2770 			errno = 0;
2771 			int_fld[i] = strtoul(str_fld[i], &end, 0);
2772 			if (errno != 0 || end == str_fld[i] || int_fld[i] > 255)
2773 				return -1;
2774 		}
2775 		if (nb_rx_thread_params >= MAX_LCORE_PARAMS) {
2776 			printf("exceeded max number of rx params: %hu\n",
2777 					nb_rx_thread_params);
2778 			return -1;
2779 		}
2780 		rx_thread_params_array[nb_rx_thread_params].port_id =
2781 				int_fld[FLD_PORT];
2782 		rx_thread_params_array[nb_rx_thread_params].queue_id =
2783 				(uint8_t)int_fld[FLD_QUEUE];
2784 		rx_thread_params_array[nb_rx_thread_params].lcore_id =
2785 				(uint8_t)int_fld[FLD_LCORE];
2786 		rx_thread_params_array[nb_rx_thread_params].thread_id =
2787 				(uint8_t)int_fld[FLD_THREAD];
2788 		++nb_rx_thread_params;
2789 	}
2790 	rx_thread_params = rx_thread_params_array;
2791 	return 0;
2792 }
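
/*
 * For example, --rx="(0,0,2,0)(1,0,2,1)" produces two entries in
 * rx_thread_params_array: {port_id 0, queue_id 0, lcore_id 2, thread_id 0}
 * and {port_id 1, queue_id 0, lcore_id 2, thread_id 1}.
 */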
2793 
2794 static int
2795 parse_tx_config(const char *q_arg)
2796 {
2797 	char s[256];
2798 	const char *p, *p0 = q_arg;
2799 	char *end;
2800 	enum fieldnames {
2801 		FLD_LCORE = 0,
2802 		FLD_THREAD,
2803 		_NUM_FLD
2804 	};
2805 	unsigned long int_fld[_NUM_FLD];
2806 	char *str_fld[_NUM_FLD];
2807 	int i;
2808 	unsigned size;
2809 
2810 	nb_tx_thread_params = 0;
2811 
2812 	while ((p = strchr(p0, '(')) != NULL) {
2813 		++p;
2814 		p0 = strchr(p, ')');
2815 		if (p0 == NULL)
2816 			return -1;
2817 
2818 		size = p0 - p;
2819 		if (size >= sizeof(s))
2820 			return -1;
2821 
2822 		snprintf(s, sizeof(s), "%.*s", size, p);
2823 		if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') != _NUM_FLD)
2824 			return -1;
2825 		for (i = 0; i < _NUM_FLD; i++) {
2826 			errno = 0;
2827 			int_fld[i] = strtoul(str_fld[i], &end, 0);
2828 			if (errno != 0 || end == str_fld[i] || int_fld[i] > 255)
2829 				return -1;
2830 		}
2831 		if (nb_tx_thread_params >= MAX_LCORE_PARAMS) {
2832 			printf("exceeded max number of tx params: %hu\n",
2833 				nb_tx_thread_params);
2834 			return -1;
2835 		}
2836 		tx_thread_params_array[nb_tx_thread_params].lcore_id =
2837 				(uint8_t)int_fld[FLD_LCORE];
2838 		tx_thread_params_array[nb_tx_thread_params].thread_id =
2839 				(uint8_t)int_fld[FLD_THREAD];
2840 		++nb_tx_thread_params;
2841 	}
2842 	tx_thread_params = tx_thread_params_array;
2843 
2844 	return 0;
2845 }
2846 
2847 #if (APP_CPU_LOAD > 0)
2848 static int
2849 parse_stat_lcore(const char *stat_lcore)
2850 {
2851 	char *end = NULL;
2852 	unsigned long lcore_id;
2853 
2854 	lcore_id = strtoul(stat_lcore, &end, 10);
2855 	if ((stat_lcore[0] == '\0') || (end == NULL) || (*end != '\0'))
2856 		return -1;
2857 
2858 	return lcore_id;
2859 }
2860 #endif
2861 
2862 static void
2863 parse_eth_dest(const char *optarg)
2864 {
2865 	uint16_t portid;
2866 	char *port_end;
2867 	uint8_t c, *dest, peer_addr[6];
2868 
2869 	errno = 0;
2870 	portid = strtoul(optarg, &port_end, 10);
2871 	if (errno != 0 || port_end == optarg || *port_end++ != ',')
2872 		rte_exit(EXIT_FAILURE,
2873 		"Invalid eth-dest: %s\n", optarg);
2874 	if (portid >= RTE_MAX_ETHPORTS)
2875 		rte_exit(EXIT_FAILURE,
2876 		"eth-dest: port %d >= RTE_MAX_ETHPORTS(%d)\n",
2877 		portid, RTE_MAX_ETHPORTS);
2878 
2879 	if (cmdline_parse_etheraddr(NULL, port_end,
2880 		&peer_addr, sizeof(peer_addr)) < 0)
2881 		rte_exit(EXIT_FAILURE,
2882 		"Invalid ethernet address: %s\n",
2883 		port_end);
2884 	dest = (uint8_t *)&dest_eth_addr[portid];
2885 	for (c = 0; c < 6; c++)
2886 		dest[c] = peer_addr[c];
2887 	*(uint64_t *)(val_eth + portid) = dest_eth_addr[portid];
2888 }
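
/*
 * For example, --eth-dest=0,11:22:33:44:55:66 replaces the default
 * destination MAC of port 0 (pre-initialized to 02:00:00:00:00:XX in main)
 * with 11:22:33:44:55:66, updating both dest_eth_addr[0] and the copy kept
 * in val_eth[0] that is used when rewriting packet headers.
 */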
2889 
2890 #define CMD_LINE_OPT_RX_CONFIG "rx"
2891 #define CMD_LINE_OPT_TX_CONFIG "tx"
2892 #define CMD_LINE_OPT_STAT_LCORE "stat-lcore"
2893 #define CMD_LINE_OPT_ETH_DEST "eth-dest"
2894 #define CMD_LINE_OPT_NO_NUMA "no-numa"
2895 #define CMD_LINE_OPT_IPV6 "ipv6"
2896 #define CMD_LINE_OPT_ENABLE_JUMBO "enable-jumbo"
2897 #define CMD_LINE_OPT_HASH_ENTRY_NUM "hash-entry-num"
2898 #define CMD_LINE_OPT_NO_LTHREADS "no-lthreads"
2899 #define CMD_LINE_OPT_PARSE_PTYPE "parse-ptype"
2900 
2901 /* Parse the argument given in the command line of the application */
2902 static int
2903 parse_args(int argc, char **argv)
2904 {
2905 	int opt, ret;
2906 	char **argvopt;
2907 	int option_index;
2908 	char *prgname = argv[0];
2909 	static struct option lgopts[] = {
2910 		{CMD_LINE_OPT_RX_CONFIG, 1, 0, 0},
2911 		{CMD_LINE_OPT_TX_CONFIG, 1, 0, 0},
2912 		{CMD_LINE_OPT_STAT_LCORE, 1, 0, 0},
2913 		{CMD_LINE_OPT_ETH_DEST, 1, 0, 0},
2914 		{CMD_LINE_OPT_NO_NUMA, 0, 0, 0},
2915 		{CMD_LINE_OPT_IPV6, 0, 0, 0},
2916 		{CMD_LINE_OPT_ENABLE_JUMBO, 0, 0, 0},
2917 		{CMD_LINE_OPT_HASH_ENTRY_NUM, 1, 0, 0},
2918 		{CMD_LINE_OPT_NO_LTHREADS, 0, 0, 0},
2919 		{CMD_LINE_OPT_PARSE_PTYPE, 0, 0, 0},
2920 		{NULL, 0, 0, 0}
2921 	};
2922 
2923 	argvopt = argv;
2924 
2925 	while ((opt = getopt_long(argc, argvopt, "p:P",
2926 				lgopts, &option_index)) != EOF) {
2927 
2928 		switch (opt) {
2929 		/* portmask */
2930 		case 'p':
2931 			enabled_port_mask = parse_portmask(optarg);
2932 			if (enabled_port_mask == 0) {
2933 				printf("invalid portmask\n");
2934 				print_usage(prgname);
2935 				return -1;
2936 			}
2937 			break;
2938 		case 'P':
2939 			printf("Promiscuous mode selected\n");
2940 			promiscuous_on = 1;
2941 			break;
2942 
2943 		/* long options */
2944 		case 0:
2945 			if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_RX_CONFIG,
2946 				sizeof(CMD_LINE_OPT_RX_CONFIG))) {
2947 				ret = parse_rx_config(optarg);
2948 				if (ret) {
2949 					printf("invalid rx-config\n");
2950 					print_usage(prgname);
2951 					return -1;
2952 				}
2953 			}
2954 
2955 			if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_TX_CONFIG,
2956 				sizeof(CMD_LINE_OPT_TX_CONFIG))) {
2957 				ret = parse_tx_config(optarg);
2958 				if (ret) {
2959 					printf("invalid tx-config\n");
2960 					print_usage(prgname);
2961 					return -1;
2962 				}
2963 			}
2964 
2965 #if (APP_CPU_LOAD > 0)
2966 			if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_STAT_LCORE,
2967 					sizeof(CMD_LINE_OPT_STAT_LCORE))) {
2968 				cpu_load_lcore_id = parse_stat_lcore(optarg);
2969 			}
2970 #endif
2971 
2972 			if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_ETH_DEST,
2973 				sizeof(CMD_LINE_OPT_ETH_DEST)))
2974 					parse_eth_dest(optarg);
2975 
2976 			if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_NO_NUMA,
2977 				sizeof(CMD_LINE_OPT_NO_NUMA))) {
2978 				printf("numa is disabled\n");
2979 				numa_on = 0;
2980 			}
2981 
2982 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
2983 			if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_IPV6,
2984 				sizeof(CMD_LINE_OPT_IPV6))) {
2985 				printf("ipv6 is specified\n");
2986 				ipv6 = 1;
2987 			}
2988 #endif
2989 
2990 			if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_NO_LTHREADS,
2991 					sizeof(CMD_LINE_OPT_NO_LTHREADS))) {
2992 				printf("l-threads model is disabled\n");
2993 				lthreads_on = 0;
2994 			}
2995 
2996 			if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_PARSE_PTYPE,
2997 					sizeof(CMD_LINE_OPT_PARSE_PTYPE))) {
2998 				printf("software packet type parsing enabled\n");
2999 				parse_ptype_on = 1;
3000 			}
3001 
3002 			if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_ENABLE_JUMBO,
3003 				sizeof(CMD_LINE_OPT_ENABLE_JUMBO))) {
3004 				struct option lenopts = {"max-pkt-len", required_argument, 0,
3005 						0};
3006 
3007 				printf("jumbo frame is enabled - disabling simple TX path\n");
3008 				port_conf.rxmode.jumbo_frame = 1;
3009 
3010 				/* if no max-pkt-len set, use the default value ETHER_MAX_LEN */
3011 				if (0 == getopt_long(argc, argvopt, "", &lenopts,
3012 						&option_index)) {
3013 
3014 					ret = parse_max_pkt_len(optarg);
3015 					if ((ret < 64) || (ret > MAX_JUMBO_PKT_LEN)) {
3016 						printf("invalid packet length\n");
3017 						print_usage(prgname);
3018 						return -1;
3019 					}
3020 					port_conf.rxmode.max_rx_pkt_len = ret;
3021 				}
3022 				printf("set jumbo frame max packet length to %u\n",
3023 						(unsigned int)port_conf.rxmode.max_rx_pkt_len);
3024 			}
3025 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
3026 			if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_HASH_ENTRY_NUM,
3027 				sizeof(CMD_LINE_OPT_HASH_ENTRY_NUM))) {
3028 				ret = parse_hash_entry_number(optarg);
3029 				if ((ret > 0) && (ret <= L3FWD_HASH_ENTRIES)) {
3030 					hash_entry_number = ret;
3031 				} else {
3032 					printf("invalid hash entry number\n");
3033 					print_usage(prgname);
3034 					return -1;
3035 				}
3036 			}
3037 #endif
3038 			break;
3039 
3040 		default:
3041 			print_usage(prgname);
3042 			return -1;
3043 		}
3044 	}
3045 
3046 	if (optind >= 0)
3047 		argv[optind-1] = prgname;
3048 
3049 	ret = optind-1;
3050 	optind = 1; /* reset getopt lib */
3051 	return ret;
3052 }
3053 
3054 static void
3055 print_ethaddr(const char *name, const struct ether_addr *eth_addr)
3056 {
3057 	char buf[ETHER_ADDR_FMT_SIZE];
3058 
3059 	ether_format_addr(buf, ETHER_ADDR_FMT_SIZE, eth_addr);
3060 	printf("%s%s", name, buf);
3061 }
3062 
3063 #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
3064 
3065 static void convert_ipv4_5tuple(struct ipv4_5tuple *key1,
3066 		union ipv4_5tuple_host *key2)
3067 {
3068 	key2->ip_dst = rte_cpu_to_be_32(key1->ip_dst);
3069 	key2->ip_src = rte_cpu_to_be_32(key1->ip_src);
3070 	key2->port_dst = rte_cpu_to_be_16(key1->port_dst);
3071 	key2->port_src = rte_cpu_to_be_16(key1->port_src);
3072 	key2->proto = key1->proto;
3073 	key2->pad0 = 0;
3074 	key2->pad1 = 0;
3075 }
3076 
3077 static void convert_ipv6_5tuple(struct ipv6_5tuple *key1,
3078 		union ipv6_5tuple_host *key2)
3079 {
3080 	uint32_t i;
3081 
3082 	for (i = 0; i < 16; i++) {
3083 		key2->ip_dst[i] = key1->ip_dst[i];
3084 		key2->ip_src[i] = key1->ip_src[i];
3085 	}
3086 	key2->port_dst = rte_cpu_to_be_16(key1->port_dst);
3087 	key2->port_src = rte_cpu_to_be_16(key1->port_src);
3088 	key2->proto = key1->proto;
3089 	key2->pad0 = 0;
3090 	key2->pad1 = 0;
3091 	key2->reserve = 0;
3092 }
3093 
3094 #define BYTE_VALUE_MAX 256
3095 #define ALL_32_BITS 0xffffffff
3096 #define BIT_8_TO_15 0x0000ff00
3097 static inline void
3098 populate_ipv4_few_flow_into_table(const struct rte_hash *h)
3099 {
3100 	uint32_t i;
3101 	int32_t ret;
3102 	uint32_t array_len = RTE_DIM(ipv4_l3fwd_route_array);
3103 
3104 	mask0 = _mm_set_epi32(ALL_32_BITS, ALL_32_BITS, ALL_32_BITS, BIT_8_TO_15);
3105 	for (i = 0; i < array_len; i++) {
3106 		struct ipv4_l3fwd_route  entry;
3107 		union ipv4_5tuple_host newkey;
3108 
3109 		entry = ipv4_l3fwd_route_array[i];
3110 		convert_ipv4_5tuple(&entry.key, &newkey);
3111 		ret = rte_hash_add_key(h, (void *)&newkey);
3112 		if (ret < 0) {
3113 			rte_exit(EXIT_FAILURE, "Unable to add entry %" PRIu32
3114 				" to the l3fwd hash.\n", i);
3115 		}
3116 		ipv4_l3fwd_out_if[ret] = entry.if_out;
3117 	}
3118 	printf("Hash: Adding 0x%" PRIx32 " keys\n", array_len);
3119 }
3120 
3121 #define BIT_16_TO_23 0x00ff0000
3122 static inline void
3123 populate_ipv6_few_flow_into_table(const struct rte_hash *h)
3124 {
3125 	uint32_t i;
3126 	int32_t ret;
3127 	uint32_t array_len = RTE_DIM(ipv6_l3fwd_route_array);
3128 
3129 	mask1 = _mm_set_epi32(ALL_32_BITS, ALL_32_BITS, ALL_32_BITS, BIT_16_TO_23);
3130 	mask2 = _mm_set_epi32(0, 0, ALL_32_BITS, ALL_32_BITS);
3131 	for (i = 0; i < array_len; i++) {
3132 		struct ipv6_l3fwd_route entry;
3133 		union ipv6_5tuple_host newkey;
3134 
3135 		entry = ipv6_l3fwd_route_array[i];
3136 		convert_ipv6_5tuple(&entry.key, &newkey);
3137 		ret = rte_hash_add_key(h, (void *)&newkey);
3138 		if (ret < 0) {
3139 			rte_exit(EXIT_FAILURE, "Unable to add entry %" PRIu32
3140 				" to the l3fwd hash.\n", i);
3141 		}
3142 		ipv6_l3fwd_out_if[ret] = entry.if_out;
3143 	}
3144 	printf("Hash: Adding 0x%" PRIx32 " keys\n", array_len);
3145 }
3146 
3147 #define NUMBER_PORT_USED 4
3148 static inline void
3149 populate_ipv4_many_flow_into_table(const struct rte_hash *h,
3150 		unsigned int nr_flow)
3151 {
3152 	unsigned i;
3153 
3154 	mask0 = _mm_set_epi32(ALL_32_BITS, ALL_32_BITS, ALL_32_BITS, BIT_8_TO_15);
3155 
3156 	for (i = 0; i < nr_flow; i++) {
3157 		struct ipv4_l3fwd_route entry;
3158 		union ipv4_5tuple_host newkey;
3159 		uint8_t a = (uint8_t)((i / NUMBER_PORT_USED) % BYTE_VALUE_MAX);
3160 		uint8_t b = (uint8_t)(((i / NUMBER_PORT_USED) / BYTE_VALUE_MAX) %
3161 				BYTE_VALUE_MAX);
3162 		uint8_t c = (uint8_t)((i / NUMBER_PORT_USED) / (BYTE_VALUE_MAX *
3163 				BYTE_VALUE_MAX));
3164 		/* Create the ipv4 exact match flow */
3165 		memset(&entry, 0, sizeof(entry));
3166 		switch (i & (NUMBER_PORT_USED - 1)) {
3167 		case 0:
3168 			entry = ipv4_l3fwd_route_array[0];
3169 			entry.key.ip_dst = IPv4(101, c, b, a);
3170 			break;
3171 		case 1:
3172 			entry = ipv4_l3fwd_route_array[1];
3173 			entry.key.ip_dst = IPv4(201, c, b, a);
3174 			break;
3175 		case 2:
3176 			entry = ipv4_l3fwd_route_array[2];
3177 			entry.key.ip_dst = IPv4(111, c, b, a);
3178 			break;
3179 		case 3:
3180 			entry = ipv4_l3fwd_route_array[3];
3181 			entry.key.ip_dst = IPv4(211, c, b, a);
3182 			break;
3183 		}
3184 		convert_ipv4_5tuple(&entry.key, &newkey);
3185 		int32_t ret = rte_hash_add_key(h, (void *)&newkey);
3186 
3187 		if (ret < 0)
3188 			rte_exit(EXIT_FAILURE, "Unable to add entry %u\n", i);
3189 
3190 		ipv4_l3fwd_out_if[ret] = (uint8_t)entry.if_out;
3191 
3192 	}
3193 	printf("Hash: Adding 0x%x keys\n", nr_flow);
3194 }
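
/*
 * Worked example of the flow generation above: for i = 6, i & 3 = 2 selects
 * ipv4_l3fwd_route_array[2] and i / 4 = 1 gives (a, b, c) = (1, 0, 0), so
 * the generated key has ip_dst = 111.0.0.1. Every fourth flow thus cycles
 * through the four base routes while the destination address increments.
 */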
3195 
3196 static inline void
3197 populate_ipv6_many_flow_into_table(const struct rte_hash *h,
3198 		unsigned int nr_flow)
3199 {
3200 	unsigned i;
3201 
3202 	mask1 = _mm_set_epi32(ALL_32_BITS, ALL_32_BITS, ALL_32_BITS, BIT_16_TO_23);
3203 	mask2 = _mm_set_epi32(0, 0, ALL_32_BITS, ALL_32_BITS);
3204 	for (i = 0; i < nr_flow; i++) {
3205 		struct ipv6_l3fwd_route entry;
3206 		union ipv6_5tuple_host newkey;
3207 
3208 		uint8_t a = (uint8_t) ((i / NUMBER_PORT_USED) % BYTE_VALUE_MAX);
3209 		uint8_t b = (uint8_t) (((i / NUMBER_PORT_USED) / BYTE_VALUE_MAX) %
3210 				BYTE_VALUE_MAX);
3211 		uint8_t c = (uint8_t) ((i / NUMBER_PORT_USED) / (BYTE_VALUE_MAX *
3212 				BYTE_VALUE_MAX));
3213 
3214 		/* Create the ipv6 exact match flow */
3215 		memset(&entry, 0, sizeof(entry));
3216 		switch (i & (NUMBER_PORT_USED - 1)) {
3217 		case 0:
3218 			entry = ipv6_l3fwd_route_array[0];
3219 			break;
3220 		case 1:
3221 			entry = ipv6_l3fwd_route_array[1];
3222 			break;
3223 		case 2:
3224 			entry = ipv6_l3fwd_route_array[2];
3225 			break;
3226 		case 3:
3227 			entry = ipv6_l3fwd_route_array[3];
3228 			break;
3229 		}
3230 		entry.key.ip_dst[13] = c;
3231 		entry.key.ip_dst[14] = b;
3232 		entry.key.ip_dst[15] = a;
3233 		convert_ipv6_5tuple(&entry.key, &newkey);
3234 		int32_t ret = rte_hash_add_key(h, (void *)&newkey);
3235 
3236 		if (ret < 0)
3237 			rte_exit(EXIT_FAILURE, "Unable to add entry %u\n", i);
3238 
3239 		ipv6_l3fwd_out_if[ret] = (uint8_t) entry.if_out;
3240 
3241 	}
3242 	printf("Hash: Adding 0x%x keys\n", nr_flow);
3243 }
3244 
3245 static void
3246 setup_hash(int socketid)
3247 {
3248 	struct rte_hash_parameters ipv4_l3fwd_hash_params = {
3249 		.name = NULL,
3250 		.entries = L3FWD_HASH_ENTRIES,
3251 		.key_len = sizeof(union ipv4_5tuple_host),
3252 		.hash_func = ipv4_hash_crc,
3253 		.hash_func_init_val = 0,
3254 	};
3255 
3256 	struct rte_hash_parameters ipv6_l3fwd_hash_params = {
3257 		.name = NULL,
3258 		.entries = L3FWD_HASH_ENTRIES,
3259 		.key_len = sizeof(union ipv6_5tuple_host),
3260 		.hash_func = ipv6_hash_crc,
3261 		.hash_func_init_val = 0,
3262 	};
3263 
3264 	char s[64];
3265 
3266 	/* create ipv4 hash */
3267 	snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid);
3268 	ipv4_l3fwd_hash_params.name = s;
3269 	ipv4_l3fwd_hash_params.socket_id = socketid;
3270 	ipv4_l3fwd_lookup_struct[socketid] =
3271 			rte_hash_create(&ipv4_l3fwd_hash_params);
3272 	if (ipv4_l3fwd_lookup_struct[socketid] == NULL)
3273 		rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on "
3274 				"socket %d\n", socketid);
3275 
3276 	/* create ipv6 hash */
3277 	snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid);
3278 	ipv6_l3fwd_hash_params.name = s;
3279 	ipv6_l3fwd_hash_params.socket_id = socketid;
3280 	ipv6_l3fwd_lookup_struct[socketid] =
3281 			rte_hash_create(&ipv6_l3fwd_hash_params);
3282 	if (ipv6_l3fwd_lookup_struct[socketid] == NULL)
3283 		rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on "
3284 				"socket %d\n", socketid);
3285 
3286 	if (hash_entry_number != HASH_ENTRY_NUMBER_DEFAULT) {
3287 		/* For testing hash matching with a large number of flows we
3288 		 * generate millions of IP 5-tuples with an incremented dst
3289 		 * address to initialize the hash table. */
3290 		if (ipv6 == 0) {
3291 			/* populate the ipv4 hash */
3292 			populate_ipv4_many_flow_into_table(
3293 				ipv4_l3fwd_lookup_struct[socketid], hash_entry_number);
3294 		} else {
3295 			/* populate the ipv6 hash */
3296 			populate_ipv6_many_flow_into_table(
3297 				ipv6_l3fwd_lookup_struct[socketid], hash_entry_number);
3298 		}
3299 	} else {
3300 		/* Use data in ipv4/ipv6 l3fwd lookup table directly to initialize
3301 		 * the hash table */
3302 		if (ipv6 == 0) {
3303 			/* populate the ipv4 hash */
3304 			populate_ipv4_few_flow_into_table(
3305 					ipv4_l3fwd_lookup_struct[socketid]);
3306 		} else {
3307 			/* populate the ipv6 hash */
3308 			populate_ipv6_few_flow_into_table(
3309 					ipv6_l3fwd_lookup_struct[socketid]);
3310 		}
3311 	}
3312 }
3313 #endif
3314 
3315 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
3316 static void
3317 setup_lpm(int socketid)
3318 {
3319 	struct rte_lpm6_config config;
3320 	struct rte_lpm_config lpm_ipv4_config;
3321 	unsigned i;
3322 	int ret;
3323 	char s[64];
3324 
3325 	/* create the LPM table */
3326 	snprintf(s, sizeof(s), "IPV4_L3FWD_LPM_%d", socketid);
3327 	lpm_ipv4_config.max_rules = IPV4_L3FWD_LPM_MAX_RULES;
3328 	lpm_ipv4_config.number_tbl8s = 256;
3329 	lpm_ipv4_config.flags = 0;
3330 	ipv4_l3fwd_lookup_struct[socketid] =
3331 			rte_lpm_create(s, socketid, &lpm_ipv4_config);
3332 	if (ipv4_l3fwd_lookup_struct[socketid] == NULL)
3333 		rte_exit(EXIT_FAILURE, "Unable to create the l3fwd LPM table"
3334 				" on socket %d\n", socketid);
3335 
3336 	/* populate the LPM table */
3337 	for (i = 0; i < IPV4_L3FWD_NUM_ROUTES; i++) {
3338 
3339 		/* skip unused ports */
3340 		if ((1 << ipv4_l3fwd_route_array[i].if_out &
3341 				enabled_port_mask) == 0)
3342 			continue;
3343 
3344 		ret = rte_lpm_add(ipv4_l3fwd_lookup_struct[socketid],
3345 			ipv4_l3fwd_route_array[i].ip,
3346 			ipv4_l3fwd_route_array[i].depth,
3347 			ipv4_l3fwd_route_array[i].if_out);
3348 
3349 		if (ret < 0) {
3350 			rte_exit(EXIT_FAILURE, "Unable to add entry %u to the "
3351 				"l3fwd LPM table on socket %d\n",
3352 				i, socketid);
3353 		}
3354 
3355 		printf("LPM: Adding route 0x%08x / %d (%d)\n",
3356 			(unsigned)ipv4_l3fwd_route_array[i].ip,
3357 			ipv4_l3fwd_route_array[i].depth,
3358 			ipv4_l3fwd_route_array[i].if_out);
3359 	}
3360 
3361 	/* create the LPM6 table */
3362 	snprintf(s, sizeof(s), "IPV6_L3FWD_LPM_%d", socketid);
3363 
3364 	config.max_rules = IPV6_L3FWD_LPM_MAX_RULES;
3365 	config.number_tbl8s = IPV6_L3FWD_LPM_NUMBER_TBL8S;
3366 	config.flags = 0;
3367 	ipv6_l3fwd_lookup_struct[socketid] = rte_lpm6_create(s, socketid,
3368 				&config);
3369 	if (ipv6_l3fwd_lookup_struct[socketid] == NULL)
3370 		rte_exit(EXIT_FAILURE, "Unable to create the l3fwd LPM table"
3371 				" on socket %d\n", socketid);
3372 
3373 	/* populate the LPM table */
3374 	for (i = 0; i < IPV6_L3FWD_NUM_ROUTES; i++) {
3375 
3376 		/* skip unused ports */
3377 		if ((1 << ipv6_l3fwd_route_array[i].if_out &
3378 				enabled_port_mask) == 0)
3379 			continue;
3380 
3381 		ret = rte_lpm6_add(ipv6_l3fwd_lookup_struct[socketid],
3382 			ipv6_l3fwd_route_array[i].ip,
3383 			ipv6_l3fwd_route_array[i].depth,
3384 			ipv6_l3fwd_route_array[i].if_out);
3385 
3386 		if (ret < 0) {
3387 			rte_exit(EXIT_FAILURE, "Unable to add entry %u to the "
3388 				"l3fwd LPM table on socket %d\n",
3389 				i, socketid);
3390 		}
3391 
3392 		printf("LPM: Adding route %s / %d (%d)\n",
3393 			"IPV6",
3394 			ipv6_l3fwd_route_array[i].depth,
3395 			ipv6_l3fwd_route_array[i].if_out);
3396 	}
3397 }
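
/*
 * Forwarding with this table is a longest-prefix match. For instance, if
 * the route array (defined earlier in this file) contains 1.1.1.0/24 ->
 * port 0, any packet whose destination falls in 1.1.1.0-1.1.1.255 is sent
 * to port 0 unless a longer matching prefix exists.
 */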
3398 #endif
3399 
3400 static int
3401 init_mem(unsigned nb_mbuf)
3402 {
3403 	struct lcore_conf *qconf;
3404 	int socketid;
3405 	unsigned lcore_id;
3406 	char s[64];
3407 
3408 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
3409 		if (rte_lcore_is_enabled(lcore_id) == 0)
3410 			continue;
3411 
3412 		if (numa_on)
3413 			socketid = rte_lcore_to_socket_id(lcore_id);
3414 		else
3415 			socketid = 0;
3416 
3417 		if (socketid >= NB_SOCKETS) {
3418 			rte_exit(EXIT_FAILURE, "Socket %d of lcore %u is out of range %d\n",
3419 				socketid, lcore_id, NB_SOCKETS);
3420 		}
3421 		if (pktmbuf_pool[socketid] == NULL) {
3422 			snprintf(s, sizeof(s), "mbuf_pool_%d", socketid);
3423 			pktmbuf_pool[socketid] =
3424 				rte_pktmbuf_pool_create(s, nb_mbuf,
3425 					MEMPOOL_CACHE_SIZE, 0,
3426 					RTE_MBUF_DEFAULT_BUF_SIZE, socketid);
3427 			if (pktmbuf_pool[socketid] == NULL)
3428 				rte_exit(EXIT_FAILURE,
3429 						"Cannot init mbuf pool on socket %d\n", socketid);
3430 			else
3431 				printf("Allocated mbuf pool on socket %d\n", socketid);
3432 
3433 #if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
3434 			setup_lpm(socketid);
3435 #else
3436 			setup_hash(socketid);
3437 #endif
3438 		}
3439 		qconf = &lcore_conf[lcore_id];
3440 		qconf->ipv4_lookup_struct = ipv4_l3fwd_lookup_struct[socketid];
3441 		qconf->ipv6_lookup_struct = ipv6_l3fwd_lookup_struct[socketid];
3442 	}
3443 	return 0;
3444 }
3445 
3446 /* Check the link status of all ports in up to 9s, and print the final status */
3447 static void
3448 check_all_ports_link_status(uint16_t port_num, uint32_t port_mask)
3449 {
3450 #define CHECK_INTERVAL 100 /* 100ms */
3451 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
3452 	uint16_t portid;
3453 	uint8_t count, all_ports_up, print_flag = 0;
3454 	struct rte_eth_link link;
3455 
3456 	printf("\nChecking link status");
3457 	fflush(stdout);
3458 	for (count = 0; count <= MAX_CHECK_TIME; count++) {
3459 		all_ports_up = 1;
3460 		for (portid = 0; portid < port_num; portid++) {
3461 			if ((port_mask & (1 << portid)) == 0)
3462 				continue;
3463 			memset(&link, 0, sizeof(link));
3464 			rte_eth_link_get_nowait(portid, &link);
3465 			/* print link status if flag set */
3466 			if (print_flag == 1) {
3467 				if (link.link_status)
3468 					printf(
3469 					"Port%d Link Up. Speed %u Mbps - %s\n",
3470 						portid, link.link_speed,
3471 				(link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
3472 					("full-duplex") : ("half-duplex"));
3473 				else
3474 					printf("Port %d Link Down\n", portid);
3475 				continue;
3476 			}
3477 			/* clear all_ports_up flag if any link down */
3478 			if (link.link_status == ETH_LINK_DOWN) {
3479 				all_ports_up = 0;
3480 				break;
3481 			}
3482 		}
3483 		/* after finally printing all link status, get out */
3484 		if (print_flag == 1)
3485 			break;
3486 
3487 		if (all_ports_up == 0) {
3488 			printf(".");
3489 			fflush(stdout);
3490 			rte_delay_ms(CHECK_INTERVAL);
3491 		}
3492 
3493 		/* set the print_flag if all ports up or timeout */
3494 		if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
3495 			print_flag = 1;
3496 			printf("done\n");
3497 		}
3498 	}
3499 }
3500 
3501 int
3502 main(int argc, char **argv)
3503 {
3504 	struct rte_eth_dev_info dev_info;
3505 	struct rte_eth_txconf *txconf;
3506 	int ret;
3507 	int i;
3508 	unsigned nb_ports;
3509 	uint16_t queueid, portid;
3510 	unsigned lcore_id;
3511 	uint32_t n_tx_queue, nb_lcores;
3512 	uint8_t nb_rx_queue, queue, socketid;
3513 
3514 	/* init EAL */
3515 	ret = rte_eal_init(argc, argv);
3516 	if (ret < 0)
3517 		rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n");
3518 	argc -= ret;
3519 	argv += ret;
3520 
3521 	/* pre-init dst MACs for all ports to 02:00:00:00:00:xx */
3522 	for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) {
3523 		dest_eth_addr[portid] = ETHER_LOCAL_ADMIN_ADDR +
3524 				((uint64_t)portid << 40);
3525 		*(uint64_t *)(val_eth + portid) = dest_eth_addr[portid];
3526 	}
3527 
3528 	/* parse application arguments (after the EAL ones) */
3529 	ret = parse_args(argc, argv);
3530 	if (ret < 0)
3531 		rte_exit(EXIT_FAILURE, "Invalid L3FWD parameters\n");
3532 
3533 	if (check_lcore_params() < 0)
3534 		rte_exit(EXIT_FAILURE, "check_lcore_params failed\n");
3535 
3536 	printf("Initializing rx-queues...\n");
3537 	ret = init_rx_queues();
3538 	if (ret < 0)
3539 		rte_exit(EXIT_FAILURE, "init_rx_queues failed\n");
3540 
3541 	printf("Initializing tx-threads...\n");
3542 	ret = init_tx_threads();
3543 	if (ret < 0)
3544 		rte_exit(EXIT_FAILURE, "init_tx_threads failed\n");
3545 
3546 	printf("Initializing rings...\n");
3547 	ret = init_rx_rings();
3548 	if (ret < 0)
3549 		rte_exit(EXIT_FAILURE, "init_rx_rings failed\n");
3550 
3551 	nb_ports = rte_eth_dev_count();
3552 
3553 	if (check_port_config(nb_ports) < 0)
3554 		rte_exit(EXIT_FAILURE, "check_port_config failed\n");
3555 
3556 	nb_lcores = rte_lcore_count();
3557 
	/* initialize all ports */
	for (portid = 0; portid < nb_ports; portid++) {
		/* skip ports that are not enabled */
		if ((enabled_port_mask & (1 << portid)) == 0) {
			printf("\nSkipping disabled port %d\n", portid);
			continue;
		}

		/* init port */
		printf("Initializing port %d ... ", portid);
		fflush(stdout);

		nb_rx_queue = get_port_n_rx_queues(portid);
		n_tx_queue = nb_lcores;
		if (n_tx_queue > MAX_TX_QUEUE_PER_PORT)
			n_tx_queue = MAX_TX_QUEUE_PER_PORT;
		printf("Creating queues: nb_rxq=%d nb_txq=%u... ",
			nb_rx_queue, (unsigned)n_tx_queue);
		ret = rte_eth_dev_configure(portid, nb_rx_queue,
					(uint16_t)n_tx_queue, &port_conf);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%d\n",
				ret, portid);

		ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd,
						       &nb_txd);
		if (ret < 0)
			rte_exit(EXIT_FAILURE,
				 "rte_eth_dev_adjust_nb_rx_tx_desc: err=%d, port=%d\n",
				 ret, portid);

		rte_eth_macaddr_get(portid, &ports_eth_addr[portid]);
		print_ethaddr(" Address:", &ports_eth_addr[portid]);
		printf(", ");
		print_ethaddr("Destination:",
			(const struct ether_addr *)&dest_eth_addr[portid]);
		printf(", ");

		/* prepare the source MAC for each port */
		ether_addr_copy(&ports_eth_addr[portid],
			(struct ether_addr *)(val_eth + portid) + 1);
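		/*
		 * val_eth[portid] now holds the destination MAC (set above)
		 * immediately followed by this source MAC, i.e. the full
		 * 12-byte address block the forwarding path prepends to packets.
		 */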

		/* init memory: per-socket mbuf pools and lookup tables */
		ret = init_mem(NB_MBUF);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "init_mem failed\n");

		/* init one TX queue per (lcore, port) pair */
		queueid = 0;
		for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
			if (rte_lcore_is_enabled(lcore_id) == 0)
				continue;

			if (numa_on)
				socketid = (uint8_t)rte_lcore_to_socket_id(lcore_id);
			else
				socketid = 0;

			printf("txq=%u,%d,%d ", lcore_id, queueid, socketid);
			fflush(stdout);

			rte_eth_dev_info_get(portid, &dev_info);
			txconf = &dev_info.default_txconf;
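			/*
			 * With jumbo frames enabled, clear the default txq_flags
			 * so multi-segment transmits are not disabled by the PMD
			 * defaults.
			 */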
			if (port_conf.rxmode.jumbo_frame)
				txconf->txq_flags = 0;
			ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd,
						     socketid, txconf);
			if (ret < 0)
				rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: err=%d, "
					"port=%d\n", ret, portid);

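			/* remember which TX queue this lcore owns on this port */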
			tx_thread[lcore_id].tx_queue_id[portid] = queueid;
			queueid++;
		}
		printf("\n");
	}

	for (i = 0; i < n_rx_thread; i++) {
		lcore_id = rx_thread[i].conf.lcore_id;

		if (rte_lcore_is_enabled(lcore_id) == 0) {
			rte_exit(EXIT_FAILURE,
					"Cannot start Rx thread on lcore %u: lcore disabled\n",
					lcore_id);
		}

		printf("\nInitializing rx queues for Rx thread %d on lcore %u ... ",
				i, lcore_id);
		fflush(stdout);

		/* init RX queues */
		for (queue = 0; queue < rx_thread[i].n_rx_queue; ++queue) {
			portid = rx_thread[i].rx_queue_list[queue].port_id;
			queueid = rx_thread[i].rx_queue_list[queue].queue_id;

			if (numa_on)
				socketid = (uint8_t)rte_lcore_to_socket_id(lcore_id);
			else
				socketid = 0;

			printf("rxq=%d,%d,%d ", portid, queueid, socketid);
			fflush(stdout);

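			/*
			 * A NULL rx_conf selects the driver's default RX queue
			 * configuration; mbufs are drawn from the per-socket
			 * mempool chosen above.
			 */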
			ret = rte_eth_rx_queue_setup(portid, queueid, nb_rxd,
					socketid,
					NULL,
					pktmbuf_pool[socketid]);
			if (ret < 0)
				rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup: err=%d, "
						"port=%d\n", ret, portid);
		}
	}

	printf("\n");

	/* start ports */
	for (portid = 0; portid < nb_ports; portid++) {
		if ((enabled_port_mask & (1 << portid)) == 0)
			continue;

		/* Start device */
		ret = rte_eth_dev_start(portid);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, port=%d\n",
				ret, portid);

		/*
		 * If enabled, put the device in promiscuous mode.
		 * This allows IO forwarding mode to forward packets
		 * to itself through two cross-connected ports of the
		 * target machine.
		 */
		if (promiscuous_on)
			rte_eth_promiscuous_enable(portid);
	}

	for (i = 0; i < n_rx_thread; i++) {
		lcore_id = rx_thread[i].conf.lcore_id;
		if (rte_lcore_is_enabled(lcore_id) == 0)
			continue;

		/*
		 * Install the SW ptype parser if requested; otherwise verify
		 * that the port can classify packet types in hardware.
		 */
		for (queue = 0; queue < rx_thread[i].n_rx_queue; ++queue) {
			portid = rx_thread[i].rx_queue_list[queue].port_id;
			queueid = rx_thread[i].rx_queue_list[queue].queue_id;

			if (parse_ptype_on) {
				if (!rte_eth_add_rx_callback(portid, queueid,
						cb_parse_ptype, NULL))
					rte_exit(EXIT_FAILURE,
						"Failed to add rx callback: "
						"port=%d\n", portid);
			} else if (!check_ptype(portid))
				rte_exit(EXIT_FAILURE,
					"Port %d cannot parse packet type.\n\n"
					"Please add --parse-ptype to use sw "
					"packet type analyzer.\n\n",
					portid);
		}
	}

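	/* poll until every enabled port reports link up, or the check times out */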
	check_all_ports_link_status((uint8_t)nb_ports, enabled_port_mask);

	if (lthreads_on) {
		printf("Starting L-Threading Model\n");

#if (APP_CPU_LOAD > 0)
		if (cpu_load_lcore_id > 0)
			/* reserve one lcore for the CPU-load collector */
			nb_lcores--;
#endif

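		/*
		 * One lthread scheduler per participating lcore: SKIP_MASTER
		 * launches sched_spawner() on each slave lcore while the master
		 * lcore runs lthread_master_spawner() directly.
		 */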
		lthread_num_schedulers_set(nb_lcores);
		rte_eal_mp_remote_launch(sched_spawner, NULL, SKIP_MASTER);
		lthread_master_spawner(NULL);

	} else {
		printf("Starting P-Threading Model\n");
		/* launch pthread_run() on every lcore, master included */
		rte_eal_mp_remote_launch(pthread_run, NULL, CALL_MASTER);
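		/* master ran pthread_run() too (CALL_MASTER); wait for slave lcores */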
		RTE_LCORE_FOREACH_SLAVE(lcore_id) {
			if (rte_eal_wait_lcore(lcore_id) < 0)
				return -1;
		}
	}

	return 0;
}