xref: /f-stack/lib/ff_dpdk_if.c (revision 7abd0fb2)
/*
 * Copyright (C) 2017 THL A29 Limited, a Tencent company.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *   list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *   this list of conditions and the following disclaimer in the documentation
 *   and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <ctype.h>
#include <errno.h>

#include <rte_common.h>
#include <rte_byteorder.h>
#include <rte_log.h>
#include <rte_memory.h>
#include <rte_memcpy.h>
#include <rte_memzone.h>
#include <rte_config.h>
#include <rte_eal.h>
#include <rte_pci.h>
#include <rte_mbuf.h>
#include <rte_lcore.h>
#include <rte_launch.h>
#include <rte_ethdev.h>
#include <rte_debug.h>
#include <rte_ether.h>
#include <rte_malloc.h>
#include <rte_cycles.h>
#include <rte_timer.h>
#include <rte_thash.h>
#include <rte_ip.h>
#include <rte_tcp.h>
#include <rte_udp.h>

#include "ff_dpdk_if.h"
#include "ff_dpdk_pcap.h"
#include "ff_dpdk_kni.h"
#include "ff_config.h"
#include "ff_veth.h"
#include "ff_host_interface.h"
#include "ff_msg.h"
#include "ff_api.h"

#define MEMPOOL_CACHE_SIZE 256

#define ARP_RING_SIZE 2048

#define MSG_RING_SIZE 32

/*
 * Configurable number of RX/TX ring descriptors
 */
#define RX_QUEUE_SIZE 512
#define TX_QUEUE_SIZE 256

#define MAX_PKT_BURST 32
#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */

/*
 * Try to avoid TX buffering if we have at least MAX_TX_BURST packets to send.
 */
#define MAX_TX_BURST    (MAX_PKT_BURST / 2)

#define NB_SOCKETS 8

/* Configure how many packets ahead to prefetch when reading packets */
#define PREFETCH_OFFSET    3

#define MAX_RX_QUEUE_PER_LCORE 16
#define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS
#define MAX_RX_QUEUE_PER_PORT 128

#define BITS_PER_HEX 4

static int enable_kni;
static int kni_accept;

static struct rte_timer freebsd_clock;

/* Default RSS key, taken from Mellanox's Linux driver. */
static uint8_t default_rsskey_40bytes[40] = {
    0xd1, 0x81, 0xc6, 0x2c, 0xf7, 0xf4, 0xdb, 0x5b,
    0x19, 0x83, 0xa2, 0xfc, 0x94, 0x3e, 0x1a, 0xdb,
    0xd9, 0x38, 0x9e, 0x6b, 0xd1, 0x03, 0x9c, 0x2c,
    0xa7, 0x44, 0x99, 0xad, 0x59, 0x3d, 0x56, 0xd9,
    0xf3, 0x25, 0x3c, 0x06, 0x2a, 0xdc, 0x1f, 0xfc
};

static struct rte_eth_conf default_port_conf = {
    .rxmode = {
        .mq_mode = ETH_MQ_RX_RSS,
        .max_rx_pkt_len = ETHER_MAX_LEN,
        .split_hdr_size = 0, /**< Header buffer size */
        .header_split   = 0, /**< Header split disabled */
        .hw_ip_checksum = 0, /**< IP checksum offload disabled */
        .hw_vlan_filter = 0, /**< VLAN filtering disabled */
        .hw_vlan_strip  = 0, /**< VLAN strip disabled */
        .hw_vlan_extend = 0, /**< Extended VLAN disabled */
        .jumbo_frame    = 0, /**< Jumbo frame support disabled */
        .hw_strip_crc   = 0, /**< CRC stripping by hardware disabled */
        .enable_lro     = 0, /**< LRO disabled */
    },
    .rx_adv_conf = {
        .rss_conf = {
            .rss_key = default_rsskey_40bytes,
            .rss_key_len = 40,
            .rss_hf = ETH_RSS_PROTO_MASK,
        },
    },
    .txmode = {
        .mq_mode = ETH_MQ_TX_NONE,
    },
};

struct mbuf_table {
    uint16_t len;
    struct rte_mbuf *m_table[MAX_PKT_BURST];
};

struct lcore_rx_queue {
    uint8_t port_id;
    uint8_t queue_id;
} __rte_cache_aligned;

struct lcore_conf {
    uint16_t proc_id;
    uint16_t nb_procs;
    uint16_t socket_id;
    uint16_t nb_rx_queue;
    uint16_t *lcore_proc;
    struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE];
    uint16_t tx_queue_id[RTE_MAX_ETHPORTS];
    struct mbuf_table tx_mbufs[RTE_MAX_ETHPORTS];
    char *pcap[RTE_MAX_ETHPORTS];
} __rte_cache_aligned;

static struct lcore_conf lcore_conf;

static struct rte_mempool *pktmbuf_pool[NB_SOCKETS];

static struct rte_ring **arp_ring[RTE_MAX_LCORE];

struct ff_msg_ring {
    char ring_name[2][RTE_RING_NAMESIZE];
    /* ring[0]: the lcore receives messages that another process sends. */
    /* ring[1]: the lcore sends replies that the other process reads. */
    struct rte_ring *ring[2];
} __rte_cache_aligned;

static struct ff_msg_ring msg_ring[RTE_MAX_LCORE];
static struct rte_mempool *message_pool;

struct ff_dpdk_if_context {
    void *sc;
    void *ifp;
    uint16_t port_id;
    struct ff_hw_features hw_features;
} __rte_cache_aligned;

static struct ff_dpdk_if_context *veth_ctx[RTE_MAX_ETHPORTS];

extern void ff_hardclock(void);

static void
freebsd_hardclock_job(__rte_unused struct rte_timer *timer,
    __rte_unused void *arg) {
    ff_hardclock();
}

struct ff_dpdk_if_context *
ff_dpdk_register_if(void *sc, void *ifp, struct ff_port_cfg *cfg)
{
    struct ff_dpdk_if_context *ctx;

    ctx = calloc(1, sizeof(struct ff_dpdk_if_context));
    if (ctx == NULL)
        return NULL;

    ctx->sc = sc;
    ctx->ifp = ifp;
    ctx->port_id = cfg->port_id;
    ctx->hw_features = cfg->hw_features;

    return ctx;
}

void
ff_dpdk_deregister_if(struct ff_dpdk_if_context *ctx)
{
    free(ctx);
}

static void
check_all_ports_link_status(void)
{
    #define CHECK_INTERVAL 100 /* 100ms */
    #define MAX_CHECK_TIME 90  /* 9s (90 * 100ms) in total */

    uint8_t count, all_ports_up, print_flag = 0;
    struct rte_eth_link link;

    printf("\nChecking link status");
    fflush(stdout);

    int i, nb_ports;
    nb_ports = ff_global_cfg.dpdk.nb_ports;
    for (count = 0; count <= MAX_CHECK_TIME; count++) {
        all_ports_up = 1;
        for (i = 0; i < nb_ports; i++) {
            uint8_t portid = ff_global_cfg.dpdk.port_cfgs[i].port_id;
            memset(&link, 0, sizeof(link));
            rte_eth_link_get_nowait(portid, &link);

            /* print link status if flag set */
            if (print_flag == 1) {
                if (link.link_status) {
                    printf("Port %d Link Up - speed %u "
                        "Mbps - %s\n", (int)portid,
                        (unsigned)link.link_speed,
                        (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
                        ("full-duplex") : ("half-duplex"));
                } else {
                    printf("Port %d Link Down\n", (int)portid);
                }
                continue;
            }
            /* clear all_ports_up flag if any link down */
            if (link.link_status == 0) {
                all_ports_up = 0;
                break;
            }
        }

        /* after finally printing all link status, get out */
        if (print_flag == 1)
            break;

        if (all_ports_up == 0) {
            printf(".");
            fflush(stdout);
            rte_delay_ms(CHECK_INTERVAL);
        }

        /* set the print_flag if all ports up or timeout */
        if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
            print_flag = 1;
            printf("done\n");
        }
    }
}

static int
xdigit2val(unsigned char c)
{
    int val;

    if (isdigit(c))
        val = c - '0';
    else if (isupper(c))
        val = c - 'A' + 10;
    else
        val = c - 'a' + 10;
    return val;
}

static int
parse_lcore_mask(const char *coremask, uint16_t *lcore_proc,
    uint16_t nb_procs)
{
    int i, j, idx = 0;
    unsigned count = 0;
    char c;
    int val;

    if (coremask == NULL)
        return -1;

    /* Remove leading and trailing blank characters,
     * and skip a leading 0x/0X prefix if present.
     */
    while (isblank(*coremask))
        coremask++;
    if (coremask[0] == '0' && ((coremask[1] == 'x')
        || (coremask[1] == 'X')))
        coremask += 2;

    i = strlen(coremask);
    while ((i > 0) && isblank(coremask[i - 1]))
        i--;

    if (i == 0)
        return -1;

    /* Scan hex digits from the least significant end of the mask. */
    for (i = i - 1; i >= 0 && idx < RTE_MAX_LCORE && count < nb_procs; i--) {
        c = coremask[i];
        if (isxdigit(c) == 0) {
            return -1;
        }
        val = xdigit2val(c);
        for (j = 0; j < BITS_PER_HEX && idx < RTE_MAX_LCORE && count < nb_procs;
            j++, idx++) {
            if ((1 << j) & val) {
                if (!lcore_config[idx].detected) {
                    RTE_LOG(ERR, EAL, "lcore %u unavailable\n", idx);
                    return -1;
                }
                lcore_proc[count] = idx;
                count++;
            }
        }
    }

    /* Any remaining characters must be zero. */
    for (; i >= 0; i--)
        if (coremask[i] != '0')
            return -1;

    if (count < nb_procs)
        return -1;

    return 0;
}
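
/*
 * Usage sketch (illustrative only, not part of the original file): the bits
 * of the hex mask are consumed from the least significant digit upward, so a
 * mask of "0x6" with nb_procs == 2 selects lcores 1 and 2, assuming both
 * lcores were detected by the EAL.
 */
#if 0
static void
example_parse_lcore_mask(void)
{
    uint16_t procs[2];

    if (parse_lcore_mask("0x6", procs, 2) == 0) {
        /* procs[0] == 1, procs[1] == 2 */
    }
}
#endif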

static int
init_lcore_conf(void)
{
    uint8_t nb_ports = rte_eth_dev_count();
    if (nb_ports == 0) {
        rte_exit(EXIT_FAILURE, "No probed ethernet devices\n");
    }

    lcore_conf.proc_id = ff_global_cfg.dpdk.proc_id;
    lcore_conf.nb_procs = ff_global_cfg.dpdk.nb_procs;
    lcore_conf.lcore_proc = rte_zmalloc(NULL,
        sizeof(uint16_t)*lcore_conf.nb_procs, 0);
    if (lcore_conf.lcore_proc == NULL) {
        rte_exit(EXIT_FAILURE, "rte_zmalloc lcore_proc failed\n");
    }

    int ret = parse_lcore_mask(ff_global_cfg.dpdk.lcore_mask,
        lcore_conf.lcore_proc, lcore_conf.nb_procs);
    if (ret < 0) {
        rte_exit(EXIT_FAILURE, "parse_lcore_mask failed:%s\n",
            ff_global_cfg.dpdk.lcore_mask);
    }

    uint16_t socket_id = 0;
    if (ff_global_cfg.dpdk.numa_on) {
        socket_id = rte_lcore_to_socket_id(rte_lcore_id());
    }

    lcore_conf.socket_id = socket_id;

    /* Currently, the proc id maps 1:1 to the rx/tx queue id per port. */
    uint8_t port_id, enabled_ports = 0;
    for (port_id = 0; port_id < nb_ports; port_id++) {
        if (ff_global_cfg.dpdk.port_mask &&
            (ff_global_cfg.dpdk.port_mask & (1 << port_id)) == 0) {
            printf("\nSkipping disabled port %d\n", port_id);
            continue;
        }

        if (port_id >= ff_global_cfg.dpdk.nb_ports) {
            printf("\nSkipping non-configured port %d\n", port_id);
            break;
        }

        uint16_t nb_rx_queue = lcore_conf.nb_rx_queue;
        lcore_conf.rx_queue_list[nb_rx_queue].port_id = port_id;
        lcore_conf.rx_queue_list[nb_rx_queue].queue_id = lcore_conf.proc_id;
        lcore_conf.nb_rx_queue++;

        lcore_conf.tx_queue_id[port_id] = lcore_conf.proc_id;
        lcore_conf.pcap[port_id] = ff_global_cfg.dpdk.port_cfgs[enabled_ports].pcap;

        ff_global_cfg.dpdk.port_cfgs[enabled_ports].port_id = port_id;

        enabled_ports++;
    }

    ff_global_cfg.dpdk.nb_ports = enabled_ports;

    return 0;
}

static int
init_mem_pool(void)
{
    uint8_t nb_ports = ff_global_cfg.dpdk.nb_ports;
    uint32_t nb_lcores = ff_global_cfg.dpdk.nb_procs;
    uint32_t nb_tx_queue = nb_lcores;
    uint32_t nb_rx_queue = lcore_conf.nb_rx_queue * nb_lcores;

    unsigned nb_mbuf = RTE_MAX(
        (nb_rx_queue*RX_QUEUE_SIZE          +
        nb_ports*nb_lcores*MAX_PKT_BURST    +
        nb_ports*nb_tx_queue*TX_QUEUE_SIZE  +
        nb_lcores*MEMPOOL_CACHE_SIZE),
        (unsigned)8192);

    unsigned socketid = 0;
    uint16_t i, lcore_id;
    char s[64];
    int numa_on = ff_global_cfg.dpdk.numa_on;

    for (i = 0; i < lcore_conf.nb_procs; i++) {
        lcore_id = lcore_conf.lcore_proc[i];
        if (numa_on) {
            socketid = rte_lcore_to_socket_id(lcore_id);
        }

        if (socketid >= NB_SOCKETS) {
            rte_exit(EXIT_FAILURE, "Socket %d of lcore %u is out of range %d\n",
                socketid, lcore_id, NB_SOCKETS);
        }

        if (pktmbuf_pool[socketid] != NULL) {
            continue;
        }

        /* The primary process creates the pool; secondaries attach to it. */
        if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
            snprintf(s, sizeof(s), "mbuf_pool_%d", socketid);
            pktmbuf_pool[socketid] =
                rte_pktmbuf_pool_create(s, nb_mbuf,
                    MEMPOOL_CACHE_SIZE, 0,
                    RTE_MBUF_DEFAULT_BUF_SIZE, socketid);
        } else {
            snprintf(s, sizeof(s), "mbuf_pool_%d", socketid);
            pktmbuf_pool[socketid] = rte_mempool_lookup(s);
        }

        if (pktmbuf_pool[socketid] == NULL) {
            rte_exit(EXIT_FAILURE, "Cannot create mbuf pool on socket %d\n", socketid);
        } else {
            printf("create mbuf pool on socket %d\n", socketid);
        }
    }

    return 0;
}

static struct rte_ring *
create_ring(const char *name, unsigned count, int socket_id, unsigned flags)
{
    struct rte_ring *ring;

    if (name == NULL)
        return NULL;

    /* If the ring already exists, just attach to it. */
    if (likely((ring = rte_ring_lookup(name)) != NULL))
        return ring;

    if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
        return rte_ring_create(name, count, socket_id, flags);
    } else {
        return rte_ring_lookup(name);
    }
}
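
/*
 * Usage sketch (illustrative only): the same call works in both the primary
 * and secondary processes, so either side can run it without knowing whether
 * the ring has already been created. The name "example_ring" is hypothetical.
 */
#if 0
static void
example_create_ring(void)
{
    struct rte_ring *r = create_ring("example_ring", 1024,
        rte_socket_id(), RING_F_SP_ENQ | RING_F_SC_DEQ);
    if (r == NULL)
        rte_panic("create ring failed\n");
}
#endif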

static int
init_arp_ring(void)
{
    int i, j;
    char name_buf[RTE_RING_NAMESIZE];
    int nb_procs = ff_global_cfg.dpdk.nb_procs;
    int proc_id = ff_global_cfg.dpdk.proc_id;

    /* Allocate ARP ring pointers according to the Ethernet device count. */
    int nb_ports = rte_eth_dev_count();
    for (i = 0; i < nb_procs; ++i) {
        snprintf(name_buf, RTE_RING_NAMESIZE, "ring_ptr_%d_%d",
            proc_id, i);

        arp_ring[i] = rte_zmalloc(name_buf,
            sizeof(struct rte_ring *) * nb_ports,
            RTE_CACHE_LINE_SIZE);
        if (arp_ring[i] == NULL) {
            rte_exit(EXIT_FAILURE, "rte_zmalloc(%s (struct rte_ring*)) "
                "failed\n", name_buf);
        }
    }

    unsigned socketid = lcore_conf.socket_id;

    /* Create rings according to the ports actually being used. */
    nb_ports = ff_global_cfg.dpdk.nb_ports;
    for (j = 0; j < nb_ports; j++) {
        uint8_t port_id = ff_global_cfg.dpdk.port_cfgs[j].port_id;

        for (i = 0; i < nb_procs; ++i) {
            snprintf(name_buf, RTE_RING_NAMESIZE, "arp_ring_%d_%d", i, port_id);
            arp_ring[i][port_id] = create_ring(name_buf, ARP_RING_SIZE,
                socketid, RING_F_SC_DEQ);

            if (arp_ring[i][port_id] == NULL)
                rte_panic("create ring:%s failed!\n", name_buf);

            printf("create ring:%s success, %u ring entries are now free!\n",
                name_buf, rte_ring_free_count(arp_ring[i][port_id]));
        }
    }

    return 0;
}

static void
ff_msg_init(struct rte_mempool *mp,
    __attribute__((unused)) void *opaque_arg,
    void *obj, __attribute__((unused)) unsigned i)
{
    struct ff_msg *msg = (struct ff_msg *)obj;
    msg->buf_addr = (char *)msg + sizeof(struct ff_msg);
    msg->buf_len = mp->elt_size - sizeof(struct ff_msg);
}

static int
init_msg_ring(void)
{
    uint16_t i;
    uint16_t nb_procs = ff_global_cfg.dpdk.nb_procs;
    unsigned socketid = lcore_conf.socket_id;

    /* Create the message buffer pool */
    if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
        message_pool = rte_mempool_create(FF_MSG_POOL,
           MSG_RING_SIZE * 2 * nb_procs,
           MAX_MSG_BUF_SIZE, MSG_RING_SIZE / 2, 0,
           NULL, NULL, ff_msg_init, NULL,
           socketid, 0);
    } else {
        message_pool = rte_mempool_lookup(FF_MSG_POOL);
    }

    if (message_pool == NULL) {
        rte_panic("Create msg mempool failed\n");
    }

    for (i = 0; i < nb_procs; ++i) {
        snprintf(msg_ring[i].ring_name[0], RTE_RING_NAMESIZE,
            "%s%u", FF_MSG_RING_IN, i);
        snprintf(msg_ring[i].ring_name[1], RTE_RING_NAMESIZE,
            "%s%u", FF_MSG_RING_OUT, i);

        msg_ring[i].ring[0] = create_ring(msg_ring[i].ring_name[0],
            MSG_RING_SIZE, socketid, RING_F_SP_ENQ | RING_F_SC_DEQ);
        if (msg_ring[i].ring[0] == NULL)
            rte_panic("create ring::%s failed!\n", msg_ring[i].ring_name[0]);

        msg_ring[i].ring[1] = create_ring(msg_ring[i].ring_name[1],
            MSG_RING_SIZE, socketid, RING_F_SP_ENQ | RING_F_SC_DEQ);
        if (msg_ring[i].ring[1] == NULL)
            rte_panic("create ring::%s failed!\n", msg_ring[i].ring_name[1]);
    }

    return 0;
}
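
/*
 * Usage sketch (illustrative only): how a control process could issue a
 * sysctl request to worker proc 0 over these rings. Field names follow the
 * ff_msg definition used above; the busy-wait on the reply ring is a
 * simplification, not the original tool's logic.
 */
#if 0
static int
example_send_sysctl_msg(void)
{
    void *obj;
    struct ff_msg *msg, *reply;

    if (rte_mempool_get(message_pool, &obj) < 0)
        return -1;
    msg = (struct ff_msg *)obj;
    msg->msg_type = FF_SYSCTL;
    /* ... fill msg->sysctl.name, namelen, old/oldlenp, new/newlen ... */

    if (rte_ring_enqueue(msg_ring[0].ring[0], msg) < 0) {
        rte_mempool_put(message_pool, msg);
        return -1;
    }

    while (rte_ring_dequeue(msg_ring[0].ring[1], (void **)&reply) != 0)
        ;   /* spin until the worker replies */

    int result = reply->result;
    rte_mempool_put(message_pool, reply);
    return result;
}
#endif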

static int
init_kni(void)
{
    int nb_ports = rte_eth_dev_count();
    kni_accept = 0;
    if (strcasecmp(ff_global_cfg.kni.method, "accept") == 0)
        kni_accept = 1;

    ff_kni_init(nb_ports, ff_global_cfg.kni.tcp_port,
        ff_global_cfg.kni.udp_port);

    unsigned socket_id = lcore_conf.socket_id;
    struct rte_mempool *mbuf_pool = pktmbuf_pool[socket_id];

    nb_ports = ff_global_cfg.dpdk.nb_ports;
    int i;
    for (i = 0; i < nb_ports; i++) {
        uint8_t port_id = ff_global_cfg.dpdk.port_cfgs[i].port_id;
        ff_kni_alloc(port_id, socket_id, mbuf_pool);
    }

    return 0;
}

static int
init_port_start(void)
{
    int nb_ports = ff_global_cfg.dpdk.nb_ports;
    uint16_t nb_procs = ff_global_cfg.dpdk.nb_procs;
    unsigned socketid = rte_lcore_to_socket_id(rte_lcore_id());
    struct rte_mempool *mbuf_pool = pktmbuf_pool[socketid];
    uint16_t i;

    for (i = 0; i < nb_ports; i++) {
        uint8_t port_id = ff_global_cfg.dpdk.port_cfgs[i].port_id;

        struct rte_eth_dev_info dev_info;
        rte_eth_dev_info_get(port_id, &dev_info);

        if (nb_procs > dev_info.max_rx_queues) {
            rte_exit(EXIT_FAILURE, "num_procs[%d] bigger than max_rx_queues[%d]\n",
                nb_procs,
                dev_info.max_rx_queues);
        }

        if (nb_procs > dev_info.max_tx_queues) {
            rte_exit(EXIT_FAILURE, "num_procs[%d] bigger than max_tx_queues[%d]\n",
                nb_procs,
                dev_info.max_tx_queues);
        }

        struct ether_addr addr;
        rte_eth_macaddr_get(port_id, &addr);
        printf("Port %u MAC: %02" PRIx8 " %02" PRIx8 " %02" PRIx8
                   " %02" PRIx8 " %02" PRIx8 " %02" PRIx8 "\n",
                (unsigned)port_id,
                addr.addr_bytes[0], addr.addr_bytes[1],
                addr.addr_bytes[2], addr.addr_bytes[3],
                addr.addr_bytes[4], addr.addr_bytes[5]);

        rte_memcpy(ff_global_cfg.dpdk.port_cfgs[i].mac,
            addr.addr_bytes, ETHER_ADDR_LEN);

        /* Set txq_flags: we do not need multi-mempool or refcounting */
        dev_info.default_txconf.txq_flags = ETH_TXQ_FLAGS_NOMULTMEMP |
            ETH_TXQ_FLAGS_NOREFCOUNT;

        /* Disable features that are not supported by port's HW */
        if (!(dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_CKSUM)) {
            dev_info.default_txconf.txq_flags |= ETH_TXQ_FLAGS_NOXSUMUDP;
        }

        if (!(dev_info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_CKSUM)) {
            dev_info.default_txconf.txq_flags |= ETH_TXQ_FLAGS_NOXSUMTCP;
        }

        if (!(dev_info.tx_offload_capa & DEV_TX_OFFLOAD_SCTP_CKSUM)) {
            dev_info.default_txconf.txq_flags |= ETH_TXQ_FLAGS_NOXSUMSCTP;
        }

        if (!(dev_info.tx_offload_capa & DEV_TX_OFFLOAD_VLAN_INSERT)) {
            dev_info.default_txconf.txq_flags |= ETH_TXQ_FLAGS_NOVLANOFFL;
        }

        if (!(dev_info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_TSO) &&
            !(dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_TSO)) {
            dev_info.default_txconf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS;
        }

        struct rte_eth_conf port_conf = {0};

        /* Set RSS mode */
        port_conf.rxmode.mq_mode = ETH_MQ_RX_RSS;
        port_conf.rx_adv_conf.rss_conf.rss_hf = ETH_RSS_PROTO_MASK;
        port_conf.rx_adv_conf.rss_conf.rss_key = default_rsskey_40bytes;
        port_conf.rx_adv_conf.rss_conf.rss_key_len = 40;

        /* Set Rx VLAN stripping */
        if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_VLAN_STRIP) {
            port_conf.rxmode.hw_vlan_strip = 1;
        }

        /* Enable HW CRC stripping */
        port_conf.rxmode.hw_strip_crc = 1;

        /* FIXME: Enable TCP LRO? */
        #if 0
        if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO) {
            printf("LRO is supported\n");
            port_conf.rxmode.enable_lro = 1;
            ff_global_cfg.dpdk.port_cfgs[i].hw_features.rx_lro = 1;
        }
        #endif

        /* Set Rx checksum checking */
        if ((dev_info.rx_offload_capa & DEV_RX_OFFLOAD_IPV4_CKSUM) &&
            (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_UDP_CKSUM) &&
            (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_CKSUM)) {
            printf("RX checksum offload supported\n");
            port_conf.rxmode.hw_ip_checksum = 1;
            ff_global_cfg.dpdk.port_cfgs[i].hw_features.rx_csum = 1;
        }

        if ((dev_info.tx_offload_capa & DEV_TX_OFFLOAD_IPV4_CKSUM)) {
            printf("TX ip checksum offload supported\n");
            ff_global_cfg.dpdk.port_cfgs[i].hw_features.tx_csum_ip = 1;
        }

        if ((dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_CKSUM) &&
            (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_CKSUM)) {
            printf("TX TCP&UDP checksum offload supported\n");
            ff_global_cfg.dpdk.port_cfgs[i].hw_features.tx_csum_l4 = 1;
        }

        if (ff_global_cfg.dpdk.tso) {
            if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_TSO) {
                printf("TSO is supported\n");
                ff_global_cfg.dpdk.port_cfgs[i].hw_features.tx_tso = 1;
            }
        } else {
            printf("TSO is disabled\n");
        }

        /* Secondary processes only attach; skip device configuration. */
        if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
            continue;
        }

        /* Currently, the proc id maps 1:1 to the queue id per port. */
        int ret = rte_eth_dev_configure(port_id, nb_procs, nb_procs, &port_conf);
        if (ret != 0) {
            return ret;
        }

        uint16_t q;
        for (q = 0; q < nb_procs; q++) {
            ret = rte_eth_tx_queue_setup(port_id, q, TX_QUEUE_SIZE,
                socketid, &dev_info.default_txconf);
            if (ret < 0) {
                return ret;
            }

            ret = rte_eth_rx_queue_setup(port_id, q, RX_QUEUE_SIZE,
                socketid, &dev_info.default_rxconf, mbuf_pool);
            if (ret < 0) {
                return ret;
            }
        }

        ret = rte_eth_dev_start(port_id);
        if (ret < 0) {
            return ret;
        }

        /* Enable RX in promiscuous mode for the Ethernet device. */
        if (ff_global_cfg.dpdk.promiscuous) {
            rte_eth_promiscuous_enable(port_id);
            ret = rte_eth_promiscuous_get(port_id);
            if (ret == 1) {
                printf("set port %u to promiscuous mode ok\n", port_id);
            } else {
                printf("set port %u to promiscuous mode error\n", port_id);
            }
        }

        /* Enable pcap dump */
        if (ff_global_cfg.dpdk.port_cfgs[i].pcap) {
            ff_enable_pcap(ff_global_cfg.dpdk.port_cfgs[i].pcap);
        }
    }

    return 0;
}

static int
init_freebsd_clock(void)
{
    rte_timer_subsystem_init();
    uint64_t hz = rte_get_timer_hz();
    /* Milliseconds per FreeBSD hardclock tick, e.g. 10ms for hz=100. */
    uint64_t intrs = MS_PER_S/ff_global_cfg.freebsd.hz;
    /* TSC cycles per tick: cycles-per-ms (rounded up) times ms-per-tick. */
    uint64_t tsc = (hz + MS_PER_S - 1) / MS_PER_S*intrs;

    rte_timer_init(&freebsd_clock);
    rte_timer_reset(&freebsd_clock, tsc, PERIODICAL,
        rte_lcore_id(), &freebsd_hardclock_job, NULL);

    return 0;
}

int
ff_dpdk_init(int argc, char **argv)
{
    if (ff_global_cfg.dpdk.nb_procs < 1 ||
        ff_global_cfg.dpdk.nb_procs > RTE_MAX_LCORE ||
        ff_global_cfg.dpdk.proc_id >= ff_global_cfg.dpdk.nb_procs ||
        ff_global_cfg.dpdk.proc_id < 0) {
        printf("param num_procs[%d] or proc_id[%d] error!\n",
            ff_global_cfg.dpdk.nb_procs,
            ff_global_cfg.dpdk.proc_id);
        exit(1);
    }

    int ret = rte_eal_init(argc, argv);
    if (ret < 0) {
        rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
    }

    init_lcore_conf();

    init_mem_pool();

    init_arp_ring();

    init_msg_ring();

    enable_kni = ff_global_cfg.kni.enable;
    if (enable_kni) {
        init_kni();
    }

    ret = init_port_start();
    if (ret < 0) {
        rte_exit(EXIT_FAILURE, "init_port_start failed\n");
    }

    check_all_ports_link_status();

    init_freebsd_clock();

    return 0;
}

static void
ff_veth_input(const struct ff_dpdk_if_context *ctx, struct rte_mbuf *pkt)
{
    uint8_t rx_csum = ctx->hw_features.rx_csum;
    if (rx_csum) {
        if (pkt->ol_flags & (PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD)) {
            /* Drop packets with bad checksums; free the mbuf to avoid a leak. */
            rte_pktmbuf_free(pkt);
            return;
        }
    }

    /*
     * FIXME: should we save pkt->vlan_tci
     * if (pkt->ol_flags & PKT_RX_VLAN_PKT)
     */

    void *data = rte_pktmbuf_mtod(pkt, void*);
    uint16_t len = rte_pktmbuf_data_len(pkt);

    void *hdr = ff_mbuf_gethdr(pkt, pkt->pkt_len, data, len, rx_csum);
    if (hdr == NULL) {
        rte_pktmbuf_free(pkt);
        return;
    }

    pkt = pkt->next;
    void *prev = hdr;
    while (pkt != NULL) {
        data = rte_pktmbuf_mtod(pkt, void*);
        len = rte_pktmbuf_data_len(pkt);

        void *mb = ff_mbuf_get(prev, data, len);
        if (mb == NULL) {
            ff_mbuf_free(hdr);
            return;
        }
        pkt = pkt->next;
        prev = mb;
    }

    ff_veth_process_packet(ctx->ifp, hdr);
}

static enum FilterReturn
protocol_filter(const void *data, uint16_t len)
{
    if (len < sizeof(struct ether_hdr))
        return FILTER_UNKNOWN;

    const struct ether_hdr *hdr;
    hdr = (const struct ether_hdr *)data;

    if (ntohs(hdr->ether_type) == ETHER_TYPE_ARP)
        return FILTER_ARP;

    if (!enable_kni) {
        return FILTER_UNKNOWN;
    }

    if (ntohs(hdr->ether_type) != ETHER_TYPE_IPv4)
        return FILTER_UNKNOWN;

    return ff_kni_proto_filter(data + sizeof(struct ether_hdr),
        len - sizeof(struct ether_hdr));
}

static inline void
process_packets(uint8_t port_id, uint16_t queue_id, struct rte_mbuf **bufs,
    uint16_t count, const struct ff_dpdk_if_context *ctx, int pkts_from_ring)
{
    struct lcore_conf *qconf = &lcore_conf;

    uint16_t i;
    for (i = 0; i < count; i++) {
        struct rte_mbuf *rtem = bufs[i];

        if (unlikely(qconf->pcap[port_id] != NULL)) {
            ff_dump_packets(qconf->pcap[port_id], rtem);
        }

        void *data = rte_pktmbuf_mtod(rtem, void*);
        uint16_t len = rte_pktmbuf_data_len(rtem);

        enum FilterReturn filter = protocol_filter(data, len);
        if (filter == FILTER_ARP) {
            struct rte_mempool *mbuf_pool;
            struct rte_mbuf *mbuf_clone;
            if (pkts_from_ring == 0) {
                /* Broadcast the ARP packet to every other process. */
                uint16_t procid;
                for (procid = 0; procid < qconf->nb_procs; ++procid) {
                    if (procid == queue_id)
                        continue;

                    mbuf_pool = pktmbuf_pool[rte_lcore_to_socket_id(qconf->lcore_proc[procid])];
                    mbuf_clone = rte_pktmbuf_clone(rtem, mbuf_pool);
                    if (mbuf_clone) {
                        int ret = rte_ring_enqueue(arp_ring[procid][port_id], mbuf_clone);
                        if (ret < 0)
                            rte_pktmbuf_free(mbuf_clone);
                    }
                }
            }

            if (enable_kni && rte_eal_process_type() == RTE_PROC_PRIMARY) {
                mbuf_pool = pktmbuf_pool[qconf->socket_id];
                mbuf_clone = rte_pktmbuf_clone(rtem, mbuf_pool);
                if (mbuf_clone) {
                    /* Enqueue the clone, not the original, which is still needed below. */
                    ff_kni_enqueue(port_id, mbuf_clone);
                }
            }

            ff_veth_input(ctx, rtem);
        } else if (enable_kni && ((filter == FILTER_KNI && kni_accept) ||
            (filter == FILTER_UNKNOWN && !kni_accept))) {
            ff_kni_enqueue(port_id, rtem);
        } else {
            ff_veth_input(ctx, rtem);
        }
    }
}

static inline int
process_arp_ring(uint8_t port_id, uint16_t queue_id,
    struct rte_mbuf **pkts_burst, const struct ff_dpdk_if_context *ctx)
{
    /* Read packets from the ring buffer and process them. */
    uint16_t nb_rb;
    nb_rb = rte_ring_dequeue_burst(arp_ring[queue_id][port_id],
        (void **)pkts_burst, MAX_PKT_BURST);

    if (nb_rb > 0) {
        process_packets(port_id, queue_id, pkts_burst, nb_rb, ctx, 1);
    }

    return 0;
}

static inline void
handle_sysctl_msg(struct ff_msg *msg, uint16_t proc_id)
{
    int ret = ff_sysctl(msg->sysctl.name, msg->sysctl.namelen,
        msg->sysctl.old, msg->sysctl.oldlenp, msg->sysctl.new,
        msg->sysctl.newlen);

    if (ret < 0) {
        msg->result = errno;
    } else {
        msg->result = 0;
    }

    rte_ring_enqueue(msg_ring[proc_id].ring[1], msg);
}

static inline void
handle_default_msg(struct ff_msg *msg, uint16_t proc_id)
{
    msg->result = EINVAL;
    rte_ring_enqueue(msg_ring[proc_id].ring[1], msg);
}

static inline void
handle_msg(struct ff_msg *msg, uint16_t proc_id)
{
    switch (msg->msg_type) {
        case FF_SYSCTL:
            handle_sysctl_msg(msg, proc_id);
            break;
        default:
            handle_default_msg(msg, proc_id);
            break;
    }
}

static inline int
process_msg_ring(uint16_t proc_id)
{
    void *msg;
    int ret = rte_ring_dequeue(msg_ring[proc_id].ring[0], &msg);

    if (unlikely(ret == 0)) {
        handle_msg((struct ff_msg *)msg, proc_id);
    }

    return 0;
}

/* Send burst of packets on an output interface */
static inline int
send_burst(struct lcore_conf *qconf, uint16_t n, uint8_t port)
{
    struct rte_mbuf **m_table;
    int ret;
    uint16_t queueid;

    queueid = qconf->tx_queue_id[port];
    m_table = (struct rte_mbuf **)qconf->tx_mbufs[port].m_table;

    if (unlikely(qconf->pcap[port] != NULL)) {
        uint16_t i;
        for (i = 0; i < n; i++) {
            ff_dump_packets(qconf->pcap[port], m_table[i]);
        }
    }

    ret = rte_eth_tx_burst(port, queueid, m_table, n);
    if (unlikely(ret < n)) {
        /* Free the packets the NIC could not accept. */
        do {
            rte_pktmbuf_free(m_table[ret]);
        } while (++ret < n);
    }

    return 0;
}

/* Enqueue a single packet, and send burst if queue is filled */
static inline int
send_single_packet(struct rte_mbuf *m, uint8_t port)
{
    uint16_t len;
    struct lcore_conf *qconf;

    qconf = &lcore_conf;
    len = qconf->tx_mbufs[port].len;
    qconf->tx_mbufs[port].m_table[len] = m;
    len++;

    /* enough pkts to be sent */
    if (unlikely(len == MAX_PKT_BURST)) {
        send_burst(qconf, MAX_PKT_BURST, port);
        len = 0;
    }

    qconf->tx_mbufs[port].len = len;
    return 0;
}
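
/*
 * A minimal sketch (not in the original file) of how the two functions above
 * cooperate: callers enqueue one mbuf at a time, a full burst of
 * MAX_PKT_BURST packets is flushed automatically, and anything left over is
 * drained by the periodic timeout in main_loop().
 */
#if 0
static void
example_tx_path(struct rte_mbuf *m, uint8_t port)
{
    send_single_packet(m, port);    /* may trigger send_burst() */

    /* ... later, on the drain timeout ... */
    if (lcore_conf.tx_mbufs[port].len > 0) {
        send_burst(&lcore_conf, lcore_conf.tx_mbufs[port].len, port);
        lcore_conf.tx_mbufs[port].len = 0;
    }
}
#endif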

int
ff_dpdk_if_send(struct ff_dpdk_if_context *ctx, void *m,
    int total)
{
    struct rte_mempool *mbuf_pool = pktmbuf_pool[lcore_conf.socket_id];
    struct rte_mbuf *head = rte_pktmbuf_alloc(mbuf_pool);
    if (head == NULL) {
        ff_mbuf_free(m);
        return -1;
    }

    head->pkt_len = total;
    head->nb_segs = 0;

    /* Copy the stack's mbuf chain into a chain of DPDK mbufs. */
    int off = 0;
    struct rte_mbuf *cur = head, *prev = NULL;
    while (total > 0) {
        if (cur == NULL) {
            cur = rte_pktmbuf_alloc(mbuf_pool);
            if (cur == NULL) {
                rte_pktmbuf_free(head);
                ff_mbuf_free(m);
                return -1;
            }
        }

        void *data = rte_pktmbuf_mtod(cur, void*);
        int len = total > RTE_MBUF_DEFAULT_DATAROOM ? RTE_MBUF_DEFAULT_DATAROOM : total;
        int ret = ff_mbuf_copydata(m, data, off, len);
        if (ret < 0) {
            rte_pktmbuf_free(head);
            ff_mbuf_free(m);
            return -1;
        }

        if (prev != NULL) {
            prev->next = cur;
        }
        prev = cur;

        cur->data_len = len;
        off += len;
        total -= len;
        head->nb_segs++;
        cur = NULL;
    }

    struct ff_tx_offload offload = {0};
    ff_mbuf_tx_offload(m, &offload);

    if (offload.ip_csum) {
        head->ol_flags |= PKT_TX_IP_CKSUM;
        head->l2_len = sizeof(struct ether_hdr);
        head->l3_len = sizeof(struct ipv4_hdr);
    }

    if (ctx->hw_features.tx_csum_l4) {
        if (offload.tcp_csum) {
            head->ol_flags |= PKT_TX_TCP_CKSUM;
            head->l2_len = sizeof(struct ether_hdr);
            head->l3_len = sizeof(struct ipv4_hdr);
        }

        if (offload.tso_seg_size) {
            head->ol_flags |= PKT_TX_TCP_SEG;
            head->l4_len = sizeof(struct tcp_hdr);
            head->tso_segsz = offload.tso_seg_size;
        }

        if (offload.udp_csum) {
            head->ol_flags |= PKT_TX_UDP_CKSUM;
            head->l2_len = sizeof(struct ether_hdr);
            head->l3_len = sizeof(struct ipv4_hdr);
        }
    }

    ff_mbuf_free(m);

    return send_single_packet(head, ctx->port_id);
}

static int
main_loop(void *arg)
{
    struct loop_routine *lr = (struct loop_routine *)arg;

    struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
    unsigned lcore_id;
    uint64_t prev_tsc, diff_tsc, cur_tsc;
    int i, j, nb_rx;
    uint8_t port_id, queue_id;
    struct lcore_conf *qconf;
    const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) /
        US_PER_S * BURST_TX_DRAIN_US;
    struct ff_dpdk_if_context *ctx;

    prev_tsc = 0;

    lcore_id = rte_lcore_id();
    qconf = &lcore_conf;

    if (qconf->nb_rx_queue == 0) {
        printf("lcore %u has nothing to do\n", lcore_id);
        return 0;
    }

    while (1) {
        cur_tsc = rte_rdtsc();
        if (unlikely(freebsd_clock.expire < cur_tsc)) {
            rte_timer_manage();
        }

        /*
         * TX burst queue drain
         */
        diff_tsc = cur_tsc - prev_tsc;
        if (unlikely(diff_tsc > drain_tsc)) {
            /*
             * This could be optimized (use queueid instead of
             * portid), but it is not called so often
             */
            for (port_id = 0; port_id < RTE_MAX_ETHPORTS; port_id++) {
                if (qconf->tx_mbufs[port_id].len == 0)
                    continue;
                send_burst(qconf,
                    qconf->tx_mbufs[port_id].len,
                    port_id);
                qconf->tx_mbufs[port_id].len = 0;
            }

            prev_tsc = cur_tsc;
        }

        /*
         * Read packets from RX queues
         */
        for (i = 0; i < qconf->nb_rx_queue; ++i) {
            port_id = qconf->rx_queue_list[i].port_id;
            queue_id = qconf->rx_queue_list[i].queue_id;
            ctx = veth_ctx[port_id];

            if (enable_kni && rte_eal_process_type() == RTE_PROC_PRIMARY) {
                ff_kni_process(port_id, queue_id, pkts_burst, MAX_PKT_BURST);
            }

            process_arp_ring(port_id, queue_id, pkts_burst, ctx);

            nb_rx = rte_eth_rx_burst(port_id, queue_id, pkts_burst,
                MAX_PKT_BURST);
            if (nb_rx == 0)
                continue;

            /* Prefetch first packets */
            for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) {
                rte_prefetch0(rte_pktmbuf_mtod(
                        pkts_burst[j], void *));
            }

            /* Prefetch and handle already prefetched packets */
            for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) {
                rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[
                        j + PREFETCH_OFFSET], void *));
                process_packets(port_id, queue_id, &pkts_burst[j], 1, ctx, 0);
            }

            /* Handle remaining prefetched packets */
            for (; j < nb_rx; j++) {
                process_packets(port_id, queue_id, &pkts_burst[j], 1, ctx, 0);
            }
        }

        process_msg_ring(qconf->proc_id);

        if (likely(lr->loop != NULL)) {
            lr->loop(lr->arg);
        }
    }
}

int
ff_dpdk_if_up(void) {
    int nb_ports = ff_global_cfg.dpdk.nb_ports;
    int i;
    for (i = 0; i < nb_ports; i++) {
        uint8_t port_id = ff_global_cfg.dpdk.port_cfgs[i].port_id;
        veth_ctx[port_id] = ff_veth_attach(ff_global_cfg.dpdk.port_cfgs + i);
        if (veth_ctx[port_id] == NULL) {
            rte_exit(EXIT_FAILURE, "ff_veth_attach failed");
        }
    }

    return 0;
}

void
ff_dpdk_run(loop_func_t loop, void *arg) {
    struct loop_routine *lr = malloc(sizeof(struct loop_routine));
    if (lr == NULL) {
        rte_exit(EXIT_FAILURE, "malloc loop_routine failed\n");
    }
    lr->loop = loop;
    lr->arg = arg;
    rte_eal_mp_remote_launch(main_loop, lr, CALL_MASTER);
    rte_eal_mp_wait_lcore();
    free(lr);
}
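
/*
 * Lifecycle sketch (illustrative only): how an application might drive this
 * module. The app_loop callback name is hypothetical, and error handling is
 * abbreviated; in f-stack proper, higher-level ff_api wrappers perform these
 * steps alongside stack initialization.
 */
#if 0
static int
app_loop(void *arg)
{
    /* poll application-level events here; called once per main_loop pass */
    return 0;
}

int
main(int argc, char *argv[])
{
    if (ff_dpdk_init(argc, argv) < 0)
        return 1;
    /* ... the stack initializes and calls ff_dpdk_register_if() ... */
    ff_dpdk_if_up();
    ff_dpdk_run(app_loop, NULL);    /* does not return in normal operation */
    return 0;
}
#endif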

void
ff_dpdk_pktmbuf_free(void *m)
{
    rte_pktmbuf_free((struct rte_mbuf *)m);
}

static uint32_t
toeplitz_hash(unsigned keylen, const uint8_t *key,
    unsigned datalen, const uint8_t *data)
{
    uint32_t hash = 0, v;
    u_int i, b;

    /* XXXRW: Perhaps an assertion about key length vs. data length? */

    v = (key[0]<<24) + (key[1]<<16) + (key[2]<<8) + key[3];
    for (i = 0; i < datalen; i++) {
        for (b = 0; b < 8; b++) {
            if (data[i] & (1<<(7-b)))
                hash ^= v;
            v <<= 1;
            if ((i + 4) < keylen &&
                (key[i+4] & (1<<(7-b))))
                v |= 1;
        }
    }
    return (hash);
}

int
ff_rss_check(uint32_t saddr, uint32_t daddr, uint16_t sport, uint16_t dport)
{
    struct lcore_conf *qconf = &lcore_conf;

    if (qconf->nb_procs == 1) {
        return 1;
    }

    uint8_t data[sizeof(saddr) + sizeof(daddr) + sizeof(sport) +
        sizeof(dport)];

    unsigned datalen = 0;

    bcopy(&saddr, &data[datalen], sizeof(saddr));
    datalen += sizeof(saddr);

    bcopy(&daddr, &data[datalen], sizeof(daddr));
    datalen += sizeof(daddr);

    bcopy(&sport, &data[datalen], sizeof(sport));
    datalen += sizeof(sport);

    bcopy(&dport, &data[datalen], sizeof(dport));
    datalen += sizeof(dport);

    uint32_t hash = toeplitz_hash(sizeof(default_rsskey_40bytes),
        default_rsskey_40bytes, datalen, data);

    return (hash % qconf->nb_procs) == qconf->proc_id;
}
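
/*
 * Usage sketch (illustrative only): when picking a local port, a caller can
 * probe candidate ports until the resulting 4-tuple hashes to this process,
 * so that the NIC's RSS delivers the flow's traffic to the queue this
 * process polls. The tuple is assumed to be in network byte order, matching
 * the on-wire data the NIC hashes; the port range here is arbitrary.
 */
#if 0
static uint16_t
example_pick_local_port(uint32_t laddr, uint32_t faddr, uint16_t fport)
{
    uint16_t lport;

    for (lport = 32768; lport < 61000; lport++) {
        if (ff_rss_check(laddr, faddr, htons(lport), fport))
            return htons(lport);
    }
    return 0;   /* no suitable port found */
}
#endif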