1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2015 Intel Corporation
3 */
4
5 #include <stdint.h>
6 #include <inttypes.h>
7 #include <getopt.h>
8 #include <rte_eal.h>
9 #include <rte_ethdev.h>
10 #include <rte_cycles.h>
11 #include <rte_lcore.h>
12 #include <rte_mbuf.h>
13 #include <rte_mbuf_dyn.h>
14
15 #define RX_RING_SIZE 1024
16 #define TX_RING_SIZE 1024
17
18 #define NUM_MBUFS 8191
19 #define MBUF_CACHE_SIZE 250
20 #define BURST_SIZE 32
21
22 static int hwts_dynfield_offset = -1;
23
24 static inline rte_mbuf_timestamp_t *
hwts_field(struct rte_mbuf * mbuf)25 hwts_field(struct rte_mbuf *mbuf)
26 {
27 return RTE_MBUF_DYNFIELD(mbuf,
28 hwts_dynfield_offset, rte_mbuf_timestamp_t *);
29 }
30
31 typedef uint64_t tsc_t;
32 static int tsc_dynfield_offset = -1;
33
34 static inline tsc_t *
tsc_field(struct rte_mbuf * mbuf)35 tsc_field(struct rte_mbuf *mbuf)
36 {
37 return RTE_MBUF_DYNFIELD(mbuf, tsc_dynfield_offset, tsc_t *);
38 }
39
/* Usage message; the single %s is filled with the program name by main(). */
static const char usage[] =
	"%s EAL_ARGS -- [-t]\n";

/* Default port configuration; port_init() copies it before adding offloads. */
static const struct rte_eth_conf port_conf_default = {
	.rxmode = {
		.max_rx_pkt_len = RTE_ETHER_MAX_LEN,
	},
};

/*
 * Running totals maintained by calc_latency(). They are printed as
 * per-packet averages and reset to zero once total_pkts exceeds the
 * reporting threshold used there.
 */
static struct {
	/* Sum over packets of (TSC at TX callback - TSC stored at RX callback). */
	uint64_t total_cycles;
	/* Sum of HW clock deltas after scaling by ticks_per_cycle_mult. */
	uint64_t total_queue_cycles;
	/* Number of packets accumulated since the last report. */
	uint64_t total_pkts;
} latency_numbers;

/* Set to 1 by main() when the -t option is given; 0 otherwise. */
int hw_timestamping;

/*
 * Fixed-point scaling used for HW clock deltas: port_init() stores
 * (1 << TICKS_PER_CYCLE_SHIFT) / (TSC cycles per HW tick) in
 * ticks_per_cycle_mult, and calc_latency() multiplies by it and shifts
 * right by TICKS_PER_CYCLE_SHIFT. A value of 0 means "not calibrated yet".
 */
#define TICKS_PER_CYCLE_SHIFT 16
static uint64_t ticks_per_cycle_mult;
59
60 static uint16_t
add_timestamps(uint16_t port __rte_unused,uint16_t qidx __rte_unused,struct rte_mbuf ** pkts,uint16_t nb_pkts,uint16_t max_pkts __rte_unused,void * _ __rte_unused)61 add_timestamps(uint16_t port __rte_unused, uint16_t qidx __rte_unused,
62 struct rte_mbuf **pkts, uint16_t nb_pkts,
63 uint16_t max_pkts __rte_unused, void *_ __rte_unused)
64 {
65 unsigned i;
66 uint64_t now = rte_rdtsc();
67
68 for (i = 0; i < nb_pkts; i++)
69 *tsc_field(pkts[i]) = now;
70 return nb_pkts;
71 }
72
73 static uint16_t
calc_latency(uint16_t port,uint16_t qidx __rte_unused,struct rte_mbuf ** pkts,uint16_t nb_pkts,void * _ __rte_unused)74 calc_latency(uint16_t port, uint16_t qidx __rte_unused,
75 struct rte_mbuf **pkts, uint16_t nb_pkts, void *_ __rte_unused)
76 {
77 uint64_t cycles = 0;
78 uint64_t queue_ticks = 0;
79 uint64_t now = rte_rdtsc();
80 uint64_t ticks;
81 unsigned i;
82
83 if (hw_timestamping)
84 rte_eth_read_clock(port, &ticks);
85
86 for (i = 0; i < nb_pkts; i++) {
87 cycles += now - *tsc_field(pkts[i]);
88 if (hw_timestamping)
89 queue_ticks += ticks - *hwts_field(pkts[i]);
90 }
91
92 latency_numbers.total_cycles += cycles;
93 if (hw_timestamping)
94 latency_numbers.total_queue_cycles += (queue_ticks
95 * ticks_per_cycle_mult) >> TICKS_PER_CYCLE_SHIFT;
96
97 latency_numbers.total_pkts += nb_pkts;
98
99 if (latency_numbers.total_pkts > (100 * 1000 * 1000ULL)) {
100 printf("Latency = %"PRIu64" cycles\n",
101 latency_numbers.total_cycles / latency_numbers.total_pkts);
102 if (hw_timestamping) {
103 printf("Latency from HW = %"PRIu64" cycles\n",
104 latency_numbers.total_queue_cycles
105 / latency_numbers.total_pkts);
106 }
107 latency_numbers.total_cycles = 0;
108 latency_numbers.total_queue_cycles = 0;
109 latency_numbers.total_pkts = 0;
110 }
111 return nb_pkts;
112 }
113
114 /*
115 * Initialises a given port using global settings and with the rx buffers
116 * coming from the mbuf_pool passed as parameter
117 */
118 static inline int
port_init(uint16_t port,struct rte_mempool * mbuf_pool)119 port_init(uint16_t port, struct rte_mempool *mbuf_pool)
120 {
121 struct rte_eth_conf port_conf = port_conf_default;
122 const uint16_t rx_rings = 1, tx_rings = 1;
123 uint16_t nb_rxd = RX_RING_SIZE;
124 uint16_t nb_txd = TX_RING_SIZE;
125 int retval;
126 uint16_t q;
127 struct rte_eth_dev_info dev_info;
128 struct rte_eth_rxconf rxconf;
129 struct rte_eth_txconf txconf;
130
131 if (!rte_eth_dev_is_valid_port(port))
132 return -1;
133
134 retval = rte_eth_dev_info_get(port, &dev_info);
135 if (retval != 0) {
136 printf("Error during getting device (port %u) info: %s\n",
137 port, strerror(-retval));
138
139 return retval;
140 }
141
142 if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
143 port_conf.txmode.offloads |=
144 DEV_TX_OFFLOAD_MBUF_FAST_FREE;
145
146 if (hw_timestamping) {
147 if (!(dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TIMESTAMP)) {
148 printf("\nERROR: Port %u does not support hardware timestamping\n"
149 , port);
150 return -1;
151 }
152 port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_TIMESTAMP;
153 rte_mbuf_dyn_rx_timestamp_register(&hwts_dynfield_offset, NULL);
154 if (hwts_dynfield_offset < 0) {
155 printf("ERROR: Failed to register timestamp field\n");
156 return -rte_errno;
157 }
158 }
159
160 retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);
161 if (retval != 0)
162 return retval;
163
164 retval = rte_eth_dev_adjust_nb_rx_tx_desc(port, &nb_rxd, &nb_txd);
165 if (retval != 0)
166 return retval;
167
168 rxconf = dev_info.default_rxconf;
169
170 for (q = 0; q < rx_rings; q++) {
171 retval = rte_eth_rx_queue_setup(port, q, nb_rxd,
172 rte_eth_dev_socket_id(port), &rxconf, mbuf_pool);
173 if (retval < 0)
174 return retval;
175 }
176
177 txconf = dev_info.default_txconf;
178 txconf.offloads = port_conf.txmode.offloads;
179 for (q = 0; q < tx_rings; q++) {
180 retval = rte_eth_tx_queue_setup(port, q, nb_txd,
181 rte_eth_dev_socket_id(port), &txconf);
182 if (retval < 0)
183 return retval;
184 }
185
186 retval = rte_eth_dev_start(port);
187 if (retval < 0)
188 return retval;
189
190 if (hw_timestamping && ticks_per_cycle_mult == 0) {
191 uint64_t cycles_base = rte_rdtsc();
192 uint64_t ticks_base;
193 retval = rte_eth_read_clock(port, &ticks_base);
194 if (retval != 0)
195 return retval;
196 rte_delay_ms(100);
197 uint64_t cycles = rte_rdtsc();
198 uint64_t ticks;
199 rte_eth_read_clock(port, &ticks);
200 uint64_t c_freq = cycles - cycles_base;
201 uint64_t t_freq = ticks - ticks_base;
202 double freq_mult = (double)c_freq / t_freq;
203 printf("TSC Freq ~= %" PRIu64
204 "\nHW Freq ~= %" PRIu64
205 "\nRatio : %f\n",
206 c_freq * 10, t_freq * 10, freq_mult);
207 /* TSC will be faster than internal ticks so freq_mult is > 0
208 * We convert the multiplication to an integer shift & mult
209 */
210 ticks_per_cycle_mult = (1 << TICKS_PER_CYCLE_SHIFT) / freq_mult;
211 }
212
213 struct rte_ether_addr addr;
214
215 retval = rte_eth_macaddr_get(port, &addr);
216 if (retval < 0) {
217 printf("Failed to get MAC address on port %u: %s\n",
218 port, rte_strerror(-retval));
219 return retval;
220 }
221 printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
222 " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
223 (unsigned)port,
224 addr.addr_bytes[0], addr.addr_bytes[1],
225 addr.addr_bytes[2], addr.addr_bytes[3],
226 addr.addr_bytes[4], addr.addr_bytes[5]);
227
228 retval = rte_eth_promiscuous_enable(port);
229 if (retval != 0)
230 return retval;
231
232 rte_eth_add_rx_callback(port, 0, add_timestamps, NULL);
233 rte_eth_add_tx_callback(port, 0, calc_latency, NULL);
234
235 return 0;
236 }
237
238 /*
239 * Main thread that does the work, reading from INPUT_PORT
240 * and writing to OUTPUT_PORT
241 */
242 static __rte_noreturn void
lcore_main(void)243 lcore_main(void)
244 {
245 uint16_t port;
246
247 RTE_ETH_FOREACH_DEV(port)
248 if (rte_eth_dev_socket_id(port) > 0 &&
249 rte_eth_dev_socket_id(port) !=
250 (int)rte_socket_id())
251 printf("WARNING, port %u is on remote NUMA node to "
252 "polling thread.\n\tPerformance will "
253 "not be optimal.\n", port);
254
255 printf("\nCore %u forwarding packets. [Ctrl+C to quit]\n",
256 rte_lcore_id());
257 for (;;) {
258 RTE_ETH_FOREACH_DEV(port) {
259 struct rte_mbuf *bufs[BURST_SIZE];
260 const uint16_t nb_rx = rte_eth_rx_burst(port, 0,
261 bufs, BURST_SIZE);
262 if (unlikely(nb_rx == 0))
263 continue;
264 const uint16_t nb_tx = rte_eth_tx_burst(port ^ 1, 0,
265 bufs, nb_rx);
266 if (unlikely(nb_tx < nb_rx)) {
267 uint16_t buf;
268
269 for (buf = nb_tx; buf < nb_rx; buf++)
270 rte_pktmbuf_free(bufs[buf]);
271 }
272 }
273 }
274 }
275
276 /* Main function, does initialisation and calls the per-lcore functions */
277 int
main(int argc,char * argv[])278 main(int argc, char *argv[])
279 {
280 struct rte_mempool *mbuf_pool;
281 uint16_t nb_ports;
282 uint16_t portid;
283 struct option lgopts[] = {
284 { NULL, 0, 0, 0 }
285 };
286 int opt, option_index;
287
288 static const struct rte_mbuf_dynfield tsc_dynfield_desc = {
289 .name = "example_bbdev_dynfield_tsc",
290 .size = sizeof(tsc_t),
291 .align = __alignof__(tsc_t),
292 };
293
294 /* init EAL */
295 int ret = rte_eal_init(argc, argv);
296
297 if (ret < 0)
298 rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
299 argc -= ret;
300 argv += ret;
301
302 while ((opt = getopt_long(argc, argv, "t", lgopts, &option_index))
303 != EOF)
304 switch (opt) {
305 case 't':
306 hw_timestamping = 1;
307 break;
308 default:
309 printf(usage, argv[0]);
310 return -1;
311 }
312 optind = 1; /* reset getopt lib */
313
314 nb_ports = rte_eth_dev_count_avail();
315 if (nb_ports < 2 || (nb_ports & 1))
316 rte_exit(EXIT_FAILURE, "Error: number of ports must be even\n");
317
318 mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL",
319 NUM_MBUFS * nb_ports, MBUF_CACHE_SIZE, 0,
320 RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
321 if (mbuf_pool == NULL)
322 rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
323
324 tsc_dynfield_offset =
325 rte_mbuf_dynfield_register(&tsc_dynfield_desc);
326 if (tsc_dynfield_offset < 0)
327 rte_exit(EXIT_FAILURE, "Cannot register mbuf field\n");
328
329 /* initialize all ports */
330 RTE_ETH_FOREACH_DEV(portid)
331 if (port_init(portid, mbuf_pool) != 0)
332 rte_exit(EXIT_FAILURE, "Cannot init port %"PRIu8"\n",
333 portid);
334
335 if (rte_lcore_count() > 1)
336 printf("\nWARNING: Too much enabled lcores - "
337 "App uses only 1 lcore\n");
338
339 /* call lcore_main on main core only */
340 lcore_main();
341 return 0;
342 }
343