/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2013 6WIND S.A.
 */
4
5 #include <stdarg.h>
6 #include <string.h>
7 #include <stdio.h>
8 #include <errno.h>
9 #include <stdint.h>
10 #include <unistd.h>
11 #include <inttypes.h>
12
13 #include <sys/queue.h>
14 #include <sys/stat.h>
15
16 #include <rte_common.h>
17 #include <rte_byteorder.h>
18 #include <rte_log.h>
19 #include <rte_debug.h>
20 #include <rte_cycles.h>
21 #include <rte_per_lcore.h>
22 #include <rte_lcore.h>
23 #include <rte_branch_prediction.h>
24 #include <rte_memory.h>
25 #include <rte_mempool.h>
26 #include <rte_mbuf.h>
27 #include <rte_ether.h>
28 #include <rte_ethdev.h>
29 #include <rte_arp.h>
30 #include <rte_ip.h>
31 #include <rte_icmp.h>
32 #include <rte_string_fns.h>
33 #include <rte_flow.h>
34
35 #include "testpmd.h"
36
37 static const char *
arp_op_name(uint16_t arp_op)38 arp_op_name(uint16_t arp_op)
39 {
40 switch (arp_op) {
41 case RTE_ARP_OP_REQUEST:
42 return "ARP Request";
43 case RTE_ARP_OP_REPLY:
44 return "ARP Reply";
45 case RTE_ARP_OP_REVREQUEST:
46 return "Reverse ARP Request";
47 case RTE_ARP_OP_REVREPLY:
48 return "Reverse ARP Reply";
49 case RTE_ARP_OP_INVREQUEST:
50 return "Peer Identify Request";
51 case RTE_ARP_OP_INVREPLY:
52 return "Peer Identify Reply";
53 default:
54 break;
55 }
56 return "Unknown ARP op";
57 }
58
/*
 * Map an IP protocol number to a short printable name.
 *
 * Numbers below the table size are looked up directly in ip_proto_names[];
 * a few higher numbers are handled by the switch that follows. Anything
 * else is reported as "UNASSIGNED".
 */
static const char *
ip_proto_name(uint16_t ip_proto)
{
	/*
	 * Indexed by protocol number: every name MUST sit at the index equal
	 * to its assigned number (ICMP at 1, TCP at 6, UDP at 17, ...).
	 * The range comments below are there to keep the rows aligned.
	 */
	static const char * const ip_proto_names[] = {
		/* 0 - 4 */
		"IP6HOPOPTS", /**< IP6 hop-by-hop options */
		"ICMP",       /**< control message protocol */
		"IGMP",       /**< group mgmt protocol */
		"GGP",        /**< gateway^2 (deprecated) */
		"IPv4",       /**< IPv4 encapsulation */

		/* 5 - 9 */
		"UNASSIGNED",
		"TCP",        /**< transport control protocol */
		"ST",         /**< Stream protocol II */
		"EGP",        /**< exterior gateway protocol */
		"PIGP",       /**< private interior gateway */

		/* 10 - 14 */
		"RCC_MON",    /**< BBN RCC Monitoring */
		"NVPII",      /**< network voice protocol*/
		"PUP",        /**< pup */
		"ARGUS",      /**< Argus */
		"EMCON",      /**< EMCON */

		/* 15 - 19 */
		"XNET",       /**< Cross Net Debugger */
		"CHAOS",      /**< Chaos*/
		"UDP",        /**< user datagram protocol */
		"MUX",        /**< Multiplexing */
		"DCN_MEAS",   /**< DCN Measurement Subsystems */

		/* 20 - 24 */
		"HMP",        /**< Host Monitoring */
		"PRM",        /**< Packet Radio Measurement */
		"XNS_IDP",    /**< xns idp */
		"TRUNK1",     /**< Trunk-1 */
		"TRUNK2",     /**< Trunk-2 */

		/* 25 - 29 */
		"LEAF1",      /**< Leaf-1 */
		"LEAF2",      /**< Leaf-2 */
		"RDP",        /**< Reliable Data */
		"IRTP",       /**< Reliable Transaction */
		"TP4",        /**< tp-4 w/ class negotiation */

		/* 30 - 34 */
		"BLT",        /**< Bulk Data Transfer */
		"NSP",        /**< Network Services */
		"INP",        /**< Merit Internodal */
		"SEP",        /**< Sequential Exchange */
		"3PC",        /**< Third Party Connect */

		/* 35 - 39 */
		"IDPR",       /**< InterDomain Policy Routing */
		"XTP",        /**< XTP */
		"DDP",        /**< Datagram Delivery */
		"CMTP",       /**< Control Message Transport */
		"TPXX",       /**< TP++ Transport */

		/* 40 - 44 */
		"ILTP",       /**< IL transport protocol */
		"IPv6_HDR",   /**< IP6 header */
		"SDRP",       /**< Source Demand Routing */
		"IPv6_RTG",   /**< IP6 routing header */
		"IPv6_FRAG",  /**< IP6 fragmentation header */

		/* 45 - 49 */
		"IDRP",       /**< InterDomain Routing*/
		"RSVP",       /**< resource reservation */
		"GRE",        /**< General Routing Encap. */
		"MHRP",       /**< Mobile Host Routing */
		"BHA",        /**< BHA */

		/* 50 - 54 */
		"ESP",        /**< IP6 Encap Sec. Payload */
		"AH",         /**< IP6 Auth Header */
		"INLSP",      /**< Integ. Net Layer Security */
		"SWIPE",      /**< IP with encryption */
		"NHRP",       /**< Next Hop Resolution */

		/* 55 - 59 */
		"UNASSIGNED",
		"UNASSIGNED",
		"UNASSIGNED",
		"ICMPv6",     /**< ICMP6 */
		"IPv6NONEXT", /**< IP6 no next header */

		/* 60 - 64 */
		"Ipv6DSTOPTS",/**< IP6 destination option */
		"AHIP",       /**< any host internal protocol */
		"CFTP",       /**< CFTP */
		"HELLO",      /**< "hello" routing protocol */
		"SATEXPAK",   /**< SATNET/Backroom EXPAK */

		/* 65 - 69 */
		"KRYPTOLAN",  /**< Kryptolan */
		"RVD",        /**< Remote Virtual Disk */
		"IPPC",       /**< Pluribus Packet Core */
		"ADFS",       /**< Any distributed FS */
		"SATMON",     /**< Satnet Monitoring */

		/* 70 - 74 */
		"VISA",       /**< VISA Protocol */
		"IPCV",       /**< Packet Core Utility */
		"CPNX",       /**< Comp. Prot. Net. Executive */
		"CPHB",       /**< Comp. Prot. HeartBeat */
		"WSN",        /**< Wang Span Network */

		/* 75 - 79 */
		"PVP",        /**< Packet Video Protocol */
		"BRSATMON",   /**< BackRoom SATNET Monitoring */
		"ND",         /**< Sun net disk proto (temp.) */
		"WBMON",      /**< WIDEBAND Monitoring */
		"WBEXPAK",    /**< WIDEBAND EXPAK */

		/* 80 - 84 */
		"EON",        /**< ISO cnlp */
		"VMTP",       /**< VMTP */
		"SVMTP",      /**< Secure VMTP */
		"VINES",      /**< Banyon VINES */
		"TTP",        /**< TTP */

		/* 85 - 89 */
		"IGP",        /**< NSFNET-IGP */
		"DGP",        /**< dissimilar gateway prot. */
		"TCF",        /**< TCF */
		"IGRP",       /**< Cisco/GXS IGRP */
		"OSPFIGP",    /**< OSPFIGP */

		/* 90 - 94 */
		"SRPC",       /**< Strite RPC protocol */
		"LARP",       /**< Locus Address Resolution */
		"MTP",        /**< Multicast Transport */
		"AX25",       /**< AX.25 Frames */
		"4IN4",       /**< IP encapsulated in IP */

		/* 95 - 99 */
		"MICP",       /**< Mobile Int.ing control */
		"SCCSP",      /**< Semaphore Comm. security */
		"ETHERIP",    /**< Ethernet IP encapsulation */
		"ENCAP",      /**< encapsulation header */
		"AES",        /**< any private encr. scheme */

		/*
		 * 100 - 104
		 * PIM is protocol 103; it used to be listed one slot too late.
		 */
		"GMTP",       /**< GMTP */
		"UNASSIGNED",
		"UNASSIGNED",
		"PIM",        /**< Protocol Independent Mcast */
		"UNASSIGNED",

		/*
		 * 105 - 108
		 * IPComp is protocol 108; it used to be listed at index 101.
		 */
		"UNASSIGNED",
		"UNASSIGNED",
		"UNASSIGNED",
		"IPCOMP",     /**< payload compression (IPComp) */
	};

	if (ip_proto < RTE_DIM(ip_proto_names))
		return ip_proto_names[ip_proto];

	/* Sparse protocol numbers above the end of the table. */
	switch (ip_proto) {
#ifdef IPPROTO_PGM
	case IPPROTO_PGM:  /**< PGM */
		return "PGM";
#endif
	case IPPROTO_SCTP:  /**< Stream Control Transport Protocol */
		return "SCTP";
#ifdef IPPROTO_DIVERT
	case IPPROTO_DIVERT: /**< divert pseudo-protocol */
		return "DIVERT";
#endif
	case IPPROTO_RAW: /**< raw IP packet */
		return "RAW";
	default:
		break;
	}
	return "UNASSIGNED";
}
210
/*
 * Format an IPv4 address as a dotted-quad string ("a.b.c.d").
 *
 * be_ipv4_addr: address in big-endian byte order; it is converted to CPU
 *               order before the octets are extracted.
 * buf:          destination buffer. It must provide at least 16 bytes,
 *               the size of "255.255.255.255" plus the terminating NUL.
 */
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf)
{
	uint32_t ipv4_addr;

	ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
	/*
	 * The octets are uint32_t expressions, so print them with PRIu32
	 * rather than %d. The write is bounded to the longest possible
	 * dotted quad, which no input can exceed.
	 */
	snprintf(buf, sizeof("255.255.255.255"),
		 "%" PRIu32 ".%" PRIu32 ".%" PRIu32 ".%" PRIu32,
		 (ipv4_addr >> 24) & 0xFF, (ipv4_addr >> 16) & 0xFF,
		 (ipv4_addr >> 8) & 0xFF, ipv4_addr & 0xFF);
}
221
222 static void
ether_addr_dump(const char * what,const struct rte_ether_addr * ea)223 ether_addr_dump(const char *what, const struct rte_ether_addr *ea)
224 {
225 char buf[RTE_ETHER_ADDR_FMT_SIZE];
226
227 rte_ether_format_addr(buf, RTE_ETHER_ADDR_FMT_SIZE, ea);
228 if (what)
229 printf("%s", what);
230 printf("%s", buf);
231 }
232
/*
 * Print a big-endian IPv4 address to stdout in dotted-quad form,
 * optionally preceded by a label.
 *
 * what:         label printed first; skipped when NULL.
 * be_ipv4_addr: address handed unchanged to ipv4_addr_to_dot().
 * No newline is emitted.
 */
static void
ipv4_addr_dump(const char *what, uint32_t be_ipv4_addr)
{
	char dotted[16];

	ipv4_addr_to_dot(be_ipv4_addr, dotted);
	if (what != NULL)
		printf("%s", what);
	printf("%s", dotted);
}
243
244 static uint16_t
ipv4_hdr_cksum(struct rte_ipv4_hdr * ip_h)245 ipv4_hdr_cksum(struct rte_ipv4_hdr *ip_h)
246 {
247 uint16_t *v16_h;
248 uint32_t ip_cksum;
249
250 /*
251 * Compute the sum of successive 16-bit words of the IPv4 header,
252 * skipping the checksum field of the header.
253 */
254 v16_h = (unaligned_uint16_t *) ip_h;
255 ip_cksum = v16_h[0] + v16_h[1] + v16_h[2] + v16_h[3] +
256 v16_h[4] + v16_h[6] + v16_h[7] + v16_h[8] + v16_h[9];
257
258 /* reduce 32 bit checksum to 16 bits and complement it */
259 ip_cksum = (ip_cksum & 0xffff) + (ip_cksum >> 16);
260 ip_cksum = (ip_cksum & 0xffff) + (ip_cksum >> 16);
261 ip_cksum = (~ip_cksum) & 0x0000FFFF;
262 return (ip_cksum == 0) ? 0xFFFF : (uint16_t) ip_cksum;
263 }
264
/*
 * True when the big-endian IPv4 address is a multicast address, i.e. lies
 * in 224.0.0.0/4: the four most significant bits are 1110. Testing the
 * whole first octet against 0xE0 would only match 224.x.x.x and miss
 * 225.0.0.0 - 239.255.255.255.
 */
#define is_multicast_ipv4_addr(ipv4_addr) \
	(((rte_be_to_cpu_32((ipv4_addr)) >> 28) & 0x0000000F) == 0xE)
267
268 /*
269 * Receive a burst of packets, lookup for ICMP echo requests, and, if any,
270 * send back ICMP echo replies.
271 */
272 static void
reply_to_icmp_echo_rqsts(struct fwd_stream * fs)273 reply_to_icmp_echo_rqsts(struct fwd_stream *fs)
274 {
275 struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
276 struct rte_mbuf *pkt;
277 struct rte_ether_hdr *eth_h;
278 struct rte_vlan_hdr *vlan_h;
279 struct rte_arp_hdr *arp_h;
280 struct rte_ipv4_hdr *ip_h;
281 struct rte_icmp_hdr *icmp_h;
282 struct rte_ether_addr eth_addr;
283 uint32_t retry;
284 uint32_t ip_addr;
285 uint16_t nb_rx;
286 uint16_t nb_tx;
287 uint16_t nb_replies;
288 uint16_t eth_type;
289 uint16_t vlan_id;
290 uint16_t arp_op;
291 uint16_t arp_pro;
292 uint32_t cksum;
293 uint8_t i;
294 int l2_len;
295 uint64_t start_tsc = 0;
296
297 get_start_cycles(&start_tsc);
298
299 /*
300 * First, receive a burst of packets.
301 */
302 nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, pkts_burst,
303 nb_pkt_per_burst);
304 inc_rx_burst_stats(fs, nb_rx);
305 if (unlikely(nb_rx == 0))
306 return;
307
308 fs->rx_packets += nb_rx;
309 nb_replies = 0;
310 for (i = 0; i < nb_rx; i++) {
311 if (likely(i < nb_rx - 1))
312 rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[i + 1],
313 void *));
314 pkt = pkts_burst[i];
315 eth_h = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *);
316 eth_type = RTE_BE_TO_CPU_16(eth_h->ether_type);
317 l2_len = sizeof(struct rte_ether_hdr);
318 if (verbose_level > 0) {
319 printf("\nPort %d pkt-len=%u nb-segs=%u\n",
320 fs->rx_port, pkt->pkt_len, pkt->nb_segs);
321 ether_addr_dump(" ETH: src=", ð_h->src_addr);
322 ether_addr_dump(" dst=", ð_h->dst_addr);
323 }
324 if (eth_type == RTE_ETHER_TYPE_VLAN) {
325 vlan_h = (struct rte_vlan_hdr *)
326 ((char *)eth_h + sizeof(struct rte_ether_hdr));
327 l2_len += sizeof(struct rte_vlan_hdr);
328 eth_type = rte_be_to_cpu_16(vlan_h->eth_proto);
329 if (verbose_level > 0) {
330 vlan_id = rte_be_to_cpu_16(vlan_h->vlan_tci)
331 & 0xFFF;
332 printf(" [vlan id=%u]", vlan_id);
333 }
334 }
335 if (verbose_level > 0) {
336 printf(" type=0x%04x\n", eth_type);
337 }
338
339 /* Reply to ARP requests */
340 if (eth_type == RTE_ETHER_TYPE_ARP) {
341 arp_h = (struct rte_arp_hdr *) ((char *)eth_h + l2_len);
342 arp_op = RTE_BE_TO_CPU_16(arp_h->arp_opcode);
343 arp_pro = RTE_BE_TO_CPU_16(arp_h->arp_protocol);
344 if (verbose_level > 0) {
345 printf(" ARP: hrd=%d proto=0x%04x hln=%d "
346 "pln=%d op=%u (%s)\n",
347 RTE_BE_TO_CPU_16(arp_h->arp_hardware),
348 arp_pro, arp_h->arp_hlen,
349 arp_h->arp_plen, arp_op,
350 arp_op_name(arp_op));
351 }
352 if ((RTE_BE_TO_CPU_16(arp_h->arp_hardware) !=
353 RTE_ARP_HRD_ETHER) ||
354 (arp_pro != RTE_ETHER_TYPE_IPV4) ||
355 (arp_h->arp_hlen != 6) ||
356 (arp_h->arp_plen != 4)
357 ) {
358 rte_pktmbuf_free(pkt);
359 if (verbose_level > 0)
360 printf("\n");
361 continue;
362 }
363 if (verbose_level > 0) {
364 rte_ether_addr_copy(&arp_h->arp_data.arp_sha,
365 ð_addr);
366 ether_addr_dump(" sha=", ð_addr);
367 ip_addr = arp_h->arp_data.arp_sip;
368 ipv4_addr_dump(" sip=", ip_addr);
369 printf("\n");
370 rte_ether_addr_copy(&arp_h->arp_data.arp_tha,
371 ð_addr);
372 ether_addr_dump(" tha=", ð_addr);
373 ip_addr = arp_h->arp_data.arp_tip;
374 ipv4_addr_dump(" tip=", ip_addr);
375 printf("\n");
376 }
377 if (arp_op != RTE_ARP_OP_REQUEST) {
378 rte_pktmbuf_free(pkt);
379 continue;
380 }
381
382 /*
383 * Build ARP reply.
384 */
385
386 /* Use source MAC address as destination MAC address. */
387 rte_ether_addr_copy(ð_h->src_addr, ð_h->dst_addr);
388 /* Set source MAC address with MAC address of TX port */
389 rte_ether_addr_copy(&ports[fs->tx_port].eth_addr,
390 ð_h->src_addr);
391
392 arp_h->arp_opcode = rte_cpu_to_be_16(RTE_ARP_OP_REPLY);
393 rte_ether_addr_copy(&arp_h->arp_data.arp_tha,
394 ð_addr);
395 rte_ether_addr_copy(&arp_h->arp_data.arp_sha,
396 &arp_h->arp_data.arp_tha);
397 rte_ether_addr_copy(ð_h->src_addr,
398 &arp_h->arp_data.arp_sha);
399
400 /* Swap IP addresses in ARP payload */
401 ip_addr = arp_h->arp_data.arp_sip;
402 arp_h->arp_data.arp_sip = arp_h->arp_data.arp_tip;
403 arp_h->arp_data.arp_tip = ip_addr;
404 pkts_burst[nb_replies++] = pkt;
405 continue;
406 }
407
408 if (eth_type != RTE_ETHER_TYPE_IPV4) {
409 rte_pktmbuf_free(pkt);
410 continue;
411 }
412 ip_h = (struct rte_ipv4_hdr *) ((char *)eth_h + l2_len);
413 if (verbose_level > 0) {
414 ipv4_addr_dump(" IPV4: src=", ip_h->src_addr);
415 ipv4_addr_dump(" dst=", ip_h->dst_addr);
416 printf(" proto=%d (%s)\n",
417 ip_h->next_proto_id,
418 ip_proto_name(ip_h->next_proto_id));
419 }
420
421 /*
422 * Check if packet is a ICMP echo request.
423 */
424 icmp_h = (struct rte_icmp_hdr *) ((char *)ip_h +
425 sizeof(struct rte_ipv4_hdr));
426 if (! ((ip_h->next_proto_id == IPPROTO_ICMP) &&
427 (icmp_h->icmp_type == RTE_IP_ICMP_ECHO_REQUEST) &&
428 (icmp_h->icmp_code == 0))) {
429 rte_pktmbuf_free(pkt);
430 continue;
431 }
432
433 if (verbose_level > 0)
434 printf(" ICMP: echo request seq id=%d\n",
435 rte_be_to_cpu_16(icmp_h->icmp_seq_nb));
436
437 /*
438 * Prepare ICMP echo reply to be sent back.
439 * - switch ethernet source and destinations addresses,
440 * - use the request IP source address as the reply IP
441 * destination address,
442 * - if the request IP destination address is a multicast
443 * address:
444 * - choose a reply IP source address different from the
445 * request IP source address,
446 * - re-compute the IP header checksum.
447 * Otherwise:
448 * - switch the request IP source and destination
449 * addresses in the reply IP header,
450 * - keep the IP header checksum unchanged.
451 * - set RTE_IP_ICMP_ECHO_REPLY in ICMP header.
452 * ICMP checksum is computed by assuming it is valid in the
453 * echo request and not verified.
454 */
455 rte_ether_addr_copy(ð_h->src_addr, ð_addr);
456 rte_ether_addr_copy(ð_h->dst_addr, ð_h->src_addr);
457 rte_ether_addr_copy(ð_addr, ð_h->dst_addr);
458 ip_addr = ip_h->src_addr;
459 if (is_multicast_ipv4_addr(ip_h->dst_addr)) {
460 uint32_t ip_src;
461
462 ip_src = rte_be_to_cpu_32(ip_addr);
463 if ((ip_src & 0x00000003) == 1)
464 ip_src = (ip_src & 0xFFFFFFFC) | 0x00000002;
465 else
466 ip_src = (ip_src & 0xFFFFFFFC) | 0x00000001;
467 ip_h->src_addr = rte_cpu_to_be_32(ip_src);
468 ip_h->dst_addr = ip_addr;
469 ip_h->hdr_checksum = ipv4_hdr_cksum(ip_h);
470 } else {
471 ip_h->src_addr = ip_h->dst_addr;
472 ip_h->dst_addr = ip_addr;
473 }
474 icmp_h->icmp_type = RTE_IP_ICMP_ECHO_REPLY;
475 cksum = ~icmp_h->icmp_cksum & 0xffff;
476 cksum += ~RTE_BE16(RTE_IP_ICMP_ECHO_REQUEST << 8) & 0xffff;
477 cksum += RTE_BE16(RTE_IP_ICMP_ECHO_REPLY << 8);
478 cksum = (cksum & 0xffff) + (cksum >> 16);
479 cksum = (cksum & 0xffff) + (cksum >> 16);
480 icmp_h->icmp_cksum = ~cksum;
481 pkts_burst[nb_replies++] = pkt;
482 }
483
484 /* Send back ICMP echo replies, if any. */
485 if (nb_replies > 0) {
486 nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst,
487 nb_replies);
488 /*
489 * Retry if necessary
490 */
491 if (unlikely(nb_tx < nb_replies) && fs->retry_enabled) {
492 retry = 0;
493 while (nb_tx < nb_replies &&
494 retry++ < burst_tx_retry_num) {
495 rte_delay_us(burst_tx_delay_time);
496 nb_tx += rte_eth_tx_burst(fs->tx_port,
497 fs->tx_queue,
498 &pkts_burst[nb_tx],
499 nb_replies - nb_tx);
500 }
501 }
502 fs->tx_packets += nb_tx;
503 inc_tx_burst_stats(fs, nb_tx);
504 if (unlikely(nb_tx < nb_replies)) {
505 fs->fwd_dropped += (nb_replies - nb_tx);
506 do {
507 rte_pktmbuf_free(pkts_burst[nb_tx]);
508 } while (++nb_tx < nb_replies);
509 }
510 }
511
512 get_end_cycles(fs, start_tsc);
513 }
514
515 struct fwd_engine icmp_echo_engine = {
516 .fwd_mode_name = "icmpecho",
517 .port_fwd_begin = NULL,
518 .port_fwd_end = NULL,
519 .packet_fwd = reply_to_icmp_echo_rqsts,
520 };
521