/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <stdarg.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
#include <stdint.h>
#include <unistd.h>
#include <inttypes.h>

#include <sys/queue.h>
#include <sys/stat.h>

#include <rte_common.h>
#include <rte_byteorder.h>
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_cycles.h>
#include <rte_memory.h>
#include <rte_memcpy.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_mbuf.h>
#include <rte_interrupts.h>
#include <rte_pci.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_ip.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_string_fns.h>
#include <rte_flow.h>

#include "testpmd.h"

struct tx_timestamp {
	rte_be32_t signature;
	rte_be16_t pkt_idx;
	rte_be16_t queue_idx;
	rte_be64_t ts;
};

/* use RFC863 Discard Protocol */
uint16_t tx_udp_src_port = 9;
uint16_t tx_udp_dst_port = 9;

/* use RFC5735 / RFC2544 reserved network test addresses */
uint32_t tx_ip_src_addr = (198U << 24) | (18 << 16) | (0 << 8) | 1;
uint32_t tx_ip_dst_addr = (198U << 24) | (18 << 16) | (0 << 8) | 2;

#define IP_DEFTTL  64   /* from RFC 1340. */

static struct rte_ipv4_hdr pkt_ip_hdr; /**< IP header of transmitted packets. */
RTE_DEFINE_PER_LCORE(uint8_t, _ip_var); /**< IP address variation */
static struct rte_udp_hdr pkt_udp_hdr; /**< UDP header of tx packets. */

static uint64_t timestamp_mask; /**< Timestamp dynamic flag mask */
static int32_t timestamp_off; /**< Timestamp dynamic field offset */
static bool timestamp_enable; /**< Timestamp enable */
static uint64_t timestamp_initial[RTE_MAX_ETHPORTS];

static void
copy_buf_to_pkt_segs(void *buf, unsigned int len, struct rte_mbuf *pkt,
		     unsigned int offset)
{
	struct rte_mbuf *seg;
	void *seg_buf;
	unsigned int copy_len;

	seg = pkt;
	while (offset >= seg->data_len) {
		offset -= seg->data_len;
		seg = seg->next;
	}
	copy_len = seg->data_len - offset;
	seg_buf = rte_pktmbuf_mtod_offset(seg, char *, offset);
	while (len > copy_len) {
		rte_memcpy(seg_buf, buf, (size_t) copy_len);
		len -= copy_len;
		buf = ((char *) buf + copy_len);
		seg = seg->next;
		seg_buf = rte_pktmbuf_mtod(seg, char *);
		copy_len = seg->data_len;
	}
	rte_memcpy(seg_buf, buf, (size_t) len);
}

static inline void
copy_buf_to_pkt(void *buf, unsigned int len, struct rte_mbuf *pkt,
		unsigned int offset)
{
	if (offset + len <= pkt->data_len) {
		rte_memcpy(rte_pktmbuf_mtod_offset(pkt, char *, offset),
			buf, (size_t) len);
		return;
	}
	copy_buf_to_pkt_segs(buf, len, pkt, offset);
}

static void
setup_pkt_udp_ip_headers(struct rte_ipv4_hdr *ip_hdr,
			 struct rte_udp_hdr *udp_hdr,
			 uint16_t pkt_data_len)
{
	uint16_t *ptr16;
	uint32_t ip_cksum;
	uint16_t pkt_len;

	/*
	 * Initialize UDP header.
	 */
	pkt_len = (uint16_t) (pkt_data_len + sizeof(struct rte_udp_hdr));
	udp_hdr->src_port = rte_cpu_to_be_16(tx_udp_src_port);
	udp_hdr->dst_port = rte_cpu_to_be_16(tx_udp_dst_port);
	udp_hdr->dgram_len = RTE_CPU_TO_BE_16(pkt_len);
	udp_hdr->dgram_cksum = 0; /* No UDP checksum. */

	/*
	 * Initialize IP header.
	 */
	pkt_len = (uint16_t) (pkt_len + sizeof(struct rte_ipv4_hdr));
	ip_hdr->version_ihl = RTE_IPV4_VHL_DEF;
	ip_hdr->type_of_service = 0;
	ip_hdr->fragment_offset = 0;
	ip_hdr->time_to_live = IP_DEFTTL;
	ip_hdr->next_proto_id = IPPROTO_UDP;
	ip_hdr->packet_id = 0;
	ip_hdr->total_length = RTE_CPU_TO_BE_16(pkt_len);
	ip_hdr->src_addr = rte_cpu_to_be_32(tx_ip_src_addr);
	ip_hdr->dst_addr = rte_cpu_to_be_32(tx_ip_dst_addr);

	/*
	 * Compute IP header checksum.
	 */
	ptr16 = (unaligned_uint16_t *) ip_hdr;
	ip_cksum = 0;
	ip_cksum += ptr16[0]; ip_cksum += ptr16[1];
	ip_cksum += ptr16[2]; ip_cksum += ptr16[3];
	ip_cksum += ptr16[4];
	/* ptr16[5] is the checksum field itself and is skipped. */
	ip_cksum += ptr16[6]; ip_cksum += ptr16[7];
	ip_cksum += ptr16[8]; ip_cksum += ptr16[9];

	/*
	 * Reduce 32 bit checksum to 16 bits and complement it.
	 */
	ip_cksum = ((ip_cksum & 0xFFFF0000) >> 16) +
		(ip_cksum & 0x0000FFFF);
	if (ip_cksum > 65535)
		ip_cksum -= 65535;
	ip_cksum = (~ip_cksum) & 0x0000FFFF;
	if (ip_cksum == 0)
		ip_cksum = 0xFFFF;
	ip_hdr->hdr_checksum = (uint16_t) ip_cksum;
}

static inline void
update_pkt_header(struct rte_mbuf *pkt, uint32_t total_pkt_len)
{
	struct rte_ipv4_hdr *ip_hdr;
	struct rte_udp_hdr *udp_hdr;
	uint16_t pkt_data_len;
	uint16_t pkt_len;

	pkt_data_len = (uint16_t) (total_pkt_len - (
					sizeof(struct rte_ether_hdr) +
					sizeof(struct rte_ipv4_hdr) +
					sizeof(struct rte_udp_hdr)));
	/* update UDP packet length */
	udp_hdr = rte_pktmbuf_mtod_offset(pkt, struct rte_udp_hdr *,
				sizeof(struct rte_ether_hdr) +
				sizeof(struct rte_ipv4_hdr));
	pkt_len = (uint16_t) (pkt_data_len + sizeof(struct rte_udp_hdr));
	udp_hdr->dgram_len = RTE_CPU_TO_BE_16(pkt_len);

	/* update IP packet length and checksum */
	ip_hdr = rte_pktmbuf_mtod_offset(pkt, struct rte_ipv4_hdr *,
				sizeof(struct rte_ether_hdr));
	ip_hdr->hdr_checksum = 0;
	pkt_len = (uint16_t) (pkt_len + sizeof(struct rte_ipv4_hdr));
	ip_hdr->total_length = RTE_CPU_TO_BE_16(pkt_len);
	ip_hdr->hdr_checksum = rte_ipv4_cksum(ip_hdr);
}

static inline bool
pkt_burst_prepare(struct rte_mbuf *pkt, struct rte_mempool *mbp,
		struct rte_ether_hdr *eth_hdr, const uint16_t vlan_tci,
		const uint16_t vlan_tci_outer, const uint64_t ol_flags,
		const uint16_t idx, struct fwd_stream *fs)
{
	struct rte_mbuf *pkt_segs[RTE_MAX_SEGS_PER_PKT];
	struct rte_mbuf *pkt_seg;
	uint32_t nb_segs, pkt_len;
	uint8_t i;

	if (unlikely(tx_pkt_split == TX_PKT_SPLIT_RND))
		nb_segs = rte_rand() % tx_pkt_nb_segs + 1;
	else
		nb_segs = tx_pkt_nb_segs;

	if (nb_segs > 1) {
		if (rte_mempool_get_bulk(mbp, (void **)pkt_segs, nb_segs - 1))
			return false;
	}

	rte_pktmbuf_reset_headroom(pkt);
	pkt->data_len = tx_pkt_seg_lengths[0];
	pkt->ol_flags &= RTE_MBUF_F_EXTERNAL;
	pkt->ol_flags |= ol_flags;
	pkt->vlan_tci = vlan_tci;
	pkt->vlan_tci_outer = vlan_tci_outer;
	pkt->l2_len = sizeof(struct rte_ether_hdr);
	pkt->l3_len = sizeof(struct rte_ipv4_hdr);

	pkt_len = pkt->data_len;
	pkt_seg = pkt;
	for (i = 1; i < nb_segs; i++) {
		pkt_seg->next = pkt_segs[i - 1];
		pkt_seg = pkt_seg->next;
		pkt_seg->data_len = tx_pkt_seg_lengths[i];
		pkt_len += pkt_seg->data_len;
	}
	pkt_seg->next = NULL; /* Last segment of packet. */
	/*
	 * Copy headers in first packet segment(s).
	 */
	copy_buf_to_pkt(eth_hdr, sizeof(*eth_hdr), pkt, 0);
	copy_buf_to_pkt(&pkt_ip_hdr, sizeof(pkt_ip_hdr), pkt,
			sizeof(struct rte_ether_hdr));
	if (txonly_multi_flow) {
		uint8_t ip_var = RTE_PER_LCORE(_ip_var);
		struct rte_ipv4_hdr *ip_hdr;
		uint32_t addr;

		ip_hdr = rte_pktmbuf_mtod_offset(pkt,
				struct rte_ipv4_hdr *,
				sizeof(struct rte_ether_hdr));
		/*
		 * Generate multiple flows by varying the IP src addr. This
		 * lets RSS on the receiving side, if any, distribute packets
		 * across queues, so txonly mode can serve as a decent packet
		 * generator for quick performance regression tests.
		 */
		addr = (tx_ip_dst_addr | (ip_var++ << 8)) + rte_lcore_id();
		ip_hdr->src_addr = rte_cpu_to_be_32(addr);
		RTE_PER_LCORE(_ip_var) = ip_var;
	}
	copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt,
			sizeof(struct rte_ether_hdr) +
			sizeof(struct rte_ipv4_hdr));

	if (unlikely(tx_pkt_split == TX_PKT_SPLIT_RND) || txonly_multi_flow)
		update_pkt_header(pkt, pkt_len);

	if (unlikely(timestamp_enable)) {
		uint64_t skew = fs->ts_skew;
		struct tx_timestamp timestamp_mark;

		if (unlikely(!skew)) {
			struct rte_eth_dev_info dev_info;
			unsigned int txqs_n;
			uint64_t phase;
			int ret;

			ret = eth_dev_info_get_print_err(fs->tx_port, &dev_info);
			if (ret != 0) {
				TESTPMD_LOG(ERR,
					"Failed to get device info for port %d, "
					"could not finish timestamp init",
					fs->tx_port);
				return false;
			}
			txqs_n = dev_info.nb_tx_queues;
			phase = tx_pkt_times_inter * fs->tx_queue /
					(txqs_n ? txqs_n : 1);
			/*
			 * Initialize the scheduling time phase shift
			 * depending on queue index.
			 */
			skew = timestamp_initial[fs->tx_port] +
			       tx_pkt_times_inter + phase;
			fs->ts_skew = skew;
		}
		timestamp_mark.pkt_idx = rte_cpu_to_be_16(idx);
		timestamp_mark.queue_idx = rte_cpu_to_be_16(fs->tx_queue);
		timestamp_mark.signature = rte_cpu_to_be_32(0xBEEFC0DE);
		if (unlikely(!idx)) {
			skew += tx_pkt_times_inter;
			pkt->ol_flags |= timestamp_mask;
			*RTE_MBUF_DYNFIELD
				(pkt, timestamp_off, uint64_t *) = skew;
			fs->ts_skew = skew;
			timestamp_mark.ts = rte_cpu_to_be_64(skew);
		} else if (tx_pkt_times_intra) {
			skew += tx_pkt_times_intra;
			pkt->ol_flags |= timestamp_mask;
			*RTE_MBUF_DYNFIELD
				(pkt, timestamp_off, uint64_t *) = skew;
			fs->ts_skew = skew;
			timestamp_mark.ts = rte_cpu_to_be_64(skew);
		} else {
			timestamp_mark.ts = RTE_BE64(0);
		}
		copy_buf_to_pkt(&timestamp_mark, sizeof(timestamp_mark), pkt,
			sizeof(struct rte_ether_hdr) +
			sizeof(struct rte_ipv4_hdr) +
			sizeof(pkt_udp_hdr));
	}
	/*
	 * Complete first mbuf of packet and append it to the
	 * burst of packets to be transmitted.
	 */
	pkt->nb_segs = nb_segs;
	pkt->pkt_len = pkt_len;

	return true;
}

/*
 * Transmit a burst of multi-segment packets.
 */
static void
pkt_burst_transmit(struct fwd_stream *fs)
{
	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
	struct rte_port *txp;
	struct rte_mbuf *pkt;
	struct rte_mempool *mbp;
	struct rte_ether_hdr eth_hdr;
	uint16_t nb_tx;
	uint16_t nb_pkt;
	uint16_t vlan_tci, vlan_tci_outer;
	uint32_t retry;
	uint64_t ol_flags = 0;
	uint64_t tx_offloads;
	uint64_t start_tsc = 0;

	get_start_cycles(&start_tsc);

	mbp = current_fwd_lcore()->mbp;
	txp = &ports[fs->tx_port];
	tx_offloads = txp->dev_conf.txmode.offloads;
	vlan_tci = txp->tx_vlan_id;
	vlan_tci_outer = txp->tx_vlan_id_outer;
	if (tx_offloads & RTE_ETH_TX_OFFLOAD_VLAN_INSERT)
		ol_flags = RTE_MBUF_F_TX_VLAN;
	if (tx_offloads & RTE_ETH_TX_OFFLOAD_QINQ_INSERT)
		ol_flags |= RTE_MBUF_F_TX_QINQ;
	if (tx_offloads & RTE_ETH_TX_OFFLOAD_MACSEC_INSERT)
		ol_flags |= RTE_MBUF_F_TX_MACSEC;

	/*
	 * Initialize Ethernet header.
	 */
	rte_ether_addr_copy(&peer_eth_addrs[fs->peer_addr], &eth_hdr.dst_addr);
	rte_ether_addr_copy(&ports[fs->tx_port].eth_addr, &eth_hdr.src_addr);
	eth_hdr.ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);

	if (rte_mempool_get_bulk(mbp, (void **)pkts_burst,
				nb_pkt_per_burst) == 0) {
		for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
			if (unlikely(!pkt_burst_prepare(pkts_burst[nb_pkt], mbp,
							&eth_hdr, vlan_tci,
							vlan_tci_outer,
							ol_flags,
							nb_pkt, fs))) {
				rte_mempool_put_bulk(mbp,
						(void **)&pkts_burst[nb_pkt],
						nb_pkt_per_burst - nb_pkt);
				break;
			}
		}
	} else {
		for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
			pkt = rte_mbuf_raw_alloc(mbp);
			if (pkt == NULL)
				break;
			if (unlikely(!pkt_burst_prepare(pkt, mbp, &eth_hdr,
							vlan_tci,
							vlan_tci_outer,
							ol_flags,
							nb_pkt, fs))) {
				rte_pktmbuf_free(pkt);
				break;
			}
			pkts_burst[nb_pkt] = pkt;
		}
	}

	if (nb_pkt == 0)
		return;

	nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_pkt);

	/*
	 * Retry if necessary
	 */
	if (unlikely(nb_tx < nb_pkt) && fs->retry_enabled) {
		retry = 0;
		while (nb_tx < nb_pkt && retry++ < burst_tx_retry_num) {
			rte_delay_us(burst_tx_delay_time);
			nb_tx += rte_eth_tx_burst(fs->tx_port, fs->tx_queue,
					&pkts_burst[nb_tx], nb_pkt - nb_tx);
		}
	}
	fs->tx_packets += nb_tx;

	if (txonly_multi_flow)
		RTE_PER_LCORE(_ip_var) -= nb_pkt - nb_tx;

	inc_tx_burst_stats(fs, nb_tx);
	if (unlikely(nb_tx < nb_pkt)) {
		if (verbose_level > 0 && fs->fwd_dropped == 0)
			printf("port %d tx_queue %d - drop "
			       "(nb_pkt:%u - nb_tx:%u)=%u packets\n",
			       fs->tx_port, fs->tx_queue,
			       (unsigned) nb_pkt, (unsigned) nb_tx,
			       (unsigned) (nb_pkt - nb_tx));
		fs->fwd_dropped += (nb_pkt - nb_tx);
		do {
			rte_pktmbuf_free(pkts_burst[nb_tx]);
		} while (++nb_tx < nb_pkt);
	}

	get_end_cycles(fs, start_tsc);
}

static int
tx_only_begin(portid_t pi)
{
	uint16_t pkt_hdr_len, pkt_data_len;
	int dynf;

	pkt_hdr_len = (uint16_t)(sizeof(struct rte_ether_hdr) +
				 sizeof(struct rte_ipv4_hdr) +
				 sizeof(struct rte_udp_hdr));
	pkt_data_len = tx_pkt_length - pkt_hdr_len;

	if ((tx_pkt_split == TX_PKT_SPLIT_RND || txonly_multi_flow) &&
	    tx_pkt_seg_lengths[0] < pkt_hdr_len) {
		TESTPMD_LOG(ERR,
			    "Random segment number or multiple flow is enabled, "
			    "but tx_pkt_seg_lengths[0] %u < %u (needed)\n",
			    tx_pkt_seg_lengths[0], pkt_hdr_len);
		return -EINVAL;
	}

	setup_pkt_udp_ip_headers(&pkt_ip_hdr, &pkt_udp_hdr, pkt_data_len);

	timestamp_enable = false;
	timestamp_mask = 0;
	timestamp_off = -1;
	dynf = rte_mbuf_dynflag_lookup
				(RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME, NULL);
	if (dynf >= 0)
		timestamp_mask = 1ULL << dynf;
	dynf = rte_mbuf_dynfield_lookup
				(RTE_MBUF_DYNFIELD_TIMESTAMP_NAME, NULL);
	if (dynf >= 0)
		timestamp_off = dynf;
	timestamp_enable = tx_pkt_times_inter &&
			   timestamp_mask &&
			   timestamp_off >= 0 &&
			   !rte_eth_read_clock(pi, &timestamp_initial[pi]);

	if (timestamp_enable) {
		pkt_hdr_len += sizeof(struct tx_timestamp);

		if (tx_pkt_split == TX_PKT_SPLIT_RND) {
			if (tx_pkt_seg_lengths[0] < pkt_hdr_len) {
				TESTPMD_LOG(ERR,
					    "Time stamp and random segment number are enabled, "
					    "but tx_pkt_seg_lengths[0] %u < %u (needed)\n",
					    tx_pkt_seg_lengths[0], pkt_hdr_len);
				return -EINVAL;
			}
		} else {
			uint16_t total = 0;
			uint8_t i;

			for (i = 0; i < tx_pkt_nb_segs; i++) {
				total += tx_pkt_seg_lengths[i];
				if (total >= pkt_hdr_len)
					break;
			}

			if (total < pkt_hdr_len) {
				TESTPMD_LOG(ERR,
					    "Not enough Tx segment space for time stamp info, "
					    "total %u < %u (needed)\n",
					    total, pkt_hdr_len);
				return -EINVAL;
			}
		}
	}

	/* Make sure all settings are visible on forwarding cores. */
	rte_wmb();
	return 0;
}

struct fwd_engine tx_only_engine = {
	.fwd_mode_name  = "txonly",
	.port_fwd_begin = tx_only_begin,
	.port_fwd_end   = NULL,
	.packet_fwd     = pkt_burst_transmit,
};