1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2014 Intel Corporation 3 */ 4 5 #include <stdarg.h> 6 #include <string.h> 7 #include <stdio.h> 8 #include <errno.h> 9 #include <stdint.h> 10 #include <unistd.h> 11 #include <inttypes.h> 12 13 #include <sys/queue.h> 14 #include <sys/stat.h> 15 16 #include <rte_common.h> 17 #include <rte_byteorder.h> 18 #include <rte_log.h> 19 #include <rte_debug.h> 20 #include <rte_cycles.h> 21 #include <rte_memory.h> 22 #include <rte_memcpy.h> 23 #include <rte_launch.h> 24 #include <rte_eal.h> 25 #include <rte_per_lcore.h> 26 #include <rte_lcore.h> 27 #include <rte_atomic.h> 28 #include <rte_branch_prediction.h> 29 #include <rte_mempool.h> 30 #include <rte_mbuf.h> 31 #include <rte_interrupts.h> 32 #include <rte_pci.h> 33 #include <rte_ether.h> 34 #include <rte_ethdev.h> 35 #include <rte_ip.h> 36 #include <rte_tcp.h> 37 #include <rte_udp.h> 38 #include <rte_string_fns.h> 39 #include <rte_flow.h> 40 41 #include "testpmd.h" 42 43 struct tx_timestamp { 44 rte_be32_t signature; 45 rte_be16_t pkt_idx; 46 rte_be16_t queue_idx; 47 rte_be64_t ts; 48 }; 49 50 /* use RFC863 Discard Protocol */ 51 uint16_t tx_udp_src_port = 9; 52 uint16_t tx_udp_dst_port = 9; 53 54 /* use RFC5735 / RFC2544 reserved network test addresses */ 55 uint32_t tx_ip_src_addr = (198U << 24) | (18 << 16) | (0 << 8) | 1; 56 uint32_t tx_ip_dst_addr = (198U << 24) | (18 << 16) | (0 << 8) | 2; 57 58 #define IP_DEFTTL 64 /* from RFC 1340. */ 59 60 static struct rte_ipv4_hdr pkt_ip_hdr; /**< IP header of transmitted packets. */ 61 RTE_DEFINE_PER_LCORE(uint8_t, _ip_var); /**< IP address variation */ 62 static struct rte_udp_hdr pkt_udp_hdr; /**< UDP header of tx packets. */ 63 RTE_DEFINE_PER_LCORE(uint64_t, timestamp_qskew); 64 /**< Timestamp offset per queue */ 65 RTE_DEFINE_PER_LCORE(uint32_t, timestamp_idone); /**< Timestamp init done. */ 66 67 static uint64_t timestamp_mask; /**< Timestamp dynamic flag mask */ 68 static int32_t timestamp_off; /**< Timestamp dynamic field offset */ 69 static bool timestamp_enable; /**< Timestamp enable */ 70 static uint32_t timestamp_init_req; /**< Timestamp initialization request. */ 71 static uint64_t timestamp_initial[RTE_MAX_ETHPORTS]; 72 73 static void 74 copy_buf_to_pkt_segs(void* buf, unsigned len, struct rte_mbuf *pkt, 75 unsigned offset) 76 { 77 struct rte_mbuf *seg; 78 void *seg_buf; 79 unsigned copy_len; 80 81 seg = pkt; 82 while (offset >= seg->data_len) { 83 offset -= seg->data_len; 84 seg = seg->next; 85 } 86 copy_len = seg->data_len - offset; 87 seg_buf = rte_pktmbuf_mtod_offset(seg, char *, offset); 88 while (len > copy_len) { 89 rte_memcpy(seg_buf, buf, (size_t) copy_len); 90 len -= copy_len; 91 buf = ((char*) buf + copy_len); 92 seg = seg->next; 93 seg_buf = rte_pktmbuf_mtod(seg, char *); 94 copy_len = seg->data_len; 95 } 96 rte_memcpy(seg_buf, buf, (size_t) len); 97 } 98 99 static inline void 100 copy_buf_to_pkt(void* buf, unsigned len, struct rte_mbuf *pkt, unsigned offset) 101 { 102 if (offset + len <= pkt->data_len) { 103 rte_memcpy(rte_pktmbuf_mtod_offset(pkt, char *, offset), 104 buf, (size_t) len); 105 return; 106 } 107 copy_buf_to_pkt_segs(buf, len, pkt, offset); 108 } 109 110 static void 111 setup_pkt_udp_ip_headers(struct rte_ipv4_hdr *ip_hdr, 112 struct rte_udp_hdr *udp_hdr, 113 uint16_t pkt_data_len) 114 { 115 uint16_t *ptr16; 116 uint32_t ip_cksum; 117 uint16_t pkt_len; 118 119 /* 120 * Initialize UDP header. 121 */ 122 pkt_len = (uint16_t) (pkt_data_len + sizeof(struct rte_udp_hdr)); 123 udp_hdr->src_port = rte_cpu_to_be_16(tx_udp_src_port); 124 udp_hdr->dst_port = rte_cpu_to_be_16(tx_udp_dst_port); 125 udp_hdr->dgram_len = RTE_CPU_TO_BE_16(pkt_len); 126 udp_hdr->dgram_cksum = 0; /* No UDP checksum. */ 127 128 /* 129 * Initialize IP header. 130 */ 131 pkt_len = (uint16_t) (pkt_len + sizeof(struct rte_ipv4_hdr)); 132 ip_hdr->version_ihl = RTE_IPV4_VHL_DEF; 133 ip_hdr->type_of_service = 0; 134 ip_hdr->fragment_offset = 0; 135 ip_hdr->time_to_live = IP_DEFTTL; 136 ip_hdr->next_proto_id = IPPROTO_UDP; 137 ip_hdr->packet_id = 0; 138 ip_hdr->total_length = RTE_CPU_TO_BE_16(pkt_len); 139 ip_hdr->src_addr = rte_cpu_to_be_32(tx_ip_src_addr); 140 ip_hdr->dst_addr = rte_cpu_to_be_32(tx_ip_dst_addr); 141 142 /* 143 * Compute IP header checksum. 144 */ 145 ptr16 = (unaligned_uint16_t*) ip_hdr; 146 ip_cksum = 0; 147 ip_cksum += ptr16[0]; ip_cksum += ptr16[1]; 148 ip_cksum += ptr16[2]; ip_cksum += ptr16[3]; 149 ip_cksum += ptr16[4]; 150 ip_cksum += ptr16[6]; ip_cksum += ptr16[7]; 151 ip_cksum += ptr16[8]; ip_cksum += ptr16[9]; 152 153 /* 154 * Reduce 32 bit checksum to 16 bits and complement it. 155 */ 156 ip_cksum = ((ip_cksum & 0xFFFF0000) >> 16) + 157 (ip_cksum & 0x0000FFFF); 158 if (ip_cksum > 65535) 159 ip_cksum -= 65535; 160 ip_cksum = (~ip_cksum) & 0x0000FFFF; 161 if (ip_cksum == 0) 162 ip_cksum = 0xFFFF; 163 ip_hdr->hdr_checksum = (uint16_t) ip_cksum; 164 } 165 166 static inline void 167 update_pkt_header(struct rte_mbuf *pkt, uint32_t total_pkt_len) 168 { 169 struct rte_ipv4_hdr *ip_hdr; 170 struct rte_udp_hdr *udp_hdr; 171 uint16_t pkt_data_len; 172 uint16_t pkt_len; 173 174 pkt_data_len = (uint16_t) (total_pkt_len - ( 175 sizeof(struct rte_ether_hdr) + 176 sizeof(struct rte_ipv4_hdr) + 177 sizeof(struct rte_udp_hdr))); 178 /* updata udp pkt length */ 179 udp_hdr = rte_pktmbuf_mtod_offset(pkt, struct rte_udp_hdr *, 180 sizeof(struct rte_ether_hdr) + 181 sizeof(struct rte_ipv4_hdr)); 182 pkt_len = (uint16_t) (pkt_data_len + sizeof(struct rte_udp_hdr)); 183 udp_hdr->dgram_len = RTE_CPU_TO_BE_16(pkt_len); 184 185 /* updata ip pkt length and csum */ 186 ip_hdr = rte_pktmbuf_mtod_offset(pkt, struct rte_ipv4_hdr *, 187 sizeof(struct rte_ether_hdr)); 188 ip_hdr->hdr_checksum = 0; 189 pkt_len = (uint16_t) (pkt_len + sizeof(struct rte_ipv4_hdr)); 190 ip_hdr->total_length = RTE_CPU_TO_BE_16(pkt_len); 191 ip_hdr->hdr_checksum = rte_ipv4_cksum(ip_hdr); 192 } 193 194 static inline bool 195 pkt_burst_prepare(struct rte_mbuf *pkt, struct rte_mempool *mbp, 196 struct rte_ether_hdr *eth_hdr, const uint16_t vlan_tci, 197 const uint16_t vlan_tci_outer, const uint64_t ol_flags, 198 const uint16_t idx, const struct fwd_stream *fs) 199 { 200 struct rte_mbuf *pkt_segs[RTE_MAX_SEGS_PER_PKT]; 201 struct rte_mbuf *pkt_seg; 202 uint32_t nb_segs, pkt_len; 203 uint8_t i; 204 205 if (unlikely(tx_pkt_split == TX_PKT_SPLIT_RND)) 206 nb_segs = rte_rand() % tx_pkt_nb_segs + 1; 207 else 208 nb_segs = tx_pkt_nb_segs; 209 210 if (nb_segs > 1) { 211 if (rte_mempool_get_bulk(mbp, (void **)pkt_segs, nb_segs - 1)) 212 return false; 213 } 214 215 rte_pktmbuf_reset_headroom(pkt); 216 pkt->data_len = tx_pkt_seg_lengths[0]; 217 pkt->ol_flags &= EXT_ATTACHED_MBUF; 218 pkt->ol_flags |= ol_flags; 219 pkt->vlan_tci = vlan_tci; 220 pkt->vlan_tci_outer = vlan_tci_outer; 221 pkt->l2_len = sizeof(struct rte_ether_hdr); 222 pkt->l3_len = sizeof(struct rte_ipv4_hdr); 223 224 pkt_len = pkt->data_len; 225 pkt_seg = pkt; 226 for (i = 1; i < nb_segs; i++) { 227 pkt_seg->next = pkt_segs[i - 1]; 228 pkt_seg = pkt_seg->next; 229 pkt_seg->data_len = tx_pkt_seg_lengths[i]; 230 pkt_len += pkt_seg->data_len; 231 } 232 pkt_seg->next = NULL; /* Last segment of packet. */ 233 /* 234 * Copy headers in first packet segment(s). 235 */ 236 copy_buf_to_pkt(eth_hdr, sizeof(*eth_hdr), pkt, 0); 237 copy_buf_to_pkt(&pkt_ip_hdr, sizeof(pkt_ip_hdr), pkt, 238 sizeof(struct rte_ether_hdr)); 239 if (txonly_multi_flow) { 240 uint8_t ip_var = RTE_PER_LCORE(_ip_var); 241 struct rte_ipv4_hdr *ip_hdr; 242 uint32_t addr; 243 244 ip_hdr = rte_pktmbuf_mtod_offset(pkt, 245 struct rte_ipv4_hdr *, 246 sizeof(struct rte_ether_hdr)); 247 /* 248 * Generate multiple flows by varying IP src addr. This 249 * enables packets are well distributed by RSS in 250 * receiver side if any and txonly mode can be a decent 251 * packet generator for developer's quick performance 252 * regression test. 253 */ 254 addr = (tx_ip_dst_addr | (ip_var++ << 8)) + rte_lcore_id(); 255 ip_hdr->src_addr = rte_cpu_to_be_32(addr); 256 RTE_PER_LCORE(_ip_var) = ip_var; 257 } 258 copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt, 259 sizeof(struct rte_ether_hdr) + 260 sizeof(struct rte_ipv4_hdr)); 261 262 if (unlikely(tx_pkt_split == TX_PKT_SPLIT_RND) || txonly_multi_flow) 263 update_pkt_header(pkt, pkt_len); 264 265 if (unlikely(timestamp_enable)) { 266 uint64_t skew = RTE_PER_LCORE(timestamp_qskew); 267 struct tx_timestamp timestamp_mark; 268 269 if (unlikely(timestamp_init_req != 270 RTE_PER_LCORE(timestamp_idone))) { 271 struct rte_eth_dev_info dev_info; 272 unsigned int txqs_n; 273 uint64_t phase; 274 int ret; 275 276 ret = eth_dev_info_get_print_err(fs->tx_port, &dev_info); 277 if (ret != 0) { 278 TESTPMD_LOG(ERR, 279 "Failed to get device info for port %d," 280 "could not finish timestamp init", 281 fs->tx_port); 282 return false; 283 } 284 txqs_n = dev_info.nb_tx_queues; 285 phase = tx_pkt_times_inter * fs->tx_queue / 286 (txqs_n ? txqs_n : 1); 287 /* 288 * Initialize the scheduling time phase shift 289 * depending on queue index. 290 */ 291 skew = timestamp_initial[fs->tx_port] + 292 tx_pkt_times_inter + phase; 293 RTE_PER_LCORE(timestamp_qskew) = skew; 294 RTE_PER_LCORE(timestamp_idone) = timestamp_init_req; 295 } 296 timestamp_mark.pkt_idx = rte_cpu_to_be_16(idx); 297 timestamp_mark.queue_idx = rte_cpu_to_be_16(fs->tx_queue); 298 timestamp_mark.signature = rte_cpu_to_be_32(0xBEEFC0DE); 299 if (unlikely(!idx)) { 300 skew += tx_pkt_times_inter; 301 pkt->ol_flags |= timestamp_mask; 302 *RTE_MBUF_DYNFIELD 303 (pkt, timestamp_off, uint64_t *) = skew; 304 RTE_PER_LCORE(timestamp_qskew) = skew; 305 timestamp_mark.ts = rte_cpu_to_be_64(skew); 306 } else if (tx_pkt_times_intra) { 307 skew += tx_pkt_times_intra; 308 pkt->ol_flags |= timestamp_mask; 309 *RTE_MBUF_DYNFIELD 310 (pkt, timestamp_off, uint64_t *) = skew; 311 RTE_PER_LCORE(timestamp_qskew) = skew; 312 timestamp_mark.ts = rte_cpu_to_be_64(skew); 313 } else { 314 timestamp_mark.ts = RTE_BE64(0); 315 } 316 copy_buf_to_pkt(×tamp_mark, sizeof(timestamp_mark), pkt, 317 sizeof(struct rte_ether_hdr) + 318 sizeof(struct rte_ipv4_hdr) + 319 sizeof(pkt_udp_hdr)); 320 } 321 /* 322 * Complete first mbuf of packet and append it to the 323 * burst of packets to be transmitted. 324 */ 325 pkt->nb_segs = nb_segs; 326 pkt->pkt_len = pkt_len; 327 328 return true; 329 } 330 331 /* 332 * Transmit a burst of multi-segments packets. 333 */ 334 static void 335 pkt_burst_transmit(struct fwd_stream *fs) 336 { 337 struct rte_mbuf *pkts_burst[MAX_PKT_BURST]; 338 struct rte_port *txp; 339 struct rte_mbuf *pkt; 340 struct rte_mempool *mbp; 341 struct rte_ether_hdr eth_hdr; 342 uint16_t nb_tx; 343 uint16_t nb_pkt; 344 uint16_t vlan_tci, vlan_tci_outer; 345 uint32_t retry; 346 uint64_t ol_flags = 0; 347 uint64_t tx_offloads; 348 uint64_t start_tsc = 0; 349 350 get_start_cycles(&start_tsc); 351 352 mbp = current_fwd_lcore()->mbp; 353 txp = &ports[fs->tx_port]; 354 tx_offloads = txp->dev_conf.txmode.offloads; 355 vlan_tci = txp->tx_vlan_id; 356 vlan_tci_outer = txp->tx_vlan_id_outer; 357 if (tx_offloads & DEV_TX_OFFLOAD_VLAN_INSERT) 358 ol_flags = PKT_TX_VLAN_PKT; 359 if (tx_offloads & DEV_TX_OFFLOAD_QINQ_INSERT) 360 ol_flags |= PKT_TX_QINQ_PKT; 361 if (tx_offloads & DEV_TX_OFFLOAD_MACSEC_INSERT) 362 ol_flags |= PKT_TX_MACSEC; 363 364 /* 365 * Initialize Ethernet header. 366 */ 367 rte_ether_addr_copy(&peer_eth_addrs[fs->peer_addr], ð_hdr.dst_addr); 368 rte_ether_addr_copy(&ports[fs->tx_port].eth_addr, ð_hdr.src_addr); 369 eth_hdr.ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4); 370 371 if (rte_mempool_get_bulk(mbp, (void **)pkts_burst, 372 nb_pkt_per_burst) == 0) { 373 for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) { 374 if (unlikely(!pkt_burst_prepare(pkts_burst[nb_pkt], mbp, 375 ð_hdr, vlan_tci, 376 vlan_tci_outer, 377 ol_flags, 378 nb_pkt, fs))) { 379 rte_mempool_put_bulk(mbp, 380 (void **)&pkts_burst[nb_pkt], 381 nb_pkt_per_burst - nb_pkt); 382 break; 383 } 384 } 385 } else { 386 for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) { 387 pkt = rte_mbuf_raw_alloc(mbp); 388 if (pkt == NULL) 389 break; 390 if (unlikely(!pkt_burst_prepare(pkt, mbp, ð_hdr, 391 vlan_tci, 392 vlan_tci_outer, 393 ol_flags, 394 nb_pkt, fs))) { 395 rte_pktmbuf_free(pkt); 396 break; 397 } 398 pkts_burst[nb_pkt] = pkt; 399 } 400 } 401 402 if (nb_pkt == 0) 403 return; 404 405 nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_pkt); 406 407 /* 408 * Retry if necessary 409 */ 410 if (unlikely(nb_tx < nb_pkt) && fs->retry_enabled) { 411 retry = 0; 412 while (nb_tx < nb_pkt && retry++ < burst_tx_retry_num) { 413 rte_delay_us(burst_tx_delay_time); 414 nb_tx += rte_eth_tx_burst(fs->tx_port, fs->tx_queue, 415 &pkts_burst[nb_tx], nb_pkt - nb_tx); 416 } 417 } 418 fs->tx_packets += nb_tx; 419 420 if (txonly_multi_flow) 421 RTE_PER_LCORE(_ip_var) -= nb_pkt - nb_tx; 422 423 inc_tx_burst_stats(fs, nb_tx); 424 if (unlikely(nb_tx < nb_pkt)) { 425 if (verbose_level > 0 && fs->fwd_dropped == 0) 426 printf("port %d tx_queue %d - drop " 427 "(nb_pkt:%u - nb_tx:%u)=%u packets\n", 428 fs->tx_port, fs->tx_queue, 429 (unsigned) nb_pkt, (unsigned) nb_tx, 430 (unsigned) (nb_pkt - nb_tx)); 431 fs->fwd_dropped += (nb_pkt - nb_tx); 432 do { 433 rte_pktmbuf_free(pkts_burst[nb_tx]); 434 } while (++nb_tx < nb_pkt); 435 } 436 437 get_end_cycles(fs, start_tsc); 438 } 439 440 static int 441 tx_only_begin(portid_t pi) 442 { 443 uint16_t pkt_hdr_len, pkt_data_len; 444 int dynf; 445 446 pkt_hdr_len = (uint16_t)(sizeof(struct rte_ether_hdr) + 447 sizeof(struct rte_ipv4_hdr) + 448 sizeof(struct rte_udp_hdr)); 449 pkt_data_len = tx_pkt_length - pkt_hdr_len; 450 451 if ((tx_pkt_split == TX_PKT_SPLIT_RND || txonly_multi_flow) && 452 tx_pkt_seg_lengths[0] < pkt_hdr_len) { 453 TESTPMD_LOG(ERR, 454 "Random segment number or multiple flow is enabled, " 455 "but tx_pkt_seg_lengths[0] %u < %u (needed)\n", 456 tx_pkt_seg_lengths[0], pkt_hdr_len); 457 return -EINVAL; 458 } 459 460 setup_pkt_udp_ip_headers(&pkt_ip_hdr, &pkt_udp_hdr, pkt_data_len); 461 462 timestamp_enable = false; 463 timestamp_mask = 0; 464 timestamp_off = -1; 465 RTE_PER_LCORE(timestamp_qskew) = 0; 466 dynf = rte_mbuf_dynflag_lookup 467 (RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME, NULL); 468 if (dynf >= 0) 469 timestamp_mask = 1ULL << dynf; 470 dynf = rte_mbuf_dynfield_lookup 471 (RTE_MBUF_DYNFIELD_TIMESTAMP_NAME, NULL); 472 if (dynf >= 0) 473 timestamp_off = dynf; 474 timestamp_enable = tx_pkt_times_inter && 475 timestamp_mask && 476 timestamp_off >= 0 && 477 !rte_eth_read_clock(pi, ×tamp_initial[pi]); 478 479 if (timestamp_enable) { 480 pkt_hdr_len += sizeof(struct tx_timestamp); 481 482 if (tx_pkt_split == TX_PKT_SPLIT_RND) { 483 if (tx_pkt_seg_lengths[0] < pkt_hdr_len) { 484 TESTPMD_LOG(ERR, 485 "Time stamp and random segment number are enabled, " 486 "but tx_pkt_seg_lengths[0] %u < %u (needed)\n", 487 tx_pkt_seg_lengths[0], pkt_hdr_len); 488 return -EINVAL; 489 } 490 } else { 491 uint16_t total = 0; 492 uint8_t i; 493 494 for (i = 0; i < tx_pkt_nb_segs; i++) { 495 total += tx_pkt_seg_lengths[i]; 496 if (total >= pkt_hdr_len) 497 break; 498 } 499 500 if (total < pkt_hdr_len) { 501 TESTPMD_LOG(ERR, 502 "Not enough Tx segment space for time stamp info, " 503 "total %u < %u (needed)\n", 504 total, pkt_hdr_len); 505 return -EINVAL; 506 } 507 } 508 timestamp_init_req++; 509 } 510 511 /* Make sure all settings are visible on forwarding cores.*/ 512 rte_wmb(); 513 return 0; 514 } 515 516 struct fwd_engine tx_only_engine = { 517 .fwd_mode_name = "txonly", 518 .port_fwd_begin = tx_only_begin, 519 .port_fwd_end = NULL, 520 .packet_fwd = pkt_burst_transmit, 521 }; 522