1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2021 6WIND S.A. 3 * Copyright 2021 Mellanox Technologies, Ltd 4 */ 5 6 #ifndef RTE_PMD_MLX5_TX_H_ 7 #define RTE_PMD_MLX5_TX_H_ 8 9 #include <stdint.h> 10 #include <sys/queue.h> 11 12 #include <rte_mbuf.h> 13 #include <rte_mempool.h> 14 #include <rte_common.h> 15 #include <rte_spinlock.h> 16 17 #include <mlx5_common.h> 18 #include <mlx5_common_mr.h> 19 20 #include "mlx5.h" 21 #include "mlx5_autoconf.h" 22 23 /* TX burst subroutines return codes. */ 24 enum mlx5_txcmp_code { 25 MLX5_TXCMP_CODE_EXIT = 0, 26 MLX5_TXCMP_CODE_ERROR, 27 MLX5_TXCMP_CODE_SINGLE, 28 MLX5_TXCMP_CODE_MULTI, 29 MLX5_TXCMP_CODE_TSO, 30 MLX5_TXCMP_CODE_EMPW, 31 }; 32 33 /* 34 * These defines are used to configure Tx burst routine option set supported 35 * at compile time. The not specified options are optimized out due to if 36 * conditions can be explicitly calculated at compile time. 37 * The offloads with bigger runtime check (require more CPU cycles toskip) 38 * overhead should have the bigger index - this is needed to select the better 39 * matching routine function if no exact match and some offloads are not 40 * actually requested. 41 */ 42 #define MLX5_TXOFF_CONFIG_MULTI (1u << 0) /* Multi-segment packets.*/ 43 #define MLX5_TXOFF_CONFIG_TSO (1u << 1) /* TCP send offload supported.*/ 44 #define MLX5_TXOFF_CONFIG_SWP (1u << 2) /* Tunnels/SW Parser offloads.*/ 45 #define MLX5_TXOFF_CONFIG_CSUM (1u << 3) /* Check Sums offloaded. */ 46 #define MLX5_TXOFF_CONFIG_INLINE (1u << 4) /* Data inlining supported. */ 47 #define MLX5_TXOFF_CONFIG_VLAN (1u << 5) /* VLAN insertion supported.*/ 48 #define MLX5_TXOFF_CONFIG_METADATA (1u << 6) /* Flow metadata. */ 49 #define MLX5_TXOFF_CONFIG_EMPW (1u << 8) /* Enhanced MPW supported.*/ 50 #define MLX5_TXOFF_CONFIG_MPW (1u << 9) /* Legacy MPW supported.*/ 51 #define MLX5_TXOFF_CONFIG_TXPP (1u << 10) /* Scheduling on timestamp.*/ 52 53 /* The most common offloads groups. */ 54 #define MLX5_TXOFF_CONFIG_NONE 0 55 #define MLX5_TXOFF_CONFIG_FULL (MLX5_TXOFF_CONFIG_MULTI | \ 56 MLX5_TXOFF_CONFIG_TSO | \ 57 MLX5_TXOFF_CONFIG_SWP | \ 58 MLX5_TXOFF_CONFIG_CSUM | \ 59 MLX5_TXOFF_CONFIG_INLINE | \ 60 MLX5_TXOFF_CONFIG_VLAN | \ 61 MLX5_TXOFF_CONFIG_METADATA) 62 63 #define MLX5_TXOFF_CONFIG(mask) (olx & MLX5_TXOFF_CONFIG_##mask) 64 65 #define MLX5_TXOFF_PRE_DECL(func) \ 66 uint16_t mlx5_tx_burst_##func(void *txq, \ 67 struct rte_mbuf **pkts, \ 68 uint16_t pkts_n) 69 70 #define MLX5_TXOFF_DECL(func, olx) \ 71 uint16_t mlx5_tx_burst_##func(void *txq, \ 72 struct rte_mbuf **pkts, \ 73 uint16_t pkts_n) \ 74 { \ 75 return mlx5_tx_burst_tmpl((struct mlx5_txq_data *)txq, \ 76 pkts, pkts_n, (olx)); \ 77 } 78 79 /* Mbuf dynamic flag offset for inline. */ 80 extern uint64_t rte_net_mlx5_dynf_inline_mask; 81 #define RTE_MBUF_F_TX_DYNF_NOINLINE rte_net_mlx5_dynf_inline_mask 82 83 extern uint32_t mlx5_ptype_table[] __rte_cache_aligned; 84 extern uint8_t mlx5_cksum_table[1 << 10] __rte_cache_aligned; 85 extern uint8_t mlx5_swp_types_table[1 << 10] __rte_cache_aligned; 86 87 struct mlx5_txq_stats { 88 #ifdef MLX5_PMD_SOFT_COUNTERS 89 uint64_t opackets; /**< Total of successfully sent packets. */ 90 uint64_t obytes; /**< Total of successfully sent bytes. */ 91 #endif 92 uint64_t oerrors; /**< Total number of failed transmitted packets. */ 93 }; 94 95 /* TX queue send local data. */ 96 __extension__ 97 struct mlx5_txq_local { 98 struct mlx5_wqe *wqe_last; /* last sent WQE pointer. */ 99 struct rte_mbuf *mbuf; /* first mbuf to process. 
*/ 100 uint16_t pkts_copy; /* packets copied to elts. */ 101 uint16_t pkts_sent; /* packets sent. */ 102 uint16_t pkts_loop; /* packets sent on loop entry. */ 103 uint16_t elts_free; /* available elts remain. */ 104 uint16_t wqe_free; /* available wqe remain. */ 105 uint16_t mbuf_off; /* data offset in current mbuf. */ 106 uint16_t mbuf_nseg; /* number of remaining mbuf. */ 107 uint16_t mbuf_free; /* number of inline mbufs to free. */ 108 }; 109 110 /* TX queue descriptor. */ 111 __extension__ 112 struct mlx5_txq_data { 113 uint16_t elts_head; /* Current counter in (*elts)[]. */ 114 uint16_t elts_tail; /* Counter of first element awaiting completion. */ 115 uint16_t elts_comp; /* elts index since last completion request. */ 116 uint16_t elts_s; /* Number of mbuf elements. */ 117 uint16_t elts_m; /* Mask for mbuf elements indices. */ 118 /* Fields related to elts mbuf storage. */ 119 uint16_t wqe_ci; /* Consumer index for work queue. */ 120 uint16_t wqe_pi; /* Producer index for work queue. */ 121 uint16_t wqe_s; /* Number of WQ elements. */ 122 uint16_t wqe_m; /* Mask Number for WQ elements. */ 123 uint16_t wqe_comp; /* WQE index since last completion request. */ 124 uint16_t wqe_thres; /* WQE threshold to request completion in CQ. */ 125 /* WQ related fields. */ 126 uint16_t cq_ci; /* Consumer index for completion queue. */ 127 uint16_t cq_pi; /* Production index for completion queue. */ 128 uint16_t cqe_s; /* Number of CQ elements. */ 129 uint16_t cqe_m; /* Mask for CQ indices. */ 130 /* CQ related fields. */ 131 uint16_t elts_n:4; /* elts[] length (in log2). */ 132 uint16_t cqe_n:4; /* Number of CQ elements (in log2). */ 133 uint16_t wqe_n:4; /* Number of WQ elements (in log2). */ 134 uint16_t tso_en:1; /* When set hardware TSO is enabled. */ 135 uint16_t tunnel_en:1; 136 /* When set TX offload for tunneled packets are supported. */ 137 uint16_t swp_en:1; /* Whether SW parser is enabled. */ 138 uint16_t vlan_en:1; /* VLAN insertion in WQE is supported. */ 139 uint16_t db_nc:1; /* Doorbell mapped to non-cached region. */ 140 uint16_t db_heu:1; /* Doorbell heuristic write barrier. */ 141 uint16_t rt_timestamp:1; /* Realtime timestamp format. */ 142 uint16_t wait_on_time:1; /* WQE with timestamp is supported. */ 143 uint16_t fast_free:1; /* mbuf fast free on Tx is enabled. */ 144 uint16_t inlen_send; /* Ordinary send data inline size. */ 145 uint16_t inlen_empw; /* eMPW max packet size to inline. */ 146 uint16_t inlen_mode; /* Minimal data length to inline. */ 147 uint32_t qp_num_8s; /* QP number shifted by 8. */ 148 uint64_t offloads; /* Offloads for Tx Queue. */ 149 struct mlx5_mr_ctrl mr_ctrl; /* MR control descriptor. */ 150 struct mlx5_wqe *wqes; /* Work queue. */ 151 struct mlx5_wqe *wqes_end; /* Work queue array limit. */ 152 #ifdef RTE_LIBRTE_MLX5_DEBUG 153 uint32_t *fcqs; /* Free completion queue (debug extended). */ 154 #else 155 uint16_t *fcqs; /* Free completion queue. */ 156 #endif 157 volatile struct mlx5_cqe *cqes; /* Completion queue. */ 158 volatile uint32_t *qp_db; /* Work queue doorbell. */ 159 volatile uint32_t *cq_db; /* Completion queue doorbell. */ 160 uint16_t port_id; /* Port ID of device. */ 161 uint16_t idx; /* Queue index. */ 162 uint64_t rt_timemask; /* Scheduling timestamp mask. */ 163 uint64_t ts_mask; /* Timestamp flag dynamic mask. */ 164 int32_t ts_offset; /* Timestamp field dynamic offset. */ 165 struct mlx5_dev_ctx_shared *sh; /* Shared context. */ 166 struct mlx5_txq_stats stats; /* TX queue counters. 
*/ 167 struct mlx5_uar_data uar_data; 168 struct rte_mbuf *elts[0]; 169 /* Storage for queued packets, must be the last field. */ 170 } __rte_cache_aligned; 171 172 /* TX queue control descriptor. */ 173 struct mlx5_txq_ctrl { 174 LIST_ENTRY(mlx5_txq_ctrl) next; /* Pointer to the next element. */ 175 uint32_t refcnt; /* Reference counter. */ 176 unsigned int socket; /* CPU socket ID for allocations. */ 177 bool is_hairpin; /* Whether TxQ type is Hairpin. */ 178 unsigned int max_inline_data; /* Max inline data. */ 179 unsigned int max_tso_header; /* Max TSO header size. */ 180 struct mlx5_txq_obj *obj; /* Verbs/DevX queue object. */ 181 struct mlx5_priv *priv; /* Back pointer to private data. */ 182 off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */ 183 uint16_t dump_file_n; /* Number of dump files. */ 184 struct rte_eth_hairpin_conf hairpin_conf; /* Hairpin configuration. */ 185 uint32_t hairpin_status; /* Hairpin binding status. */ 186 struct mlx5_txq_data txq; /* Data path structure. */ 187 /* Must be the last field in the structure, contains elts[]. */ 188 }; 189 190 /* mlx5_txq.c */ 191 192 int mlx5_tx_queue_start(struct rte_eth_dev *dev, uint16_t queue_id); 193 int mlx5_tx_queue_stop(struct rte_eth_dev *dev, uint16_t queue_id); 194 int mlx5_tx_queue_start_primary(struct rte_eth_dev *dev, uint16_t queue_id); 195 int mlx5_tx_queue_stop_primary(struct rte_eth_dev *dev, uint16_t queue_id); 196 int mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 197 unsigned int socket, const struct rte_eth_txconf *conf); 198 int mlx5_tx_hairpin_queue_setup 199 (struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 200 const struct rte_eth_hairpin_conf *hairpin_conf); 201 void mlx5_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid); 202 int mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd); 203 void mlx5_tx_uar_uninit_secondary(struct rte_eth_dev *dev); 204 int mlx5_txq_obj_verify(struct rte_eth_dev *dev); 205 struct mlx5_txq_ctrl *mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, 206 uint16_t desc, unsigned int socket, 207 const struct rte_eth_txconf *conf); 208 struct mlx5_txq_ctrl *mlx5_txq_hairpin_new 209 (struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 210 const struct rte_eth_hairpin_conf *hairpin_conf); 211 struct mlx5_txq_ctrl *mlx5_txq_get(struct rte_eth_dev *dev, uint16_t idx); 212 int mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx); 213 int mlx5_txq_releasable(struct rte_eth_dev *dev, uint16_t idx); 214 int mlx5_txq_verify(struct rte_eth_dev *dev); 215 void txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl); 216 void txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl); 217 uint64_t mlx5_get_tx_port_offloads(struct rte_eth_dev *dev); 218 void mlx5_txq_dynf_timestamp_set(struct rte_eth_dev *dev); 219 220 /* mlx5_tx.c */ 221 222 void mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq, 223 unsigned int olx __rte_unused); 224 int mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset); 225 void mlx5_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id, 226 struct rte_eth_txq_info *qinfo); 227 int mlx5_tx_burst_mode_get(struct rte_eth_dev *dev, uint16_t tx_queue_id, 228 struct rte_eth_burst_mode *mode); 229 230 /* mlx5_tx_empw.c */ 231 232 MLX5_TXOFF_PRE_DECL(full_empw); 233 MLX5_TXOFF_PRE_DECL(none_empw); 234 MLX5_TXOFF_PRE_DECL(md_empw); 235 MLX5_TXOFF_PRE_DECL(mt_empw); 236 MLX5_TXOFF_PRE_DECL(mtsc_empw); 237 MLX5_TXOFF_PRE_DECL(mti_empw); 238 MLX5_TXOFF_PRE_DECL(mtv_empw); 239 MLX5_TXOFF_PRE_DECL(mtiv_empw); 
240 MLX5_TXOFF_PRE_DECL(sc_empw); 241 MLX5_TXOFF_PRE_DECL(sci_empw); 242 MLX5_TXOFF_PRE_DECL(scv_empw); 243 MLX5_TXOFF_PRE_DECL(sciv_empw); 244 MLX5_TXOFF_PRE_DECL(i_empw); 245 MLX5_TXOFF_PRE_DECL(v_empw); 246 MLX5_TXOFF_PRE_DECL(iv_empw); 247 248 /* mlx5_tx_nompw.c */ 249 250 MLX5_TXOFF_PRE_DECL(full); 251 MLX5_TXOFF_PRE_DECL(none); 252 MLX5_TXOFF_PRE_DECL(md); 253 MLX5_TXOFF_PRE_DECL(mt); 254 MLX5_TXOFF_PRE_DECL(mtsc); 255 MLX5_TXOFF_PRE_DECL(mti); 256 MLX5_TXOFF_PRE_DECL(mtv); 257 MLX5_TXOFF_PRE_DECL(mtiv); 258 MLX5_TXOFF_PRE_DECL(sc); 259 MLX5_TXOFF_PRE_DECL(sci); 260 MLX5_TXOFF_PRE_DECL(scv); 261 MLX5_TXOFF_PRE_DECL(sciv); 262 MLX5_TXOFF_PRE_DECL(i); 263 MLX5_TXOFF_PRE_DECL(v); 264 MLX5_TXOFF_PRE_DECL(iv); 265 266 /* mlx5_tx_txpp.c */ 267 268 MLX5_TXOFF_PRE_DECL(full_ts_nompw); 269 MLX5_TXOFF_PRE_DECL(full_ts_nompwi); 270 MLX5_TXOFF_PRE_DECL(full_ts); 271 MLX5_TXOFF_PRE_DECL(full_ts_noi); 272 MLX5_TXOFF_PRE_DECL(none_ts); 273 MLX5_TXOFF_PRE_DECL(mdi_ts); 274 MLX5_TXOFF_PRE_DECL(mti_ts); 275 MLX5_TXOFF_PRE_DECL(mtiv_ts); 276 277 /* mlx5_tx_mpw.c */ 278 279 MLX5_TXOFF_PRE_DECL(none_mpw); 280 MLX5_TXOFF_PRE_DECL(mci_mpw); 281 MLX5_TXOFF_PRE_DECL(mc_mpw); 282 MLX5_TXOFF_PRE_DECL(i_mpw); 283 284 static __rte_always_inline struct mlx5_uar_data * 285 mlx5_tx_bfreg(struct mlx5_txq_data *txq) 286 { 287 return &MLX5_PROC_PRIV(txq->port_id)->uar_table[txq->idx]; 288 } 289 290 /** 291 * Ring TX queue doorbell and flush the update by write memory barrier. 292 * 293 * @param txq 294 * Pointer to TX queue structure. 295 * @param wqe 296 * Pointer to the last WQE posted in the NIC. 297 */ 298 static __rte_always_inline void 299 mlx5_tx_dbrec(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe) 300 { 301 mlx5_doorbell_ring(mlx5_tx_bfreg(txq), *(volatile uint64_t *)wqe, 302 txq->wqe_ci, txq->qp_db, 1); 303 } 304 305 /** 306 * Convert timestamp from mbuf format to linear counter 307 * of Clock Queue completions (24 bits). 308 * 309 * @param sh 310 * Pointer to the device shared context to fetch Tx 311 * packet pacing timestamp and parameters. 312 * @param ts 313 * Timestamp from mbuf to convert. 314 * @return 315 * positive or zero value - completion ID to wait. 316 * negative value - conversion error. 317 */ 318 static __rte_always_inline int32_t 319 mlx5_txpp_convert_tx_ts(struct mlx5_dev_ctx_shared *sh, uint64_t mts) 320 { 321 uint64_t ts, ci; 322 uint32_t tick; 323 324 do { 325 /* 326 * Read atomically two uint64_t fields and compare lsb bits. 327 * It there is no match - the timestamp was updated in 328 * the service thread, data should be re-read. 329 */ 330 rte_compiler_barrier(); 331 ci = __atomic_load_n(&sh->txpp.ts.ci_ts, __ATOMIC_RELAXED); 332 ts = __atomic_load_n(&sh->txpp.ts.ts, __ATOMIC_RELAXED); 333 rte_compiler_barrier(); 334 if (!((ts ^ ci) << (64 - MLX5_CQ_INDEX_WIDTH))) 335 break; 336 } while (true); 337 /* Perform the skew correction, positive value to send earlier. */ 338 mts -= sh->txpp.skew; 339 mts -= ts; 340 if (unlikely(mts >= UINT64_MAX / 2)) { 341 /* We have negative integer, mts is in the past. */ 342 __atomic_fetch_add(&sh->txpp.err_ts_past, 343 1, __ATOMIC_RELAXED); 344 return -1; 345 } 346 tick = sh->txpp.tick; 347 MLX5_ASSERT(tick); 348 /* Convert delta to completions, round up. */ 349 mts = (mts + tick - 1) / tick; 350 if (unlikely(mts >= (1 << MLX5_CQ_INDEX_WIDTH) / 2 - 1)) { 351 /* We have mts is too distant future. 
*/ 352 __atomic_fetch_add(&sh->txpp.err_ts_future, 353 1, __ATOMIC_RELAXED); 354 return -1; 355 } 356 mts <<= 64 - MLX5_CQ_INDEX_WIDTH; 357 ci += mts; 358 ci >>= 64 - MLX5_CQ_INDEX_WIDTH; 359 return ci; 360 } 361 362 /** 363 * Set Software Parser flags and offsets in Ethernet Segment of WQE. 364 * Flags must be preliminary initialized to zero. 365 * 366 * @param loc 367 * Pointer to burst routine local context. 368 * @param swp_flags 369 * Pointer to store Software Parser flags. 370 * @param olx 371 * Configured Tx offloads mask. It is fully defined at 372 * compile time and may be used for optimization. 373 * 374 * @return 375 * Software Parser offsets packed in dword. 376 * Software Parser flags are set by pointer. 377 */ 378 static __rte_always_inline uint32_t 379 txq_mbuf_to_swp(struct mlx5_txq_local *__rte_restrict loc, 380 uint8_t *swp_flags, 381 unsigned int olx) 382 { 383 uint64_t ol, tunnel; 384 unsigned int idx, off; 385 uint32_t set; 386 387 if (!MLX5_TXOFF_CONFIG(SWP)) 388 return 0; 389 ol = loc->mbuf->ol_flags; 390 tunnel = ol & RTE_MBUF_F_TX_TUNNEL_MASK; 391 /* 392 * Check whether Software Parser is required. 393 * Only customized tunnels may ask for. 394 */ 395 if (likely(tunnel != RTE_MBUF_F_TX_TUNNEL_UDP && tunnel != RTE_MBUF_F_TX_TUNNEL_IP)) 396 return 0; 397 /* 398 * The index should have: 399 * bit[0:1] = RTE_MBUF_F_TX_L4_MASK 400 * bit[4] = RTE_MBUF_F_TX_IPV6 401 * bit[8] = RTE_MBUF_F_TX_OUTER_IPV6 402 * bit[9] = RTE_MBUF_F_TX_OUTER_UDP 403 */ 404 idx = (ol & (RTE_MBUF_F_TX_L4_MASK | RTE_MBUF_F_TX_IPV6 | RTE_MBUF_F_TX_OUTER_IPV6)) >> 52; 405 idx |= (tunnel == RTE_MBUF_F_TX_TUNNEL_UDP) ? (1 << 9) : 0; 406 *swp_flags = mlx5_swp_types_table[idx]; 407 /* 408 * Set offsets for SW parser. Since ConnectX-5, SW parser just 409 * complements HW parser. SW parser starts to engage only if HW parser 410 * can't reach a header. For the older devices, HW parser will not kick 411 * in if any of SWP offsets is set. Therefore, all of the L3 offsets 412 * should be set regardless of HW offload. 413 */ 414 off = loc->mbuf->outer_l2_len; 415 if (MLX5_TXOFF_CONFIG(VLAN) && ol & RTE_MBUF_F_TX_VLAN) 416 off += sizeof(struct rte_vlan_hdr); 417 set = (off >> 1) << 8; /* Outer L3 offset. */ 418 off += loc->mbuf->outer_l3_len; 419 if (tunnel == RTE_MBUF_F_TX_TUNNEL_UDP) 420 set |= off >> 1; /* Outer L4 offset. */ 421 if (ol & (RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IPV6)) { /* Inner IP. */ 422 const uint64_t csum = ol & RTE_MBUF_F_TX_L4_MASK; 423 off += loc->mbuf->l2_len; 424 set |= (off >> 1) << 24; /* Inner L3 offset. */ 425 if (csum == RTE_MBUF_F_TX_TCP_CKSUM || 426 csum == RTE_MBUF_F_TX_UDP_CKSUM || 427 (MLX5_TXOFF_CONFIG(TSO) && ol & RTE_MBUF_F_TX_TCP_SEG)) { 428 off += loc->mbuf->l3_len; 429 set |= (off >> 1) << 16; /* Inner L4 offset. */ 430 } 431 } 432 set = rte_cpu_to_le_32(set); 433 return set; 434 } 435 436 /** 437 * Convert the Checksum offloads to Verbs. 438 * 439 * @param buf 440 * Pointer to the mbuf. 441 * 442 * @return 443 * Converted checksum flags. 
444 */ 445 static __rte_always_inline uint8_t 446 txq_ol_cksum_to_cs(struct rte_mbuf *buf) 447 { 448 uint32_t idx; 449 uint8_t is_tunnel = !!(buf->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK); 450 const uint64_t ol_flags_mask = RTE_MBUF_F_TX_TCP_SEG | RTE_MBUF_F_TX_L4_MASK | 451 RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_OUTER_IP_CKSUM; 452 453 /* 454 * The index should have: 455 * bit[0] = RTE_MBUF_F_TX_TCP_SEG 456 * bit[2:3] = RTE_MBUF_F_TX_UDP_CKSUM, RTE_MBUF_F_TX_TCP_CKSUM 457 * bit[4] = RTE_MBUF_F_TX_IP_CKSUM 458 * bit[8] = RTE_MBUF_F_TX_OUTER_IP_CKSUM 459 * bit[9] = tunnel 460 */ 461 idx = ((buf->ol_flags & ol_flags_mask) >> 50) | (!!is_tunnel << 9); 462 return mlx5_cksum_table[idx]; 463 } 464 465 /** 466 * Free the mbufs from the linear array of pointers. 467 * 468 * @param txq 469 * Pointer to Tx queue structure. 470 * @param pkts 471 * Pointer to array of packets to be free. 472 * @param pkts_n 473 * Number of packets to be freed. 474 * @param olx 475 * Configured Tx offloads mask. It is fully defined at 476 * compile time and may be used for optimization. 477 */ 478 static __rte_always_inline void 479 mlx5_tx_free_mbuf(struct mlx5_txq_data *__rte_restrict txq, 480 struct rte_mbuf **__rte_restrict pkts, 481 unsigned int pkts_n, 482 unsigned int olx __rte_unused) 483 { 484 struct rte_mempool *pool = NULL; 485 struct rte_mbuf **p_free = NULL; 486 struct rte_mbuf *mbuf; 487 unsigned int n_free = 0; 488 489 /* 490 * The implemented algorithm eliminates 491 * copying pointers to temporary array 492 * for rte_mempool_put_bulk() calls. 493 */ 494 MLX5_ASSERT(pkts); 495 MLX5_ASSERT(pkts_n); 496 /* 497 * Free mbufs directly to the pool in bulk 498 * if fast free offload is engaged 499 */ 500 if (!MLX5_TXOFF_CONFIG(MULTI) && txq->fast_free) { 501 mbuf = *pkts; 502 pool = mbuf->pool; 503 rte_mempool_put_bulk(pool, (void *)pkts, pkts_n); 504 return; 505 } 506 for (;;) { 507 for (;;) { 508 /* 509 * Decrement mbuf reference counter, detach 510 * indirect and external buffers if needed. 511 */ 512 mbuf = rte_pktmbuf_prefree_seg(*pkts); 513 if (likely(mbuf != NULL)) { 514 MLX5_ASSERT(mbuf == *pkts); 515 if (likely(n_free != 0)) { 516 if (unlikely(pool != mbuf->pool)) 517 /* From different pool. */ 518 break; 519 } else { 520 /* Start new scan array. */ 521 pool = mbuf->pool; 522 p_free = pkts; 523 } 524 ++n_free; 525 ++pkts; 526 --pkts_n; 527 if (unlikely(pkts_n == 0)) { 528 mbuf = NULL; 529 break; 530 } 531 } else { 532 /* 533 * This happens if mbuf is still referenced. 534 * We can't put it back to the pool, skip. 535 */ 536 ++pkts; 537 --pkts_n; 538 if (unlikely(n_free != 0)) 539 /* There is some array to free.*/ 540 break; 541 if (unlikely(pkts_n == 0)) 542 /* Last mbuf, nothing to free. */ 543 return; 544 } 545 } 546 for (;;) { 547 /* 548 * This loop is implemented to avoid multiple 549 * inlining of rte_mempool_put_bulk(). 550 */ 551 MLX5_ASSERT(pool); 552 MLX5_ASSERT(p_free); 553 MLX5_ASSERT(n_free); 554 /* 555 * Free the array of pre-freed mbufs 556 * belonging to the same memory pool. 557 */ 558 rte_mempool_put_bulk(pool, (void *)p_free, n_free); 559 if (unlikely(mbuf != NULL)) { 560 /* There is the request to start new scan. */ 561 pool = mbuf->pool; 562 p_free = pkts++; 563 n_free = 1; 564 --pkts_n; 565 if (likely(pkts_n != 0)) 566 break; 567 /* 568 * This is the last mbuf to be freed. 569 * Do one more loop iteration to complete. 570 * This is rare case of the last unique mbuf. 
571 */ 572 mbuf = NULL; 573 continue; 574 } 575 if (likely(pkts_n == 0)) 576 return; 577 n_free = 0; 578 break; 579 } 580 } 581 } 582 583 /** 584 * No inline version to free buffers for optimal call 585 * on the tx_burst completion. 586 */ 587 static __rte_noinline void 588 __mlx5_tx_free_mbuf(struct mlx5_txq_data *__rte_restrict txq, 589 struct rte_mbuf **__rte_restrict pkts, 590 unsigned int pkts_n, 591 unsigned int olx __rte_unused) 592 { 593 mlx5_tx_free_mbuf(txq, pkts, pkts_n, olx); 594 } 595 596 /** 597 * Free the mbuf from the elts ring buffer till new tail. 598 * 599 * @param txq 600 * Pointer to Tx queue structure. 601 * @param tail 602 * Index in elts to free up to, becomes new elts tail. 603 * @param olx 604 * Configured Tx offloads mask. It is fully defined at 605 * compile time and may be used for optimization. 606 */ 607 static __rte_always_inline void 608 mlx5_tx_free_elts(struct mlx5_txq_data *__rte_restrict txq, 609 uint16_t tail, 610 unsigned int olx __rte_unused) 611 { 612 uint16_t n_elts = tail - txq->elts_tail; 613 614 MLX5_ASSERT(n_elts); 615 MLX5_ASSERT(n_elts <= txq->elts_s); 616 /* 617 * Implement a loop to support ring buffer wraparound 618 * with single inlining of mlx5_tx_free_mbuf(). 619 */ 620 do { 621 unsigned int part; 622 623 part = txq->elts_s - (txq->elts_tail & txq->elts_m); 624 part = RTE_MIN(part, n_elts); 625 MLX5_ASSERT(part); 626 MLX5_ASSERT(part <= txq->elts_s); 627 mlx5_tx_free_mbuf(txq, 628 &txq->elts[txq->elts_tail & txq->elts_m], 629 part, olx); 630 txq->elts_tail += part; 631 n_elts -= part; 632 } while (n_elts); 633 } 634 635 /** 636 * Store the mbuf being sent into elts ring buffer. 637 * On Tx completion these mbufs will be freed. 638 * 639 * @param txq 640 * Pointer to Tx queue structure. 641 * @param pkts 642 * Pointer to array of packets to be stored. 643 * @param pkts_n 644 * Number of packets to be stored. 645 * @param olx 646 * Configured Tx offloads mask. It is fully defined at 647 * compile time and may be used for optimization. 648 */ 649 static __rte_always_inline void 650 mlx5_tx_copy_elts(struct mlx5_txq_data *__rte_restrict txq, 651 struct rte_mbuf **__rte_restrict pkts, 652 unsigned int pkts_n, 653 unsigned int olx __rte_unused) 654 { 655 unsigned int part; 656 struct rte_mbuf **elts = (struct rte_mbuf **)txq->elts; 657 658 MLX5_ASSERT(pkts); 659 MLX5_ASSERT(pkts_n); 660 part = txq->elts_s - (txq->elts_head & txq->elts_m); 661 MLX5_ASSERT(part); 662 MLX5_ASSERT(part <= txq->elts_s); 663 /* This code is a good candidate for vectorizing with SIMD. */ 664 rte_memcpy((void *)(elts + (txq->elts_head & txq->elts_m)), 665 (void *)pkts, 666 RTE_MIN(part, pkts_n) * sizeof(struct rte_mbuf *)); 667 txq->elts_head += pkts_n; 668 if (unlikely(part < pkts_n)) 669 /* The copy is wrapping around the elts array. */ 670 rte_memcpy((void *)elts, (void *)(pkts + part), 671 (pkts_n - part) * sizeof(struct rte_mbuf *)); 672 } 673 674 /** 675 * Check if the completion request flag should be set in the last WQE. 676 * Both pushed mbufs and WQEs are monitored and the completion request 677 * flag is set if any of thresholds is reached. 678 * 679 * @param txq 680 * Pointer to TX queue structure. 681 * @param loc 682 * Pointer to burst routine local context. 683 * @param olx 684 * Configured Tx offloads mask. It is fully defined at 685 * compile time and may be used for optimization. 
686 */ 687 static __rte_always_inline void 688 mlx5_tx_request_completion(struct mlx5_txq_data *__rte_restrict txq, 689 struct mlx5_txq_local *__rte_restrict loc, 690 unsigned int olx) 691 { 692 uint16_t head = txq->elts_head; 693 unsigned int part; 694 695 part = MLX5_TXOFF_CONFIG(INLINE) ? 696 0 : loc->pkts_sent - loc->pkts_copy; 697 head += part; 698 if ((uint16_t)(head - txq->elts_comp) >= MLX5_TX_COMP_THRESH || 699 (MLX5_TXOFF_CONFIG(INLINE) && 700 (uint16_t)(txq->wqe_ci - txq->wqe_comp) >= txq->wqe_thres)) { 701 volatile struct mlx5_wqe *last = loc->wqe_last; 702 703 MLX5_ASSERT(last); 704 txq->elts_comp = head; 705 if (MLX5_TXOFF_CONFIG(INLINE)) 706 txq->wqe_comp = txq->wqe_ci; 707 /* Request unconditional completion on last WQE. */ 708 last->cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS << 709 MLX5_COMP_MODE_OFFSET); 710 /* Save elts_head in dedicated free on completion queue. */ 711 #ifdef RTE_LIBRTE_MLX5_DEBUG 712 txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head | 713 (last->cseg.opcode >> 8) << 16; 714 #else 715 txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head; 716 #endif 717 /* A CQE slot must always be available. */ 718 MLX5_ASSERT((txq->cq_pi - txq->cq_ci) <= txq->cqe_s); 719 } 720 } 721 722 /** 723 * Build the Control Segment with specified opcode: 724 * - MLX5_OPCODE_SEND 725 * - MLX5_OPCODE_ENHANCED_MPSW 726 * - MLX5_OPCODE_TSO 727 * 728 * @param txq 729 * Pointer to TX queue structure. 730 * @param loc 731 * Pointer to burst routine local context. 732 * @param wqe 733 * Pointer to WQE to fill with built Control Segment. 734 * @param ds 735 * Supposed length of WQE in segments. 736 * @param opcode 737 * SQ WQE opcode to put into Control Segment. 738 * @param olx 739 * Configured Tx offloads mask. It is fully defined at 740 * compile time and may be used for optimization. 741 */ 742 static __rte_always_inline void 743 mlx5_tx_cseg_init(struct mlx5_txq_data *__rte_restrict txq, 744 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 745 struct mlx5_wqe *__rte_restrict wqe, 746 unsigned int ds, 747 unsigned int opcode, 748 unsigned int olx __rte_unused) 749 { 750 struct mlx5_wqe_cseg *__rte_restrict cs = &wqe->cseg; 751 752 /* For legacy MPW replace the EMPW by TSO with modifier. */ 753 if (MLX5_TXOFF_CONFIG(MPW) && opcode == MLX5_OPCODE_ENHANCED_MPSW) 754 opcode = MLX5_OPCODE_TSO | MLX5_OPC_MOD_MPW << 24; 755 cs->opcode = rte_cpu_to_be_32((txq->wqe_ci << 8) | opcode); 756 cs->sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 757 cs->flags = RTE_BE32(MLX5_COMP_ONLY_FIRST_ERR << 758 MLX5_COMP_MODE_OFFSET); 759 cs->misc = RTE_BE32(0); 760 } 761 762 /** 763 * Build the Synchronize Queue Segment with specified completion index. 764 * 765 * @param txq 766 * Pointer to TX queue structure. 767 * @param loc 768 * Pointer to burst routine local context. 769 * @param wqe 770 * Pointer to WQE to fill with built Control Segment. 771 * @param wci 772 * Completion index in Clock Queue to wait. 773 * @param olx 774 * Configured Tx offloads mask. It is fully defined at 775 * compile time and may be used for optimization. 
776 */ 777 static __rte_always_inline void 778 mlx5_tx_qseg_init(struct mlx5_txq_data *restrict txq, 779 struct mlx5_txq_local *restrict loc __rte_unused, 780 struct mlx5_wqe *restrict wqe, 781 unsigned int wci, 782 unsigned int olx __rte_unused) 783 { 784 struct mlx5_wqe_qseg *qs; 785 786 qs = RTE_PTR_ADD(wqe, MLX5_WSEG_SIZE); 787 qs->max_index = rte_cpu_to_be_32(wci); 788 qs->qpn_cqn = rte_cpu_to_be_32(txq->sh->txpp.clock_queue.cq_obj.cq->id); 789 qs->reserved0 = RTE_BE32(0); 790 qs->reserved1 = RTE_BE32(0); 791 } 792 793 /** 794 * Build the Wait on Time Segment with specified timestamp value. 795 * 796 * @param txq 797 * Pointer to TX queue structure. 798 * @param loc 799 * Pointer to burst routine local context. 800 * @param wqe 801 * Pointer to WQE to fill with built Control Segment. 802 * @param ts 803 * Timesatmp value to wait. 804 * @param olx 805 * Configured Tx offloads mask. It is fully defined at 806 * compile time and may be used for optimization. 807 */ 808 static __rte_always_inline void 809 mlx5_tx_wseg_init(struct mlx5_txq_data *restrict txq, 810 struct mlx5_txq_local *restrict loc __rte_unused, 811 struct mlx5_wqe *restrict wqe, 812 uint64_t ts, 813 unsigned int olx __rte_unused) 814 { 815 struct mlx5_wqe_wseg *ws; 816 817 ws = RTE_PTR_ADD(wqe, MLX5_WSEG_SIZE); 818 ws->operation = rte_cpu_to_be_32(MLX5_WAIT_COND_CYCLIC_BIGGER); 819 ws->lkey = RTE_BE32(0); 820 ws->va_high = RTE_BE32(0); 821 ws->va_low = RTE_BE32(0); 822 if (txq->rt_timestamp) { 823 ts = ts % (uint64_t)NS_PER_S 824 | (ts / (uint64_t)NS_PER_S) << 32; 825 } 826 ws->value = rte_cpu_to_be_64(ts); 827 ws->mask = txq->rt_timemask; 828 } 829 830 /** 831 * Build the Ethernet Segment without inlined data. 832 * Supports Software Parser, Checksums and VLAN insertion Tx offload features. 833 * 834 * @param txq 835 * Pointer to TX queue structure. 836 * @param loc 837 * Pointer to burst routine local context. 838 * @param wqe 839 * Pointer to WQE to fill with built Ethernet Segment. 840 * @param olx 841 * Configured Tx offloads mask. It is fully defined at 842 * compile time and may be used for optimization. 843 */ 844 static __rte_always_inline void 845 mlx5_tx_eseg_none(struct mlx5_txq_data *__rte_restrict txq __rte_unused, 846 struct mlx5_txq_local *__rte_restrict loc, 847 struct mlx5_wqe *__rte_restrict wqe, 848 unsigned int olx) 849 { 850 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 851 uint32_t csum; 852 853 /* 854 * Calculate and set check sum flags first, dword field 855 * in segment may be shared with Software Parser flags. 856 */ 857 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 858 es->flags = rte_cpu_to_le_32(csum); 859 /* 860 * Calculate and set Software Parser offsets and flags. 861 * These flags a set for custom UDP and IP tunnel packets. 862 */ 863 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 864 /* Fill metadata field if needed. */ 865 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 866 loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ? 867 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 868 0 : 0; 869 /* Engage VLAN tag insertion feature if requested. */ 870 if (MLX5_TXOFF_CONFIG(VLAN) && 871 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) { 872 /* 873 * We should get here only if device support 874 * this feature correctly. 
875 */ 876 MLX5_ASSERT(txq->vlan_en); 877 es->inline_hdr = rte_cpu_to_be_32(MLX5_ETH_WQE_VLAN_INSERT | 878 loc->mbuf->vlan_tci); 879 } else { 880 es->inline_hdr = RTE_BE32(0); 881 } 882 } 883 884 /** 885 * Build the Ethernet Segment with minimal inlined data 886 * of MLX5_ESEG_MIN_INLINE_SIZE bytes length. This is 887 * used to fill the gap in single WQEBB WQEs. 888 * Supports Software Parser, Checksums and VLAN 889 * insertion Tx offload features. 890 * 891 * @param txq 892 * Pointer to TX queue structure. 893 * @param loc 894 * Pointer to burst routine local context. 895 * @param wqe 896 * Pointer to WQE to fill with built Ethernet Segment. 897 * @param vlan 898 * Length of VLAN tag insertion if any. 899 * @param olx 900 * Configured Tx offloads mask. It is fully defined at 901 * compile time and may be used for optimization. 902 */ 903 static __rte_always_inline void 904 mlx5_tx_eseg_dmin(struct mlx5_txq_data *__rte_restrict txq __rte_unused, 905 struct mlx5_txq_local *__rte_restrict loc, 906 struct mlx5_wqe *__rte_restrict wqe, 907 unsigned int vlan, 908 unsigned int olx) 909 { 910 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 911 uint32_t csum; 912 uint8_t *psrc, *pdst; 913 914 /* 915 * Calculate and set check sum flags first, dword field 916 * in segment may be shared with Software Parser flags. 917 */ 918 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 919 es->flags = rte_cpu_to_le_32(csum); 920 /* 921 * Calculate and set Software Parser offsets and flags. 922 * These flags a set for custom UDP and IP tunnel packets. 923 */ 924 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 925 /* Fill metadata field if needed. */ 926 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 927 loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ? 928 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 929 0 : 0; 930 psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 931 es->inline_hdr_sz = RTE_BE16(MLX5_ESEG_MIN_INLINE_SIZE); 932 es->inline_data = *(unaligned_uint16_t *)psrc; 933 psrc += sizeof(uint16_t); 934 pdst = (uint8_t *)(es + 1); 935 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 936 /* Implement VLAN tag insertion as part inline data. */ 937 memcpy(pdst, psrc, 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t)); 938 pdst += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 939 psrc += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 940 /* Insert VLAN ethertype + VLAN tag. */ 941 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 942 ((RTE_ETHER_TYPE_VLAN << 16) | 943 loc->mbuf->vlan_tci); 944 pdst += sizeof(struct rte_vlan_hdr); 945 /* Copy the rest two bytes from packet data. */ 946 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t))); 947 *(uint16_t *)pdst = *(unaligned_uint16_t *)psrc; 948 } else { 949 /* Fill the gap in the title WQEBB with inline data. */ 950 rte_mov16(pdst, psrc); 951 } 952 } 953 954 /** 955 * Build the Ethernet Segment with entire packet data inlining. Checks the 956 * boundary of WQEBB and ring buffer wrapping, supports Software Parser, 957 * Checksums and VLAN insertion Tx offload features. 958 * 959 * @param txq 960 * Pointer to TX queue structure. 961 * @param loc 962 * Pointer to burst routine local context. 963 * @param wqe 964 * Pointer to WQE to fill with built Ethernet Segment. 965 * @param vlan 966 * Length of VLAN tag insertion if any. 967 * @param inlen 968 * Length of data to inline (VLAN included, if any). 969 * @param tso 970 * TSO flag, set mss field from the packet. 971 * @param olx 972 * Configured Tx offloads mask. 
It is fully defined at 973 * compile time and may be used for optimization. 974 * 975 * @return 976 * Pointer to the next Data Segment (aligned and wrapped around). 977 */ 978 static __rte_always_inline struct mlx5_wqe_dseg * 979 mlx5_tx_eseg_data(struct mlx5_txq_data *__rte_restrict txq, 980 struct mlx5_txq_local *__rte_restrict loc, 981 struct mlx5_wqe *__rte_restrict wqe, 982 unsigned int vlan, 983 unsigned int inlen, 984 unsigned int tso, 985 unsigned int olx) 986 { 987 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 988 uint32_t csum; 989 uint8_t *psrc, *pdst; 990 unsigned int part; 991 992 /* 993 * Calculate and set check sum flags first, dword field 994 * in segment may be shared with Software Parser flags. 995 */ 996 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 997 if (tso) { 998 csum <<= 24; 999 csum |= loc->mbuf->tso_segsz; 1000 es->flags = rte_cpu_to_be_32(csum); 1001 } else { 1002 es->flags = rte_cpu_to_le_32(csum); 1003 } 1004 /* 1005 * Calculate and set Software Parser offsets and flags. 1006 * These flags a set for custom UDP and IP tunnel packets. 1007 */ 1008 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 1009 /* Fill metadata field if needed. */ 1010 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 1011 loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ? 1012 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 1013 0 : 0; 1014 psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 1015 es->inline_hdr_sz = rte_cpu_to_be_16(inlen); 1016 es->inline_data = *(unaligned_uint16_t *)psrc; 1017 psrc += sizeof(uint16_t); 1018 pdst = (uint8_t *)(es + 1); 1019 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 1020 /* Implement VLAN tag insertion as part inline data. */ 1021 memcpy(pdst, psrc, 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t)); 1022 pdst += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 1023 psrc += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 1024 /* Insert VLAN ethertype + VLAN tag. */ 1025 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 1026 ((RTE_ETHER_TYPE_VLAN << 16) | 1027 loc->mbuf->vlan_tci); 1028 pdst += sizeof(struct rte_vlan_hdr); 1029 /* Copy the rest two bytes from packet data. */ 1030 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t))); 1031 *(uint16_t *)pdst = *(unaligned_uint16_t *)psrc; 1032 psrc += sizeof(uint16_t); 1033 } else { 1034 /* Fill the gap in the title WQEBB with inline data. */ 1035 rte_mov16(pdst, psrc); 1036 psrc += sizeof(rte_v128u32_t); 1037 } 1038 pdst = (uint8_t *)(es + 2); 1039 MLX5_ASSERT(inlen >= MLX5_ESEG_MIN_INLINE_SIZE); 1040 MLX5_ASSERT(pdst < (uint8_t *)txq->wqes_end); 1041 inlen -= MLX5_ESEG_MIN_INLINE_SIZE; 1042 if (!inlen) { 1043 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE)); 1044 return (struct mlx5_wqe_dseg *)pdst; 1045 } 1046 /* 1047 * The WQEBB space availability is checked by caller. 1048 * Here we should be aware of WQE ring buffer wraparound only. 1049 */ 1050 part = (uint8_t *)txq->wqes_end - pdst; 1051 part = RTE_MIN(part, inlen); 1052 do { 1053 rte_memcpy(pdst, psrc, part); 1054 inlen -= part; 1055 if (likely(!inlen)) { 1056 /* 1057 * If return value is not used by the caller 1058 * the code below will be optimized out. 
1059 */ 1060 pdst += part; 1061 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 1062 if (unlikely(pdst >= (uint8_t *)txq->wqes_end)) 1063 pdst = (uint8_t *)txq->wqes; 1064 return (struct mlx5_wqe_dseg *)pdst; 1065 } 1066 pdst = (uint8_t *)txq->wqes; 1067 psrc += part; 1068 part = inlen; 1069 } while (true); 1070 } 1071 1072 /** 1073 * Copy data from chain of mbuf to the specified linear buffer. 1074 * Checksums and VLAN insertion Tx offload features. If data 1075 * from some mbuf copied completely this mbuf is freed. Local 1076 * structure is used to keep the byte stream state. 1077 * 1078 * @param pdst 1079 * Pointer to the destination linear buffer. 1080 * @param loc 1081 * Pointer to burst routine local context. 1082 * @param len 1083 * Length of data to be copied. 1084 * @param must 1085 * Length of data to be copied ignoring no inline hint. 1086 * @param olx 1087 * Configured Tx offloads mask. It is fully defined at 1088 * compile time and may be used for optimization. 1089 * 1090 * @return 1091 * Number of actual copied data bytes. This is always greater than or 1092 * equal to must parameter and might be lesser than len in no inline 1093 * hint flag is encountered. 1094 */ 1095 static __rte_always_inline unsigned int 1096 mlx5_tx_mseg_memcpy(uint8_t *pdst, 1097 struct mlx5_txq_local *__rte_restrict loc, 1098 unsigned int len, 1099 unsigned int must, 1100 unsigned int olx __rte_unused) 1101 { 1102 struct rte_mbuf *mbuf; 1103 unsigned int part, dlen, copy = 0; 1104 uint8_t *psrc; 1105 1106 MLX5_ASSERT(len); 1107 do { 1108 /* Allow zero length packets, must check first. */ 1109 dlen = rte_pktmbuf_data_len(loc->mbuf); 1110 if (dlen <= loc->mbuf_off) { 1111 /* Exhausted packet, just free. */ 1112 mbuf = loc->mbuf; 1113 loc->mbuf = mbuf->next; 1114 rte_pktmbuf_free_seg(mbuf); 1115 loc->mbuf_off = 0; 1116 MLX5_ASSERT(loc->mbuf_nseg > 1); 1117 MLX5_ASSERT(loc->mbuf); 1118 --loc->mbuf_nseg; 1119 if (loc->mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) { 1120 unsigned int diff; 1121 1122 if (copy >= must) { 1123 /* 1124 * We already copied the minimal 1125 * requested amount of data. 1126 */ 1127 return copy; 1128 } 1129 diff = must - copy; 1130 if (diff <= rte_pktmbuf_data_len(loc->mbuf)) { 1131 /* 1132 * Copy only the minimal required 1133 * part of the data buffer. Limit amount 1134 * of data to be copied to the length of 1135 * available space. 1136 */ 1137 len = RTE_MIN(len, diff); 1138 } 1139 } 1140 continue; 1141 } 1142 dlen -= loc->mbuf_off; 1143 psrc = rte_pktmbuf_mtod_offset(loc->mbuf, uint8_t *, 1144 loc->mbuf_off); 1145 part = RTE_MIN(len, dlen); 1146 rte_memcpy(pdst, psrc, part); 1147 copy += part; 1148 loc->mbuf_off += part; 1149 len -= part; 1150 if (!len) { 1151 if (loc->mbuf_off >= rte_pktmbuf_data_len(loc->mbuf)) { 1152 loc->mbuf_off = 0; 1153 /* Exhausted packet, just free. */ 1154 mbuf = loc->mbuf; 1155 loc->mbuf = mbuf->next; 1156 rte_pktmbuf_free_seg(mbuf); 1157 loc->mbuf_off = 0; 1158 MLX5_ASSERT(loc->mbuf_nseg >= 1); 1159 --loc->mbuf_nseg; 1160 } 1161 return copy; 1162 } 1163 pdst += part; 1164 } while (true); 1165 } 1166 1167 /** 1168 * Build the Ethernet Segment with inlined data from multi-segment packet. 1169 * Checks the boundary of WQEBB and ring buffer wrapping, supports Software 1170 * Parser, Checksums and VLAN insertion Tx offload features. 1171 * 1172 * @param txq 1173 * Pointer to TX queue structure. 1174 * @param loc 1175 * Pointer to burst routine local context. 1176 * @param wqe 1177 * Pointer to WQE to fill with built Ethernet Segment. 
1178 * @param vlan 1179 * Length of VLAN tag insertion if any. 1180 * @param inlen 1181 * Length of data to inline (VLAN included, if any). 1182 * @param tso 1183 * TSO flag, set mss field from the packet. 1184 * @param olx 1185 * Configured Tx offloads mask. It is fully defined at 1186 * compile time and may be used for optimization. 1187 * 1188 * @return 1189 * Pointer to the next Data Segment (aligned and possible NOT wrapped 1190 * around - caller should do wrapping check on its own). 1191 */ 1192 static __rte_always_inline struct mlx5_wqe_dseg * 1193 mlx5_tx_eseg_mdat(struct mlx5_txq_data *__rte_restrict txq, 1194 struct mlx5_txq_local *__rte_restrict loc, 1195 struct mlx5_wqe *__rte_restrict wqe, 1196 unsigned int vlan, 1197 unsigned int inlen, 1198 unsigned int tso, 1199 unsigned int olx) 1200 { 1201 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 1202 uint32_t csum; 1203 uint8_t *pdst; 1204 unsigned int part, tlen = 0; 1205 1206 /* 1207 * Calculate and set check sum flags first, uint32_t field 1208 * in segment may be shared with Software Parser flags. 1209 */ 1210 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 1211 if (tso) { 1212 csum <<= 24; 1213 csum |= loc->mbuf->tso_segsz; 1214 es->flags = rte_cpu_to_be_32(csum); 1215 } else { 1216 es->flags = rte_cpu_to_le_32(csum); 1217 } 1218 /* 1219 * Calculate and set Software Parser offsets and flags. 1220 * These flags a set for custom UDP and IP tunnel packets. 1221 */ 1222 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 1223 /* Fill metadata field if needed. */ 1224 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 1225 loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ? 1226 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 1227 0 : 0; 1228 MLX5_ASSERT(inlen >= MLX5_ESEG_MIN_INLINE_SIZE); 1229 pdst = (uint8_t *)&es->inline_data; 1230 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 1231 /* Implement VLAN tag insertion as part inline data. */ 1232 mlx5_tx_mseg_memcpy(pdst, loc, 1233 2 * RTE_ETHER_ADDR_LEN, 1234 2 * RTE_ETHER_ADDR_LEN, olx); 1235 pdst += 2 * RTE_ETHER_ADDR_LEN; 1236 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 1237 ((RTE_ETHER_TYPE_VLAN << 16) | 1238 loc->mbuf->vlan_tci); 1239 pdst += sizeof(struct rte_vlan_hdr); 1240 tlen += 2 * RTE_ETHER_ADDR_LEN + sizeof(struct rte_vlan_hdr); 1241 } 1242 MLX5_ASSERT(pdst < (uint8_t *)txq->wqes_end); 1243 /* 1244 * The WQEBB space availability is checked by caller. 1245 * Here we should be aware of WQE ring buffer wraparound only. 1246 */ 1247 part = (uint8_t *)txq->wqes_end - pdst; 1248 part = RTE_MIN(part, inlen - tlen); 1249 MLX5_ASSERT(part); 1250 do { 1251 unsigned int copy; 1252 1253 /* 1254 * Copying may be interrupted inside the routine 1255 * if run into no inline hint flag. 1256 */ 1257 copy = tso ? inlen : txq->inlen_mode; 1258 copy = tlen >= copy ? 0 : (copy - tlen); 1259 copy = mlx5_tx_mseg_memcpy(pdst, loc, part, copy, olx); 1260 tlen += copy; 1261 if (likely(inlen <= tlen) || copy < part) { 1262 es->inline_hdr_sz = rte_cpu_to_be_16(tlen); 1263 pdst += copy; 1264 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 1265 return (struct mlx5_wqe_dseg *)pdst; 1266 } 1267 pdst = (uint8_t *)txq->wqes; 1268 part = inlen - tlen; 1269 } while (true); 1270 } 1271 1272 /** 1273 * Build the Data Segment of pointer type. 1274 * 1275 * @param txq 1276 * Pointer to TX queue structure. 1277 * @param loc 1278 * Pointer to burst routine local context. 1279 * @param dseg 1280 * Pointer to WQE to fill with built Data Segment. 
1281 * @param buf 1282 * Data buffer to point. 1283 * @param len 1284 * Data buffer length. 1285 * @param olx 1286 * Configured Tx offloads mask. It is fully defined at 1287 * compile time and may be used for optimization. 1288 */ 1289 static __rte_always_inline void 1290 mlx5_tx_dseg_ptr(struct mlx5_txq_data *__rte_restrict txq, 1291 struct mlx5_txq_local *__rte_restrict loc, 1292 struct mlx5_wqe_dseg *__rte_restrict dseg, 1293 uint8_t *buf, 1294 unsigned int len, 1295 unsigned int olx __rte_unused) 1296 1297 { 1298 MLX5_ASSERT(len); 1299 dseg->bcount = rte_cpu_to_be_32(len); 1300 dseg->lkey = mlx5_mr_mb2mr(&txq->mr_ctrl, loc->mbuf); 1301 dseg->pbuf = rte_cpu_to_be_64((uintptr_t)buf); 1302 } 1303 1304 /** 1305 * Build the Data Segment of pointer type or inline if data length is less than 1306 * buffer in minimal Data Segment size. 1307 * 1308 * @param txq 1309 * Pointer to TX queue structure. 1310 * @param loc 1311 * Pointer to burst routine local context. 1312 * @param dseg 1313 * Pointer to WQE to fill with built Data Segment. 1314 * @param buf 1315 * Data buffer to point. 1316 * @param len 1317 * Data buffer length. 1318 * @param olx 1319 * Configured Tx offloads mask. It is fully defined at 1320 * compile time and may be used for optimization. 1321 */ 1322 static __rte_always_inline void 1323 mlx5_tx_dseg_iptr(struct mlx5_txq_data *__rte_restrict txq, 1324 struct mlx5_txq_local *__rte_restrict loc, 1325 struct mlx5_wqe_dseg *__rte_restrict dseg, 1326 uint8_t *buf, 1327 unsigned int len, 1328 unsigned int olx __rte_unused) 1329 1330 { 1331 uintptr_t dst, src; 1332 1333 MLX5_ASSERT(len); 1334 if (len > MLX5_DSEG_MIN_INLINE_SIZE) { 1335 dseg->bcount = rte_cpu_to_be_32(len); 1336 dseg->lkey = mlx5_mr_mb2mr(&txq->mr_ctrl, loc->mbuf); 1337 dseg->pbuf = rte_cpu_to_be_64((uintptr_t)buf); 1338 1339 return; 1340 } 1341 dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE); 1342 /* Unrolled implementation of generic rte_memcpy. */ 1343 dst = (uintptr_t)&dseg->inline_data[0]; 1344 src = (uintptr_t)buf; 1345 if (len & 0x08) { 1346 #ifdef RTE_ARCH_STRICT_ALIGN 1347 MLX5_ASSERT(dst == RTE_PTR_ALIGN(dst, sizeof(uint32_t))); 1348 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 1349 dst += sizeof(uint32_t); 1350 src += sizeof(uint32_t); 1351 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 1352 dst += sizeof(uint32_t); 1353 src += sizeof(uint32_t); 1354 #else 1355 *(uint64_t *)dst = *(unaligned_uint64_t *)src; 1356 dst += sizeof(uint64_t); 1357 src += sizeof(uint64_t); 1358 #endif 1359 } 1360 if (len & 0x04) { 1361 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 1362 dst += sizeof(uint32_t); 1363 src += sizeof(uint32_t); 1364 } 1365 if (len & 0x02) { 1366 *(uint16_t *)dst = *(unaligned_uint16_t *)src; 1367 dst += sizeof(uint16_t); 1368 src += sizeof(uint16_t); 1369 } 1370 if (len & 0x01) 1371 *(uint8_t *)dst = *(uint8_t *)src; 1372 } 1373 1374 /** 1375 * Build the Data Segment of inlined data from single 1376 * segment packet, no VLAN insertion. 1377 * 1378 * @param txq 1379 * Pointer to TX queue structure. 1380 * @param loc 1381 * Pointer to burst routine local context. 1382 * @param dseg 1383 * Pointer to WQE to fill with built Data Segment. 1384 * @param buf 1385 * Data buffer to point. 1386 * @param len 1387 * Data buffer length. 1388 * @param olx 1389 * Configured Tx offloads mask. It is fully defined at 1390 * compile time and may be used for optimization. 1391 * 1392 * @return 1393 * Pointer to the next Data Segment after inlined data. 1394 * Ring buffer wraparound check is needed. 
We do not do it here because it 1395 * may not be needed for the last packet in the eMPW session. 1396 */ 1397 static __rte_always_inline struct mlx5_wqe_dseg * 1398 mlx5_tx_dseg_empw(struct mlx5_txq_data *__rte_restrict txq, 1399 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 1400 struct mlx5_wqe_dseg *__rte_restrict dseg, 1401 uint8_t *buf, 1402 unsigned int len, 1403 unsigned int olx __rte_unused) 1404 { 1405 unsigned int part; 1406 uint8_t *pdst; 1407 1408 if (!MLX5_TXOFF_CONFIG(MPW)) { 1409 /* Store the descriptor byte counter for eMPW sessions. */ 1410 dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE); 1411 pdst = &dseg->inline_data[0]; 1412 } else { 1413 /* The entire legacy MPW session counter is stored on close. */ 1414 pdst = (uint8_t *)dseg; 1415 } 1416 /* 1417 * The WQEBB space availability is checked by caller. 1418 * Here we should be aware of WQE ring buffer wraparound only. 1419 */ 1420 part = (uint8_t *)txq->wqes_end - pdst; 1421 part = RTE_MIN(part, len); 1422 do { 1423 rte_memcpy(pdst, buf, part); 1424 len -= part; 1425 if (likely(!len)) { 1426 pdst += part; 1427 if (!MLX5_TXOFF_CONFIG(MPW)) 1428 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 1429 /* Note: no final wraparound check here. */ 1430 return (struct mlx5_wqe_dseg *)pdst; 1431 } 1432 pdst = (uint8_t *)txq->wqes; 1433 buf += part; 1434 part = len; 1435 } while (true); 1436 } 1437 1438 /** 1439 * Build the Data Segment of inlined data from single 1440 * segment packet with VLAN insertion. 1441 * 1442 * @param txq 1443 * Pointer to TX queue structure. 1444 * @param loc 1445 * Pointer to burst routine local context. 1446 * @param dseg 1447 * Pointer to the dseg fill with built Data Segment. 1448 * @param buf 1449 * Data buffer to point. 1450 * @param len 1451 * Data buffer length. 1452 * @param olx 1453 * Configured Tx offloads mask. It is fully defined at 1454 * compile time and may be used for optimization. 1455 * 1456 * @return 1457 * Pointer to the next Data Segment after inlined data. 1458 * Ring buffer wraparound check is needed. 1459 */ 1460 static __rte_always_inline struct mlx5_wqe_dseg * 1461 mlx5_tx_dseg_vlan(struct mlx5_txq_data *__rte_restrict txq, 1462 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 1463 struct mlx5_wqe_dseg *__rte_restrict dseg, 1464 uint8_t *buf, 1465 unsigned int len, 1466 unsigned int olx __rte_unused) 1467 1468 { 1469 unsigned int part; 1470 uint8_t *pdst; 1471 1472 MLX5_ASSERT(len > MLX5_ESEG_MIN_INLINE_SIZE); 1473 if (!MLX5_TXOFF_CONFIG(MPW)) { 1474 /* Store the descriptor byte counter for eMPW sessions. */ 1475 dseg->bcount = rte_cpu_to_be_32 1476 ((len + sizeof(struct rte_vlan_hdr)) | 1477 MLX5_ETH_WQE_DATA_INLINE); 1478 pdst = &dseg->inline_data[0]; 1479 } else { 1480 /* The entire legacy MPW session counter is stored on close. */ 1481 pdst = (uint8_t *)dseg; 1482 } 1483 memcpy(pdst, buf, MLX5_DSEG_MIN_INLINE_SIZE); 1484 buf += MLX5_DSEG_MIN_INLINE_SIZE; 1485 pdst += MLX5_DSEG_MIN_INLINE_SIZE; 1486 len -= MLX5_DSEG_MIN_INLINE_SIZE; 1487 /* Insert VLAN ethertype + VLAN tag. Pointer is aligned. */ 1488 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE)); 1489 if (unlikely(pdst >= (uint8_t *)txq->wqes_end)) 1490 pdst = (uint8_t *)txq->wqes; 1491 *(uint32_t *)pdst = rte_cpu_to_be_32((RTE_ETHER_TYPE_VLAN << 16) | 1492 loc->mbuf->vlan_tci); 1493 pdst += sizeof(struct rte_vlan_hdr); 1494 /* 1495 * The WQEBB space availability is checked by caller. 1496 * Here we should be aware of WQE ring buffer wraparound only. 
1497 */ 1498 part = (uint8_t *)txq->wqes_end - pdst; 1499 part = RTE_MIN(part, len); 1500 do { 1501 rte_memcpy(pdst, buf, part); 1502 len -= part; 1503 if (likely(!len)) { 1504 pdst += part; 1505 if (!MLX5_TXOFF_CONFIG(MPW)) 1506 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 1507 /* Note: no final wraparound check here. */ 1508 return (struct mlx5_wqe_dseg *)pdst; 1509 } 1510 pdst = (uint8_t *)txq->wqes; 1511 buf += part; 1512 part = len; 1513 } while (true); 1514 } 1515 1516 /** 1517 * Build the Ethernet Segment with optionally inlined data with 1518 * VLAN insertion and following Data Segments (if any) from 1519 * multi-segment packet. Used by ordinary send and TSO. 1520 * 1521 * @param txq 1522 * Pointer to TX queue structure. 1523 * @param loc 1524 * Pointer to burst routine local context. 1525 * @param wqe 1526 * Pointer to WQE to fill with built Ethernet/Data Segments. 1527 * @param vlan 1528 * Length of VLAN header to insert, 0 means no VLAN insertion. 1529 * @param inlen 1530 * Data length to inline. For TSO this parameter specifies exact value, 1531 * for ordinary send routine can be aligned by caller to provide better WQE 1532 * space saving and data buffer start address alignment. 1533 * This length includes VLAN header being inserted. 1534 * @param tso 1535 * Zero means ordinary send, inlined data can be extended, 1536 * otherwise this is TSO, inlined data length is fixed. 1537 * @param olx 1538 * Configured Tx offloads mask. It is fully defined at 1539 * compile time and may be used for optimization. 1540 * 1541 * @return 1542 * Actual size of built WQE in segments. 1543 */ 1544 static __rte_always_inline unsigned int 1545 mlx5_tx_mseg_build(struct mlx5_txq_data *__rte_restrict txq, 1546 struct mlx5_txq_local *__rte_restrict loc, 1547 struct mlx5_wqe *__rte_restrict wqe, 1548 unsigned int vlan, 1549 unsigned int inlen, 1550 unsigned int tso, 1551 unsigned int olx __rte_unused) 1552 { 1553 struct mlx5_wqe_dseg *__rte_restrict dseg; 1554 unsigned int ds; 1555 1556 MLX5_ASSERT((rte_pktmbuf_pkt_len(loc->mbuf) + vlan) >= inlen); 1557 loc->mbuf_nseg = NB_SEGS(loc->mbuf); 1558 loc->mbuf_off = 0; 1559 1560 dseg = mlx5_tx_eseg_mdat(txq, loc, wqe, vlan, inlen, tso, olx); 1561 if (!loc->mbuf_nseg) 1562 goto dseg_done; 1563 /* 1564 * There are still some mbuf remaining, not inlined. 1565 * The first mbuf may be partially inlined and we 1566 * must process the possible non-zero data offset. 1567 */ 1568 if (loc->mbuf_off) { 1569 unsigned int dlen; 1570 uint8_t *dptr; 1571 1572 /* 1573 * Exhausted packets must be dropped before. 1574 * Non-zero offset means there are some data 1575 * remained in the packet. 1576 */ 1577 MLX5_ASSERT(loc->mbuf_off < rte_pktmbuf_data_len(loc->mbuf)); 1578 MLX5_ASSERT(rte_pktmbuf_data_len(loc->mbuf)); 1579 dptr = rte_pktmbuf_mtod_offset(loc->mbuf, uint8_t *, 1580 loc->mbuf_off); 1581 dlen = rte_pktmbuf_data_len(loc->mbuf) - loc->mbuf_off; 1582 /* 1583 * Build the pointer/minimal Data Segment. 1584 * Do ring buffer wrapping check in advance. 1585 */ 1586 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 1587 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 1588 mlx5_tx_dseg_iptr(txq, loc, dseg, dptr, dlen, olx); 1589 /* Store the mbuf to be freed on completion. 
*/ 1590 MLX5_ASSERT(loc->elts_free); 1591 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 1592 --loc->elts_free; 1593 ++dseg; 1594 if (--loc->mbuf_nseg == 0) 1595 goto dseg_done; 1596 loc->mbuf = loc->mbuf->next; 1597 loc->mbuf_off = 0; 1598 } 1599 do { 1600 if (unlikely(!rte_pktmbuf_data_len(loc->mbuf))) { 1601 struct rte_mbuf *mbuf; 1602 1603 /* Zero length segment found, just skip. */ 1604 mbuf = loc->mbuf; 1605 loc->mbuf = loc->mbuf->next; 1606 rte_pktmbuf_free_seg(mbuf); 1607 if (--loc->mbuf_nseg == 0) 1608 break; 1609 } else { 1610 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 1611 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 1612 mlx5_tx_dseg_iptr 1613 (txq, loc, dseg, 1614 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 1615 rte_pktmbuf_data_len(loc->mbuf), olx); 1616 MLX5_ASSERT(loc->elts_free); 1617 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 1618 --loc->elts_free; 1619 ++dseg; 1620 if (--loc->mbuf_nseg == 0) 1621 break; 1622 loc->mbuf = loc->mbuf->next; 1623 } 1624 } while (true); 1625 1626 dseg_done: 1627 /* Calculate actual segments used from the dseg pointer. */ 1628 if ((uintptr_t)wqe < (uintptr_t)dseg) 1629 ds = ((uintptr_t)dseg - (uintptr_t)wqe) / MLX5_WSEG_SIZE; 1630 else 1631 ds = (((uintptr_t)dseg - (uintptr_t)wqe) + 1632 txq->wqe_s * MLX5_WQE_SIZE) / MLX5_WSEG_SIZE; 1633 return ds; 1634 } 1635 1636 /** 1637 * The routine checks timestamp flag in the current packet, 1638 * and push WAIT WQE into the queue if scheduling is required. 1639 * 1640 * @param txq 1641 * Pointer to TX queue structure. 1642 * @param loc 1643 * Pointer to burst routine local context. 1644 * @param olx 1645 * Configured Tx offloads mask. It is fully defined at 1646 * compile time and may be used for optimization. 1647 * 1648 * @return 1649 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 1650 * MLX5_TXCMP_CODE_SINGLE - continue processing with the packet. 1651 * MLX5_TXCMP_CODE_MULTI - the WAIT inserted, continue processing. 1652 * Local context variables partially updated. 1653 */ 1654 static __rte_always_inline enum mlx5_txcmp_code 1655 mlx5_tx_schedule_send(struct mlx5_txq_data *restrict txq, 1656 struct mlx5_txq_local *restrict loc, 1657 unsigned int olx) 1658 { 1659 if (MLX5_TXOFF_CONFIG(TXPP) && 1660 loc->mbuf->ol_flags & txq->ts_mask) { 1661 struct mlx5_dev_ctx_shared *sh; 1662 struct mlx5_wqe *wqe; 1663 uint64_t ts; 1664 1665 /* 1666 * Estimate the required space quickly and roughly. 1667 * We would like to ensure the packet can be pushed 1668 * to the queue and we won't get the orphan WAIT WQE. 1669 */ 1670 if (loc->wqe_free <= MLX5_WQE_SIZE_MAX / MLX5_WQE_SIZE || 1671 loc->elts_free < NB_SEGS(loc->mbuf)) 1672 return MLX5_TXCMP_CODE_EXIT; 1673 /* Convert the timestamp into completion to wait. */ 1674 ts = *RTE_MBUF_DYNFIELD(loc->mbuf, txq->ts_offset, uint64_t *); 1675 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 1676 sh = txq->sh; 1677 if (txq->wait_on_time) { 1678 /* The wait on time capability should be used. */ 1679 ts -= sh->txpp.skew; 1680 mlx5_tx_cseg_init(txq, loc, wqe, 1681 1 + sizeof(struct mlx5_wqe_wseg) / 1682 MLX5_WSEG_SIZE, 1683 MLX5_OPCODE_WAIT | 1684 MLX5_OPC_MOD_WAIT_TIME << 24, olx); 1685 mlx5_tx_wseg_init(txq, loc, wqe, ts, olx); 1686 } else { 1687 /* Legacy cross-channel operation should be used. */ 1688 int32_t wci; 1689 1690 wci = mlx5_txpp_convert_tx_ts(sh, ts); 1691 if (unlikely(wci < 0)) 1692 return MLX5_TXCMP_CODE_SINGLE; 1693 /* Build the WAIT WQE with specified completion. 
*/ 1694 mlx5_tx_cseg_init(txq, loc, wqe, 1695 1 + sizeof(struct mlx5_wqe_qseg) / 1696 MLX5_WSEG_SIZE, 1697 MLX5_OPCODE_WAIT | 1698 MLX5_OPC_MOD_WAIT_CQ_PI << 24, olx); 1699 mlx5_tx_qseg_init(txq, loc, wqe, wci, olx); 1700 } 1701 ++txq->wqe_ci; 1702 --loc->wqe_free; 1703 return MLX5_TXCMP_CODE_MULTI; 1704 } 1705 return MLX5_TXCMP_CODE_SINGLE; 1706 } 1707 1708 /** 1709 * Tx one packet function for multi-segment TSO. Supports all 1710 * types of Tx offloads, uses MLX5_OPCODE_TSO to build WQEs, 1711 * sends one packet per WQE. 1712 * 1713 * This routine is responsible for storing processed mbuf 1714 * into elts ring buffer and update elts_head. 1715 * 1716 * @param txq 1717 * Pointer to TX queue structure. 1718 * @param loc 1719 * Pointer to burst routine local context. 1720 * @param olx 1721 * Configured Tx offloads mask. It is fully defined at 1722 * compile time and may be used for optimization. 1723 * 1724 * @return 1725 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 1726 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 1727 * Local context variables partially updated. 1728 */ 1729 static __rte_always_inline enum mlx5_txcmp_code 1730 mlx5_tx_packet_multi_tso(struct mlx5_txq_data *__rte_restrict txq, 1731 struct mlx5_txq_local *__rte_restrict loc, 1732 unsigned int olx) 1733 { 1734 struct mlx5_wqe *__rte_restrict wqe; 1735 unsigned int ds, dlen, inlen, ntcp, vlan = 0; 1736 1737 if (MLX5_TXOFF_CONFIG(TXPP)) { 1738 enum mlx5_txcmp_code wret; 1739 1740 /* Generate WAIT for scheduling if requested. */ 1741 wret = mlx5_tx_schedule_send(txq, loc, olx); 1742 if (wret == MLX5_TXCMP_CODE_EXIT) 1743 return MLX5_TXCMP_CODE_EXIT; 1744 if (wret == MLX5_TXCMP_CODE_ERROR) 1745 return MLX5_TXCMP_CODE_ERROR; 1746 } 1747 /* 1748 * Calculate data length to be inlined to estimate 1749 * the required space in WQE ring buffer. 1750 */ 1751 dlen = rte_pktmbuf_pkt_len(loc->mbuf); 1752 if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) 1753 vlan = sizeof(struct rte_vlan_hdr); 1754 inlen = loc->mbuf->l2_len + vlan + 1755 loc->mbuf->l3_len + loc->mbuf->l4_len; 1756 if (unlikely((!inlen || !loc->mbuf->tso_segsz))) 1757 return MLX5_TXCMP_CODE_ERROR; 1758 if (loc->mbuf->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) 1759 inlen += loc->mbuf->outer_l2_len + loc->mbuf->outer_l3_len; 1760 /* Packet must contain all TSO headers. */ 1761 if (unlikely(inlen > MLX5_MAX_TSO_HEADER || 1762 inlen <= MLX5_ESEG_MIN_INLINE_SIZE || 1763 inlen > (dlen + vlan))) 1764 return MLX5_TXCMP_CODE_ERROR; 1765 /* 1766 * Check whether there are enough free WQEBBs: 1767 * - Control Segment 1768 * - Ethernet Segment 1769 * - First Segment of inlined Ethernet data 1770 * - ... data continued ... 1771 * - Data Segments of pointer/min inline type 1772 */ 1773 ds = NB_SEGS(loc->mbuf) + 2 + (inlen - 1774 MLX5_ESEG_MIN_INLINE_SIZE + 1775 MLX5_WSEG_SIZE + 1776 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 1777 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 1778 return MLX5_TXCMP_CODE_EXIT; 1779 /* Check for maximal WQE size. */ 1780 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 1781 return MLX5_TXCMP_CODE_ERROR; 1782 #ifdef MLX5_PMD_SOFT_COUNTERS 1783 /* Update sent data bytes/packets counters. */ 1784 ntcp = (dlen - (inlen - vlan) + loc->mbuf->tso_segsz - 1) / 1785 loc->mbuf->tso_segsz; 1786 /* 1787 * One will be added for mbuf itself at the end of the mlx5_tx_burst 1788 * from loc->pkts_sent field. 
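 * For example, with dlen = 9000 bytes total, inlen = 54 header bytes
 * (no VLAN) and tso_segsz = 1460 the hardware produces
 * ceil((9000 - 54) / 1460) = 7 TCP segments: six are accounted just
 * below and the seventh is added from loc->pkts_sent, while obytes
 * grows by 9000 + 6 * 54 to cover the replicated headers.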
1789 */ 1790 --ntcp; 1791 txq->stats.opackets += ntcp; 1792 txq->stats.obytes += dlen + vlan + ntcp * inlen; 1793 #endif 1794 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 1795 loc->wqe_last = wqe; 1796 mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_TSO, olx); 1797 ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 1, olx); 1798 wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 1799 txq->wqe_ci += (ds + 3) / 4; 1800 loc->wqe_free -= (ds + 3) / 4; 1801 return MLX5_TXCMP_CODE_MULTI; 1802 } 1803 1804 /** 1805 * Tx one packet function for multi-segment SEND. Supports all types of Tx 1806 * offloads, uses MLX5_OPCODE_SEND to build WQEs, sends one packet per WQE, 1807 * without any data inlining in Ethernet Segment. 1808 * 1809 * This routine is responsible for storing processed mbuf 1810 * into elts ring buffer and update elts_head. 1811 * 1812 * @param txq 1813 * Pointer to TX queue structure. 1814 * @param loc 1815 * Pointer to burst routine local context. 1816 * @param olx 1817 * Configured Tx offloads mask. It is fully defined at 1818 * compile time and may be used for optimization. 1819 * 1820 * @return 1821 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 1822 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 1823 * Local context variables partially updated. 1824 */ 1825 static __rte_always_inline enum mlx5_txcmp_code 1826 mlx5_tx_packet_multi_send(struct mlx5_txq_data *__rte_restrict txq, 1827 struct mlx5_txq_local *__rte_restrict loc, 1828 unsigned int olx) 1829 { 1830 struct mlx5_wqe_dseg *__rte_restrict dseg; 1831 struct mlx5_wqe *__rte_restrict wqe; 1832 unsigned int ds, nseg; 1833 1834 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 1835 if (MLX5_TXOFF_CONFIG(TXPP)) { 1836 enum mlx5_txcmp_code wret; 1837 1838 /* Generate WAIT for scheduling if requested. */ 1839 wret = mlx5_tx_schedule_send(txq, loc, olx); 1840 if (wret == MLX5_TXCMP_CODE_EXIT) 1841 return MLX5_TXCMP_CODE_EXIT; 1842 if (wret == MLX5_TXCMP_CODE_ERROR) 1843 return MLX5_TXCMP_CODE_ERROR; 1844 } 1845 /* 1846 * No inline at all, it means the CPU cycles saving is prioritized at 1847 * configuration, we should not copy any packet data to WQE. 1848 */ 1849 nseg = NB_SEGS(loc->mbuf); 1850 ds = 2 + nseg; 1851 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 1852 return MLX5_TXCMP_CODE_EXIT; 1853 /* Check for maximal WQE size. */ 1854 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 1855 return MLX5_TXCMP_CODE_ERROR; 1856 /* 1857 * Some Tx offloads may cause an error if packet is not long enough, 1858 * check against assumed minimal length. 1859 */ 1860 if (rte_pktmbuf_pkt_len(loc->mbuf) <= MLX5_ESEG_MIN_INLINE_SIZE) 1861 return MLX5_TXCMP_CODE_ERROR; 1862 #ifdef MLX5_PMD_SOFT_COUNTERS 1863 /* Update sent data bytes counter. 
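 * Only the byte counter is updated here; the packet itself is counted
 * once at the end of mlx5_tx_burst from the loc->pkts_sent field, and
 * the four bytes of a hardware-inserted VLAN header are included in
 * obytes.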
*/ 1864 txq->stats.obytes += rte_pktmbuf_pkt_len(loc->mbuf); 1865 if (MLX5_TXOFF_CONFIG(VLAN) && 1866 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) 1867 txq->stats.obytes += sizeof(struct rte_vlan_hdr); 1868 #endif 1869 /* 1870 * SEND WQE, one WQEBB: 1871 * - Control Segment, SEND opcode 1872 * - Ethernet Segment, optional VLAN, no inline 1873 * - Data Segments, pointer only type 1874 */ 1875 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 1876 loc->wqe_last = wqe; 1877 mlx5_tx_cseg_init(txq, loc, wqe, ds, MLX5_OPCODE_SEND, olx); 1878 mlx5_tx_eseg_none(txq, loc, wqe, olx); 1879 dseg = &wqe->dseg[0]; 1880 do { 1881 if (unlikely(!rte_pktmbuf_data_len(loc->mbuf))) { 1882 struct rte_mbuf *mbuf; 1883 1884 /* 1885 * Zero length segment found, have to correct total 1886 * size of WQE in segments. 1887 * It is supposed to be rare occasion, so in normal 1888 * case (no zero length segments) we avoid extra 1889 * writing to the Control Segment. 1890 */ 1891 --ds; 1892 wqe->cseg.sq_ds -= RTE_BE32(1); 1893 mbuf = loc->mbuf; 1894 loc->mbuf = mbuf->next; 1895 rte_pktmbuf_free_seg(mbuf); 1896 if (--nseg == 0) 1897 break; 1898 } else { 1899 mlx5_tx_dseg_ptr 1900 (txq, loc, dseg, 1901 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 1902 rte_pktmbuf_data_len(loc->mbuf), olx); 1903 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 1904 --loc->elts_free; 1905 if (--nseg == 0) 1906 break; 1907 ++dseg; 1908 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 1909 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 1910 loc->mbuf = loc->mbuf->next; 1911 } 1912 } while (true); 1913 txq->wqe_ci += (ds + 3) / 4; 1914 loc->wqe_free -= (ds + 3) / 4; 1915 return MLX5_TXCMP_CODE_MULTI; 1916 } 1917 1918 /** 1919 * Tx one packet function for multi-segment SEND. Supports all 1920 * types of Tx offloads, uses MLX5_OPCODE_SEND to build WQEs, 1921 * sends one packet per WQE, with data inlining in 1922 * Ethernet Segment and minimal Data Segments. 1923 * 1924 * This routine is responsible for storing processed mbuf 1925 * into elts ring buffer and update elts_head. 1926 * 1927 * @param txq 1928 * Pointer to TX queue structure. 1929 * @param loc 1930 * Pointer to burst routine local context. 1931 * @param olx 1932 * Configured Tx offloads mask. It is fully defined at 1933 * compile time and may be used for optimization. 1934 * 1935 * @return 1936 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 1937 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 1938 * Local context variables partially updated. 1939 */ 1940 static __rte_always_inline enum mlx5_txcmp_code 1941 mlx5_tx_packet_multi_inline(struct mlx5_txq_data *__rte_restrict txq, 1942 struct mlx5_txq_local *__rte_restrict loc, 1943 unsigned int olx) 1944 { 1945 struct mlx5_wqe *__rte_restrict wqe; 1946 unsigned int ds, inlen, dlen, vlan = 0; 1947 1948 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 1949 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 1950 if (MLX5_TXOFF_CONFIG(TXPP)) { 1951 enum mlx5_txcmp_code wret; 1952 1953 /* Generate WAIT for scheduling if requested. */ 1954 wret = mlx5_tx_schedule_send(txq, loc, olx); 1955 if (wret == MLX5_TXCMP_CODE_EXIT) 1956 return MLX5_TXCMP_CODE_EXIT; 1957 if (wret == MLX5_TXCMP_CODE_ERROR) 1958 return MLX5_TXCMP_CODE_ERROR; 1959 } 1960 /* 1961 * First calculate data length to be inlined 1962 * to estimate the required space for WQE. 
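 * Several cases are handled below: the whole packet may be inlined,
 * the inline part may be trimmed to txq->inlen_mode, trimmed to the
 * minimal ESEG size when the VLAN tag has to be inserted by software,
 * extended so that the remaining data starts cache-line aligned, or
 * inlining may be skipped entirely by falling back to
 * mlx5_tx_packet_multi_send().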
1963 */ 1964 dlen = rte_pktmbuf_pkt_len(loc->mbuf); 1965 if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) 1966 vlan = sizeof(struct rte_vlan_hdr); 1967 inlen = dlen + vlan; 1968 /* Check against minimal length. */ 1969 if (inlen <= MLX5_ESEG_MIN_INLINE_SIZE) 1970 return MLX5_TXCMP_CODE_ERROR; 1971 MLX5_ASSERT(txq->inlen_send >= MLX5_ESEG_MIN_INLINE_SIZE); 1972 if (inlen > txq->inlen_send || 1973 loc->mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) { 1974 struct rte_mbuf *mbuf; 1975 unsigned int nxlen; 1976 uintptr_t start; 1977 1978 mbuf = loc->mbuf; 1979 nxlen = rte_pktmbuf_data_len(mbuf); 1980 /* 1981 * Packet length exceeds the allowed inline data length, 1982 * check whether the minimal inlining is required. 1983 */ 1984 if (txq->inlen_mode) { 1985 MLX5_ASSERT(txq->inlen_mode >= 1986 MLX5_ESEG_MIN_INLINE_SIZE); 1987 MLX5_ASSERT(txq->inlen_mode <= txq->inlen_send); 1988 inlen = RTE_MIN(txq->inlen_mode, inlen); 1989 } else if (vlan && !txq->vlan_en) { 1990 /* 1991 * VLAN insertion is requested and hardware does not 1992 * support the offload, will do with software inline. 1993 */ 1994 inlen = MLX5_ESEG_MIN_INLINE_SIZE; 1995 } else if (mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE || 1996 nxlen > txq->inlen_send) { 1997 return mlx5_tx_packet_multi_send(txq, loc, olx); 1998 } else { 1999 goto do_first; 2000 } 2001 if (mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) 2002 goto do_build; 2003 /* 2004 * Now we know the minimal amount of data is requested 2005 * to inline. Check whether we should inline the buffers 2006 * from the chain beginning to eliminate some mbufs. 2007 */ 2008 if (unlikely(nxlen <= txq->inlen_send)) { 2009 /* We can inline first mbuf at least. */ 2010 if (nxlen < inlen) { 2011 unsigned int smlen; 2012 2013 /* Scan mbufs till inlen filled. */ 2014 do { 2015 smlen = nxlen; 2016 mbuf = NEXT(mbuf); 2017 MLX5_ASSERT(mbuf); 2018 nxlen = rte_pktmbuf_data_len(mbuf); 2019 nxlen += smlen; 2020 } while (unlikely(nxlen < inlen)); 2021 if (unlikely(nxlen > txq->inlen_send)) { 2022 /* We cannot inline entire mbuf. */ 2023 smlen = inlen - smlen; 2024 start = rte_pktmbuf_mtod_offset 2025 (mbuf, uintptr_t, smlen); 2026 goto do_align; 2027 } 2028 } 2029 do_first: 2030 do { 2031 inlen = nxlen; 2032 mbuf = NEXT(mbuf); 2033 /* There should be not end of packet. */ 2034 MLX5_ASSERT(mbuf); 2035 if (mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) 2036 break; 2037 nxlen = inlen + rte_pktmbuf_data_len(mbuf); 2038 } while (unlikely(nxlen < txq->inlen_send)); 2039 } 2040 start = rte_pktmbuf_mtod(mbuf, uintptr_t); 2041 /* 2042 * Check whether we can do inline to align start 2043 * address of data buffer to cacheline. 2044 */ 2045 do_align: 2046 start = (~start + 1) & (RTE_CACHE_LINE_SIZE - 1); 2047 if (unlikely(start)) { 2048 start += inlen; 2049 if (start <= txq->inlen_send) 2050 inlen = start; 2051 } 2052 } 2053 /* 2054 * Check whether there are enough free WQEBBs: 2055 * - Control Segment 2056 * - Ethernet Segment 2057 * - First Segment of inlined Ethernet data 2058 * - ... data continued ... 2059 * - Data Segments of pointer/min inline type 2060 * 2061 * Estimate the number of Data Segments conservatively, 2062 * supposing no any mbufs is being freed during inlining. 
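 * As a rough worked example, assuming the usual 16-byte WSEG and
 * 18-byte minimal ESEG inline: a 3-segment packet with inlen = 128
 * gives ds = 3 + 2 + (128 - 18 + 16 + 15) / 16 = 13 segments, i.e.
 * (13 + 3) / 4 = 4 WQEBBs reserved, even if inlining later consumes
 * some of the leading mbufs completely.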
2063 */ 2064 do_build: 2065 MLX5_ASSERT(inlen <= txq->inlen_send); 2066 ds = NB_SEGS(loc->mbuf) + 2 + (inlen - 2067 MLX5_ESEG_MIN_INLINE_SIZE + 2068 MLX5_WSEG_SIZE + 2069 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 2070 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 2071 return MLX5_TXCMP_CODE_EXIT; 2072 /* Check for maximal WQE size. */ 2073 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 2074 return MLX5_TXCMP_CODE_ERROR; 2075 #ifdef MLX5_PMD_SOFT_COUNTERS 2076 /* Update sent data bytes/packets counters. */ 2077 txq->stats.obytes += dlen + vlan; 2078 #endif 2079 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2080 loc->wqe_last = wqe; 2081 mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_SEND, olx); 2082 ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 0, olx); 2083 wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 2084 txq->wqe_ci += (ds + 3) / 4; 2085 loc->wqe_free -= (ds + 3) / 4; 2086 return MLX5_TXCMP_CODE_MULTI; 2087 } 2088 2089 /** 2090 * Tx burst function for multi-segment packets. Supports all 2091 * types of Tx offloads, uses MLX5_OPCODE_SEND/TSO to build WQEs, 2092 * sends one packet per WQE. Function stops sending if it 2093 * encounters the single-segment packet. 2094 * 2095 * This routine is responsible for storing processed mbuf 2096 * into elts ring buffer and update elts_head. 2097 * 2098 * @param txq 2099 * Pointer to TX queue structure. 2100 * @param[in] pkts 2101 * Packets to transmit. 2102 * @param pkts_n 2103 * Number of packets in array. 2104 * @param loc 2105 * Pointer to burst routine local context. 2106 * @param olx 2107 * Configured Tx offloads mask. It is fully defined at 2108 * compile time and may be used for optimization. 2109 * 2110 * @return 2111 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 2112 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 2113 * MLX5_TXCMP_CODE_SINGLE - single-segment packet encountered. 2114 * MLX5_TXCMP_CODE_TSO - TSO single-segment packet encountered. 2115 * Local context variables updated. 2116 */ 2117 static __rte_always_inline enum mlx5_txcmp_code 2118 mlx5_tx_burst_mseg(struct mlx5_txq_data *__rte_restrict txq, 2119 struct rte_mbuf **__rte_restrict pkts, 2120 unsigned int pkts_n, 2121 struct mlx5_txq_local *__rte_restrict loc, 2122 unsigned int olx) 2123 { 2124 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2125 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2126 pkts += loc->pkts_sent + 1; 2127 pkts_n -= loc->pkts_sent; 2128 for (;;) { 2129 enum mlx5_txcmp_code ret; 2130 2131 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 2132 /* 2133 * Estimate the number of free elts quickly but conservatively. 2134 * Some segment may be fully inlined and freed, 2135 * ignore this here - precise estimation is costly. 2136 */ 2137 if (loc->elts_free < NB_SEGS(loc->mbuf)) 2138 return MLX5_TXCMP_CODE_EXIT; 2139 if (MLX5_TXOFF_CONFIG(TSO) && 2140 unlikely(loc->mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) { 2141 /* Proceed with multi-segment TSO. */ 2142 ret = mlx5_tx_packet_multi_tso(txq, loc, olx); 2143 } else if (MLX5_TXOFF_CONFIG(INLINE)) { 2144 /* Proceed with multi-segment SEND with inlining. */ 2145 ret = mlx5_tx_packet_multi_inline(txq, loc, olx); 2146 } else { 2147 /* Proceed with multi-segment SEND w/o inlining. */ 2148 ret = mlx5_tx_packet_multi_send(txq, loc, olx); 2149 } 2150 if (ret == MLX5_TXCMP_CODE_EXIT) 2151 return MLX5_TXCMP_CODE_EXIT; 2152 if (ret == MLX5_TXCMP_CODE_ERROR) 2153 return MLX5_TXCMP_CODE_ERROR; 2154 /* WQE is built, go to the next packet. 
*/ 2155 ++loc->pkts_sent; 2156 --pkts_n; 2157 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 2158 return MLX5_TXCMP_CODE_EXIT; 2159 loc->mbuf = *pkts++; 2160 if (pkts_n > 1) 2161 rte_prefetch0(*pkts); 2162 if (likely(NB_SEGS(loc->mbuf) > 1)) 2163 continue; 2164 /* Here ends the series of multi-segment packets. */ 2165 if (MLX5_TXOFF_CONFIG(TSO) && 2166 unlikely(loc->mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) 2167 return MLX5_TXCMP_CODE_TSO; 2168 return MLX5_TXCMP_CODE_SINGLE; 2169 } 2170 MLX5_ASSERT(false); 2171 } 2172 2173 /** 2174 * Tx burst function for single-segment packets with TSO. 2175 * Supports all types of Tx offloads, except multi-packets. 2176 * Uses MLX5_OPCODE_TSO to build WQEs, sends one packet per WQE. 2177 * Function stops sending if it encounters the multi-segment 2178 * packet or packet without TSO requested. 2179 * 2180 * The routine is responsible for storing processed mbuf into elts ring buffer 2181 * and update elts_head if inline offloads is requested due to possible early 2182 * freeing of the inlined mbufs (can not store pkts array in elts as a batch). 2183 * 2184 * @param txq 2185 * Pointer to TX queue structure. 2186 * @param[in] pkts 2187 * Packets to transmit. 2188 * @param pkts_n 2189 * Number of packets in array. 2190 * @param loc 2191 * Pointer to burst routine local context. 2192 * @param olx 2193 * Configured Tx offloads mask. It is fully defined at 2194 * compile time and may be used for optimization. 2195 * 2196 * @return 2197 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 2198 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 2199 * MLX5_TXCMP_CODE_SINGLE - single-segment packet encountered. 2200 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 2201 * Local context variables updated. 2202 */ 2203 static __rte_always_inline enum mlx5_txcmp_code 2204 mlx5_tx_burst_tso(struct mlx5_txq_data *__rte_restrict txq, 2205 struct rte_mbuf **__rte_restrict pkts, 2206 unsigned int pkts_n, 2207 struct mlx5_txq_local *__rte_restrict loc, 2208 unsigned int olx) 2209 { 2210 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2211 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2212 pkts += loc->pkts_sent + 1; 2213 pkts_n -= loc->pkts_sent; 2214 for (;;) { 2215 struct mlx5_wqe_dseg *__rte_restrict dseg; 2216 struct mlx5_wqe *__rte_restrict wqe; 2217 unsigned int ds, dlen, hlen, ntcp, vlan = 0; 2218 uint8_t *dptr; 2219 2220 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2221 if (MLX5_TXOFF_CONFIG(TXPP)) { 2222 enum mlx5_txcmp_code wret; 2223 2224 /* Generate WAIT for scheduling if requested. */ 2225 wret = mlx5_tx_schedule_send(txq, loc, olx); 2226 if (wret == MLX5_TXCMP_CODE_EXIT) 2227 return MLX5_TXCMP_CODE_EXIT; 2228 if (wret == MLX5_TXCMP_CODE_ERROR) 2229 return MLX5_TXCMP_CODE_ERROR; 2230 } 2231 dlen = rte_pktmbuf_data_len(loc->mbuf); 2232 if (MLX5_TXOFF_CONFIG(VLAN) && 2233 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) { 2234 vlan = sizeof(struct rte_vlan_hdr); 2235 } 2236 /* 2237 * First calculate the WQE size to check 2238 * whether we have enough space in ring buffer. 2239 */ 2240 hlen = loc->mbuf->l2_len + vlan + 2241 loc->mbuf->l3_len + loc->mbuf->l4_len; 2242 if (unlikely((!hlen || !loc->mbuf->tso_segsz))) 2243 return MLX5_TXCMP_CODE_ERROR; 2244 if (loc->mbuf->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) 2245 hlen += loc->mbuf->outer_l2_len + 2246 loc->mbuf->outer_l3_len; 2247 /* Segment must contain all TSO headers. 
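 * For a plain TCP/IPv4 packet this is typically l2_len (14) +
 * l3_len (20) + l4_len (20) = 54 bytes, optionally plus 4 bytes of
 * inserted VLAN and the outer headers for tunnels, which satisfies the
 * checks below: larger than the minimal ESEG inline size and well
 * below MLX5_MAX_TSO_HEADER.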
*/ 2248 if (unlikely(hlen > MLX5_MAX_TSO_HEADER || 2249 hlen <= MLX5_ESEG_MIN_INLINE_SIZE || 2250 hlen > (dlen + vlan))) 2251 return MLX5_TXCMP_CODE_ERROR; 2252 /* 2253 * Check whether there are enough free WQEBBs: 2254 * - Control Segment 2255 * - Ethernet Segment 2256 * - First Segment of inlined Ethernet data 2257 * - ... data continued ... 2258 * - Finishing Data Segment of pointer type 2259 */ 2260 ds = 4 + (hlen - MLX5_ESEG_MIN_INLINE_SIZE + 2261 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 2262 if (loc->wqe_free < ((ds + 3) / 4)) 2263 return MLX5_TXCMP_CODE_EXIT; 2264 #ifdef MLX5_PMD_SOFT_COUNTERS 2265 /* Update sent data bytes/packets counters. */ 2266 ntcp = (dlen + vlan - hlen + 2267 loc->mbuf->tso_segsz - 1) / 2268 loc->mbuf->tso_segsz; 2269 /* 2270 * One will be added for mbuf itself at the end 2271 * of the mlx5_tx_burst from loc->pkts_sent field. 2272 */ 2273 --ntcp; 2274 txq->stats.opackets += ntcp; 2275 txq->stats.obytes += dlen + vlan + ntcp * hlen; 2276 #endif 2277 /* 2278 * Build the TSO WQE: 2279 * - Control Segment 2280 * - Ethernet Segment with hlen bytes inlined 2281 * - Data Segment of pointer type 2282 */ 2283 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2284 loc->wqe_last = wqe; 2285 mlx5_tx_cseg_init(txq, loc, wqe, ds, 2286 MLX5_OPCODE_TSO, olx); 2287 dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan, hlen, 1, olx); 2288 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + hlen - vlan; 2289 dlen -= hlen - vlan; 2290 mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx); 2291 /* 2292 * WQE is built, update the loop parameters 2293 * and go to the next packet. 2294 */ 2295 txq->wqe_ci += (ds + 3) / 4; 2296 loc->wqe_free -= (ds + 3) / 4; 2297 if (MLX5_TXOFF_CONFIG(INLINE)) 2298 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 2299 --loc->elts_free; 2300 ++loc->pkts_sent; 2301 --pkts_n; 2302 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 2303 return MLX5_TXCMP_CODE_EXIT; 2304 loc->mbuf = *pkts++; 2305 if (pkts_n > 1) 2306 rte_prefetch0(*pkts); 2307 if (MLX5_TXOFF_CONFIG(MULTI) && 2308 unlikely(NB_SEGS(loc->mbuf) > 1)) 2309 return MLX5_TXCMP_CODE_MULTI; 2310 if (likely(!(loc->mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG))) 2311 return MLX5_TXCMP_CODE_SINGLE; 2312 /* Continue with the next TSO packet. */ 2313 } 2314 MLX5_ASSERT(false); 2315 } 2316 2317 /** 2318 * Analyze the packet and select the best method to send. 2319 * 2320 * @param txq 2321 * Pointer to TX queue structure. 2322 * @param loc 2323 * Pointer to burst routine local context. 2324 * @param olx 2325 * Configured Tx offloads mask. It is fully defined at 2326 * compile time and may be used for optimization. 2327 * @param newp 2328 * The predefined flag whether do complete check for 2329 * multi-segment packets and TSO. 2330 * 2331 * @return 2332 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 2333 * MLX5_TXCMP_CODE_TSO - TSO required, use TSO/LSO. 2334 * MLX5_TXCMP_CODE_SINGLE - single-segment packet, use SEND. 2335 * MLX5_TXCMP_CODE_EMPW - single-segment packet, use MPW. 2336 */ 2337 static __rte_always_inline enum mlx5_txcmp_code 2338 mlx5_tx_able_to_empw(struct mlx5_txq_data *__rte_restrict txq, 2339 struct mlx5_txq_local *__rte_restrict loc, 2340 unsigned int olx, 2341 bool newp) 2342 { 2343 /* Check for multi-segment packet. */ 2344 if (newp && 2345 MLX5_TXOFF_CONFIG(MULTI) && 2346 unlikely(NB_SEGS(loc->mbuf) > 1)) 2347 return MLX5_TXCMP_CODE_MULTI; 2348 /* Check for TSO packet. 
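 * Both this check and the multi-segment check above are skipped when
 * the caller has already classified the packet (newp == false); the
 * remaining tests only choose between SINGLE and EMPW, falling back to
 * SINGLE when eMPW is not configured or the packet requires a VLAN tag
 * that cannot be inlined.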
*/ 2349 if (newp && 2350 MLX5_TXOFF_CONFIG(TSO) && 2351 unlikely(loc->mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) 2352 return MLX5_TXCMP_CODE_TSO; 2353 /* Check if eMPW is enabled at all. */ 2354 if (!MLX5_TXOFF_CONFIG(EMPW)) 2355 return MLX5_TXCMP_CODE_SINGLE; 2356 /* Check if eMPW can be engaged. */ 2357 if (MLX5_TXOFF_CONFIG(VLAN) && 2358 unlikely(loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) && 2359 (!MLX5_TXOFF_CONFIG(INLINE) || 2360 unlikely((rte_pktmbuf_data_len(loc->mbuf) + 2361 sizeof(struct rte_vlan_hdr)) > txq->inlen_empw))) { 2362 /* 2363 * eMPW does not support VLAN insertion offload, we have to 2364 * inline the entire packet but packet is too long for inlining. 2365 */ 2366 return MLX5_TXCMP_CODE_SINGLE; 2367 } 2368 return MLX5_TXCMP_CODE_EMPW; 2369 } 2370 2371 /** 2372 * Check the next packet attributes to match with the eMPW batch ones. 2373 * In addition, for legacy MPW the packet length is checked either. 2374 * 2375 * @param txq 2376 * Pointer to TX queue structure. 2377 * @param es 2378 * Pointer to Ethernet Segment of eMPW batch. 2379 * @param loc 2380 * Pointer to burst routine local context. 2381 * @param dlen 2382 * Length of previous packet in MPW descriptor. 2383 * @param olx 2384 * Configured Tx offloads mask. It is fully defined at 2385 * compile time and may be used for optimization. 2386 * 2387 * @return 2388 * true - packet match with eMPW batch attributes. 2389 * false - no match, eMPW should be restarted. 2390 */ 2391 static __rte_always_inline bool 2392 mlx5_tx_match_empw(struct mlx5_txq_data *__rte_restrict txq, 2393 struct mlx5_wqe_eseg *__rte_restrict es, 2394 struct mlx5_txq_local *__rte_restrict loc, 2395 uint32_t dlen, 2396 unsigned int olx) 2397 { 2398 uint8_t swp_flags = 0; 2399 2400 /* Compare the checksum flags, if any. */ 2401 if (MLX5_TXOFF_CONFIG(CSUM) && 2402 txq_ol_cksum_to_cs(loc->mbuf) != es->cs_flags) 2403 return false; 2404 /* Compare the Software Parser offsets and flags. */ 2405 if (MLX5_TXOFF_CONFIG(SWP) && 2406 (es->swp_offs != txq_mbuf_to_swp(loc, &swp_flags, olx) || 2407 es->swp_flags != swp_flags)) 2408 return false; 2409 /* Fill metadata field if needed. */ 2410 if (MLX5_TXOFF_CONFIG(METADATA) && 2411 es->metadata != (loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ? 2412 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 0)) 2413 return false; 2414 /* Legacy MPW can send packets with the same length only. */ 2415 if (MLX5_TXOFF_CONFIG(MPW) && 2416 dlen != rte_pktmbuf_data_len(loc->mbuf)) 2417 return false; 2418 /* There must be no VLAN packets in eMPW loop. */ 2419 if (MLX5_TXOFF_CONFIG(VLAN)) 2420 MLX5_ASSERT(!(loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN)); 2421 /* Check if the scheduling is requested. */ 2422 if (MLX5_TXOFF_CONFIG(TXPP) && 2423 loc->mbuf->ol_flags & txq->ts_mask) 2424 return false; 2425 return true; 2426 } 2427 2428 /** 2429 * Update send loop variables and WQE for eMPW loop without data inlining. 2430 * Number of Data Segments is equal to the number of sent packets. 2431 * 2432 * @param txq 2433 * Pointer to TX queue structure. 2434 * @param loc 2435 * Pointer to burst routine local context. 2436 * @param ds 2437 * Number of packets/Data Segments/Packets. 2438 * @param slen 2439 * Accumulated statistics, bytes sent. 2440 * @param olx 2441 * Configured Tx offloads mask. It is fully defined at 2442 * compile time and may be used for optimization. 2443 * 2444 * @return 2445 * true - packet match with eMPW batch attributes. 2446 * false - no match, eMPW should be restarted. 
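 *
 * Note: the Control Segment DS count is finalized here as the number
 * of sent packets plus two (Control plus Ethernet Segment), e.g.
 * closing a batch of 15 packets yields ds = 17 and consumes
 * (17 + 3) / 4 = 5 WQEBBs from the ring.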
2447 */ 2448 static __rte_always_inline void 2449 mlx5_tx_sdone_empw(struct mlx5_txq_data *__rte_restrict txq, 2450 struct mlx5_txq_local *__rte_restrict loc, 2451 unsigned int ds, 2452 unsigned int slen, 2453 unsigned int olx __rte_unused) 2454 { 2455 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 2456 #ifdef MLX5_PMD_SOFT_COUNTERS 2457 /* Update sent data bytes counter. */ 2458 txq->stats.obytes += slen; 2459 #else 2460 (void)slen; 2461 #endif 2462 loc->elts_free -= ds; 2463 loc->pkts_sent += ds; 2464 ds += 2; 2465 loc->wqe_last->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 2466 txq->wqe_ci += (ds + 3) / 4; 2467 loc->wqe_free -= (ds + 3) / 4; 2468 } 2469 2470 /** 2471 * Update send loop variables and WQE for eMPW loop with data inlining. 2472 * Gets the size of pushed descriptors and data to the WQE. 2473 * 2474 * @param txq 2475 * Pointer to TX queue structure. 2476 * @param loc 2477 * Pointer to burst routine local context. 2478 * @param len 2479 * Total size of descriptor/data in bytes. 2480 * @param slen 2481 * Accumulated statistics, data bytes sent. 2482 * @param wqem 2483 * The base WQE for the eMPW/MPW descriptor. 2484 * @param olx 2485 * Configured Tx offloads mask. It is fully defined at 2486 * compile time and may be used for optimization. 2487 * 2488 * @return 2489 * true - packet match with eMPW batch attributes. 2490 * false - no match, eMPW should be restarted. 2491 */ 2492 static __rte_always_inline void 2493 mlx5_tx_idone_empw(struct mlx5_txq_data *__rte_restrict txq, 2494 struct mlx5_txq_local *__rte_restrict loc, 2495 unsigned int len, 2496 unsigned int slen, 2497 struct mlx5_wqe *__rte_restrict wqem, 2498 unsigned int olx __rte_unused) 2499 { 2500 struct mlx5_wqe_dseg *dseg = &wqem->dseg[0]; 2501 2502 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 2503 #ifdef MLX5_PMD_SOFT_COUNTERS 2504 /* Update sent data bytes counter. */ 2505 txq->stats.obytes += slen; 2506 #else 2507 (void)slen; 2508 #endif 2509 if (MLX5_TXOFF_CONFIG(MPW) && dseg->bcount == RTE_BE32(0)) { 2510 /* 2511 * If the legacy MPW session contains the inline packets 2512 * we should set the only inline data segment length 2513 * and align the total length to the segment size. 2514 */ 2515 MLX5_ASSERT(len > sizeof(dseg->bcount)); 2516 dseg->bcount = rte_cpu_to_be_32((len - sizeof(dseg->bcount)) | 2517 MLX5_ETH_WQE_DATA_INLINE); 2518 len = (len + MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE + 2; 2519 } else { 2520 /* 2521 * The session is not legacy MPW or contains the 2522 * data buffer pointer segments. 2523 */ 2524 MLX5_ASSERT((len % MLX5_WSEG_SIZE) == 0); 2525 len = len / MLX5_WSEG_SIZE + 2; 2526 } 2527 wqem->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | len); 2528 txq->wqe_ci += (len + 3) / 4; 2529 loc->wqe_free -= (len + 3) / 4; 2530 loc->wqe_last = wqem; 2531 } 2532 2533 /** 2534 * The set of Tx burst functions for single-segment packets without TSO 2535 * and with Multi-Packet Writing feature support. 2536 * Supports all types of Tx offloads, except multi-packets and TSO. 2537 * 2538 * Uses MLX5_OPCODE_EMPW to build WQEs if possible and sends as many packet 2539 * per WQE as it can. If eMPW is not configured or packet can not be sent with 2540 * eMPW (VLAN insertion) the ordinary SEND opcode is used and only one packet 2541 * placed in WQE. 2542 * 2543 * Functions stop sending if it encounters the multi-segment packet or packet 2544 * with TSO requested. 2545 * 2546 * The routines are responsible for storing processed mbuf into elts ring buffer 2547 * and update elts_head if inlining offload is requested. 
Otherwise the copying 2548 * mbufs to elts can be postponed and completed at the end of burst routine. 2549 * 2550 * @param txq 2551 * Pointer to TX queue structure. 2552 * @param[in] pkts 2553 * Packets to transmit. 2554 * @param pkts_n 2555 * Number of packets in array. 2556 * @param loc 2557 * Pointer to burst routine local context. 2558 * @param olx 2559 * Configured Tx offloads mask. It is fully defined at 2560 * compile time and may be used for optimization. 2561 * 2562 * @return 2563 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 2564 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 2565 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 2566 * MLX5_TXCMP_CODE_TSO - TSO packet encountered. 2567 * MLX5_TXCMP_CODE_SINGLE - used inside functions set. 2568 * MLX5_TXCMP_CODE_EMPW - used inside functions set. 2569 * 2570 * Local context variables updated. 2571 * 2572 * 2573 * The routine sends packets with MLX5_OPCODE_EMPW 2574 * without inlining, this is dedicated optimized branch. 2575 * No VLAN insertion is supported. 2576 */ 2577 static __rte_always_inline enum mlx5_txcmp_code 2578 mlx5_tx_burst_empw_simple(struct mlx5_txq_data *__rte_restrict txq, 2579 struct rte_mbuf **__rte_restrict pkts, 2580 unsigned int pkts_n, 2581 struct mlx5_txq_local *__rte_restrict loc, 2582 unsigned int olx) 2583 { 2584 /* 2585 * Subroutine is the part of mlx5_tx_burst_single() and sends 2586 * single-segment packet with eMPW opcode without data inlining. 2587 */ 2588 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 2589 MLX5_ASSERT(MLX5_TXOFF_CONFIG(EMPW)); 2590 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2591 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2592 pkts += loc->pkts_sent + 1; 2593 pkts_n -= loc->pkts_sent; 2594 for (;;) { 2595 struct mlx5_wqe_dseg *__rte_restrict dseg; 2596 struct mlx5_wqe_eseg *__rte_restrict eseg; 2597 enum mlx5_txcmp_code ret; 2598 unsigned int part, loop; 2599 unsigned int slen = 0; 2600 2601 next_empw: 2602 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2603 if (MLX5_TXOFF_CONFIG(TXPP)) { 2604 enum mlx5_txcmp_code wret; 2605 2606 /* Generate WAIT for scheduling if requested. */ 2607 wret = mlx5_tx_schedule_send(txq, loc, olx); 2608 if (wret == MLX5_TXCMP_CODE_EXIT) 2609 return MLX5_TXCMP_CODE_EXIT; 2610 if (wret == MLX5_TXCMP_CODE_ERROR) 2611 return MLX5_TXCMP_CODE_ERROR; 2612 } 2613 part = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ? 2614 MLX5_MPW_MAX_PACKETS : 2615 MLX5_EMPW_MAX_PACKETS); 2616 if (unlikely(loc->elts_free < part)) { 2617 /* We have no enough elts to save all mbufs. */ 2618 if (unlikely(loc->elts_free < MLX5_EMPW_MIN_PACKETS)) 2619 return MLX5_TXCMP_CODE_EXIT; 2620 /* But we still able to send at least minimal eMPW. 
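 * The batch size is therefore clamped three times: by the remaining
 * packets, by the free elts and, just below, by the free WQEBBs. Each
 * WQEBB carries four 16-byte segments, so after the two title segments
 * a budget of, say, loc->wqe_free = 3 leaves room for
 * 3 * 4 - 2 = 10 Data Segments, i.e. at most 10 packets in this batch.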
*/ 2621 part = loc->elts_free; 2622 } 2623 /* Check whether we have enough WQEs */ 2624 if (unlikely(loc->wqe_free < ((2 + part + 3) / 4))) { 2625 if (unlikely(loc->wqe_free < 2626 ((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4))) 2627 return MLX5_TXCMP_CODE_EXIT; 2628 part = (loc->wqe_free * 4) - 2; 2629 } 2630 if (likely(part > 1)) 2631 rte_prefetch0(*pkts); 2632 loc->wqe_last = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2633 /* 2634 * Build eMPW title WQEBB: 2635 * - Control Segment, eMPW opcode 2636 * - Ethernet Segment, no inline 2637 */ 2638 mlx5_tx_cseg_init(txq, loc, loc->wqe_last, part + 2, 2639 MLX5_OPCODE_ENHANCED_MPSW, olx); 2640 mlx5_tx_eseg_none(txq, loc, loc->wqe_last, 2641 olx & ~MLX5_TXOFF_CONFIG_VLAN); 2642 eseg = &loc->wqe_last->eseg; 2643 dseg = &loc->wqe_last->dseg[0]; 2644 loop = part; 2645 /* Store the packet length for legacy MPW. */ 2646 if (MLX5_TXOFF_CONFIG(MPW)) 2647 eseg->mss = rte_cpu_to_be_16 2648 (rte_pktmbuf_data_len(loc->mbuf)); 2649 for (;;) { 2650 uint32_t dlen = rte_pktmbuf_data_len(loc->mbuf); 2651 #ifdef MLX5_PMD_SOFT_COUNTERS 2652 /* Update sent data bytes counter. */ 2653 slen += dlen; 2654 #endif 2655 mlx5_tx_dseg_ptr 2656 (txq, loc, dseg, 2657 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 2658 dlen, olx); 2659 if (unlikely(--loop == 0)) 2660 break; 2661 loc->mbuf = *pkts++; 2662 if (likely(loop > 1)) 2663 rte_prefetch0(*pkts); 2664 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 2665 /* 2666 * Unroll the completion code to avoid 2667 * returning variable value - it results in 2668 * unoptimized sequent checking in caller. 2669 */ 2670 if (ret == MLX5_TXCMP_CODE_MULTI) { 2671 part -= loop; 2672 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2673 if (unlikely(!loc->elts_free || 2674 !loc->wqe_free)) 2675 return MLX5_TXCMP_CODE_EXIT; 2676 return MLX5_TXCMP_CODE_MULTI; 2677 } 2678 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2679 if (ret == MLX5_TXCMP_CODE_TSO) { 2680 part -= loop; 2681 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2682 if (unlikely(!loc->elts_free || 2683 !loc->wqe_free)) 2684 return MLX5_TXCMP_CODE_EXIT; 2685 return MLX5_TXCMP_CODE_TSO; 2686 } 2687 if (ret == MLX5_TXCMP_CODE_SINGLE) { 2688 part -= loop; 2689 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2690 if (unlikely(!loc->elts_free || 2691 !loc->wqe_free)) 2692 return MLX5_TXCMP_CODE_EXIT; 2693 return MLX5_TXCMP_CODE_SINGLE; 2694 } 2695 if (ret != MLX5_TXCMP_CODE_EMPW) { 2696 MLX5_ASSERT(false); 2697 part -= loop; 2698 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2699 return MLX5_TXCMP_CODE_ERROR; 2700 } 2701 /* 2702 * Check whether packet parameters coincide 2703 * within assumed eMPW batch: 2704 * - check sum settings 2705 * - metadata value 2706 * - software parser settings 2707 * - packets length (legacy MPW only) 2708 * - scheduling is not required 2709 */ 2710 if (!mlx5_tx_match_empw(txq, eseg, loc, dlen, olx)) { 2711 MLX5_ASSERT(loop); 2712 part -= loop; 2713 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2714 if (unlikely(!loc->elts_free || 2715 !loc->wqe_free)) 2716 return MLX5_TXCMP_CODE_EXIT; 2717 pkts_n -= part; 2718 goto next_empw; 2719 } 2720 /* Packet attributes match, continue the same eMPW. */ 2721 ++dseg; 2722 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 2723 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 2724 } 2725 /* eMPW is built successfully, update loop parameters. */ 2726 MLX5_ASSERT(!loop); 2727 MLX5_ASSERT(pkts_n >= part); 2728 #ifdef MLX5_PMD_SOFT_COUNTERS 2729 /* Update sent data bytes counter. 
*/ 2730 txq->stats.obytes += slen; 2731 #endif 2732 loc->elts_free -= part; 2733 loc->pkts_sent += part; 2734 txq->wqe_ci += (2 + part + 3) / 4; 2735 loc->wqe_free -= (2 + part + 3) / 4; 2736 pkts_n -= part; 2737 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 2738 return MLX5_TXCMP_CODE_EXIT; 2739 loc->mbuf = *pkts++; 2740 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 2741 if (unlikely(ret != MLX5_TXCMP_CODE_EMPW)) 2742 return ret; 2743 /* Continue sending eMPW batches. */ 2744 } 2745 MLX5_ASSERT(false); 2746 } 2747 2748 /** 2749 * The routine sends packets with MLX5_OPCODE_EMPW 2750 * with inlining, optionally supports VLAN insertion. 2751 */ 2752 static __rte_always_inline enum mlx5_txcmp_code 2753 mlx5_tx_burst_empw_inline(struct mlx5_txq_data *__rte_restrict txq, 2754 struct rte_mbuf **__rte_restrict pkts, 2755 unsigned int pkts_n, 2756 struct mlx5_txq_local *__rte_restrict loc, 2757 unsigned int olx) 2758 { 2759 /* 2760 * Subroutine is the part of mlx5_tx_burst_single() and sends 2761 * single-segment packet with eMPW opcode with data inlining. 2762 */ 2763 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 2764 MLX5_ASSERT(MLX5_TXOFF_CONFIG(EMPW)); 2765 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2766 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2767 pkts += loc->pkts_sent + 1; 2768 pkts_n -= loc->pkts_sent; 2769 for (;;) { 2770 struct mlx5_wqe_dseg *__rte_restrict dseg; 2771 struct mlx5_wqe *__rte_restrict wqem; 2772 enum mlx5_txcmp_code ret; 2773 unsigned int room, part, nlim; 2774 unsigned int slen = 0; 2775 2776 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2777 if (MLX5_TXOFF_CONFIG(TXPP)) { 2778 enum mlx5_txcmp_code wret; 2779 2780 /* Generate WAIT for scheduling if requested. */ 2781 wret = mlx5_tx_schedule_send(txq, loc, olx); 2782 if (wret == MLX5_TXCMP_CODE_EXIT) 2783 return MLX5_TXCMP_CODE_EXIT; 2784 if (wret == MLX5_TXCMP_CODE_ERROR) 2785 return MLX5_TXCMP_CODE_ERROR; 2786 } 2787 /* 2788 * Limits the amount of packets in one WQE 2789 * to improve CQE latency generation. 2790 */ 2791 nlim = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ? 2792 MLX5_MPW_INLINE_MAX_PACKETS : 2793 MLX5_EMPW_MAX_PACKETS); 2794 /* Check whether we have minimal amount WQEs */ 2795 if (unlikely(loc->wqe_free < 2796 ((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4))) 2797 return MLX5_TXCMP_CODE_EXIT; 2798 if (likely(pkts_n > 1)) 2799 rte_prefetch0(*pkts); 2800 wqem = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2801 /* 2802 * Build eMPW title WQEBB: 2803 * - Control Segment, eMPW opcode, zero DS 2804 * - Ethernet Segment, no inline 2805 */ 2806 mlx5_tx_cseg_init(txq, loc, wqem, 0, 2807 MLX5_OPCODE_ENHANCED_MPSW, olx); 2808 mlx5_tx_eseg_none(txq, loc, wqem, 2809 olx & ~MLX5_TXOFF_CONFIG_VLAN); 2810 dseg = &wqem->dseg[0]; 2811 /* Store the packet length for legacy MPW. */ 2812 if (MLX5_TXOFF_CONFIG(MPW)) 2813 wqem->eseg.mss = rte_cpu_to_be_16 2814 (rte_pktmbuf_data_len(loc->mbuf)); 2815 room = RTE_MIN(MLX5_WQE_SIZE_MAX / MLX5_WQE_SIZE, 2816 loc->wqe_free) * MLX5_WQE_SIZE - 2817 MLX5_WQE_CSEG_SIZE - 2818 MLX5_WQE_ESEG_SIZE; 2819 /* Limit the room for legacy MPW sessions for performance. */ 2820 if (MLX5_TXOFF_CONFIG(MPW)) 2821 room = RTE_MIN(room, 2822 RTE_MAX(txq->inlen_empw + 2823 sizeof(dseg->bcount) + 2824 (MLX5_TXOFF_CONFIG(VLAN) ? 2825 sizeof(struct rte_vlan_hdr) : 0), 2826 MLX5_MPW_INLINE_MAX_PACKETS * 2827 MLX5_WQE_DSEG_SIZE)); 2828 /* Build WQE till we have space, packets and resources. 
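 * Inside the loop "room" counts the bytes still available in the
 * current WQE while "part" keeps the initial value, so the descriptor
 * space consumed so far is always (part - room); that difference is
 * what mlx5_tx_idone_empw() receives as the descriptor length when the
 * session is closed.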
*/ 2829 part = room; 2830 for (;;) { 2831 uint32_t dlen = rte_pktmbuf_data_len(loc->mbuf); 2832 uint8_t *dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 2833 unsigned int tlen; 2834 2835 MLX5_ASSERT(room >= MLX5_WQE_DSEG_SIZE); 2836 MLX5_ASSERT((room % MLX5_WQE_DSEG_SIZE) == 0); 2837 MLX5_ASSERT((uintptr_t)dseg < (uintptr_t)txq->wqes_end); 2838 /* 2839 * Some Tx offloads may cause an error if packet is not 2840 * long enough, check against assumed minimal length. 2841 */ 2842 if (unlikely(dlen <= MLX5_ESEG_MIN_INLINE_SIZE)) { 2843 part -= room; 2844 if (unlikely(!part)) 2845 return MLX5_TXCMP_CODE_ERROR; 2846 /* 2847 * We have some successfully built 2848 * packet Data Segments to send. 2849 */ 2850 mlx5_tx_idone_empw(txq, loc, part, 2851 slen, wqem, olx); 2852 return MLX5_TXCMP_CODE_ERROR; 2853 } 2854 /* Inline or not inline - that's the Question. */ 2855 if (dlen > txq->inlen_empw || 2856 loc->mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) 2857 goto pointer_empw; 2858 if (MLX5_TXOFF_CONFIG(MPW)) { 2859 if (dlen > txq->inlen_send) 2860 goto pointer_empw; 2861 tlen = dlen; 2862 if (part == room) { 2863 /* Open new inline MPW session. */ 2864 tlen += sizeof(dseg->bcount); 2865 dseg->bcount = RTE_BE32(0); 2866 dseg = RTE_PTR_ADD 2867 (dseg, sizeof(dseg->bcount)); 2868 } else { 2869 /* 2870 * No pointer and inline descriptor 2871 * intermix for legacy MPW sessions. 2872 */ 2873 if (wqem->dseg[0].bcount) 2874 break; 2875 } 2876 } else { 2877 tlen = sizeof(dseg->bcount) + dlen; 2878 } 2879 /* Inline entire packet, optional VLAN insertion. */ 2880 if (MLX5_TXOFF_CONFIG(VLAN) && 2881 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) { 2882 /* 2883 * The packet length must be checked in 2884 * mlx5_tx_able_to_empw() and packet 2885 * fits into inline length guaranteed. 2886 */ 2887 MLX5_ASSERT((dlen + 2888 sizeof(struct rte_vlan_hdr)) <= 2889 txq->inlen_empw); 2890 tlen += sizeof(struct rte_vlan_hdr); 2891 if (room < tlen) 2892 break; 2893 dseg = mlx5_tx_dseg_vlan(txq, loc, dseg, 2894 dptr, dlen, olx); 2895 #ifdef MLX5_PMD_SOFT_COUNTERS 2896 /* Update sent data bytes counter. */ 2897 slen += sizeof(struct rte_vlan_hdr); 2898 #endif 2899 } else { 2900 if (room < tlen) 2901 break; 2902 dseg = mlx5_tx_dseg_empw(txq, loc, dseg, 2903 dptr, dlen, olx); 2904 } 2905 if (!MLX5_TXOFF_CONFIG(MPW)) 2906 tlen = RTE_ALIGN(tlen, MLX5_WSEG_SIZE); 2907 MLX5_ASSERT(room >= tlen); 2908 room -= tlen; 2909 /* 2910 * Packet data are completely inline, 2911 * we can try to free the packet. 2912 */ 2913 if (likely(loc->pkts_sent == loc->mbuf_free)) { 2914 /* 2915 * All the packets from the burst beginning 2916 * are inline, we can free mbufs directly 2917 * from the origin array on tx_burst exit(). 2918 */ 2919 loc->mbuf_free++; 2920 goto next_mbuf; 2921 } 2922 /* 2923 * In order no to call rte_pktmbuf_free_seg() here, 2924 * in the most inner loop (that might be very 2925 * expensive) we just save the mbuf in elts. 2926 */ 2927 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 2928 loc->elts_free--; 2929 goto next_mbuf; 2930 pointer_empw: 2931 /* 2932 * No pointer and inline descriptor 2933 * intermix for legacy MPW sessions. 2934 */ 2935 if (MLX5_TXOFF_CONFIG(MPW) && 2936 part != room && 2937 wqem->dseg[0].bcount == RTE_BE32(0)) 2938 break; 2939 /* 2940 * Not inlinable VLAN packets are 2941 * proceeded outside of this routine. 
2942 */ 2943 MLX5_ASSERT(room >= MLX5_WQE_DSEG_SIZE); 2944 if (MLX5_TXOFF_CONFIG(VLAN)) 2945 MLX5_ASSERT(!(loc->mbuf->ol_flags & 2946 RTE_MBUF_F_TX_VLAN)); 2947 mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx); 2948 /* We have to store mbuf in elts.*/ 2949 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 2950 loc->elts_free--; 2951 room -= MLX5_WQE_DSEG_SIZE; 2952 /* Ring buffer wraparound is checked at the loop end.*/ 2953 ++dseg; 2954 next_mbuf: 2955 #ifdef MLX5_PMD_SOFT_COUNTERS 2956 /* Update sent data bytes counter. */ 2957 slen += dlen; 2958 #endif 2959 loc->pkts_sent++; 2960 pkts_n--; 2961 if (unlikely(!pkts_n || !loc->elts_free)) { 2962 /* 2963 * We have no resources/packets to 2964 * continue build descriptors. 2965 */ 2966 part -= room; 2967 mlx5_tx_idone_empw(txq, loc, part, 2968 slen, wqem, olx); 2969 return MLX5_TXCMP_CODE_EXIT; 2970 } 2971 loc->mbuf = *pkts++; 2972 if (likely(pkts_n > 1)) 2973 rte_prefetch0(*pkts); 2974 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 2975 /* 2976 * Unroll the completion code to avoid 2977 * returning variable value - it results in 2978 * unoptimized sequent checking in caller. 2979 */ 2980 if (ret == MLX5_TXCMP_CODE_MULTI) { 2981 part -= room; 2982 mlx5_tx_idone_empw(txq, loc, part, 2983 slen, wqem, olx); 2984 if (unlikely(!loc->elts_free || 2985 !loc->wqe_free)) 2986 return MLX5_TXCMP_CODE_EXIT; 2987 return MLX5_TXCMP_CODE_MULTI; 2988 } 2989 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2990 if (ret == MLX5_TXCMP_CODE_TSO) { 2991 part -= room; 2992 mlx5_tx_idone_empw(txq, loc, part, 2993 slen, wqem, olx); 2994 if (unlikely(!loc->elts_free || 2995 !loc->wqe_free)) 2996 return MLX5_TXCMP_CODE_EXIT; 2997 return MLX5_TXCMP_CODE_TSO; 2998 } 2999 if (ret == MLX5_TXCMP_CODE_SINGLE) { 3000 part -= room; 3001 mlx5_tx_idone_empw(txq, loc, part, 3002 slen, wqem, olx); 3003 if (unlikely(!loc->elts_free || 3004 !loc->wqe_free)) 3005 return MLX5_TXCMP_CODE_EXIT; 3006 return MLX5_TXCMP_CODE_SINGLE; 3007 } 3008 if (ret != MLX5_TXCMP_CODE_EMPW) { 3009 MLX5_ASSERT(false); 3010 part -= room; 3011 mlx5_tx_idone_empw(txq, loc, part, 3012 slen, wqem, olx); 3013 return MLX5_TXCMP_CODE_ERROR; 3014 } 3015 /* Check if we have minimal room left. */ 3016 nlim--; 3017 if (unlikely(!nlim || room < MLX5_WQE_DSEG_SIZE)) 3018 break; 3019 /* 3020 * Check whether packet parameters coincide 3021 * within assumed eMPW batch: 3022 * - check sum settings 3023 * - metadata value 3024 * - software parser settings 3025 * - packets length (legacy MPW only) 3026 * - scheduling is not required 3027 */ 3028 if (!mlx5_tx_match_empw(txq, &wqem->eseg, 3029 loc, dlen, olx)) 3030 break; 3031 /* Packet attributes match, continue the same eMPW. */ 3032 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 3033 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 3034 } 3035 /* 3036 * We get here to close an existing eMPW 3037 * session and start the new one. 3038 */ 3039 MLX5_ASSERT(pkts_n); 3040 part -= room; 3041 if (unlikely(!part)) 3042 return MLX5_TXCMP_CODE_EXIT; 3043 mlx5_tx_idone_empw(txq, loc, part, slen, wqem, olx); 3044 if (unlikely(!loc->elts_free || 3045 !loc->wqe_free)) 3046 return MLX5_TXCMP_CODE_EXIT; 3047 /* Continue the loop with new eMPW session. */ 3048 } 3049 MLX5_ASSERT(false); 3050 } 3051 3052 /** 3053 * The routine sends packets with ordinary MLX5_OPCODE_SEND. 3054 * Data inlining and VLAN insertion are supported. 
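 *
 * Depending on the packet and the queue settings one of four WQE
 * layouts is built: complete inline when the packet fits
 * txq->inlen_send, a txq->inlen_mode sized inline part plus a pointer
 * Data Segment, a minimal MLX5_ESEG_MIN_INLINE_SIZE inline part plus a
 * pointer Data Segment (also used for software VLAN insertion), or no
 * inline at all (no-inline hint honored or inlining not configured).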
3055 */ 3056 static __rte_always_inline enum mlx5_txcmp_code 3057 mlx5_tx_burst_single_send(struct mlx5_txq_data *__rte_restrict txq, 3058 struct rte_mbuf **__rte_restrict pkts, 3059 unsigned int pkts_n, 3060 struct mlx5_txq_local *__rte_restrict loc, 3061 unsigned int olx) 3062 { 3063 /* 3064 * Subroutine is the part of mlx5_tx_burst_single() 3065 * and sends single-segment packet with SEND opcode. 3066 */ 3067 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3068 MLX5_ASSERT(pkts_n > loc->pkts_sent); 3069 pkts += loc->pkts_sent + 1; 3070 pkts_n -= loc->pkts_sent; 3071 for (;;) { 3072 struct mlx5_wqe *__rte_restrict wqe; 3073 enum mlx5_txcmp_code ret; 3074 3075 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 3076 if (MLX5_TXOFF_CONFIG(TXPP)) { 3077 enum mlx5_txcmp_code wret; 3078 3079 /* Generate WAIT for scheduling if requested. */ 3080 wret = mlx5_tx_schedule_send(txq, loc, olx); 3081 if (wret == MLX5_TXCMP_CODE_EXIT) 3082 return MLX5_TXCMP_CODE_EXIT; 3083 if (wret == MLX5_TXCMP_CODE_ERROR) 3084 return MLX5_TXCMP_CODE_ERROR; 3085 } 3086 if (MLX5_TXOFF_CONFIG(INLINE)) { 3087 unsigned int inlen, vlan = 0; 3088 3089 inlen = rte_pktmbuf_data_len(loc->mbuf); 3090 if (MLX5_TXOFF_CONFIG(VLAN) && 3091 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) { 3092 vlan = sizeof(struct rte_vlan_hdr); 3093 inlen += vlan; 3094 } 3095 /* 3096 * If inlining is enabled at configuration time 3097 * the limit must be not less than minimal size. 3098 * Otherwise we would do extra check for data 3099 * size to avoid crashes due to length overflow. 3100 */ 3101 MLX5_ASSERT(txq->inlen_send >= 3102 MLX5_ESEG_MIN_INLINE_SIZE); 3103 if (inlen <= txq->inlen_send) { 3104 unsigned int seg_n, wqe_n; 3105 3106 rte_prefetch0(rte_pktmbuf_mtod 3107 (loc->mbuf, uint8_t *)); 3108 /* Check against minimal length. */ 3109 if (inlen <= MLX5_ESEG_MIN_INLINE_SIZE) 3110 return MLX5_TXCMP_CODE_ERROR; 3111 if (loc->mbuf->ol_flags & 3112 RTE_MBUF_F_TX_DYNF_NOINLINE) { 3113 /* 3114 * The hint flag not to inline packet 3115 * data is set. Check whether we can 3116 * follow the hint. 3117 */ 3118 if ((!MLX5_TXOFF_CONFIG(EMPW) && 3119 txq->inlen_mode) || 3120 (MLX5_TXOFF_CONFIG(MPW) && 3121 txq->inlen_mode)) { 3122 if (inlen <= txq->inlen_send) 3123 goto single_inline; 3124 /* 3125 * The hardware requires the 3126 * minimal inline data header. 3127 */ 3128 goto single_min_inline; 3129 } 3130 if (MLX5_TXOFF_CONFIG(VLAN) && 3131 vlan && !txq->vlan_en) { 3132 /* 3133 * We must insert VLAN tag 3134 * by software means. 3135 */ 3136 goto single_part_inline; 3137 } 3138 goto single_no_inline; 3139 } 3140 single_inline: 3141 /* 3142 * Completely inlined packet data WQE: 3143 * - Control Segment, SEND opcode 3144 * - Ethernet Segment, no VLAN insertion 3145 * - Data inlined, VLAN optionally inserted 3146 * - Alignment to MLX5_WSEG_SIZE 3147 * Have to estimate amount of WQEBBs 3148 */ 3149 seg_n = (inlen + 3 * MLX5_WSEG_SIZE - 3150 MLX5_ESEG_MIN_INLINE_SIZE + 3151 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3152 /* Check if there are enough WQEBBs. */ 3153 wqe_n = (seg_n + 3) / 4; 3154 if (wqe_n > loc->wqe_free) 3155 return MLX5_TXCMP_CODE_EXIT; 3156 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3157 loc->wqe_last = wqe; 3158 mlx5_tx_cseg_init(txq, loc, wqe, seg_n, 3159 MLX5_OPCODE_SEND, olx); 3160 mlx5_tx_eseg_data(txq, loc, wqe, 3161 vlan, inlen, 0, olx); 3162 txq->wqe_ci += wqe_n; 3163 loc->wqe_free -= wqe_n; 3164 /* 3165 * Packet data are completely inlined, 3166 * free the packet immediately. 
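 * For reference, the sizing above works out as follows: with the usual
 * 16-byte WSEG and 18-byte minimal ESEG inline, a fully inlined
 * 100-byte packet needs seg_n = (100 + 3 * 16 - 18 + 15) / 16 = 9
 * segments and therefore wqe_n = (9 + 3) / 4 = 3 WQEBBs.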
3167 */ 3168 rte_pktmbuf_free_seg(loc->mbuf); 3169 } else if ((!MLX5_TXOFF_CONFIG(EMPW) || 3170 MLX5_TXOFF_CONFIG(MPW)) && 3171 txq->inlen_mode) { 3172 /* 3173 * If minimal inlining is requested the eMPW 3174 * feature should be disabled due to data is 3175 * inlined into Ethernet Segment, which can 3176 * not contain inlined data for eMPW due to 3177 * segment shared for all packets. 3178 */ 3179 struct mlx5_wqe_dseg *__rte_restrict dseg; 3180 unsigned int ds; 3181 uint8_t *dptr; 3182 3183 /* 3184 * The inline-mode settings require 3185 * to inline the specified amount of 3186 * data bytes to the Ethernet Segment. 3187 * We should check the free space in 3188 * WQE ring buffer to inline partially. 3189 */ 3190 single_min_inline: 3191 MLX5_ASSERT(txq->inlen_send >= txq->inlen_mode); 3192 MLX5_ASSERT(inlen > txq->inlen_mode); 3193 MLX5_ASSERT(txq->inlen_mode >= 3194 MLX5_ESEG_MIN_INLINE_SIZE); 3195 /* 3196 * Check whether there are enough free WQEBBs: 3197 * - Control Segment 3198 * - Ethernet Segment 3199 * - First Segment of inlined Ethernet data 3200 * - ... data continued ... 3201 * - Finishing Data Segment of pointer type 3202 */ 3203 ds = (MLX5_WQE_CSEG_SIZE + 3204 MLX5_WQE_ESEG_SIZE + 3205 MLX5_WQE_DSEG_SIZE + 3206 txq->inlen_mode - 3207 MLX5_ESEG_MIN_INLINE_SIZE + 3208 MLX5_WQE_DSEG_SIZE + 3209 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3210 if (loc->wqe_free < ((ds + 3) / 4)) 3211 return MLX5_TXCMP_CODE_EXIT; 3212 /* 3213 * Build the ordinary SEND WQE: 3214 * - Control Segment 3215 * - Ethernet Segment, inline inlen_mode bytes 3216 * - Data Segment of pointer type 3217 */ 3218 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3219 loc->wqe_last = wqe; 3220 mlx5_tx_cseg_init(txq, loc, wqe, ds, 3221 MLX5_OPCODE_SEND, olx); 3222 dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan, 3223 txq->inlen_mode, 3224 0, olx); 3225 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + 3226 txq->inlen_mode - vlan; 3227 inlen -= txq->inlen_mode; 3228 mlx5_tx_dseg_ptr(txq, loc, dseg, 3229 dptr, inlen, olx); 3230 /* 3231 * WQE is built, update the loop parameters 3232 * and got to the next packet. 3233 */ 3234 txq->wqe_ci += (ds + 3) / 4; 3235 loc->wqe_free -= (ds + 3) / 4; 3236 /* We have to store mbuf in elts.*/ 3237 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 3238 txq->elts[txq->elts_head++ & txq->elts_m] = 3239 loc->mbuf; 3240 --loc->elts_free; 3241 } else { 3242 uint8_t *dptr; 3243 unsigned int dlen; 3244 3245 /* 3246 * Partially inlined packet data WQE, we have 3247 * some space in title WQEBB, we can fill it 3248 * with some packet data. It takes one WQEBB, 3249 * it is available, no extra space check: 3250 * - Control Segment, SEND opcode 3251 * - Ethernet Segment, no VLAN insertion 3252 * - MLX5_ESEG_MIN_INLINE_SIZE bytes of Data 3253 * - Data Segment, pointer type 3254 * 3255 * We also get here if VLAN insertion is not 3256 * supported by HW, the inline is enabled. 3257 */ 3258 single_part_inline: 3259 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3260 loc->wqe_last = wqe; 3261 mlx5_tx_cseg_init(txq, loc, wqe, 4, 3262 MLX5_OPCODE_SEND, olx); 3263 mlx5_tx_eseg_dmin(txq, loc, wqe, vlan, olx); 3264 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + 3265 MLX5_ESEG_MIN_INLINE_SIZE - vlan; 3266 /* 3267 * The length check is performed above, by 3268 * comparing with txq->inlen_send. We should 3269 * not get overflow here. 
3270 */ 3271 MLX5_ASSERT(inlen > MLX5_ESEG_MIN_INLINE_SIZE); 3272 dlen = inlen - MLX5_ESEG_MIN_INLINE_SIZE; 3273 mlx5_tx_dseg_ptr(txq, loc, &wqe->dseg[1], 3274 dptr, dlen, olx); 3275 ++txq->wqe_ci; 3276 --loc->wqe_free; 3277 /* We have to store mbuf in elts.*/ 3278 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 3279 txq->elts[txq->elts_head++ & txq->elts_m] = 3280 loc->mbuf; 3281 --loc->elts_free; 3282 } 3283 #ifdef MLX5_PMD_SOFT_COUNTERS 3284 /* Update sent data bytes counter. */ 3285 txq->stats.obytes += vlan + 3286 rte_pktmbuf_data_len(loc->mbuf); 3287 #endif 3288 } else { 3289 /* 3290 * No inline at all, it means the CPU cycles saving 3291 * is prioritized at configuration, we should not 3292 * copy any packet data to WQE. 3293 * 3294 * SEND WQE, one WQEBB: 3295 * - Control Segment, SEND opcode 3296 * - Ethernet Segment, optional VLAN, no inline 3297 * - Data Segment, pointer type 3298 */ 3299 single_no_inline: 3300 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3301 loc->wqe_last = wqe; 3302 mlx5_tx_cseg_init(txq, loc, wqe, 3, 3303 MLX5_OPCODE_SEND, olx); 3304 mlx5_tx_eseg_none(txq, loc, wqe, olx); 3305 mlx5_tx_dseg_ptr 3306 (txq, loc, &wqe->dseg[0], 3307 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 3308 rte_pktmbuf_data_len(loc->mbuf), olx); 3309 ++txq->wqe_ci; 3310 --loc->wqe_free; 3311 /* 3312 * We should not store mbuf pointer in elts 3313 * if no inlining is configured, this is done 3314 * by calling routine in a batch copy. 3315 */ 3316 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 3317 --loc->elts_free; 3318 #ifdef MLX5_PMD_SOFT_COUNTERS 3319 /* Update sent data bytes counter. */ 3320 txq->stats.obytes += rte_pktmbuf_data_len(loc->mbuf); 3321 if (MLX5_TXOFF_CONFIG(VLAN) && 3322 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) 3323 txq->stats.obytes += 3324 sizeof(struct rte_vlan_hdr); 3325 #endif 3326 } 3327 ++loc->pkts_sent; 3328 --pkts_n; 3329 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 3330 return MLX5_TXCMP_CODE_EXIT; 3331 loc->mbuf = *pkts++; 3332 if (pkts_n > 1) 3333 rte_prefetch0(*pkts); 3334 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 3335 if (unlikely(ret != MLX5_TXCMP_CODE_SINGLE)) 3336 return ret; 3337 } 3338 MLX5_ASSERT(false); 3339 } 3340 3341 static __rte_always_inline enum mlx5_txcmp_code 3342 mlx5_tx_burst_single(struct mlx5_txq_data *__rte_restrict txq, 3343 struct rte_mbuf **__rte_restrict pkts, 3344 unsigned int pkts_n, 3345 struct mlx5_txq_local *__rte_restrict loc, 3346 unsigned int olx) 3347 { 3348 enum mlx5_txcmp_code ret; 3349 3350 ret = mlx5_tx_able_to_empw(txq, loc, olx, false); 3351 if (ret == MLX5_TXCMP_CODE_SINGLE) 3352 goto ordinary_send; 3353 MLX5_ASSERT(ret == MLX5_TXCMP_CODE_EMPW); 3354 for (;;) { 3355 /* Optimize for inline/no inline eMPW send. */ 3356 ret = (MLX5_TXOFF_CONFIG(INLINE)) ? 3357 mlx5_tx_burst_empw_inline 3358 (txq, pkts, pkts_n, loc, olx) : 3359 mlx5_tx_burst_empw_simple 3360 (txq, pkts, pkts_n, loc, olx); 3361 if (ret != MLX5_TXCMP_CODE_SINGLE) 3362 return ret; 3363 /* The resources to send one packet should remain. */ 3364 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3365 ordinary_send: 3366 ret = mlx5_tx_burst_single_send(txq, pkts, pkts_n, loc, olx); 3367 MLX5_ASSERT(ret != MLX5_TXCMP_CODE_SINGLE); 3368 if (ret != MLX5_TXCMP_CODE_EMPW) 3369 return ret; 3370 /* The resources to send one packet should remain. */ 3371 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3372 } 3373 } 3374 3375 /** 3376 * DPDK Tx callback template. This is configured template used to generate 3377 * routines optimized for specified offload setup. 
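 * As an illustration (the instance name below is arbitrary), a variant
 * limited to multi-segment and checksum support could be produced with
 *
 *   MLX5_TXOFF_DECL(mc, MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM);
 *
 * and every other MLX5_TXOFF_CONFIG(...) test in the template body is
 * then folded away by the compiler, since olx is a compile-time
 * constant in the generated instance.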

/**
 * DPDK Tx callback template. This is a configured template used to generate
 * routines optimized for the specified offload setup.
 * One of these generated functions is chosen at SQ configuration time.
 *
 * @param txq
 *   Generic pointer to TX queue structure.
 * @param[in] pkts
 *   Packets to transmit.
 * @param pkts_n
 *   Number of packets in array.
 * @param olx
 *   Configured offloads mask, represents the bits of MLX5_TXOFF_CONFIG_xxx
 *   values. Should be a compile-time constant to take advantage of the
 *   static configuration optimizations.
 *
 * @return
 *   Number of packets successfully transmitted (<= pkts_n).
 */
static __rte_always_inline uint16_t
mlx5_tx_burst_tmpl(struct mlx5_txq_data *__rte_restrict txq,
		   struct rte_mbuf **__rte_restrict pkts,
		   uint16_t pkts_n,
		   unsigned int olx)
{
	struct mlx5_txq_local loc;
	enum mlx5_txcmp_code ret;
	unsigned int part;

	MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail));
	MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi));
	if (unlikely(!pkts_n))
		return 0;
	if (MLX5_TXOFF_CONFIG(INLINE))
		loc.mbuf_free = 0;
	loc.pkts_sent = 0;
	loc.pkts_copy = 0;
	loc.wqe_last = NULL;

send_loop:
	loc.pkts_loop = loc.pkts_sent;
	/*
	 * Check if there are some CQEs, if any:
	 * - process any encountered errors
	 * - process the completed WQEs
	 * - free related mbufs
	 * - doorbell the NIC about processed CQEs
	 */
	rte_prefetch0(*(pkts + loc.pkts_sent));
	mlx5_tx_handle_completion(txq, olx);
	/*
	 * Calculate the number of available resources - elts and WQEs.
	 * There are two possible different scenarios:
	 * - no data inlining into WQEs, one WQEBB may contain up to
	 *   four packets, in this case elts become the scarce resource
	 * - data inlining into WQEs, one packet may require multiple
	 *   WQEBBs, the WQEs become the limiting factor.
	 */
	MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail));
	loc.elts_free = txq->elts_s -
				(uint16_t)(txq->elts_head - txq->elts_tail);
	MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi));
	loc.wqe_free = txq->wqe_s -
				(uint16_t)(txq->wqe_ci - txq->wqe_pi);
	if (unlikely(!loc.elts_free || !loc.wqe_free))
		goto burst_exit;
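	/*
	 * Informational example of the wrap-around arithmetic above: the
	 * 16-bit indices are free-running, so with elts_head == 3 and
	 * elts_tail == 65533 the cast yields (uint16_t)(3 - 65533) == 6
	 * in-flight elements and, assuming for illustration elts_s == 256,
	 * 250 free elts for this burst.
	 */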
	for (;;) {
		/*
		 * Fetch the packet from array. Usually this is the first
		 * packet in a series of multi/single segment packets.
		 */
		loc.mbuf = *(pkts + loc.pkts_sent);
		/* Dedicated branch for multi-segment packets. */
		if (MLX5_TXOFF_CONFIG(MULTI) &&
		    unlikely(NB_SEGS(loc.mbuf) > 1)) {
			/*
			 * Multi-segment packet encountered.
			 * Hardware is able to process it only
			 * with SEND/TSO opcodes, one packet
			 * per WQE, do it in a dedicated routine.
			 */
enter_send_multi:
			MLX5_ASSERT(loc.pkts_sent >= loc.pkts_copy);
			part = loc.pkts_sent - loc.pkts_copy;
			if (!MLX5_TXOFF_CONFIG(INLINE) && part) {
				/*
				 * There are some single-segment mbufs not
				 * stored in elts. The mbufs must be in the
				 * same order as WQEs, so we must copy the
				 * mbufs to elts here, before the mbufs of the
				 * coming multi-segment packet are appended.
				 */
				mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy,
						  part, olx);
				loc.pkts_copy = loc.pkts_sent;
			}
			MLX5_ASSERT(pkts_n > loc.pkts_sent);
			ret = mlx5_tx_burst_mseg(txq, pkts, pkts_n, &loc, olx);
			if (!MLX5_TXOFF_CONFIG(INLINE))
				loc.pkts_copy = loc.pkts_sent;
			/*
			 * These return code checks are supposed
			 * to be optimized out due to routine inlining.
			 */
			if (ret == MLX5_TXCMP_CODE_EXIT) {
				/*
				 * The routine returns this code when
				 * all packets are sent or there are not
				 * enough resources to complete the request.
				 */
				break;
			}
			if (ret == MLX5_TXCMP_CODE_ERROR) {
				/*
				 * The routine returns this code when some
				 * error in the incoming packets format
				 * occurred.
				 */
				txq->stats.oerrors++;
				break;
			}
			if (ret == MLX5_TXCMP_CODE_SINGLE) {
				/*
				 * A single-segment packet was encountered
				 * in the array, try to send it in the best
				 * optimized way, possibly engaging eMPW.
				 */
				goto enter_send_single;
			}
			if (MLX5_TXOFF_CONFIG(TSO) &&
			    ret == MLX5_TXCMP_CODE_TSO) {
				/*
				 * A single-segment TSO packet was
				 * encountered in the array.
				 */
				goto enter_send_tso;
			}
			/* We must not get here. Something is going wrong. */
			MLX5_ASSERT(false);
			txq->stats.oerrors++;
			break;
		}
		/* Dedicated branch for single-segment TSO packets. */
		if (MLX5_TXOFF_CONFIG(TSO) &&
		    unlikely(loc.mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
			/*
			 * TSO might require a special way of inlining
			 * (dedicated parameters) and is sent with the
			 * MLX5_OPCODE_TSO opcode only, handle this in
			 * a dedicated branch.
			 */
enter_send_tso:
			MLX5_ASSERT(NB_SEGS(loc.mbuf) == 1);
			MLX5_ASSERT(pkts_n > loc.pkts_sent);
			ret = mlx5_tx_burst_tso(txq, pkts, pkts_n, &loc, olx);
			/*
			 * These return code checks are supposed
			 * to be optimized out due to routine inlining.
			 */
			if (ret == MLX5_TXCMP_CODE_EXIT)
				break;
			if (ret == MLX5_TXCMP_CODE_ERROR) {
				txq->stats.oerrors++;
				break;
			}
			if (ret == MLX5_TXCMP_CODE_SINGLE)
				goto enter_send_single;
			if (MLX5_TXOFF_CONFIG(MULTI) &&
			    ret == MLX5_TXCMP_CODE_MULTI) {
				/*
				 * A multi-segment packet was
				 * encountered in the array.
				 */
				goto enter_send_multi;
			}
			/* We must not get here. Something is going wrong. */
			MLX5_ASSERT(false);
			txq->stats.oerrors++;
			break;
		}
		/*
		 * The dedicated branch for single-segment packets without
		 * TSO. Often these ones can be sent using MLX5_OPCODE_EMPW
		 * with multiple packets in one WQE. The routine builds the
		 * WQEs till it encounters a TSO or multi-segment packet
		 * (if these offloads are requested at SQ configuration time).
		 */
enter_send_single:
		MLX5_ASSERT(pkts_n > loc.pkts_sent);
		ret = mlx5_tx_burst_single(txq, pkts, pkts_n, &loc, olx);
		/*
		 * These return code checks are supposed
		 * to be optimized out due to routine inlining.
		 */
		if (ret == MLX5_TXCMP_CODE_EXIT)
			break;
		if (ret == MLX5_TXCMP_CODE_ERROR) {
			txq->stats.oerrors++;
			break;
		}
		if (MLX5_TXOFF_CONFIG(MULTI) &&
		    ret == MLX5_TXCMP_CODE_MULTI) {
			/*
			 * A multi-segment packet was
			 * encountered in the array.
			 */
			goto enter_send_multi;
		}
		if (MLX5_TXOFF_CONFIG(TSO) &&
		    ret == MLX5_TXCMP_CODE_TSO) {
			/*
			 * A single-segment TSO packet was
			 * encountered in the array.
			 */
			goto enter_send_tso;
		}
		/* We must not get here. Something is going wrong. */
		MLX5_ASSERT(false);
		txq->stats.oerrors++;
		break;
	}
	/*
	 * Main Tx loop is completed, do the rest:
	 * - set completion request if thresholds are reached
	 * - doorbell the hardware
	 * - copy the rest of mbufs to elts (if any)
	 */
	MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE) ||
		    loc.pkts_sent >= loc.pkts_copy);
	/* Take a shortcut if nothing is sent. */
	if (unlikely(loc.pkts_sent == loc.pkts_loop))
		goto burst_exit;
	/* Request CQE generation if limits are reached. */
	mlx5_tx_request_completion(txq, &loc, olx);
	/*
	 * Ring the QP doorbell immediately after WQE building completion
	 * to improve latencies. The purely software-related data treatment
	 * can be completed after the doorbell. Tx CQEs for this SQ are
	 * processed in this thread only by polling.
	 *
	 * The rdma core library can map the doorbell register in two ways,
	 * depending on the environment variable "MLX5_SHUT_UP_BF":
	 *
	 * - as regular cached memory, when the variable is either missing or
	 *   set to zero. This type of mapping may cause significant doorbell
	 *   register write latency and requires an explicit memory write
	 *   barrier to mitigate this issue and prevent write combining.
	 *
	 * - as non-cached memory, when the variable is present and set to a
	 *   non-zero value. This type of mapping may cause a performance
	 *   impact under heavy load conditions but the explicit write memory
	 *   barrier is not required and it may improve core performance.
	 *
	 * - the legacy behaviour (prior to the 19.08 release) was to use some
	 *   heuristics to decide whether the write memory barrier should be
	 *   performed. This behavior is supported by specifying tx_db_nc=2;
	 *   the write barrier is skipped if the application provides the full
	 *   recommended burst of packets, on the assumption that the next
	 *   packets are coming and the write barrier will be issued on the
	 *   next burst (after descriptor writing, at least).
	 */
	mlx5_doorbell_ring(mlx5_tx_bfreg(txq),
			   *(volatile uint64_t *)loc.wqe_last, txq->wqe_ci,
			   txq->qp_db, !txq->db_nc &&
			   (!txq->db_heu || pkts_n % MLX5_TX_DEFAULT_BURST));
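	/*
	 * Informational example of the barrier decision above: with the
	 * default cached doorbell mapping (db_nc == 0) and the heuristic
	 * disabled (db_heu == 0) the last argument is always non-zero, so the
	 * explicit write barrier is requested on every burst; with tx_db_nc=2
	 * (db_heu set) it is requested only when pkts_n is not a multiple of
	 * MLX5_TX_DEFAULT_BURST, i.e. it is skipped for full recommended
	 * bursts as described in the comment above.
	 */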
	/* Not all of the mbufs may be stored into elts yet. */
	part = MLX5_TXOFF_CONFIG(INLINE) ? 0 : loc.pkts_sent - loc.pkts_copy;
	if (!MLX5_TXOFF_CONFIG(INLINE) && part) {
		/*
		 * There are some single-segment mbufs not stored in elts.
		 * This can only happen if the last packet was single-segment.
		 * The copying is gathered into one place because it is
		 * a good opportunity to optimize that with SIMD.
		 * Unfortunately, if inlining is enabled, gaps in the pointer
		 * array may appear due to early freeing of the inlined mbufs.
		 */
		mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy, part, olx);
		loc.pkts_copy = loc.pkts_sent;
	}
	MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail));
	MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi));
	if (pkts_n > loc.pkts_sent) {
		/*
		 * If the burst size is large there might not be enough CQEs
		 * fetched from the completion queue and not enough resources
		 * freed to send all the packets.
		 */
		goto send_loop;
	}
burst_exit:
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Increment sent packets counter. */
	txq->stats.opackets += loc.pkts_sent;
#endif
	if (MLX5_TXOFF_CONFIG(INLINE) && loc.mbuf_free)
		__mlx5_tx_free_mbuf(txq, pkts, loc.mbuf_free, olx);
	return loc.pkts_sent;
}
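
/*
 * Usage sketch (illustrative only, nothing below is compiled): concrete Tx
 * burst routines are produced from mlx5_tx_burst_tmpl() by pairing it with a
 * compile-time constant offload mask. For instance, a hypothetical variant
 * supporting only multi-segment packets and checksum offload could be
 * declared with
 *
 *	MLX5_TXOFF_DECL(example_mc,
 *			MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM)
 *
 * which yields an mlx5_tx_burst_example_mc() wrapper invoking the template
 * with that constant mask. The routine best matching the offloads requested
 * at queue setup is installed as the device Tx burst callback and is reached
 * by the application through rte_eth_tx_burst().
 */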

#endif /* RTE_PMD_MLX5_TX_H_ */