/* SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright(c) 2019-2021 Xilinx, Inc.
 * Copyright(c) 2018-2019 Solarflare Communications Inc.
 *
 * This software was jointly developed between OKTET Labs (under contract
 * for Solarflare) and Solarflare Communications, Inc.
 */

/* EF100 native datapath implementation */

#include <stdbool.h>

#include <rte_byteorder.h>
#include <rte_mbuf_ptype.h>
#include <rte_mbuf.h>
#include <rte_io.h>

#include "efx_types.h"
#include "efx_regs_ef100.h"
#include "efx.h"

#include "sfc.h"
#include "sfc_debug.h"
#include "sfc_flow_tunnel.h"
#include "sfc_tweak.h"
#include "sfc_dp_rx.h"
#include "sfc_kvargs.h"
#include "sfc_ef100.h"
#include "sfc_nic_dma_dp.h"


#define sfc_ef100_rx_err(_rxq, ...) \
	SFC_DP_LOG(SFC_KVARG_DATAPATH_EF100, ERR, &(_rxq)->dp.dpq, __VA_ARGS__)

#define sfc_ef100_rx_debug(_rxq, ...) \
	SFC_DP_LOG(SFC_KVARG_DATAPATH_EF100, DEBUG, &(_rxq)->dp.dpq, \
		   __VA_ARGS__)

/**
 * Maximum number of descriptors/buffers in the Rx ring.
 * It should guarantee that the corresponding event queue never overfills.
 * The EF100 native datapath uses an event queue of the same size as the
 * Rx queue. The maximum number of events on the datapath can be estimated
 * as the number of Rx queue entries (one event per Rx buffer in the worst
 * case) plus Rx error and flush events.
 */
#define SFC_EF100_RXQ_LIMIT(_ndesc) \
	((_ndesc) - 1 /* head must not step on tail */ - \
	 1 /* Rx error */ - 1 /* flush */)

/** Invalid user mark value when the mark should be treated as unset */
#define SFC_EF100_USER_MARK_INVALID	0

struct sfc_ef100_rx_sw_desc {
	struct rte_mbuf		*mbuf;
};

struct sfc_ef100_rxq {
	/* Used on data path */
	unsigned int			flags;
#define SFC_EF100_RXQ_STARTED		0x1
#define SFC_EF100_RXQ_NOT_RUNNING	0x2
#define SFC_EF100_RXQ_EXCEPTION		0x4
#define SFC_EF100_RXQ_RSS_HASH		0x10
#define SFC_EF100_RXQ_USER_MARK		0x20
#define SFC_EF100_RXQ_FLAG_INTR_EN	0x40
#define SFC_EF100_RXQ_INGRESS_MPORT	0x80
#define SFC_EF100_RXQ_USER_FLAG		0x100
#define SFC_EF100_RXQ_NIC_DMA_MAP	0x200
	unsigned int			ptr_mask;
	unsigned int			evq_phase_bit_shift;
	unsigned int			ready_pkts;
	unsigned int			completed;
	unsigned int			evq_read_ptr;
	unsigned int			evq_read_ptr_primed;
	volatile efx_qword_t		*evq_hw_ring;
	struct sfc_ef100_rx_sw_desc	*sw_ring;
	uint64_t			rearm_data;
	uint16_t			buf_size;
	uint16_t			prefix_size;
	uint32_t			user_mark_mask;

	unsigned int			evq_hw_index;
	volatile void			*evq_prime;

	/* Used on refill */
	unsigned int			added;
	unsigned int			max_fill_level;
	unsigned int			refill_threshold;
	struct rte_mempool		*refill_mb_pool;
	efx_qword_t			*rxq_hw_ring;
	volatile void			*doorbell;

	/* Datapath receive queue anchor */
	struct sfc_dp_rxq		dp;

	const struct sfc_nic_dma_info	*nic_dma_info;
};

static inline struct sfc_ef100_rxq *
sfc_ef100_rxq_by_dp_rxq(struct sfc_dp_rxq *dp_rxq)
{
	return container_of(dp_rxq, struct sfc_ef100_rxq, dp);
}

static void
sfc_ef100_rx_qprime(struct sfc_ef100_rxq *rxq)
{
	sfc_ef100_evq_prime(rxq->evq_prime, rxq->evq_hw_index,
			    rxq->evq_read_ptr & rxq->ptr_mask);
	rxq->evq_read_ptr_primed = rxq->evq_read_ptr;
}

static inline void
sfc_ef100_rx_qpush(struct sfc_ef100_rxq *rxq, unsigned int added)
{
	efx_dword_t dword;

	EFX_POPULATE_DWORD_1(dword, ERF_GZ_RX_RING_PIDX, added & rxq->ptr_mask);

	/* DMA sync to device is not required */

	/*
	 * rte_write32() has rte_io_wmb() which guarantees that the STORE
	 * operations (i.e. Rx and event descriptor updates) that precede
	 * the rte_io_wmb() call are visible to NIC before the STORE
	 * operations that follow it (i.e. doorbell write).
	 */
	rte_write32(dword.ed_u32[0], rxq->doorbell);
	rxq->dp.dpq.dbells++;

	sfc_ef100_rx_debug(rxq, "RxQ pushed doorbell at pidx %u (added=%u)",
			   EFX_DWORD_FIELD(dword, ERF_GZ_RX_RING_PIDX),
			   added);
}

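/*
 * Top up the Rx ring with fresh mbufs in bulks of SFC_RX_REFILL_BULK and
 * push the new producer index to the NIC if anything has been added.
 */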
static void
sfc_ef100_rx_qrefill(struct sfc_ef100_rxq *rxq)
{
	const unsigned int ptr_mask = rxq->ptr_mask;
	unsigned int free_space;
	unsigned int bulks;
	void *objs[SFC_RX_REFILL_BULK];
	unsigned int added = rxq->added;

	free_space = rxq->max_fill_level - (added - rxq->completed);

	if (free_space < rxq->refill_threshold)
		return;

	bulks = free_space / RTE_DIM(objs);
	/* refill_threshold guarantees that bulks is positive */
	SFC_ASSERT(bulks > 0);

	do {
		unsigned int i;

		if (unlikely(rte_mempool_get_bulk(rxq->refill_mb_pool, objs,
						  RTE_DIM(objs)) < 0)) {
			struct rte_eth_dev_data *dev_data =
				rte_eth_devices[rxq->dp.dpq.port_id].data;

			/*
			 * It is hardly a safe way to increment a counter
			 * from different contexts, but all PMDs do it.
			 */
			dev_data->rx_mbuf_alloc_failed += RTE_DIM(objs);
			/* Return if we have posted nothing yet */
			if (added == rxq->added)
				return;
			/* Push posted */
			break;
		}

		for (i = 0; i < RTE_DIM(objs); ++i) {
			struct rte_mbuf *m = objs[i];
			struct sfc_ef100_rx_sw_desc *rxd;
			rte_iova_t dma_addr;

			__rte_mbuf_raw_sanity_check(m);

			dma_addr = rte_mbuf_data_iova_default(m);
			if (rxq->flags & SFC_EF100_RXQ_NIC_DMA_MAP) {
				dma_addr = sfc_nic_dma_map(rxq->nic_dma_info,
						dma_addr,
						rte_pktmbuf_data_len(m));
				if (unlikely(dma_addr == RTE_BAD_IOVA)) {
					sfc_ef100_rx_err(rxq,
						"failed to map DMA address on Rx");
					/* Just skip buffer and try to continue */
					rte_mempool_put(rxq->refill_mb_pool, m);
					continue;
				}
			}

			rxd = &rxq->sw_ring[added & ptr_mask];
			rxd->mbuf = m;

			/*
			 * Avoid writing to mbuf. It is cheaper to do it
			 * when we receive the packet and fill in nearby
			 * structure members.
			 */

			EFX_POPULATE_QWORD_1(rxq->rxq_hw_ring[added & ptr_mask],
					     ESF_GZ_RX_BUF_ADDR, dma_addr);
			added++;
		}
	} while (--bulks > 0);

	SFC_ASSERT(rxq->added != added);
	rxq->added = added;
	sfc_ef100_rx_qpush(rxq, added);
}

static inline uint64_t
sfc_ef100_rx_nt_or_inner_l4_csum(const efx_word_t class)
{
	return EFX_WORD_FIELD(class,
			      ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L4_CSUM) ==
		ESE_GZ_RH_HCLASS_L4_CSUM_GOOD ?
		RTE_MBUF_F_RX_L4_CKSUM_GOOD : RTE_MBUF_F_RX_L4_CKSUM_BAD;
}

static inline uint64_t
sfc_ef100_rx_tun_outer_l4_csum(const efx_word_t class)
{
	return EFX_WORD_FIELD(class,
			      ESF_GZ_RX_PREFIX_HCLASS_TUN_OUTER_L4_CSUM) ==
		ESE_GZ_RH_HCLASS_L4_CSUM_GOOD ?
		RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD :
		RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD;
}

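/*
 * Decode the CLASS word of the Rx prefix into an RTE_PTYPE_* packet type
 * and accumulate checksum offload flags in *ol_flags. An unexpected L2
 * class yields RTE_PTYPE_UNKNOWN.
 */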
static uint32_t
sfc_ef100_rx_class_decode(const efx_word_t class, uint64_t *ol_flags)
{
	uint32_t ptype;
	bool no_tunnel = false;

	if (unlikely(EFX_WORD_FIELD(class, ESF_GZ_RX_PREFIX_HCLASS_L2_CLASS) !=
		     ESE_GZ_RH_HCLASS_L2_CLASS_E2_0123VLAN))
		return 0;

	switch (EFX_WORD_FIELD(class, ESF_GZ_RX_PREFIX_HCLASS_L2_N_VLAN)) {
	case 0:
		ptype = RTE_PTYPE_L2_ETHER;
		break;
	case 1:
		ptype = RTE_PTYPE_L2_ETHER_VLAN;
		break;
	default:
		ptype = RTE_PTYPE_L2_ETHER_QINQ;
		break;
	}

	switch (EFX_WORD_FIELD(class, ESF_GZ_RX_PREFIX_HCLASS_TUNNEL_CLASS)) {
	case ESE_GZ_RH_HCLASS_TUNNEL_CLASS_NONE:
		no_tunnel = true;
		break;
	case ESE_GZ_RH_HCLASS_TUNNEL_CLASS_VXLAN:
		ptype |= RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP;
		*ol_flags |= sfc_ef100_rx_tun_outer_l4_csum(class);
		break;
	case ESE_GZ_RH_HCLASS_TUNNEL_CLASS_NVGRE:
		ptype |= RTE_PTYPE_TUNNEL_NVGRE;
		break;
	case ESE_GZ_RH_HCLASS_TUNNEL_CLASS_GENEVE:
		ptype |= RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP;
		*ol_flags |= sfc_ef100_rx_tun_outer_l4_csum(class);
		break;
	default:
		/*
		 * The driver does not know the tunnel, but it is still a
		 * tunnel and NT_OR_INNER refers to the inner frame.
		 */
		no_tunnel = false;
	}

	if (no_tunnel) {
		bool l4_valid = true;

		switch (EFX_WORD_FIELD(class,
			ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L3_CLASS)) {
		case ESE_GZ_RH_HCLASS_L3_CLASS_IP4GOOD:
			ptype |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
			*ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD;
			break;
		case ESE_GZ_RH_HCLASS_L3_CLASS_IP4BAD:
			ptype |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
			*ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD;
			break;
		case ESE_GZ_RH_HCLASS_L3_CLASS_IP6:
			ptype |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
			break;
		default:
			l4_valid = false;
		}

		if (l4_valid) {
			switch (EFX_WORD_FIELD(class,
				ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L4_CLASS)) {
			case ESE_GZ_RH_HCLASS_L4_CLASS_TCP:
				ptype |= RTE_PTYPE_L4_TCP;
				*ol_flags |=
					sfc_ef100_rx_nt_or_inner_l4_csum(class);
				break;
			case ESE_GZ_RH_HCLASS_L4_CLASS_UDP:
				ptype |= RTE_PTYPE_L4_UDP;
				*ol_flags |=
					sfc_ef100_rx_nt_or_inner_l4_csum(class);
				break;
			case ESE_GZ_RH_HCLASS_L4_CLASS_FRAG:
				ptype |= RTE_PTYPE_L4_FRAG;
				break;
			}
		}
	} else {
		bool l4_valid = true;

		switch (EFX_WORD_FIELD(class,
			ESF_GZ_RX_PREFIX_HCLASS_TUN_OUTER_L3_CLASS)) {
		case ESE_GZ_RH_HCLASS_L3_CLASS_IP4GOOD:
			ptype |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
			break;
		case ESE_GZ_RH_HCLASS_L3_CLASS_IP4BAD:
			ptype |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
			*ol_flags |= RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD;
			break;
		case ESE_GZ_RH_HCLASS_L3_CLASS_IP6:
			ptype |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
			break;
		}

		switch (EFX_WORD_FIELD(class,
			ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L3_CLASS)) {
		case ESE_GZ_RH_HCLASS_L3_CLASS_IP4GOOD:
			ptype |= RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN;
			*ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD;
			break;
		case ESE_GZ_RH_HCLASS_L3_CLASS_IP4BAD:
			ptype |= RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN;
			*ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD;
			break;
		case ESE_GZ_RH_HCLASS_L3_CLASS_IP6:
			ptype |= RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN;
			break;
		default:
			l4_valid = false;
			break;
		}

		if (l4_valid) {
			switch (EFX_WORD_FIELD(class,
				ESF_GZ_RX_PREFIX_HCLASS_NT_OR_INNER_L4_CLASS)) {
			case ESE_GZ_RH_HCLASS_L4_CLASS_TCP:
				ptype |= RTE_PTYPE_INNER_L4_TCP;
				*ol_flags |=
					sfc_ef100_rx_nt_or_inner_l4_csum(class);
				break;
			case ESE_GZ_RH_HCLASS_L4_CLASS_UDP:
				ptype |= RTE_PTYPE_INNER_L4_UDP;
				*ol_flags |=
					sfc_ef100_rx_nt_or_inner_l4_csum(class);
				break;
			case ESE_GZ_RH_HCLASS_L4_CLASS_FRAG:
				ptype |= RTE_PTYPE_INNER_L4_FRAG;
				break;
			}
		}
	}

	return ptype;
}

/*
 * The functions below rely on the following fields in the Rx prefix.
 * Some fields are mandatory, some are optional.
 * See sfc_ef100_rx_qstart() below.
 */
static const efx_rx_prefix_layout_t sfc_ef100_rx_prefix_layout = {
	.erpl_fields	= {
#define	SFC_EF100_RX_PREFIX_FIELD(_name, _big_endian) \
	EFX_RX_PREFIX_FIELD(_name, ESF_GZ_RX_PREFIX_ ## _name, _big_endian)

		SFC_EF100_RX_PREFIX_FIELD(LENGTH, B_FALSE),
		SFC_EF100_RX_PREFIX_FIELD(RSS_HASH_VALID, B_FALSE),
		SFC_EF100_RX_PREFIX_FIELD(CLASS, B_FALSE),
		EFX_RX_PREFIX_FIELD(INGRESS_MPORT,
				    ESF_GZ_RX_PREFIX_INGRESS_MPORT, B_FALSE),
		SFC_EF100_RX_PREFIX_FIELD(RSS_HASH, B_FALSE),
		SFC_EF100_RX_PREFIX_FIELD(USER_FLAG, B_FALSE),
		SFC_EF100_RX_PREFIX_FIELD(USER_MARK, B_FALSE),

#undef	SFC_EF100_RX_PREFIX_FIELD
	}
};

static bool
sfc_ef100_rx_prefix_to_offloads(const struct sfc_ef100_rxq *rxq,
				const efx_xword_t *rx_prefix,
				struct rte_mbuf *m)
{
	const efx_word_t *class;
	uint64_t ol_flags = 0;

	RTE_BUILD_BUG_ON(EFX_LOW_BIT(ESF_GZ_RX_PREFIX_CLASS) % CHAR_BIT != 0);
	RTE_BUILD_BUG_ON(EFX_WIDTH(ESF_GZ_RX_PREFIX_CLASS) % CHAR_BIT != 0);
	RTE_BUILD_BUG_ON(EFX_WIDTH(ESF_GZ_RX_PREFIX_CLASS) / CHAR_BIT !=
			 sizeof(*class));
	class = (const efx_word_t *)((const uint8_t *)rx_prefix +
			EFX_LOW_BIT(ESF_GZ_RX_PREFIX_CLASS) / CHAR_BIT);
	if (unlikely(EFX_WORD_FIELD(*class,
				    ESF_GZ_RX_PREFIX_HCLASS_L2_STATUS) !=
		     ESE_GZ_RH_HCLASS_L2_STATUS_OK))
		return false;

	m->packet_type = sfc_ef100_rx_class_decode(*class, &ol_flags);

	if ((rxq->flags & SFC_EF100_RXQ_RSS_HASH) &&
	    EFX_TEST_XWORD_BIT(rx_prefix[0],
			       ESF_GZ_RX_PREFIX_RSS_HASH_VALID_LBN)) {
		ol_flags |= RTE_MBUF_F_RX_RSS_HASH;
		/* EFX_XWORD_FIELD converts little-endian to CPU */
		m->hash.rss = EFX_XWORD_FIELD(rx_prefix[0],
					      ESF_GZ_RX_PREFIX_RSS_HASH);
	}

	if (rxq->flags & SFC_EF100_RXQ_USER_FLAG) {
		uint32_t user_flag;

		user_flag = EFX_XWORD_FIELD(rx_prefix[0],
					    ESF_GZ_RX_PREFIX_USER_FLAG);
		if (user_flag != 0)
			ol_flags |= RTE_MBUF_F_RX_FDIR;
	}

	if (rxq->flags & SFC_EF100_RXQ_USER_MARK) {
		uint8_t tunnel_mark;
		uint32_t user_mark;
		uint32_t mark;

		/* EFX_XWORD_FIELD converts little-endian to CPU */
		mark = EFX_XWORD_FIELD(rx_prefix[0],
				       ESF_GZ_RX_PREFIX_USER_MARK);

		user_mark = mark & rxq->user_mark_mask;
		if (user_mark != SFC_EF100_USER_MARK_INVALID) {
			ol_flags |= RTE_MBUF_F_RX_FDIR | RTE_MBUF_F_RX_FDIR_ID;
			m->hash.fdir.hi = user_mark;
		}

		tunnel_mark = SFC_FT_GET_TUNNEL_MARK(mark);
		if (tunnel_mark != SFC_FT_TUNNEL_MARK_INVALID) {
			sfc_ft_id_t ft_id;

			ft_id = SFC_FT_TUNNEL_MARK_TO_ID(tunnel_mark);

			ol_flags |= sfc_dp_ft_id_valid;
			*RTE_MBUF_DYNFIELD(m, sfc_dp_ft_id_offset,
					   sfc_ft_id_t *) = ft_id;
		}
	}

	if (rxq->flags & SFC_EF100_RXQ_INGRESS_MPORT) {
		ol_flags |= sfc_dp_mport_override;
		*RTE_MBUF_DYNFIELD(m,
			sfc_dp_mport_offset,
			typeof(&((efx_mport_id_t *)0)->id)) =
				EFX_XWORD_FIELD(rx_prefix[0],
						ESF_GZ_RX_PREFIX_INGRESS_MPORT);
	}

	m->ol_flags = ol_flags;
	return true;
}

static const uint8_t *
sfc_ef100_rx_pkt_prefix(const struct rte_mbuf *m)
{
	return (const uint8_t *)m->buf_addr + RTE_PKTMBUF_HEADROOM;
}

static struct rte_mbuf *
sfc_ef100_rx_next_mbuf(struct sfc_ef100_rxq *rxq)
{
	struct rte_mbuf *m;
	unsigned int id;

	/* mbuf associated with current Rx descriptor */
	m = rxq->sw_ring[rxq->completed++ & rxq->ptr_mask].mbuf;

	/* completed is already moved to the next one */
	if (unlikely(rxq->completed == rxq->added))
		goto done;

	/*
	 * Prefetch Rx prefix of the next packet.
	 * If the current packet is scattered and the next mbuf is its
	 * fragment, this simply prefetches some data - no harm since the
	 * packet rate should not be high if scatter is used.
	 */
	id = rxq->completed & rxq->ptr_mask;
	rte_prefetch0(sfc_ef100_rx_pkt_prefix(rxq->sw_ring[id].mbuf));

	if (unlikely(rxq->completed + 1 == rxq->added))
		goto done;

	/*
	 * Prefetch mbuf control structure of the next after next Rx
	 * descriptor.
	 */
	id = (id == rxq->ptr_mask) ? 0 : (id + 1);
	rte_mbuf_prefetch_part1(rxq->sw_ring[id].mbuf);

	/*
	 * If the next time we'll need SW Rx descriptor from the next
	 * cache line, try to make sure that we have it in cache.
	 */
	if ((id & 0x7) == 0x7)
		rte_prefetch0(&rxq->sw_ring[(id + 1) & rxq->ptr_mask]);

done:
	return m;
}

static struct rte_mbuf **
sfc_ef100_rx_process_ready_pkts(struct sfc_ef100_rxq *rxq,
				struct rte_mbuf **rx_pkts,
				struct rte_mbuf ** const rx_pkts_end)
{
	while (rxq->ready_pkts > 0 && rx_pkts != rx_pkts_end) {
		struct rte_mbuf *pkt;
		struct rte_mbuf *lastseg;
		const efx_xword_t *rx_prefix;
		uint16_t pkt_len;
		uint16_t seg_len;
		bool deliver;

		rxq->ready_pkts--;

		pkt = sfc_ef100_rx_next_mbuf(rxq);
		__rte_mbuf_raw_sanity_check(pkt);

		RTE_BUILD_BUG_ON(sizeof(pkt->rearm_data[0]) !=
				 sizeof(rxq->rearm_data));
		pkt->rearm_data[0] = rxq->rearm_data;

		/* data_off already moved past Rx prefix */
		rx_prefix = (const efx_xword_t *)sfc_ef100_rx_pkt_prefix(pkt);

		pkt_len = EFX_XWORD_FIELD(rx_prefix[0],
					  ESF_GZ_RX_PREFIX_LENGTH);
		SFC_ASSERT(pkt_len > 0);
		rte_pktmbuf_pkt_len(pkt) = pkt_len;

		seg_len = RTE_MIN(pkt_len, rxq->buf_size - rxq->prefix_size);
		rte_pktmbuf_data_len(pkt) = seg_len;

		deliver = sfc_ef100_rx_prefix_to_offloads(rxq, rx_prefix, pkt);

		lastseg = pkt;
		while ((pkt_len -= seg_len) > 0) {
			struct rte_mbuf *seg;

			seg = sfc_ef100_rx_next_mbuf(rxq);
			__rte_mbuf_raw_sanity_check(seg);

			seg->data_off = RTE_PKTMBUF_HEADROOM;

			seg_len = RTE_MIN(pkt_len, rxq->buf_size);
			rte_pktmbuf_data_len(seg) = seg_len;
			rte_pktmbuf_pkt_len(seg) = seg_len;

			pkt->nb_segs++;
			lastseg->next = seg;
			lastseg = seg;
		}

		if (likely(deliver)) {
			*rx_pkts++ = pkt;
			sfc_pkts_bytes_add(&rxq->dp.dpq.stats, 1,
					   rte_pktmbuf_pkt_len(pkt));
		} else {
			rte_pktmbuf_free(pkt);
		}
	}

	return rx_pkts;
}

static bool
sfc_ef100_rx_get_event(struct sfc_ef100_rxq *rxq, efx_qword_t *ev)
{
	*ev = rxq->evq_hw_ring[rxq->evq_read_ptr & rxq->ptr_mask];

	if (!sfc_ef100_ev_present(ev,
			(rxq->evq_read_ptr >> rxq->evq_phase_bit_shift) & 1))
		return false;

	if (unlikely(!sfc_ef100_ev_type_is(ev, ESE_GZ_EF100_EV_RX_PKTS))) {
		/*
		 * Do not move read_ptr to keep the event for exception
		 * handling by the control path.
		 */
		rxq->flags |= SFC_EF100_RXQ_EXCEPTION;
		sfc_ef100_rx_err(rxq,
			"RxQ exception at EvQ ptr %u(%#x), event %08x:%08x",
			rxq->evq_read_ptr, rxq->evq_read_ptr & rxq->ptr_mask,
			EFX_QWORD_FIELD(*ev, EFX_DWORD_1),
			EFX_QWORD_FIELD(*ev, EFX_DWORD_0));
		return false;
	}

	sfc_ef100_rx_debug(rxq, "RxQ got event %08x:%08x at %u (%#x)",
			   EFX_QWORD_FIELD(*ev, EFX_DWORD_1),
			   EFX_QWORD_FIELD(*ev, EFX_DWORD_0),
			   rxq->evq_read_ptr,
			   rxq->evq_read_ptr & rxq->ptr_mask);

	rxq->evq_read_ptr++;
	return true;
}

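/*
 * Burst receive: deliver packets already announced by previously processed
 * events first, then poll the event queue for new Rx events, refill the
 * Rx ring and re-prime the event queue if interrupts are enabled.
 */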
static uint16_t
sfc_ef100_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(rx_queue);
	struct rte_mbuf ** const rx_pkts_end = &rx_pkts[nb_pkts];
	efx_qword_t rx_ev;

	rx_pkts = sfc_ef100_rx_process_ready_pkts(rxq, rx_pkts, rx_pkts_end);

	if (unlikely(rxq->flags &
		     (SFC_EF100_RXQ_NOT_RUNNING | SFC_EF100_RXQ_EXCEPTION)))
		goto done;

	while (rx_pkts != rx_pkts_end && sfc_ef100_rx_get_event(rxq, &rx_ev)) {
		rxq->ready_pkts =
			EFX_QWORD_FIELD(rx_ev, ESF_GZ_EV_RXPKTS_NUM_PKT);
		rx_pkts = sfc_ef100_rx_process_ready_pkts(rxq, rx_pkts,
							  rx_pkts_end);
	}

	/* It is not a problem if we refill in the case of exception */
	sfc_ef100_rx_qrefill(rxq);

	if ((rxq->flags & SFC_EF100_RXQ_FLAG_INTR_EN) &&
	    rxq->evq_read_ptr_primed != rxq->evq_read_ptr)
		sfc_ef100_rx_qprime(rxq);

done:
	return nb_pkts - (rx_pkts_end - rx_pkts);
}

static const uint32_t *
sfc_ef100_supported_ptypes_get(__rte_unused uint32_t tunnel_encaps)
{
	static const uint32_t ef100_native_ptypes[] = {
		RTE_PTYPE_L2_ETHER,
		RTE_PTYPE_L2_ETHER_VLAN,
		RTE_PTYPE_L2_ETHER_QINQ,
		RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
		RTE_PTYPE_L3_IPV6_EXT_UNKNOWN,
		RTE_PTYPE_L4_TCP,
		RTE_PTYPE_L4_UDP,
		RTE_PTYPE_L4_FRAG,
		RTE_PTYPE_TUNNEL_VXLAN,
		RTE_PTYPE_TUNNEL_NVGRE,
		RTE_PTYPE_TUNNEL_GENEVE,
		RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN,
		RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN,
		RTE_PTYPE_INNER_L4_TCP,
		RTE_PTYPE_INNER_L4_UDP,
		RTE_PTYPE_INNER_L4_FRAG,
		RTE_PTYPE_UNKNOWN
	};

	return ef100_native_ptypes;
}

static sfc_dp_rx_qdesc_npending_t sfc_ef100_rx_qdesc_npending;
static unsigned int
sfc_ef100_rx_qdesc_npending(__rte_unused struct sfc_dp_rxq *dp_rxq)
{
	return 0;
}

static sfc_dp_rx_qdesc_status_t sfc_ef100_rx_qdesc_status;
static int
sfc_ef100_rx_qdesc_status(__rte_unused struct sfc_dp_rxq *dp_rxq,
			  __rte_unused uint16_t offset)
{
	return -ENOTSUP;
}

static sfc_dp_rx_get_dev_info_t sfc_ef100_rx_get_dev_info;
static void
sfc_ef100_rx_get_dev_info(struct rte_eth_dev_info *dev_info)
{
	/*
	 * The number of descriptors just defines the maximum number of
	 * pushed descriptors (fill level).
	 */
	dev_info->rx_desc_lim.nb_min = SFC_RX_REFILL_BULK;
	dev_info->rx_desc_lim.nb_align = SFC_RX_REFILL_BULK;
}

static sfc_dp_rx_qsize_up_rings_t sfc_ef100_rx_qsize_up_rings;
static int
sfc_ef100_rx_qsize_up_rings(uint16_t nb_rx_desc,
			    struct sfc_dp_rx_hw_limits *limits,
			    __rte_unused struct rte_mempool *mb_pool,
			    unsigned int *rxq_entries,
			    unsigned int *evq_entries,
			    unsigned int *rxq_max_fill_level)
{
	/*
	 * rte_ethdev API guarantees that the number meets min, max and
	 * alignment requirements.
	 */
	if (nb_rx_desc <= limits->rxq_min_entries)
		*rxq_entries = limits->rxq_min_entries;
	else
		*rxq_entries = rte_align32pow2(nb_rx_desc);

	*evq_entries = *rxq_entries;

	*rxq_max_fill_level = RTE_MIN(nb_rx_desc,
				      SFC_EF100_RXQ_LIMIT(*evq_entries));
	return 0;
}

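/*
 * Build the 64-bit mbuf rearm_data template copied verbatim into each
 * received mbuf on the datapath (see sfc_ef100_rx_process_ready_pkts()),
 * so that refcnt, data_off, nb_segs and port do not have to be written
 * one by one per packet.
 */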
static uint64_t
sfc_ef100_mk_mbuf_rearm_data(uint16_t port_id, uint16_t prefix_size)
{
	struct rte_mbuf m;

	memset(&m, 0, sizeof(m));

	rte_mbuf_refcnt_set(&m, 1);
	m.data_off = RTE_PKTMBUF_HEADROOM + prefix_size;
	m.nb_segs = 1;
	m.port = port_id;

	/* rearm_data covers structure members filled in above */
	rte_compiler_barrier();
	RTE_BUILD_BUG_ON(sizeof(m.rearm_data[0]) != sizeof(uint64_t));
	return m.rearm_data[0];
}

static sfc_dp_rx_qcreate_t sfc_ef100_rx_qcreate;
static int
sfc_ef100_rx_qcreate(uint16_t port_id, uint16_t queue_id,
		     const struct rte_pci_addr *pci_addr, int socket_id,
		     const struct sfc_dp_rx_qcreate_info *info,
		     struct sfc_dp_rxq **dp_rxqp)
{
	struct sfc_ef100_rxq *rxq;
	int rc;

	rc = EINVAL;
	if (info->rxq_entries != info->evq_entries)
		goto fail_rxq_args;

	rc = ENOMEM;
	rxq = rte_zmalloc_socket("sfc-ef100-rxq", sizeof(*rxq),
				 RTE_CACHE_LINE_SIZE, socket_id);
	if (rxq == NULL)
		goto fail_rxq_alloc;

	sfc_dp_queue_init(&rxq->dp.dpq, port_id, queue_id, pci_addr);

	rc = ENOMEM;
	rxq->sw_ring = rte_calloc_socket("sfc-ef100-rxq-sw_ring",
					 info->rxq_entries,
					 sizeof(*rxq->sw_ring),
					 RTE_CACHE_LINE_SIZE, socket_id);
	if (rxq->sw_ring == NULL)
		goto fail_desc_alloc;

	rxq->flags |= SFC_EF100_RXQ_NOT_RUNNING;
	rxq->ptr_mask = info->rxq_entries - 1;
	rxq->evq_phase_bit_shift = rte_bsf32(info->evq_entries);
	rxq->evq_hw_ring = info->evq_hw_ring;
	rxq->max_fill_level = info->max_fill_level;
	rxq->refill_threshold = info->refill_threshold;
	rxq->prefix_size = info->prefix_size;

	SFC_ASSERT(info->user_mark_mask != 0);
	rxq->user_mark_mask = info->user_mark_mask;

	rxq->buf_size = info->buf_size;
	rxq->refill_mb_pool = info->refill_mb_pool;
	rxq->rxq_hw_ring = info->rxq_hw_ring;
	rxq->doorbell = (volatile uint8_t *)info->mem_bar +
			ER_GZ_RX_RING_DOORBELL_OFST +
			(info->hw_index << info->vi_window_shift);

	rxq->evq_hw_index = info->evq_hw_index;
	rxq->evq_prime = (volatile uint8_t *)info->mem_bar +
			 info->fcw_offset +
			 ER_GZ_EVQ_INT_PRIME_OFST;

	rxq->nic_dma_info = info->nic_dma_info;
	if (rxq->nic_dma_info->nb_regions > 0)
		rxq->flags |= SFC_EF100_RXQ_NIC_DMA_MAP;

	sfc_ef100_rx_debug(rxq, "RxQ doorbell is %p", rxq->doorbell);

	*dp_rxqp = &rxq->dp;
	return 0;

fail_desc_alloc:
	rte_free(rxq);

fail_rxq_alloc:
fail_rxq_args:
	return rc;
}

static sfc_dp_rx_qdestroy_t sfc_ef100_rx_qdestroy;
static void
sfc_ef100_rx_qdestroy(struct sfc_dp_rxq *dp_rxq)
{
	struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);

	rte_free(rxq->sw_ring);
	rte_free(rxq);
}

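/*
 * Check the Rx prefix layout chosen by the firmware against the fields the
 * datapath can parse and set the per-queue flags for the optional fields.
 * LENGTH and CLASS are mandatory; RSS hash, user flag, user mark and
 * ingress m-port are used only when present.
 */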
static sfc_dp_rx_qstart_t sfc_ef100_rx_qstart;
static int
sfc_ef100_rx_qstart(struct sfc_dp_rxq *dp_rxq, unsigned int evq_read_ptr,
		    const efx_rx_prefix_layout_t *pinfo)
{
	struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);
	uint32_t unsup_rx_prefix_fields;

	SFC_ASSERT(rxq->completed == 0);
	SFC_ASSERT(rxq->added == 0);

	/* Prefix must fit into reserved Rx buffer space */
	if (pinfo->erpl_length > rxq->prefix_size)
		return ENOTSUP;

	unsup_rx_prefix_fields =
		efx_rx_prefix_layout_check(pinfo, &sfc_ef100_rx_prefix_layout);

	/* LENGTH and CLASS fields must always be present */
	if ((unsup_rx_prefix_fields &
	     ((1U << EFX_RX_PREFIX_FIELD_LENGTH) |
	      (1U << EFX_RX_PREFIX_FIELD_CLASS))) != 0)
		return ENOTSUP;

	if ((unsup_rx_prefix_fields &
	     ((1U << EFX_RX_PREFIX_FIELD_RSS_HASH_VALID) |
	      (1U << EFX_RX_PREFIX_FIELD_RSS_HASH))) == 0)
		rxq->flags |= SFC_EF100_RXQ_RSS_HASH;
	else
		rxq->flags &= ~SFC_EF100_RXQ_RSS_HASH;

	if ((unsup_rx_prefix_fields &
	     (1U << EFX_RX_PREFIX_FIELD_USER_FLAG)) == 0)
		rxq->flags |= SFC_EF100_RXQ_USER_FLAG;
	else
		rxq->flags &= ~SFC_EF100_RXQ_USER_FLAG;

	if ((unsup_rx_prefix_fields &
	     (1U << EFX_RX_PREFIX_FIELD_USER_MARK)) == 0)
		rxq->flags |= SFC_EF100_RXQ_USER_MARK;
	else
		rxq->flags &= ~SFC_EF100_RXQ_USER_MARK;

	if ((unsup_rx_prefix_fields &
	     (1U << EFX_RX_PREFIX_FIELD_INGRESS_MPORT)) == 0)
		rxq->flags |= SFC_EF100_RXQ_INGRESS_MPORT;
	else
		rxq->flags &= ~SFC_EF100_RXQ_INGRESS_MPORT;

	rxq->prefix_size = pinfo->erpl_length;
	rxq->rearm_data = sfc_ef100_mk_mbuf_rearm_data(rxq->dp.dpq.port_id,
						       rxq->prefix_size);

	sfc_ef100_rx_qrefill(rxq);

	rxq->evq_read_ptr = evq_read_ptr;

	rxq->flags |= SFC_EF100_RXQ_STARTED;
	rxq->flags &= ~(SFC_EF100_RXQ_NOT_RUNNING | SFC_EF100_RXQ_EXCEPTION);

	if (rxq->flags & SFC_EF100_RXQ_FLAG_INTR_EN)
		sfc_ef100_rx_qprime(rxq);

	return 0;
}

static sfc_dp_rx_qstop_t sfc_ef100_rx_qstop;
static void
sfc_ef100_rx_qstop(struct sfc_dp_rxq *dp_rxq, unsigned int *evq_read_ptr)
{
	struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);

	rxq->flags |= SFC_EF100_RXQ_NOT_RUNNING;

	*evq_read_ptr = rxq->evq_read_ptr;
}

static sfc_dp_rx_qrx_ev_t sfc_ef100_rx_qrx_ev;
static bool
sfc_ef100_rx_qrx_ev(struct sfc_dp_rxq *dp_rxq, __rte_unused unsigned int id)
{
	__rte_unused struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);

	SFC_ASSERT(rxq->flags & SFC_EF100_RXQ_NOT_RUNNING);

	/*
	 * It is safe to ignore Rx event since we free all mbufs on
	 * queue purge anyway.
	 */

	return false;
}

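/*
 * Release all mbufs still owned by the ring and reset the datapath state
 * so that the queue can be started again.
 */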
static sfc_dp_rx_qpurge_t sfc_ef100_rx_qpurge;
static void
sfc_ef100_rx_qpurge(struct sfc_dp_rxq *dp_rxq)
{
	struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);
	unsigned int i;
	struct sfc_ef100_rx_sw_desc *rxd;

	for (i = rxq->completed; i != rxq->added; ++i) {
		rxd = &rxq->sw_ring[i & rxq->ptr_mask];
		rte_mbuf_raw_free(rxd->mbuf);
		rxd->mbuf = NULL;
	}

	rxq->completed = rxq->added = 0;
	rxq->ready_pkts = 0;

	rxq->flags &= ~SFC_EF100_RXQ_STARTED;
}

static sfc_dp_rx_intr_enable_t sfc_ef100_rx_intr_enable;
static int
sfc_ef100_rx_intr_enable(struct sfc_dp_rxq *dp_rxq)
{
	struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);

	rxq->flags |= SFC_EF100_RXQ_FLAG_INTR_EN;
	if (rxq->flags & SFC_EF100_RXQ_STARTED)
		sfc_ef100_rx_qprime(rxq);
	return 0;
}

static sfc_dp_rx_intr_disable_t sfc_ef100_rx_intr_disable;
static int
sfc_ef100_rx_intr_disable(struct sfc_dp_rxq *dp_rxq)
{
	struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);

	/* Cannot disarm, just disable rearm */
	rxq->flags &= ~SFC_EF100_RXQ_FLAG_INTR_EN;
	return 0;
}

static sfc_dp_rx_get_pushed_t sfc_ef100_rx_get_pushed;
static unsigned int
sfc_ef100_rx_get_pushed(struct sfc_dp_rxq *dp_rxq)
{
	struct sfc_ef100_rxq *rxq = sfc_ef100_rxq_by_dp_rxq(dp_rxq);

	/*
	 * The datapath keeps track only of added descriptors, since
	 * the number of pushed descriptors always equals the number
	 * of added descriptors due to enforced alignment.
	 */
	return rxq->added;
}

struct sfc_dp_rx sfc_ef100_rx = {
	.dp = {
		.name		= SFC_KVARG_DATAPATH_EF100,
		.type		= SFC_DP_RX,
		.hw_fw_caps	= SFC_DP_HW_FW_CAP_EF100,
	},
	.features		= SFC_DP_RX_FEAT_MULTI_PROCESS |
				  SFC_DP_RX_FEAT_FLOW_FLAG |
				  SFC_DP_RX_FEAT_FLOW_MARK |
				  SFC_DP_RX_FEAT_INTR |
				  SFC_DP_RX_FEAT_STATS,
	.dev_offload_capa	= 0,
	.queue_offload_capa	= RTE_ETH_RX_OFFLOAD_CHECKSUM |
				  RTE_ETH_RX_OFFLOAD_OUTER_IPV4_CKSUM |
				  RTE_ETH_RX_OFFLOAD_OUTER_UDP_CKSUM |
				  RTE_ETH_RX_OFFLOAD_SCATTER |
				  RTE_ETH_RX_OFFLOAD_RSS_HASH,
	.get_dev_info		= sfc_ef100_rx_get_dev_info,
	.qsize_up_rings		= sfc_ef100_rx_qsize_up_rings,
	.qcreate		= sfc_ef100_rx_qcreate,
	.qdestroy		= sfc_ef100_rx_qdestroy,
	.qstart			= sfc_ef100_rx_qstart,
	.qstop			= sfc_ef100_rx_qstop,
	.qrx_ev			= sfc_ef100_rx_qrx_ev,
	.qpurge			= sfc_ef100_rx_qpurge,
	.supported_ptypes_get	= sfc_ef100_supported_ptypes_get,
	.qdesc_npending		= sfc_ef100_rx_qdesc_npending,
	.qdesc_status		= sfc_ef100_rx_qdesc_status,
	.intr_enable		= sfc_ef100_rx_intr_enable,
	.intr_disable		= sfc_ef100_rx_intr_disable,
	.get_pushed		= sfc_ef100_rx_get_pushed,
	.pkt_burst		= sfc_ef100_recv_pkts,
};