/* SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright(c) 2020-2021 Xilinx, Inc.
 */

#include <rte_common.h>
#include <rte_service_component.h>

#include "efx.h"
#include "efx_regs_counters_pkt_format.h"

#include "sfc_ev.h"
#include "sfc.h"
#include "sfc_rx.h"
#include "sfc_mae_counter.h"
#include "sfc_service.h"

/**
 * Approximate maximum number of counters per packet.
 * In fact maximum depends on per-counter data offset which is specified
 * in counter packet header.
 */
#define SFC_MAE_COUNTERS_PER_PACKET_MAX \
	((SFC_MAE_COUNTER_STREAM_PACKET_SIZE - \
	  ER_RX_SL_PACKETISER_HEADER_WORD_SIZE) / \
	 ER_RX_SL_PACKETISER_PAYLOAD_WORD_SIZE)

/**
 * Minimum number of Rx buffers in counters only Rx queue.
 */
#define SFC_MAE_COUNTER_RXQ_BUFS_MIN \
	(SFC_COUNTER_RXQ_RX_DESC_COUNT - SFC_COUNTER_RXQ_REFILL_LEVEL)

/**
 * Approximate number of counter updates fit in counters only Rx queue.
 * The number is inaccurate since SFC_MAE_COUNTERS_PER_PACKET_MAX is
 * inaccurate (see above). However, it provides the gist for a number of
 * counter updates which can fit in an Rx queue after empty poll.
 *
 * The define is not actually used, but provides calculations details.
 */
#define SFC_MAE_COUNTERS_RXQ_SPACE \
	(SFC_MAE_COUNTER_RXQ_BUFS_MIN * SFC_MAE_COUNTERS_PER_PACKET_MAX)

/**
 * Pick a service lcore for the counter polling service.
 *
 * Prefer an lcore on the adapter's own NUMA node; if none is available
 * and the adapter is bound to a specific socket, fall back to any socket
 * (with a warning about the NUMA mismatch).
 *
 * @return Service lcore ID, or RTE_MAX_LCORE if none could be obtained.
 */
static uint32_t
sfc_mae_counter_get_service_lcore(struct sfc_adapter *sa)
{
	uint32_t cid;

	cid = sfc_get_service_lcore(sa->socket_id);
	if (cid != RTE_MAX_LCORE)
		return cid;

	if (sa->socket_id != SOCKET_ID_ANY)
		cid = sfc_get_service_lcore(SOCKET_ID_ANY);

	if (cid == RTE_MAX_LCORE) {
		sfc_warn(sa, "failed to get service lcore for counter service");
	} else if (sa->socket_id != SOCKET_ID_ANY) {
		/* Fallback succeeded, but on a different NUMA node */
		sfc_warn(sa,
			"failed to get service lcore for counter service at socket %d, but got at socket %u",
			sa->socket_id, rte_lcore_to_socket_id(cid));
	}
	return cid;
}

/** Report whether the dedicated counters Rx queue is needed (MAE support). */
bool
sfc_mae_counter_rxq_required(struct sfc_adapter *sa)
{
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);

	if (encp->enc_mae_supported == B_FALSE)
		return false;

	return true;
}

/**
 * Allocate one MAE counter in firmware and publish the corresponding
 * driver-side entry for use by the increment (Rx polling) path.
 *
 * @param sa		Adapter
 * @param counterp	Counter ID holder to fill in with the allocated FW ID
 *
 * @return 0 on success, positive errno on failure.
 */
int
sfc_mae_counter_enable(struct sfc_adapter *sa,
		       struct sfc_mae_counter_id *counterp)
{
	struct sfc_mae_counter_registry *reg = &sa->mae.counter_registry;
	struct sfc_mae_counters *counters = &reg->counters;
	struct sfc_mae_counter *p;
	efx_counter_t mae_counter;
	uint32_t generation_count;
	uint32_t unused;
	int rc;

	/*
	 * The actual count of counters allocated is ignored since a failure
	 * to allocate a single counter is indicated by non-zero return code.
	 */
	rc = efx_mae_counters_alloc(sa->nic, 1, &unused, &mae_counter,
				    &generation_count);
	if (rc != 0) {
		sfc_err(sa, "failed to alloc MAE counter: %s",
			rte_strerror(rc));
		goto fail_mae_counter_alloc;
	}

	if (mae_counter.id >= counters->n_mae_counters) {
		/*
		 * ID of a counter is expected to be within the range
		 * between 0 and the maximum count of counters to always
		 * fit into a pre-allocated array size of maximum counter ID.
		 */
		sfc_err(sa, "MAE counter ID is out of expected range");
		rc = EFAULT;
		goto fail_counter_id_range;
	}

	counterp->mae_id = mae_counter;

	p = &counters->mae_counters[mae_counter.id];

	/*
	 * Snapshot the current value as the reset baseline so that queries
	 * report hits/bytes accumulated from this point on.
	 *
	 * Ordering is relaxed since it is the only operation on counter value.
	 * And it does not depend on different stores/loads in other threads.
	 * Paired with relaxed ordering in counter increment.
	 */
	__atomic_store(&p->reset.pkts_bytes.int128,
		       &p->value.pkts_bytes.int128, __ATOMIC_RELAXED);
	p->generation_count = generation_count;

	p->ft_group_hit_counter = counterp->ft_group_hit_counter;

	/*
	 * The flag is set at the very end of add operation and reset
	 * at the beginning of delete operation. Release ordering is
	 * paired with acquire ordering on load in counter increment operation.
	 */
	__atomic_store_n(&p->inuse, true, __ATOMIC_RELEASE);

	sfc_info(sa, "enabled MAE counter #%u with reset pkts=%" PRIu64
		 " bytes=%" PRIu64, mae_counter.id,
		 p->reset.pkts, p->reset.bytes);

	return 0;

fail_counter_id_range:
	(void)efx_mae_counters_free(sa->nic, 1, &unused, &mae_counter, NULL);

fail_mae_counter_alloc:
	sfc_log_init(sa, "failed: %s", rte_strerror(rc));
	return rc;
}

/**
 * Withdraw the driver-side entry from use and free the MAE counter
 * in firmware.
 *
 * @param sa		Adapter
 * @param counter	Counter ID holder; invalidated on return
 *
 * @return 0 on success (or if the counter was already invalid),
 *	   positive errno if the firmware free failed.
 */
int
sfc_mae_counter_disable(struct sfc_adapter *sa,
			struct sfc_mae_counter_id *counter)
{
	struct sfc_mae_counter_registry *reg = &sa->mae.counter_registry;
	struct sfc_mae_counters *counters = &reg->counters;
	struct sfc_mae_counter *p;
	uint32_t unused;
	int rc;

	if (counter->mae_id.id == EFX_MAE_RSRC_ID_INVALID)
		return 0;

	SFC_ASSERT(counter->mae_id.id < counters->n_mae_counters);
	/*
	 * The flag is set at the very end of add operation and reset
	 * at the beginning of delete operation. Release ordering is
	 * paired with acquire ordering on load in counter increment operation.
	 */
	p = &counters->mae_counters[counter->mae_id.id];
	__atomic_store_n(&p->inuse, false, __ATOMIC_RELEASE);

	rc = efx_mae_counters_free(sa->nic, 1, &unused, &counter->mae_id, NULL);
	if (rc != 0)
		sfc_err(sa, "failed to free MAE counter %u: %s",
			counter->mae_id.id, rte_strerror(rc));

	sfc_info(sa, "disabled MAE counter #%u with reset pkts=%" PRIu64
		 " bytes=%" PRIu64, counter->mae_id.id,
		 p->reset.pkts, p->reset.bytes);

	/*
	 * Do this regardless of what efx_mae_counters_free() return value is.
	 * If there's some error, the resulting resource leakage is bad, but
	 * nothing sensible can be done in this case.
	 */
	counter->mae_id.id = EFX_MAE_RSRC_ID_INVALID;

	return rc;
}

/**
 * Apply one counter update (delta of packets/bytes) delivered by the
 * hardware counter stream to the in-memory counter value.
 *
 * Stale or premature updates (counter not in use, or belonging to an
 * older allocation generation) are dropped and accounted in xstats.
 */
static void
sfc_mae_counter_increment(struct sfc_adapter *sa,
			  struct sfc_mae_counters *counters,
			  uint32_t mae_counter_id,
			  uint32_t generation_count,
			  uint64_t pkts, uint64_t bytes)
{
	struct sfc_mae_counter *p = &counters->mae_counters[mae_counter_id];
	struct sfc_mae_counters_xstats *xstats = &counters->xstats;
	union sfc_pkts_bytes cnt_val;
	bool inuse;

	/*
	 * Acquire ordering is paired with release ordering in counter add
	 * and delete operations.
	 */
	__atomic_load(&p->inuse, &inuse, __ATOMIC_ACQUIRE);
	if (!inuse) {
		/*
		 * Two possible cases include:
		 * 1) Counter is just allocated. Too early counter update
		 *    cannot be processed properly.
		 * 2) Stale update of freed and not reallocated counter.
		 *    There is no point in processing that update.
		 */
		xstats->not_inuse_update++;
		return;
	}

	if (unlikely(generation_count < p->generation_count)) {
		/*
		 * It is a stale update for the reallocated counter
		 * (i.e., freed and the same ID allocated again).
		 */
		xstats->realloc_update++;
		return;
	}

	cnt_val.pkts = p->value.pkts + pkts;
	cnt_val.bytes = p->value.bytes + bytes;

	/*
	 * Ordering is relaxed since it is the only operation on counter value.
	 * And it does not depend on different stores/loads in other threads.
	 * Paired with relaxed ordering on counter reset.
	 */
	__atomic_store(&p->value.pkts_bytes,
		       &cnt_val.pkts_bytes, __ATOMIC_RELAXED);

	if (p->ft_group_hit_counter != NULL) {
		uint64_t ft_group_hit_counter;

		ft_group_hit_counter = *p->ft_group_hit_counter + pkts;
		__atomic_store_n(p->ft_group_hit_counter, ft_group_hit_counter,
				 __ATOMIC_RELAXED);
	}

	sfc_info(sa, "update MAE counter #%u: pkts+%" PRIu64 "=%" PRIu64
		 ", bytes+%" PRIu64 "=%" PRIu64, mae_counter_id,
		 pkts, cnt_val.pkts, bytes, cnt_val.bytes);
}

/**
 * Validate and parse one packet from the counter stream and apply all
 * counter updates it carries.
 *
 * The packet layout (packetiser header followed by an array of per-counter
 * 128-bit records) is defined in efx_regs_counters_pkt_format.h. Malformed
 * packets are logged and dropped whole.
 */
static void
sfc_mae_parse_counter_packet(struct sfc_adapter *sa,
			     struct sfc_mae_counter_registry *counter_registry,
			     const struct rte_mbuf *m)
{
	uint32_t generation_count;
	const efx_xword_t *hdr;
	const efx_oword_t *counters_data;
	unsigned int version;
	unsigned int id;
	unsigned int header_offset;
	unsigned int payload_offset;
	unsigned int counter_count;
	unsigned int required_len;
	unsigned int i;

	if (unlikely(m->nb_segs != 1)) {
		sfc_err(sa, "unexpectedly scattered MAE counters packet (%u segments)",
			m->nb_segs);
		return;
	}

	if (unlikely(m->data_len < ER_RX_SL_PACKETISER_HEADER_WORD_SIZE)) {
		sfc_err(sa, "too short MAE counters packet (%u bytes)",
			m->data_len);
		return;
	}

	/*
	 * The generation count is located in the Rx prefix in the USER_MARK
	 * field which is written into hash.fdir.hi field of an mbuf. See
	 * SF-123581-TC SmartNIC Datapath Offloads section 4.7.5 Counters.
	 */
	generation_count = m->hash.fdir.hi;

	hdr = rte_pktmbuf_mtod(m, const efx_xword_t *);

	version = EFX_XWORD_FIELD(*hdr, ERF_SC_PACKETISER_HEADER_VERSION);
	if (unlikely(version != ERF_SC_PACKETISER_HEADER_VERSION_2)) {
		sfc_err(sa, "unexpected MAE counters packet version %u",
			version);
		return;
	}

	id = EFX_XWORD_FIELD(*hdr, ERF_SC_PACKETISER_HEADER_IDENTIFIER);
	if (unlikely(id != ERF_SC_PACKETISER_HEADER_IDENTIFIER_AR)) {
		sfc_err(sa, "unexpected MAE counters source identifier %u", id);
		return;
	}

	/* Packet layout definitions assume fixed header offset in fact */
	header_offset =
		EFX_XWORD_FIELD(*hdr, ERF_SC_PACKETISER_HEADER_HEADER_OFFSET);
	if (unlikely(header_offset !=
		     ERF_SC_PACKETISER_HEADER_HEADER_OFFSET_DEFAULT)) {
		sfc_err(sa, "unexpected MAE counters packet header offset %u",
			header_offset);
		return;
	}

	payload_offset =
		EFX_XWORD_FIELD(*hdr, ERF_SC_PACKETISER_HEADER_PAYLOAD_OFFSET);

	counter_count = EFX_XWORD_FIELD(*hdr, ERF_SC_PACKETISER_HEADER_COUNT);

	required_len = payload_offset +
			counter_count * sizeof(counters_data[0]);
	if (unlikely(required_len > m->data_len)) {
		sfc_err(sa, "truncated MAE counters packet: %u counters, packet length is %u vs %u required",
			counter_count, m->data_len, required_len);
		/*
		 * In theory it is possible process available counters data,
		 * but such condition is really unexpected and it is
		 * better to treat entire packet as corrupted.
		 */
		return;
	}

	/* Ensure that counters data is 32-bit aligned */
	if (unlikely(payload_offset % sizeof(uint32_t) != 0)) {
		sfc_err(sa, "unsupported MAE counters payload offset %u, must be 32-bit aligned",
			payload_offset);
		return;
	}
	RTE_BUILD_BUG_ON(sizeof(counters_data[0]) !=
			 ER_RX_SL_PACKETISER_PAYLOAD_WORD_SIZE);

	counters_data =
		rte_pktmbuf_mtod_offset(m, const efx_oword_t *, payload_offset);

	sfc_info(sa, "update %u MAE counters with gc=%u",
		 counter_count, generation_count);

	for (i = 0; i < counter_count; ++i) {
		uint32_t packet_count_lo;
		uint32_t packet_count_hi;
		uint32_t byte_count_lo;
		uint32_t byte_count_hi;

		/*
		 * Use 32-bit field accessors below since counters data
		 * is not 64-bit aligned.
		 * 32-bit alignment is checked above taking into account
		 * that start of packet data is 32-bit aligned
		 * (cache-line size aligned in fact).
		 */
		packet_count_lo =
			EFX_OWORD_FIELD32(counters_data[i],
				ERF_SC_PACKETISER_PAYLOAD_PACKET_COUNT_LO);
		packet_count_hi =
			EFX_OWORD_FIELD32(counters_data[i],
				ERF_SC_PACKETISER_PAYLOAD_PACKET_COUNT_HI);
		byte_count_lo =
			EFX_OWORD_FIELD32(counters_data[i],
				ERF_SC_PACKETISER_PAYLOAD_BYTE_COUNT_LO);
		byte_count_hi =
			EFX_OWORD_FIELD32(counters_data[i],
				ERF_SC_PACKETISER_PAYLOAD_BYTE_COUNT_HI);
		sfc_mae_counter_increment(sa,
			&counter_registry->counters,
			EFX_OWORD_FIELD32(counters_data[i],
				ERF_SC_PACKETISER_PAYLOAD_COUNTER_INDEX),
			generation_count,
			(uint64_t)packet_count_lo |
			((uint64_t)packet_count_hi <<
			 ERF_SC_PACKETISER_PAYLOAD_PACKET_COUNT_LO_WIDTH),
			(uint64_t)byte_count_lo |
			((uint64_t)byte_count_hi <<
			 ERF_SC_PACKETISER_PAYLOAD_BYTE_COUNT_LO_WIDTH));
	}
}

/**
 * Poll one burst of packets from the counters Rx queue, parse them and
 * give Rx buffer credits back to the firmware stream when enough buffers
 * have been pushed since the previous grant.
 *
 * @return Number of packets received (>= 0).
 */
static int32_t
sfc_mae_counter_poll_packets(struct sfc_adapter *sa)
{
	struct sfc_mae_counter_registry *counter_registry =
		&sa->mae.counter_registry;
	struct rte_mbuf *mbufs[SFC_MAE_COUNTER_RX_BURST];
	unsigned int pushed_diff;
	unsigned int pushed;
	unsigned int i;
	uint16_t n;
	int rc;

	n = counter_registry->rx_pkt_burst(counter_registry->rx_dp, mbufs,
					   SFC_MAE_COUNTER_RX_BURST);

	for (i = 0; i < n; i++)
		sfc_mae_parse_counter_packet(sa, counter_registry, mbufs[i]);

	rte_pktmbuf_free_bulk(mbufs, n);

	if (!counter_registry->use_credits)
		return n;

	pushed = sfc_rx_get_pushed(sa, counter_registry->rx_dp);
	pushed_diff = pushed - counter_registry->pushed_n_buffers;

	if (pushed_diff >= SFC_COUNTER_RXQ_REFILL_LEVEL) {
		rc = efx_mae_counters_stream_give_credits(sa->nic, pushed_diff);
		if (rc == 0) {
			counter_registry->pushed_n_buffers = pushed;
		} else {
			/*
			 * FIXME: counters might be important for the
			 * application. Handle the error in order to recover
			 * from the failure
			 */
			SFC_GENERIC_LOG(DEBUG, "Give credits failed: %s",
					rte_strerror(rc));
		}
	}

	return n;
}

/** rte_service callback wrapping one counter queue poll iteration. */
static int32_t
sfc_mae_counter_service_routine(void *arg)
{
	struct sfc_adapter *sa = arg;

	/*
	 * We cannot propagate any errors and we don't need to know
	 * the number of packets we've received.
	 */
	(void)sfc_mae_counter_poll_packets(sa);

	return 0;
}

/**
 * Dedicated control-thread polling loop, used when no service lcore is
 * available. Runs until the registry's run flag is cleared by
 * sfc_mae_counter_thread_stop().
 */
static void *
sfc_mae_counter_thread(void *data)
{
	struct sfc_adapter *sa = data;
	struct sfc_mae_counter_registry *counter_registry =
		&sa->mae.counter_registry;
	int32_t rc;

	while (__atomic_load_n(&counter_registry->polling.thread.run,
			       __ATOMIC_ACQUIRE)) {
		rc = sfc_mae_counter_poll_packets(sa);
		if (rc == 0) {
			/*
			 * The queue is empty. Do not burn CPU.
			 * An empty queue has just enough space for about
			 * SFC_MAE_COUNTERS_RXQ_SPACE counter updates which is
			 * more than 100K, so we can sleep a bit. The queue uses
			 * a credit-based flow control anyway, so firmware will
			 * not enqueue more counter updates until the host
			 * supplies it with additional credits. The counters are
			 * 48bits wide, so the timeout need only be short enough
			 * to ensure that the counter values do not overflow
			 * before the next counter update. Also we should not
			 * delay counter updates for a long time, otherwise
			 * application may decide that flow is idle and should
			 * be removed.
			 */
			rte_delay_ms(1);
		}
	}

	return NULL;
}

/**
 * Stop and unregister the counter polling service: clear run states,
 * wait (bounded) for the last iteration to finish, unmap the lcore and
 * unregister the service component.
 */
static void
sfc_mae_counter_service_unregister(struct sfc_adapter *sa)
{
	struct sfc_mae_counter_registry *registry =
		&sa->mae.counter_registry;
	const unsigned int wait_ms = 10000;
	unsigned int i;

	rte_service_runstate_set(registry->polling.service.id, 0);
	rte_service_component_runstate_set(registry->polling.service.id, 0);

	/*
	 * Wait for the counter routine to finish the last iteration.
	 * Give up on timeout.
	 */
	for (i = 0; i < wait_ms; i++) {
		if (rte_service_may_be_active(registry->polling.service.id) == 0)
			break;

		rte_delay_ms(1);
	}
	if (i == wait_ms)
		sfc_warn(sa, "failed to wait for counter service to stop");

	rte_service_map_lcore_set(registry->polling.service.id,
				  registry->polling.service.core_id, 0);

	rte_service_component_unregister(registry->polling.service.id);
}

/** Shortcut to the shared Rx queue info of the counters Rx queue. */
static struct sfc_rxq_info *
sfc_counter_rxq_info_get(struct sfc_adapter *sa)
{
	return &sfc_sa2shared(sa)->rxq_info[sa->counter_rxq.sw_index];
}

/**
 * Cache datapath access details and credit mode in the counter registry
 * before polling starts (common to service and thread polling modes).
 */
static void
sfc_mae_counter_registry_prepare(struct sfc_mae_counter_registry *registry,
				 struct sfc_adapter *sa,
				 uint32_t counter_stream_flags)
{
	registry->rx_pkt_burst = sa->eth_dev->rx_pkt_burst;
	registry->rx_dp = sfc_counter_rxq_info_get(sa)->dp;
	registry->pushed_n_buffers = 0;
	registry->use_credits = counter_stream_flags &
		EFX_MAE_COUNTERS_STREAM_OUT_USES_CREDITS;
}

/**
 * Register and start the counter polling service on a service lcore.
 *
 * @return 0 on success, positive errno on failure (all partially
 *	   acquired service resources are rolled back).
 */
static int
sfc_mae_counter_service_register(struct sfc_adapter *sa,
				 uint32_t counter_stream_flags)
{
	struct rte_service_spec service;
	char counter_service_name[sizeof(service.name)] = "counter_service";
	struct sfc_mae_counter_registry *counter_registry =
		&sa->mae.counter_registry;
	uint32_t cid;
	uint32_t sid;
	int rc;

	sfc_log_init(sa, "entry");

	/* Prepare service info */
	memset(&service, 0, sizeof(service));
	rte_strscpy(service.name, counter_service_name, sizeof(service.name));
	service.socket_id = sa->socket_id;
	service.callback = sfc_mae_counter_service_routine;
	service.callback_userdata = sa;
	sfc_mae_counter_registry_prepare(counter_registry, sa,
					 counter_stream_flags);

	cid = sfc_get_service_lcore(sa->socket_id);
	if (cid == RTE_MAX_LCORE && sa->socket_id != SOCKET_ID_ANY) {
		/* Warn and try to allocate on any NUMA node */
		sfc_warn(sa,
			"failed to get service lcore for counter service at socket %d",
			sa->socket_id);

		cid = sfc_get_service_lcore(SOCKET_ID_ANY);
	}
	if (cid == RTE_MAX_LCORE) {
		rc = ENOTSUP;
		sfc_err(sa, "failed to get service lcore for counter service");
		goto fail_get_service_lcore;
	}

	/* Service core may be in "stopped" state, start it */
	rc = rte_service_lcore_start(cid);
	if (rc != 0 && rc != -EALREADY) {
		sfc_err(sa, "failed to start service core for counter service: %s",
			rte_strerror(-rc));
		rc = ENOTSUP;
		goto fail_start_core;
	}

	/* Register counter service */
	rc = rte_service_component_register(&service, &sid);
	if (rc != 0) {
		rc = ENOEXEC;
		sfc_err(sa, "failed to register counter service component");
		goto fail_register;
	}

	/* Map the service with the service core */
	rc = rte_service_map_lcore_set(sid, cid, 1);
	if (rc != 0) {
		rc = -rc;
		sfc_err(sa, "failed to map lcore for counter service: %s",
			rte_strerror(rc));
		goto fail_map_lcore;
	}

	/* Run the service */
	rc = rte_service_component_runstate_set(sid, 1);
	if (rc < 0) {
		rc = -rc;
		sfc_err(sa, "failed to run counter service component: %s",
			rte_strerror(rc));
		goto fail_component_runstate_set;
	}
	rc = rte_service_runstate_set(sid, 1);
	if (rc < 0) {
		rc = -rc;
		sfc_err(sa, "failed to run counter service");
		goto fail_runstate_set;
	}

	counter_registry->polling_mode = SFC_MAE_COUNTER_POLLING_SERVICE;
	counter_registry->polling.service.core_id = cid;
	counter_registry->polling.service.id = sid;

	sfc_log_init(sa, "done");

	return 0;

fail_runstate_set:
	rte_service_component_runstate_set(sid, 0);

fail_component_runstate_set:
	rte_service_map_lcore_set(sid, cid, 0);

fail_map_lcore:
	rte_service_component_unregister(sid);

fail_register:
fail_start_core:
fail_get_service_lcore:
	sfc_log_init(sa, "failed: %s", rte_strerror(rc));

	return rc;
}

/** Signal the polling thread to exit and join it. */
static void
sfc_mae_counter_thread_stop(struct sfc_adapter *sa)
{
	struct sfc_mae_counter_registry *counter_registry =
		&sa->mae.counter_registry;
	int rc;

	/* Ensure that flag is set before attempting to join thread */
	__atomic_store_n(&counter_registry->polling.thread.run, false,
			 __ATOMIC_RELEASE);

	rc = pthread_join(counter_registry->polling.thread.id, NULL);
	if (rc != 0)
		sfc_err(sa, "failed to join the MAE counter polling thread");

	counter_registry->polling_mode = SFC_MAE_COUNTER_POLLING_OFF;
}

/**
 * Spawn a control thread to poll the counters Rx queue (fallback when
 * no service lcore is available).
 *
 * @return 0 on success, error code from rte_ctrl_thread_create() otherwise.
 */
static int
sfc_mae_counter_thread_spawn(struct sfc_adapter *sa,
			     uint32_t counter_stream_flags)
{
	struct sfc_mae_counter_registry *counter_registry =
		&sa->mae.counter_registry;
	int rc;

	sfc_log_init(sa, "entry");

	sfc_mae_counter_registry_prepare(counter_registry, sa,
					 counter_stream_flags);

	counter_registry->polling_mode = SFC_MAE_COUNTER_POLLING_THREAD;
	counter_registry->polling.thread.run = true;

	rc = rte_ctrl_thread_create(&sa->mae.counter_registry.polling.thread.id,
				    "mae_counter_thread", NULL,
				    sfc_mae_counter_thread, sa);

	return rc;
}

/**
 * Allocate the zero-initialized array of driver-side counter entries,
 * sized for the maximum counter ID the firmware may hand out.
 *
 * @return 0 on success, ENOMEM on allocation failure.
 */
int
sfc_mae_counters_init(struct sfc_mae_counters *counters,
		      uint32_t nb_counters_max)
{
	int rc;

	SFC_GENERIC_LOG(DEBUG, "%s: entry", __func__);

	counters->mae_counters = rte_zmalloc("sfc_mae_counters",
		sizeof(*counters->mae_counters) * nb_counters_max, 0);
	if (counters->mae_counters == NULL) {
		rc = ENOMEM;
		SFC_GENERIC_LOG(ERR, "%s: failed: %s", __func__,
				rte_strerror(rc));
		return rc;
	}

	counters->n_mae_counters = nb_counters_max;

	SFC_GENERIC_LOG(DEBUG, "%s: done", __func__);

	return 0;
}

/** Release the counter entries array allocated by sfc_mae_counters_init(). */
void
sfc_mae_counters_fini(struct sfc_mae_counters *counters)
{
	rte_free(counters->mae_counters);
	counters->mae_counters = NULL;
}
/**
 * Attach stage of the counters Rx queue: create its dedicated mbuf pool
 * and record the software Rx queue index. No-op if counters Rx queue is
 * not supported on this adapter.
 *
 * @return 0 on success (or when unsupported), positive errno on failure.
 */
int
sfc_mae_counter_rxq_attach(struct sfc_adapter *sa)
{
	struct sfc_adapter_shared * const sas = sfc_sa2shared(sa);
	char name[RTE_MEMPOOL_NAMESIZE];
	struct rte_mempool *mp;
	unsigned int n_elements;
	unsigned int cache_size;
	/* The mempool is internal and private area is not required */
	const uint16_t priv_size = 0;
	const uint16_t data_room_size = RTE_PKTMBUF_HEADROOM +
		SFC_MAE_COUNTER_STREAM_PACKET_SIZE;
	int rc;

	sfc_log_init(sa, "entry");

	if (!sas->counters_rxq_allocated) {
		sfc_log_init(sa, "counter queue is not supported - skip");
		return 0;
	}

	/*
	 * At least one element in the ring is always unused to distinguish
	 * between empty and full ring cases.
	 */
	n_elements = SFC_COUNTER_RXQ_RX_DESC_COUNT - 1;

	/*
	 * The cache must have sufficient space to put received buckets
	 * before they're reused on refill.
	 */
	cache_size = rte_align32pow2(SFC_COUNTER_RXQ_REFILL_LEVEL +
				     SFC_MAE_COUNTER_RX_BURST - 1);

	if (snprintf(name, sizeof(name), "counter_rxq-pool-%u", sas->port_id) >=
	    (int)sizeof(name)) {
		sfc_err(sa, "failed: counter RxQ mempool name is too long");
		rc = ENAMETOOLONG;
		goto fail_long_name;
	}

	/*
	 * It could be single-producer single-consumer ring mempool which
	 * requires minimal barriers. However, cache size and refill/burst
	 * policy are aligned, therefore it does not matter which
	 * mempool backend is chosen since backend is unused.
	 */
	mp = rte_pktmbuf_pool_create(name, n_elements, cache_size,
				     priv_size, data_room_size, sa->socket_id);
	if (mp == NULL) {
		sfc_err(sa, "failed to create counter RxQ mempool");
		rc = rte_errno;
		goto fail_mp_create;
	}

	sa->counter_rxq.sw_index = sfc_counters_rxq_sw_index(sas);
	sa->counter_rxq.mp = mp;
	sa->counter_rxq.state |= SFC_COUNTER_RXQ_ATTACHED;

	sfc_log_init(sa, "done");

	return 0;

fail_mp_create:
fail_long_name:
	sfc_log_init(sa, "failed: %s", rte_strerror(rc));

	return rc;
}

/** Detach stage: free the counters Rx queue mbuf pool (reverse of attach). */
void
sfc_mae_counter_rxq_detach(struct sfc_adapter *sa)
{
	struct sfc_adapter_shared * const sas = sfc_sa2shared(sa);

	sfc_log_init(sa, "entry");

	if (!sas->counters_rxq_allocated) {
		sfc_log_init(sa, "counter queue is not supported - skip");
		return;
	}

	if ((sa->counter_rxq.state & SFC_COUNTER_RXQ_ATTACHED) == 0) {
		sfc_log_init(sa, "counter queue is not attached - skip");
		return;
	}

	rte_mempool_free(sa->counter_rxq.mp);
	sa->counter_rxq.mp = NULL;
	sa->counter_rxq.state &= ~SFC_COUNTER_RXQ_ATTACHED;

	sfc_log_init(sa, "done");
}

/**
 * Init stage of the counters Rx queue: set up the queue with the
 * USER_MARK flag (carries the generation count in the Rx prefix) and a
 * descriptor count clamped to the adapter limits.
 *
 * @return 0 on success (or when unsupported/unattached), positive errno
 *	   on failure.
 */
int
sfc_mae_counter_rxq_init(struct sfc_adapter *sa)
{
	struct sfc_adapter_shared * const sas = sfc_sa2shared(sa);
	const struct rte_eth_rxconf rxconf = {
		.rx_free_thresh = SFC_COUNTER_RXQ_REFILL_LEVEL,
		.rx_drop_en = 1,
	};
	uint16_t nb_rx_desc = SFC_COUNTER_RXQ_RX_DESC_COUNT;
	int rc;

	sfc_log_init(sa, "entry");

	if (!sas->counters_rxq_allocated) {
		sfc_log_init(sa, "counter queue is not supported - skip");
		return 0;
	}

	if ((sa->counter_rxq.state & SFC_COUNTER_RXQ_ATTACHED) == 0) {
		sfc_log_init(sa, "counter queue is not attached - skip");
		return 0;
	}

	nb_rx_desc = RTE_MIN(nb_rx_desc, sa->rxq_max_entries);
	nb_rx_desc = RTE_MAX(nb_rx_desc, sa->rxq_min_entries);

	rc = sfc_rx_qinit_info(sa, sa->counter_rxq.sw_index,
			       EFX_RXQ_FLAG_USER_MARK);
	if (rc != 0)
		goto fail_counter_rxq_init_info;

	rc = sfc_rx_qinit(sa, sa->counter_rxq.sw_index, nb_rx_desc,
			  sa->socket_id, &rxconf, sa->counter_rxq.mp);
	if (rc != 0) {
		sfc_err(sa, "failed to init counter RxQ");
		goto fail_counter_rxq_init;
	}

	sa->counter_rxq.state |= SFC_COUNTER_RXQ_INITIALIZED;

	sfc_log_init(sa, "done");

	return 0;

fail_counter_rxq_init:
fail_counter_rxq_init_info:
	sfc_log_init(sa, "failed: %s", rte_strerror(rc));

	return rc;
}

/** Fini stage: tear down the counters Rx queue (reverse of init). */
void
sfc_mae_counter_rxq_fini(struct sfc_adapter *sa)
{
	struct sfc_adapter_shared * const sas = sfc_sa2shared(sa);

	sfc_log_init(sa, "entry");

	if (!sas->counters_rxq_allocated) {
		sfc_log_init(sa, "counter queue is not supported - skip");
		return;
	}

	if ((sa->counter_rxq.state & SFC_COUNTER_RXQ_INITIALIZED) == 0) {
		sfc_log_init(sa, "counter queue is not initialized - skip");
		return;
	}

	sfc_rx_qfini(sa, sa->counter_rxq.sw_index);

	sfc_log_init(sa, "done");
}

/**
 * Stop counter processing: halt the polling entity (service or thread,
 * whichever is active) and stop the firmware counter stream.
 */
void
sfc_mae_counter_stop(struct sfc_adapter *sa)
{
	struct sfc_mae *mae = &sa->mae;

	sfc_log_init(sa, "entry");

	if (!mae->counter_rxq_running) {
		sfc_log_init(sa, "counter queue is not running - skip");
		return;
	}

	SFC_ASSERT(mae->counter_registry.polling_mode !=
			SFC_MAE_COUNTER_POLLING_OFF);

	if (mae->counter_registry.polling_mode ==
			SFC_MAE_COUNTER_POLLING_SERVICE)
		sfc_mae_counter_service_unregister(sa);
	else
		sfc_mae_counter_thread_stop(sa);

	efx_mae_counters_stream_stop(sa->nic, sa->counter_rxq.sw_index, NULL);

	mae->counter_rxq_running = false;

	sfc_log_init(sa, "done");
}

/**
 * Start counter processing: start the firmware counter stream and begin
 * polling via a service lcore if one is available, otherwise via a
 * dedicated control thread.
 *
 * @return 0 on success (idempotent if already running), positive errno
 *	   on failure.
 */
int
sfc_mae_counter_start(struct sfc_adapter *sa)
{
	struct sfc_mae *mae = &sa->mae;
	uint32_t flags;
	int rc;

	SFC_ASSERT(sa->counter_rxq.state & SFC_COUNTER_RXQ_ATTACHED);

	if (mae->counter_rxq_running)
		return 0;

	sfc_log_init(sa, "entry");

	rc = efx_mae_counters_stream_start(sa->nic, sa->counter_rxq.sw_index,
					   SFC_MAE_COUNTER_STREAM_PACKET_SIZE,
					   0 /* No flags required */, &flags);
	if (rc != 0) {
		sfc_err(sa, "failed to start MAE counters stream: %s",
			rte_strerror(rc));
		goto fail_counter_stream;
	}

	sfc_log_init(sa, "stream start flags: 0x%x", flags);

	if (sfc_mae_counter_get_service_lcore(sa) != RTE_MAX_LCORE) {
		rc = sfc_mae_counter_service_register(sa, flags);
		if (rc != 0)
			goto fail_service_register;
	} else {
		rc = sfc_mae_counter_thread_spawn(sa, flags);
		if (rc != 0)
			goto fail_thread_spawn;
	}

	mae->counter_rxq_running = true;

	return 0;

fail_service_register:
fail_thread_spawn:
	efx_mae_counters_stream_stop(sa->nic, sa->counter_rxq.sw_index, NULL);

fail_counter_stream:
	sfc_log_init(sa, "failed: %s", rte_strerror(rc));

	return rc;
}

/**
 * Query a counter for rte_flow_query(COUNT): report hits (and bytes for
 * non-tunnel counters) accumulated since the last reset, optionally
 * resetting the baseline.
 *
 * @return 0 always.
 */
int
sfc_mae_counter_get(struct sfc_mae_counters *counters,
		    const struct sfc_mae_counter_id *counter,
		    struct rte_flow_query_count *data)
{
	struct sfc_flow_tunnel *ft = counter->ft;
	uint64_t non_reset_jump_hit_counter;
	struct sfc_mae_counter *p;
	union sfc_pkts_bytes value;

	SFC_ASSERT(counter->mae_id.id < counters->n_mae_counters);
	p = &counters->mae_counters[counter->mae_id.id];

	/*
	 * Ordering is relaxed since it is the only operation on counter value.
	 * And it does not depend on different stores/loads in other threads.
	 * Paired with relaxed ordering in counter increment.
	 */
	value.pkts_bytes.int128 = __atomic_load_n(&p->value.pkts_bytes.int128,
						  __ATOMIC_RELAXED);

	data->hits_set = 1;
	data->hits = value.pkts - p->reset.pkts;

	if (ft != NULL) {
		data->hits += ft->group_hit_counter;
		/* Assigned here and consumed below only when ft != NULL */
		non_reset_jump_hit_counter = data->hits;
		data->hits -= ft->reset_jump_hit_counter;
	} else {
		data->bytes_set = 1;
		data->bytes = value.bytes - p->reset.bytes;
	}

	if (data->reset != 0) {
		if (ft != NULL) {
			ft->reset_jump_hit_counter = non_reset_jump_hit_counter;
		} else {
			p->reset.pkts = value.pkts;
			p->reset.bytes = value.bytes;
		}
	}

	return 0;
}

/**
 * Report whether the counter stream can be used: the counters Rx queue
 * must be initialized and a service lcore must exist somewhere.
 */
bool
sfc_mae_counter_stream_enabled(struct sfc_adapter *sa)
{
	if ((sa->counter_rxq.state & SFC_COUNTER_RXQ_INITIALIZED) == 0 ||
	    sfc_get_service_lcore(SOCKET_ID_ANY) == RTE_MAX_LCORE)
		return B_FALSE;
	else
		return B_TRUE;
}