xref: /dpdk/drivers/net/sfc/sfc_mae_counter.c (revision 29fd052d)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  *
3  * Copyright(c) 2020-2021 Xilinx, Inc.
4  */
5 
6 #include <rte_common.h>
7 #include <rte_service_component.h>
8 
9 #include "efx.h"
10 #include "efx_regs_counters_pkt_format.h"
11 
12 #include "sfc_ev.h"
13 #include "sfc.h"
14 #include "sfc_rx.h"
15 #include "sfc_mae_counter.h"
16 #include "sfc_service.h"
17 
18 /**
19  * Approximate maximum number of counters per packet.
20  * In fact maximum depends on per-counter data offset which is specified
21  * in counter packet header.
22  */
23 #define SFC_MAE_COUNTERS_PER_PACKET_MAX \
24 	((SFC_MAE_COUNTER_STREAM_PACKET_SIZE - \
25 	  ER_RX_SL_PACKETISER_HEADER_WORD_SIZE) / \
26 	  ER_RX_SL_PACKETISER_PAYLOAD_WORD_SIZE)
27 
28 /**
29  * Minimum number of Rx buffers in counters only Rx queue.
30  */
31 #define SFC_MAE_COUNTER_RXQ_BUFS_MIN \
32 	(SFC_COUNTER_RXQ_RX_DESC_COUNT - SFC_COUNTER_RXQ_REFILL_LEVEL)
33 
34 /**
35  * Approximate number of counter updates fit in counters only Rx queue.
36  * The number is inaccurate since SFC_MAE_COUNTERS_PER_PACKET_MAX is
37  * inaccurate (see above). However, it provides the gist for a number of
38  * counter updates which can fit in an Rx queue after empty poll.
39  *
40  * The define is not actually used, but provides calculations details.
41  */
42 #define SFC_MAE_COUNTERS_RXQ_SPACE \
43 	(SFC_MAE_COUNTER_RXQ_BUFS_MIN * SFC_MAE_COUNTERS_PER_PACKET_MAX)
44 
45 static uint32_t
46 sfc_mae_counter_get_service_lcore(struct sfc_adapter *sa)
47 {
48 	uint32_t cid;
49 
50 	cid = sfc_get_service_lcore(sa->socket_id);
51 	if (cid != RTE_MAX_LCORE)
52 		return cid;
53 
54 	if (sa->socket_id != SOCKET_ID_ANY)
55 		cid = sfc_get_service_lcore(SOCKET_ID_ANY);
56 
57 	if (cid == RTE_MAX_LCORE) {
58 		sfc_warn(sa, "failed to get service lcore for counter service");
59 	} else if (sa->socket_id != SOCKET_ID_ANY) {
60 		sfc_warn(sa,
61 			"failed to get service lcore for counter service at socket %d, but got at socket %u",
62 			sa->socket_id, rte_lcore_to_socket_id(cid));
63 	}
64 	return cid;
65 }
66 
67 bool
68 sfc_mae_counter_rxq_required(struct sfc_adapter *sa)
69 {
70 	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
71 
72 	if (encp->enc_mae_supported == B_FALSE)
73 		return false;
74 
75 	return true;
76 }
77 
/**
 * Allocate a MAE counter from firmware and publish it for updates.
 *
 * On success, fills in @p counterp with the allocated counter ID and
 * marks the corresponding slot in the pre-allocated counters array as
 * in use, which allows the polling entity to apply stream updates to it.
 *
 * @param sa		Adapter handle
 * @param counterp	Counter descriptor to fill in
 *
 * @return 0 on success, positive errno on failure.
 */
int
sfc_mae_counter_enable(struct sfc_adapter *sa,
		       struct sfc_mae_counter_id *counterp)
{
	struct sfc_mae_counter_registry *reg = &sa->mae.counter_registry;
	struct sfc_mae_counters *counters = &reg->counters;
	struct sfc_mae_counter *p;
	efx_counter_t mae_counter;
	uint32_t generation_count;
	uint32_t unused;
	int rc;

	/*
	 * The actual count of counters allocated is ignored since a failure
	 * to allocate a single counter is indicated by non-zero return code.
	 */
	rc = efx_mae_counters_alloc(sa->nic, 1, &unused, &mae_counter,
				    &generation_count);
	if (rc != 0) {
		sfc_err(sa, "failed to alloc MAE counter: %s",
			rte_strerror(rc));
		goto fail_mae_counter_alloc;
	}

	if (mae_counter.id >= counters->n_mae_counters) {
		/*
		 * ID of a counter is expected to be within the range
		 * between 0 and the maximum count of counters to always
		 * fit into a pre-allocated array size of maximum counter ID.
		 */
		sfc_err(sa, "MAE counter ID is out of expected range");
		rc = EFAULT;
		goto fail_counter_id_range;
	}

	counterp->mae_id = mae_counter;

	p = &counters->mae_counters[mae_counter.id];

	/*
	 * Ordering is relaxed since it is the only operation on counter value.
	 * And it does not depend on different stores/loads in other threads.
	 * Paired with relaxed ordering in counter increment.
	 */
	/* Carry the accumulated value over as the new reset baseline. */
	__atomic_store(&p->reset.pkts_bytes.int128,
		       &p->value.pkts_bytes.int128, __ATOMIC_RELAXED);
	p->generation_count = generation_count;

	p->ft_group_hit_counter = counterp->ft_group_hit_counter;

	/*
	 * The flag is set at the very end of add operation and reset
	 * at the beginning of delete operation. Release ordering is
	 * paired with acquire ordering on load in counter increment operation.
	 */
	__atomic_store_n(&p->inuse, true, __ATOMIC_RELEASE);

	sfc_info(sa, "enabled MAE counter #%u with reset pkts=%" PRIu64
		 " bytes=%" PRIu64, mae_counter.id,
		 p->reset.pkts, p->reset.bytes);

	return 0;

fail_counter_id_range:
	(void)efx_mae_counters_free(sa->nic, 1, &unused, &mae_counter, NULL);

fail_mae_counter_alloc:
	sfc_log_init(sa, "failed: %s", rte_strerror(rc));
	return rc;
}
148 
/**
 * Withdraw a MAE counter from updates and free it in firmware.
 *
 * Safe to call on a counter that was never enabled (invalid ID is a
 * no-op). The counter ID is invalidated even if the firmware free
 * fails, trading a possible resource leak for consistent driver state.
 *
 * @param sa		Adapter handle
 * @param counter	Counter descriptor to disable (ID is invalidated)
 *
 * @return 0 on success, positive errno from the firmware free on failure.
 */
int
sfc_mae_counter_disable(struct sfc_adapter *sa,
			struct sfc_mae_counter_id *counter)
{
	struct sfc_mae_counter_registry *reg = &sa->mae.counter_registry;
	struct sfc_mae_counters *counters = &reg->counters;
	struct sfc_mae_counter *p;
	uint32_t unused;
	int rc;

	if (counter->mae_id.id == EFX_MAE_RSRC_ID_INVALID)
		return 0;

	SFC_ASSERT(counter->mae_id.id < counters->n_mae_counters);
	/*
	 * The flag is set at the very end of add operation and reset
	 * at the beginning of delete operation. Release ordering is
	 * paired with acquire ordering on load in counter increment operation.
	 */
	p = &counters->mae_counters[counter->mae_id.id];
	__atomic_store_n(&p->inuse, false, __ATOMIC_RELEASE);

	rc = efx_mae_counters_free(sa->nic, 1, &unused, &counter->mae_id, NULL);
	if (rc != 0)
		sfc_err(sa, "failed to free MAE counter %u: %s",
			counter->mae_id.id, rte_strerror(rc));

	sfc_info(sa, "disabled MAE counter #%u with reset pkts=%" PRIu64
		 " bytes=%" PRIu64, counter->mae_id.id,
		 p->reset.pkts, p->reset.bytes);

	/*
	 * Do this regardless of what efx_mae_counters_free() return value is.
	 * If there's some error, the resulting resource leakage is bad, but
	 * nothing sensible can be done in this case.
	 */
	counter->mae_id.id = EFX_MAE_RSRC_ID_INVALID;

	return rc;
}
189 
/**
 * Apply a single counter update parsed from a counter stream packet.
 *
 * Stale or too-early updates (counter not in use, or update generation
 * older than the counter's) are dropped and accounted in xstats.
 *
 * @param sa			Adapter handle (used for logging)
 * @param counters		Counter collection to update
 * @param mae_counter_id	ID (array index) of the counter to update
 * @param generation_count	Generation count carried by the packet
 * @param pkts			Packet count delta
 * @param bytes			Byte count delta
 */
static void
sfc_mae_counter_increment(struct sfc_adapter *sa,
			  struct sfc_mae_counters *counters,
			  uint32_t mae_counter_id,
			  uint32_t generation_count,
			  uint64_t pkts, uint64_t bytes)
{
	struct sfc_mae_counter *p = &counters->mae_counters[mae_counter_id];
	struct sfc_mae_counters_xstats *xstats = &counters->xstats;
	union sfc_pkts_bytes cnt_val;
	bool inuse;

	/*
	 * Acquire ordering is paired with release ordering in counter add
	 * and delete operations.
	 */
	__atomic_load(&p->inuse, &inuse, __ATOMIC_ACQUIRE);
	if (!inuse) {
		/*
		 * Two possible cases include:
		 * 1) Counter is just allocated. Too early counter update
		 *    cannot be processed properly.
		 * 2) Stale update of freed and not reallocated counter.
		 *    There is no point in processing that update.
		 */
		xstats->not_inuse_update++;
		return;
	}

	if (unlikely(generation_count < p->generation_count)) {
		/*
		 * It is a stale update for the reallocated counter
		 * (i.e., freed and the same ID allocated again).
		 */
		xstats->realloc_update++;
		return;
	}

	/* Accumulate deltas locally, then publish in one atomic store. */
	cnt_val.pkts = p->value.pkts + pkts;
	cnt_val.bytes = p->value.bytes + bytes;

	/*
	 * Ordering is relaxed since it is the only operation on counter value.
	 * And it does not depend on different stores/loads in other threads.
	 * Paired with relaxed ordering on counter reset.
	 */
	__atomic_store(&p->value.pkts_bytes,
		       &cnt_val.pkts_bytes, __ATOMIC_RELAXED);

	/* Optional tunnel-offload group hit counter shared with the flow. */
	if (p->ft_group_hit_counter != NULL) {
		uint64_t ft_group_hit_counter;

		ft_group_hit_counter = *p->ft_group_hit_counter + pkts;
		__atomic_store_n(p->ft_group_hit_counter, ft_group_hit_counter,
				 __ATOMIC_RELAXED);
	}

	sfc_info(sa, "update MAE counter #%u: pkts+%" PRIu64 "=%" PRIu64
		 ", bytes+%" PRIu64 "=%" PRIu64, mae_counter_id,
		 pkts, cnt_val.pkts, bytes, cnt_val.bytes);
}
251 
/**
 * Parse one MAE counter stream packet and apply all updates it carries.
 *
 * Packet layout (see efx_regs_counters_pkt_format.h): a header word
 * followed, at the payload offset given in the header, by an array of
 * per-counter payload words. Malformed packets (scattered, too short,
 * unexpected version/source/layout, truncated payload) are reported
 * and dropped entirely.
 */
static void
sfc_mae_parse_counter_packet(struct sfc_adapter *sa,
			     struct sfc_mae_counter_registry *counter_registry,
			     const struct rte_mbuf *m)
{
	uint32_t generation_count;
	const efx_xword_t *hdr;
	const efx_oword_t *counters_data;
	unsigned int version;
	unsigned int id;
	unsigned int header_offset;
	unsigned int payload_offset;
	unsigned int counter_count;
	unsigned int required_len;
	unsigned int i;

	if (unlikely(m->nb_segs != 1)) {
		sfc_err(sa, "unexpectedly scattered MAE counters packet (%u segments)",
			m->nb_segs);
		return;
	}

	if (unlikely(m->data_len < ER_RX_SL_PACKETISER_HEADER_WORD_SIZE)) {
		sfc_err(sa, "too short MAE counters packet (%u bytes)",
			m->data_len);
		return;
	}

	/*
	 * The generation count is located in the Rx prefix in the USER_MARK
	 * field which is written into hash.fdir.hi field of an mbuf. See
	 * SF-123581-TC SmartNIC Datapath Offloads section 4.7.5 Counters.
	 */
	generation_count = m->hash.fdir.hi;

	hdr = rte_pktmbuf_mtod(m, const efx_xword_t *);

	version = EFX_XWORD_FIELD(*hdr, ERF_SC_PACKETISER_HEADER_VERSION);
	if (unlikely(version != ERF_SC_PACKETISER_HEADER_VERSION_2)) {
		sfc_err(sa, "unexpected MAE counters packet version %u",
			version);
		return;
	}

	id = EFX_XWORD_FIELD(*hdr, ERF_SC_PACKETISER_HEADER_IDENTIFIER);
	if (unlikely(id != ERF_SC_PACKETISER_HEADER_IDENTIFIER_AR)) {
		sfc_err(sa, "unexpected MAE counters source identifier %u", id);
		return;
	}

	/* Packet layout definitions assume fixed header offset in fact */
	header_offset =
		EFX_XWORD_FIELD(*hdr, ERF_SC_PACKETISER_HEADER_HEADER_OFFSET);
	if (unlikely(header_offset !=
		     ERF_SC_PACKETISER_HEADER_HEADER_OFFSET_DEFAULT)) {
		sfc_err(sa, "unexpected MAE counters packet header offset %u",
			header_offset);
		return;
	}

	payload_offset =
		EFX_XWORD_FIELD(*hdr, ERF_SC_PACKETISER_HEADER_PAYLOAD_OFFSET);

	counter_count = EFX_XWORD_FIELD(*hdr, ERF_SC_PACKETISER_HEADER_COUNT);

	required_len = payload_offset +
			counter_count * sizeof(counters_data[0]);
	if (unlikely(required_len > m->data_len)) {
		sfc_err(sa, "truncated MAE counters packet: %u counters, packet length is %u vs %u required",
			counter_count, m->data_len, required_len);
		/*
		 * In theory it is possible to process available counters data,
		 * but such condition is really unexpected and it is
		 * better to treat entire packet as corrupted.
		 */
		return;
	}

	/* Ensure that counters data is 32-bit aligned */
	if (unlikely(payload_offset % sizeof(uint32_t) != 0)) {
		sfc_err(sa, "unsupported MAE counters payload offset %u, must be 32-bit aligned",
			payload_offset);
		return;
	}
	RTE_BUILD_BUG_ON(sizeof(counters_data[0]) !=
			ER_RX_SL_PACKETISER_PAYLOAD_WORD_SIZE);

	counters_data =
		rte_pktmbuf_mtod_offset(m, const efx_oword_t *, payload_offset);

	sfc_info(sa, "update %u MAE counters with gc=%u",
		 counter_count, generation_count);

	for (i = 0; i < counter_count; ++i) {
		uint32_t packet_count_lo;
		uint32_t packet_count_hi;
		uint32_t byte_count_lo;
		uint32_t byte_count_hi;

		/*
		 * Use 32-bit field accessors below since counters data
		 * is not 64-bit aligned.
		 * 32-bit alignment is checked above taking into account
		 * that start of packet data is 32-bit aligned
		 * (cache-line size aligned in fact).
		 */
		packet_count_lo =
			EFX_OWORD_FIELD32(counters_data[i],
				ERF_SC_PACKETISER_PAYLOAD_PACKET_COUNT_LO);
		packet_count_hi =
			EFX_OWORD_FIELD32(counters_data[i],
				ERF_SC_PACKETISER_PAYLOAD_PACKET_COUNT_HI);
		byte_count_lo =
			EFX_OWORD_FIELD32(counters_data[i],
				ERF_SC_PACKETISER_PAYLOAD_BYTE_COUNT_LO);
		byte_count_hi =
			EFX_OWORD_FIELD32(counters_data[i],
				ERF_SC_PACKETISER_PAYLOAD_BYTE_COUNT_HI);
		/* Reassemble 48-bit split counters from lo/hi halves. */
		sfc_mae_counter_increment(sa,
			&counter_registry->counters,
			EFX_OWORD_FIELD32(counters_data[i],
				ERF_SC_PACKETISER_PAYLOAD_COUNTER_INDEX),
			generation_count,
			(uint64_t)packet_count_lo |
			((uint64_t)packet_count_hi <<
			 ERF_SC_PACKETISER_PAYLOAD_PACKET_COUNT_LO_WIDTH),
			(uint64_t)byte_count_lo |
			((uint64_t)byte_count_hi <<
			 ERF_SC_PACKETISER_PAYLOAD_BYTE_COUNT_LO_WIDTH));
	}
}
383 
/**
 * Poll one burst of counter stream packets and process them.
 *
 * Receives up to SFC_MAE_COUNTER_RX_BURST packets from the counter RxQ,
 * applies the contained counter updates, frees the mbufs and, when the
 * stream uses credit-based flow control, gives the firmware credits for
 * the buffers pushed since the last grant.
 *
 * @return Number of packets received (>= 0); used by callers to detect
 *	   an empty queue.
 */
static int32_t
sfc_mae_counter_poll_packets(struct sfc_adapter *sa)
{
	struct sfc_mae_counter_registry *counter_registry =
		&sa->mae.counter_registry;
	struct rte_mbuf *mbufs[SFC_MAE_COUNTER_RX_BURST];
	unsigned int pushed_diff;
	unsigned int pushed;
	unsigned int i;
	uint16_t n;
	int rc;

	n = counter_registry->rx_pkt_burst(counter_registry->rx_dp, mbufs,
					   SFC_MAE_COUNTER_RX_BURST);

	for (i = 0; i < n; i++)
		sfc_mae_parse_counter_packet(sa, counter_registry, mbufs[i]);

	rte_pktmbuf_free_bulk(mbufs, n);

	if (!counter_registry->use_credits)
		return n;

	/* Credits are granted per Rx buffer pushed back to the queue. */
	pushed = sfc_rx_get_pushed(sa, counter_registry->rx_dp);
	pushed_diff = pushed - counter_registry->pushed_n_buffers;

	/* Batch credit grants to one per refill level worth of buffers. */
	if (pushed_diff >= SFC_COUNTER_RXQ_REFILL_LEVEL) {
		rc = efx_mae_counters_stream_give_credits(sa->nic, pushed_diff);
		if (rc == 0) {
			counter_registry->pushed_n_buffers = pushed;
		} else {
			/*
			 * FIXME: counters might be important for the
			 * application. Handle the error in order to recover
			 * from the failure
			 */
			SFC_GENERIC_LOG(DEBUG, "Give credits failed: %s",
					rte_strerror(rc));
		}
	}

	return n;
}
427 
428 static int32_t
429 sfc_mae_counter_service_routine(void *arg)
430 {
431 	struct sfc_adapter *sa = arg;
432 
433 	/*
434 	 * We cannot propagate any errors and we don't need to know
435 	 * the number of packets we've received.
436 	 */
437 	(void)sfc_mae_counter_poll_packets(sa);
438 
439 	return 0;
440 }
441 
/**
 * Dedicated polling thread body used when no service lcore is available.
 *
 * Runs until the registry's run flag is cleared by
 * sfc_mae_counter_thread_stop(); sleeps briefly on empty polls to
 * avoid burning CPU.
 */
static void *
sfc_mae_counter_thread(void *data)
{
	struct sfc_adapter *sa = data;
	struct sfc_mae_counter_registry *counter_registry =
		&sa->mae.counter_registry;
	int32_t rc;

	/* Acquire pairs with the release store in the thread stop path. */
	while (__atomic_load_n(&counter_registry->polling.thread.run,
			       __ATOMIC_ACQUIRE)) {
		rc = sfc_mae_counter_poll_packets(sa);
		if (rc == 0) {
			/*
			 * The queue is empty. Do not burn CPU.
			 * An empty queue has just enough space for about
			 * SFC_MAE_COUNTERS_RXQ_SPACE counter updates which is
			 * more than 100K, so we can sleep a bit. The queue uses
			 * a credit-based flow control anyway, so firmware will
			 * not enqueue more counter updates until the host
			 * supplies it with additional credits. The counters are
			 * 48bits wide, so the timeout need only be short enough
			 * to ensure that the counter values do not overflow
			 * before the next counter update. Also we should not
			 * delay counter updates for a long time, otherwise
			 * application may decide that flow is idle and should
			 * be removed.
			 */
			rte_delay_ms(1);
		}
	}

	return NULL;
}
475 
476 static void
477 sfc_mae_counter_service_unregister(struct sfc_adapter *sa)
478 {
479 	struct sfc_mae_counter_registry *registry =
480 		&sa->mae.counter_registry;
481 	const unsigned int wait_ms = 10000;
482 	unsigned int i;
483 
484 	rte_service_runstate_set(registry->polling.service.id, 0);
485 	rte_service_component_runstate_set(registry->polling.service.id, 0);
486 
487 	/*
488 	 * Wait for the counter routine to finish the last iteration.
489 	 * Give up on timeout.
490 	 */
491 	for (i = 0; i < wait_ms; i++) {
492 		if (rte_service_may_be_active(registry->polling.service.id) == 0)
493 			break;
494 
495 		rte_delay_ms(1);
496 	}
497 	if (i == wait_ms)
498 		sfc_warn(sa, "failed to wait for counter service to stop");
499 
500 	rte_service_map_lcore_set(registry->polling.service.id,
501 				  registry->polling.service.core_id, 0);
502 
503 	rte_service_component_unregister(registry->polling.service.id);
504 }
505 
506 static struct sfc_rxq_info *
507 sfc_counter_rxq_info_get(struct sfc_adapter *sa)
508 {
509 	return &sfc_sa2shared(sa)->rxq_info[sa->counter_rxq.sw_index];
510 }
511 
512 static void
513 sfc_mae_counter_registry_prepare(struct sfc_mae_counter_registry *registry,
514 				 struct sfc_adapter *sa,
515 				 uint32_t counter_stream_flags)
516 {
517 	registry->rx_pkt_burst = sa->eth_dev->rx_pkt_burst;
518 	registry->rx_dp = sfc_counter_rxq_info_get(sa)->dp;
519 	registry->pushed_n_buffers = 0;
520 	registry->use_credits = counter_stream_flags &
521 		EFX_MAE_COUNTERS_STREAM_OUT_USES_CREDITS;
522 }
523 
/**
 * Register and start a service that polls the counter packet stream.
 *
 * Finds (or falls back to) a service lcore, starts it if needed,
 * registers the counter service component, maps it to the lcore and
 * sets it running. On any failure, previously completed steps are
 * rolled back via the goto cleanup chain.
 *
 * @param sa			Adapter handle
 * @param counter_stream_flags	Flags returned by stream start (credits etc.)
 *
 * @return 0 on success, positive errno on failure.
 */
static int
sfc_mae_counter_service_register(struct sfc_adapter *sa,
				 uint32_t counter_stream_flags)
{
	struct rte_service_spec service;
	char counter_service_name[sizeof(service.name)] = "counter_service";
	struct sfc_mae_counter_registry *counter_registry =
		&sa->mae.counter_registry;
	uint32_t cid;
	uint32_t sid;
	int rc;

	sfc_log_init(sa, "entry");

	/* Prepare service info */
	memset(&service, 0, sizeof(service));
	rte_strscpy(service.name, counter_service_name, sizeof(service.name));
	service.socket_id = sa->socket_id;
	service.callback = sfc_mae_counter_service_routine;
	service.callback_userdata = sa;
	sfc_mae_counter_registry_prepare(counter_registry, sa,
					 counter_stream_flags);

	cid = sfc_get_service_lcore(sa->socket_id);
	if (cid == RTE_MAX_LCORE && sa->socket_id != SOCKET_ID_ANY) {
		/* Warn and try to allocate on any NUMA node */
		sfc_warn(sa,
			"failed to get service lcore for counter service at socket %d",
			sa->socket_id);

		cid = sfc_get_service_lcore(SOCKET_ID_ANY);
	}
	if (cid == RTE_MAX_LCORE) {
		rc = ENOTSUP;
		sfc_err(sa, "failed to get service lcore for counter service");
		goto fail_get_service_lcore;
	}

	/* Service core may be in "stopped" state, start it */
	rc = rte_service_lcore_start(cid);
	if (rc != 0 && rc != -EALREADY) {
		sfc_err(sa, "failed to start service core for counter service: %s",
			rte_strerror(-rc));
		rc = ENOTSUP;
		goto fail_start_core;
	}

	/* Register counter service */
	rc = rte_service_component_register(&service, &sid);
	if (rc != 0) {
		rc = ENOEXEC;
		sfc_err(sa, "failed to register counter service component");
		goto fail_register;
	}

	/* Map the service with the service core */
	rc = rte_service_map_lcore_set(sid, cid, 1);
	if (rc != 0) {
		/* rte_service API returns negative errno; flip the sign */
		rc = -rc;
		sfc_err(sa, "failed to map lcore for counter service: %s",
			rte_strerror(rc));
		goto fail_map_lcore;
	}

	/* Run the service */
	rc = rte_service_component_runstate_set(sid, 1);
	if (rc < 0) {
		rc = -rc;
		sfc_err(sa, "failed to run counter service component: %s",
			rte_strerror(rc));
		goto fail_component_runstate_set;
	}
	rc = rte_service_runstate_set(sid, 1);
	if (rc < 0) {
		rc = -rc;
		sfc_err(sa, "failed to run counter service");
		goto fail_runstate_set;
	}

	/* Record the polling mode so the stop path knows what to tear down */
	counter_registry->polling_mode = SFC_MAE_COUNTER_POLLING_SERVICE;
	counter_registry->polling.service.core_id = cid;
	counter_registry->polling.service.id = sid;

	sfc_log_init(sa, "done");

	return 0;

fail_runstate_set:
	rte_service_component_runstate_set(sid, 0);

fail_component_runstate_set:
	rte_service_map_lcore_set(sid, cid, 0);

fail_map_lcore:
	rte_service_component_unregister(sid);

fail_register:
fail_start_core:
fail_get_service_lcore:
	sfc_log_init(sa, "failed: %s", rte_strerror(rc));

	return rc;
}
627 
628 static void
629 sfc_mae_counter_thread_stop(struct sfc_adapter *sa)
630 {
631 	struct sfc_mae_counter_registry *counter_registry =
632 		&sa->mae.counter_registry;
633 	int rc;
634 
635 	/* Ensure that flag is set before attempting to join thread */
636 	__atomic_store_n(&counter_registry->polling.thread.run, false,
637 			 __ATOMIC_RELEASE);
638 
639 	rc = pthread_join(counter_registry->polling.thread.id, NULL);
640 	if (rc != 0)
641 		sfc_err(sa, "failed to join the MAE counter polling thread");
642 
643 	counter_registry->polling_mode = SFC_MAE_COUNTER_POLLING_OFF;
644 }
645 
646 static int
647 sfc_mae_counter_thread_spawn(struct sfc_adapter *sa,
648 			     uint32_t counter_stream_flags)
649 {
650 	struct sfc_mae_counter_registry *counter_registry =
651 		&sa->mae.counter_registry;
652 	int rc;
653 
654 	sfc_log_init(sa, "entry");
655 
656 	sfc_mae_counter_registry_prepare(counter_registry, sa,
657 					 counter_stream_flags);
658 
659 	counter_registry->polling_mode = SFC_MAE_COUNTER_POLLING_THREAD;
660 	counter_registry->polling.thread.run = true;
661 
662 	rc = rte_ctrl_thread_create(&sa->mae.counter_registry.polling.thread.id,
663 				    "mae_counter_thread", NULL,
664 				    sfc_mae_counter_thread, sa);
665 
666 	return rc;
667 }
668 
669 int
670 sfc_mae_counters_init(struct sfc_mae_counters *counters,
671 		      uint32_t nb_counters_max)
672 {
673 	int rc;
674 
675 	SFC_GENERIC_LOG(DEBUG, "%s: entry", __func__);
676 
677 	counters->mae_counters = rte_zmalloc("sfc_mae_counters",
678 		sizeof(*counters->mae_counters) * nb_counters_max, 0);
679 	if (counters->mae_counters == NULL) {
680 		rc = ENOMEM;
681 		SFC_GENERIC_LOG(ERR, "%s: failed: %s", __func__,
682 				rte_strerror(rc));
683 		return rc;
684 	}
685 
686 	counters->n_mae_counters = nb_counters_max;
687 
688 	SFC_GENERIC_LOG(DEBUG, "%s: done", __func__);
689 
690 	return 0;
691 }
692 
693 void
694 sfc_mae_counters_fini(struct sfc_mae_counters *counters)
695 {
696 	rte_free(counters->mae_counters);
697 	counters->mae_counters = NULL;
698 }
699 
700 int
701 sfc_mae_counter_rxq_attach(struct sfc_adapter *sa)
702 {
703 	struct sfc_adapter_shared * const sas = sfc_sa2shared(sa);
704 	char name[RTE_MEMPOOL_NAMESIZE];
705 	struct rte_mempool *mp;
706 	unsigned int n_elements;
707 	unsigned int cache_size;
708 	/* The mempool is internal and private area is not required */
709 	const uint16_t priv_size = 0;
710 	const uint16_t data_room_size = RTE_PKTMBUF_HEADROOM +
711 		SFC_MAE_COUNTER_STREAM_PACKET_SIZE;
712 	int rc;
713 
714 	sfc_log_init(sa, "entry");
715 
716 	if (!sas->counters_rxq_allocated) {
717 		sfc_log_init(sa, "counter queue is not supported - skip");
718 		return 0;
719 	}
720 
721 	/*
722 	 * At least one element in the ring is always unused to distinguish
723 	 * between empty and full ring cases.
724 	 */
725 	n_elements = SFC_COUNTER_RXQ_RX_DESC_COUNT - 1;
726 
727 	/*
728 	 * The cache must have sufficient space to put received buckets
729 	 * before they're reused on refill.
730 	 */
731 	cache_size = rte_align32pow2(SFC_COUNTER_RXQ_REFILL_LEVEL +
732 				     SFC_MAE_COUNTER_RX_BURST - 1);
733 
734 	if (snprintf(name, sizeof(name), "counter_rxq-pool-%u", sas->port_id) >=
735 	    (int)sizeof(name)) {
736 		sfc_err(sa, "failed: counter RxQ mempool name is too long");
737 		rc = ENAMETOOLONG;
738 		goto fail_long_name;
739 	}
740 
741 	/*
742 	 * It could be single-producer single-consumer ring mempool which
743 	 * requires minimal barriers. However, cache size and refill/burst
744 	 * policy are aligned, therefore it does not matter which
745 	 * mempool backend is chosen since backend is unused.
746 	 */
747 	mp = rte_pktmbuf_pool_create(name, n_elements, cache_size,
748 				     priv_size, data_room_size, sa->socket_id);
749 	if (mp == NULL) {
750 		sfc_err(sa, "failed to create counter RxQ mempool");
751 		rc = rte_errno;
752 		goto fail_mp_create;
753 	}
754 
755 	sa->counter_rxq.sw_index = sfc_counters_rxq_sw_index(sas);
756 	sa->counter_rxq.mp = mp;
757 	sa->counter_rxq.state |= SFC_COUNTER_RXQ_ATTACHED;
758 
759 	sfc_log_init(sa, "done");
760 
761 	return 0;
762 
763 fail_mp_create:
764 fail_long_name:
765 	sfc_log_init(sa, "failed: %s", rte_strerror(rc));
766 
767 	return rc;
768 }
769 
/**
 * Detach the counter RxQ: free its mempool and clear the attached state.
 * A no-op when the counter queue is unsupported or was never attached.
 */
void
sfc_mae_counter_rxq_detach(struct sfc_adapter *sa)
{
	struct sfc_adapter_shared * const sas = sfc_sa2shared(sa);

	sfc_log_init(sa, "entry");

	if (!sas->counters_rxq_allocated) {
		sfc_log_init(sa, "counter queue is not supported - skip");
		return;
	}

	if ((sa->counter_rxq.state & SFC_COUNTER_RXQ_ATTACHED) == 0) {
		sfc_log_init(sa, "counter queue is not attached - skip");
		return;
	}

	rte_mempool_free(sa->counter_rxq.mp);
	sa->counter_rxq.mp = NULL;
	sa->counter_rxq.state &= ~SFC_COUNTER_RXQ_ATTACHED;

	sfc_log_init(sa, "done");
}
793 
/**
 * Initialize the counter RxQ using the mempool created at attach time.
 *
 * The queue is configured with the USER_MARK flag so that the Rx prefix
 * delivers the generation count (see sfc_mae_parse_counter_packet()).
 * A no-op when the counter queue is unsupported or not attached.
 *
 * @return 0 on success, positive errno on failure.
 */
int
sfc_mae_counter_rxq_init(struct sfc_adapter *sa)
{
	struct sfc_adapter_shared * const sas = sfc_sa2shared(sa);
	const struct rte_eth_rxconf rxconf = {
		.rx_free_thresh = SFC_COUNTER_RXQ_REFILL_LEVEL,
		.rx_drop_en = 1,
	};
	uint16_t nb_rx_desc = SFC_COUNTER_RXQ_RX_DESC_COUNT;
	int rc;

	sfc_log_init(sa, "entry");

	if (!sas->counters_rxq_allocated) {
		sfc_log_init(sa, "counter queue is not supported - skip");
		return 0;
	}

	if ((sa->counter_rxq.state & SFC_COUNTER_RXQ_ATTACHED) == 0) {
		sfc_log_init(sa, "counter queue is not attached - skip");
		return 0;
	}

	/* Clamp the descriptor count to the adapter's supported range */
	nb_rx_desc = RTE_MIN(nb_rx_desc, sa->rxq_max_entries);
	nb_rx_desc = RTE_MAX(nb_rx_desc, sa->rxq_min_entries);

	rc = sfc_rx_qinit_info(sa, sa->counter_rxq.sw_index,
			       EFX_RXQ_FLAG_USER_MARK);
	if (rc != 0)
		goto fail_counter_rxq_init_info;

	rc = sfc_rx_qinit(sa, sa->counter_rxq.sw_index, nb_rx_desc,
			  sa->socket_id, &rxconf, sa->counter_rxq.mp);
	if (rc != 0) {
		sfc_err(sa, "failed to init counter RxQ");
		goto fail_counter_rxq_init;
	}

	sa->counter_rxq.state |= SFC_COUNTER_RXQ_INITIALIZED;

	sfc_log_init(sa, "done");

	return 0;

fail_counter_rxq_init:
fail_counter_rxq_init_info:
	sfc_log_init(sa, "failed: %s", rte_strerror(rc));

	return rc;
}
844 
/**
 * Finalize the counter RxQ. A no-op when the counter queue is
 * unsupported or was never initialized.
 */
void
sfc_mae_counter_rxq_fini(struct sfc_adapter *sa)
{
	struct sfc_adapter_shared * const sas = sfc_sa2shared(sa);

	sfc_log_init(sa, "entry");

	if (!sas->counters_rxq_allocated) {
		sfc_log_init(sa, "counter queue is not supported - skip");
		return;
	}

	if ((sa->counter_rxq.state & SFC_COUNTER_RXQ_INITIALIZED) == 0) {
		sfc_log_init(sa, "counter queue is not initialized - skip");
		return;
	}

	sfc_rx_qfini(sa, sa->counter_rxq.sw_index);

	sfc_log_init(sa, "done");
}
866 
/**
 * Stop MAE counter polling and the firmware counter packet stream.
 *
 * Tears down the polling entity (service or thread, whichever was set
 * up by sfc_mae_counter_start()) first so no more packets are dequeued,
 * then stops the stream. A no-op when the stream is not running.
 */
void
sfc_mae_counter_stop(struct sfc_adapter *sa)
{
	struct sfc_mae *mae = &sa->mae;

	sfc_log_init(sa, "entry");

	if (!mae->counter_rxq_running) {
		sfc_log_init(sa, "counter queue is not running - skip");
		return;
	}

	SFC_ASSERT(mae->counter_registry.polling_mode !=
			SFC_MAE_COUNTER_POLLING_OFF);

	if (mae->counter_registry.polling_mode ==
			SFC_MAE_COUNTER_POLLING_SERVICE)
		sfc_mae_counter_service_unregister(sa);
	else
		sfc_mae_counter_thread_stop(sa);

	/* Errors ignored: nothing sensible can be done on stop failure */
	efx_mae_counters_stream_stop(sa->nic, sa->counter_rxq.sw_index, NULL);

	mae->counter_rxq_running = false;

	sfc_log_init(sa, "done");
}
894 
/**
 * Start the firmware counter packet stream and a polling entity for it.
 *
 * Prefers an rte_service on a service lcore; falls back to a dedicated
 * control thread when no service lcore is available. Idempotent: returns
 * 0 immediately if the stream is already running.
 *
 * @return 0 on success, positive errno on failure.
 */
int
sfc_mae_counter_start(struct sfc_adapter *sa)
{
	struct sfc_mae *mae = &sa->mae;
	uint32_t flags;
	int rc;

	SFC_ASSERT(sa->counter_rxq.state & SFC_COUNTER_RXQ_ATTACHED);

	if (mae->counter_rxq_running)
		return 0;

	sfc_log_init(sa, "entry");

	rc = efx_mae_counters_stream_start(sa->nic, sa->counter_rxq.sw_index,
					   SFC_MAE_COUNTER_STREAM_PACKET_SIZE,
					   0 /* No flags required */, &flags);
	if (rc != 0) {
		sfc_err(sa, "failed to start MAE counters stream: %s",
			rte_strerror(rc));
		goto fail_counter_stream;
	}

	sfc_log_init(sa, "stream start flags: 0x%x", flags);

	if (sfc_mae_counter_get_service_lcore(sa) != RTE_MAX_LCORE) {
		rc = sfc_mae_counter_service_register(sa, flags);
		if (rc != 0)
			goto fail_service_register;
	} else {
		rc = sfc_mae_counter_thread_spawn(sa, flags);
		if (rc != 0)
			goto fail_thread_spawn;
	}

	mae->counter_rxq_running = true;

	return 0;

fail_service_register:
fail_thread_spawn:
	/* Roll the stream back so a later start attempt begins cleanly */
	efx_mae_counters_stream_stop(sa->nic, sa->counter_rxq.sw_index, NULL);

fail_counter_stream:
	sfc_log_init(sa, "failed: %s", rte_strerror(rc));

	return rc;
}
943 
/**
 * Query a MAE counter for rte_flow_query_count().
 *
 * Reports hits (and, outside tunnel offload, bytes) since the last
 * reset baseline. When the counter belongs to a flow tunnel (ft != NULL),
 * hits also include the tunnel group hit counter and bytes are not
 * reported — presumably because byte counts are not meaningful for the
 * tunnel JUMP rule (NOTE(review): confirm against sfc_flow_tunnel users).
 * If data->reset is set, the current value becomes the new baseline.
 *
 * @return Always 0.
 */
int
sfc_mae_counter_get(struct sfc_mae_counters *counters,
		    const struct sfc_mae_counter_id *counter,
		    struct rte_flow_query_count *data)
{
	struct sfc_flow_tunnel *ft = counter->ft;
	uint64_t non_reset_jump_hit_counter;
	struct sfc_mae_counter *p;
	union sfc_pkts_bytes value;

	SFC_ASSERT(counter->mae_id.id < counters->n_mae_counters);
	p = &counters->mae_counters[counter->mae_id.id];

	/*
	 * Ordering is relaxed since it is the only operation on counter value.
	 * And it does not depend on different stores/loads in other threads.
	 * Paired with relaxed ordering in counter increment.
	 */
	value.pkts_bytes.int128 = __atomic_load_n(&p->value.pkts_bytes.int128,
						  __ATOMIC_RELAXED);

	data->hits_set = 1;
	data->hits = value.pkts - p->reset.pkts;

	if (ft != NULL) {
		data->hits += ft->group_hit_counter;
		/* Remember pre-reset hits; used below only when ft != NULL */
		non_reset_jump_hit_counter = data->hits;
		data->hits -= ft->reset_jump_hit_counter;
	} else {
		data->bytes_set = 1;
		data->bytes = value.bytes - p->reset.bytes;
	}

	if (data->reset != 0) {
		if (ft != NULL) {
			ft->reset_jump_hit_counter = non_reset_jump_hit_counter;
		} else {
			p->reset.pkts = value.pkts;
			p->reset.bytes = value.bytes;
		}
	}

	return 0;
}
988 
989 bool
990 sfc_mae_counter_stream_enabled(struct sfc_adapter *sa)
991 {
992 	if ((sa->counter_rxq.state & SFC_COUNTER_RXQ_INITIALIZED) == 0 ||
993 	    sfc_get_service_lcore(SOCKET_ID_ANY) == RTE_MAX_LCORE)
994 		return B_FALSE;
995 	else
996 		return B_TRUE;
997 }
998