xref: /dpdk/drivers/net/mlx5/mlx5_flow_aso.c (revision 147f6fb4)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2020 Mellanox Technologies, Ltd
3  */
4 #include <mlx5_prm.h>
5 #include <rte_malloc.h>
6 #include <rte_cycles.h>
7 #include <rte_eal_paging.h>
8 
9 #include <mlx5_malloc.h>
10 #include <mlx5_common_os.h>
11 #include <mlx5_common_devx.h>
12 
13 #include "mlx5.h"
14 #include "mlx5_flow.h"
15 
16 /**
17  * Free MR resources.
18  *
19  * @param[in] cdev
20  *   Pointer to the mlx5 common device.
21  * @param[in] mr
22  *   MR to free.
23  */
24 static void
mlx5_aso_dereg_mr(struct mlx5_common_device * cdev,struct mlx5_pmd_mr * mr)25 mlx5_aso_dereg_mr(struct mlx5_common_device *cdev, struct mlx5_pmd_mr *mr)
26 {
27 	void *addr = mr->addr;
28 
29 	cdev->mr_scache.dereg_mr_cb(mr);
30 	mlx5_free(addr);
31 	memset(mr, 0, sizeof(*mr));
32 }
33 
34 /**
35  * Register Memory Region.
36  *
37  * @param[in] cdev
38  *   Pointer to the mlx5 common device.
39  * @param[in] length
40  *   Size of MR buffer.
41  * @param[in/out] mr
42  *   Pointer to MR to create.
43  *
44  * @return
45  *   0 on success, a negative errno value otherwise and rte_errno is set.
46  */
47 static int
mlx5_aso_reg_mr(struct mlx5_common_device * cdev,size_t length,struct mlx5_pmd_mr * mr)48 mlx5_aso_reg_mr(struct mlx5_common_device *cdev, size_t length,
49 		struct mlx5_pmd_mr *mr)
50 {
51 	int ret;
52 
53 	mr->addr = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, length, 4096,
54 			       SOCKET_ID_ANY);
55 	if (!mr->addr) {
56 		DRV_LOG(ERR, "Failed to create ASO bits mem for MR.");
57 		return -1;
58 	}
59 	ret = cdev->mr_scache.reg_mr_cb(cdev->pd, mr->addr, length, mr);
60 	if (ret) {
61 		DRV_LOG(ERR, "Failed to create direct Mkey.");
62 		mlx5_free(mr->addr);
63 		return -1;
64 	}
65 	return 0;
66 }
67 
68 /**
69  * Destroy Send Queue used for ASO access.
70  *
71  * @param[in] sq
72  *   ASO SQ to destroy.
73  */
74 static void
mlx5_aso_destroy_sq(struct mlx5_aso_sq * sq)75 mlx5_aso_destroy_sq(struct mlx5_aso_sq *sq)
76 {
77 	mlx5_devx_sq_destroy(&sq->sq_obj);
78 	mlx5_devx_cq_destroy(&sq->cq.cq_obj);
79 	memset(sq, 0, sizeof(*sq));
80 }
81 
82 /**
83  * Initialize Send Queue used for ASO access.
84  *
85  * @param[in] sq
86  *   ASO SQ to initialize.
87  */
88 static void
mlx5_aso_age_init_sq(struct mlx5_aso_sq * sq)89 mlx5_aso_age_init_sq(struct mlx5_aso_sq *sq)
90 {
91 	volatile struct mlx5_aso_wqe *restrict wqe;
92 	int i;
93 	int size = 1 << sq->log_desc_n;
94 	uint64_t addr;
95 
96 	/* All the next fields state should stay constant. */
97 	for (i = 0, wqe = &sq->sq_obj.aso_wqes[0]; i < size; ++i, ++wqe) {
98 		wqe->general_cseg.sq_ds = rte_cpu_to_be_32((sq->sqn << 8) |
99 							  (sizeof(*wqe) >> 4));
100 		wqe->aso_cseg.lkey = rte_cpu_to_be_32(sq->mr.lkey);
101 		addr = (uint64_t)((uint64_t *)sq->mr.addr + i *
102 					    MLX5_ASO_AGE_ACTIONS_PER_POOL / 64);
103 		wqe->aso_cseg.va_h = rte_cpu_to_be_32((uint32_t)(addr >> 32));
104 		wqe->aso_cseg.va_l_r = rte_cpu_to_be_32((uint32_t)addr | 1u);
105 		wqe->aso_cseg.operand_masks = rte_cpu_to_be_32
106 			(0u |
107 			 (ASO_OPER_LOGICAL_OR << ASO_CSEG_COND_OPER_OFFSET) |
108 			 (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_1_OPER_OFFSET) |
109 			 (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_0_OPER_OFFSET) |
110 			 (BYTEWISE_64BYTE << ASO_CSEG_DATA_MASK_MODE_OFFSET));
111 		wqe->aso_cseg.data_mask = RTE_BE64(UINT64_MAX);
112 	}
113 }
114 
115 /**
116  * Initialize Send Queue used for ASO flow meter access.
117  *
118  * @param[in] sq
119  *   ASO SQ to initialize.
120  */
121 static void
mlx5_aso_mtr_init_sq(struct mlx5_aso_sq * sq)122 mlx5_aso_mtr_init_sq(struct mlx5_aso_sq *sq)
123 {
124 	volatile struct mlx5_aso_wqe *restrict wqe;
125 	int i;
126 	int size = 1 << sq->log_desc_n;
127 
128 	/* All the next fields state should stay constant. */
129 	for (i = 0, wqe = &sq->sq_obj.aso_wqes[0]; i < size; ++i, ++wqe) {
130 		wqe->general_cseg.sq_ds = rte_cpu_to_be_32((sq->sqn << 8) |
131 							  (sizeof(*wqe) >> 4));
132 		wqe->aso_cseg.operand_masks = RTE_BE32(0u |
133 			 (ASO_OPER_LOGICAL_OR << ASO_CSEG_COND_OPER_OFFSET) |
134 			 (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_1_OPER_OFFSET) |
135 			 (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_0_OPER_OFFSET) |
136 			 (BYTEWISE_64BYTE << ASO_CSEG_DATA_MASK_MODE_OFFSET));
137 		wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS <<
138 							 MLX5_COMP_MODE_OFFSET);
139 	}
140 }
141 
142 /*
143  * Initialize Send Queue used for ASO connection tracking.
144  *
145  * @param[in] sq
146  *   ASO SQ to initialize.
147  */
148 static void
mlx5_aso_ct_init_sq(struct mlx5_aso_sq * sq)149 mlx5_aso_ct_init_sq(struct mlx5_aso_sq *sq)
150 {
151 	volatile struct mlx5_aso_wqe *restrict wqe;
152 	int i;
153 	int size = 1 << sq->log_desc_n;
154 	uint64_t addr;
155 
156 	/* All the next fields state should stay constant. */
157 	for (i = 0, wqe = &sq->sq_obj.aso_wqes[0]; i < size; ++i, ++wqe) {
158 		wqe->general_cseg.sq_ds = rte_cpu_to_be_32((sq->sqn << 8) |
159 							  (sizeof(*wqe) >> 4));
160 		/* One unique MR for the query data. */
161 		wqe->aso_cseg.lkey = rte_cpu_to_be_32(sq->mr.lkey);
162 		/* Magic number 64 represents the length of a ASO CT obj. */
163 		addr = (uint64_t)((uintptr_t)sq->mr.addr + i * 64);
164 		wqe->aso_cseg.va_h = rte_cpu_to_be_32((uint32_t)(addr >> 32));
165 		wqe->aso_cseg.va_l_r = rte_cpu_to_be_32((uint32_t)addr | 1u);
166 		/*
167 		 * The values of operand_masks are different for modify
168 		 * and query.
169 		 * And data_mask may be different for each modification. In
170 		 * query, it could be zero and ignored.
171 		 * CQE generation is always needed, in order to decide when
172 		 * it is available to create the flow or read the data.
173 		 */
174 		wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS <<
175 						   MLX5_COMP_MODE_OFFSET);
176 	}
177 }
178 
179 /**
180  * Create Send Queue used for ASO access.
181  *
182  * @param[in] cdev
183  *   Pointer to the mlx5 common device.
184  * @param[in/out] sq
185  *   Pointer to SQ to create.
186  * @param[in] uar
187  *   User Access Region object.
188  *
189  * @return
190  *   0 on success, a negative errno value otherwise and rte_errno is set.
191  */
192 static int
mlx5_aso_sq_create(struct mlx5_common_device * cdev,struct mlx5_aso_sq * sq,void * uar)193 mlx5_aso_sq_create(struct mlx5_common_device *cdev, struct mlx5_aso_sq *sq,
194 		   void *uar)
195 {
196 	struct mlx5_devx_cq_attr cq_attr = {
197 		.uar_page_id = mlx5_os_get_devx_uar_page_id(uar),
198 	};
199 	struct mlx5_devx_create_sq_attr sq_attr = {
200 		.user_index = 0xFFFF,
201 		.wq_attr = (struct mlx5_devx_wq_attr){
202 			.pd = cdev->pdn,
203 			.uar_page = mlx5_os_get_devx_uar_page_id(uar),
204 		},
205 		.ts_format =
206 			mlx5_ts_format_conv(cdev->config.hca_attr.sq_ts_format),
207 	};
208 	struct mlx5_devx_modify_sq_attr modify_attr = {
209 		.state = MLX5_SQC_STATE_RDY,
210 	};
211 	uint16_t log_wqbb_n;
212 	int ret;
213 
214 	if (mlx5_devx_cq_create(cdev->ctx, &sq->cq.cq_obj,
215 				MLX5_ASO_QUEUE_LOG_DESC, &cq_attr,
216 				SOCKET_ID_ANY))
217 		goto error;
218 	sq->cq.cq_ci = 0;
219 	sq->cq.log_desc_n = MLX5_ASO_QUEUE_LOG_DESC;
220 	sq->log_desc_n = MLX5_ASO_QUEUE_LOG_DESC;
221 	sq_attr.cqn = sq->cq.cq_obj.cq->id;
222 	/* for mlx5_aso_wqe that is twice the size of mlx5_wqe */
223 	log_wqbb_n = sq->log_desc_n + 1;
224 	ret = mlx5_devx_sq_create(cdev->ctx, &sq->sq_obj, log_wqbb_n, &sq_attr,
225 				  SOCKET_ID_ANY);
226 	if (ret) {
227 		DRV_LOG(ERR, "Can't create SQ object.");
228 		rte_errno = ENOMEM;
229 		goto error;
230 	}
231 	ret = mlx5_devx_cmd_modify_sq(sq->sq_obj.sq, &modify_attr);
232 	if (ret) {
233 		DRV_LOG(ERR, "Can't change SQ state to ready.");
234 		rte_errno = ENOMEM;
235 		goto error;
236 	}
237 	sq->pi = 0;
238 	sq->head = 0;
239 	sq->tail = 0;
240 	sq->sqn = sq->sq_obj.sq->id;
241 	rte_spinlock_init(&sq->sqsl);
242 	return 0;
243 error:
244 	mlx5_aso_destroy_sq(sq);
245 	return -1;
246 }
247 
248 /**
249  * API to create and initialize Send Queue used for ASO access.
250  *
251  * @param[in] sh
252  *   Pointer to shared device context.
253  * @param[in] aso_opc_mod
254  *   Mode of ASO feature.
255  *
256  * @return
257  *   0 on success, a negative errno value otherwise and rte_errno is set.
258  */
259 int
mlx5_aso_queue_init(struct mlx5_dev_ctx_shared * sh,enum mlx5_access_aso_opc_mod aso_opc_mod)260 mlx5_aso_queue_init(struct mlx5_dev_ctx_shared *sh,
261 		    enum mlx5_access_aso_opc_mod aso_opc_mod)
262 {
263 	uint32_t sq_desc_n = 1 << MLX5_ASO_QUEUE_LOG_DESC;
264 	struct mlx5_common_device *cdev = sh->cdev;
265 
266 	switch (aso_opc_mod) {
267 	case ASO_OPC_MOD_FLOW_HIT:
268 		if (mlx5_aso_reg_mr(cdev, (MLX5_ASO_AGE_ACTIONS_PER_POOL / 8) *
269 				    sq_desc_n, &sh->aso_age_mng->aso_sq.mr))
270 			return -1;
271 		if (mlx5_aso_sq_create(cdev, &sh->aso_age_mng->aso_sq,
272 				       sh->tx_uar.obj)) {
273 			mlx5_aso_dereg_mr(cdev, &sh->aso_age_mng->aso_sq.mr);
274 			return -1;
275 		}
276 		mlx5_aso_age_init_sq(&sh->aso_age_mng->aso_sq);
277 		break;
278 	case ASO_OPC_MOD_POLICER:
279 		if (mlx5_aso_sq_create(cdev, &sh->mtrmng->pools_mng.sq,
280 				       sh->tx_uar.obj))
281 			return -1;
282 		mlx5_aso_mtr_init_sq(&sh->mtrmng->pools_mng.sq);
283 		break;
284 	case ASO_OPC_MOD_CONNECTION_TRACKING:
285 		/* 64B per object for query. */
286 		if (mlx5_aso_reg_mr(cdev, 64 * sq_desc_n,
287 				    &sh->ct_mng->aso_sq.mr))
288 			return -1;
289 		if (mlx5_aso_sq_create(cdev, &sh->ct_mng->aso_sq,
290 				       sh->tx_uar.obj)) {
291 			mlx5_aso_dereg_mr(cdev, &sh->ct_mng->aso_sq.mr);
292 			return -1;
293 		}
294 		mlx5_aso_ct_init_sq(&sh->ct_mng->aso_sq);
295 		break;
296 	default:
297 		DRV_LOG(ERR, "Unknown ASO operation mode");
298 		return -1;
299 	}
300 	return 0;
301 }
302 
303 /**
304  * API to destroy Send Queue used for ASO access.
305  *
306  * @param[in] sh
307  *   Pointer to shared device context.
308  * @param[in] aso_opc_mod
309  *   Mode of ASO feature.
310  */
311 void
mlx5_aso_queue_uninit(struct mlx5_dev_ctx_shared * sh,enum mlx5_access_aso_opc_mod aso_opc_mod)312 mlx5_aso_queue_uninit(struct mlx5_dev_ctx_shared *sh,
313 		      enum mlx5_access_aso_opc_mod aso_opc_mod)
314 {
315 	struct mlx5_aso_sq *sq;
316 
317 	switch (aso_opc_mod) {
318 	case ASO_OPC_MOD_FLOW_HIT:
319 		mlx5_aso_dereg_mr(sh->cdev, &sh->aso_age_mng->aso_sq.mr);
320 		sq = &sh->aso_age_mng->aso_sq;
321 		break;
322 	case ASO_OPC_MOD_POLICER:
323 		sq = &sh->mtrmng->pools_mng.sq;
324 		break;
325 	case ASO_OPC_MOD_CONNECTION_TRACKING:
326 		mlx5_aso_dereg_mr(sh->cdev, &sh->ct_mng->aso_sq.mr);
327 		sq = &sh->ct_mng->aso_sq;
328 		break;
329 	default:
330 		DRV_LOG(ERR, "Unknown ASO operation mode");
331 		return;
332 	}
333 	mlx5_aso_destroy_sq(sq);
334 }
335 
336 /**
337  * Write a burst of WQEs to ASO SQ.
338  *
339  * @param[in] sh
340  *   Pointer to shared device context.
341  * @param[in] n
342  *   Index of the last valid pool.
343  *
344  * @return
345  *   Number of WQEs in burst.
346  */
347 static uint16_t
mlx5_aso_sq_enqueue_burst(struct mlx5_dev_ctx_shared * sh,uint16_t n)348 mlx5_aso_sq_enqueue_burst(struct mlx5_dev_ctx_shared *sh, uint16_t n)
349 {
350 	struct mlx5_aso_age_mng *mng = sh->aso_age_mng;
351 	volatile struct mlx5_aso_wqe *wqe;
352 	struct mlx5_aso_sq *sq = &mng->aso_sq;
353 	struct mlx5_aso_age_pool *pool;
354 	uint16_t size = 1 << sq->log_desc_n;
355 	uint16_t mask = size - 1;
356 	uint16_t max;
357 	uint16_t start_head = sq->head;
358 
359 	max = RTE_MIN(size - (uint16_t)(sq->head - sq->tail), n - sq->next);
360 	if (unlikely(!max))
361 		return 0;
362 	sq->elts[start_head & mask].burst_size = max;
363 	do {
364 		wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
365 		rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
366 		/* Fill next WQE. */
367 		rte_rwlock_read_lock(&mng->resize_rwl);
368 		pool = mng->pools[sq->next];
369 		rte_rwlock_read_unlock(&mng->resize_rwl);
370 		sq->elts[sq->head & mask].pool = pool;
371 		wqe->general_cseg.misc =
372 				rte_cpu_to_be_32(((struct mlx5_devx_obj *)
373 						 (pool->flow_hit_aso_obj))->id);
374 		wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ONLY_FIRST_ERR <<
375 							 MLX5_COMP_MODE_OFFSET);
376 		wqe->general_cseg.opcode = rte_cpu_to_be_32
377 						(MLX5_OPCODE_ACCESS_ASO |
378 						 (ASO_OPC_MOD_FLOW_HIT <<
379 						  WQE_CSEG_OPC_MOD_OFFSET) |
380 						 (sq->pi <<
381 						  WQE_CSEG_WQE_INDEX_OFFSET));
382 		sq->pi += 2; /* Each WQE contains 2 WQEBB's. */
383 		sq->head++;
384 		sq->next++;
385 		max--;
386 	} while (max);
387 	wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS <<
388 							 MLX5_COMP_MODE_OFFSET);
389 	mlx5_doorbell_ring(&sh->tx_uar.bf_db, *(volatile uint64_t *)wqe,
390 			   sq->pi, &sq->sq_obj.db_rec[MLX5_SND_DBR],
391 			   !sh->tx_uar.dbnc);
392 	return sq->elts[start_head & mask].burst_size;
393 }
394 
395 /**
396  * Debug utility function. Dump contents of error CQE and WQE.
397  *
398  * @param[in] cqe
399  *   Error CQE to dump.
400  * @param[in] wqe
401  *   Error WQE to dump.
402  */
403 static void
mlx5_aso_dump_err_objs(volatile uint32_t * cqe,volatile uint32_t * wqe)404 mlx5_aso_dump_err_objs(volatile uint32_t *cqe, volatile uint32_t *wqe)
405 {
406 	int i;
407 
408 	DRV_LOG(ERR, "Error cqe:");
409 	for (i = 0; i < 16; i += 4)
410 		DRV_LOG(ERR, "%08X %08X %08X %08X", cqe[i], cqe[i + 1],
411 			cqe[i + 2], cqe[i + 3]);
412 	DRV_LOG(ERR, "\nError wqe:");
413 	for (i = 0; i < (int)sizeof(struct mlx5_aso_wqe) / 4; i += 4)
414 		DRV_LOG(ERR, "%08X %08X %08X %08X", wqe[i], wqe[i + 1],
415 			wqe[i + 2], wqe[i + 3]);
416 }
417 
418 /**
419  * Handle case of error CQE.
420  *
421  * @param[in] sq
422  *   ASO SQ to use.
423  */
424 static void
mlx5_aso_cqe_err_handle(struct mlx5_aso_sq * sq)425 mlx5_aso_cqe_err_handle(struct mlx5_aso_sq *sq)
426 {
427 	struct mlx5_aso_cq *cq = &sq->cq;
428 	uint32_t idx = cq->cq_ci & ((1 << cq->log_desc_n) - 1);
429 	volatile struct mlx5_err_cqe *cqe =
430 			(volatile struct mlx5_err_cqe *)&cq->cq_obj.cqes[idx];
431 
432 	cq->errors++;
433 	idx = rte_be_to_cpu_16(cqe->wqe_counter) & (1u << sq->log_desc_n);
434 	mlx5_aso_dump_err_objs((volatile uint32_t *)cqe,
435 			       (volatile uint32_t *)&sq->sq_obj.aso_wqes[idx]);
436 }
437 
438 /**
439  * Update ASO objects upon completion.
440  *
441  * @param[in] sh
442  *   Shared device context.
443  * @param[in] n
444  *   Number of completed ASO objects.
445  */
446 static void
mlx5_aso_age_action_update(struct mlx5_dev_ctx_shared * sh,uint16_t n)447 mlx5_aso_age_action_update(struct mlx5_dev_ctx_shared *sh, uint16_t n)
448 {
449 	struct mlx5_aso_age_mng *mng = sh->aso_age_mng;
450 	struct mlx5_aso_sq *sq = &mng->aso_sq;
451 	struct mlx5_age_info *age_info;
452 	const uint16_t size = 1 << sq->log_desc_n;
453 	const uint16_t mask = size - 1;
454 	const uint64_t curr = MLX5_CURR_TIME_SEC;
455 	uint16_t expected = AGE_CANDIDATE;
456 	uint16_t i;
457 
458 	for (i = 0; i < n; ++i) {
459 		uint16_t idx = (sq->tail + i) & mask;
460 		struct mlx5_aso_age_pool *pool = sq->elts[idx].pool;
461 		uint64_t diff = curr - pool->time_of_last_age_check;
462 		uint64_t *addr = sq->mr.addr;
463 		int j;
464 
465 		addr += idx * MLX5_ASO_AGE_ACTIONS_PER_POOL / 64;
466 		pool->time_of_last_age_check = curr;
467 		for (j = 0; j < MLX5_ASO_AGE_ACTIONS_PER_POOL; j++) {
468 			struct mlx5_aso_age_action *act = &pool->actions[j];
469 			struct mlx5_age_param *ap = &act->age_params;
470 			uint8_t byte;
471 			uint8_t offset;
472 			uint8_t *u8addr;
473 			uint8_t hit;
474 
475 			if (__atomic_load_n(&ap->state, __ATOMIC_RELAXED) !=
476 					    AGE_CANDIDATE)
477 				continue;
478 			byte = 63 - (j / 8);
479 			offset = j % 8;
480 			u8addr = (uint8_t *)addr;
481 			hit = (u8addr[byte] >> offset) & 0x1;
482 			if (hit) {
483 				__atomic_store_n(&ap->sec_since_last_hit, 0,
484 						 __ATOMIC_RELAXED);
485 			} else {
486 				struct mlx5_priv *priv;
487 
488 				__atomic_fetch_add(&ap->sec_since_last_hit,
489 						   diff, __ATOMIC_RELAXED);
490 				/* If timeout passed add to aged-out list. */
491 				if (ap->sec_since_last_hit <= ap->timeout)
492 					continue;
493 				priv =
494 				rte_eth_devices[ap->port_id].data->dev_private;
495 				age_info = GET_PORT_AGE_INFO(priv);
496 				rte_spinlock_lock(&age_info->aged_sl);
497 				if (__atomic_compare_exchange_n(&ap->state,
498 								&expected,
499 								AGE_TMOUT,
500 								false,
501 							       __ATOMIC_RELAXED,
502 							    __ATOMIC_RELAXED)) {
503 					LIST_INSERT_HEAD(&age_info->aged_aso,
504 							 act, next);
505 					MLX5_AGE_SET(age_info,
506 						     MLX5_AGE_EVENT_NEW);
507 				}
508 				rte_spinlock_unlock(&age_info->aged_sl);
509 			}
510 		}
511 	}
512 	mlx5_age_event_prepare(sh);
513 }
514 
515 /**
516  * Handle completions from WQEs sent to ASO SQ.
517  *
518  * @param[in] sh
519  *   Shared device context.
520  *
521  * @return
522  *   Number of CQEs handled.
523  */
524 static uint16_t
mlx5_aso_completion_handle(struct mlx5_dev_ctx_shared * sh)525 mlx5_aso_completion_handle(struct mlx5_dev_ctx_shared *sh)
526 {
527 	struct mlx5_aso_age_mng *mng = sh->aso_age_mng;
528 	struct mlx5_aso_sq *sq = &mng->aso_sq;
529 	struct mlx5_aso_cq *cq = &sq->cq;
530 	volatile struct mlx5_cqe *restrict cqe;
531 	const unsigned int cq_size = 1 << cq->log_desc_n;
532 	const unsigned int mask = cq_size - 1;
533 	uint32_t idx;
534 	uint32_t next_idx = cq->cq_ci & mask;
535 	const uint16_t max = (uint16_t)(sq->head - sq->tail);
536 	uint16_t i = 0;
537 	int ret;
538 	if (unlikely(!max))
539 		return 0;
540 	do {
541 		idx = next_idx;
542 		next_idx = (cq->cq_ci + 1) & mask;
543 		rte_prefetch0(&cq->cq_obj.cqes[next_idx]);
544 		cqe = &cq->cq_obj.cqes[idx];
545 		ret = check_cqe(cqe, cq_size, cq->cq_ci);
546 		/*
547 		 * Be sure owner read is done before any other cookie field or
548 		 * opaque field.
549 		 */
550 		rte_io_rmb();
551 		if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
552 			if (likely(ret == MLX5_CQE_STATUS_HW_OWN))
553 				break;
554 			mlx5_aso_cqe_err_handle(sq);
555 		} else {
556 			i += sq->elts[(sq->tail + i) & mask].burst_size;
557 		}
558 		cq->cq_ci++;
559 	} while (1);
560 	if (likely(i)) {
561 		mlx5_aso_age_action_update(sh, i);
562 		sq->tail += i;
563 		rte_io_wmb();
564 		cq->cq_obj.db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
565 	}
566 	return i;
567 }
568 
569 /**
570  * Periodically read CQEs and send WQEs to ASO SQ.
571  *
572  * @param[in] arg
573  *   Shared device context containing the ASO SQ.
574  */
575 static void
mlx5_flow_aso_alarm(void * arg)576 mlx5_flow_aso_alarm(void *arg)
577 {
578 	struct mlx5_dev_ctx_shared *sh = arg;
579 	struct mlx5_aso_sq *sq = &sh->aso_age_mng->aso_sq;
580 	uint32_t us = 100u;
581 	uint16_t n;
582 
583 	rte_rwlock_read_lock(&sh->aso_age_mng->resize_rwl);
584 	n = sh->aso_age_mng->next;
585 	rte_rwlock_read_unlock(&sh->aso_age_mng->resize_rwl);
586 	mlx5_aso_completion_handle(sh);
587 	if (sq->next == n) {
588 		/* End of loop: wait 1 second. */
589 		us = US_PER_S;
590 		sq->next = 0;
591 	}
592 	mlx5_aso_sq_enqueue_burst(sh, n);
593 	if (rte_eal_alarm_set(us, mlx5_flow_aso_alarm, sh))
594 		DRV_LOG(ERR, "Cannot reinitialize aso alarm.");
595 }
596 
597 /**
598  * API to start ASO access using ASO SQ.
599  *
600  * @param[in] sh
601  *   Pointer to shared device context.
602  *
603  * @return
604  *   0 on success, a negative errno value otherwise and rte_errno is set.
605  */
606 int
mlx5_aso_flow_hit_queue_poll_start(struct mlx5_dev_ctx_shared * sh)607 mlx5_aso_flow_hit_queue_poll_start(struct mlx5_dev_ctx_shared *sh)
608 {
609 	if (rte_eal_alarm_set(US_PER_S, mlx5_flow_aso_alarm, sh)) {
610 		DRV_LOG(ERR, "Cannot reinitialize ASO age alarm.");
611 		return -rte_errno;
612 	}
613 	return 0;
614 }
615 
616 /**
617  * API to stop ASO access using ASO SQ.
618  *
619  * @param[in] sh
620  *   Pointer to shared device context.
621  *
622  * @return
623  *   0 on success, a negative errno value otherwise and rte_errno is set.
624  */
625 int
mlx5_aso_flow_hit_queue_poll_stop(struct mlx5_dev_ctx_shared * sh)626 mlx5_aso_flow_hit_queue_poll_stop(struct mlx5_dev_ctx_shared *sh)
627 {
628 	int retries = 1024;
629 
630 	if (!sh->aso_age_mng->aso_sq.sq_obj.sq)
631 		return -EINVAL;
632 	rte_errno = 0;
633 	while (--retries) {
634 		rte_eal_alarm_cancel(mlx5_flow_aso_alarm, sh);
635 		if (rte_errno != EINPROGRESS)
636 			break;
637 		rte_pause();
638 	}
639 	return -rte_errno;
640 }
641 
642 static uint16_t
mlx5_aso_mtr_sq_enqueue_single(struct mlx5_dev_ctx_shared * sh,struct mlx5_aso_sq * sq,struct mlx5_aso_mtr * aso_mtr)643 mlx5_aso_mtr_sq_enqueue_single(struct mlx5_dev_ctx_shared *sh,
644 			       struct mlx5_aso_sq *sq,
645 			       struct mlx5_aso_mtr *aso_mtr)
646 {
647 	volatile struct mlx5_aso_wqe *wqe = NULL;
648 	struct mlx5_flow_meter_info *fm = NULL;
649 	struct mlx5_flow_meter_profile *fmp;
650 	uint16_t size = 1 << sq->log_desc_n;
651 	uint16_t mask = size - 1;
652 	uint16_t res;
653 	uint32_t dseg_idx = 0;
654 	struct mlx5_aso_mtr_pool *pool = NULL;
655 
656 	rte_spinlock_lock(&sq->sqsl);
657 	res = size - (uint16_t)(sq->head - sq->tail);
658 	if (unlikely(!res)) {
659 		DRV_LOG(ERR, "Fail: SQ is full and no free WQE to send");
660 		rte_spinlock_unlock(&sq->sqsl);
661 		return 0;
662 	}
663 	wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
664 	rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
665 	/* Fill next WQE. */
666 	fm = &aso_mtr->fm;
667 	sq->elts[sq->head & mask].mtr = aso_mtr;
668 	pool = container_of(aso_mtr, struct mlx5_aso_mtr_pool,
669 			mtrs[aso_mtr->offset]);
670 	wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
671 			(aso_mtr->offset >> 1));
672 	wqe->general_cseg.opcode = rte_cpu_to_be_32(MLX5_OPCODE_ACCESS_ASO |
673 			(ASO_OPC_MOD_POLICER <<
674 			WQE_CSEG_OPC_MOD_OFFSET) |
675 			sq->pi << WQE_CSEG_WQE_INDEX_OFFSET);
676 	/* There are 2 meters in one ASO cache line. */
677 	dseg_idx = aso_mtr->offset & 0x1;
678 	wqe->aso_cseg.data_mask =
679 		RTE_BE64(MLX5_IFC_FLOW_METER_PARAM_MASK << (32 * !dseg_idx));
680 	if (fm->is_enable) {
681 		wqe->aso_dseg.mtrs[dseg_idx].cbs_cir =
682 			fm->profile->srtcm_prm.cbs_cir;
683 		wqe->aso_dseg.mtrs[dseg_idx].ebs_eir =
684 			fm->profile->srtcm_prm.ebs_eir;
685 	} else {
686 		wqe->aso_dseg.mtrs[dseg_idx].cbs_cir =
687 			RTE_BE32(MLX5_IFC_FLOW_METER_DISABLE_CBS_CIR_VAL);
688 		wqe->aso_dseg.mtrs[dseg_idx].ebs_eir = 0;
689 	}
690 	fmp = fm->profile;
691 	if (fmp->profile.packet_mode)
692 		wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm =
693 				RTE_BE32((1 << ASO_DSEG_VALID_OFFSET) |
694 				(MLX5_FLOW_COLOR_GREEN << ASO_DSEG_SC_OFFSET) |
695 				(MLX5_METER_MODE_PKT << ASO_DSEG_MTR_MODE));
696 	else
697 		wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm =
698 				RTE_BE32((1 << ASO_DSEG_VALID_OFFSET) |
699 				(MLX5_FLOW_COLOR_GREEN << ASO_DSEG_SC_OFFSET));
700 	switch (fmp->profile.alg) {
701 	case RTE_MTR_SRTCM_RFC2697:
702 		/* Only needed for RFC2697. */
703 		if (fm->profile->srtcm_prm.ebs_eir)
704 			wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm |=
705 					RTE_BE32(1 << ASO_DSEG_BO_OFFSET);
706 		break;
707 	case RTE_MTR_TRTCM_RFC2698:
708 		wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm |=
709 				RTE_BE32(1 << ASO_DSEG_BBOG_OFFSET);
710 		break;
711 	case RTE_MTR_TRTCM_RFC4115:
712 	default:
713 		break;
714 	}
715 	/*
716 	 * Note:
717 	 * Due to software performance reason, the token fields will not be
718 	 * set when posting the WQE to ASO SQ. It will be filled by the HW
719 	 * automatically.
720 	 */
721 	sq->head++;
722 	sq->pi += 2;/* Each WQE contains 2 WQEBB's. */
723 	mlx5_doorbell_ring(&sh->tx_uar.bf_db, *(volatile uint64_t *)wqe,
724 			   sq->pi, &sq->sq_obj.db_rec[MLX5_SND_DBR],
725 			   !sh->tx_uar.dbnc);
726 	rte_spinlock_unlock(&sq->sqsl);
727 	return 1;
728 }
729 
730 static void
mlx5_aso_mtrs_status_update(struct mlx5_aso_sq * sq,uint16_t aso_mtrs_nums)731 mlx5_aso_mtrs_status_update(struct mlx5_aso_sq *sq, uint16_t aso_mtrs_nums)
732 {
733 	uint16_t size = 1 << sq->log_desc_n;
734 	uint16_t mask = size - 1;
735 	uint16_t i;
736 	struct mlx5_aso_mtr *aso_mtr = NULL;
737 	uint8_t exp_state = ASO_METER_WAIT;
738 
739 	for (i = 0; i < aso_mtrs_nums; ++i) {
740 		aso_mtr = sq->elts[(sq->tail + i) & mask].mtr;
741 		MLX5_ASSERT(aso_mtr);
742 		(void)__atomic_compare_exchange_n(&aso_mtr->state,
743 				&exp_state, ASO_METER_READY,
744 				false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
745 	}
746 }
747 
748 static void
mlx5_aso_mtr_completion_handle(struct mlx5_aso_sq * sq)749 mlx5_aso_mtr_completion_handle(struct mlx5_aso_sq *sq)
750 {
751 	struct mlx5_aso_cq *cq = &sq->cq;
752 	volatile struct mlx5_cqe *restrict cqe;
753 	const unsigned int cq_size = 1 << cq->log_desc_n;
754 	const unsigned int mask = cq_size - 1;
755 	uint32_t idx;
756 	uint32_t next_idx = cq->cq_ci & mask;
757 	uint16_t max;
758 	uint16_t n = 0;
759 	int ret;
760 
761 	rte_spinlock_lock(&sq->sqsl);
762 	max = (uint16_t)(sq->head - sq->tail);
763 	if (unlikely(!max)) {
764 		rte_spinlock_unlock(&sq->sqsl);
765 		return;
766 	}
767 	do {
768 		idx = next_idx;
769 		next_idx = (cq->cq_ci + 1) & mask;
770 		rte_prefetch0(&cq->cq_obj.cqes[next_idx]);
771 		cqe = &cq->cq_obj.cqes[idx];
772 		ret = check_cqe(cqe, cq_size, cq->cq_ci);
773 		/*
774 		 * Be sure owner read is done before any other cookie field or
775 		 * opaque field.
776 		 */
777 		rte_io_rmb();
778 		if (ret != MLX5_CQE_STATUS_SW_OWN) {
779 			if (likely(ret == MLX5_CQE_STATUS_HW_OWN))
780 				break;
781 			mlx5_aso_cqe_err_handle(sq);
782 		} else {
783 			n++;
784 		}
785 		cq->cq_ci++;
786 	} while (1);
787 	if (likely(n)) {
788 		mlx5_aso_mtrs_status_update(sq, n);
789 		sq->tail += n;
790 		rte_io_wmb();
791 		cq->cq_obj.db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
792 	}
793 	rte_spinlock_unlock(&sq->sqsl);
794 }
795 
796 /**
797  * Update meter parameter by send WQE.
798  *
799  * @param[in] dev
800  *   Pointer to Ethernet device.
801  * @param[in] priv
802  *   Pointer to mlx5 private data structure.
803  * @param[in] fm
804  *   Pointer to flow meter to be modified.
805  *
806  * @return
807  *   0 on success, a negative errno value otherwise and rte_errno is set.
808  */
809 int
mlx5_aso_meter_update_by_wqe(struct mlx5_dev_ctx_shared * sh,struct mlx5_aso_mtr * mtr)810 mlx5_aso_meter_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
811 			struct mlx5_aso_mtr *mtr)
812 {
813 	struct mlx5_aso_sq *sq = &sh->mtrmng->pools_mng.sq;
814 	uint32_t poll_wqe_times = MLX5_MTR_POLL_WQE_CQE_TIMES;
815 
816 	do {
817 		mlx5_aso_mtr_completion_handle(sq);
818 		if (mlx5_aso_mtr_sq_enqueue_single(sh, sq, mtr))
819 			return 0;
820 		/* Waiting for wqe resource. */
821 		rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
822 	} while (--poll_wqe_times);
823 	DRV_LOG(ERR, "Fail to send WQE for ASO meter offset %d",
824 			mtr->offset);
825 	return -1;
826 }
827 
828 /**
829  * Wait for meter to be ready.
830  *
831  * @param[in] dev
832  *   Pointer to Ethernet device.
833  * @param[in] priv
834  *   Pointer to mlx5 private data structure.
835  * @param[in] fm
836  *   Pointer to flow meter to be modified.
837  *
838  * @return
839  *   0 on success, a negative errno value otherwise and rte_errno is set.
840  */
841 int
mlx5_aso_mtr_wait(struct mlx5_dev_ctx_shared * sh,struct mlx5_aso_mtr * mtr)842 mlx5_aso_mtr_wait(struct mlx5_dev_ctx_shared *sh,
843 			struct mlx5_aso_mtr *mtr)
844 {
845 	struct mlx5_aso_sq *sq = &sh->mtrmng->pools_mng.sq;
846 	uint32_t poll_cqe_times = MLX5_MTR_POLL_WQE_CQE_TIMES;
847 
848 	if (__atomic_load_n(&mtr->state, __ATOMIC_RELAXED) ==
849 					    ASO_METER_READY)
850 		return 0;
851 	do {
852 		mlx5_aso_mtr_completion_handle(sq);
853 		if (__atomic_load_n(&mtr->state, __ATOMIC_RELAXED) ==
854 					    ASO_METER_READY)
855 			return 0;
856 		/* Waiting for CQE ready. */
857 		rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
858 	} while (--poll_cqe_times);
859 	DRV_LOG(ERR, "Fail to poll CQE ready for ASO meter offset %d",
860 			mtr->offset);
861 	return -1;
862 }
863 
864 /*
865  * Post a WQE to the ASO CT SQ to modify the context.
866  *
867  * @param[in] sh
868  *   Pointer to shared device context.
869  * @param[in] ct
870  *   Pointer to the generic CT structure related to the context.
871  * @param[in] profile
872  *   Pointer to configuration profile.
873  *
874  * @return
875  *   1 on success (WQE number), 0 on failure.
876  */
877 static uint16_t
mlx5_aso_ct_sq_enqueue_single(struct mlx5_dev_ctx_shared * sh,struct mlx5_aso_ct_action * ct,const struct rte_flow_action_conntrack * profile)878 mlx5_aso_ct_sq_enqueue_single(struct mlx5_dev_ctx_shared *sh,
879 			      struct mlx5_aso_ct_action *ct,
880 			      const struct rte_flow_action_conntrack *profile)
881 {
882 	volatile struct mlx5_aso_wqe *wqe = NULL;
883 	struct mlx5_aso_sq *sq = &sh->ct_mng->aso_sq;
884 	uint16_t size = 1 << sq->log_desc_n;
885 	uint16_t mask = size - 1;
886 	uint16_t res;
887 	struct mlx5_aso_ct_pool *pool;
888 	void *desg;
889 	void *orig_dir;
890 	void *reply_dir;
891 
892 	rte_spinlock_lock(&sq->sqsl);
893 	/* Prevent other threads to update the index. */
894 	res = size - (uint16_t)(sq->head - sq->tail);
895 	if (unlikely(!res)) {
896 		rte_spinlock_unlock(&sq->sqsl);
897 		DRV_LOG(ERR, "Fail: SQ is full and no free WQE to send");
898 		return 0;
899 	}
900 	wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
901 	rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
902 	/* Fill next WQE. */
903 	MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_WAIT);
904 	sq->elts[sq->head & mask].ct = ct;
905 	sq->elts[sq->head & mask].query_data = NULL;
906 	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
907 	/* Each WQE will have a single CT object. */
908 	wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
909 						  ct->offset);
910 	wqe->general_cseg.opcode = rte_cpu_to_be_32(MLX5_OPCODE_ACCESS_ASO |
911 			(ASO_OPC_MOD_CONNECTION_TRACKING <<
912 			 WQE_CSEG_OPC_MOD_OFFSET) |
913 			sq->pi << WQE_CSEG_WQE_INDEX_OFFSET);
914 	wqe->aso_cseg.operand_masks = rte_cpu_to_be_32
915 			(0u |
916 			 (ASO_OPER_LOGICAL_OR << ASO_CSEG_COND_OPER_OFFSET) |
917 			 (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_1_OPER_OFFSET) |
918 			 (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_0_OPER_OFFSET) |
919 			 (BYTEWISE_64BYTE << ASO_CSEG_DATA_MASK_MODE_OFFSET));
920 	wqe->aso_cseg.data_mask = UINT64_MAX;
921 	/* To make compiler happy. */
922 	desg = (void *)(uintptr_t)wqe->aso_dseg.data;
923 	MLX5_SET(conn_track_aso, desg, valid, 1);
924 	MLX5_SET(conn_track_aso, desg, state, profile->state);
925 	MLX5_SET(conn_track_aso, desg, freeze_track, !profile->enable);
926 	MLX5_SET(conn_track_aso, desg, connection_assured,
927 		 profile->live_connection);
928 	MLX5_SET(conn_track_aso, desg, sack_permitted, profile->selective_ack);
929 	MLX5_SET(conn_track_aso, desg, challenged_acked,
930 		 profile->challenge_ack_passed);
931 	/* Heartbeat, retransmission_counter, retranmission_limit_exceeded: 0 */
932 	MLX5_SET(conn_track_aso, desg, heartbeat, 0);
933 	MLX5_SET(conn_track_aso, desg, max_ack_window,
934 		 profile->max_ack_window);
935 	MLX5_SET(conn_track_aso, desg, retransmission_counter, 0);
936 	MLX5_SET(conn_track_aso, desg, retranmission_limit_exceeded, 0);
937 	MLX5_SET(conn_track_aso, desg, retranmission_limit,
938 		 profile->retransmission_limit);
939 	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_scale,
940 		 profile->reply_dir.scale);
941 	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_close_initiated,
942 		 profile->reply_dir.close_initiated);
943 	/* Both directions will use the same liberal mode. */
944 	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_liberal_enabled,
945 		 profile->liberal_mode);
946 	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_data_unacked,
947 		 profile->reply_dir.data_unacked);
948 	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_max_ack,
949 		 profile->reply_dir.last_ack_seen);
950 	MLX5_SET(conn_track_aso, desg, original_direction_tcp_scale,
951 		 profile->original_dir.scale);
952 	MLX5_SET(conn_track_aso, desg, original_direction_tcp_close_initiated,
953 		 profile->original_dir.close_initiated);
954 	MLX5_SET(conn_track_aso, desg, original_direction_tcp_liberal_enabled,
955 		 profile->liberal_mode);
956 	MLX5_SET(conn_track_aso, desg, original_direction_tcp_data_unacked,
957 		 profile->original_dir.data_unacked);
958 	MLX5_SET(conn_track_aso, desg, original_direction_tcp_max_ack,
959 		 profile->original_dir.last_ack_seen);
960 	MLX5_SET(conn_track_aso, desg, last_win, profile->last_window);
961 	MLX5_SET(conn_track_aso, desg, last_dir, profile->last_direction);
962 	MLX5_SET(conn_track_aso, desg, last_index, profile->last_index);
963 	MLX5_SET(conn_track_aso, desg, last_seq, profile->last_seq);
964 	MLX5_SET(conn_track_aso, desg, last_ack, profile->last_ack);
965 	MLX5_SET(conn_track_aso, desg, last_end, profile->last_end);
966 	orig_dir = MLX5_ADDR_OF(conn_track_aso, desg, original_dir);
967 	MLX5_SET(tcp_window_params, orig_dir, sent_end,
968 		 profile->original_dir.sent_end);
969 	MLX5_SET(tcp_window_params, orig_dir, reply_end,
970 		 profile->original_dir.reply_end);
971 	MLX5_SET(tcp_window_params, orig_dir, max_win,
972 		 profile->original_dir.max_win);
973 	MLX5_SET(tcp_window_params, orig_dir, max_ack,
974 		 profile->original_dir.max_ack);
975 	reply_dir = MLX5_ADDR_OF(conn_track_aso, desg, reply_dir);
976 	MLX5_SET(tcp_window_params, reply_dir, sent_end,
977 		 profile->reply_dir.sent_end);
978 	MLX5_SET(tcp_window_params, reply_dir, reply_end,
979 		 profile->reply_dir.reply_end);
980 	MLX5_SET(tcp_window_params, reply_dir, max_win,
981 		 profile->reply_dir.max_win);
982 	MLX5_SET(tcp_window_params, reply_dir, max_ack,
983 		 profile->reply_dir.max_ack);
984 	sq->head++;
985 	sq->pi += 2; /* Each WQE contains 2 WQEBB's. */
986 	mlx5_doorbell_ring(&sh->tx_uar.bf_db, *(volatile uint64_t *)wqe,
987 			   sq->pi, &sq->sq_obj.db_rec[MLX5_SND_DBR],
988 			   !sh->tx_uar.dbnc);
989 	rte_spinlock_unlock(&sq->sqsl);
990 	return 1;
991 }
992 
993 /*
994  * Update the status field of CTs to indicate ready to be used by flows.
995  * A continuous number of CTs since last update.
996  *
997  * @param[in] sq
998  *   Pointer to ASO CT SQ.
999  * @param[in] num
1000  *   Number of CT structures to be updated.
1001  *
1002  * @return
1003  *   0 on success, a negative value.
1004  */
1005 static void
mlx5_aso_ct_status_update(struct mlx5_aso_sq * sq,uint16_t num)1006 mlx5_aso_ct_status_update(struct mlx5_aso_sq *sq, uint16_t num)
1007 {
1008 	uint16_t size = 1 << sq->log_desc_n;
1009 	uint16_t mask = size - 1;
1010 	uint16_t i;
1011 	struct mlx5_aso_ct_action *ct = NULL;
1012 	uint16_t idx;
1013 
1014 	for (i = 0; i < num; i++) {
1015 		idx = (uint16_t)((sq->tail + i) & mask);
1016 		ct = sq->elts[idx].ct;
1017 		MLX5_ASSERT(ct);
1018 		MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_READY);
1019 		if (sq->elts[idx].query_data)
1020 			rte_memcpy(sq->elts[idx].query_data,
1021 				   (char *)((uintptr_t)sq->mr.addr + idx * 64),
1022 				   64);
1023 	}
1024 }
1025 
1026 /*
1027  * Post a WQE to the ASO CT SQ to query the current context.
1028  *
1029  * @param[in] sh
1030  *   Pointer to shared device context.
1031  * @param[in] ct
1032  *   Pointer to the generic CT structure related to the context.
1033  * @param[in] data
1034  *   Pointer to data area to be filled.
1035  *
1036  * @return
1037  *   1 on success (WQE number), 0 on failure.
1038  */
1039 static int
mlx5_aso_ct_sq_query_single(struct mlx5_dev_ctx_shared * sh,struct mlx5_aso_ct_action * ct,char * data)1040 mlx5_aso_ct_sq_query_single(struct mlx5_dev_ctx_shared *sh,
1041 			    struct mlx5_aso_ct_action *ct, char *data)
1042 {
1043 	volatile struct mlx5_aso_wqe *wqe = NULL;
1044 	struct mlx5_aso_sq *sq = &sh->ct_mng->aso_sq;
1045 	uint16_t size = 1 << sq->log_desc_n;
1046 	uint16_t mask = size - 1;
1047 	uint16_t res;
1048 	uint16_t wqe_idx;
1049 	struct mlx5_aso_ct_pool *pool;
1050 	enum mlx5_aso_ct_state state =
1051 				__atomic_load_n(&ct->state, __ATOMIC_RELAXED);
1052 
1053 	if (state == ASO_CONNTRACK_FREE) {
1054 		DRV_LOG(ERR, "Fail: No context to query");
1055 		return -1;
1056 	} else if (state == ASO_CONNTRACK_WAIT) {
1057 		return 0;
1058 	}
1059 	rte_spinlock_lock(&sq->sqsl);
1060 	res = size - (uint16_t)(sq->head - sq->tail);
1061 	if (unlikely(!res)) {
1062 		rte_spinlock_unlock(&sq->sqsl);
1063 		DRV_LOG(ERR, "Fail: SQ is full and no free WQE to send");
1064 		return 0;
1065 	}
1066 	MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_QUERY);
1067 	wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
1068 	/* Confirm the location and address of the prefetch instruction. */
1069 	rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
1070 	/* Fill next WQE. */
1071 	wqe_idx = sq->head & mask;
1072 	sq->elts[wqe_idx].ct = ct;
1073 	sq->elts[wqe_idx].query_data = data;
1074 	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
1075 	/* Each WQE will have a single CT object. */
1076 	wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
1077 						  ct->offset);
1078 	wqe->general_cseg.opcode = rte_cpu_to_be_32(MLX5_OPCODE_ACCESS_ASO |
1079 			(ASO_OPC_MOD_CONNECTION_TRACKING <<
1080 			 WQE_CSEG_OPC_MOD_OFFSET) |
1081 			sq->pi << WQE_CSEG_WQE_INDEX_OFFSET);
1082 	/*
1083 	 * There is no write request is required.
1084 	 * ASO_OPER_LOGICAL_AND and ASO_OP_ALWAYS_FALSE are both 0.
1085 	 * "BYTEWISE_64BYTE" is needed for a whole context.
1086 	 * Set to 0 directly to reduce an endian swap. (Modify should rewrite.)
1087 	 * "data_mask" is ignored.
1088 	 * Buffer address was already filled during initialization.
1089 	 */
1090 	wqe->aso_cseg.operand_masks = rte_cpu_to_be_32(BYTEWISE_64BYTE <<
1091 					ASO_CSEG_DATA_MASK_MODE_OFFSET);
1092 	wqe->aso_cseg.data_mask = 0;
1093 	sq->head++;
1094 	/*
1095 	 * Each WQE contains 2 WQEBB's, even though
1096 	 * data segment is not used in this case.
1097 	 */
1098 	sq->pi += 2;
1099 	mlx5_doorbell_ring(&sh->tx_uar.bf_db, *(volatile uint64_t *)wqe,
1100 			   sq->pi, &sq->sq_obj.db_rec[MLX5_SND_DBR],
1101 			   !sh->tx_uar.dbnc);
1102 	rte_spinlock_unlock(&sq->sqsl);
1103 	return 1;
1104 }
1105 
1106 /*
1107  * Handle completions from WQEs sent to ASO CT.
1108  *
1109  * @param[in] mng
1110  *   Pointer to the CT pools management structure.
1111  */
1112 static void
mlx5_aso_ct_completion_handle(struct mlx5_aso_ct_pools_mng * mng)1113 mlx5_aso_ct_completion_handle(struct mlx5_aso_ct_pools_mng *mng)
1114 {
1115 	struct mlx5_aso_sq *sq = &mng->aso_sq;
1116 	struct mlx5_aso_cq *cq = &sq->cq;
1117 	volatile struct mlx5_cqe *restrict cqe;
1118 	const uint32_t cq_size = 1 << cq->log_desc_n;
1119 	const uint32_t mask = cq_size - 1;
1120 	uint32_t idx;
1121 	uint32_t next_idx;
1122 	uint16_t max;
1123 	uint16_t n = 0;
1124 	int ret;
1125 
1126 	rte_spinlock_lock(&sq->sqsl);
1127 	max = (uint16_t)(sq->head - sq->tail);
1128 	if (unlikely(!max)) {
1129 		rte_spinlock_unlock(&sq->sqsl);
1130 		return;
1131 	}
1132 	next_idx = cq->cq_ci & mask;
1133 	do {
1134 		idx = next_idx;
1135 		next_idx = (cq->cq_ci + 1) & mask;
1136 		/* Need to confirm the position of the prefetch. */
1137 		rte_prefetch0(&cq->cq_obj.cqes[next_idx]);
1138 		cqe = &cq->cq_obj.cqes[idx];
1139 		ret = check_cqe(cqe, cq_size, cq->cq_ci);
1140 		/*
1141 		 * Be sure owner read is done before any other cookie field or
1142 		 * opaque field.
1143 		 */
1144 		rte_io_rmb();
1145 		if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
1146 			if (likely(ret == MLX5_CQE_STATUS_HW_OWN))
1147 				break;
1148 			mlx5_aso_cqe_err_handle(sq);
1149 		} else {
1150 			n++;
1151 		}
1152 		cq->cq_ci++;
1153 	} while (1);
1154 	if (likely(n)) {
1155 		mlx5_aso_ct_status_update(sq, n);
1156 		sq->tail += n;
1157 		rte_io_wmb();
1158 		cq->cq_obj.db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
1159 	}
1160 	rte_spinlock_unlock(&sq->sqsl);
1161 }
1162 
1163 /*
1164  * Update connection tracking ASO context by sending WQE.
1165  *
1166  * @param[in] sh
1167  *   Pointer to mlx5_dev_ctx_shared object.
1168  * @param[in] ct
1169  *   Pointer to connection tracking offload object.
1170  * @param[in] profile
1171  *   Pointer to connection tracking TCP parameter.
1172  *
1173  * @return
1174  *   0 on success, -1 on failure.
1175  */
1176 int
mlx5_aso_ct_update_by_wqe(struct mlx5_dev_ctx_shared * sh,struct mlx5_aso_ct_action * ct,const struct rte_flow_action_conntrack * profile)1177 mlx5_aso_ct_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
1178 			  struct mlx5_aso_ct_action *ct,
1179 			  const struct rte_flow_action_conntrack *profile)
1180 {
1181 	uint32_t poll_wqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
1182 	struct mlx5_aso_ct_pool *pool;
1183 
1184 	MLX5_ASSERT(ct);
1185 	do {
1186 		mlx5_aso_ct_completion_handle(sh->ct_mng);
1187 		if (mlx5_aso_ct_sq_enqueue_single(sh, ct, profile))
1188 			return 0;
1189 		/* Waiting for wqe resource. */
1190 		rte_delay_us_sleep(10u);
1191 	} while (--poll_wqe_times);
1192 	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
1193 	DRV_LOG(ERR, "Fail to send WQE for ASO CT %d in pool %d",
1194 		ct->offset, pool->index);
1195 	return -1;
1196 }
1197 
1198 /*
1199  * The routine is used to wait for WQE completion to continue with queried data.
1200  *
1201  * @param[in] sh
1202  *   Pointer to mlx5_dev_ctx_shared object.
1203  * @param[in] ct
1204  *   Pointer to connection tracking offload object.
1205  *
1206  * @return
1207  *   0 on success, -1 on failure.
1208  */
1209 int
mlx5_aso_ct_wait_ready(struct mlx5_dev_ctx_shared * sh,struct mlx5_aso_ct_action * ct)1210 mlx5_aso_ct_wait_ready(struct mlx5_dev_ctx_shared *sh,
1211 		       struct mlx5_aso_ct_action *ct)
1212 {
1213 	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
1214 	uint32_t poll_cqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
1215 	struct mlx5_aso_ct_pool *pool;
1216 
1217 	if (__atomic_load_n(&ct->state, __ATOMIC_RELAXED) ==
1218 	    ASO_CONNTRACK_READY)
1219 		return 0;
1220 	do {
1221 		mlx5_aso_ct_completion_handle(mng);
1222 		if (__atomic_load_n(&ct->state, __ATOMIC_RELAXED) ==
1223 		    ASO_CONNTRACK_READY)
1224 			return 0;
1225 		/* Waiting for CQE ready, consider should block or sleep. */
1226 		rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
1227 	} while (--poll_cqe_times);
1228 	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
1229 	DRV_LOG(ERR, "Fail to poll CQE for ASO CT %d in pool %d",
1230 		ct->offset, pool->index);
1231 	return -1;
1232 }
1233 
1234 /*
1235  * Convert the hardware conntrack data format into the profile.
1236  *
1237  * @param[in] profile
1238  *   Pointer to conntrack profile to be filled after query.
1239  * @param[in] wdata
1240  *   Pointer to data fetched from hardware.
1241  */
1242 static inline void
mlx5_aso_ct_obj_analyze(struct rte_flow_action_conntrack * profile,char * wdata)1243 mlx5_aso_ct_obj_analyze(struct rte_flow_action_conntrack *profile,
1244 			char *wdata)
1245 {
1246 	void *o_dir = MLX5_ADDR_OF(conn_track_aso, wdata, original_dir);
1247 	void *r_dir = MLX5_ADDR_OF(conn_track_aso, wdata, reply_dir);
1248 
1249 	/* MLX5_GET16 should be taken into consideration. */
1250 	profile->state = (enum rte_flow_conntrack_state)
1251 			 MLX5_GET(conn_track_aso, wdata, state);
1252 	profile->enable = !MLX5_GET(conn_track_aso, wdata, freeze_track);
1253 	profile->selective_ack = MLX5_GET(conn_track_aso, wdata,
1254 					  sack_permitted);
1255 	profile->live_connection = MLX5_GET(conn_track_aso, wdata,
1256 					    connection_assured);
1257 	profile->challenge_ack_passed = MLX5_GET(conn_track_aso, wdata,
1258 						 challenged_acked);
1259 	profile->max_ack_window = MLX5_GET(conn_track_aso, wdata,
1260 					   max_ack_window);
1261 	profile->retransmission_limit = MLX5_GET(conn_track_aso, wdata,
1262 						 retranmission_limit);
1263 	profile->last_window = MLX5_GET(conn_track_aso, wdata, last_win);
1264 	profile->last_direction = MLX5_GET(conn_track_aso, wdata, last_dir);
1265 	profile->last_index = (enum rte_flow_conntrack_tcp_last_index)
1266 			      MLX5_GET(conn_track_aso, wdata, last_index);
1267 	profile->last_seq = MLX5_GET(conn_track_aso, wdata, last_seq);
1268 	profile->last_ack = MLX5_GET(conn_track_aso, wdata, last_ack);
1269 	profile->last_end = MLX5_GET(conn_track_aso, wdata, last_end);
1270 	profile->liberal_mode = MLX5_GET(conn_track_aso, wdata,
1271 				reply_direction_tcp_liberal_enabled) |
1272 				MLX5_GET(conn_track_aso, wdata,
1273 				original_direction_tcp_liberal_enabled);
1274 	/* No liberal in the RTE structure profile. */
1275 	profile->reply_dir.scale = MLX5_GET(conn_track_aso, wdata,
1276 					    reply_direction_tcp_scale);
1277 	profile->reply_dir.close_initiated = MLX5_GET(conn_track_aso, wdata,
1278 					reply_direction_tcp_close_initiated);
1279 	profile->reply_dir.data_unacked = MLX5_GET(conn_track_aso, wdata,
1280 					reply_direction_tcp_data_unacked);
1281 	profile->reply_dir.last_ack_seen = MLX5_GET(conn_track_aso, wdata,
1282 					reply_direction_tcp_max_ack);
1283 	profile->reply_dir.sent_end = MLX5_GET(tcp_window_params,
1284 					       r_dir, sent_end);
1285 	profile->reply_dir.reply_end = MLX5_GET(tcp_window_params,
1286 						r_dir, reply_end);
1287 	profile->reply_dir.max_win = MLX5_GET(tcp_window_params,
1288 					      r_dir, max_win);
1289 	profile->reply_dir.max_ack = MLX5_GET(tcp_window_params,
1290 					      r_dir, max_ack);
1291 	profile->original_dir.scale = MLX5_GET(conn_track_aso, wdata,
1292 					       original_direction_tcp_scale);
1293 	profile->original_dir.close_initiated = MLX5_GET(conn_track_aso, wdata,
1294 					original_direction_tcp_close_initiated);
1295 	profile->original_dir.data_unacked = MLX5_GET(conn_track_aso, wdata,
1296 					original_direction_tcp_data_unacked);
1297 	profile->original_dir.last_ack_seen = MLX5_GET(conn_track_aso, wdata,
1298 					original_direction_tcp_max_ack);
1299 	profile->original_dir.sent_end = MLX5_GET(tcp_window_params,
1300 						  o_dir, sent_end);
1301 	profile->original_dir.reply_end = MLX5_GET(tcp_window_params,
1302 						   o_dir, reply_end);
1303 	profile->original_dir.max_win = MLX5_GET(tcp_window_params,
1304 						 o_dir, max_win);
1305 	profile->original_dir.max_ack = MLX5_GET(tcp_window_params,
1306 						 o_dir, max_ack);
1307 }
1308 
1309 /*
1310  * Query connection tracking information parameter by send WQE.
1311  *
1312  * @param[in] dev
1313  *   Pointer to Ethernet device.
1314  * @param[in] ct
1315  *   Pointer to connection tracking offload object.
1316  * @param[out] profile
1317  *   Pointer to connection tracking TCP information.
1318  *
1319  * @return
1320  *   0 on success, -1 on failure.
1321  */
1322 int
mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared * sh,struct mlx5_aso_ct_action * ct,struct rte_flow_action_conntrack * profile)1323 mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared *sh,
1324 			 struct mlx5_aso_ct_action *ct,
1325 			 struct rte_flow_action_conntrack *profile)
1326 {
1327 	uint32_t poll_wqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
1328 	struct mlx5_aso_ct_pool *pool;
1329 	char out_data[64 * 2];
1330 	int ret;
1331 
1332 	MLX5_ASSERT(ct);
1333 	do {
1334 		mlx5_aso_ct_completion_handle(sh->ct_mng);
1335 		ret = mlx5_aso_ct_sq_query_single(sh, ct, out_data);
1336 		if (ret < 0)
1337 			return ret;
1338 		else if (ret > 0)
1339 			goto data_handle;
1340 		/* Waiting for wqe resource or state. */
1341 		else
1342 			rte_delay_us_sleep(10u);
1343 	} while (--poll_wqe_times);
1344 	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
1345 	DRV_LOG(ERR, "Fail to send WQE for ASO CT %d in pool %d",
1346 		ct->offset, pool->index);
1347 	return -1;
1348 data_handle:
1349 	ret = mlx5_aso_ct_wait_ready(sh, ct);
1350 	if (!ret)
1351 		mlx5_aso_ct_obj_analyze(profile, out_data);
1352 	return ret;
1353 }
1354 
1355 /*
1356  * Make sure the conntrack context is synchronized with hardware before
1357  * creating a flow rule that uses it.
1358  *
1359  * @param[in] sh
1360  *   Pointer to shared device context.
1361  * @param[in] ct
1362  *   Pointer to connection tracking offload object.
1363  *
1364  * @return
1365  *   0 on success, a negative errno value otherwise and rte_errno is set.
1366  */
1367 int
mlx5_aso_ct_available(struct mlx5_dev_ctx_shared * sh,struct mlx5_aso_ct_action * ct)1368 mlx5_aso_ct_available(struct mlx5_dev_ctx_shared *sh,
1369 		      struct mlx5_aso_ct_action *ct)
1370 {
1371 	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
1372 	uint32_t poll_cqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
1373 	enum mlx5_aso_ct_state state =
1374 				__atomic_load_n(&ct->state, __ATOMIC_RELAXED);
1375 
1376 	if (state == ASO_CONNTRACK_FREE) {
1377 		rte_errno = ENXIO;
1378 		return -rte_errno;
1379 	} else if (state == ASO_CONNTRACK_READY ||
1380 		   state == ASO_CONNTRACK_QUERY) {
1381 		return 0;
1382 	}
1383 	do {
1384 		mlx5_aso_ct_completion_handle(mng);
1385 		state = __atomic_load_n(&ct->state, __ATOMIC_RELAXED);
1386 		if (state == ASO_CONNTRACK_READY ||
1387 		    state == ASO_CONNTRACK_QUERY)
1388 			return 0;
1389 		/* Waiting for CQE ready, consider should block or sleep. */
1390 		rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
1391 	} while (--poll_cqe_times);
1392 	rte_errno = EBUSY;
1393 	return -rte_errno;
1394 }
1395