xref: /dpdk/drivers/raw/ioat/rte_idxd_rawdev_fns.h (revision a03e4b62)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2021 Intel Corporation
3  */
4 #ifndef _RTE_IDXD_RAWDEV_FNS_H_
5 #define _RTE_IDXD_RAWDEV_FNS_H_
6 
/**
 * @file
 * This header file contains the implementation of the various ioat
 * rawdev functions for DSA hardware. The API specification and key
 * public structures are defined in "rte_ioat_rawdev.h".
 *
 * This file should not be included directly. Applications should instead
 * include "rte_ioat_rawdev.h", which in turn includes this file and the
 * equivalent header for IOAT/CBDMA hardware.
 */
17 
18 #include <stdint.h>
19 
/*
 * Constants used on the data path when building IDXD hardware descriptors.
 */

/* Bit position of the opcode inside a descriptor's op_flags word. */
#define IDXD_CMD_OP_SHIFT 24

/* Opcodes; shifted left by IDXD_CMD_OP_SHIFT when placed into op_flags. */
enum rte_idxd_ops {
	idxd_op_nop     = 0,
	idxd_op_batch   = 1,
	idxd_op_drain   = 2,
	idxd_op_memmove = 3,
	idxd_op_fill    = 4
};
31 
/* Flag bits OR-ed into the op_flags word of an IDXD hardware descriptor. */
/* the only bit set in the descriptor written by __idxd_fence() */
#define IDXD_FLAG_FENCE                 (1 << 0)
/* set on every descriptor written by this file */
#define IDXD_FLAG_COMPLETION_ADDR_VALID (1 << 2)
/* set only on the batch descriptor submitted to the portal */
#define IDXD_FLAG_REQUEST_COMPLETION    (1 << 3)
/* set on the copy and fill descriptors */
#define IDXD_FLAG_CACHE_CONTROL         (1 << 8)
36 
/* Constants for the IOAT (non-IDXD) descriptor format; not used in this file. */
#define IOAT_COMP_UPDATE_SHIFT	3
#define IOAT_CMD_OP_SHIFT	24

/* IOAT opcodes. */
enum rte_ioat_ops {
	ioat_op_copy = 0,	/* Standard DMA Operation */
	ioat_op_fill = 1	/* Block Fill */
};
43 
44 /**
45  * Hardware descriptor used by DSA hardware, for both bursts and
46  * for individual operations.
47  */
48 struct rte_idxd_hw_desc {
49 	uint32_t pasid;
50 	uint32_t op_flags;
51 	rte_iova_t completion;
52 
53 	RTE_STD_C11
54 	union {
55 		rte_iova_t src;      /* source address for copy ops etc. */
56 		rte_iova_t desc_addr; /* descriptor pointer for batch */
57 	};
58 	rte_iova_t dst;
59 
60 	uint32_t size;    /* length of data for op, or batch size */
61 
62 	uint16_t intr_handle; /* completion interrupt handle */
63 
64 	/* remaining 26 bytes are reserved */
65 	uint16_t __reserved[13];
66 } __rte_aligned(64);
67 
68 /**
69  * Completion record structure written back by DSA
70  */
71 struct rte_idxd_completion {
72 	uint8_t status;
73 	uint8_t result;
74 	/* 16-bits pad here */
75 	uint32_t completed_size; /* data length, or descriptors for batch */
76 
77 	rte_iova_t fault_address;
78 	uint32_t invalid_flags;
79 } __rte_aligned(32);
80 
/**
 * Pair of user-supplied "handles" saved at enqueue time so that they can
 * be handed back to the user once the job completes.
 */
struct rte_idxd_user_hdl {
	uint64_t src; /* handle associated with the source */
	uint64_t dst; /* handle associated with the destination */
};
89 
90 /**
91  * @internal
92  * Structure representing an IDXD device instance
93  */
94 struct rte_idxd_rawdev {
95 	enum rte_ioat_dev_type type;
96 	struct rte_ioat_xstats xstats;
97 
98 	void *portal; /* address to write the batch descriptor */
99 
100 	struct rte_ioat_rawdev_config cfg;
101 	rte_iova_t desc_iova; /* base address of desc ring, needed for completions */
102 
103 	/* counters to track the batches */
104 	unsigned short max_batches;
105 	unsigned short batch_idx_read;
106 	unsigned short batch_idx_write;
107 	unsigned short *batch_idx_ring; /* store where each batch ends */
108 
109 	/* track descriptors and handles */
110 	unsigned short desc_ring_mask;
111 	unsigned short hdls_avail; /* handles for ops completed */
112 	unsigned short hdls_read; /* the read pointer for hdls/desc rings */
113 	unsigned short batch_start; /* start+size == write pointer for hdls/desc */
114 	unsigned short batch_size;
115 
116 	struct rte_idxd_hw_desc *desc_ring;
117 	struct rte_idxd_user_hdl *hdl_ring;
118 	/* flags to indicate handle validity. Kept separate from ring, to avoid
119 	 * using 8 bytes per flag. Upper 8 bits holds error code if any.
120 	 */
121 	uint16_t *hdl_ring_flags;
122 };
123 
/* Values stored in the low bits of rte_idxd_rawdev.hdl_ring_flags[]. */
#define RTE_IDXD_HDL_NORMAL     0        /* slot holds a valid user handle */
#define RTE_IDXD_HDL_INVALID    (1 << 0) /* no handle stored for this element */
#define RTE_IDXD_HDL_OP_FAILED  (1 << 1) /* return failure for this one */
#define RTE_IDXD_HDL_OP_SKIPPED (1 << 2) /* this op was skipped */
128 
129 static __rte_always_inline uint16_t
__idxd_burst_capacity(int dev_id)130 __idxd_burst_capacity(int dev_id)
131 {
132 	struct rte_idxd_rawdev *idxd =
133 			(struct rte_idxd_rawdev *)rte_rawdevs[dev_id].dev_private;
134 	uint16_t write_idx = idxd->batch_start + idxd->batch_size;
135 	uint16_t used_space, free_space;
136 
137 	/* Check for space in the batch ring */
138 	if ((idxd->batch_idx_read == 0 && idxd->batch_idx_write == idxd->max_batches) ||
139 			idxd->batch_idx_write + 1 == idxd->batch_idx_read)
140 		return 0;
141 
142 	/* for descriptors, check for wrap-around on write but not read */
143 	if (idxd->hdls_read > write_idx)
144 		write_idx += idxd->desc_ring_mask + 1;
145 	used_space = write_idx - idxd->hdls_read;
146 
147 	/* Return amount of free space in the descriptor ring
148 	 * subtract 1 for space for batch descriptor and 1 for possible null desc
149 	 */
150 	free_space = idxd->desc_ring_mask - used_space;
151 	if (free_space < 2)
152 		return 0;
153 	return free_space - 2;
154 }
155 
156 static __rte_always_inline rte_iova_t
__desc_idx_to_iova(struct rte_idxd_rawdev * idxd,uint16_t n)157 __desc_idx_to_iova(struct rte_idxd_rawdev *idxd, uint16_t n)
158 {
159 	return idxd->desc_iova + (n * sizeof(struct rte_idxd_hw_desc));
160 }
161 
162 static __rte_always_inline int
__idxd_write_desc(int dev_id,const uint32_t op_flags,const rte_iova_t src,const rte_iova_t dst,const uint32_t size,const struct rte_idxd_user_hdl * hdl)163 __idxd_write_desc(int dev_id,
164 		const uint32_t op_flags,
165 		const rte_iova_t src,
166 		const rte_iova_t dst,
167 		const uint32_t size,
168 		const struct rte_idxd_user_hdl *hdl)
169 {
170 	struct rte_idxd_rawdev *idxd =
171 			(struct rte_idxd_rawdev *)rte_rawdevs[dev_id].dev_private;
172 	uint16_t write_idx = idxd->batch_start + idxd->batch_size;
173 	uint16_t mask = idxd->desc_ring_mask;
174 
175 	/* first check batch ring space then desc ring space */
176 	if ((idxd->batch_idx_read == 0 && idxd->batch_idx_write == idxd->max_batches) ||
177 			idxd->batch_idx_write + 1 == idxd->batch_idx_read)
178 		goto failed;
179 	/* for descriptor ring, we always need a slot for batch completion */
180 	if (((write_idx + 2) & mask) == idxd->hdls_read ||
181 			((write_idx + 1) & mask) == idxd->hdls_read)
182 		goto failed;
183 
184 	/* write desc and handle. Note, descriptors don't wrap */
185 	idxd->desc_ring[write_idx].pasid = 0;
186 	idxd->desc_ring[write_idx].op_flags = op_flags | IDXD_FLAG_COMPLETION_ADDR_VALID;
187 	idxd->desc_ring[write_idx].completion = __desc_idx_to_iova(idxd, write_idx & mask);
188 	idxd->desc_ring[write_idx].src = src;
189 	idxd->desc_ring[write_idx].dst = dst;
190 	idxd->desc_ring[write_idx].size = size;
191 
192 	if (hdl == NULL)
193 		idxd->hdl_ring_flags[write_idx & mask] = RTE_IDXD_HDL_INVALID;
194 	else
195 		idxd->hdl_ring[write_idx & mask] = *hdl;
196 	idxd->batch_size++;
197 
198 	idxd->xstats.enqueued++;
199 
200 	rte_prefetch0_write(&idxd->desc_ring[write_idx + 1]);
201 	return 1;
202 
203 failed:
204 	idxd->xstats.enqueue_failed++;
205 	rte_errno = ENOSPC;
206 	return 0;
207 }
208 
209 static __rte_always_inline int
__idxd_enqueue_fill(int dev_id,uint64_t pattern,rte_iova_t dst,unsigned int length,uintptr_t dst_hdl)210 __idxd_enqueue_fill(int dev_id, uint64_t pattern, rte_iova_t dst,
211 		unsigned int length, uintptr_t dst_hdl)
212 {
213 	const struct rte_idxd_user_hdl hdl = {
214 			.dst = dst_hdl
215 	};
216 	return __idxd_write_desc(dev_id,
217 			(idxd_op_fill << IDXD_CMD_OP_SHIFT) | IDXD_FLAG_CACHE_CONTROL,
218 			pattern, dst, length, &hdl);
219 }
220 
221 static __rte_always_inline int
__idxd_enqueue_copy(int dev_id,rte_iova_t src,rte_iova_t dst,unsigned int length,uintptr_t src_hdl,uintptr_t dst_hdl)222 __idxd_enqueue_copy(int dev_id, rte_iova_t src, rte_iova_t dst,
223 		unsigned int length, uintptr_t src_hdl, uintptr_t dst_hdl)
224 {
225 	const struct rte_idxd_user_hdl hdl = {
226 			.src = src_hdl,
227 			.dst = dst_hdl
228 	};
229 	return __idxd_write_desc(dev_id,
230 			(idxd_op_memmove << IDXD_CMD_OP_SHIFT) | IDXD_FLAG_CACHE_CONTROL,
231 			src, dst, length, &hdl);
232 }
233 
234 static __rte_always_inline int
__idxd_enqueue_nop(int dev_id)235 __idxd_enqueue_nop(int dev_id)
236 {
237 	/* only op field needs filling - zero src, dst and length */
238 	return __idxd_write_desc(dev_id, idxd_op_nop << IDXD_CMD_OP_SHIFT,
239 			0, 0, 0, NULL);
240 }
241 
242 static __rte_always_inline int
__idxd_fence(int dev_id)243 __idxd_fence(int dev_id)
244 {
245 	/* only op field needs filling - zero src, dst and length */
246 	return __idxd_write_desc(dev_id, IDXD_FLAG_FENCE, 0, 0, 0, NULL);
247 }
248 
249 static __rte_always_inline void
__idxd_movdir64b(volatile void * dst,const struct rte_idxd_hw_desc * src)250 __idxd_movdir64b(volatile void *dst, const struct rte_idxd_hw_desc *src)
251 {
252 	asm volatile (".byte 0x66, 0x0f, 0x38, 0xf8, 0x02"
253 			:
254 			: "a" (dst), "d" (src)
255 			: "memory");
256 }
257 
258 static __rte_always_inline int
__idxd_perform_ops(int dev_id)259 __idxd_perform_ops(int dev_id)
260 {
261 	struct rte_idxd_rawdev *idxd =
262 			(struct rte_idxd_rawdev *)rte_rawdevs[dev_id].dev_private;
263 
264 	if (!idxd->cfg.no_prefetch_completions)
265 		rte_prefetch1(&idxd->desc_ring[idxd->batch_idx_ring[idxd->batch_idx_read]]);
266 
267 	if (idxd->batch_size == 0)
268 		return 0;
269 
270 	if (idxd->batch_size == 1)
271 		/* use a NOP as a null descriptor, so batch_size >= 2 */
272 		if (__idxd_enqueue_nop(dev_id) != 1)
273 			return -1;
274 
275 	/* write completion beyond last desc in the batch */
276 	uint16_t comp_idx = (idxd->batch_start + idxd->batch_size) & idxd->desc_ring_mask;
277 	*((uint64_t *)&idxd->desc_ring[comp_idx]) = 0; /* zero start of desc */
278 	idxd->hdl_ring_flags[comp_idx] = RTE_IDXD_HDL_INVALID;
279 
280 	const struct rte_idxd_hw_desc batch_desc = {
281 			.op_flags = (idxd_op_batch << IDXD_CMD_OP_SHIFT) |
282 				IDXD_FLAG_COMPLETION_ADDR_VALID |
283 				IDXD_FLAG_REQUEST_COMPLETION,
284 			.desc_addr = __desc_idx_to_iova(idxd, idxd->batch_start),
285 			.completion = __desc_idx_to_iova(idxd, comp_idx),
286 			.size = idxd->batch_size,
287 	};
288 
289 	_mm_sfence(); /* fence before writing desc to device */
290 	__idxd_movdir64b(idxd->portal, &batch_desc);
291 	idxd->xstats.started += idxd->batch_size;
292 
293 	idxd->batch_start += idxd->batch_size + 1;
294 	idxd->batch_start &= idxd->desc_ring_mask;
295 	idxd->batch_size = 0;
296 
297 	idxd->batch_idx_ring[idxd->batch_idx_write++] = comp_idx;
298 	if (idxd->batch_idx_write > idxd->max_batches)
299 		idxd->batch_idx_write = 0;
300 
301 	return 0;
302 }
303 
304 static __rte_always_inline int
__idxd_completed_ops(int dev_id,uint8_t max_ops,uint32_t * status,uint8_t * num_unsuccessful,uintptr_t * src_hdls,uintptr_t * dst_hdls)305 __idxd_completed_ops(int dev_id, uint8_t max_ops, uint32_t *status, uint8_t *num_unsuccessful,
306 		uintptr_t *src_hdls, uintptr_t *dst_hdls)
307 {
308 	struct rte_idxd_rawdev *idxd =
309 			(struct rte_idxd_rawdev *)rte_rawdevs[dev_id].dev_private;
310 	unsigned short n, h_idx;
311 
312 	while (idxd->batch_idx_read != idxd->batch_idx_write) {
313 		uint16_t idx_to_chk = idxd->batch_idx_ring[idxd->batch_idx_read];
314 		volatile struct rte_idxd_completion *comp_to_chk =
315 				(struct rte_idxd_completion *)&idxd->desc_ring[idx_to_chk];
316 		uint8_t batch_status = comp_to_chk->status;
317 		if (batch_status == 0)
318 			break;
319 		comp_to_chk->status = 0;
320 		if (unlikely(batch_status > 1)) {
321 			/* error occurred somewhere in batch, start where last checked */
322 			uint16_t desc_count = comp_to_chk->completed_size;
323 			uint16_t batch_start = idxd->hdls_avail;
324 			uint16_t batch_end = idx_to_chk;
325 
326 			if (batch_start > batch_end)
327 				batch_end += idxd->desc_ring_mask + 1;
328 			/* go through each batch entry and see status */
329 			for (n = 0; n < desc_count; n++) {
330 				uint16_t idx = (batch_start + n) & idxd->desc_ring_mask;
331 				volatile struct rte_idxd_completion *comp =
332 					(struct rte_idxd_completion *)&idxd->desc_ring[idx];
333 				if (comp->status != 0 &&
334 						idxd->hdl_ring_flags[idx] == RTE_IDXD_HDL_NORMAL) {
335 					idxd->hdl_ring_flags[idx] = RTE_IDXD_HDL_OP_FAILED;
336 					idxd->hdl_ring_flags[idx] |= (comp->status << 8);
337 					comp->status = 0; /* clear error for next time */
338 				}
339 			}
340 			/* if batch is incomplete, mark rest as skipped */
341 			for ( ; n < batch_end - batch_start; n++) {
342 				uint16_t idx = (batch_start + n) & idxd->desc_ring_mask;
343 				if (idxd->hdl_ring_flags[idx] == RTE_IDXD_HDL_NORMAL)
344 					idxd->hdl_ring_flags[idx] = RTE_IDXD_HDL_OP_SKIPPED;
345 			}
346 		}
347 		/* avail points to one after the last one written */
348 		idxd->hdls_avail = (idx_to_chk + 1) & idxd->desc_ring_mask;
349 		idxd->batch_idx_read++;
350 		if (idxd->batch_idx_read > idxd->max_batches)
351 			idxd->batch_idx_read = 0;
352 	}
353 
354 	n = 0;
355 	h_idx = idxd->hdls_read;
356 	while (h_idx != idxd->hdls_avail) {
357 		uint16_t flag = idxd->hdl_ring_flags[h_idx];
358 		if (flag != RTE_IDXD_HDL_INVALID) {
359 			if (!idxd->cfg.hdls_disable) {
360 				src_hdls[n] = idxd->hdl_ring[h_idx].src;
361 				dst_hdls[n] = idxd->hdl_ring[h_idx].dst;
362 			}
363 			if (unlikely(flag != RTE_IDXD_HDL_NORMAL)) {
364 				if (status != NULL)
365 					status[n] = flag == RTE_IDXD_HDL_OP_SKIPPED ?
366 							RTE_IOAT_OP_SKIPPED :
367 							/* failure case, return err code */
368 							idxd->hdl_ring_flags[h_idx] >> 8;
369 				if (num_unsuccessful != NULL)
370 					*num_unsuccessful += 1;
371 			}
372 			n++;
373 		}
374 		idxd->hdl_ring_flags[h_idx] = RTE_IDXD_HDL_NORMAL;
375 		if (++h_idx > idxd->desc_ring_mask)
376 			h_idx = 0;
377 		if (n >= max_ops)
378 			break;
379 	}
380 
381 	/* skip over any remaining blank elements, e.g. batch completion */
382 	while (idxd->hdl_ring_flags[h_idx] == RTE_IDXD_HDL_INVALID && h_idx != idxd->hdls_avail) {
383 		idxd->hdl_ring_flags[h_idx] = RTE_IDXD_HDL_NORMAL;
384 		if (++h_idx > idxd->desc_ring_mask)
385 			h_idx = 0;
386 	}
387 	idxd->hdls_read = h_idx;
388 
389 	idxd->xstats.completed += n;
390 	return n;
391 }
392 
393 #endif
394