xref: /dpdk/drivers/event/dlb2/dlb2.c (revision 29fd052d)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2016-2020 Intel Corporation
3  */
4 
5 #include <assert.h>
6 #include <errno.h>
7 #include <nmmintrin.h>
8 #include <pthread.h>
9 #include <stdint.h>
10 #include <stdbool.h>
11 #include <stdio.h>
12 #include <string.h>
13 #include <sys/mman.h>
14 #include <fcntl.h>
15 
16 #include <rte_common.h>
17 #include <rte_config.h>
18 #include <rte_cycles.h>
19 #include <rte_debug.h>
20 #include <rte_dev.h>
21 #include <rte_errno.h>
22 #include <rte_eventdev.h>
23 #include <eventdev_pmd.h>
24 #include <rte_io.h>
25 #include <rte_kvargs.h>
26 #include <rte_log.h>
27 #include <rte_malloc.h>
28 #include <rte_mbuf.h>
29 #include <rte_power_intrinsics.h>
30 #include <rte_prefetch.h>
31 #include <rte_ring.h>
32 #include <rte_string_fns.h>
33 
34 #include "dlb2_priv.h"
35 #include "dlb2_iface.h"
36 #include "dlb2_inline_fns.h"
37 
38 /*
39  * Resources exposed to eventdev. Some values are overridden at runtime
40  * using values returned by the DLB kernel driver.
41  */
42 #if (RTE_EVENT_MAX_QUEUES_PER_DEV > UINT8_MAX)
43 #error "RTE_EVENT_MAX_QUEUES_PER_DEV cannot fit in member max_event_queues"
44 #endif
45 static struct rte_event_dev_info evdev_dlb2_default_info = {
46 	.driver_name = "", /* probe will set */
47 	.min_dequeue_timeout_ns = DLB2_MIN_DEQUEUE_TIMEOUT_NS,
48 	.max_dequeue_timeout_ns = DLB2_MAX_DEQUEUE_TIMEOUT_NS,
49 #if (RTE_EVENT_MAX_QUEUES_PER_DEV < DLB2_MAX_NUM_LDB_QUEUES)
50 	.max_event_queues = RTE_EVENT_MAX_QUEUES_PER_DEV,
51 #else
52 	.max_event_queues = DLB2_MAX_NUM_LDB_QUEUES,
53 #endif
54 	.max_event_queue_flows = DLB2_MAX_NUM_FLOWS,
55 	.max_event_queue_priority_levels = DLB2_QID_PRIORITIES,
56 	.max_event_priority_levels = DLB2_QID_PRIORITIES,
57 	.max_event_ports = DLB2_MAX_NUM_LDB_PORTS,
58 	.max_event_port_dequeue_depth = DLB2_MAX_CQ_DEPTH,
59 	.max_event_port_enqueue_depth = DLB2_MAX_ENQUEUE_DEPTH,
60 	.max_event_port_links = DLB2_MAX_NUM_QIDS_PER_LDB_CQ,
61 	.max_num_events = DLB2_MAX_NUM_LDB_CREDITS,
62 	.max_single_link_event_port_queue_pairs =
63 		DLB2_MAX_NUM_DIR_PORTS(DLB2_HW_V2),
64 	.event_dev_cap = (RTE_EVENT_DEV_CAP_QUEUE_QOS |
65 			  RTE_EVENT_DEV_CAP_EVENT_QOS |
66 			  RTE_EVENT_DEV_CAP_BURST_MODE |
67 			  RTE_EVENT_DEV_CAP_DISTRIBUTED_SCHED |
68 			  RTE_EVENT_DEV_CAP_IMPLICIT_RELEASE_DISABLE |
69 			  RTE_EVENT_DEV_CAP_QUEUE_ALL_TYPES |
70 			  RTE_EVENT_DEV_CAP_MAINTENANCE_FREE),
71 };
72 
73 struct process_local_port_data
74 dlb2_port[DLB2_MAX_NUM_PORTS_ALL][DLB2_NUM_PORT_TYPES];
75 
76 static void
77 dlb2_free_qe_mem(struct dlb2_port *qm_port)
78 {
79 	if (qm_port == NULL)
80 		return;
81 
82 	rte_free(qm_port->qe4);
83 	qm_port->qe4 = NULL;
84 
85 	rte_free(qm_port->int_arm_qe);
86 	qm_port->int_arm_qe = NULL;
87 
88 	rte_free(qm_port->consume_qe);
89 	qm_port->consume_qe = NULL;
90 
91 	rte_memzone_free(dlb2_port[qm_port->id][PORT_TYPE(qm_port)].mz);
92 	dlb2_port[qm_port->id][PORT_TYPE(qm_port)].mz = NULL;
93 }
94 
95 /* override defaults with value(s) provided on command line */
96 static void
97 dlb2_init_queue_depth_thresholds(struct dlb2_eventdev *dlb2,
98 				 int *qid_depth_thresholds)
99 {
100 	int q;
101 
102 	for (q = 0; q < DLB2_MAX_NUM_QUEUES(dlb2->version); q++) {
103 		if (qid_depth_thresholds[q] != 0)
104 			dlb2->ev_queues[q].depth_threshold =
105 				qid_depth_thresholds[q];
106 	}
107 }
108 
109 static int
110 dlb2_hw_query_resources(struct dlb2_eventdev *dlb2)
111 {
112 	struct dlb2_hw_dev *handle = &dlb2->qm_instance;
113 	struct dlb2_hw_resource_info *dlb2_info = &handle->info;
114 	int ret;
115 
116 	/* Query driver resources provisioned for this device */
117 
118 	ret = dlb2_iface_get_num_resources(handle,
119 					   &dlb2->hw_rsrc_query_results);
120 	if (ret) {
121 		DLB2_LOG_ERR("ioctl get dlb2 num resources, err=%d\n", ret);
122 		return ret;
123 	}
124 
125 	/* Complete filling in device resource info returned to evdev app,
126 	 * overriding any default values.
127 	 * The capabilities (CAPs) were set at compile time.
128 	 */
129 
130 	evdev_dlb2_default_info.max_event_queues =
131 		dlb2->hw_rsrc_query_results.num_ldb_queues;
132 
133 	evdev_dlb2_default_info.max_event_ports =
134 		dlb2->hw_rsrc_query_results.num_ldb_ports;
135 
136 	if (dlb2->version == DLB2_HW_V2_5) {
137 		evdev_dlb2_default_info.max_num_events =
138 			dlb2->hw_rsrc_query_results.num_credits;
139 	} else {
140 		evdev_dlb2_default_info.max_num_events =
141 			dlb2->hw_rsrc_query_results.num_ldb_credits;
142 	}
143 	/* Save off values used when creating the scheduling domain. */
144 
145 	handle->info.num_sched_domains =
146 		dlb2->hw_rsrc_query_results.num_sched_domains;
147 
148 	if (dlb2->version == DLB2_HW_V2_5) {
149 		handle->info.hw_rsrc_max.nb_events_limit =
150 			dlb2->hw_rsrc_query_results.num_credits;
151 	} else {
152 		handle->info.hw_rsrc_max.nb_events_limit =
153 			dlb2->hw_rsrc_query_results.num_ldb_credits;
154 	}
155 	handle->info.hw_rsrc_max.num_queues =
156 		dlb2->hw_rsrc_query_results.num_ldb_queues +
157 		dlb2->hw_rsrc_query_results.num_dir_ports;
158 
159 	handle->info.hw_rsrc_max.num_ldb_queues =
160 		dlb2->hw_rsrc_query_results.num_ldb_queues;
161 
162 	handle->info.hw_rsrc_max.num_ldb_ports =
163 		dlb2->hw_rsrc_query_results.num_ldb_ports;
164 
165 	handle->info.hw_rsrc_max.num_dir_ports =
166 		dlb2->hw_rsrc_query_results.num_dir_ports;
167 
168 	handle->info.hw_rsrc_max.reorder_window_size =
169 		dlb2->hw_rsrc_query_results.num_hist_list_entries;
170 
171 	rte_memcpy(dlb2_info, &handle->info.hw_rsrc_max, sizeof(*dlb2_info));
172 
173 	return 0;
174 }
175 
176 #define DLB2_BASE_10 10
177 
178 static int
179 dlb2_string_to_int(int *result, const char *str)
180 {
181 	long ret;
182 	char *endptr;
183 
184 	if (str == NULL || result == NULL)
185 		return -EINVAL;
186 
187 	errno = 0;
188 	ret = strtol(str, &endptr, DLB2_BASE_10);
189 	if (errno)
190 		return -errno;
191 
192 	/* long int and int may have different widths on some architectures */
193 	if (ret < INT_MIN || ret > INT_MAX || endptr == str)
194 		return -EINVAL;
195 
196 	*result = ret;
197 	return 0;
198 }
199 
200 static int
201 set_numa_node(const char *key __rte_unused, const char *value, void *opaque)
202 {
203 	int *socket_id = opaque;
204 	int ret;
205 
206 	ret = dlb2_string_to_int(socket_id, value);
207 	if (ret < 0)
208 		return ret;
209 
210 	if (*socket_id > RTE_MAX_NUMA_NODES)
211 		return -EINVAL;
212 	return 0;
213 }
214 
215 static int
216 set_max_num_events(const char *key __rte_unused,
217 		   const char *value,
218 		   void *opaque)
219 {
220 	int *max_num_events = opaque;
221 	int ret;
222 
223 	if (value == NULL || opaque == NULL) {
224 		DLB2_LOG_ERR("NULL pointer\n");
225 		return -EINVAL;
226 	}
227 
228 	ret = dlb2_string_to_int(max_num_events, value);
229 	if (ret < 0)
230 		return ret;
231 
232 	if (*max_num_events < 0 || *max_num_events >
233 			DLB2_MAX_NUM_LDB_CREDITS) {
234 		DLB2_LOG_ERR("dlb2: max_num_events must be between 0 and %d\n",
235 			     DLB2_MAX_NUM_LDB_CREDITS);
236 		return -EINVAL;
237 	}
238 
239 	return 0;
240 }
241 
242 static int
243 set_num_dir_credits(const char *key __rte_unused,
244 		    const char *value,
245 		    void *opaque)
246 {
247 	int *num_dir_credits = opaque;
248 	int ret;
249 
250 	if (value == NULL || opaque == NULL) {
251 		DLB2_LOG_ERR("NULL pointer\n");
252 		return -EINVAL;
253 	}
254 
255 	ret = dlb2_string_to_int(num_dir_credits, value);
256 	if (ret < 0)
257 		return ret;
258 
259 	if (*num_dir_credits < 0 ||
260 	    *num_dir_credits > DLB2_MAX_NUM_DIR_CREDITS(DLB2_HW_V2)) {
261 		DLB2_LOG_ERR("dlb2: num_dir_credits must be between 0 and %d\n",
262 			     DLB2_MAX_NUM_DIR_CREDITS(DLB2_HW_V2));
263 		return -EINVAL;
264 	}
265 
266 	return 0;
267 }
268 
269 static int
270 set_dev_id(const char *key __rte_unused,
271 	   const char *value,
272 	   void *opaque)
273 {
274 	int *dev_id = opaque;
275 	int ret;
276 
277 	if (value == NULL || opaque == NULL) {
278 		DLB2_LOG_ERR("NULL pointer\n");
279 		return -EINVAL;
280 	}
281 
282 	ret = dlb2_string_to_int(dev_id, value);
283 	if (ret < 0)
284 		return ret;
285 
286 	return 0;
287 }
288 
289 static int
290 set_cos(const char *key __rte_unused,
291 	const char *value,
292 	void *opaque)
293 {
294 	enum dlb2_cos *cos_id = opaque;
295 	int x = 0;
296 	int ret;
297 
298 	if (value == NULL || opaque == NULL) {
299 		DLB2_LOG_ERR("NULL pointer\n");
300 		return -EINVAL;
301 	}
302 
303 	ret = dlb2_string_to_int(&x, value);
304 	if (ret < 0)
305 		return ret;
306 
307 	if (x != DLB2_COS_DEFAULT && (x < DLB2_COS_0 || x > DLB2_COS_3)) {
308 		DLB2_LOG_ERR(
309 			"COS %d out of range, must be DLB2_COS_DEFAULT or 0-3\n",
310 			x);
311 		return -EINVAL;
312 	}
313 
314 	*cos_id = x;
315 
316 	return 0;
317 }
318 
319 static int
320 set_poll_interval(const char *key __rte_unused,
321 	const char *value,
322 	void *opaque)
323 {
324 	int *poll_interval = opaque;
325 	int ret;
326 
327 	if (value == NULL || opaque == NULL) {
328 		DLB2_LOG_ERR("NULL pointer\n");
329 		return -EINVAL;
330 	}
331 
332 	ret = dlb2_string_to_int(poll_interval, value);
333 	if (ret < 0)
334 		return ret;
335 
336 	return 0;
337 }
338 
339 static int
340 set_sw_credit_quanta(const char *key __rte_unused,
341 	const char *value,
342 	void *opaque)
343 {
344 	int *sw_credit_quanta = opaque;
345 	int ret;
346 
347 	if (value == NULL || opaque == NULL) {
348 		DLB2_LOG_ERR("NULL pointer\n");
349 		return -EINVAL;
350 	}
351 
352 	ret = dlb2_string_to_int(sw_credit_quanta, value);
353 	if (ret < 0)
354 		return ret;
355 
356 	return 0;
357 }
358 
359 static int
360 set_hw_credit_quanta(const char *key __rte_unused,
361 	const char *value,
362 	void *opaque)
363 {
364 	int *hw_credit_quanta = opaque;
365 	int ret;
366 
367 	if (value == NULL || opaque == NULL) {
368 		DLB2_LOG_ERR("NULL pointer\n");
369 		return -EINVAL;
370 	}
371 
372 	ret = dlb2_string_to_int(hw_credit_quanta, value);
373 	if (ret < 0)
374 		return ret;
375 
376 	return 0;
377 }
378 
379 static int
380 set_default_depth_thresh(const char *key __rte_unused,
381 	const char *value,
382 	void *opaque)
383 {
384 	int *default_depth_thresh = opaque;
385 	int ret;
386 
387 	if (value == NULL || opaque == NULL) {
388 		DLB2_LOG_ERR("NULL pointer\n");
389 		return -EINVAL;
390 	}
391 
392 	ret = dlb2_string_to_int(default_depth_thresh, value);
393 	if (ret < 0)
394 		return ret;
395 
396 	return 0;
397 }
398 
399 static int
400 set_vector_opts_enab(const char *key __rte_unused,
401 	const char *value,
402 	void *opaque)
403 {
404 	bool *dlb2_vector_opts_enabled = opaque;
405 
406 	if (value == NULL || opaque == NULL) {
407 		DLB2_LOG_ERR("NULL pointer\n");
408 		return -EINVAL;
409 	}
410 
411 	if ((*value == 'y') || (*value == 'Y'))
412 		*dlb2_vector_opts_enabled = true;
413 	else
414 		*dlb2_vector_opts_enabled = false;
415 
416 	return 0;
417 }
418 
419 static int
420 set_qid_depth_thresh(const char *key __rte_unused,
421 		     const char *value,
422 		     void *opaque)
423 {
424 	struct dlb2_qid_depth_thresholds *qid_thresh = opaque;
425 	int first, last, thresh, i;
426 
427 	if (value == NULL || opaque == NULL) {
428 		DLB2_LOG_ERR("NULL pointer\n");
429 		return -EINVAL;
430 	}
431 
432 	/* command line override may take one of the following 3 forms:
433 	 * qid_depth_thresh=all:<threshold_value> ... all queues
434 	 * qid_depth_thresh=qidA-qidB:<threshold_value> ... a range of queues
435 	 * qid_depth_thresh=qid:<threshold_value> ... just one queue
436 	 */
437 	if (sscanf(value, "all:%d", &thresh) == 1) {
438 		first = 0;
439 		last = DLB2_MAX_NUM_QUEUES(DLB2_HW_V2) - 1;
440 	} else if (sscanf(value, "%d-%d:%d", &first, &last, &thresh) == 3) {
441 		/* we have everything we need */
442 	} else if (sscanf(value, "%d:%d", &first, &thresh) == 2) {
443 		last = first;
444 	} else {
445 		DLB2_LOG_ERR("Error parsing qid depth devarg. Should be all:val, qid-qid:val, or qid:val\n");
446 		return -EINVAL;
447 	}
448 
449 	if (first > last || first < 0 ||
450 		last >= DLB2_MAX_NUM_QUEUES(DLB2_HW_V2)) {
451 		DLB2_LOG_ERR("Error parsing qid depth devarg, invalid qid value\n");
452 		return -EINVAL;
453 	}
454 
455 	if (thresh < 0 || thresh > DLB2_MAX_QUEUE_DEPTH_THRESHOLD) {
456 		DLB2_LOG_ERR("Error parsing qid depth devarg, threshold > %d\n",
457 			     DLB2_MAX_QUEUE_DEPTH_THRESHOLD);
458 		return -EINVAL;
459 	}
460 
461 	for (i = first; i <= last; i++)
462 		qid_thresh->val[i] = thresh; /* indexed by qid */
463 
464 	return 0;
465 }
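
/*
 * Illustrative devarg strings accepted by the parser above (the threshold
 * values here are examples only; anything in
 * [0, DLB2_MAX_QUEUE_DEPTH_THRESHOLD] is accepted):
 *   qid_depth_thresh=all:256 ... threshold 256 for every queue
 *   qid_depth_thresh=0-3:128 ... threshold 128 for queues 0 through 3
 *   qid_depth_thresh=5:64 ... threshold 64 for queue 5 only
 */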
466 
467 static int
468 set_qid_depth_thresh_v2_5(const char *key __rte_unused,
469 			  const char *value,
470 			  void *opaque)
471 {
472 	struct dlb2_qid_depth_thresholds *qid_thresh = opaque;
473 	int first, last, thresh, i;
474 
475 	if (value == NULL || opaque == NULL) {
476 		DLB2_LOG_ERR("NULL pointer\n");
477 		return -EINVAL;
478 	}
479 
480 	/* command line override may take one of the following 3 forms:
481 	 * qid_depth_thresh=all:<threshold_value> ... all queues
482 	 * qid_depth_thresh=qidA-qidB:<threshold_value> ... a range of queues
483 	 * qid_depth_thresh=qid:<threshold_value> ... just one queue
484 	 */
485 	if (sscanf(value, "all:%d", &thresh) == 1) {
486 		first = 0;
487 		last = DLB2_MAX_NUM_QUEUES(DLB2_HW_V2_5) - 1;
488 	} else if (sscanf(value, "%d-%d:%d", &first, &last, &thresh) == 3) {
489 		/* we have everything we need */
490 	} else if (sscanf(value, "%d:%d", &first, &thresh) == 2) {
491 		last = first;
492 	} else {
493 		DLB2_LOG_ERR("Error parsing qid depth devarg. Should be all:val, qid-qid:val, or qid:val\n");
494 		return -EINVAL;
495 	}
496 
497 	if (first > last || first < 0 ||
498 		last >= DLB2_MAX_NUM_QUEUES(DLB2_HW_V2_5)) {
499 		DLB2_LOG_ERR("Error parsing qid depth devarg, invalid qid value\n");
500 		return -EINVAL;
501 	}
502 
503 	if (thresh < 0 || thresh > DLB2_MAX_QUEUE_DEPTH_THRESHOLD) {
504 		DLB2_LOG_ERR("Error parsing qid depth devarg, threshold > %d\n",
505 			     DLB2_MAX_QUEUE_DEPTH_THRESHOLD);
506 		return -EINVAL;
507 	}
508 
509 	for (i = first; i <= last; i++)
510 		qid_thresh->val[i] = thresh; /* indexed by qid */
511 
512 	return 0;
513 }
514 
515 static void
516 dlb2_eventdev_info_get(struct rte_eventdev *dev,
517 		       struct rte_event_dev_info *dev_info)
518 {
519 	struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
520 	int ret;
521 
522 	ret = dlb2_hw_query_resources(dlb2);
523 	if (ret) {
524 		const struct rte_eventdev_data *data = dev->data;
525 
526 		DLB2_LOG_ERR("get resources err=%d, devid=%d\n",
527 			     ret, data->dev_id);
528 		/* fn is void, so fall through and return values set up in
529 		 * probe
530 		 */
531 	}
532 
533 	/* Add num resources currently owned by this domain.
534 	 * These would become available if the scheduling domain were reset due
535 	 * to the application recalling eventdev_configure to *reconfigure* the
536 	 * domain.
537 	 */
538 	evdev_dlb2_default_info.max_event_ports += dlb2->num_ldb_ports;
539 	evdev_dlb2_default_info.max_event_queues += dlb2->num_ldb_queues;
540 	if (dlb2->version == DLB2_HW_V2_5) {
541 		evdev_dlb2_default_info.max_num_events +=
542 			dlb2->max_credits;
543 	} else {
544 		evdev_dlb2_default_info.max_num_events +=
545 			dlb2->max_ldb_credits;
546 	}
547 	evdev_dlb2_default_info.max_event_queues =
548 		RTE_MIN(evdev_dlb2_default_info.max_event_queues,
549 			RTE_EVENT_MAX_QUEUES_PER_DEV);
550 
551 	evdev_dlb2_default_info.max_num_events =
552 		RTE_MIN(evdev_dlb2_default_info.max_num_events,
553 			dlb2->max_num_events_override);
554 
555 	*dev_info = evdev_dlb2_default_info;
556 }
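
/*
 * Application-side sketch (illustrative, not part of the PMD): assuming this
 * callback is registered as the eventdev dev_infos_get op, it is what
 * ultimately services rte_event_dev_info_get(), e.g.
 *
 *	struct rte_event_dev_info info;
 *	rte_event_dev_info_get(dev_id, &info);
 *
 * after which info.max_event_queues and info.max_num_events reflect the
 * driver-reported resources plus whatever this scheduling domain already
 * owns, capped as above.
 */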
557 
558 static int
559 dlb2_hw_create_sched_domain(struct dlb2_hw_dev *handle,
560 			    const struct dlb2_hw_rsrcs *resources_asked,
561 			    uint8_t device_version)
562 {
563 	int ret = 0;
564 	struct dlb2_create_sched_domain_args *cfg;
565 
566 	if (resources_asked == NULL) {
567 		DLB2_LOG_ERR("dlb2: dlb2_create NULL parameter\n");
568 		ret = EINVAL;
569 		goto error_exit;
570 	}
571 
572 	/* Map generic qm resources to dlb2 resources */
573 	cfg = &handle->cfg.resources;
574 
575 	/* DIR ports and queues */
576 
577 	cfg->num_dir_ports = resources_asked->num_dir_ports;
578 	if (device_version == DLB2_HW_V2_5)
579 		cfg->num_credits = resources_asked->num_credits;
580 	else
581 		cfg->num_dir_credits = resources_asked->num_dir_credits;
582 
583 	/* LDB queues */
584 
585 	cfg->num_ldb_queues = resources_asked->num_ldb_queues;
586 
587 	/* LDB ports */
588 
589 	cfg->cos_strict = 0; /* Best effort */
590 	cfg->num_cos_ldb_ports[0] = 0;
591 	cfg->num_cos_ldb_ports[1] = 0;
592 	cfg->num_cos_ldb_ports[2] = 0;
593 	cfg->num_cos_ldb_ports[3] = 0;
594 
595 	switch (handle->cos_id) {
596 	case DLB2_COS_0:
597 		cfg->num_ldb_ports = 0; /* no don't care ports */
598 		cfg->num_cos_ldb_ports[0] =
599 			resources_asked->num_ldb_ports;
600 		break;
601 	case DLB2_COS_1:
602 		cfg->num_ldb_ports = 0; /* no don't care ports */
603 		cfg->num_cos_ldb_ports[1] = resources_asked->num_ldb_ports;
604 		break;
605 	case DLB2_COS_2:
606 		cfg->num_ldb_ports = 0; /* no don't care ports */
607 		cfg->num_cos_ldb_ports[2] = resources_asked->num_ldb_ports;
608 		break;
609 	case DLB2_COS_3:
610 		cfg->num_ldb_ports = 0; /* no don't care ports */
611 		cfg->num_cos_ldb_ports[3] =
612 			resources_asked->num_ldb_ports;
613 		break;
614 	case DLB2_COS_DEFAULT:
615 		/* all ldb ports are don't care ports from a cos perspective */
616 		cfg->num_ldb_ports =
617 			resources_asked->num_ldb_ports;
618 		break;
619 	}
620 
621 	if (device_version == DLB2_HW_V2)
622 		cfg->num_ldb_credits = resources_asked->num_ldb_credits;
623 
624 	cfg->num_atomic_inflights =
625 		DLB2_NUM_ATOMIC_INFLIGHTS_PER_QUEUE *
626 		cfg->num_ldb_queues;
627 
628 	cfg->num_hist_list_entries = resources_asked->num_ldb_ports *
629 		DLB2_NUM_HIST_LIST_ENTRIES_PER_LDB_PORT;
630 
631 	if (device_version == DLB2_HW_V2_5) {
632 		DLB2_LOG_DBG("sched domain create - ldb_qs=%d, ldb_ports=%d, dir_ports=%d, atomic_inflights=%d, hist_list_entries=%d, credits=%d\n",
633 			     cfg->num_ldb_queues,
634 			     resources_asked->num_ldb_ports,
635 			     cfg->num_dir_ports,
636 			     cfg->num_atomic_inflights,
637 			     cfg->num_hist_list_entries,
638 			     cfg->num_credits);
639 	} else {
640 		DLB2_LOG_DBG("sched domain create - ldb_qs=%d, ldb_ports=%d, dir_ports=%d, atomic_inflights=%d, hist_list_entries=%d, ldb_credits=%d, dir_credits=%d\n",
641 			     cfg->num_ldb_queues,
642 			     resources_asked->num_ldb_ports,
643 			     cfg->num_dir_ports,
644 			     cfg->num_atomic_inflights,
645 			     cfg->num_hist_list_entries,
646 			     cfg->num_ldb_credits,
647 			     cfg->num_dir_credits);
648 	}
649 
650 	/* Configure the QM */
651 
652 	ret = dlb2_iface_sched_domain_create(handle, cfg);
653 	if (ret < 0) {
654 		DLB2_LOG_ERR("dlb2: domain create failed, ret = %d, extra status: %s\n",
655 			     ret,
656 			     dlb2_error_strings[cfg->response.status]);
657 
658 		goto error_exit;
659 	}
660 
661 	handle->domain_id = cfg->response.id;
662 	handle->cfg.configured = true;
663 
664 error_exit:
665 
666 	return ret;
667 }
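
/*
 * Example of the class-of-service mapping above (illustrative): if the
 * class-of-service devarg selected DLB2_COS_2, handle->cos_id == DLB2_COS_2
 * and every requested LDB port is placed in num_cos_ldb_ports[2], with no
 * "don't care" ports. With DLB2_COS_DEFAULT, all LDB ports are requested as
 * "don't care" and their placement is left to the driver.
 */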
668 
669 static void
670 dlb2_hw_reset_sched_domain(const struct rte_eventdev *dev, bool reconfig)
671 {
672 	struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
673 	enum dlb2_configuration_state config_state;
674 	int i, j;
675 
676 	dlb2_iface_domain_reset(dlb2);
677 
678 	/* Free all dynamically allocated port memory */
679 	for (i = 0; i < dlb2->num_ports; i++)
680 		dlb2_free_qe_mem(&dlb2->ev_ports[i].qm_port);
681 
682 	/* If reconfiguring, mark the device's queues and ports as "previously
683 	 * configured." If the user doesn't reconfigure them, the PMD will
684 	 * reapply their previous configuration when the device is started.
685 	 */
686 	config_state = (reconfig) ? DLB2_PREV_CONFIGURED :
687 		DLB2_NOT_CONFIGURED;
688 
689 	for (i = 0; i < dlb2->num_ports; i++) {
690 		dlb2->ev_ports[i].qm_port.config_state = config_state;
691 		/* Reset setup_done so ports can be reconfigured */
692 		dlb2->ev_ports[i].setup_done = false;
693 		for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++)
694 			dlb2->ev_ports[i].link[j].mapped = false;
695 	}
696 
697 	for (i = 0; i < dlb2->num_queues; i++)
698 		dlb2->ev_queues[i].qm_queue.config_state = config_state;
699 
700 	for (i = 0; i < DLB2_MAX_NUM_QUEUES(DLB2_HW_V2_5); i++)
701 		dlb2->ev_queues[i].setup_done = false;
702 
703 	dlb2->num_ports = 0;
704 	dlb2->num_ldb_ports = 0;
705 	dlb2->num_dir_ports = 0;
706 	dlb2->num_queues = 0;
707 	dlb2->num_ldb_queues = 0;
708 	dlb2->num_dir_queues = 0;
709 	dlb2->configured = false;
710 }
711 
712 /* Note: 1 QM instance per QM device, QM instance/device == event device */
713 static int
714 dlb2_eventdev_configure(const struct rte_eventdev *dev)
715 {
716 	struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
717 	struct dlb2_hw_dev *handle = &dlb2->qm_instance;
718 	struct dlb2_hw_rsrcs *rsrcs = &handle->info.hw_rsrc_max;
719 	const struct rte_eventdev_data *data = dev->data;
720 	const struct rte_event_dev_config *config = &data->dev_conf;
721 	int ret;
722 
723 	/* If this eventdev is already configured, we must release the current
724 	 * scheduling domain before attempting to configure a new one.
725 	 */
726 	if (dlb2->configured) {
727 		dlb2_hw_reset_sched_domain(dev, true);
728 		ret = dlb2_hw_query_resources(dlb2);
729 		if (ret) {
730 			DLB2_LOG_ERR("get resources err=%d, devid=%d\n",
731 				     ret, data->dev_id);
732 			return ret;
733 		}
734 	}
735 
736 	if (config->nb_event_queues > rsrcs->num_queues) {
737 		DLB2_LOG_ERR("nb_event_queues parameter (%d) exceeds the QM device's capabilities (%d).\n",
738 			     config->nb_event_queues,
739 			     rsrcs->num_queues);
740 		return -EINVAL;
741 	}
742 	if (config->nb_event_ports > (rsrcs->num_ldb_ports
743 			+ rsrcs->num_dir_ports)) {
744 		DLB2_LOG_ERR("nb_event_ports parameter (%d) exceeds the QM device's capabilities (%d).\n",
745 			     config->nb_event_ports,
746 			     (rsrcs->num_ldb_ports + rsrcs->num_dir_ports));
747 		return -EINVAL;
748 	}
749 	if (config->nb_events_limit > rsrcs->nb_events_limit) {
750 		DLB2_LOG_ERR("nb_events_limit parameter (%d) exceeds the QM device's capabilities (%d).\n",
751 			     config->nb_events_limit,
752 			     rsrcs->nb_events_limit);
753 		return -EINVAL;
754 	}
755 
756 	if (config->event_dev_cfg & RTE_EVENT_DEV_CFG_PER_DEQUEUE_TIMEOUT)
757 		dlb2->global_dequeue_wait = false;
758 	else {
759 		uint32_t timeout32;
760 
761 		dlb2->global_dequeue_wait = true;
762 
763 		/* note size mismatch of timeout vals in eventdev lib. */
764 		timeout32 = config->dequeue_timeout_ns;
765 
766 		dlb2->global_dequeue_wait_ticks =
767 			timeout32 * (rte_get_timer_hz() / 1E9);
768 	}
769 
770 	/* Does this platform support umonitor/umwait? */
771 	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_WAITPKG))
772 		dlb2->umwait_allowed = true;
773 
774 	rsrcs->num_dir_ports = config->nb_single_link_event_port_queues;
775 	rsrcs->num_ldb_ports  = config->nb_event_ports - rsrcs->num_dir_ports;
776 	/* 1 dir queue per dir port */
777 	rsrcs->num_ldb_queues = config->nb_event_queues - rsrcs->num_dir_ports;
778 
779 	if (dlb2->version == DLB2_HW_V2_5) {
780 		rsrcs->num_credits = 0;
781 		if (rsrcs->num_ldb_queues || rsrcs->num_dir_ports)
782 			rsrcs->num_credits = config->nb_events_limit;
783 	} else {
784 		/* Scale down nb_events_limit by 2 for directed credits,
785 		 * since there are twice as many load-balanced credits.
786 		 */
787 		rsrcs->num_ldb_credits = 0;
788 		rsrcs->num_dir_credits = 0;
789 
790 		if (rsrcs->num_ldb_queues)
791 			rsrcs->num_ldb_credits = config->nb_events_limit;
792 		if (rsrcs->num_dir_ports)
793 			rsrcs->num_dir_credits = config->nb_events_limit / 2;
794 		if (dlb2->num_dir_credits_override != -1)
795 			rsrcs->num_dir_credits = dlb2->num_dir_credits_override;
796 	}
797 
798 	if (dlb2_hw_create_sched_domain(handle, rsrcs, dlb2->version) < 0) {
799 		DLB2_LOG_ERR("dlb2_hw_create_sched_domain failed\n");
800 		return -ENODEV;
801 	}
802 
803 	dlb2->new_event_limit = config->nb_events_limit;
804 	__atomic_store_n(&dlb2->inflights, 0, __ATOMIC_SEQ_CST);
805 
806 	/* Save number of ports/queues for this event dev */
807 	dlb2->num_ports = config->nb_event_ports;
808 	dlb2->num_queues = config->nb_event_queues;
809 	dlb2->num_dir_ports = rsrcs->num_dir_ports;
810 	dlb2->num_ldb_ports = dlb2->num_ports - dlb2->num_dir_ports;
811 	dlb2->num_ldb_queues = dlb2->num_queues - dlb2->num_dir_ports;
812 	dlb2->num_dir_queues = dlb2->num_dir_ports;
813 	if (dlb2->version == DLB2_HW_V2_5) {
814 		dlb2->credit_pool = rsrcs->num_credits;
815 		dlb2->max_credits = rsrcs->num_credits;
816 	} else {
817 		dlb2->ldb_credit_pool = rsrcs->num_ldb_credits;
818 		dlb2->max_ldb_credits = rsrcs->num_ldb_credits;
819 		dlb2->dir_credit_pool = rsrcs->num_dir_credits;
820 		dlb2->max_dir_credits = rsrcs->num_dir_credits;
821 	}
822 
823 	dlb2->configured = true;
824 
825 	return 0;
826 }
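
/*
 * Worked example of the credit provisioning above (numbers are illustrative):
 * on V2 hardware, with nb_events_limit = 2048, at least one load-balanced
 * queue and at least one single-link port, the PMD requests
 * num_ldb_credits = 2048 and num_dir_credits = 2048 / 2 = 1024, unless the
 * directed amount is overridden by devarg. On V2.5 the pools are combined
 * and num_credits = 2048.
 */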
827 
828 static void
829 dlb2_eventdev_port_default_conf_get(struct rte_eventdev *dev,
830 				    uint8_t port_id,
831 				    struct rte_event_port_conf *port_conf)
832 {
833 	RTE_SET_USED(port_id);
834 	struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
835 
836 	port_conf->new_event_threshold = dlb2->new_event_limit;
837 	port_conf->dequeue_depth = 32;
838 	port_conf->enqueue_depth = DLB2_MAX_ENQUEUE_DEPTH;
839 	port_conf->event_port_cfg = 0;
840 }
841 
842 static void
843 dlb2_eventdev_queue_default_conf_get(struct rte_eventdev *dev,
844 				     uint8_t queue_id,
845 				     struct rte_event_queue_conf *queue_conf)
846 {
847 	RTE_SET_USED(dev);
848 	RTE_SET_USED(queue_id);
849 
850 	queue_conf->nb_atomic_flows = 1024;
851 	queue_conf->nb_atomic_order_sequences = 64;
852 	queue_conf->event_queue_cfg = 0;
853 	queue_conf->priority = 0;
854 }
855 
856 static int32_t
857 dlb2_get_sn_allocation(struct dlb2_eventdev *dlb2, int group)
858 {
859 	struct dlb2_hw_dev *handle = &dlb2->qm_instance;
860 	struct dlb2_get_sn_allocation_args cfg;
861 	int ret;
862 
863 	cfg.group = group;
864 
865 	ret = dlb2_iface_get_sn_allocation(handle, &cfg);
866 	if (ret < 0) {
867 		DLB2_LOG_ERR("dlb2: get_sn_allocation ret=%d (driver status: %s)\n",
868 			     ret, dlb2_error_strings[cfg.response.status]);
869 		return ret;
870 	}
871 
872 	return cfg.response.id;
873 }
874 
875 static int
876 dlb2_set_sn_allocation(struct dlb2_eventdev *dlb2, int group, int num)
877 {
878 	struct dlb2_hw_dev *handle = &dlb2->qm_instance;
879 	struct dlb2_set_sn_allocation_args cfg;
880 	int ret;
881 
882 	cfg.num = num;
883 	cfg.group = group;
884 
885 	ret = dlb2_iface_set_sn_allocation(handle, &cfg);
886 	if (ret < 0) {
887 		DLB2_LOG_ERR("dlb2: set_sn_allocation ret=%d (driver status: %s)\n",
888 			     ret, dlb2_error_strings[cfg.response.status]);
889 		return ret;
890 	}
891 
892 	return ret;
893 }
894 
895 static int32_t
896 dlb2_get_sn_occupancy(struct dlb2_eventdev *dlb2, int group)
897 {
898 	struct dlb2_hw_dev *handle = &dlb2->qm_instance;
899 	struct dlb2_get_sn_occupancy_args cfg;
900 	int ret;
901 
902 	cfg.group = group;
903 
904 	ret = dlb2_iface_get_sn_occupancy(handle, &cfg);
905 	if (ret < 0) {
906 		DLB2_LOG_ERR("dlb2: get_sn_occupancy ret=%d (driver status: %s)\n",
907 			     ret, dlb2_error_strings[cfg.response.status]);
908 		return ret;
909 	}
910 
911 	return cfg.response.id;
912 }
913 
914 /* Query the current sequence number allocations and, if they conflict with the
915  * requested LDB queue configuration, attempt to re-allocate sequence numbers.
916  * This is best-effort; if it fails, the PMD still attempts to configure the
917  * load-balanced queue, and that configuration will return an error.
918  */
919 static void
920 dlb2_program_sn_allocation(struct dlb2_eventdev *dlb2,
921 			   const struct rte_event_queue_conf *queue_conf)
922 {
923 	int grp_occupancy[DLB2_NUM_SN_GROUPS];
924 	int grp_alloc[DLB2_NUM_SN_GROUPS];
925 	int i, sequence_numbers;
926 
927 	sequence_numbers = (int)queue_conf->nb_atomic_order_sequences;
928 
929 	for (i = 0; i < DLB2_NUM_SN_GROUPS; i++) {
930 		int total_slots;
931 
932 		grp_alloc[i] = dlb2_get_sn_allocation(dlb2, i);
933 		if (grp_alloc[i] < 0)
934 			return;
935 
936 		total_slots = DLB2_MAX_LDB_SN_ALLOC / grp_alloc[i];
937 
938 		grp_occupancy[i] = dlb2_get_sn_occupancy(dlb2, i);
939 		if (grp_occupancy[i] < 0)
940 			return;
941 
942 		/* DLB has at least one available slot for the requested
943 		 * sequence numbers, so no further configuration required.
944 		 */
945 		if (grp_alloc[i] == sequence_numbers &&
946 		    grp_occupancy[i] < total_slots)
947 			return;
948 	}
949 
950 	/* None of the sequence number groups are configured for the requested
951 	 * sequence numbers, so we have to reconfigure one of them. This is
952 	 * only possible if a group is not in use.
953 	 */
954 	for (i = 0; i < DLB2_NUM_SN_GROUPS; i++) {
955 		if (grp_occupancy[i] == 0)
956 			break;
957 	}
958 
959 	if (i == DLB2_NUM_SN_GROUPS) {
960 		DLB2_LOG_ERR("[%s()] No groups with %d sequence_numbers are available or have free slots\n",
961 		       __func__, sequence_numbers);
962 		return;
963 	}
964 
965 	/* Attempt to configure slot i with the requested number of sequence
966 	 * numbers. Ignore the return value -- if this fails, the error will be
967 	 * caught during subsequent queue configuration.
968 	 */
969 	dlb2_set_sn_allocation(dlb2, i, sequence_numbers);
970 }
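
/*
 * Example of the slot accounting above (the 64 is illustrative): if group i
 * is currently allocated grp_alloc[i] = 64 sequence numbers per queue, it
 * exposes DLB2_MAX_LDB_SN_ALLOC / 64 slots. A queue requesting 64 sequence
 * numbers can share that group as long as grp_occupancy[i] is below the slot
 * count; otherwise an entirely unused group is reprogrammed via
 * dlb2_set_sn_allocation().
 */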
971 
972 static int32_t
973 dlb2_hw_create_ldb_queue(struct dlb2_eventdev *dlb2,
974 			 struct dlb2_eventdev_queue *ev_queue,
975 			 const struct rte_event_queue_conf *evq_conf)
976 {
977 	struct dlb2_hw_dev *handle = &dlb2->qm_instance;
978 	struct dlb2_queue *queue = &ev_queue->qm_queue;
979 	struct dlb2_create_ldb_queue_args cfg;
980 	int32_t ret;
981 	uint32_t qm_qid;
982 	int sched_type = -1;
983 
984 	if (evq_conf == NULL)
985 		return -EINVAL;
986 
987 	if (evq_conf->event_queue_cfg & RTE_EVENT_QUEUE_CFG_ALL_TYPES) {
988 		if (evq_conf->nb_atomic_order_sequences != 0)
989 			sched_type = RTE_SCHED_TYPE_ORDERED;
990 		else
991 			sched_type = RTE_SCHED_TYPE_PARALLEL;
992 	} else
993 		sched_type = evq_conf->schedule_type;
994 
995 	cfg.num_atomic_inflights = DLB2_NUM_ATOMIC_INFLIGHTS_PER_QUEUE;
996 	cfg.num_sequence_numbers = evq_conf->nb_atomic_order_sequences;
997 	cfg.num_qid_inflights = evq_conf->nb_atomic_order_sequences;
998 
999 	if (sched_type != RTE_SCHED_TYPE_ORDERED) {
1000 		cfg.num_sequence_numbers = 0;
1001 		cfg.num_qid_inflights = 2048;
1002 	}
1003 
1004 	/* The app should set this to the number of hardware flows it wants, not
1005 	 * the overall number of flows it is going to use. E.g. if the app uses
1006 	 * 64 flows and sets the compression level to 64, in the best case it
1007 	 * gets 64 unique hashed flows in hardware.
1008 	 */
1009 	switch (evq_conf->nb_atomic_flows) {
1010 	/* Valid DLB2 compression levels */
1011 	case 64:
1012 	case 128:
1013 	case 256:
1014 	case 512:
1015 	case (1 * 1024): /* 1K */
1016 	case (2 * 1024): /* 2K */
1017 	case (4 * 1024): /* 4K */
1018 	case (64 * 1024): /* 64K */
1019 		cfg.lock_id_comp_level = evq_conf->nb_atomic_flows;
1020 		break;
1021 	default:
1022 		/* Invalid compression level */
1023 		cfg.lock_id_comp_level = 0; /* no compression */
1024 	}
1025 
1026 	if (ev_queue->depth_threshold == 0) {
1027 		cfg.depth_threshold = dlb2->default_depth_thresh;
1028 		ev_queue->depth_threshold =
1029 			dlb2->default_depth_thresh;
1030 	} else
1031 		cfg.depth_threshold = ev_queue->depth_threshold;
1032 
1033 	ret = dlb2_iface_ldb_queue_create(handle, &cfg);
1034 	if (ret < 0) {
1035 		DLB2_LOG_ERR("dlb2: create LB event queue error, ret=%d (driver status: %s)\n",
1036 			     ret, dlb2_error_strings[cfg.response.status]);
1037 		return -EINVAL;
1038 	}
1039 
1040 	qm_qid = cfg.response.id;
1041 
1042 	/* Save off queue config for debug, resource lookups, and reconfig */
1043 	queue->num_qid_inflights = cfg.num_qid_inflights;
1044 	queue->num_atm_inflights = cfg.num_atomic_inflights;
1045 
1046 	queue->sched_type = sched_type;
1047 	queue->config_state = DLB2_CONFIGURED;
1048 
1049 	DLB2_LOG_DBG("Created LB event queue %d, nb_inflights=%d, nb_seq=%d, qid inflights=%d\n",
1050 		     qm_qid,
1051 		     cfg.num_atomic_inflights,
1052 		     cfg.num_sequence_numbers,
1053 		     cfg.num_qid_inflights);
1054 
1055 	return qm_qid;
1056 }
1057 
1058 static int
1059 dlb2_eventdev_ldb_queue_setup(struct rte_eventdev *dev,
1060 			      struct dlb2_eventdev_queue *ev_queue,
1061 			      const struct rte_event_queue_conf *queue_conf)
1062 {
1063 	struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
1064 	int32_t qm_qid;
1065 
1066 	if (queue_conf->nb_atomic_order_sequences)
1067 		dlb2_program_sn_allocation(dlb2, queue_conf);
1068 
1069 	qm_qid = dlb2_hw_create_ldb_queue(dlb2, ev_queue, queue_conf);
1070 	if (qm_qid < 0) {
1071 		DLB2_LOG_ERR("Failed to create the load-balanced queue\n");
1072 
1073 		return qm_qid;
1074 	}
1075 
1076 	dlb2->qm_ldb_to_ev_queue_id[qm_qid] = ev_queue->id;
1077 
1078 	ev_queue->qm_queue.id = qm_qid;
1079 
1080 	return 0;
1081 }
1082 
1083 static int dlb2_num_dir_queues_setup(struct dlb2_eventdev *dlb2)
1084 {
1085 	int i, num = 0;
1086 
1087 	for (i = 0; i < dlb2->num_queues; i++) {
1088 		if (dlb2->ev_queues[i].setup_done &&
1089 		    dlb2->ev_queues[i].qm_queue.is_directed)
1090 			num++;
1091 	}
1092 
1093 	return num;
1094 }
1095 
1096 static void
1097 dlb2_queue_link_teardown(struct dlb2_eventdev *dlb2,
1098 			 struct dlb2_eventdev_queue *ev_queue)
1099 {
1100 	struct dlb2_eventdev_port *ev_port;
1101 	int i, j;
1102 
1103 	for (i = 0; i < dlb2->num_ports; i++) {
1104 		ev_port = &dlb2->ev_ports[i];
1105 
1106 		for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++) {
1107 			if (!ev_port->link[j].valid ||
1108 			    ev_port->link[j].queue_id != ev_queue->id)
1109 				continue;
1110 
1111 			ev_port->link[j].valid = false;
1112 			ev_port->num_links--;
1113 		}
1114 	}
1115 
1116 	ev_queue->num_links = 0;
1117 }
1118 
1119 static int
1120 dlb2_eventdev_queue_setup(struct rte_eventdev *dev,
1121 			  uint8_t ev_qid,
1122 			  const struct rte_event_queue_conf *queue_conf)
1123 {
1124 	struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
1125 	struct dlb2_eventdev_queue *ev_queue;
1126 	int ret;
1127 
1128 	if (queue_conf == NULL)
1129 		return -EINVAL;
1130 
1131 	if (ev_qid >= dlb2->num_queues)
1132 		return -EINVAL;
1133 
1134 	ev_queue = &dlb2->ev_queues[ev_qid];
1135 
1136 	ev_queue->qm_queue.is_directed = queue_conf->event_queue_cfg &
1137 		RTE_EVENT_QUEUE_CFG_SINGLE_LINK;
1138 	ev_queue->id = ev_qid;
1139 	ev_queue->conf = *queue_conf;
1140 
1141 	if (!ev_queue->qm_queue.is_directed) {
1142 		ret = dlb2_eventdev_ldb_queue_setup(dev, ev_queue, queue_conf);
1143 	} else {
1144 		/* The directed queue isn't set up until link time, at which
1145 		 * point we know its directed port ID. Directed queue setup
1146 		 * will only fail if this queue is already set up or there are
1147 		 * no directed queues left to configure.
1148 		 */
1149 		ret = 0;
1150 
1151 		ev_queue->qm_queue.config_state = DLB2_NOT_CONFIGURED;
1152 
1153 		if (ev_queue->setup_done ||
1154 		    dlb2_num_dir_queues_setup(dlb2) == dlb2->num_dir_queues)
1155 			ret = -EINVAL;
1156 	}
1157 
1158 	/* Tear down pre-existing port->queue links */
1159 	if (!ret && dlb2->run_state == DLB2_RUN_STATE_STOPPED)
1160 		dlb2_queue_link_teardown(dlb2, ev_queue);
1161 
1162 	if (!ret)
1163 		ev_queue->setup_done = true;
1164 
1165 	return ret;
1166 }
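
/*
 * Illustrative application-side queue_conf that lands in this callback for an
 * ordered load-balanced queue (values are examples only); single-link queues
 * instead set RTE_EVENT_QUEUE_CFG_SINGLE_LINK and are finished at link time,
 * as noted above:
 *
 *	struct rte_event_queue_conf qc = {
 *		.schedule_type = RTE_SCHED_TYPE_ORDERED,
 *		.nb_atomic_flows = 1024,
 *		.nb_atomic_order_sequences = 64,
 *	};
 *	rte_event_queue_setup(dev_id, queue_id, &qc);
 */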
1167 
1168 static int
1169 dlb2_init_consume_qe(struct dlb2_port *qm_port, char *mz_name)
1170 {
1171 	struct dlb2_cq_pop_qe *qe;
1172 
1173 	qe = rte_zmalloc(mz_name,
1174 			DLB2_NUM_QES_PER_CACHE_LINE *
1175 				sizeof(struct dlb2_cq_pop_qe),
1176 			RTE_CACHE_LINE_SIZE);
1177 
1178 	if (qe == NULL)	{
1179 		DLB2_LOG_ERR("dlb2: no memory for consume_qe\n");
1180 		return -ENOMEM;
1181 	}
1182 	qm_port->consume_qe = qe;
1183 
1184 	qe->qe_valid = 0;
1185 	qe->qe_frag = 0;
1186 	qe->qe_comp = 0;
1187 	qe->cq_token = 1;
1188 	/* Tokens value is 0-based; i.e. '0' returns 1 token, '1' returns 2,
1189 	 * and so on.
1190 	 */
1191 	qe->tokens = 0;	/* set at run time */
1192 	qe->meas_lat = 0;
1193 	qe->no_dec = 0;
1194 	/* Completion IDs are disabled */
1195 	qe->cmp_id = 0;
1196 
1197 	return 0;
1198 }
1199 
1200 static int
1201 dlb2_init_int_arm_qe(struct dlb2_port *qm_port, char *mz_name)
1202 {
1203 	struct dlb2_enqueue_qe *qe;
1204 
1205 	qe = rte_zmalloc(mz_name,
1206 			DLB2_NUM_QES_PER_CACHE_LINE *
1207 				sizeof(struct dlb2_enqueue_qe),
1208 			RTE_CACHE_LINE_SIZE);
1209 
1210 	if (qe == NULL) {
1211 		DLB2_LOG_ERR("dlb2: no memory for complete_qe\n");
1212 		return -ENOMEM;
1213 	}
1214 	qm_port->int_arm_qe = qe;
1215 
1216 	/* V2 - INT ARM is CQ_TOKEN + FRAG */
1217 	qe->qe_valid = 0;
1218 	qe->qe_frag = 1;
1219 	qe->qe_comp = 0;
1220 	qe->cq_token = 1;
1221 	qe->meas_lat = 0;
1222 	qe->no_dec = 0;
1223 	/* Completion IDs are disabled */
1224 	qe->cmp_id = 0;
1225 
1226 	return 0;
1227 }
1228 
1229 static int
1230 dlb2_init_qe_mem(struct dlb2_port *qm_port, char *mz_name)
1231 {
1232 	int ret, sz;
1233 
1234 	sz = DLB2_NUM_QES_PER_CACHE_LINE * sizeof(struct dlb2_enqueue_qe);
1235 
1236 	qm_port->qe4 = rte_zmalloc(mz_name, sz, RTE_CACHE_LINE_SIZE);
1237 
1238 	if (qm_port->qe4 == NULL) {
1239 		DLB2_LOG_ERR("dlb2: no qe4 memory\n");
1240 		ret = -ENOMEM;
1241 		goto error_exit;
1242 	}
1243 
1244 	ret = dlb2_init_int_arm_qe(qm_port, mz_name);
1245 	if (ret < 0) {
1246 		DLB2_LOG_ERR("dlb2: dlb2_init_int_arm_qe ret=%d\n", ret);
1247 		goto error_exit;
1248 	}
1249 
1250 	ret = dlb2_init_consume_qe(qm_port, mz_name);
1251 	if (ret < 0) {
1252 		DLB2_LOG_ERR("dlb2: dlb2_init_consume_qe ret=%d\n", ret);
1253 		goto error_exit;
1254 	}
1255 
1256 	return 0;
1257 
1258 error_exit:
1259 
1260 	dlb2_free_qe_mem(qm_port);
1261 
1262 	return ret;
1263 }
1264 
1265 static inline uint16_t
1266 dlb2_event_enqueue_delayed(void *event_port,
1267 			   const struct rte_event events[]);
1268 
1269 static inline uint16_t
1270 dlb2_event_enqueue_burst_delayed(void *event_port,
1271 				 const struct rte_event events[],
1272 				 uint16_t num);
1273 
1274 static inline uint16_t
1275 dlb2_event_enqueue_new_burst_delayed(void *event_port,
1276 				     const struct rte_event events[],
1277 				     uint16_t num);
1278 
1279 static inline uint16_t
1280 dlb2_event_enqueue_forward_burst_delayed(void *event_port,
1281 					 const struct rte_event events[],
1282 					 uint16_t num);
1283 
1284 /* Generate the required bitmask for rotate-style expected QE gen bits.
1285  * This requires a pattern of 1s and 0s, starting with the expected bits
1286  * set to 1, so that when hardware writes 0s they read as "new". This
1287  * requires the ring size to be a power of 2 to wrap correctly.
1288  */
1289 static void
1290 dlb2_hw_cq_bitmask_init(struct dlb2_port *qm_port, uint32_t cq_depth)
1291 {
1292 	uint64_t cq_build_mask = 0;
1293 	uint32_t i;
1294 
1295 	if (cq_depth > 64)
1296 		return; /* need to fall back to scalar code */
1297 
1298 	/*
1299 	 * One u64 of all ones and the other of all zeros is the correct starting
1300 	 * bit pattern. Special-casing == 64 is easier than adapting the loop logic.
1301 	 */
1302 	if (cq_depth == 64) {
1303 		qm_port->cq_rolling_mask = 0;
1304 		qm_port->cq_rolling_mask_2 = -1;
1305 		return;
1306 	}
1307 
1308 	for (i = 0; i < 64; i += (cq_depth * 2))
1309 		cq_build_mask |= ((1ULL << cq_depth) - 1) << (i + cq_depth);
1310 
1311 	qm_port->cq_rolling_mask = cq_build_mask;
1312 	qm_port->cq_rolling_mask_2 = cq_build_mask;
1313 }
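
/*
 * Worked example of the loop above: for cq_depth == 16, the iterations at
 * i = 0 and i = 32 set bits [16..31] and [48..63], so both cq_rolling_mask
 * and cq_rolling_mask_2 start as 0xFFFF0000FFFF0000ULL, i.e. alternating runs
 * of 16 expected-1 and 16 expected-0 gen bits that rotate as the CQ index
 * wraps.
 */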
1314 
1315 static int
1316 dlb2_hw_create_ldb_port(struct dlb2_eventdev *dlb2,
1317 			struct dlb2_eventdev_port *ev_port,
1318 			uint32_t dequeue_depth,
1319 			uint32_t enqueue_depth)
1320 {
1321 	struct dlb2_hw_dev *handle = &dlb2->qm_instance;
1322 	struct dlb2_create_ldb_port_args cfg = { {0} };
1323 	int ret;
1324 	struct dlb2_port *qm_port = NULL;
1325 	char mz_name[RTE_MEMZONE_NAMESIZE];
1326 	uint32_t qm_port_id;
1327 	uint16_t ldb_credit_high_watermark = 0;
1328 	uint16_t dir_credit_high_watermark = 0;
1329 	uint16_t credit_high_watermark = 0;
1330 
1331 	if (handle == NULL)
1332 		return -EINVAL;
1333 
1334 	if (dequeue_depth < DLB2_MIN_CQ_DEPTH) {
1335 		DLB2_LOG_ERR("dlb2: invalid dequeue_depth, must be at least %d\n",
1336 			     DLB2_MIN_CQ_DEPTH);
1337 		return -EINVAL;
1338 	}
1339 
1340 	if (enqueue_depth < DLB2_MIN_ENQUEUE_DEPTH) {
1341 		DLB2_LOG_ERR("dlb2: invalid enqueue_depth, must be at least %d\n",
1342 			     DLB2_MIN_ENQUEUE_DEPTH);
1343 		return -EINVAL;
1344 	}
1345 
1346 	rte_spinlock_lock(&handle->resource_lock);
1347 
1348 	/* We round up to the next power of 2 if necessary */
1349 	cfg.cq_depth = rte_align32pow2(dequeue_depth);
1350 	cfg.cq_depth_threshold = 1;
1351 
1352 	cfg.cq_history_list_size = DLB2_NUM_HIST_LIST_ENTRIES_PER_LDB_PORT;
1353 
1354 	if (handle->cos_id == DLB2_COS_DEFAULT)
1355 		cfg.cos_id = 0;
1356 	else
1357 		cfg.cos_id = handle->cos_id;
1358 
1359 	cfg.cos_strict = 0;
1360 
1361 	/* User controls the LDB high watermark via enqueue depth. The DIR high
1362 	 * watermark is equal, unless the directed credit pool is too small.
1363 	 */
1364 	if (dlb2->version == DLB2_HW_V2) {
1365 		ldb_credit_high_watermark = enqueue_depth;
1366 		/* If there are no directed ports, the kernel driver will
1367 		 * ignore this port's directed credit settings. Don't use
1368 		 * enqueue_depth if it would require more directed credits
1369 		 * than are available.
1370 		 */
1371 		dir_credit_high_watermark =
1372 			RTE_MIN(enqueue_depth,
1373 				handle->cfg.num_dir_credits / dlb2->num_ports);
1374 	} else
1375 		credit_high_watermark = enqueue_depth;
1376 
1377 	/* Per QM values */
1378 
1379 	ret = dlb2_iface_ldb_port_create(handle, &cfg,  dlb2->poll_mode);
1380 	if (ret < 0) {
1381 		DLB2_LOG_ERR("dlb2: dlb2_ldb_port_create error, ret=%d (driver status: %s)\n",
1382 			     ret, dlb2_error_strings[cfg.response.status]);
1383 		goto error_exit;
1384 	}
1385 
1386 	qm_port_id = cfg.response.id;
1387 
1388 	DLB2_LOG_DBG("dlb2: ev_port %d uses qm LB port %d <<<<<\n",
1389 		     ev_port->id, qm_port_id);
1390 
1391 	qm_port = &ev_port->qm_port;
1392 	qm_port->ev_port = ev_port; /* back ptr */
1393 	qm_port->dlb2 = dlb2; /* back ptr */
1394 	/*
1395 	 * Allocate and init local qe struct(s).
1396 	 * Note: MOVDIR64 requires the enqueue QE (qe4) to be aligned.
1397 	 */
1398 
1399 	snprintf(mz_name, sizeof(mz_name), "dlb2_ldb_port%d",
1400 		 ev_port->id);
1401 
1402 	ret = dlb2_init_qe_mem(qm_port, mz_name);
1403 	if (ret < 0) {
1404 		DLB2_LOG_ERR("dlb2: init_qe_mem failed, ret=%d\n", ret);
1405 		goto error_exit;
1406 	}
1407 
1408 	qm_port->id = qm_port_id;
1409 
1410 	if (dlb2->version == DLB2_HW_V2) {
1411 		qm_port->cached_ldb_credits = 0;
1412 		qm_port->cached_dir_credits = 0;
1413 	} else
1414 		qm_port->cached_credits = 0;
1415 
1416 	/* CQs with depth < 8 use an 8-entry queue, but withhold credits so
1417 	 * the effective depth is smaller.
1418 	 */
1419 	qm_port->cq_depth = cfg.cq_depth <= 8 ? 8 : cfg.cq_depth;
1420 	qm_port->cq_idx = 0;
1421 	qm_port->cq_idx_unmasked = 0;
1422 
1423 	if (dlb2->poll_mode == DLB2_CQ_POLL_MODE_SPARSE)
1424 		qm_port->cq_depth_mask = (qm_port->cq_depth * 4) - 1;
1425 	else
1426 		qm_port->cq_depth_mask = qm_port->cq_depth - 1;
1427 
1428 	qm_port->gen_bit_shift = __builtin_popcount(qm_port->cq_depth_mask);
1429 	/* starting value of gen bit - it toggles at wrap time */
1430 	qm_port->gen_bit = 1;
1431 
1432 	dlb2_hw_cq_bitmask_init(qm_port, qm_port->cq_depth);
1433 
1434 	qm_port->int_armed = false;
1435 
1436 	/* Save off for later use in info and lookup APIs. */
1437 	qm_port->qid_mappings = &dlb2->qm_ldb_to_ev_queue_id[0];
1438 
1439 	qm_port->dequeue_depth = dequeue_depth;
1440 	qm_port->token_pop_thresh = dequeue_depth;
1441 
1442 	/* The default enqueue functions do not include delayed-pop support for
1443 	 * performance reasons.
1444 	 */
1445 	if (qm_port->token_pop_mode == DELAYED_POP) {
1446 		dlb2->event_dev->enqueue = dlb2_event_enqueue_delayed;
1447 		dlb2->event_dev->enqueue_burst =
1448 			dlb2_event_enqueue_burst_delayed;
1449 		dlb2->event_dev->enqueue_new_burst =
1450 			dlb2_event_enqueue_new_burst_delayed;
1451 		dlb2->event_dev->enqueue_forward_burst =
1452 			dlb2_event_enqueue_forward_burst_delayed;
1453 	}
1454 
1455 	qm_port->owed_tokens = 0;
1456 	qm_port->issued_releases = 0;
1457 
1458 	/* Save config message too. */
1459 	rte_memcpy(&qm_port->cfg.ldb, &cfg, sizeof(qm_port->cfg.ldb));
1460 
1461 	/* update state */
1462 	qm_port->state = PORT_STARTED; /* enabled at create time */
1463 	qm_port->config_state = DLB2_CONFIGURED;
1464 
1465 	if (dlb2->version == DLB2_HW_V2) {
1466 		qm_port->dir_credits = dir_credit_high_watermark;
1467 		qm_port->ldb_credits = ldb_credit_high_watermark;
1468 		qm_port->credit_pool[DLB2_DIR_QUEUE] = &dlb2->dir_credit_pool;
1469 		qm_port->credit_pool[DLB2_LDB_QUEUE] = &dlb2->ldb_credit_pool;
1470 
1471 		DLB2_LOG_DBG("dlb2: created ldb port %d, depth = %d, ldb credits=%d, dir credits=%d\n",
1472 			     qm_port_id,
1473 			     dequeue_depth,
1474 			     qm_port->ldb_credits,
1475 			     qm_port->dir_credits);
1476 	} else {
1477 		qm_port->credits = credit_high_watermark;
1478 		qm_port->credit_pool[DLB2_COMBINED_POOL] = &dlb2->credit_pool;
1479 
1480 		DLB2_LOG_DBG("dlb2: created ldb port %d, depth = %d, credits=%d\n",
1481 			     qm_port_id,
1482 			     dequeue_depth,
1483 			     qm_port->credits);
1484 	}
1485 
1486 	qm_port->use_scalar = false;
1487 
1488 #if (!defined RTE_ARCH_X86_64)
1489 	qm_port->use_scalar = true;
1490 #else
1491 	if ((qm_port->cq_depth > 64) ||
1492 	    (!rte_is_power_of_2(qm_port->cq_depth)) ||
1493 	    (dlb2->vector_opts_enabled == false))
1494 		qm_port->use_scalar = true;
1495 #endif
1496 
1497 	rte_spinlock_unlock(&handle->resource_lock);
1498 
1499 	return 0;
1500 
1501 error_exit:
1502 
1503 	if (qm_port)
1504 		dlb2_free_qe_mem(qm_port);
1505 
1506 	rte_spinlock_unlock(&handle->resource_lock);
1507 
1508 	DLB2_LOG_ERR("dlb2: create ldb port failed!\n");
1509 
1510 	return ret;
1511 }
1512 
1513 static void
1514 dlb2_port_link_teardown(struct dlb2_eventdev *dlb2,
1515 			struct dlb2_eventdev_port *ev_port)
1516 {
1517 	struct dlb2_eventdev_queue *ev_queue;
1518 	int i;
1519 
1520 	for (i = 0; i < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; i++) {
1521 		if (!ev_port->link[i].valid)
1522 			continue;
1523 
1524 		ev_queue = &dlb2->ev_queues[ev_port->link[i].queue_id];
1525 
1526 		ev_port->link[i].valid = false;
1527 		ev_port->num_links--;
1528 		ev_queue->num_links--;
1529 	}
1530 }
1531 
1532 static int
1533 dlb2_hw_create_dir_port(struct dlb2_eventdev *dlb2,
1534 			struct dlb2_eventdev_port *ev_port,
1535 			uint32_t dequeue_depth,
1536 			uint32_t enqueue_depth)
1537 {
1538 	struct dlb2_hw_dev *handle = &dlb2->qm_instance;
1539 	struct dlb2_create_dir_port_args cfg = { {0} };
1540 	int ret;
1541 	struct dlb2_port *qm_port = NULL;
1542 	char mz_name[RTE_MEMZONE_NAMESIZE];
1543 	uint32_t qm_port_id;
1544 	uint16_t ldb_credit_high_watermark = 0;
1545 	uint16_t dir_credit_high_watermark = 0;
1546 	uint16_t credit_high_watermark = 0;
1547 
1548 	if (dlb2 == NULL || handle == NULL)
1549 		return -EINVAL;
1550 
1551 	if (dequeue_depth < DLB2_MIN_CQ_DEPTH) {
1552 		DLB2_LOG_ERR("dlb2: invalid dequeue_depth, must be %d-%d\n",
1553 			     DLB2_MIN_CQ_DEPTH, DLB2_MAX_INPUT_QUEUE_DEPTH);
1554 		return -EINVAL;
1555 	}
1556 
1557 	if (enqueue_depth < DLB2_MIN_ENQUEUE_DEPTH) {
1558 		DLB2_LOG_ERR("dlb2: invalid enqueue_depth, must be at least %d\n",
1559 			     DLB2_MIN_ENQUEUE_DEPTH);
1560 		return -EINVAL;
1561 	}
1562 
1563 	rte_spinlock_lock(&handle->resource_lock);
1564 
1565 	/* Directed queues are configured at link time. */
1566 	cfg.queue_id = -1;
1567 
1568 	/* We round up to the next power of 2 if necessary */
1569 	cfg.cq_depth = rte_align32pow2(dequeue_depth);
1570 	cfg.cq_depth_threshold = 1;
1571 
1572 	/* User controls the LDB high watermark via enqueue depth. The DIR high
1573 	 * watermark is equal, unless the directed credit pool is too small.
1574 	 */
1575 	if (dlb2->version == DLB2_HW_V2) {
1576 		ldb_credit_high_watermark = enqueue_depth;
1577 		/* Don't use enqueue_depth if it would require more directed
1578 		 * credits than are available.
1579 		 */
1580 		dir_credit_high_watermark =
1581 			RTE_MIN(enqueue_depth,
1582 				handle->cfg.num_dir_credits / dlb2->num_ports);
1583 	} else
1584 		credit_high_watermark = enqueue_depth;
1585 
1586 	/* Per QM values */
1587 
1588 	ret = dlb2_iface_dir_port_create(handle, &cfg,  dlb2->poll_mode);
1589 	if (ret < 0) {
1590 		DLB2_LOG_ERR("dlb2: dlb2_dir_port_create error, ret=%d (driver status: %s)\n",
1591 			     ret, dlb2_error_strings[cfg.response.status]);
1592 		goto error_exit;
1593 	}
1594 
1595 	qm_port_id = cfg.response.id;
1596 
1597 	DLB2_LOG_DBG("dlb2: ev_port %d uses qm DIR port %d <<<<<\n",
1598 		     ev_port->id, qm_port_id);
1599 
1600 	qm_port = &ev_port->qm_port;
1601 	qm_port->ev_port = ev_port; /* back ptr */
1602 	qm_port->dlb2 = dlb2;  /* back ptr */
1603 
1604 	/*
1605 	 * Init local qe struct(s).
1606 	 * Note: MOVDIR64 requires the enqueue QE to be aligned
1607 	 */
1608 
1609 	snprintf(mz_name, sizeof(mz_name), "dlb2_dir_port%d",
1610 		 ev_port->id);
1611 
1612 	ret = dlb2_init_qe_mem(qm_port, mz_name);
1613 
1614 	if (ret < 0) {
1615 		DLB2_LOG_ERR("dlb2: init_qe_mem failed, ret=%d\n", ret);
1616 		goto error_exit;
1617 	}
1618 
1619 	qm_port->id = qm_port_id;
1620 
1621 	if (dlb2->version == DLB2_HW_V2) {
1622 		qm_port->cached_ldb_credits = 0;
1623 		qm_port->cached_dir_credits = 0;
1624 	} else
1625 		qm_port->cached_credits = 0;
1626 
1627 	/* CQs with depth < 8 use an 8-entry queue, but withhold credits so
1628 	 * the effective depth is smaller.
1629 	 */
1630 	qm_port->cq_depth = cfg.cq_depth <= 8 ? 8 : cfg.cq_depth;
1631 	qm_port->cq_idx = 0;
1632 	qm_port->cq_idx_unmasked = 0;
1633 
1634 	if (dlb2->poll_mode == DLB2_CQ_POLL_MODE_SPARSE)
1635 		qm_port->cq_depth_mask = (cfg.cq_depth * 4) - 1;
1636 	else
1637 		qm_port->cq_depth_mask = cfg.cq_depth - 1;
1638 
1639 	qm_port->gen_bit_shift = __builtin_popcount(qm_port->cq_depth_mask);
1640 	/* starting value of gen bit - it toggles at wrap time */
1641 	qm_port->gen_bit = 1;
1642 	dlb2_hw_cq_bitmask_init(qm_port, qm_port->cq_depth);
1643 
1644 	qm_port->int_armed = false;
1645 
1646 	/* Save off for later use in info and lookup APIs. */
1647 	qm_port->qid_mappings = &dlb2->qm_dir_to_ev_queue_id[0];
1648 
1649 	qm_port->dequeue_depth = dequeue_depth;
1650 
1651 	/* Directed ports are auto-pop, by default. */
1652 	qm_port->token_pop_mode = AUTO_POP;
1653 	qm_port->owed_tokens = 0;
1654 	qm_port->issued_releases = 0;
1655 
1656 	/* Save config message too. */
1657 	rte_memcpy(&qm_port->cfg.dir, &cfg, sizeof(qm_port->cfg.dir));
1658 
1659 	/* update state */
1660 	qm_port->state = PORT_STARTED; /* enabled at create time */
1661 	qm_port->config_state = DLB2_CONFIGURED;
1662 
1663 	if (dlb2->version == DLB2_HW_V2) {
1664 		qm_port->dir_credits = dir_credit_high_watermark;
1665 		qm_port->ldb_credits = ldb_credit_high_watermark;
1666 		qm_port->credit_pool[DLB2_DIR_QUEUE] = &dlb2->dir_credit_pool;
1667 		qm_port->credit_pool[DLB2_LDB_QUEUE] = &dlb2->ldb_credit_pool;
1668 
1669 		DLB2_LOG_DBG("dlb2: created dir port %d, depth = %d cr=%d,%d\n",
1670 			     qm_port_id,
1671 			     dequeue_depth,
1672 			     dir_credit_high_watermark,
1673 			     ldb_credit_high_watermark);
1674 	} else {
1675 		qm_port->credits = credit_high_watermark;
1676 		qm_port->credit_pool[DLB2_COMBINED_POOL] = &dlb2->credit_pool;
1677 
1678 		DLB2_LOG_DBG("dlb2: created dir port %d, depth = %d cr=%d\n",
1679 			     qm_port_id,
1680 			     dequeue_depth,
1681 			     credit_high_watermark);
1682 	}
1683 
1684 #if (!defined RTE_ARCH_X86_64)
1685 	qm_port->use_scalar = true;
1686 #else
1687 	if ((qm_port->cq_depth > 64) ||
1688 	    (!rte_is_power_of_2(qm_port->cq_depth)) ||
1689 	    (dlb2->vector_opts_enabled == false))
1690 		qm_port->use_scalar = true;
1691 #endif
1692 
1693 	rte_spinlock_unlock(&handle->resource_lock);
1694 
1695 	return 0;
1696 
1697 error_exit:
1698 
1699 	if (qm_port)
1700 		dlb2_free_qe_mem(qm_port);
1701 
1702 	rte_spinlock_unlock(&handle->resource_lock);
1703 
1704 	DLB2_LOG_ERR("dlb2: create dir port failed!\n");
1705 
1706 	return ret;
1707 }
1708 
1709 static int
1710 dlb2_eventdev_port_setup(struct rte_eventdev *dev,
1711 			 uint8_t ev_port_id,
1712 			 const struct rte_event_port_conf *port_conf)
1713 {
1714 	struct dlb2_eventdev *dlb2;
1715 	struct dlb2_eventdev_port *ev_port;
1716 	int ret;
1717 	uint32_t hw_credit_quanta, sw_credit_quanta;
1718 
1719 	if (dev == NULL || port_conf == NULL) {
1720 		DLB2_LOG_ERR("Null parameter\n");
1721 		return -EINVAL;
1722 	}
1723 
1724 	dlb2 = dlb2_pmd_priv(dev);
1725 
1726 	if (ev_port_id >= DLB2_MAX_NUM_PORTS(dlb2->version))
1727 		return -EINVAL;
1728 
1729 	if (port_conf->dequeue_depth >
1730 		evdev_dlb2_default_info.max_event_port_dequeue_depth ||
1731 	    port_conf->enqueue_depth >
1732 		evdev_dlb2_default_info.max_event_port_enqueue_depth)
1733 		return -EINVAL;
1734 
1735 	ev_port = &dlb2->ev_ports[ev_port_id];
1736 	/* configured? */
1737 	if (ev_port->setup_done) {
1738 		DLB2_LOG_ERR("evport %d is already configured\n", ev_port_id);
1739 		return -EINVAL;
1740 	}
1741 
1742 	ev_port->qm_port.is_directed = port_conf->event_port_cfg &
1743 		RTE_EVENT_PORT_CFG_SINGLE_LINK;
1744 
1745 	if (!ev_port->qm_port.is_directed) {
1746 		ret = dlb2_hw_create_ldb_port(dlb2,
1747 					      ev_port,
1748 					      port_conf->dequeue_depth,
1749 					      port_conf->enqueue_depth);
1750 		if (ret < 0) {
1751 			DLB2_LOG_ERR("Failed to create the LB port, ev_port_id=%d\n",
1752 				     ev_port_id);
1753 
1754 			return ret;
1755 		}
1756 	} else {
1757 		ret = dlb2_hw_create_dir_port(dlb2,
1758 					      ev_port,
1759 					      port_conf->dequeue_depth,
1760 					      port_conf->enqueue_depth);
1761 		if (ret < 0) {
1762 			DLB2_LOG_ERR("Failed to create the DIR port\n");
1763 			return ret;
1764 		}
1765 	}
1766 
1767 	/* Save off port config for reconfig */
1768 	ev_port->conf = *port_conf;
1769 
1770 	ev_port->id = ev_port_id;
1771 	ev_port->enq_configured = true;
1772 	ev_port->setup_done = true;
1773 	ev_port->inflight_max = port_conf->new_event_threshold;
1774 	ev_port->implicit_release = !(port_conf->event_port_cfg &
1775 		  RTE_EVENT_PORT_CFG_DISABLE_IMPL_REL);
1776 	ev_port->outstanding_releases = 0;
1777 	ev_port->inflight_credits = 0;
1778 	ev_port->dlb2 = dlb2; /* reverse link */
1779 
1780 	/* Default for worker ports */
1781 	sw_credit_quanta = dlb2->sw_credit_quanta;
1782 	hw_credit_quanta = dlb2->hw_credit_quanta;
1783 
1784 	if (port_conf->event_port_cfg & RTE_EVENT_PORT_CFG_HINT_PRODUCER) {
1785 		/* Producer type ports. Mostly enqueue */
1786 		sw_credit_quanta = DLB2_SW_CREDIT_P_QUANTA_DEFAULT;
1787 		hw_credit_quanta = DLB2_SW_CREDIT_P_BATCH_SZ;
1788 	}
1789 	if (port_conf->event_port_cfg & RTE_EVENT_PORT_CFG_HINT_CONSUMER) {
1790 		/* Consumer type ports. Mostly dequeue */
1791 		sw_credit_quanta = DLB2_SW_CREDIT_C_QUANTA_DEFAULT;
1792 		hw_credit_quanta = DLB2_SW_CREDIT_C_BATCH_SZ;
1793 	}
1794 	ev_port->credit_update_quanta = sw_credit_quanta;
1795 	ev_port->qm_port.hw_credit_quanta = hw_credit_quanta;
1796 
1797 	/* Tear down pre-existing port->queue links */
1798 	if (dlb2->run_state == DLB2_RUN_STATE_STOPPED)
1799 		dlb2_port_link_teardown(dlb2, &dlb2->ev_ports[ev_port_id]);
1800 
1801 	dev->data->ports[ev_port_id] = &dlb2->ev_ports[ev_port_id];
1802 
1803 	return 0;
1804 }
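
/*
 * Illustrative port_conf that exercises this callback (values are examples
 * only):
 *
 *	struct rte_event_port_conf pc = {
 *		.new_event_threshold = 2048,
 *		.dequeue_depth = 32,
 *		.enqueue_depth = 32,
 *		.event_port_cfg = RTE_EVENT_PORT_CFG_HINT_CONSUMER,
 *	};
 *	rte_event_port_setup(dev_id, port_id, &pc);
 *
 * The HINT_PRODUCER/HINT_CONSUMER flags only steer the credit quanta chosen
 * above; whether the port is LDB or DIR is set by
 * RTE_EVENT_PORT_CFG_SINGLE_LINK.
 */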
1805 
1806 static int16_t
1807 dlb2_hw_map_ldb_qid_to_port(struct dlb2_hw_dev *handle,
1808 			    uint32_t qm_port_id,
1809 			    uint16_t qm_qid,
1810 			    uint8_t priority)
1811 {
1812 	struct dlb2_map_qid_args cfg;
1813 	int32_t ret;
1814 
1815 	if (handle == NULL)
1816 		return -EINVAL;
1817 
1818 	/* Build message */
1819 	cfg.port_id = qm_port_id;
1820 	cfg.qid = qm_qid;
1821 	cfg.priority = EV_TO_DLB2_PRIO(priority);
1822 
1823 	ret = dlb2_iface_map_qid(handle, &cfg);
1824 	if (ret < 0) {
1825 		DLB2_LOG_ERR("dlb2: map qid error, ret=%d (driver status: %s)\n",
1826 			     ret, dlb2_error_strings[cfg.response.status]);
1827 		DLB2_LOG_ERR("dlb2: grp=%d, qm_port=%d, qm_qid=%d prio=%d\n",
1828 			     handle->domain_id, cfg.port_id,
1829 			     cfg.qid,
1830 			     cfg.priority);
1831 	} else {
1832 		DLB2_LOG_DBG("dlb2: mapped queue %d to qm_port %d\n",
1833 			     qm_qid, qm_port_id);
1834 	}
1835 
1836 	return ret;
1837 }
1838 
1839 static int
1840 dlb2_event_queue_join_ldb(struct dlb2_eventdev *dlb2,
1841 			  struct dlb2_eventdev_port *ev_port,
1842 			  struct dlb2_eventdev_queue *ev_queue,
1843 			  uint8_t priority)
1844 {
1845 	int first_avail = -1;
1846 	int ret, i;
1847 
1848 	for (i = 0; i < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; i++) {
1849 		if (ev_port->link[i].valid) {
1850 			if (ev_port->link[i].queue_id == ev_queue->id &&
1851 			    ev_port->link[i].priority == priority) {
1852 				if (ev_port->link[i].mapped)
1853 					return 0; /* already mapped */
1854 				first_avail = i;
1855 			}
1856 		} else if (first_avail == -1)
1857 			first_avail = i;
1858 	}
1859 	if (first_avail == -1) {
1860 		DLB2_LOG_ERR("dlb2: qm_port %d has no available QID slots.\n",
1861 			     ev_port->qm_port.id);
1862 		return -EINVAL;
1863 	}
1864 
1865 	ret = dlb2_hw_map_ldb_qid_to_port(&dlb2->qm_instance,
1866 					  ev_port->qm_port.id,
1867 					  ev_queue->qm_queue.id,
1868 					  priority);
1869 
1870 	if (!ret)
1871 		ev_port->link[first_avail].mapped = true;
1872 
1873 	return ret;
1874 }
1875 
1876 static int32_t
1877 dlb2_hw_create_dir_queue(struct dlb2_eventdev *dlb2,
1878 			 struct dlb2_eventdev_queue *ev_queue,
1879 			 int32_t qm_port_id)
1880 {
1881 	struct dlb2_hw_dev *handle = &dlb2->qm_instance;
1882 	struct dlb2_create_dir_queue_args cfg;
1883 	int32_t ret;
1884 
1885 	/* The directed port is always configured before its queue */
1886 	cfg.port_id = qm_port_id;
1887 
1888 	if (ev_queue->depth_threshold == 0) {
1889 		cfg.depth_threshold = dlb2->default_depth_thresh;
1890 		ev_queue->depth_threshold =
1891 			dlb2->default_depth_thresh;
1892 	} else
1893 		cfg.depth_threshold = ev_queue->depth_threshold;
1894 
1895 	ret = dlb2_iface_dir_queue_create(handle, &cfg);
1896 	if (ret < 0) {
1897 		DLB2_LOG_ERR("dlb2: create DIR event queue error, ret=%d (driver status: %s)\n",
1898 			     ret, dlb2_error_strings[cfg.response.status]);
1899 		return -EINVAL;
1900 	}
1901 
1902 	return cfg.response.id;
1903 }
1904 
1905 static int
1906 dlb2_eventdev_dir_queue_setup(struct dlb2_eventdev *dlb2,
1907 			      struct dlb2_eventdev_queue *ev_queue,
1908 			      struct dlb2_eventdev_port *ev_port)
1909 {
1910 	int32_t qm_qid;
1911 
1912 	qm_qid = dlb2_hw_create_dir_queue(dlb2, ev_queue, ev_port->qm_port.id);
1913 
1914 	if (qm_qid < 0) {
1915 		DLB2_LOG_ERR("Failed to create the DIR queue\n");
1916 		return qm_qid;
1917 	}
1918 
1919 	dlb2->qm_dir_to_ev_queue_id[qm_qid] = ev_queue->id;
1920 
1921 	ev_queue->qm_queue.id = qm_qid;
1922 
1923 	return 0;
1924 }
1925 
1926 static int
1927 dlb2_do_port_link(struct rte_eventdev *dev,
1928 		  struct dlb2_eventdev_queue *ev_queue,
1929 		  struct dlb2_eventdev_port *ev_port,
1930 		  uint8_t prio)
1931 {
1932 	struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
1933 	int err;
1934 
1935 	/* Don't link until start time. */
1936 	if (dlb2->run_state == DLB2_RUN_STATE_STOPPED)
1937 		return 0;
1938 
1939 	if (ev_queue->qm_queue.is_directed)
1940 		err = dlb2_eventdev_dir_queue_setup(dlb2, ev_queue, ev_port);
1941 	else
1942 		err = dlb2_event_queue_join_ldb(dlb2, ev_port, ev_queue, prio);
1943 
1944 	if (err) {
1945 		DLB2_LOG_ERR("port link failure for %s ev_q %d, ev_port %d\n",
1946 			     ev_queue->qm_queue.is_directed ? "DIR" : "LDB",
1947 			     ev_queue->id, ev_port->id);
1948 
1949 		rte_errno = err;
1950 		return -1;
1951 	}
1952 
1953 	return 0;
1954 }
1955 
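/*
 * Validate a requested port->queue link: the queue must exist and be
 * configured, the port and queue must both be directed or both load-balanced,
 * a free link slot must be available, and a directed port or queue may carry
 * at most one link.
 */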
1956 static int
1957 dlb2_validate_port_link(struct dlb2_eventdev_port *ev_port,
1958 			uint8_t queue_id,
1959 			bool link_exists,
1960 			int index)
1961 {
1962 	struct dlb2_eventdev *dlb2 = ev_port->dlb2;
1963 	struct dlb2_eventdev_queue *ev_queue;
1964 	bool port_is_dir, queue_is_dir;
1965 
1966 	if (queue_id > dlb2->num_queues) {
1967 		rte_errno = -EINVAL;
1968 		return -1;
1969 	}
1970 
1971 	ev_queue = &dlb2->ev_queues[queue_id];
1972 
1973 	if (!ev_queue->setup_done &&
1974 	    ev_queue->qm_queue.config_state != DLB2_PREV_CONFIGURED) {
1975 		rte_errno = -EINVAL;
1976 		return -1;
1977 	}
1978 
1979 	port_is_dir = ev_port->qm_port.is_directed;
1980 	queue_is_dir = ev_queue->qm_queue.is_directed;
1981 
1982 	if (port_is_dir != queue_is_dir) {
1983 		DLB2_LOG_ERR("%s queue %u can't link to %s port %u\n",
1984 			     queue_is_dir ? "DIR" : "LDB", ev_queue->id,
1985 			     port_is_dir ? "DIR" : "LDB", ev_port->id);
1986 
1987 		rte_errno = -EINVAL;
1988 		return -1;
1989 	}
1990 
1991 	/* Check if there is space for the requested link */
1992 	if (!link_exists && index == -1) {
1993 		DLB2_LOG_ERR("no space for new link\n");
1994 		rte_errno = -ENOSPC;
1995 		return -1;
1996 	}
1997 
1998 	/* Check if the directed port is already linked */
1999 	if (ev_port->qm_port.is_directed && ev_port->num_links > 0 &&
2000 	    !link_exists) {
2001 		DLB2_LOG_ERR("Can't link DIR port %d to >1 queues\n",
2002 			     ev_port->id);
2003 		rte_errno = -EINVAL;
2004 		return -1;
2005 	}
2006 
2007 	/* Check if the directed queue is already linked */
2008 	if (ev_queue->qm_queue.is_directed && ev_queue->num_links > 0 &&
2009 	    !link_exists) {
2010 		DLB2_LOG_ERR("Can't link DIR queue %d to >1 ports\n",
2011 			     ev_queue->id);
2012 		rte_errno = -EINVAL;
2013 		return -1;
2014 	}
2015 
2016 	return 0;
2017 }
2018 
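/*
 * Eventdev port_link handler. Each requested link is recorded in the port's
 * link[] table; if the device is already started the mapping is programmed in
 * hardware immediately, otherwise it is applied at dev_start. Returns the
 * number of queues from the request that are linked on return.
 */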
2019 static int
2020 dlb2_eventdev_port_link(struct rte_eventdev *dev, void *event_port,
2021 			const uint8_t queues[], const uint8_t priorities[],
2022 			uint16_t nb_links)
2024 {
2025 	struct dlb2_eventdev_port *ev_port = event_port;
2026 	struct dlb2_eventdev *dlb2;
2027 	int i, j;
2028 
2029 	RTE_SET_USED(dev);
2030 
2031 	if (ev_port == NULL) {
2032 		DLB2_LOG_ERR("dlb2: evport not setup\n");
2033 		rte_errno = -EINVAL;
2034 		return 0;
2035 	}
2036 
2037 	if (!ev_port->setup_done &&
2038 	    ev_port->qm_port.config_state != DLB2_PREV_CONFIGURED) {
2039 		DLB2_LOG_ERR("dlb2: evport not setup\n");
2040 		rte_errno = -EINVAL;
2041 		return 0;
2042 	}
2043 
2044 	/* Note: rte_event_port_link() ensures the PMD won't receive a NULL
2045 	 * queues pointer.
2046 	 */
2047 	if (nb_links == 0) {
2048 		DLB2_LOG_DBG("dlb2: nb_links is 0\n");
2049 		return 0; /* Ignore and return success */
2050 	}
2051 
2052 	dlb2 = ev_port->dlb2;
2053 
2054 	DLB2_LOG_DBG("Linking %u queues to %s port %d\n",
2055 		     nb_links,
2056 		     ev_port->qm_port.is_directed ? "DIR" : "LDB",
2057 		     ev_port->id);
2058 
2059 	for (i = 0; i < nb_links; i++) {
2060 		struct dlb2_eventdev_queue *ev_queue;
2061 		uint8_t queue_id, prio;
2062 		bool found = false;
2063 		int index = -1;
2064 
2065 		queue_id = queues[i];
2066 		prio = priorities[i];
2067 
2068 		/* Check if the link already exists. */
2069 		for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++)
2070 			if (ev_port->link[j].valid) {
2071 				if (ev_port->link[j].queue_id == queue_id) {
2072 					found = true;
2073 					index = j;
2074 					break;
2075 				}
2076 			} else if (index == -1) {
2077 				index = j;
2078 			}
2079 
2080 		/* could not link */
2081 		if (index == -1)
2082 			break;
2083 
2084 		/* Check if already linked at the requested priority */
2085 		if (found && ev_port->link[j].priority == prio)
2086 			continue;
2087 
2088 		if (dlb2_validate_port_link(ev_port, queue_id, found, index))
2089 			break; /* return index of offending queue */
2090 
2091 		ev_queue = &dlb2->ev_queues[queue_id];
2092 
2093 		if (dlb2_do_port_link(dev, ev_queue, ev_port, prio))
2094 			break; /* return index of offending queue */
2095 
2096 		ev_queue->num_links++;
2097 
2098 		ev_port->link[index].queue_id = queue_id;
2099 		ev_port->link[index].priority = prio;
2100 		ev_port->link[index].valid = true;
2101 		/* If the entry already existed, this was just a priority change */
2102 		if (!found)
2103 			ev_port->num_links++;
2104 	}
2105 	return i;
2106 }
2107 
2108 static int16_t
2109 dlb2_hw_unmap_ldb_qid_from_port(struct dlb2_hw_dev *handle,
2110 				uint32_t qm_port_id,
2111 				uint16_t qm_qid)
2112 {
2113 	struct dlb2_unmap_qid_args cfg;
2114 	int32_t ret;
2115 
2116 	if (handle == NULL)
2117 		return -EINVAL;
2118 
2119 	cfg.port_id = qm_port_id;
2120 	cfg.qid = qm_qid;
2121 
2122 	ret = dlb2_iface_unmap_qid(handle, &cfg);
2123 	if (ret < 0)
2124 		DLB2_LOG_ERR("dlb2: unmap qid error, ret=%d (driver status: %s)\n",
2125 			     ret, dlb2_error_strings[cfg.response.status]);
2126 
2127 	return ret;
2128 }
2129 
2130 static int
2131 dlb2_event_queue_detach_ldb(struct dlb2_eventdev *dlb2,
2132 			    struct dlb2_eventdev_port *ev_port,
2133 			    struct dlb2_eventdev_queue *ev_queue)
2134 {
2135 	int ret, i;
2136 
2137 	/* Don't unlink until start time. */
2138 	if (dlb2->run_state == DLB2_RUN_STATE_STOPPED)
2139 		return 0;
2140 
2141 	for (i = 0; i < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; i++) {
2142 		if (ev_port->link[i].valid &&
2143 		    ev_port->link[i].queue_id == ev_queue->id)
2144 			break; /* found */
2145 	}
2146 
2147 	/* This is expected with the eventdev API, which blindly attempts
2148 	 * to unmap all queues.
2149 	 */
2150 	if (i == DLB2_MAX_NUM_QIDS_PER_LDB_CQ) {
2151 		DLB2_LOG_DBG("dlb2: ignoring LB QID %d not mapped for qm_port %d.\n",
2152 			     ev_queue->qm_queue.id,
2153 			     ev_port->qm_port.id);
2154 		return 0;
2155 	}
2156 
2157 	ret = dlb2_hw_unmap_ldb_qid_from_port(&dlb2->qm_instance,
2158 					      ev_port->qm_port.id,
2159 					      ev_queue->qm_queue.id);
2160 	if (!ret)
2161 		ev_port->link[i].mapped = false;
2162 
2163 	return ret;
2164 }
2165 
2166 static int
2167 dlb2_eventdev_port_unlink(struct rte_eventdev *dev, void *event_port,
2168 			  uint8_t queues[], uint16_t nb_unlinks)
2169 {
2170 	struct dlb2_eventdev_port *ev_port = event_port;
2171 	struct dlb2_eventdev *dlb2;
2172 	int i;
2173 
2174 	RTE_SET_USED(dev);
2175 
2176 	if (!ev_port->setup_done) {
2177 		DLB2_LOG_ERR("dlb2: evport %d is not configured\n",
2178 			     ev_port->id);
2179 		rte_errno = -EINVAL;
2180 		return 0;
2181 	}
2182 
2183 	if (queues == NULL || nb_unlinks == 0) {
2184 		DLB2_LOG_DBG("dlb2: queues is NULL or nb_unlinks is 0\n");
2185 		return 0; /* Ignore and return success */
2186 	}
2187 
2188 	if (ev_port->qm_port.is_directed) {
2189 		DLB2_LOG_DBG("dlb2: ignore unlink from dir port %d\n",
2190 			     ev_port->id);
2191 		rte_errno = 0;
2192 		return nb_unlinks; /* as if success */
2193 	}
2194 
2195 	dlb2 = ev_port->dlb2;
2196 
2197 	for (i = 0; i < nb_unlinks; i++) {
2198 		struct dlb2_eventdev_queue *ev_queue;
2199 		int ret, j;
2200 
2201 		if (queues[i] >= dlb2->num_queues) {
2202 			DLB2_LOG_ERR("dlb2: invalid queue id %d\n", queues[i]);
2203 			rte_errno = -EINVAL;
2204 			return i; /* return index of offending queue */
2205 		}
2206 
2207 		ev_queue = &dlb2->ev_queues[queues[i]];
2208 
2209 		/* Does a link exist? */
2210 		for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++)
2211 			if (ev_port->link[j].queue_id == queues[i] &&
2212 			    ev_port->link[j].valid)
2213 				break;
2214 
2215 		if (j == DLB2_MAX_NUM_QIDS_PER_LDB_CQ)
2216 			continue;
2217 
2218 		ret = dlb2_event_queue_detach_ldb(dlb2, ev_port, ev_queue);
2219 		if (ret) {
2220 			DLB2_LOG_ERR("unlink err=%d for port %d queue %d\n",
2221 				     ret, ev_port->id, queues[i]);
2222 			rte_errno = -ENOENT;
2223 			return i; /* return index of offending queue */
2224 		}
2225 
2226 		ev_port->link[j].valid = false;
2227 		ev_port->num_links--;
2228 		ev_queue->num_links--;
2229 	}
2230 
2231 	return nb_unlinks;
2232 }
2233 
2234 static int
2235 dlb2_eventdev_port_unlinks_in_progress(struct rte_eventdev *dev,
2236 				       void *event_port)
2237 {
2238 	struct dlb2_eventdev_port *ev_port = event_port;
2239 	struct dlb2_eventdev *dlb2;
2240 	struct dlb2_hw_dev *handle;
2241 	struct dlb2_pending_port_unmaps_args cfg;
2242 	int ret;
2243 
2244 	RTE_SET_USED(dev);
2245 
2246 	if (!ev_port->setup_done) {
2247 		DLB2_LOG_ERR("dlb2: evport %d is not configured\n",
2248 			     ev_port->id);
2249 		rte_errno = -EINVAL;
2250 		return 0;
2251 	}
2252 
2253 	cfg.port_id = ev_port->qm_port.id;
2254 	dlb2 = ev_port->dlb2;
2255 	handle = &dlb2->qm_instance;
2256 	ret = dlb2_iface_pending_port_unmaps(handle, &cfg);
2257 
2258 	if (ret < 0) {
2259 		DLB2_LOG_ERR("dlb2: num_unlinks_in_progress ret=%d (driver status: %s)\n",
2260 			     ret, dlb2_error_strings[cfg.response.status]);
2261 		return ret;
2262 	}
2263 
2264 	return cfg.response.id;
2265 }
2266 
2267 static int
2268 dlb2_eventdev_reapply_configuration(struct rte_eventdev *dev)
2269 {
2270 	struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
2271 	int ret, i;
2272 
2273 	/* If an event queue or port was previously configured, but hasn't been
2274 	 * reconfigured, reapply its original configuration.
2275 	 */
2276 	for (i = 0; i < dlb2->num_queues; i++) {
2277 		struct dlb2_eventdev_queue *ev_queue;
2278 
2279 		ev_queue = &dlb2->ev_queues[i];
2280 
2281 		if (ev_queue->qm_queue.config_state != DLB2_PREV_CONFIGURED)
2282 			continue;
2283 
2284 		ret = dlb2_eventdev_queue_setup(dev, i, &ev_queue->conf);
2285 		if (ret < 0) {
2286 			DLB2_LOG_ERR("dlb2: failed to reconfigure queue %d\n", i);
2287 			return ret;
2288 		}
2289 	}
2290 
2291 	for (i = 0; i < dlb2->num_ports; i++) {
2292 		struct dlb2_eventdev_port *ev_port = &dlb2->ev_ports[i];
2293 
2294 		if (ev_port->qm_port.config_state != DLB2_PREV_CONFIGURED)
2295 			continue;
2296 
2297 		ret = dlb2_eventdev_port_setup(dev, i, &ev_port->conf);
2298 		if (ret < 0) {
2299 			DLB2_LOG_ERR("dlb2: failed to reconfigure ev_port %d\n",
2300 				     i);
2301 			return ret;
2302 		}
2303 	}
2304 
2305 	return 0;
2306 }
2307 
2308 static int
2309 dlb2_eventdev_apply_port_links(struct rte_eventdev *dev)
2310 {
2311 	struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
2312 	int i;
2313 
2314 	/* Perform requested port->queue links */
2315 	for (i = 0; i < dlb2->num_ports; i++) {
2316 		struct dlb2_eventdev_port *ev_port = &dlb2->ev_ports[i];
2317 		int j;
2318 
2319 		for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++) {
2320 			struct dlb2_eventdev_queue *ev_queue;
2321 			uint8_t prio, queue_id;
2322 
2323 			if (!ev_port->link[j].valid)
2324 				continue;
2325 
2326 			prio = ev_port->link[j].priority;
2327 			queue_id = ev_port->link[j].queue_id;
2328 
2329 			if (dlb2_validate_port_link(ev_port, queue_id, true, j))
2330 				return -EINVAL;
2331 
2332 			ev_queue = &dlb2->ev_queues[queue_id];
2333 
2334 			if (dlb2_do_port_link(dev, ev_queue, ev_port, prio))
2335 				return -EINVAL;
2336 		}
2337 	}
2338 
2339 	return 0;
2340 }
2341 
2342 static int
2343 dlb2_eventdev_start(struct rte_eventdev *dev)
2344 {
2345 	struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
2346 	struct dlb2_hw_dev *handle = &dlb2->qm_instance;
2347 	struct dlb2_start_domain_args cfg;
2348 	int ret, i;
2349 
2350 	rte_spinlock_lock(&dlb2->qm_instance.resource_lock);
2351 	if (dlb2->run_state != DLB2_RUN_STATE_STOPPED) {
2352 		DLB2_LOG_ERR("bad state %d for dev_start\n",
2353 			     (int)dlb2->run_state);
2354 		rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
2355 		return -EINVAL;
2356 	}
2357 	dlb2->run_state = DLB2_RUN_STATE_STARTING;
2358 	rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
2359 
2360 	/* If the device was configured more than once, some event ports and/or
2361 	 * queues may need to be reconfigured.
2362 	 */
2363 	ret = dlb2_eventdev_reapply_configuration(dev);
2364 	if (ret)
2365 		return ret;
2366 
2367 	/* The DLB PMD delays port links until the device is started. */
2368 	ret = dlb2_eventdev_apply_port_links(dev);
2369 	if (ret)
2370 		return ret;
2371 
2372 	for (i = 0; i < dlb2->num_ports; i++) {
2373 		if (!dlb2->ev_ports[i].setup_done) {
2374 			DLB2_LOG_ERR("dlb2: port %d not setup\n", i);
2375 			return -ESTALE;
2376 		}
2377 	}
2378 
2379 	for (i = 0; i < dlb2->num_queues; i++) {
2380 		if (dlb2->ev_queues[i].num_links == 0) {
2381 			DLB2_LOG_ERR("dlb2: queue %d is not linked\n", i);
2382 			return -ENOLINK;
2383 		}
2384 	}
2385 
2386 	ret = dlb2_iface_sched_domain_start(handle, &cfg);
2387 	if (ret < 0) {
2388 		DLB2_LOG_ERR("dlb2: sched_domain_start ret=%d (driver status: %s)\n",
2389 			     ret, dlb2_error_strings[cfg.response.status]);
2390 		return ret;
2391 	}
2392 
2393 	dlb2->run_state = DLB2_RUN_STATE_STARTED;
2394 	DLB2_LOG_DBG("dlb2: sched_domain_start completed OK\n");
2395 
2396 	return 0;
2397 }
2398 
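/*
 * Map an rte_event op to the HCW command byte. On directed ports a FORWARD is
 * issued as a plain NEW enqueue and a RELEASE becomes a no-op, since directed
 * events do not hold load-balanced scheduler state that needs completing.
 */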
2399 static uint8_t cmd_byte_map[DLB2_NUM_PORT_TYPES][DLB2_NUM_HW_SCHED_TYPES] = {
2400 	{
2401 		/* Load-balanced cmd bytes */
2402 		[RTE_EVENT_OP_NEW] = DLB2_NEW_CMD_BYTE,
2403 		[RTE_EVENT_OP_FORWARD] = DLB2_FWD_CMD_BYTE,
2404 		[RTE_EVENT_OP_RELEASE] = DLB2_COMP_CMD_BYTE,
2405 	},
2406 	{
2407 		/* Directed cmd bytes */
2408 		[RTE_EVENT_OP_NEW] = DLB2_NEW_CMD_BYTE,
2409 		[RTE_EVENT_OP_FORWARD] = DLB2_NEW_CMD_BYTE,
2410 		[RTE_EVENT_OP_RELEASE] = DLB2_NOOP_CMD_BYTE,
2411 	},
2412 };
2413 
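/*
 * Claim up to hw_credit_quanta credits (clamped to what the pool holds) from
 * the shared credit pool with a single compare-and-swap. Returns the number
 * of credits claimed, or 0 if the pool is empty or the CAS loses a race with
 * another port.
 */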
2414 static inline uint32_t
2415 dlb2_port_credits_get(struct dlb2_port *qm_port,
2416 		      enum dlb2_hw_queue_types type)
2417 {
2418 	uint32_t credits = *qm_port->credit_pool[type];
2419 	/* By default hw_credit_quanta is DLB2_SW_CREDIT_BATCH_SZ */
2420 	uint32_t batch_size = qm_port->hw_credit_quanta;
2421 
2422 	if (unlikely(credits < batch_size))
2423 		batch_size = credits;
2424 
2425 	if (likely(credits &&
2426 		   __atomic_compare_exchange_n(
2427 			qm_port->credit_pool[type],
2428 			&credits, credits - batch_size, false,
2429 			__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)))
2430 		return batch_size;
2431 	else
2432 		return 0;
2433 }
2434 
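/*
 * Return locally cached software credits to the device-wide inflight count
 * once the port has accumulated at least two quanta, keeping one quanta
 * cached so subsequent enqueues don't immediately touch the shared counter.
 */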
2435 static inline void
2436 dlb2_replenish_sw_credits(struct dlb2_eventdev *dlb2,
2437 			  struct dlb2_eventdev_port *ev_port)
2438 {
2439 	uint16_t quanta = ev_port->credit_update_quanta;
2440 
2441 	if (ev_port->inflight_credits >= quanta * 2) {
2442 		/* Replenish credits, saving one quanta for enqueues */
2443 		uint16_t val = ev_port->inflight_credits - quanta;
2444 
2445 		__atomic_fetch_sub(&dlb2->inflights, val, __ATOMIC_SEQ_CST);
2446 		ev_port->inflight_credits -= val;
2447 	}
2448 }
2449 
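/*
 * Check that the port may enqueue one more new event: enforce the port's
 * new_event_threshold and, if the local credit cache is empty, try to claim
 * another quanta of software credits against the device-wide new event limit.
 * Returns nonzero with rte_errno set on failure.
 */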
2450 static inline int
2451 dlb2_check_enqueue_sw_credits(struct dlb2_eventdev *dlb2,
2452 			      struct dlb2_eventdev_port *ev_port)
2453 {
2454 	uint32_t sw_inflights = __atomic_load_n(&dlb2->inflights,
2455 						__ATOMIC_SEQ_CST);
2456 	const int num = 1;
2457 
2458 	if (unlikely(ev_port->inflight_max < sw_inflights)) {
2459 		DLB2_INC_STAT(ev_port->stats.traffic.tx_nospc_inflight_max, 1);
2460 		rte_errno = -ENOSPC;
2461 		return 1;
2462 	}
2463 
2464 	if (ev_port->inflight_credits < num) {
2465 		/* check if event enqueue brings ev_port over max threshold */
2466 		uint32_t credit_update_quanta = ev_port->credit_update_quanta;
2467 
2468 		if (sw_inflights + credit_update_quanta >
2469 				dlb2->new_event_limit) {
2470 			DLB2_INC_STAT(
2471 			ev_port->stats.traffic.tx_nospc_new_event_limit,
2472 			1);
2473 			rte_errno = -ENOSPC;
2474 			return 1;
2475 		}
2476 
2477 		__atomic_fetch_add(&dlb2->inflights, credit_update_quanta,
2478 				   __ATOMIC_SEQ_CST);
2479 		ev_port->inflight_credits += (credit_update_quanta);
2480 
2481 		if (ev_port->inflight_credits < num) {
2482 			DLB2_INC_STAT(
2483 			ev_port->stats.traffic.tx_nospc_inflight_credits,
2484 			1);
2485 			rte_errno = -ENOSPC;
2486 			return 1;
2487 		}
2488 	}
2489 
2490 	return 0;
2491 }
2492 
2493 static inline int
2494 dlb2_check_enqueue_hw_ldb_credits(struct dlb2_port *qm_port)
2495 {
2496 	if (unlikely(qm_port->cached_ldb_credits == 0)) {
2497 		qm_port->cached_ldb_credits =
2498 			dlb2_port_credits_get(qm_port,
2499 					      DLB2_LDB_QUEUE);
2500 		if (unlikely(qm_port->cached_ldb_credits == 0)) {
2501 			DLB2_INC_STAT(
2502 			qm_port->ev_port->stats.traffic.tx_nospc_ldb_hw_credits,
2503 			1);
2504 			DLB2_LOG_DBG("ldb credits exhausted\n");
2505 			return 1; /* credits exhausted */
2506 		}
2507 	}
2508 
2509 	return 0;
2510 }
2511 
2512 static inline int
2513 dlb2_check_enqueue_hw_dir_credits(struct dlb2_port *qm_port)
2514 {
2515 	if (unlikely(qm_port->cached_dir_credits == 0)) {
2516 		qm_port->cached_dir_credits =
2517 			dlb2_port_credits_get(qm_port,
2518 					      DLB2_DIR_QUEUE);
2519 		if (unlikely(qm_port->cached_dir_credits == 0)) {
2520 			DLB2_INC_STAT(
2521 			qm_port->ev_port->stats.traffic.tx_nospc_dir_hw_credits,
2522 			1);
2523 			DLB2_LOG_DBG("dir credits exhausted\n");
2524 			return 1; /* credits exhausted */
2525 		}
2526 	}
2527 
2528 	return 0;
2529 }
2530 
2531 static inline int
2532 dlb2_check_enqueue_hw_credits(struct dlb2_port *qm_port)
2533 {
2534 	if (unlikely(qm_port->cached_credits == 0)) {
2535 		qm_port->cached_credits =
2536 			dlb2_port_credits_get(qm_port,
2537 					      DLB2_COMBINED_POOL);
2538 		if (unlikely(qm_port->cached_credits == 0)) {
2539 			DLB2_INC_STAT(
2540 			qm_port->ev_port->stats.traffic.tx_nospc_hw_credits, 1);
2541 			DLB2_LOG_DBG("credits exhausted\n");
2542 			return 1; /* credits exhausted */
2543 		}
2544 	}
2545 
2546 	return 0;
2547 }
2548 
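/*
 * Write the port's four 16B enqueue HCWs (one cache line) to its producer
 * port address with a single MOVDIR64B.
 */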
2549 static __rte_always_inline void
2550 dlb2_pp_write(struct dlb2_enqueue_qe *qe4,
2551 	      struct process_local_port_data *port_data)
2552 {
2553 	dlb2_movdir64b(port_data->pp_addr, qe4);
2554 }
2555 
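/*
 * Immediately return 'num' CQ tokens to the device by writing a standalone
 * token-pop HCW (the tokens field is zero-based) and clear the owed-token
 * count.
 */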
2556 static inline int
2557 dlb2_consume_qe_immediate(struct dlb2_port *qm_port, int num)
2558 {
2559 	struct process_local_port_data *port_data;
2560 	struct dlb2_cq_pop_qe *qe;
2561 
2562 	RTE_ASSERT(qm_port->config_state == DLB2_CONFIGURED);
2563 
2564 	qe = qm_port->consume_qe;
2565 
2566 	qe->tokens = num - 1;
2567 
2568 	/* No store fence needed since no pointer is being sent, and CQ token
2569 	 * pops can be safely reordered with other HCWs.
2570 	 */
2571 	port_data = &dlb2_port[qm_port->id][PORT_TYPE(qm_port)];
2572 
2573 	dlb2_movntdq_single(port_data->pp_addr, qe);
2574 
2575 	DLB2_LOG_DBG("dlb2: consume immediate - %d QEs\n", num);
2576 
2577 	qm_port->owed_tokens = 0;
2578 
2579 	return 0;
2580 }
2581 
2582 static inline void
2583 dlb2_hw_do_enqueue(struct dlb2_port *qm_port,
2584 		   bool do_sfence,
2585 		   struct process_local_port_data *port_data)
2586 {
2587 	/* Since MOVDIR64B is weakly-ordered, use an SFENCE to ensure that
2588 	 * application writes complete before enqueueing the QE.
2589 	 */
2590 	if (do_sfence)
2591 		rte_wmb();
2592 
2593 	dlb2_pp_write(qm_port->qe4, port_data);
2594 }
2595 
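/*
 * Turn the enqueue QE at 'idx' into a token-pop HCW that returns all
 * currently owed CQ tokens. Used by the delayed token pop mode to batch token
 * returns with regular enqueues.
 */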
2596 static inline void
2597 dlb2_construct_token_pop_qe(struct dlb2_port *qm_port, int idx)
2598 {
2599 	struct dlb2_cq_pop_qe *qe = (void *)qm_port->qe4;
2600 	int num = qm_port->owed_tokens;
2601 
2602 	qe[idx].cmd_byte = DLB2_POP_CMD_BYTE;
2603 	qe[idx].tokens = num - 1;
2604 
2605 	qm_port->owed_tokens = 0;
2606 }
2607 
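/*
 * Build up to four 16B hardware control words in the port's enqueue cache
 * line. The four-event case packs the metadata using SSE inserts; the
 * remainder path fills each QE field by field.
 */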
2608 static inline void
2609 dlb2_event_build_hcws(struct dlb2_port *qm_port,
2610 		      const struct rte_event ev[],
2611 		      int num,
2612 		      uint8_t *sched_type,
2613 		      uint8_t *queue_id)
2614 {
2615 	struct dlb2_enqueue_qe *qe;
2616 	uint16_t sched_word[4];
2617 	__m128i sse_qe[2];
2618 	int i;
2619 
2620 	qe = qm_port->qe4;
2621 
2622 	sse_qe[0] = _mm_setzero_si128();
2623 	sse_qe[1] = _mm_setzero_si128();
2624 
2625 	switch (num) {
2626 	case 4:
2627 		/* Construct the metadata portion of two HCWs in one 128b SSE
2628 		 * register. HCW metadata is constructed in the SSE registers
2629 		 * like so:
2630 		 * sse_qe[0][63:0]:   qe[0]'s metadata
2631 		 * sse_qe[0][127:64]: qe[1]'s metadata
2632 		 * sse_qe[1][63:0]:   qe[2]'s metadata
2633 		 * sse_qe[1][127:64]: qe[3]'s metadata
2634 		 */
2635 
2636 		/* Convert the event operation into a command byte and store it
2637 		 * in the metadata:
2638 		 * sse_qe[0][63:56]   = cmd_byte_map[is_directed][ev[0].op]
2639 		 * sse_qe[0][127:120] = cmd_byte_map[is_directed][ev[1].op]
2640 		 * sse_qe[1][63:56]   = cmd_byte_map[is_directed][ev[2].op]
2641 		 * sse_qe[1][127:120] = cmd_byte_map[is_directed][ev[3].op]
2642 		 */
2643 #define DLB2_QE_CMD_BYTE 7
2644 		sse_qe[0] = _mm_insert_epi8(sse_qe[0],
2645 				cmd_byte_map[qm_port->is_directed][ev[0].op],
2646 				DLB2_QE_CMD_BYTE);
2647 		sse_qe[0] = _mm_insert_epi8(sse_qe[0],
2648 				cmd_byte_map[qm_port->is_directed][ev[1].op],
2649 				DLB2_QE_CMD_BYTE + 8);
2650 		sse_qe[1] = _mm_insert_epi8(sse_qe[1],
2651 				cmd_byte_map[qm_port->is_directed][ev[2].op],
2652 				DLB2_QE_CMD_BYTE);
2653 		sse_qe[1] = _mm_insert_epi8(sse_qe[1],
2654 				cmd_byte_map[qm_port->is_directed][ev[3].op],
2655 				DLB2_QE_CMD_BYTE + 8);
2656 
2657 		/* Store priority, scheduling type, and queue ID in the sched
2658 		 * word array because these values are re-used when the
2659 		 * destination is a directed queue.
2660 		 */
2661 		sched_word[0] = EV_TO_DLB2_PRIO(ev[0].priority) << 10 |
2662 				sched_type[0] << 8 |
2663 				queue_id[0];
2664 		sched_word[1] = EV_TO_DLB2_PRIO(ev[1].priority) << 10 |
2665 				sched_type[1] << 8 |
2666 				queue_id[1];
2667 		sched_word[2] = EV_TO_DLB2_PRIO(ev[2].priority) << 10 |
2668 				sched_type[2] << 8 |
2669 				queue_id[2];
2670 		sched_word[3] = EV_TO_DLB2_PRIO(ev[3].priority) << 10 |
2671 				sched_type[3] << 8 |
2672 				queue_id[3];
2673 
2674 		/* Store the event priority, scheduling type, and queue ID in
2675 		 * the metadata:
2676 		 * sse_qe[0][31:16] = sched_word[0]
2677 		 * sse_qe[0][95:80] = sched_word[1]
2678 		 * sse_qe[1][31:16] = sched_word[2]
2679 		 * sse_qe[1][95:80] = sched_word[3]
2680 		 */
2681 #define DLB2_QE_QID_SCHED_WORD 1
2682 		sse_qe[0] = _mm_insert_epi16(sse_qe[0],
2683 					     sched_word[0],
2684 					     DLB2_QE_QID_SCHED_WORD);
2685 		sse_qe[0] = _mm_insert_epi16(sse_qe[0],
2686 					     sched_word[1],
2687 					     DLB2_QE_QID_SCHED_WORD + 4);
2688 		sse_qe[1] = _mm_insert_epi16(sse_qe[1],
2689 					     sched_word[2],
2690 					     DLB2_QE_QID_SCHED_WORD);
2691 		sse_qe[1] = _mm_insert_epi16(sse_qe[1],
2692 					     sched_word[3],
2693 					     DLB2_QE_QID_SCHED_WORD + 4);
2694 
2695 		/* If the destination is a load-balanced queue, store the lock
2696 		 * ID. If it is a directed queue, DLB places this field in
2697 		 * bytes 10-11 of the received QE, so we format it accordingly:
2698 		 * sse_qe[0][47:32]  = dir queue ? sched_word[0] : flow_id[0]
2699 		 * sse_qe[0][111:96] = dir queue ? sched_word[1] : flow_id[1]
2700 		 * sse_qe[1][47:32]  = dir queue ? sched_word[2] : flow_id[2]
2701 		 * sse_qe[1][111:96] = dir queue ? sched_word[3] : flow_id[3]
2702 		 */
2703 #define DLB2_QE_LOCK_ID_WORD 2
2704 		sse_qe[0] = _mm_insert_epi16(sse_qe[0],
2705 				(sched_type[0] == DLB2_SCHED_DIRECTED) ?
2706 					sched_word[0] : ev[0].flow_id,
2707 				DLB2_QE_LOCK_ID_WORD);
2708 		sse_qe[0] = _mm_insert_epi16(sse_qe[0],
2709 				(sched_type[1] == DLB2_SCHED_DIRECTED) ?
2710 					sched_word[1] : ev[1].flow_id,
2711 				DLB2_QE_LOCK_ID_WORD + 4);
2712 		sse_qe[1] = _mm_insert_epi16(sse_qe[1],
2713 				(sched_type[2] == DLB2_SCHED_DIRECTED) ?
2714 					sched_word[2] : ev[2].flow_id,
2715 				DLB2_QE_LOCK_ID_WORD);
2716 		sse_qe[1] = _mm_insert_epi16(sse_qe[1],
2717 				(sched_type[3] == DLB2_SCHED_DIRECTED) ?
2718 					sched_word[3] : ev[3].flow_id,
2719 				DLB2_QE_LOCK_ID_WORD + 4);
2720 
2721 		/* Store the event type and sub event type in the metadata:
2722 		 * sse_qe[0][15:0]  = flow_id[0]
2723 		 * sse_qe[0][79:64] = flow_id[1]
2724 		 * sse_qe[1][15:0]  = flow_id[2]
2725 		 * sse_qe[1][79:64] = flow_id[3]
2726 		 */
2727 #define DLB2_QE_EV_TYPE_WORD 0
2728 		sse_qe[0] = _mm_insert_epi16(sse_qe[0],
2729 					     ev[0].sub_event_type << 8 |
2730 						ev[0].event_type,
2731 					     DLB2_QE_EV_TYPE_WORD);
2732 		sse_qe[0] = _mm_insert_epi16(sse_qe[0],
2733 					     ev[1].sub_event_type << 8 |
2734 						ev[1].event_type,
2735 					     DLB2_QE_EV_TYPE_WORD + 4);
2736 		sse_qe[1] = _mm_insert_epi16(sse_qe[1],
2737 					     ev[2].sub_event_type << 8 |
2738 						ev[2].event_type,
2739 					     DLB2_QE_EV_TYPE_WORD);
2740 		sse_qe[1] = _mm_insert_epi16(sse_qe[1],
2741 					     ev[3].sub_event_type << 8 |
2742 						ev[3].event_type,
2743 					     DLB2_QE_EV_TYPE_WORD + 4);
2744 
2745 		/* Store the metadata to memory (use the double-precision
2746 		 * _mm_storeh_pd because there is no integer function for
2747 		 * storing the upper 64b):
2748 		 * qe[0] metadata = sse_qe[0][63:0]
2749 		 * qe[1] metadata = sse_qe[0][127:64]
2750 		 * qe[2] metadata = sse_qe[1][63:0]
2751 		 * qe[3] metadata = sse_qe[1][127:64]
2752 		 */
2753 		_mm_storel_epi64((__m128i *)&qe[0].u.opaque_data, sse_qe[0]);
2754 		_mm_storeh_pd((double *)&qe[1].u.opaque_data,
2755 			      (__m128d)sse_qe[0]);
2756 		_mm_storel_epi64((__m128i *)&qe[2].u.opaque_data, sse_qe[1]);
2757 		_mm_storeh_pd((double *)&qe[3].u.opaque_data,
2758 			      (__m128d)sse_qe[1]);
2759 
2760 		qe[0].data = ev[0].u64;
2761 		qe[1].data = ev[1].u64;
2762 		qe[2].data = ev[2].u64;
2763 		qe[3].data = ev[3].u64;
2764 
2765 		break;
2766 	case 3:
2767 	case 2:
2768 	case 1:
2769 		for (i = 0; i < num; i++) {
2770 			qe[i].cmd_byte =
2771 				cmd_byte_map[qm_port->is_directed][ev[i].op];
2772 			qe[i].sched_type = sched_type[i];
2773 			qe[i].data = ev[i].u64;
2774 			qe[i].qid = queue_id[i];
2775 			qe[i].priority = EV_TO_DLB2_PRIO(ev[i].priority);
2776 			qe[i].lock_id = ev[i].flow_id;
2777 			if (sched_type[i] == DLB2_SCHED_DIRECTED) {
2778 				struct dlb2_msg_info *info =
2779 					(struct dlb2_msg_info *)&qe[i].lock_id;
2780 
2781 				info->qid = queue_id[i];
2782 				info->sched_type = DLB2_SCHED_DIRECTED;
2783 				info->priority = qe[i].priority;
2784 			}
2785 			qe[i].u.event_type.major = ev[i].event_type;
2786 			qe[i].u.event_type.sub = ev[i].sub_event_type;
2787 		}
2788 		break;
2789 	case 0:
2790 		break;
2791 	}
2792 }
2793 
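/*
 * Validate one event prior to enqueue: resolve its destination queue and
 * hardware scheduling type and charge the hardware and software credits
 * required by its op. Returns nonzero with rte_errno set if the event cannot
 * be enqueued.
 */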
2794 static inline int
2795 dlb2_event_enqueue_prep(struct dlb2_eventdev_port *ev_port,
2796 			struct dlb2_port *qm_port,
2797 			const struct rte_event ev[],
2798 			uint8_t *sched_type,
2799 			uint8_t *queue_id)
2800 {
2801 	struct dlb2_eventdev *dlb2 = ev_port->dlb2;
2802 	struct dlb2_eventdev_queue *ev_queue;
2803 	uint16_t *cached_credits = NULL;
2804 	struct dlb2_queue *qm_queue;
2805 
2806 	ev_queue = &dlb2->ev_queues[ev->queue_id];
2807 	qm_queue = &ev_queue->qm_queue;
2808 	*queue_id = qm_queue->id;
2809 
2810 	/* Ignore sched_type and hardware credits on release events */
2811 	if (ev->op == RTE_EVENT_OP_RELEASE)
2812 		goto op_check;
2813 
2814 	if (!qm_queue->is_directed) {
2815 		/* Load balanced destination queue */
2816 
2817 		if (dlb2->version == DLB2_HW_V2) {
2818 			if (dlb2_check_enqueue_hw_ldb_credits(qm_port)) {
2819 				rte_errno = -ENOSPC;
2820 				return 1;
2821 			}
2822 			cached_credits = &qm_port->cached_ldb_credits;
2823 		} else {
2824 			if (dlb2_check_enqueue_hw_credits(qm_port)) {
2825 				rte_errno = -ENOSPC;
2826 				return 1;
2827 			}
2828 			cached_credits = &qm_port->cached_credits;
2829 		}
2830 		switch (ev->sched_type) {
2831 		case RTE_SCHED_TYPE_ORDERED:
2832 			DLB2_LOG_DBG("dlb2: put_qe: RTE_SCHED_TYPE_ORDERED\n");
2833 			if (qm_queue->sched_type != RTE_SCHED_TYPE_ORDERED) {
2834 				DLB2_LOG_ERR("dlb2: tried to send ordered event to unordered queue %d\n",
2835 					     *queue_id);
2836 				rte_errno = -EINVAL;
2837 				return 1;
2838 			}
2839 			*sched_type = DLB2_SCHED_ORDERED;
2840 			break;
2841 		case RTE_SCHED_TYPE_ATOMIC:
2842 			DLB2_LOG_DBG("dlb2: put_qe: RTE_SCHED_TYPE_ATOMIC\n");
2843 			*sched_type = DLB2_SCHED_ATOMIC;
2844 			break;
2845 		case RTE_SCHED_TYPE_PARALLEL:
2846 			DLB2_LOG_DBG("dlb2: put_qe: RTE_SCHED_TYPE_PARALLEL\n");
2847 			if (qm_queue->sched_type == RTE_SCHED_TYPE_ORDERED)
2848 				*sched_type = DLB2_SCHED_ORDERED;
2849 			else
2850 				*sched_type = DLB2_SCHED_UNORDERED;
2851 			break;
2852 		default:
2853 			DLB2_LOG_ERR("Unsupported LDB sched type in put_qe\n");
2854 			DLB2_INC_STAT(ev_port->stats.tx_invalid, 1);
2855 			rte_errno = -EINVAL;
2856 			return 1;
2857 		}
2858 	} else {
2859 		/* Directed destination queue */
2860 
2861 		if (dlb2->version == DLB2_HW_V2) {
2862 			if (dlb2_check_enqueue_hw_dir_credits(qm_port)) {
2863 				rte_errno = -ENOSPC;
2864 				return 1;
2865 			}
2866 			cached_credits = &qm_port->cached_dir_credits;
2867 		} else {
2868 			if (dlb2_check_enqueue_hw_credits(qm_port)) {
2869 				rte_errno = -ENOSPC;
2870 				return 1;
2871 			}
2872 			cached_credits = &qm_port->cached_credits;
2873 		}
2874 		DLB2_LOG_DBG("dlb2: put_qe: RTE_SCHED_TYPE_DIRECTED\n");
2875 
2876 		*sched_type = DLB2_SCHED_DIRECTED;
2877 	}
2878 
2879 op_check:
2880 	switch (ev->op) {
2881 	case RTE_EVENT_OP_NEW:
2882 		/* Check that a sw credit is available */
2883 		if (dlb2_check_enqueue_sw_credits(dlb2, ev_port)) {
2884 			rte_errno = -ENOSPC;
2885 			return 1;
2886 		}
2887 		ev_port->inflight_credits--;
2888 		(*cached_credits)--;
2889 		break;
2890 	case RTE_EVENT_OP_FORWARD:
2891 		/* Check for outstanding_releases underflow. If this occurs,
2892 		 * the application is not using the EVENT_OPs correctly; for
2893 		 * example, forwarding or releasing events that were not
2894 		 * dequeued.
2895 		 */
2896 		RTE_ASSERT(ev_port->outstanding_releases > 0);
2897 		ev_port->outstanding_releases--;
2898 		qm_port->issued_releases++;
2899 		(*cached_credits)--;
2900 		break;
2901 	case RTE_EVENT_OP_RELEASE:
2902 		ev_port->inflight_credits++;
2903 		/* Check for outstanding_releases underflow. If this occurs,
2904 		 * the application is not using the EVENT_OPs correctly; for
2905 		 * example, forwarding or releasing events that were not
2906 		 * dequeued.
2907 		 */
2908 		RTE_ASSERT(ev_port->outstanding_releases > 0);
2909 		ev_port->outstanding_releases--;
2910 		qm_port->issued_releases++;
2911 
2912 		/* Replenish s/w credits if enough are cached */
2913 		dlb2_replenish_sw_credits(dlb2, ev_port);
2914 		break;
2915 	}
2916 
2917 	DLB2_INC_STAT(ev_port->stats.tx_op_cnt[ev->op], 1);
2918 	DLB2_INC_STAT(ev_port->stats.traffic.tx_ok, 1);
2919 
2920 #ifndef RTE_LIBRTE_PMD_DLB_QUELL_STATS
2921 	if (ev->op != RTE_EVENT_OP_RELEASE) {
2922 		DLB2_INC_STAT(ev_port->stats.queue[ev->queue_id].enq_ok, 1);
2923 		DLB2_INC_STAT(ev_port->stats.tx_sched_cnt[*sched_type], 1);
2924 	}
2925 #endif
2926 
2927 	return 0;
2928 }
2929 
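/*
 * Common enqueue path: events are processed in cache-line sized groups of
 * four, a delayed token-pop HCW is interleaved once enough releases have been
 * issued (delayed pop mode only), and the loop stops at the first event that
 * fails validation. Returns the number of events enqueued.
 */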
2930 static inline uint16_t
2931 __dlb2_event_enqueue_burst(void *event_port,
2932 			   const struct rte_event events[],
2933 			   uint16_t num,
2934 			   bool use_delayed)
2935 {
2936 	struct dlb2_eventdev_port *ev_port = event_port;
2937 	struct dlb2_port *qm_port = &ev_port->qm_port;
2938 	struct process_local_port_data *port_data;
2939 	int i;
2940 
2941 	RTE_ASSERT(ev_port->enq_configured);
2942 	RTE_ASSERT(events != NULL);
2943 
2944 	i = 0;
2945 
2946 	port_data = &dlb2_port[qm_port->id][PORT_TYPE(qm_port)];
2947 
2948 	while (i < num) {
2949 		uint8_t sched_types[DLB2_NUM_QES_PER_CACHE_LINE];
2950 		uint8_t queue_ids[DLB2_NUM_QES_PER_CACHE_LINE];
2951 		int pop_offs = 0;
2952 		int j = 0;
2953 
2954 		memset(qm_port->qe4,
2955 		       0,
2956 		       DLB2_NUM_QES_PER_CACHE_LINE *
2957 		       sizeof(struct dlb2_enqueue_qe));
2958 
2959 		for (; j < DLB2_NUM_QES_PER_CACHE_LINE && (i + j) < num; j++) {
2960 			const struct rte_event *ev = &events[i + j];
2961 			int16_t thresh = qm_port->token_pop_thresh;
2962 
2963 			if (use_delayed &&
2964 			    qm_port->token_pop_mode == DELAYED_POP &&
2965 			    (ev->op == RTE_EVENT_OP_FORWARD ||
2966 			     ev->op == RTE_EVENT_OP_RELEASE) &&
2967 			    qm_port->issued_releases >= thresh - 1) {
2968 				/* Insert the token pop QE and break out. This
2969 				 * may result in a partial HCW, but that is
2970 				 * simpler than supporting arbitrary QE
2971 				 * insertion.
2972 				 */
2973 				dlb2_construct_token_pop_qe(qm_port, j);
2974 
2975 				/* Reset the releases for the next QE batch */
2976 				qm_port->issued_releases -= thresh;
2977 
2978 				pop_offs = 1;
2979 				j++;
2980 				break;
2981 			}
2982 
2983 			if (dlb2_event_enqueue_prep(ev_port, qm_port, ev,
2984 						    &sched_types[j],
2985 						    &queue_ids[j]))
2986 				break;
2987 		}
2988 
2989 		if (j == 0)
2990 			break;
2991 
2992 		dlb2_event_build_hcws(qm_port, &events[i], j - pop_offs,
2993 				      sched_types, queue_ids);
2994 
2995 		dlb2_hw_do_enqueue(qm_port, i == 0, port_data);
2996 
2997 		/* Don't include the token pop QE in the enqueue count */
2998 		i += j - pop_offs;
2999 
3000 		/* Don't interpret j < DLB2_NUM_... as out-of-credits if
3001 		 * pop_offs != 0
3002 		 */
3003 		if (j < DLB2_NUM_QES_PER_CACHE_LINE && pop_offs == 0)
3004 			break;
3005 	}
3006 
3007 	return i;
3008 }
3009 
3010 static uint16_t
3011 dlb2_event_enqueue_burst(void *event_port,
3012 			     const struct rte_event events[],
3013 			     uint16_t num)
3014 {
3015 	return __dlb2_event_enqueue_burst(event_port, events, num, false);
3016 }
3017 
3018 static uint16_t
3019 dlb2_event_enqueue_burst_delayed(void *event_port,
3020 				     const struct rte_event events[],
3021 				     uint16_t num)
3022 {
3023 	return __dlb2_event_enqueue_burst(event_port, events, num, true);
3024 }
3025 
3026 static inline uint16_t
3027 dlb2_event_enqueue(void *event_port,
3028 		   const struct rte_event events[])
3029 {
3030 	return __dlb2_event_enqueue_burst(event_port, events, 1, false);
3031 }
3032 
3033 static inline uint16_t
3034 dlb2_event_enqueue_delayed(void *event_port,
3035 			   const struct rte_event events[])
3036 {
3037 	return __dlb2_event_enqueue_burst(event_port, events, 1, true);
3038 }
3039 
3040 static uint16_t
3041 dlb2_event_enqueue_new_burst(void *event_port,
3042 			     const struct rte_event events[],
3043 			     uint16_t num)
3044 {
3045 	return __dlb2_event_enqueue_burst(event_port, events, num, false);
3046 }
3047 
3048 static uint16_t
3049 dlb2_event_enqueue_new_burst_delayed(void *event_port,
3050 				     const struct rte_event events[],
3051 				     uint16_t num)
3052 {
3053 	return __dlb2_event_enqueue_burst(event_port, events, num, true);
3054 }
3055 
3056 static uint16_t
3057 dlb2_event_enqueue_forward_burst(void *event_port,
3058 				 const struct rte_event events[],
3059 				 uint16_t num)
3060 {
3061 	return __dlb2_event_enqueue_burst(event_port, events, num, false);
3062 }
3063 
3064 static uint16_t
3065 dlb2_event_enqueue_forward_burst_delayed(void *event_port,
3066 					 const struct rte_event events[],
3067 					 uint16_t num)
3068 {
3069 	return __dlb2_event_enqueue_burst(event_port, events, num, true);
3070 }
3071 
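/*
 * Release 'n' events held by the port by issuing completion HCWs (skipped for
 * directed ports, whose releases are no-ops), then return the corresponding
 * software credits.
 */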
3072 static void
3073 dlb2_event_release(struct dlb2_eventdev *dlb2,
3074 		   uint8_t port_id,
3075 		   int n)
3076 {
3077 	struct process_local_port_data *port_data;
3078 	struct dlb2_eventdev_port *ev_port;
3079 	struct dlb2_port *qm_port;
3080 	int i;
3081 
3082 	if (port_id > dlb2->num_ports) {
3083 		DLB2_LOG_ERR("Invalid port id %d in dlb2_event_release\n",
3084 			     port_id);
3085 		rte_errno = -EINVAL;
3086 		return;
3087 	}
3088 
3089 	ev_port = &dlb2->ev_ports[port_id];
3090 	qm_port = &ev_port->qm_port;
3091 	port_data = &dlb2_port[qm_port->id][PORT_TYPE(qm_port)];
3092 
3093 	i = 0;
3094 
3095 	if (qm_port->is_directed) {
3096 		i = n;
3097 		goto sw_credit_update;
3098 	}
3099 
3100 	while (i < n) {
3101 		int pop_offs = 0;
3102 		int j = 0;
3103 
3104 		/* Zero-out QEs */
3105 		_mm_storeu_si128((void *)&qm_port->qe4[0], _mm_setzero_si128());
3106 		_mm_storeu_si128((void *)&qm_port->qe4[1], _mm_setzero_si128());
3107 		_mm_storeu_si128((void *)&qm_port->qe4[2], _mm_setzero_si128());
3108 		_mm_storeu_si128((void *)&qm_port->qe4[3], _mm_setzero_si128());
3109 
3111 		for (; j < DLB2_NUM_QES_PER_CACHE_LINE && (i + j) < n; j++) {
3112 			int16_t thresh = qm_port->token_pop_thresh;
3113 
3114 			if (qm_port->token_pop_mode == DELAYED_POP &&
3115 			    qm_port->issued_releases >= thresh - 1) {
3116 				/* Insert the token pop QE */
3117 				dlb2_construct_token_pop_qe(qm_port, j);
3118 
3119 				/* Reset the releases for the next QE batch */
3120 				qm_port->issued_releases -= thresh;
3121 
3122 				pop_offs = 1;
3123 				j++;
3124 				break;
3125 			}
3126 
3127 			qm_port->qe4[j].cmd_byte = DLB2_COMP_CMD_BYTE;
3128 			qm_port->issued_releases++;
3129 		}
3130 
3131 		dlb2_hw_do_enqueue(qm_port, i == 0, port_data);
3132 
3133 		/* Don't include the token pop QE in the release count */
3134 		i += j - pop_offs;
3135 	}
3136 
3137 sw_credit_update:
3138 	/* each release returns one credit */
3139 	if (unlikely(!ev_port->outstanding_releases)) {
3140 		DLB2_LOG_ERR("%s: Outstanding releases underflowed.\n",
3141 			     __func__);
3142 		return;
3143 	}
3144 	ev_port->outstanding_releases -= i;
3145 	ev_port->inflight_credits += i;
3146 
3147 	/* Replenish s/w credits if enough releases are performed */
3148 	dlb2_replenish_sw_credits(dlb2, ev_port);
3149 }
3150 
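/*
 * Credit dequeued events back to the port's local cache, spilling a batch to
 * the shared pool (the per-type LDB/DIR pools on DLB2_HW_V2, the combined
 * pool otherwise) once the cache holds at least two batches.
 */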
3151 static inline void
3152 dlb2_port_credits_inc(struct dlb2_port *qm_port, int num)
3153 {
3154 	uint32_t batch_size = qm_port->hw_credit_quanta;
3155 
3156 	/* increment port credits, and return to pool if exceeds threshold */
3157 	if (!qm_port->is_directed) {
3158 		if (qm_port->dlb2->version == DLB2_HW_V2) {
3159 			qm_port->cached_ldb_credits += num;
3160 			if (qm_port->cached_ldb_credits >= 2 * batch_size) {
3161 				__atomic_fetch_add(
3162 					qm_port->credit_pool[DLB2_LDB_QUEUE],
3163 					batch_size, __ATOMIC_SEQ_CST);
3164 				qm_port->cached_ldb_credits -= batch_size;
3165 			}
3166 		} else {
3167 			qm_port->cached_credits += num;
3168 			if (qm_port->cached_credits >= 2 * batch_size) {
3169 				__atomic_fetch_add(
3170 				      qm_port->credit_pool[DLB2_COMBINED_POOL],
3171 				      batch_size, __ATOMIC_SEQ_CST);
3172 				qm_port->cached_credits -= batch_size;
3173 			}
3174 		}
3175 	} else {
3176 		if (qm_port->dlb2->version == DLB2_HW_V2) {
3177 			qm_port->cached_dir_credits += num;
3178 			if (qm_port->cached_dir_credits >= 2 * batch_size) {
3179 				__atomic_fetch_add(
3180 					qm_port->credit_pool[DLB2_DIR_QUEUE],
3181 					batch_size, __ATOMIC_SEQ_CST);
3182 				qm_port->cached_dir_credits -= batch_size;
3183 			}
3184 		} else {
3185 			qm_port->cached_credits += num;
3186 			if (qm_port->cached_credits >= 2 * batch_size) {
3187 				__atomic_fetch_add(
3188 				      qm_port->credit_pool[DLB2_COMBINED_POOL],
3189 				      batch_size, __ATOMIC_SEQ_CST);
3190 				qm_port->cached_credits -= batch_size;
3191 			}
3192 		}
3193 	}
3194 }
3195 
3196 #define CLB_MASK_IDX 0
3197 #define CLB_VAL_IDX 1
3198 static int
3199 dlb2_monitor_callback(const uint64_t val,
3200 		const uint64_t opaque[RTE_POWER_MONITOR_OPAQUE_SZ])
3201 {
3202 	/* abort if the value matches */
3203 	return (val & opaque[CLB_MASK_IDX]) == opaque[CLB_VAL_IDX] ? -1 : 0;
3204 }
3205 
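/*
 * Wait for the CQ to become non-empty, either by arming UMONITOR/UMWAIT on
 * the next CQ entry's generation bit or by busy-polling for one poll
 * interval. Returns 1 once the dequeue timeout has expired, 0 otherwise.
 */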
3206 static inline int
3207 dlb2_dequeue_wait(struct dlb2_eventdev *dlb2,
3208 		  struct dlb2_eventdev_port *ev_port,
3209 		  struct dlb2_port *qm_port,
3210 		  uint64_t timeout,
3211 		  uint64_t start_ticks)
3212 {
3213 	struct process_local_port_data *port_data;
3214 	uint64_t elapsed_ticks;
3215 
3216 	port_data = &dlb2_port[qm_port->id][PORT_TYPE(qm_port)];
3217 
3218 	elapsed_ticks = rte_get_timer_cycles() - start_ticks;
3219 
3220 	/* Wait/poll time expired */
3221 	if (elapsed_ticks >= timeout) {
3222 		return 1;
3223 	} else if (dlb2->umwait_allowed) {
3224 		struct rte_power_monitor_cond pmc;
3225 		volatile struct dlb2_dequeue_qe *cq_base;
3226 		union {
3227 			uint64_t raw_qe[2];
3228 			struct dlb2_dequeue_qe qe;
3229 		} qe_mask;
3230 		uint64_t expected_value;
3231 		volatile uint64_t *monitor_addr;
3232 
3233 		qe_mask.qe.cq_gen = 1; /* set mask */
3234 
3235 		cq_base = port_data->cq_base;
3236 		monitor_addr = (volatile uint64_t *)(volatile void *)
3237 			&cq_base[qm_port->cq_idx];
3238 		monitor_addr++; /* cq_gen bit is in second 64bit location */
3239 
3240 		if (qm_port->gen_bit)
3241 			expected_value = qe_mask.raw_qe[1];
3242 		else
3243 			expected_value = 0;
3244 
3245 		pmc.addr = monitor_addr;
3246 		/* store expected value and comparison mask in opaque data */
3247 		pmc.opaque[CLB_VAL_IDX] = expected_value;
3248 		pmc.opaque[CLB_MASK_IDX] = qe_mask.raw_qe[1];
3249 		/* set up callback */
3250 		pmc.fn = dlb2_monitor_callback;
3251 		pmc.size = sizeof(uint64_t);
3252 
3253 		rte_power_monitor(&pmc, timeout + start_ticks);
3254 
3255 		DLB2_INC_STAT(ev_port->stats.traffic.rx_umonitor_umwait, 1);
3256 	} else {
3257 		uint64_t poll_interval = dlb2->poll_interval;
3258 		uint64_t curr_ticks = rte_get_timer_cycles();
3259 		uint64_t init_ticks = curr_ticks;
3260 
3261 		while ((curr_ticks - start_ticks < timeout) &&
3262 		       (curr_ticks - init_ticks < poll_interval))
3263 			curr_ticks = rte_get_timer_cycles();
3264 	}
3265 
3266 	return 0;
3267 }
3268 
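/*
 * Scalar QE-to-rte_event conversion path, also used as the slow path when any
 * QE has its error bit set (such QEs are dropped and their CQ token returned
 * immediately). Returns the number of events written.
 */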
3269 static __rte_noinline int
3270 dlb2_process_dequeue_qes(struct dlb2_eventdev_port *ev_port,
3271 			 struct dlb2_port *qm_port,
3272 			 struct rte_event *events,
3273 			 struct dlb2_dequeue_qe *qes,
3274 			 int cnt)
3275 {
3276 	uint8_t *qid_mappings = qm_port->qid_mappings;
3277 	int i, num, evq_id;
3278 
3279 	for (i = 0, num = 0; i < cnt; i++) {
3280 		struct dlb2_dequeue_qe *qe = &qes[i];
3281 		int sched_type_map[DLB2_NUM_HW_SCHED_TYPES] = {
3282 			[DLB2_SCHED_ATOMIC] = RTE_SCHED_TYPE_ATOMIC,
3283 			[DLB2_SCHED_UNORDERED] = RTE_SCHED_TYPE_PARALLEL,
3284 			[DLB2_SCHED_ORDERED] = RTE_SCHED_TYPE_ORDERED,
3285 			[DLB2_SCHED_DIRECTED] = RTE_SCHED_TYPE_ATOMIC,
3286 		};
3287 
3288 		/* Fill in event information.
3289 		 * Note that flow_id must be embedded in the data by
3290 		 * the app, such as the mbuf RSS hash field if the data
3291 		 * buffer is a mbuf.
3292 		 */
3293 		if (unlikely(qe->error)) {
3294 			DLB2_LOG_ERR("QE error bit ON\n");
3295 			DLB2_INC_STAT(ev_port->stats.traffic.rx_drop, 1);
3296 			dlb2_consume_qe_immediate(qm_port, 1);
3297 			continue; /* Ignore */
3298 		}
3299 
3300 		events[num].u64 = qe->data;
3301 		events[num].flow_id = qe->flow_id;
3302 		events[num].priority = DLB2_TO_EV_PRIO((uint8_t)qe->priority);
3303 		events[num].event_type = qe->u.event_type.major;
3304 		events[num].sub_event_type = qe->u.event_type.sub;
3305 		events[num].sched_type = sched_type_map[qe->sched_type];
3306 		events[num].impl_opaque = qe->qid_depth;
3307 
3308 		/* qid not preserved for directed queues */
3309 		if (qm_port->is_directed)
3310 			evq_id = ev_port->link[0].queue_id;
3311 		else
3312 			evq_id = qid_mappings[qe->qid];
3313 
3314 		events[num].queue_id = evq_id;
3315 		DLB2_INC_STAT(
3316 			ev_port->stats.queue[evq_id].qid_depth[qe->qid_depth],
3317 			1);
3318 		DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qe->sched_type], 1);
3319 		num++;
3320 	}
3321 
3322 	DLB2_INC_STAT(ev_port->stats.traffic.rx_ok, num);
3323 
3324 	return num;
3325 }
3326 
3327 static inline int
3328 dlb2_process_dequeue_four_qes(struct dlb2_eventdev_port *ev_port,
3329 			      struct dlb2_port *qm_port,
3330 			      struct rte_event *events,
3331 			      struct dlb2_dequeue_qe *qes)
3332 {
3333 	int sched_type_map[] = {
3334 		[DLB2_SCHED_ATOMIC] = RTE_SCHED_TYPE_ATOMIC,
3335 		[DLB2_SCHED_UNORDERED] = RTE_SCHED_TYPE_PARALLEL,
3336 		[DLB2_SCHED_ORDERED] = RTE_SCHED_TYPE_ORDERED,
3337 		[DLB2_SCHED_DIRECTED] = RTE_SCHED_TYPE_ATOMIC,
3338 	};
3339 	const int num_events = DLB2_NUM_QES_PER_CACHE_LINE;
3340 	uint8_t *qid_mappings = qm_port->qid_mappings;
3341 	__m128i sse_evt[2];
3342 
3343 	/* In the unlikely case that any of the QE error bits are set, process
3344 	 * them one at a time.
3345 	 */
3346 	if (unlikely(qes[0].error || qes[1].error ||
3347 		     qes[2].error || qes[3].error))
3348 		return dlb2_process_dequeue_qes(ev_port, qm_port, events,
3349 						 qes, num_events);
3350 
3351 	events[0].u64 = qes[0].data;
3352 	events[1].u64 = qes[1].data;
3353 	events[2].u64 = qes[2].data;
3354 	events[3].u64 = qes[3].data;
3355 
3356 	/* Construct the metadata portion of two struct rte_events
3357 	 * in one 128b SSE register. Event metadata is constructed in the SSE
3358 	 * registers like so:
3359 	 * sse_evt[0][63:0]:   event[0]'s metadata
3360 	 * sse_evt[0][127:64]: event[1]'s metadata
3361 	 * sse_evt[1][63:0]:   event[2]'s metadata
3362 	 * sse_evt[1][127:64]: event[3]'s metadata
3363 	 */
3364 	sse_evt[0] = _mm_setzero_si128();
3365 	sse_evt[1] = _mm_setzero_si128();
3366 
3367 	/* Convert the hardware queue ID to an event queue ID and store it in
3368 	 * the metadata:
3369 	 * sse_evt[0][47:40]   = qid_mappings[qes[0].qid]
3370 	 * sse_evt[0][111:104] = qid_mappings[qes[1].qid]
3371 	 * sse_evt[1][47:40]   = qid_mappings[qes[2].qid]
3372 	 * sse_evt[1][111:104] = qid_mappings[qes[3].qid]
3373 	 */
3374 #define DLB_EVENT_QUEUE_ID_BYTE 5
3375 	sse_evt[0] = _mm_insert_epi8(sse_evt[0],
3376 				     qid_mappings[qes[0].qid],
3377 				     DLB_EVENT_QUEUE_ID_BYTE);
3378 	sse_evt[0] = _mm_insert_epi8(sse_evt[0],
3379 				     qid_mappings[qes[1].qid],
3380 				     DLB_EVENT_QUEUE_ID_BYTE + 8);
3381 	sse_evt[1] = _mm_insert_epi8(sse_evt[1],
3382 				     qid_mappings[qes[2].qid],
3383 				     DLB_EVENT_QUEUE_ID_BYTE);
3384 	sse_evt[1] = _mm_insert_epi8(sse_evt[1],
3385 				     qid_mappings[qes[3].qid],
3386 				     DLB_EVENT_QUEUE_ID_BYTE + 8);
3387 
3388 	/* Convert the hardware priority to an event priority and store it in
3389 	 * the metadata, while also returning the queue depth status
3390 	 * value captured by the hardware, storing it in impl_opaque, which can
3391 	 * be read by the application but not modified
3392 	 * sse_evt[0][55:48]   = DLB2_TO_EV_PRIO(qes[0].priority)
3393 	 * sse_evt[0][63:56]   = qes[0].qid_depth
3394 	 * sse_evt[0][119:112] = DLB2_TO_EV_PRIO(qes[1].priority)
3395 	 * sse_evt[0][127:120] = qes[1].qid_depth
3396 	 * sse_evt[1][55:48]   = DLB2_TO_EV_PRIO(qes[2].priority)
3397 	 * sse_evt[1][63:56]   = qes[2].qid_depth
3398 	 * sse_evt[1][119:112] = DLB2_TO_EV_PRIO(qes[3].priority)
3399 	 * sse_evt[1][127:120] = qes[3].qid_depth
3400 	 */
3401 #define DLB_EVENT_PRIO_IMPL_OPAQUE_WORD 3
3402 #define DLB_BYTE_SHIFT 8
3403 	sse_evt[0] =
3404 		_mm_insert_epi16(sse_evt[0],
3405 			DLB2_TO_EV_PRIO((uint8_t)qes[0].priority) |
3406 			(qes[0].qid_depth << DLB_BYTE_SHIFT),
3407 			DLB_EVENT_PRIO_IMPL_OPAQUE_WORD);
3408 	sse_evt[0] =
3409 		_mm_insert_epi16(sse_evt[0],
3410 			DLB2_TO_EV_PRIO((uint8_t)qes[1].priority) |
3411 			(qes[1].qid_depth << DLB_BYTE_SHIFT),
3412 			DLB_EVENT_PRIO_IMPL_OPAQUE_WORD + 4);
3413 	sse_evt[1] =
3414 		_mm_insert_epi16(sse_evt[1],
3415 			DLB2_TO_EV_PRIO((uint8_t)qes[2].priority) |
3416 			(qes[2].qid_depth << DLB_BYTE_SHIFT),
3417 			DLB_EVENT_PRIO_IMPL_OPAQUE_WORD);
3418 	sse_evt[1] =
3419 		_mm_insert_epi16(sse_evt[1],
3420 			DLB2_TO_EV_PRIO((uint8_t)qes[3].priority) |
3421 			(qes[3].qid_depth << DLB_BYTE_SHIFT),
3422 			DLB_EVENT_PRIO_IMPL_OPAQUE_WORD + 4);
3423 
3424 	/* Write the event type, sub event type, and flow_id to the event
3425 	 * metadata.
3426 	 * sse_evt[0][31:0]   = qes[0].flow_id |
3427 	 *			qes[0].u.event_type.major << 28 |
3428 	 *			qes[0].u.event_type.sub << 20;
3429 	 * sse_evt[0][95:64]  = qes[1].flow_id |
3430 	 *			qes[1].u.event_type.major << 28 |
3431 	 *			qes[1].u.event_type.sub << 20;
3432 	 * sse_evt[1][31:0]   = qes[2].flow_id |
3433 	 *			qes[2].u.event_type.major << 28 |
3434 	 *			qes[2].u.event_type.sub << 20;
3435 	 * sse_evt[1][95:64]  = qes[3].flow_id |
3436 	 *			qes[3].u.event_type.major << 28 |
3437 	 *			qes[3].u.event_type.sub << 20;
3438 	 */
3439 #define DLB_EVENT_EV_TYPE_DW 0
3440 #define DLB_EVENT_EV_TYPE_SHIFT 28
3441 #define DLB_EVENT_SUB_EV_TYPE_SHIFT 20
3442 	sse_evt[0] = _mm_insert_epi32(sse_evt[0],
3443 			qes[0].flow_id |
3444 			qes[0].u.event_type.major << DLB_EVENT_EV_TYPE_SHIFT |
3445 			qes[0].u.event_type.sub <<  DLB_EVENT_SUB_EV_TYPE_SHIFT,
3446 			DLB_EVENT_EV_TYPE_DW);
3447 	sse_evt[0] = _mm_insert_epi32(sse_evt[0],
3448 			qes[1].flow_id |
3449 			qes[1].u.event_type.major << DLB_EVENT_EV_TYPE_SHIFT |
3450 			qes[1].u.event_type.sub <<  DLB_EVENT_SUB_EV_TYPE_SHIFT,
3451 			DLB_EVENT_EV_TYPE_DW + 2);
3452 	sse_evt[1] = _mm_insert_epi32(sse_evt[1],
3453 			qes[2].flow_id |
3454 			qes[2].u.event_type.major << DLB_EVENT_EV_TYPE_SHIFT |
3455 			qes[2].u.event_type.sub <<  DLB_EVENT_SUB_EV_TYPE_SHIFT,
3456 			DLB_EVENT_EV_TYPE_DW);
3457 	sse_evt[1] = _mm_insert_epi32(sse_evt[1],
3458 			qes[3].flow_id |
3459 			qes[3].u.event_type.major << DLB_EVENT_EV_TYPE_SHIFT  |
3460 			qes[3].u.event_type.sub << DLB_EVENT_SUB_EV_TYPE_SHIFT,
3461 			DLB_EVENT_EV_TYPE_DW + 2);
3462 
3463 	/* Write the sched type to the event metadata. 'op' and 'rsvd' are not
3464 	 * set:
3465 	 * sse_evt[0][39:32]  = sched_type_map[qes[0].sched_type] << 6
3466 	 * sse_evt[0][103:96] = sched_type_map[qes[1].sched_type] << 6
3467 	 * sse_evt[1][39:32]  = sched_type_map[qes[2].sched_type] << 6
3468 	 * sse_evt[1][103:96] = sched_type_map[qes[3].sched_type] << 6
3469 	 */
3470 #define DLB_EVENT_SCHED_TYPE_BYTE 4
3471 #define DLB_EVENT_SCHED_TYPE_SHIFT 6
3472 	sse_evt[0] = _mm_insert_epi8(sse_evt[0],
3473 		sched_type_map[qes[0].sched_type] << DLB_EVENT_SCHED_TYPE_SHIFT,
3474 		DLB_EVENT_SCHED_TYPE_BYTE);
3475 	sse_evt[0] = _mm_insert_epi8(sse_evt[0],
3476 		sched_type_map[qes[1].sched_type] << DLB_EVENT_SCHED_TYPE_SHIFT,
3477 		DLB_EVENT_SCHED_TYPE_BYTE + 8);
3478 	sse_evt[1] = _mm_insert_epi8(sse_evt[1],
3479 		sched_type_map[qes[2].sched_type] << DLB_EVENT_SCHED_TYPE_SHIFT,
3480 		DLB_EVENT_SCHED_TYPE_BYTE);
3481 	sse_evt[1] = _mm_insert_epi8(sse_evt[1],
3482 		sched_type_map[qes[3].sched_type] << DLB_EVENT_SCHED_TYPE_SHIFT,
3483 		DLB_EVENT_SCHED_TYPE_BYTE + 8);
3484 
3485 	/* Store the metadata to the event (use the double-precision
3486 	 * _mm_storeh_pd because there is no integer function for storing the
3487 	 * upper 64b):
3488 	 * events[0].event = sse_evt[0][63:0]
3489 	 * events[1].event = sse_evt[0][127:64]
3490 	 * events[2].event = sse_evt[1][63:0]
3491 	 * events[3].event = sse_evt[1][127:64]
3492 	 */
3493 	_mm_storel_epi64((__m128i *)&events[0].event, sse_evt[0]);
3494 	_mm_storeh_pd((double *)&events[1].event, (__m128d) sse_evt[0]);
3495 	_mm_storel_epi64((__m128i *)&events[2].event, sse_evt[1]);
3496 	_mm_storeh_pd((double *)&events[3].event, (__m128d) sse_evt[1]);
3497 
3498 	DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qes[0].sched_type], 1);
3499 	DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qes[1].sched_type], 1);
3500 	DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qes[2].sched_type], 1);
3501 	DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qes[3].sched_type], 1);
3502 
3503 	DLB2_INC_STAT(
3504 		ev_port->stats.queue[events[0].queue_id].
3505 			qid_depth[qes[0].qid_depth],
3506 		1);
3507 	DLB2_INC_STAT(
3508 		ev_port->stats.queue[events[1].queue_id].
3509 			qid_depth[qes[1].qid_depth],
3510 		1);
3511 	DLB2_INC_STAT(
3512 		ev_port->stats.queue[events[2].queue_id].
3513 			qid_depth[qes[2].qid_depth],
3514 		1);
3515 	DLB2_INC_STAT(
3516 		ev_port->stats.queue[events[3].queue_id].
3517 			qid_depth[qes[3].qid_depth],
3518 		1);
3519 
3520 	DLB2_INC_STAT(ev_port->stats.traffic.rx_ok, num_events);
3521 
3522 	return num_events;
3523 }
3524 
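/*
 * Read up to four QEs from a sparse-mode CQ, where hardware writes one QE per
 * cache line (every fourth CQ entry). The generation bits are gathered and
 * XORed against the expected phase to count how many of the QEs are valid.
 */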
3525 static __rte_always_inline int
3526 dlb2_recv_qe_sparse(struct dlb2_port *qm_port, struct dlb2_dequeue_qe *qe)
3527 {
3528 	volatile struct dlb2_dequeue_qe *cq_addr;
3529 	uint8_t xor_mask[2] = {0x0F, 0x00};
3530 	const uint8_t and_mask = 0x0F;
3531 	__m128i *qes = (__m128i *)qe;
3532 	uint8_t gen_bits, gen_bit;
3533 	uintptr_t addr[4];
3534 	uint16_t idx;
3535 
3536 	cq_addr = dlb2_port[qm_port->id][PORT_TYPE(qm_port)].cq_base;
3537 
3538 	idx = qm_port->cq_idx_unmasked & qm_port->cq_depth_mask;
3539 	/* Load the next 4 QEs */
3540 	addr[0] = (uintptr_t)&cq_addr[idx];
3541 	addr[1] = (uintptr_t)&cq_addr[(idx +  4) & qm_port->cq_depth_mask];
3542 	addr[2] = (uintptr_t)&cq_addr[(idx +  8) & qm_port->cq_depth_mask];
3543 	addr[3] = (uintptr_t)&cq_addr[(idx + 12) & qm_port->cq_depth_mask];
3544 
3545 	/* Prefetch next batch of QEs (all CQs occupy minimum 8 cache lines) */
3546 	rte_prefetch0(&cq_addr[(idx + 16) & qm_port->cq_depth_mask]);
3547 	rte_prefetch0(&cq_addr[(idx + 20) & qm_port->cq_depth_mask]);
3548 	rte_prefetch0(&cq_addr[(idx + 24) & qm_port->cq_depth_mask]);
3549 	rte_prefetch0(&cq_addr[(idx + 28) & qm_port->cq_depth_mask]);
3550 
3551 	/* Correct the xor_mask for wrap-around QEs */
3552 	gen_bit = qm_port->gen_bit;
3553 	xor_mask[gen_bit] ^= !!((idx +  4) > qm_port->cq_depth_mask) << 1;
3554 	xor_mask[gen_bit] ^= !!((idx +  8) > qm_port->cq_depth_mask) << 2;
3555 	xor_mask[gen_bit] ^= !!((idx + 12) > qm_port->cq_depth_mask) << 3;
3556 
3557 	/* Read the cache lines backwards to ensure that if QE[N] (N > 0) is
3558 	 * valid, then QEs[0:N-1] are too.
3559 	 */
3560 	qes[3] = _mm_load_si128((__m128i *)(void *)addr[3]);
3561 	rte_compiler_barrier();
3562 	qes[2] = _mm_load_si128((__m128i *)(void *)addr[2]);
3563 	rte_compiler_barrier();
3564 	qes[1] = _mm_load_si128((__m128i *)(void *)addr[1]);
3565 	rte_compiler_barrier();
3566 	qes[0] = _mm_load_si128((__m128i *)(void *)addr[0]);
3567 
3568 	/* Extract and combine the gen bits */
3569 	gen_bits = ((_mm_extract_epi8(qes[0], 15) & 0x1) << 0) |
3570 		   ((_mm_extract_epi8(qes[1], 15) & 0x1) << 1) |
3571 		   ((_mm_extract_epi8(qes[2], 15) & 0x1) << 2) |
3572 		   ((_mm_extract_epi8(qes[3], 15) & 0x1) << 3);
3573 
3574 	/* XOR the combined bits such that a 1 represents a valid QE */
3575 	gen_bits ^= xor_mask[gen_bit];
3576 
3577 	/* Mask off gen bits we don't care about */
3578 	gen_bits &= and_mask;
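	/* Illustrative example: if gen_bit == 1 and the hardware has written
	 * two new QEs, the collected gen bits are 0b0011; xor_mask[1] == 0x00
	 * leaves them unchanged, and the popcount below reports 2 valid QEs
	 * (valid QEs are contiguous from index 0 due to the backwards reads).
	 */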
3579 
3580 	return __builtin_popcount(gen_bits);
3581 }
3582 
3583 static inline void
3584 _process_deq_qes_vec_impl(struct dlb2_port *qm_port,
3585 			  struct rte_event *events,
3586 			  __m128i v_qe_3,
3587 			  __m128i v_qe_2,
3588 			  __m128i v_qe_1,
3589 			  __m128i v_qe_0,
3590 			  __m128i v_qe_meta,
3591 			  __m128i v_qe_status,
3592 			  uint32_t valid_events)
3593 {
3594 	/* Look up the event QIDs, using the hardware QIDs to index the
3595 	 * port's QID mapping.
3596 	 *
3597 	 * Each v_qe_[0-3] is just a 16-byte load of the whole QE. It is
3598 	 * passed along in registers as the QE data is required later.
3599 	 *
3600 	 * v_qe_meta is a u32 unpack of all 4x QEs, i.e. it contains one
3601 	 * 32-bit slice of each QE, so it makes up a full SSE register. This
3602 	 * allows parallel processing of 4x QEs in a single register.
3603 	 */
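	/* As used below, each 32-bit lane of v_qe_meta corresponds to QE bytes
	 * 8-11: byte 2 of the lane carries the hardware QID, and byte 3
	 * carries the 2-bit sched_type (bits 0-1) plus the 3-bit priority
	 * (bits 2-4) extracted further down.
	 */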
3604 
3605 	__m128i v_qid_done = {0};
3606 	int hw_qid0 = _mm_extract_epi8(v_qe_meta, 2);
3607 	int hw_qid1 = _mm_extract_epi8(v_qe_meta, 6);
3608 	int hw_qid2 = _mm_extract_epi8(v_qe_meta, 10);
3609 	int hw_qid3 = _mm_extract_epi8(v_qe_meta, 14);
3610 
3611 	int ev_qid0 = qm_port->qid_mappings[hw_qid0];
3612 	int ev_qid1 = qm_port->qid_mappings[hw_qid1];
3613 	int ev_qid2 = qm_port->qid_mappings[hw_qid2];
3614 	int ev_qid3 = qm_port->qid_mappings[hw_qid3];
3615 
3616 	int hw_sched0 = _mm_extract_epi8(v_qe_meta, 3) & 3ul;
3617 	int hw_sched1 = _mm_extract_epi8(v_qe_meta, 7) & 3ul;
3618 	int hw_sched2 = _mm_extract_epi8(v_qe_meta, 11) & 3ul;
3619 	int hw_sched3 = _mm_extract_epi8(v_qe_meta, 15) & 3ul;
3620 
3621 	v_qid_done = _mm_insert_epi8(v_qid_done, ev_qid0, 2);
3622 	v_qid_done = _mm_insert_epi8(v_qid_done, ev_qid1, 6);
3623 	v_qid_done = _mm_insert_epi8(v_qid_done, ev_qid2, 10);
3624 	v_qid_done = _mm_insert_epi8(v_qid_done, ev_qid3, 14);
3625 
3626 	/* Schedule field remapping using byte shuffle
3627 	 * - Full byte containing sched field handled here (op, rsvd are zero)
3628 	 * - Note sanitizing the register requires two masking ANDs:
3629 	 *   1) to strip prio/msg_type from byte for correct shuffle lookup
3630 	 *   2) to strip any non-sched-field lanes from any results to OR later
3631 	 * - Final byte result is >> 10 into another byte-lane of the u32,
3632 	 *   positioning it for the final OR that builds the rte_event word.
3633 	 */
3634 	__m128i v_sched_done;
3635 	__m128i v_sched_bits;
3636 	{
3637 		static const uint8_t sched_type_map[16] = {
3638 			[DLB2_SCHED_ATOMIC] = RTE_SCHED_TYPE_ATOMIC,
3639 			[DLB2_SCHED_UNORDERED] = RTE_SCHED_TYPE_PARALLEL,
3640 			[DLB2_SCHED_ORDERED] = RTE_SCHED_TYPE_ORDERED,
3641 			[DLB2_SCHED_DIRECTED] = RTE_SCHED_TYPE_ATOMIC,
3642 		};
3643 		static const uint8_t sched_and_mask[16] = {
3644 			0x00, 0x00, 0x00, 0x03,
3645 			0x00, 0x00, 0x00, 0x03,
3646 			0x00, 0x00, 0x00, 0x03,
3647 			0x00, 0x00, 0x00, 0x03,
3648 		};
3649 		const __m128i v_sched_map = _mm_loadu_si128(
3650 					     (const __m128i *)sched_type_map);
3651 		__m128i v_sched_mask = _mm_loadu_si128(
3652 					     (const __m128i *)&sched_and_mask);
3653 		v_sched_bits = _mm_and_si128(v_qe_meta, v_sched_mask);
3654 		__m128i v_sched_remapped = _mm_shuffle_epi8(v_sched_map,
3655 							    v_sched_bits);
3656 		__m128i v_preshift = _mm_and_si128(v_sched_remapped,
3657 						   v_sched_mask);
3658 		v_sched_done = _mm_srli_epi32(v_preshift, 10);
3659 	}
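	/* Net effect (sketch): each lane of v_sched_done now holds
	 * sched_type_map[qe_sched_type & 3] at lane bits 14-15, which the
	 * >> 8 applied to the combined word below moves into the rte_event
	 * sched_type field.
	 */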
3660 
3661 	/* Priority handling
3662 	 * - QE provides 3 bits of priority
3663 	 * - Shift << 3 to move to MSBs for byte-prio in rte_event
3664 	 * - Mask bits to avoid pollution, leaving only 3 prio MSBs in reg
3665 	 */
3666 	__m128i v_prio_done;
3667 	{
3668 		static const uint8_t prio_mask[16] = {
3669 			0x00, 0x00, 0x00, 0x07 << 5,
3670 			0x00, 0x00, 0x00, 0x07 << 5,
3671 			0x00, 0x00, 0x00, 0x07 << 5,
3672 			0x00, 0x00, 0x00, 0x07 << 5,
3673 		};
3674 		__m128i v_prio_mask  = _mm_loadu_si128(
3675 						(const __m128i *)prio_mask);
3676 		__m128i v_prio_shifted = _mm_slli_epi32(v_qe_meta, 3);
3677 		v_prio_done = _mm_and_si128(v_prio_shifted, v_prio_mask);
3678 	}
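	/* Net effect (sketch): the 3-bit QE priority ends up as the top three
	 * bits of the 8-bit rte_event priority, i.e. equivalent to a scalar
	 * (qe_priority << 5), once the combined word is shifted right by 8
	 * below.
	 */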
3679 
3680 	/* Event Sub/Type handling:
3681 	 * we keep the 4-bit event type and the 8-bit sub event type from each
3682 	 * QE's metadata word, then shift up by 20 bits so they land in their
3683 	 * rte_event positions, clearing the lower 20 bits in the process.
3684 	 */
3685 	__m128i v_types_done;
3686 	{
3687 		static const uint8_t event_mask[16] = {
3688 			0x0f, 0x00, 0x00, 0x00,
3689 			0x0f, 0x00, 0x00, 0x00,
3690 			0x0f, 0x00, 0x00, 0x00,
3691 			0x0f, 0x00, 0x00, 0x00,
3692 		};
3693 		static const uint8_t sub_event_mask[16] = {
3694 			0xff, 0x00, 0x00, 0x00,
3695 			0xff, 0x00, 0x00, 0x00,
3696 			0xff, 0x00, 0x00, 0x00,
3697 			0xff, 0x00, 0x00, 0x00,
3698 		};
3699 		static const uint8_t flow_mask[16] = {
3700 			0xff, 0xff, 0x00, 0x00,
3701 			0xff, 0xff, 0x00, 0x00,
3702 			0xff, 0xff, 0x00, 0x00,
3703 			0xff, 0xff, 0x00, 0x00,
3704 		};
3705 		__m128i v_event_mask  = _mm_loadu_si128(
3706 					(const __m128i *)event_mask);
3707 		__m128i v_sub_event_mask  = _mm_loadu_si128(
3708 					(const __m128i *)sub_event_mask);
3709 		__m128i v_flow_mask  = _mm_loadu_si128(
3710 				       (const __m128i *)flow_mask);
3711 		__m128i v_sub = _mm_srli_epi32(v_qe_meta, 8);
3712 		v_sub = _mm_and_si128(v_sub, v_sub_event_mask);
3713 		__m128i v_type = _mm_and_si128(v_qe_meta, v_event_mask);
3714 		v_type = _mm_slli_epi32(v_type, 8);
3715 		v_types_done = _mm_or_si128(v_type, v_sub);
3716 		v_types_done = _mm_slli_epi32(v_types_done, 20);
3717 		__m128i v_flow = _mm_and_si128(v_qe_status, v_flow_mask);
3718 		v_types_done = _mm_or_si128(v_types_done, v_flow);
3719 	}
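	/* Each lane of v_types_done now matches the low 32 bits of the
	 * rte_event metadata word: flow id in bits 0-15, sub event type in
	 * bits 20-27 and event type in bits 28-31 (bits 16-19 are left zero).
	 */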
3720 
3721 	/* Combine QID, Sched and Prio fields, then Shift >> 8 bits to align
3722 	 * with the rte_event, allowing unpacks to move/blend with payload.
3723 	 */
3724 	__m128i v_q_s_p_done;
3725 	{
3726 		__m128i v_qid_sched = _mm_or_si128(v_qid_done, v_sched_done);
3727 		__m128i v_q_s_prio = _mm_or_si128(v_qid_sched, v_prio_done);
3728 		v_q_s_p_done = _mm_srli_epi32(v_q_s_prio, 8);
3729 	}
3730 
3731 	__m128i v_unpk_ev_23, v_unpk_ev_01, v_ev_2, v_ev_3, v_ev_0, v_ev_1;
3732 
3733 	/* Unpack events into u64 metadata, then individual events */
3734 	v_unpk_ev_23 = _mm_unpackhi_epi32(v_types_done, v_q_s_p_done);
3735 	v_unpk_ev_01 = _mm_unpacklo_epi32(v_types_done, v_q_s_p_done);
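	/* The blend/alignr and unpacklo_epi64 steps below pair each event's
	 * combined 64-bit metadata (types word plus qid/sched/prio word) with
	 * the 64-bit payload from its original QE load, so each 16B store
	 * writes a complete rte_event: the metadata u64 followed by the
	 * payload u64.
	 */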
3736 
3737 	switch (valid_events) {
3738 	case 4:
3739 		v_ev_3 = _mm_blend_epi16(v_unpk_ev_23, v_qe_3, 0x0F);
3740 		v_ev_3 = _mm_alignr_epi8(v_ev_3, v_ev_3, 8);
3741 		_mm_storeu_si128((__m128i *)&events[3], v_ev_3);
3742 		DLB2_INC_STAT(qm_port->ev_port->stats.rx_sched_cnt[hw_sched3],
3743 			      1);
3744 		/* fallthrough */
3745 	case 3:
3746 		v_ev_2 = _mm_unpacklo_epi64(v_unpk_ev_23, v_qe_2);
3747 		_mm_storeu_si128((__m128i *)&events[2], v_ev_2);
3748 		DLB2_INC_STAT(qm_port->ev_port->stats.rx_sched_cnt[hw_sched2],
3749 			      1);
3750 		/* fallthrough */
3751 	case 2:
3752 		v_ev_1 = _mm_blend_epi16(v_unpk_ev_01, v_qe_1, 0x0F);
3753 		v_ev_1 = _mm_alignr_epi8(v_ev_1, v_ev_1, 8);
3754 		_mm_storeu_si128((__m128i *)&events[1], v_ev_1);
3755 		DLB2_INC_STAT(qm_port->ev_port->stats.rx_sched_cnt[hw_sched1],
3756 			      1);
3757 		/* fallthrough */
3758 	case 1:
3759 		v_ev_0 = _mm_unpacklo_epi64(v_unpk_ev_01, v_qe_0);
3760 		_mm_storeu_si128((__m128i *)&events[0], v_ev_0);
3761 		DLB2_INC_STAT(qm_port->ev_port->stats.rx_sched_cnt[hw_sched0],
3762 			      1);
3763 	}
3764 }
3765 
3766 static __rte_always_inline int
3767 dlb2_recv_qe_sparse_vec(struct dlb2_port *qm_port, void *events,
3768 			uint32_t max_events)
3769 {
3770 	/* Using unmasked idx for perf, and masking manually */
3771 	uint16_t idx = qm_port->cq_idx_unmasked;
3772 	volatile struct dlb2_dequeue_qe *cq_addr;
3773 
3774 	cq_addr = dlb2_port[qm_port->id][PORT_TYPE(qm_port)].cq_base;
3775 
3776 	uintptr_t qe_ptr_3 = (uintptr_t)&cq_addr[(idx + 12) &
3777 						 qm_port->cq_depth_mask];
3778 	uintptr_t qe_ptr_2 = (uintptr_t)&cq_addr[(idx +  8) &
3779 						 qm_port->cq_depth_mask];
3780 	uintptr_t qe_ptr_1 = (uintptr_t)&cq_addr[(idx +  4) &
3781 						 qm_port->cq_depth_mask];
3782 	uintptr_t qe_ptr_0 = (uintptr_t)&cq_addr[(idx +  0) &
3783 						 qm_port->cq_depth_mask];
3784 
3785 	/* Load QEs from CQ: use compiler barriers to avoid load reordering */
3786 	__m128i v_qe_3 = _mm_loadu_si128((const __m128i *)qe_ptr_3);
3787 	rte_compiler_barrier();
3788 	__m128i v_qe_2 = _mm_loadu_si128((const __m128i *)qe_ptr_2);
3789 	rte_compiler_barrier();
3790 	__m128i v_qe_1 = _mm_loadu_si128((const __m128i *)qe_ptr_1);
3791 	rte_compiler_barrier();
3792 	__m128i v_qe_0 = _mm_loadu_si128((const __m128i *)qe_ptr_0);
3793 
3794 	/* Generate the status-byte shuffle mask in registers:
3795 	 * - Avoids a load in an otherwise load-heavy section of code
3796 	 * - Moves bytes 3,7,11,15 (gen bit bytes) to the LSB bytes of the XMM
3797 	 */
3798 	const uint32_t stat_shuf_bytes = (15 << 24) | (11 << 16) | (7 << 8) | 3;
3799 	__m128i v_zeros = _mm_setzero_si128();
3800 	__m128i v_ffff = _mm_cmpeq_epi8(v_zeros, v_zeros);
3801 	__m128i v_stat_shuf_mask = _mm_insert_epi32(v_ffff, stat_shuf_bytes, 0);
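	/* v_ffff is all-ones; shuffle control bytes with their MSB set zero the
	 * corresponding output byte, so only lanes 0-3 of the shuffle result
	 * below are populated.
	 */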
3802 
3803 	/* Extract u32 components required from the QE
3804 	 * - QE[64 to 95 ] for metadata (qid, sched, prio, event type, ...)
3805 	 * - QE[96 to 127] for status (cq gen bit, error)
3806 	 *
3807 	 * Note that stage 1 of the unpacking is re-used for both u32 extracts
3808 	 */
3809 	__m128i v_qe_02 = _mm_unpackhi_epi32(v_qe_0, v_qe_2);
3810 	__m128i v_qe_13 = _mm_unpackhi_epi32(v_qe_1, v_qe_3);
3811 	__m128i v_qe_status = _mm_unpackhi_epi32(v_qe_02, v_qe_13);
3812 	__m128i v_qe_meta   = _mm_unpacklo_epi32(v_qe_02, v_qe_13);
3813 
3814 	/* Status byte (gen_bit, error) handling:
3815 	 * - Shuffle the status bytes to lanes 0,1,2,3, clearing all others
3816 	 * - Shift left by 7 to move the gen bit into each byte's MSB, then
3817 	 *   movemask to a scalar bitmap of the four gen bits
3818 	 */
3819 	__m128i v_qe_shuffled = _mm_shuffle_epi8(v_qe_status, v_stat_shuf_mask);
3820 	__m128i v_qes_shift_gen_bit = _mm_slli_epi32(v_qe_shuffled, 7);
3821 	int32_t qe_gen_bits = _mm_movemask_epi8(v_qes_shift_gen_bit) & 0xf;
3822 
3823 	/* Expected vs Reality of QE Gen bits
3824 	 * - cq_rolling_mask provides expected bits
3825 	 * - QE loads, unpacks/shuffle and movemask provides reality
3826 	 * - XOR of the two gives bitmask of new packets
3827 	 * - POPCNT to get the number of new events
3828 	 */
3829 	uint64_t rolling = qm_port->cq_rolling_mask & 0xF;
3830 	uint64_t qe_xor_bits = (qe_gen_bits ^ rolling);
3831 	uint32_t count_new = __builtin_popcount(qe_xor_bits);
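	/* Illustrative example: if rolling == 0xF and the hardware has since
	 * written two new QEs (flipping their gen bits to 0), the loaded gen
	 * bits are 0b1100, the XOR yields 0b0011 and count_new == 2.
	 */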
3832 	count_new = RTE_MIN(count_new, max_events);
3833 	if (!count_new)
3834 		return 0;
3835 
3836 	/* emulate a 128 bit rotate using 2x 64-bit numbers and bit-shifts */
3837 
3838 	uint64_t m_rshift = qm_port->cq_rolling_mask >> count_new;
3839 	uint64_t m_lshift = qm_port->cq_rolling_mask << (64 - count_new);
3840 	uint64_t m2_rshift = qm_port->cq_rolling_mask_2 >> count_new;
3841 	uint64_t m2_lshift = qm_port->cq_rolling_mask_2 << (64 - count_new);
3842 
3843 	/* shifted out of m2 into MSB of m */
3844 	qm_port->cq_rolling_mask = (m_rshift | m2_lshift);
3845 
3846 	/* shifted out of m "looped back" into MSB of m2 */
3847 	qm_port->cq_rolling_mask_2 = (m2_rshift | m_lshift);
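	/* count_new is between 1 and 4 here (the count_new == 0 case returned
	 * early above), so the (64 - count_new) shift amounts are well defined.
	 */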
3848 
3849 	/* Prefetch next QEs: issue early so loads cost instructions, not stalls */
3850 	rte_prefetch0(&cq_addr[(idx + 16) & qm_port->cq_depth_mask]);
3851 	rte_prefetch0(&cq_addr[(idx + 20) & qm_port->cq_depth_mask]);
3852 	rte_prefetch0(&cq_addr[(idx + 24) & qm_port->cq_depth_mask]);
3853 	rte_prefetch0(&cq_addr[(idx + 28) & qm_port->cq_depth_mask]);
3854 
3855 	/* Convert QEs from XMM regs to events and store events directly */
3856 	_process_deq_qes_vec_impl(qm_port, events, v_qe_3, v_qe_2, v_qe_1,
3857 				  v_qe_0, v_qe_meta, v_qe_status, count_new);
3858 
3859 	return count_new;
3860 }
3861 
3862 static inline void
3863 dlb2_inc_cq_idx(struct dlb2_port *qm_port, int cnt)
3864 {
3865 	uint16_t idx = qm_port->cq_idx_unmasked + cnt;
3866 
3867 	qm_port->cq_idx_unmasked = idx;
3868 	qm_port->cq_idx = idx & qm_port->cq_depth_mask;
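	/* gen_bit_shift is expected to be log2(cq_depth), so the expected gen
	 * bit below toggles each time the unmasked index completes another
	 * pass over the CQ.
	 */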
3869 	qm_port->gen_bit = (~(idx >> qm_port->gen_bit_shift)) & 0x1;
3870 }
3871 
3872 static inline int16_t
3873 dlb2_hw_dequeue_sparse(struct dlb2_eventdev *dlb2,
3874 		       struct dlb2_eventdev_port *ev_port,
3875 		       struct rte_event *events,
3876 		       uint16_t max_num,
3877 		       uint64_t dequeue_timeout_ticks)
3878 {
3879 	uint64_t start_ticks = 0ULL;
3880 	struct dlb2_port *qm_port;
3881 	int num = 0;
3882 	bool use_scalar;
3883 	uint64_t timeout;
3884 
3885 	qm_port = &ev_port->qm_port;
3886 	use_scalar = qm_port->use_scalar;
3887 
3888 	if (!dlb2->global_dequeue_wait)
3889 		timeout = dequeue_timeout_ticks;
3890 	else
3891 		timeout = dlb2->global_dequeue_wait_ticks;
3892 
3893 	start_ticks = rte_get_timer_cycles();
3894 
3895 	use_scalar = use_scalar || (max_num & 0x3);
3896 
3897 	while (num < max_num) {
3898 		struct dlb2_dequeue_qe qes[DLB2_NUM_QES_PER_CACHE_LINE];
3899 		int num_avail;
3900 
3901 		if (use_scalar) {
3902 			int n_iter = 0;
3903 			uint64_t m_rshift, m_lshift, m2_rshift, m2_lshift;
3904 
3905 			num_avail = dlb2_recv_qe_sparse(qm_port, qes);
3906 			num_avail = RTE_MIN(num_avail, max_num - num);
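			/* In sparse CQ mode the hardware is expected to write
			 * one QE per cache line (4 QE slots), so advance the
			 * index by 4 per QE.
			 */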
3907 			dlb2_inc_cq_idx(qm_port, num_avail << 2);
3908 			if (num_avail == DLB2_NUM_QES_PER_CACHE_LINE)
3909 				n_iter = dlb2_process_dequeue_four_qes(ev_port,
3910 								qm_port,
3911 								&events[num],
3912 								&qes[0]);
3913 			else if (num_avail)
3914 				n_iter = dlb2_process_dequeue_qes(ev_port,
3915 								qm_port,
3916 								&events[num],
3917 								&qes[0],
3918 								num_avail);
3919 			if (n_iter != 0) {
3920 				num += n_iter;
3921 				/* update rolling_mask for vector code support */
3922 				m_rshift = qm_port->cq_rolling_mask >> n_iter;
3923 				m_lshift = qm_port->cq_rolling_mask << (64 - n_iter);
3924 				m2_rshift = qm_port->cq_rolling_mask_2 >> n_iter;
3925 				m2_lshift = qm_port->cq_rolling_mask_2 <<
3926 					(64 - n_iter);
3927 				qm_port->cq_rolling_mask = (m_rshift | m2_lshift);
3928 				qm_port->cq_rolling_mask_2 = (m2_rshift | m_lshift);
3929 			}
3930 		} else { /* !use_scalar */
3931 			num_avail = dlb2_recv_qe_sparse_vec(qm_port,
3932 							    &events[num],
3933 							    max_num - num);
3934 			dlb2_inc_cq_idx(qm_port, num_avail << 2);
3935 			num += num_avail;
3936 			DLB2_INC_STAT(ev_port->stats.traffic.rx_ok, num_avail);
3937 		}
3938 		if (!num_avail) {
3939 			if ((timeout == 0) || (num > 0))
3940 				/* Not waiting in any form, or 1+ events received */
3941 				break;
3942 			else if (dlb2_dequeue_wait(dlb2, ev_port, qm_port,
3943 						   timeout, start_ticks))
3944 				break;
3945 		}
3946 	}
3947 
3948 	qm_port->owed_tokens += num;
3949 
3950 	if (num) {
3951 		if (qm_port->token_pop_mode == AUTO_POP)
3952 			dlb2_consume_qe_immediate(qm_port, num);
3953 
3954 		ev_port->outstanding_releases += num;
3955 
3956 		dlb2_port_credits_inc(qm_port, num);
3957 	}
3958 
3959 	return num;
3960 }
3961 
3962 static __rte_always_inline int
3963 dlb2_recv_qe(struct dlb2_port *qm_port, struct dlb2_dequeue_qe *qe,
3964 	     uint8_t *offset)
3965 {
3966 	uint8_t xor_mask[2][4] = { {0x0F, 0x0E, 0x0C, 0x08},
3967 				   {0x00, 0x01, 0x03, 0x07} };
3968 	uint8_t and_mask[4] = {0x0F, 0x0E, 0x0C, 0x08};
3969 	volatile struct dlb2_dequeue_qe *cq_addr;
3970 	__m128i *qes = (__m128i *)qe;
3971 	uint64_t *cache_line_base;
3972 	uint8_t gen_bits;
3973 
3974 	cq_addr = dlb2_port[qm_port->id][PORT_TYPE(qm_port)].cq_base;
3975 	cq_addr = &cq_addr[qm_port->cq_idx];
3976 
3977 	cache_line_base = (void *)(((uintptr_t)cq_addr) & ~0x3F);
3978 	*offset = ((uintptr_t)cq_addr & 0x30) >> 4;
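	/* offset selects which of the four 16B QE slots within the 64B cache
	 * line the current CQ index points at.
	 */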
3979 
3980 	/* Load the next CQ cache line from memory. Pack these reads as tight
3981 	 * as possible to reduce the chance that DLB invalidates the line while
3982 	 * the CPU is reading it. Read the cache line backwards to ensure that
3983 	 * if QE[N] (N > 0) is valid, then QEs[0:N-1] are too.
3984 	 *
3985 	 * (Valid QEs start at &qe[offset])
3986 	 */
3987 	qes[3] = _mm_load_si128((__m128i *)&cache_line_base[6]);
3988 	qes[2] = _mm_load_si128((__m128i *)&cache_line_base[4]);
3989 	qes[1] = _mm_load_si128((__m128i *)&cache_line_base[2]);
3990 	qes[0] = _mm_load_si128((__m128i *)&cache_line_base[0]);
3991 
3992 	/* Evict the cache line ASAP */
3993 	rte_cldemote(cache_line_base);
3994 
3995 	/* Extract and combine the gen bits */
3996 	gen_bits = ((_mm_extract_epi8(qes[0], 15) & 0x1) << 0) |
3997 		   ((_mm_extract_epi8(qes[1], 15) & 0x1) << 1) |
3998 		   ((_mm_extract_epi8(qes[2], 15) & 0x1) << 2) |
3999 		   ((_mm_extract_epi8(qes[3], 15) & 0x1) << 3);
4000 
4001 	/* XOR the combined bits such that a 1 represents a valid QE */
4002 	gen_bits ^= xor_mask[qm_port->gen_bit][*offset];
4003 
4004 	/* Mask off gen bits we don't care about */
4005 	gen_bits &= and_mask[*offset];
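	/* e.g. with offset == 2, and_mask[2] == 0x0C keeps only the gen bits
	 * for QEs 2-3; the earlier slots in the line precede the CQ index.
	 */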
4006 
4007 	return __builtin_popcount(gen_bits);
4008 }
4009 
4010 static inline int16_t
4011 dlb2_hw_dequeue(struct dlb2_eventdev *dlb2,
4012 		struct dlb2_eventdev_port *ev_port,
4013 		struct rte_event *events,
4014 		uint16_t max_num,
4015 		uint64_t dequeue_timeout_ticks)
4016 {
4017 	uint64_t timeout;
4018 	uint64_t start_ticks = 0ULL;
4019 	struct dlb2_port *qm_port;
4020 	int num = 0;
4021 
4022 	qm_port = &ev_port->qm_port;
4023 
4024 	/* We have a special implementation for waiting. Wait can be:
4025 	 * 1) no waiting at all
4026 	 * 2) busy poll only
4027 	 * 3) wait for interrupt: if woken up and the poll time has
4028 	 *    expired, return to the caller
4029 	 * 4) umonitor/umwait repeatedly up to the poll time
4030 	 */
4031 
4032 	/* If configured for per dequeue wait, then use wait value provided
4033 	 * to this API. Otherwise we must use the global
4034 	 * value from eventdev config time.
4035 	 */
4036 	if (!dlb2->global_dequeue_wait)
4037 		timeout = dequeue_timeout_ticks;
4038 	else
4039 		timeout = dlb2->global_dequeue_wait_ticks;
4040 
4041 	start_ticks = rte_get_timer_cycles();
4042 
4043 	while (num < max_num) {
4044 		struct dlb2_dequeue_qe qes[DLB2_NUM_QES_PER_CACHE_LINE];
4045 		uint8_t offset;
4046 		int num_avail;
4047 
4048 		/* Copy up to 4 QEs from the current cache line into qes */
4049 		num_avail = dlb2_recv_qe(qm_port, qes, &offset);
4050 
4051 		/* But don't process more than the user requested */
4052 		num_avail = RTE_MIN(num_avail, max_num - num);
4053 
4054 		dlb2_inc_cq_idx(qm_port, num_avail);
4055 
4056 		if (num_avail == DLB2_NUM_QES_PER_CACHE_LINE)
4057 			num += dlb2_process_dequeue_four_qes(ev_port,
4058 							     qm_port,
4059 							     &events[num],
4060 							     &qes[offset]);
4061 		else if (num_avail)
4062 			num += dlb2_process_dequeue_qes(ev_port,
4063 							qm_port,
4064 							&events[num],
4065 							&qes[offset],
4066 							num_avail);
4067 		else if ((timeout == 0) || (num > 0))
4068 			/* Not waiting in any form, or 1+ events received? */
4069 			break;
4070 		else if (dlb2_dequeue_wait(dlb2, ev_port, qm_port,
4071 					   timeout, start_ticks))
4072 			break;
4073 	}
4074 
4075 	qm_port->owed_tokens += num;
4076 
4077 	if (num) {
4078 		if (qm_port->token_pop_mode == AUTO_POP)
4079 			dlb2_consume_qe_immediate(qm_port, num);
4080 
4081 		ev_port->outstanding_releases += num;
4082 
4083 		dlb2_port_credits_inc(qm_port, num);
4084 	}
4085 
4086 	return num;
4087 }
4088 
4089 static uint16_t
4090 dlb2_event_dequeue_burst(void *event_port, struct rte_event *ev, uint16_t num,
4091 			 uint64_t wait)
4092 {
4093 	struct dlb2_eventdev_port *ev_port = event_port;
4094 	struct dlb2_port *qm_port = &ev_port->qm_port;
4095 	struct dlb2_eventdev *dlb2 = ev_port->dlb2;
4096 	uint16_t cnt;
4097 
4098 	RTE_ASSERT(ev_port->setup_done);
4099 	RTE_ASSERT(ev != NULL);
4100 
4101 	if (ev_port->implicit_release && ev_port->outstanding_releases > 0) {
4102 		uint16_t out_rels = ev_port->outstanding_releases;
4103 
4104 		dlb2_event_release(dlb2, ev_port->id, out_rels);
4105 
4106 		DLB2_INC_STAT(ev_port->stats.tx_implicit_rel, out_rels);
4107 	}
4108 
4109 	if (qm_port->token_pop_mode == DEFERRED_POP && qm_port->owed_tokens)
4110 		dlb2_consume_qe_immediate(qm_port, qm_port->owed_tokens);
4111 
4112 	cnt = dlb2_hw_dequeue(dlb2, ev_port, ev, num, wait);
4113 
4114 	DLB2_INC_STAT(ev_port->stats.traffic.total_polls, 1);
4115 	DLB2_INC_STAT(ev_port->stats.traffic.zero_polls, ((cnt == 0) ? 1 : 0));
4116 
4117 	return cnt;
4118 }
4119 
4120 static uint16_t
4121 dlb2_event_dequeue(void *event_port, struct rte_event *ev, uint64_t wait)
4122 {
4123 	return dlb2_event_dequeue_burst(event_port, ev, 1, wait);
4124 }
4125 
4126 static uint16_t
4127 dlb2_event_dequeue_burst_sparse(void *event_port, struct rte_event *ev,
4128 				uint16_t num, uint64_t wait)
4129 {
4130 	struct dlb2_eventdev_port *ev_port = event_port;
4131 	struct dlb2_port *qm_port = &ev_port->qm_port;
4132 	struct dlb2_eventdev *dlb2 = ev_port->dlb2;
4133 	uint16_t cnt;
4134 
4135 	RTE_ASSERT(ev_port->setup_done);
4136 	RTE_ASSERT(ev != NULL);
4137 
4138 	if (ev_port->implicit_release && ev_port->outstanding_releases > 0) {
4139 		uint16_t out_rels = ev_port->outstanding_releases;
4140 
4141 		dlb2_event_release(dlb2, ev_port->id, out_rels);
4142 
4143 		DLB2_INC_STAT(ev_port->stats.tx_implicit_rel, out_rels);
4144 	}
4145 
4146 	if (qm_port->token_pop_mode == DEFERRED_POP && qm_port->owed_tokens)
4147 		dlb2_consume_qe_immediate(qm_port, qm_port->owed_tokens);
4148 
4149 	cnt = dlb2_hw_dequeue_sparse(dlb2, ev_port, ev, num, wait);
4150 
4151 	DLB2_INC_STAT(ev_port->stats.traffic.total_polls, 1);
4152 	DLB2_INC_STAT(ev_port->stats.traffic.zero_polls, ((cnt == 0) ? 1 : 0));
4153 	return cnt;
4154 }
4155 
4156 static uint16_t
4157 dlb2_event_dequeue_sparse(void *event_port, struct rte_event *ev,
4158 			  uint64_t wait)
4159 {
4160 	return dlb2_event_dequeue_burst_sparse(event_port, ev, 1, wait);
4161 }
4162 
4163 static void
4164 dlb2_flush_port(struct rte_eventdev *dev, int port_id)
4165 {
4166 	struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
4167 	eventdev_stop_flush_t flush;
4168 	struct rte_event ev;
4169 	uint8_t dev_id;
4170 	void *arg;
4171 	int i;
4172 
4173 	flush = dev->dev_ops->dev_stop_flush;
4174 	dev_id = dev->data->dev_id;
4175 	arg = dev->data->dev_stop_flush_arg;
4176 
4177 	while (rte_event_dequeue_burst(dev_id, port_id, &ev, 1, 0)) {
4178 		if (flush)
4179 			flush(dev_id, ev, arg);
4180 
4181 		if (dlb2->ev_ports[port_id].qm_port.is_directed)
4182 			continue;
4183 
4184 		ev.op = RTE_EVENT_OP_RELEASE;
4185 
4186 		rte_event_enqueue_burst(dev_id, port_id, &ev, 1);
4187 	}
4188 
4189 	/* Enqueue any additional outstanding releases */
4190 	ev.op = RTE_EVENT_OP_RELEASE;
4191 
4192 	for (i = dlb2->ev_ports[port_id].outstanding_releases; i > 0; i--)
4193 		rte_event_enqueue_burst(dev_id, port_id, &ev, 1);
4194 }
4195 
4196 static uint32_t
4197 dlb2_get_ldb_queue_depth(struct dlb2_eventdev *dlb2,
4198 			 struct dlb2_eventdev_queue *queue)
4199 {
4200 	struct dlb2_hw_dev *handle = &dlb2->qm_instance;
4201 	struct dlb2_get_ldb_queue_depth_args cfg;
4202 	int ret;
4203 
4204 	cfg.queue_id = queue->qm_queue.id;
4205 
4206 	ret = dlb2_iface_get_ldb_queue_depth(handle, &cfg);
4207 	if (ret < 0) {
4208 		DLB2_LOG_ERR("dlb2: get_ldb_queue_depth ret=%d (driver status: %s)\n",
4209 			     ret, dlb2_error_strings[cfg.response.status]);
4210 		return ret;
4211 	}
4212 
4213 	return cfg.response.id;
4214 }
4215 
4216 static uint32_t
4217 dlb2_get_dir_queue_depth(struct dlb2_eventdev *dlb2,
4218 			 struct dlb2_eventdev_queue *queue)
4219 {
4220 	struct dlb2_hw_dev *handle = &dlb2->qm_instance;
4221 	struct dlb2_get_dir_queue_depth_args cfg;
4222 	int ret;
4223 
4224 	cfg.queue_id = queue->qm_queue.id;
4225 
4226 	ret = dlb2_iface_get_dir_queue_depth(handle, &cfg);
4227 	if (ret < 0) {
4228 		DLB2_LOG_ERR("dlb2: get_dir_queue_depth ret=%d (driver status: %s)\n",
4229 			     ret, dlb2_error_strings[cfg.response.status]);
4230 		return ret;
4231 	}
4232 
4233 	return cfg.response.id;
4234 }
4235 
4236 uint32_t
4237 dlb2_get_queue_depth(struct dlb2_eventdev *dlb2,
4238 		     struct dlb2_eventdev_queue *queue)
4239 {
4240 	if (queue->qm_queue.is_directed)
4241 		return dlb2_get_dir_queue_depth(dlb2, queue);
4242 	else
4243 		return dlb2_get_ldb_queue_depth(dlb2, queue);
4244 }
4245 
4246 static bool
4247 dlb2_queue_is_empty(struct dlb2_eventdev *dlb2,
4248 		    struct dlb2_eventdev_queue *queue)
4249 {
4250 	return dlb2_get_queue_depth(dlb2, queue) == 0;
4251 }
4252 
4253 static bool
4254 dlb2_linked_queues_empty(struct dlb2_eventdev *dlb2)
4255 {
4256 	int i;
4257 
4258 	for (i = 0; i < dlb2->num_queues; i++) {
4259 		if (dlb2->ev_queues[i].num_links == 0)
4260 			continue;
4261 		if (!dlb2_queue_is_empty(dlb2, &dlb2->ev_queues[i]))
4262 			return false;
4263 	}
4264 
4265 	return true;
4266 }
4267 
4268 static bool
4269 dlb2_queues_empty(struct dlb2_eventdev *dlb2)
4270 {
4271 	int i;
4272 
4273 	for (i = 0; i < dlb2->num_queues; i++) {
4274 		if (!dlb2_queue_is_empty(dlb2, &dlb2->ev_queues[i]))
4275 			return false;
4276 	}
4277 
4278 	return true;
4279 }
4280 
4281 static void
4282 dlb2_drain(struct rte_eventdev *dev)
4283 {
4284 	struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
4285 	struct dlb2_eventdev_port *ev_port = NULL;
4286 	uint8_t dev_id;
4287 	int i;
4288 
4289 	dev_id = dev->data->dev_id;
4290 
4291 	while (!dlb2_linked_queues_empty(dlb2)) {
4292 		/* Flush all the ev_ports, which will drain all their connected
4293 		 * queues.
4294 		 */
4295 		for (i = 0; i < dlb2->num_ports; i++)
4296 			dlb2_flush_port(dev, i);
4297 	}
4298 
4299 	/* The queues are empty, but there may be events left in the ports. */
4300 	for (i = 0; i < dlb2->num_ports; i++)
4301 		dlb2_flush_port(dev, i);
4302 
4303 	/* If the domain's queues are empty, we're done. */
4304 	if (dlb2_queues_empty(dlb2))
4305 		return;
4306 
4307 	/* Else, there must be at least one unlinked load-balanced queue.
4308 	 * Select a load-balanced port with which to drain the unlinked
4309 	 * queue(s).
4310 	 */
4311 	for (i = 0; i < dlb2->num_ports; i++) {
4312 		ev_port = &dlb2->ev_ports[i];
4313 
4314 		if (!ev_port->qm_port.is_directed)
4315 			break;
4316 	}
4317 
4318 	if (i == dlb2->num_ports) {
4319 		DLB2_LOG_ERR("internal error: no LDB ev_ports\n");
4320 		return;
4321 	}
4322 
4323 	rte_errno = 0;
4324 	rte_event_port_unlink(dev_id, ev_port->id, NULL, 0);
4325 
4326 	if (rte_errno) {
4327 		DLB2_LOG_ERR("internal error: failed to unlink ev_port %d\n",
4328 			     ev_port->id);
4329 		return;
4330 	}
4331 
4332 	for (i = 0; i < dlb2->num_queues; i++) {
4333 		uint8_t qid, prio;
4334 		int ret;
4335 
4336 		if (dlb2_queue_is_empty(dlb2, &dlb2->ev_queues[i]))
4337 			continue;
4338 
4339 		qid = i;
4340 		prio = 0;
4341 
4342 		/* Link the ev_port to the queue */
4343 		ret = rte_event_port_link(dev_id, ev_port->id, &qid, &prio, 1);
4344 		if (ret != 1) {
4345 			DLB2_LOG_ERR("internal error: failed to link ev_port %d to queue %d\n",
4346 				     ev_port->id, qid);
4347 			return;
4348 		}
4349 
4350 		/* Flush the queue */
4351 		while (!dlb2_queue_is_empty(dlb2, &dlb2->ev_queues[i]))
4352 			dlb2_flush_port(dev, ev_port->id);
4353 
4354 		/* Drain any extant events in the ev_port. */
4355 		dlb2_flush_port(dev, ev_port->id);
4356 
4357 		/* Unlink the ev_port from the queue */
4358 		ret = rte_event_port_unlink(dev_id, ev_port->id, &qid, 1);
4359 		if (ret != 1) {
4360 			DLB2_LOG_ERR("internal error: failed to unlink ev_port %d from queue %d\n",
4361 				     ev_port->id, qid);
4362 			return;
4363 		}
4364 	}
4365 }
4366 
4367 static void
4368 dlb2_eventdev_stop(struct rte_eventdev *dev)
4369 {
4370 	struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
4371 
4372 	rte_spinlock_lock(&dlb2->qm_instance.resource_lock);
4373 
4374 	if (dlb2->run_state == DLB2_RUN_STATE_STOPPED) {
4375 		DLB2_LOG_DBG("Internal error: already stopped\n");
4376 		rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
4377 		return;
4378 	} else if (dlb2->run_state != DLB2_RUN_STATE_STARTED) {
4379 		DLB2_LOG_ERR("Internal error: bad state %d for dev_stop\n",
4380 			     (int)dlb2->run_state);
4381 		rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
4382 		return;
4383 	}
4384 
4385 	dlb2->run_state = DLB2_RUN_STATE_STOPPING;
4386 
4387 	rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
4388 
4389 	dlb2_drain(dev);
4390 
4391 	dlb2->run_state = DLB2_RUN_STATE_STOPPED;
4392 }
4393 
4394 static int
4395 dlb2_eventdev_close(struct rte_eventdev *dev)
4396 {
4397 	dlb2_hw_reset_sched_domain(dev, false);
4398 
4399 	return 0;
4400 }
4401 
4402 static void
4403 dlb2_eventdev_queue_release(struct rte_eventdev *dev, uint8_t id)
4404 {
4405 	RTE_SET_USED(dev);
4406 	RTE_SET_USED(id);
4407 
4408 	/* This function intentionally left blank. */
4409 }
4410 
4411 static void
4412 dlb2_eventdev_port_release(void *port)
4413 {
4414 	struct dlb2_eventdev_port *ev_port = port;
4415 	struct dlb2_port *qm_port;
4416 
4417 	if (ev_port) {
4418 		qm_port = &ev_port->qm_port;
4419 		if (qm_port->config_state == DLB2_CONFIGURED)
4420 			dlb2_free_qe_mem(qm_port);
4421 	}
4422 }
4423 
4424 static int
4425 dlb2_eventdev_timeout_ticks(struct rte_eventdev *dev, uint64_t ns,
4426 			    uint64_t *timeout_ticks)
4427 {
4428 	RTE_SET_USED(dev);
4429 	uint64_t cycles_per_ns = rte_get_timer_hz() / 1E9;
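	/* Note: integer truncation; this assumes a timer frequency of at least
	 * 1 GHz, otherwise cycles_per_ns rounds down to zero.
	 */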
4430 
4431 	*timeout_ticks = ns * cycles_per_ns;
4432 
4433 	return 0;
4434 }
4435 
4436 static void
4437 dlb2_entry_points_init(struct rte_eventdev *dev)
4438 {
4439 	struct dlb2_eventdev *dlb2;
4440 
4441 	/* Expose PMD's eventdev interface */
4442 	static struct eventdev_ops dlb2_eventdev_entry_ops = {
4443 		.dev_infos_get    = dlb2_eventdev_info_get,
4444 		.dev_configure    = dlb2_eventdev_configure,
4445 		.dev_start        = dlb2_eventdev_start,
4446 		.dev_stop         = dlb2_eventdev_stop,
4447 		.dev_close        = dlb2_eventdev_close,
4448 		.queue_def_conf   = dlb2_eventdev_queue_default_conf_get,
4449 		.queue_setup      = dlb2_eventdev_queue_setup,
4450 		.queue_release    = dlb2_eventdev_queue_release,
4451 		.port_def_conf    = dlb2_eventdev_port_default_conf_get,
4452 		.port_setup       = dlb2_eventdev_port_setup,
4453 		.port_release     = dlb2_eventdev_port_release,
4454 		.port_link        = dlb2_eventdev_port_link,
4455 		.port_unlink      = dlb2_eventdev_port_unlink,
4456 		.port_unlinks_in_progress =
4457 				    dlb2_eventdev_port_unlinks_in_progress,
4458 		.timeout_ticks    = dlb2_eventdev_timeout_ticks,
4459 		.dump             = dlb2_eventdev_dump,
4460 		.xstats_get       = dlb2_eventdev_xstats_get,
4461 		.xstats_get_names = dlb2_eventdev_xstats_get_names,
4462 		.xstats_get_by_name = dlb2_eventdev_xstats_get_by_name,
4463 		.xstats_reset	    = dlb2_eventdev_xstats_reset,
4464 		.dev_selftest     = test_dlb2_eventdev,
4465 	};
4466 
4467 	/* Install eventdev ops and fast-path entry points */
4468 
4469 	dev->dev_ops = &dlb2_eventdev_entry_ops;
4470 	dev->enqueue = dlb2_event_enqueue;
4471 	dev->enqueue_burst = dlb2_event_enqueue_burst;
4472 	dev->enqueue_new_burst = dlb2_event_enqueue_new_burst;
4473 	dev->enqueue_forward_burst = dlb2_event_enqueue_forward_burst;
4474 
4475 	dlb2 = dev->data->dev_private;
4476 	if (dlb2->poll_mode == DLB2_CQ_POLL_MODE_SPARSE) {
4477 		dev->dequeue = dlb2_event_dequeue_sparse;
4478 		dev->dequeue_burst = dlb2_event_dequeue_burst_sparse;
4479 	} else {
4480 		dev->dequeue = dlb2_event_dequeue;
4481 		dev->dequeue_burst = dlb2_event_dequeue_burst;
4482 	}
4483 }
4484 
4485 int
4486 dlb2_primary_eventdev_probe(struct rte_eventdev *dev,
4487 			    const char *name,
4488 			    struct dlb2_devargs *dlb2_args)
4489 {
4490 	struct dlb2_eventdev *dlb2;
4491 	int err, i;
4492 
4493 	dlb2 = dev->data->dev_private;
4494 
4495 	dlb2->event_dev = dev; /* backlink */
4496 
4497 	evdev_dlb2_default_info.driver_name = name;
4498 
4499 	dlb2->max_num_events_override = dlb2_args->max_num_events;
4500 	dlb2->num_dir_credits_override = dlb2_args->num_dir_credits_override;
4501 	dlb2->qm_instance.cos_id = dlb2_args->cos_id;
4502 	dlb2->poll_interval = dlb2_args->poll_interval;
4503 	dlb2->sw_credit_quanta = dlb2_args->sw_credit_quanta;
4504 	dlb2->hw_credit_quanta = dlb2_args->hw_credit_quanta;
4505 	dlb2->default_depth_thresh = dlb2_args->default_depth_thresh;
4506 	dlb2->vector_opts_enabled = dlb2_args->vector_opts_enabled;
4507 
4508 	err = dlb2_iface_open(&dlb2->qm_instance, name);
4509 	if (err < 0) {
4510 		DLB2_LOG_ERR("could not open event hardware device, err=%d\n",
4511 			     err);
4512 		return err;
4513 	}
4514 
4515 	err = dlb2_iface_get_device_version(&dlb2->qm_instance,
4516 					    &dlb2->revision);
4517 	if (err < 0) {
4518 		DLB2_LOG_ERR("dlb2: failed to get the device version, err=%d\n",
4519 			     err);
4520 		return err;
4521 	}
4522 
4523 	err = dlb2_hw_query_resources(dlb2);
4524 	if (err) {
4525 		DLB2_LOG_ERR("get resources err=%d for %s\n",
4526 			     err, name);
4527 		return err;
4528 	}
4529 
4530 	dlb2_iface_hardware_init(&dlb2->qm_instance);
4531 
4532 	err = dlb2_iface_get_cq_poll_mode(&dlb2->qm_instance, &dlb2->poll_mode);
4533 	if (err < 0) {
4534 		DLB2_LOG_ERR("dlb2: failed to get the poll mode, err=%d\n",
4535 			     err);
4536 		return err;
4537 	}
4538 
4539 	/* Complete xstats runtime initialization */
4540 	err = dlb2_xstats_init(dlb2);
4541 	if (err) {
4542 		DLB2_LOG_ERR("dlb2: failed to init xstats, err=%d\n", err);
4543 		return err;
4544 	}
4545 
4546 	/* Initialize each port's token pop mode */
4547 	for (i = 0; i < DLB2_MAX_NUM_PORTS(dlb2->version); i++)
4548 		dlb2->ev_ports[i].qm_port.token_pop_mode = AUTO_POP;
4549 
4550 	rte_spinlock_init(&dlb2->qm_instance.resource_lock);
4551 
4552 	dlb2_iface_low_level_io_init();
4553 
4554 	dlb2_entry_points_init(dev);
4555 
4556 	dlb2_init_queue_depth_thresholds(dlb2,
4557 					 dlb2_args->qid_depth_thresholds.val);
4558 
4559 	return 0;
4560 }
4561 
4562 int
4563 dlb2_secondary_eventdev_probe(struct rte_eventdev *dev,
4564 			      const char *name)
4565 {
4566 	struct dlb2_eventdev *dlb2;
4567 	int err;
4568 
4569 	dlb2 = dev->data->dev_private;
4570 
4571 	evdev_dlb2_default_info.driver_name = name;
4572 
4573 	err = dlb2_iface_open(&dlb2->qm_instance, name);
4574 	if (err < 0) {
4575 		DLB2_LOG_ERR("could not open event hardware device, err=%d\n",
4576 			     err);
4577 		return err;
4578 	}
4579 
4580 	err = dlb2_hw_query_resources(dlb2);
4581 	if (err) {
4582 		DLB2_LOG_ERR("get resources err=%d for %s\n",
4583 			     err, name);
4584 		return err;
4585 	}
4586 
4587 	dlb2_iface_low_level_io_init();
4588 
4589 	dlb2_entry_points_init(dev);
4590 
4591 	return 0;
4592 }
4593 
4594 int
4595 dlb2_parse_params(const char *params,
4596 		  const char *name,
4597 		  struct dlb2_devargs *dlb2_args,
4598 		  uint8_t version)
4599 {
4600 	int ret = 0;
4601 	static const char * const args[] = { NUMA_NODE_ARG,
4602 					     DLB2_MAX_NUM_EVENTS,
4603 					     DLB2_NUM_DIR_CREDITS,
4604 					     DEV_ID_ARG,
4605 					     DLB2_QID_DEPTH_THRESH_ARG,
4606 					     DLB2_COS_ARG,
4607 					     DLB2_POLL_INTERVAL_ARG,
4608 					     DLB2_SW_CREDIT_QUANTA_ARG,
4609 					     DLB2_HW_CREDIT_QUANTA_ARG,
4610 					     DLB2_DEPTH_THRESH_ARG,
4611 					     DLB2_VECTOR_OPTS_ENAB_ARG,
4612 					     NULL };
4613 
4614 	if (params != NULL && params[0] != '\0') {
4615 		struct rte_kvargs *kvlist = rte_kvargs_parse(params, args);
4616 
4617 		if (kvlist == NULL) {
4618 			RTE_LOG(INFO, PMD,
4619 				"Ignoring unsupported parameters when creating device '%s'\n",
4620 				name);
4621 		} else {
4622 			int ret = rte_kvargs_process(kvlist, NUMA_NODE_ARG,
4623 						     set_numa_node,
4624 						     &dlb2_args->socket_id);
4625 			if (ret != 0) {
4626 				DLB2_LOG_ERR("%s: Error parsing numa node parameter",
4627 					     name);
4628 				rte_kvargs_free(kvlist);
4629 				return ret;
4630 			}
4631 
4632 			ret = rte_kvargs_process(kvlist, DLB2_MAX_NUM_EVENTS,
4633 						 set_max_num_events,
4634 						 &dlb2_args->max_num_events);
4635 			if (ret != 0) {
4636 				DLB2_LOG_ERR("%s: Error parsing max_num_events parameter",
4637 					     name);
4638 				rte_kvargs_free(kvlist);
4639 				return ret;
4640 			}
4641 
4642 			if (version == DLB2_HW_V2) {
4643 				ret = rte_kvargs_process(kvlist,
4644 					DLB2_NUM_DIR_CREDITS,
4645 					set_num_dir_credits,
4646 					&dlb2_args->num_dir_credits_override);
4647 				if (ret != 0) {
4648 					DLB2_LOG_ERR("%s: Error parsing num_dir_credits parameter",
4649 						     name);
4650 					rte_kvargs_free(kvlist);
4651 					return ret;
4652 				}
4653 			}
4654 			ret = rte_kvargs_process(kvlist, DEV_ID_ARG,
4655 						 set_dev_id,
4656 						 &dlb2_args->dev_id);
4657 			if (ret != 0) {
4658 				DLB2_LOG_ERR("%s: Error parsing dev_id parameter",
4659 					     name);
4660 				rte_kvargs_free(kvlist);
4661 				return ret;
4662 			}
4663 
4664 			if (version == DLB2_HW_V2) {
4665 				ret = rte_kvargs_process(
4666 					kvlist,
4667 					DLB2_QID_DEPTH_THRESH_ARG,
4668 					set_qid_depth_thresh,
4669 					&dlb2_args->qid_depth_thresholds);
4670 			} else {
4671 				ret = rte_kvargs_process(
4672 					kvlist,
4673 					DLB2_QID_DEPTH_THRESH_ARG,
4674 					set_qid_depth_thresh_v2_5,
4675 					&dlb2_args->qid_depth_thresholds);
4676 			}
4677 			if (ret != 0) {
4678 				DLB2_LOG_ERR("%s: Error parsing qid_depth_thresh parameter",
4679 					     name);
4680 				rte_kvargs_free(kvlist);
4681 				return ret;
4682 			}
4683 
4684 			ret = rte_kvargs_process(kvlist, DLB2_COS_ARG,
4685 						 set_cos,
4686 						 &dlb2_args->cos_id);
4687 			if (ret != 0) {
4688 				DLB2_LOG_ERR("%s: Error parsing cos parameter",
4689 					     name);
4690 				rte_kvargs_free(kvlist);
4691 				return ret;
4692 			}
4693 
4694 			ret = rte_kvargs_process(kvlist, DLB2_POLL_INTERVAL_ARG,
4695 						 set_poll_interval,
4696 						 &dlb2_args->poll_interval);
4697 			if (ret != 0) {
4698 				DLB2_LOG_ERR("%s: Error parsing poll interval parameter",
4699 					     name);
4700 				rte_kvargs_free(kvlist);
4701 				return ret;
4702 			}
4703 
4704 			ret = rte_kvargs_process(kvlist,
4705 						 DLB2_SW_CREDIT_QUANTA_ARG,
4706 						 set_sw_credit_quanta,
4707 						 &dlb2_args->sw_credit_quanta);
4708 			if (ret != 0) {
4709 				DLB2_LOG_ERR("%s: Error parsing sw credit quanta parameter",
4710 					     name);
4711 				rte_kvargs_free(kvlist);
4712 				return ret;
4713 			}
4714 
4715 			ret = rte_kvargs_process(kvlist,
4716 						 DLB2_HW_CREDIT_QUANTA_ARG,
4717 						 set_hw_credit_quanta,
4718 						 &dlb2_args->hw_credit_quanta);
4719 			if (ret != 0) {
4720 				DLB2_LOG_ERR("%s: Error parsing hw credit quanta parameter",
4721 					     name);
4722 				rte_kvargs_free(kvlist);
4723 				return ret;
4724 			}
4725 
4726 			ret = rte_kvargs_process(kvlist, DLB2_DEPTH_THRESH_ARG,
4727 					set_default_depth_thresh,
4728 					&dlb2_args->default_depth_thresh);
4729 			if (ret != 0) {
4730 				DLB2_LOG_ERR("%s: Error parsing set depth thresh parameter",
4731 					     name);
4732 				rte_kvargs_free(kvlist);
4733 				return ret;
4734 			}
4735 
4736 			ret = rte_kvargs_process(kvlist,
4737 					DLB2_VECTOR_OPTS_ENAB_ARG,
4738 					set_vector_opts_enab,
4739 					&dlb2_args->vector_opts_enabled);
4740 			if (ret != 0) {
4741 				DLB2_LOG_ERR("%s: Error parsing vector opts enabled",
4742 					     name);
4743 				rte_kvargs_free(kvlist);
4744 				return ret;
4745 			}
4746 
4747 			rte_kvargs_free(kvlist);
4748 		}
4749 	}
4750 	return ret;
4751 }
4752 RTE_LOG_REGISTER_DEFAULT(eventdev_dlb2_log_level, NOTICE);
4753