1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2016-2020 Intel Corporation
3 */
4
5 #include <assert.h>
6 #include <errno.h>
7 #include <nmmintrin.h>
8 #include <pthread.h>
9 #include <stdint.h>
10 #include <stdbool.h>
11 #include <stdio.h>
12 #include <string.h>
13 #include <sys/mman.h>
#include <fcntl.h>
#include <limits.h>
15
16 #include <rte_common.h>
17 #include <rte_config.h>
18 #include <rte_cycles.h>
19 #include <rte_debug.h>
20 #include <rte_dev.h>
21 #include <rte_errno.h>
22 #include <rte_eventdev.h>
23 #include <eventdev_pmd.h>
24 #include <rte_io.h>
25 #include <rte_kvargs.h>
26 #include <rte_log.h>
27 #include <rte_malloc.h>
28 #include <rte_mbuf.h>
29 #include <rte_power_intrinsics.h>
30 #include <rte_prefetch.h>
31 #include <rte_ring.h>
32 #include <rte_string_fns.h>
33
34 #include "dlb2_priv.h"
35 #include "dlb2_iface.h"
36 #include "dlb2_inline_fns.h"
37
/*
 * Resources exposed to eventdev. Some values are overridden at runtime with
 * values returned by the DLB2 kernel driver.
 */
42 #if (RTE_EVENT_MAX_QUEUES_PER_DEV > UINT8_MAX)
43 #error "RTE_EVENT_MAX_QUEUES_PER_DEV cannot fit in member max_event_queues"
44 #endif
45 static struct rte_event_dev_info evdev_dlb2_default_info = {
46 .driver_name = "", /* probe will set */
47 .min_dequeue_timeout_ns = DLB2_MIN_DEQUEUE_TIMEOUT_NS,
48 .max_dequeue_timeout_ns = DLB2_MAX_DEQUEUE_TIMEOUT_NS,
49 #if (RTE_EVENT_MAX_QUEUES_PER_DEV < DLB2_MAX_NUM_LDB_QUEUES)
50 .max_event_queues = RTE_EVENT_MAX_QUEUES_PER_DEV,
51 #else
52 .max_event_queues = DLB2_MAX_NUM_LDB_QUEUES,
53 #endif
54 .max_event_queue_flows = DLB2_MAX_NUM_FLOWS,
55 .max_event_queue_priority_levels = DLB2_QID_PRIORITIES,
56 .max_event_priority_levels = DLB2_QID_PRIORITIES,
57 .max_event_ports = DLB2_MAX_NUM_LDB_PORTS,
58 .max_event_port_dequeue_depth = DLB2_DEFAULT_CQ_DEPTH,
59 .max_event_port_enqueue_depth = DLB2_MAX_ENQUEUE_DEPTH,
60 .max_event_port_links = DLB2_MAX_NUM_QIDS_PER_LDB_CQ,
61 .max_num_events = DLB2_MAX_NUM_LDB_CREDITS,
62 .max_single_link_event_port_queue_pairs =
63 DLB2_MAX_NUM_DIR_PORTS(DLB2_HW_V2),
64 .event_dev_cap = (RTE_EVENT_DEV_CAP_QUEUE_QOS |
65 RTE_EVENT_DEV_CAP_EVENT_QOS |
66 RTE_EVENT_DEV_CAP_BURST_MODE |
67 RTE_EVENT_DEV_CAP_DISTRIBUTED_SCHED |
68 RTE_EVENT_DEV_CAP_IMPLICIT_RELEASE_DISABLE |
69 RTE_EVENT_DEV_CAP_QUEUE_ALL_TYPES |
70 RTE_EVENT_DEV_CAP_MAINTENANCE_FREE),
71 };
72
73 struct process_local_port_data
74 dlb2_port[DLB2_MAX_NUM_PORTS_ALL][DLB2_NUM_PORT_TYPES];
75
76 static void
dlb2_free_qe_mem(struct dlb2_port *qm_port)
78 {
79 if (qm_port == NULL)
80 return;
81
82 rte_free(qm_port->qe4);
83 qm_port->qe4 = NULL;
84
85 rte_free(qm_port->int_arm_qe);
86 qm_port->int_arm_qe = NULL;
87
88 rte_free(qm_port->consume_qe);
89 qm_port->consume_qe = NULL;
90
91 rte_memzone_free(dlb2_port[qm_port->id][PORT_TYPE(qm_port)].mz);
92 dlb2_port[qm_port->id][PORT_TYPE(qm_port)].mz = NULL;
93 }
94
95 /* override defaults with value(s) provided on command line */
96 static void
dlb2_init_queue_depth_thresholds(struct dlb2_eventdev *dlb2,
98 int *qid_depth_thresholds)
99 {
100 int q;
101
102 for (q = 0; q < DLB2_MAX_NUM_QUEUES(dlb2->version); q++) {
103 if (qid_depth_thresholds[q] != 0)
104 dlb2->ev_queues[q].depth_threshold =
105 qid_depth_thresholds[q];
106 }
107 }
108
109 static int
dlb2_hw_query_resources(struct dlb2_eventdev *dlb2)
111 {
112 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
113 struct dlb2_hw_resource_info *dlb2_info = &handle->info;
114 int num_ldb_ports;
115 int ret;
116
117 /* Query driver resources provisioned for this device */
118
119 ret = dlb2_iface_get_num_resources(handle,
120 &dlb2->hw_rsrc_query_results);
121 if (ret) {
122 DLB2_LOG_ERR("ioctl get dlb2 num resources, err=%d\n", ret);
123 return ret;
124 }
125
126 /* Complete filling in device resource info returned to evdev app,
127 * overriding any default values.
128 * The capabilities (CAPs) were set at compile time.
129 */
130
131 if (dlb2->max_cq_depth != DLB2_DEFAULT_CQ_DEPTH)
132 num_ldb_ports = DLB2_MAX_HL_ENTRIES / dlb2->max_cq_depth;
133 else
134 num_ldb_ports = dlb2->hw_rsrc_query_results.num_ldb_ports;
135
136 evdev_dlb2_default_info.max_event_queues =
137 dlb2->hw_rsrc_query_results.num_ldb_queues;
138
139 evdev_dlb2_default_info.max_event_ports = num_ldb_ports;
140
141 if (dlb2->version == DLB2_HW_V2_5) {
142 evdev_dlb2_default_info.max_num_events =
143 dlb2->hw_rsrc_query_results.num_credits;
144 } else {
145 evdev_dlb2_default_info.max_num_events =
146 dlb2->hw_rsrc_query_results.num_ldb_credits;
147 }
148 /* Save off values used when creating the scheduling domain. */
149
150 handle->info.num_sched_domains =
151 dlb2->hw_rsrc_query_results.num_sched_domains;
152
153 if (dlb2->version == DLB2_HW_V2_5) {
154 handle->info.hw_rsrc_max.nb_events_limit =
155 dlb2->hw_rsrc_query_results.num_credits;
156 } else {
157 handle->info.hw_rsrc_max.nb_events_limit =
158 dlb2->hw_rsrc_query_results.num_ldb_credits;
159 }
160 handle->info.hw_rsrc_max.num_queues =
161 dlb2->hw_rsrc_query_results.num_ldb_queues +
162 dlb2->hw_rsrc_query_results.num_dir_ports;
163
164 handle->info.hw_rsrc_max.num_ldb_queues =
165 dlb2->hw_rsrc_query_results.num_ldb_queues;
166
167 handle->info.hw_rsrc_max.num_ldb_ports = num_ldb_ports;
168
169 handle->info.hw_rsrc_max.num_dir_ports =
170 dlb2->hw_rsrc_query_results.num_dir_ports;
171
172 handle->info.hw_rsrc_max.reorder_window_size =
173 dlb2->hw_rsrc_query_results.num_hist_list_entries;
174
175 rte_memcpy(dlb2_info, &handle->info.hw_rsrc_max, sizeof(*dlb2_info));
176
177 return 0;
178 }
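
/*
 * Illustrative sizing sketch (the numbers here are assumptions for the sake
 * of the example, not values read from hardware): if the max_cq_depth devarg
 * raised the CQ depth to 256 and DLB2_MAX_HL_ENTRIES were 2048, the history
 * list budget above would limit the domain to 2048 / 256 = 8 load-balanced
 * ports rather than the driver-reported port count.
 */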
179
180 #define DLB2_BASE_10 10
181
182 static int
dlb2_string_to_int(int *result, const char *str)
184 {
185 long ret;
186 char *endptr;
187
188 if (str == NULL || result == NULL)
189 return -EINVAL;
190
191 errno = 0;
192 ret = strtol(str, &endptr, DLB2_BASE_10);
193 if (errno)
194 return -errno;
195
	/* long and int may have different widths on some architectures */
197 if (ret < INT_MIN || ret > INT_MAX || endptr == str)
198 return -EINVAL;
199
200 *result = ret;
201 return 0;
202 }
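
/*
 * Minimal usage sketch for the helper above (hypothetical caller, for
 * illustration only):
 *
 *	int depth;
 *
 *	if (dlb2_string_to_int(&depth, "1024") == 0)
 *		... depth now holds 1024 ...
 *
 * A value such as "1024abc" still parses as 1024; only a string with no
 * leading number leaves endptr == str and yields -EINVAL.
 */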
203
204 static int
set_numa_node(const char *key __rte_unused, const char *value, void *opaque)
206 {
207 int *socket_id = opaque;
208 int ret;
209
210 ret = dlb2_string_to_int(socket_id, value);
211 if (ret < 0)
212 return ret;
213
214 if (*socket_id > RTE_MAX_NUMA_NODES)
215 return -EINVAL;
216 return 0;
217 }
218
219
220 static int
set_max_cq_depth(const char *key __rte_unused,
222 const char *value,
223 void *opaque)
224 {
225 int *max_cq_depth = opaque;
226 int ret;
227
228 if (value == NULL || opaque == NULL) {
229 DLB2_LOG_ERR("NULL pointer\n");
230 return -EINVAL;
231 }
232
233 ret = dlb2_string_to_int(max_cq_depth, value);
234 if (ret < 0)
235 return ret;
236
237 if (*max_cq_depth < DLB2_MIN_CQ_DEPTH_OVERRIDE ||
238 *max_cq_depth > DLB2_MAX_CQ_DEPTH_OVERRIDE ||
239 !rte_is_power_of_2(*max_cq_depth)) {
		DLB2_LOG_ERR("dlb2: max_cq_depth must be between %d and %d and a power of 2\n",
			     DLB2_MIN_CQ_DEPTH_OVERRIDE,
			     DLB2_MAX_CQ_DEPTH_OVERRIDE);
243 return -EINVAL;
244 }
245
246 return 0;
247 }
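
/*
 * Example (illustrative; the devarg key is assumed to be "max_cq_depth"):
 * passing max_cq_depth=128 in the devargs string is accepted provided 128
 * lies within [DLB2_MIN_CQ_DEPTH_OVERRIDE, DLB2_MAX_CQ_DEPTH_OVERRIDE],
 * while a value such as 100 is rejected because it is not a power of two.
 */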
248
249 static int
set_max_num_events(const char *key __rte_unused,
251 const char *value,
252 void *opaque)
253 {
254 int *max_num_events = opaque;
255 int ret;
256
257 if (value == NULL || opaque == NULL) {
258 DLB2_LOG_ERR("NULL pointer\n");
259 return -EINVAL;
260 }
261
262 ret = dlb2_string_to_int(max_num_events, value);
263 if (ret < 0)
264 return ret;
265
266 if (*max_num_events < 0 || *max_num_events >
267 DLB2_MAX_NUM_LDB_CREDITS) {
268 DLB2_LOG_ERR("dlb2: max_num_events must be between 0 and %d\n",
269 DLB2_MAX_NUM_LDB_CREDITS);
270 return -EINVAL;
271 }
272
273 return 0;
274 }
275
276 static int
set_num_dir_credits(const char *key __rte_unused,
278 const char *value,
279 void *opaque)
280 {
281 int *num_dir_credits = opaque;
282 int ret;
283
284 if (value == NULL || opaque == NULL) {
285 DLB2_LOG_ERR("NULL pointer\n");
286 return -EINVAL;
287 }
288
289 ret = dlb2_string_to_int(num_dir_credits, value);
290 if (ret < 0)
291 return ret;
292
293 if (*num_dir_credits < 0 ||
294 *num_dir_credits > DLB2_MAX_NUM_DIR_CREDITS(DLB2_HW_V2)) {
295 DLB2_LOG_ERR("dlb2: num_dir_credits must be between 0 and %d\n",
296 DLB2_MAX_NUM_DIR_CREDITS(DLB2_HW_V2));
297 return -EINVAL;
298 }
299
300 return 0;
301 }
302
303 static int
set_dev_id(const char *key __rte_unused,
305 const char *value,
306 void *opaque)
307 {
308 int *dev_id = opaque;
309 int ret;
310
311 if (value == NULL || opaque == NULL) {
312 DLB2_LOG_ERR("NULL pointer\n");
313 return -EINVAL;
314 }
315
316 ret = dlb2_string_to_int(dev_id, value);
317 if (ret < 0)
318 return ret;
319
320 return 0;
321 }
322
323 static int
set_cos(const char *key __rte_unused,
325 const char *value,
326 void *opaque)
327 {
328 enum dlb2_cos *cos_id = opaque;
329 int x = 0;
330 int ret;
331
332 if (value == NULL || opaque == NULL) {
333 DLB2_LOG_ERR("NULL pointer\n");
334 return -EINVAL;
335 }
336
337 ret = dlb2_string_to_int(&x, value);
338 if (ret < 0)
339 return ret;
340
341 if (x != DLB2_COS_DEFAULT && (x < DLB2_COS_0 || x > DLB2_COS_3)) {
342 DLB2_LOG_ERR(
343 "COS %d out of range, must be DLB2_COS_DEFAULT or 0-3\n",
344 x);
345 return -EINVAL;
346 }
347
348 *cos_id = x;
349
350 return 0;
351 }
352
353 static int
set_poll_interval(const char *key __rte_unused,
355 const char *value,
356 void *opaque)
357 {
358 int *poll_interval = opaque;
359 int ret;
360
361 if (value == NULL || opaque == NULL) {
362 DLB2_LOG_ERR("NULL pointer\n");
363 return -EINVAL;
364 }
365
366 ret = dlb2_string_to_int(poll_interval, value);
367 if (ret < 0)
368 return ret;
369
370 return 0;
371 }
372
373 static int
set_sw_credit_quanta(const char *key __rte_unused,
375 const char *value,
376 void *opaque)
377 {
378 int *sw_credit_quanta = opaque;
379 int ret;
380
381 if (value == NULL || opaque == NULL) {
382 DLB2_LOG_ERR("NULL pointer\n");
383 return -EINVAL;
384 }
385
386 ret = dlb2_string_to_int(sw_credit_quanta, value);
387 if (ret < 0)
388 return ret;
389
390 return 0;
391 }
392
393 static int
set_hw_credit_quanta(const char *key __rte_unused,
395 const char *value,
396 void *opaque)
397 {
398 int *hw_credit_quanta = opaque;
399 int ret;
400
401 if (value == NULL || opaque == NULL) {
402 DLB2_LOG_ERR("NULL pointer\n");
403 return -EINVAL;
404 }
405
406 ret = dlb2_string_to_int(hw_credit_quanta, value);
407 if (ret < 0)
408 return ret;
409
410 return 0;
411 }
412
413 static int
set_default_depth_thresh(const char *key __rte_unused,
415 const char *value,
416 void *opaque)
417 {
418 int *default_depth_thresh = opaque;
419 int ret;
420
421 if (value == NULL || opaque == NULL) {
422 DLB2_LOG_ERR("NULL pointer\n");
423 return -EINVAL;
424 }
425
426 ret = dlb2_string_to_int(default_depth_thresh, value);
427 if (ret < 0)
428 return ret;
429
430 return 0;
431 }
432
433 static int
set_vector_opts_enab(const char *key __rte_unused,
435 const char *value,
436 void *opaque)
437 {
438 bool *dlb2_vector_opts_enabled = opaque;
439
440 if (value == NULL || opaque == NULL) {
441 DLB2_LOG_ERR("NULL pointer\n");
442 return -EINVAL;
443 }
444
445 if ((*value == 'y') || (*value == 'Y'))
446 *dlb2_vector_opts_enabled = true;
447 else
448 *dlb2_vector_opts_enabled = false;
449
450 return 0;
451 }
452
453 static int
set_qid_depth_thresh(const char *key __rte_unused,
455 const char *value,
456 void *opaque)
457 {
458 struct dlb2_qid_depth_thresholds *qid_thresh = opaque;
459 int first, last, thresh, i;
460
461 if (value == NULL || opaque == NULL) {
462 DLB2_LOG_ERR("NULL pointer\n");
463 return -EINVAL;
464 }
465
466 /* command line override may take one of the following 3 forms:
467 * qid_depth_thresh=all:<threshold_value> ... all queues
468 * qid_depth_thresh=qidA-qidB:<threshold_value> ... a range of queues
469 * qid_depth_thresh=qid:<threshold_value> ... just one queue
470 */
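	/* Illustrative devarg strings (key name assumed to be
	 * "qid_depth_thresh"):
	 *   qid_depth_thresh=all:1024  -> threshold 1024 for every queue
	 *   qid_depth_thresh=2-5:512   -> threshold 512 for queues 2..5
	 *   qid_depth_thresh=7:256     -> threshold 256 for queue 7 only
	 */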
471 if (sscanf(value, "all:%d", &thresh) == 1) {
472 first = 0;
473 last = DLB2_MAX_NUM_QUEUES(DLB2_HW_V2) - 1;
474 } else if (sscanf(value, "%d-%d:%d", &first, &last, &thresh) == 3) {
475 /* we have everything we need */
476 } else if (sscanf(value, "%d:%d", &first, &thresh) == 2) {
477 last = first;
478 } else {
479 DLB2_LOG_ERR("Error parsing qid depth devarg. Should be all:val, qid-qid:val, or qid:val\n");
480 return -EINVAL;
481 }
482
483 if (first > last || first < 0 ||
484 last >= DLB2_MAX_NUM_QUEUES(DLB2_HW_V2)) {
485 DLB2_LOG_ERR("Error parsing qid depth devarg, invalid qid value\n");
486 return -EINVAL;
487 }
488
489 if (thresh < 0 || thresh > DLB2_MAX_QUEUE_DEPTH_THRESHOLD) {
490 DLB2_LOG_ERR("Error parsing qid depth devarg, threshold > %d\n",
491 DLB2_MAX_QUEUE_DEPTH_THRESHOLD);
492 return -EINVAL;
493 }
494
495 for (i = first; i <= last; i++)
496 qid_thresh->val[i] = thresh; /* indexed by qid */
497
498 return 0;
499 }
500
501 static int
set_qid_depth_thresh_v2_5(const char *key __rte_unused,
503 const char *value,
504 void *opaque)
505 {
506 struct dlb2_qid_depth_thresholds *qid_thresh = opaque;
507 int first, last, thresh, i;
508
509 if (value == NULL || opaque == NULL) {
510 DLB2_LOG_ERR("NULL pointer\n");
511 return -EINVAL;
512 }
513
514 /* command line override may take one of the following 3 forms:
515 * qid_depth_thresh=all:<threshold_value> ... all queues
516 * qid_depth_thresh=qidA-qidB:<threshold_value> ... a range of queues
517 * qid_depth_thresh=qid:<threshold_value> ... just one queue
518 */
519 if (sscanf(value, "all:%d", &thresh) == 1) {
520 first = 0;
521 last = DLB2_MAX_NUM_QUEUES(DLB2_HW_V2_5) - 1;
522 } else if (sscanf(value, "%d-%d:%d", &first, &last, &thresh) == 3) {
523 /* we have everything we need */
524 } else if (sscanf(value, "%d:%d", &first, &thresh) == 2) {
525 last = first;
526 } else {
527 DLB2_LOG_ERR("Error parsing qid depth devarg. Should be all:val, qid-qid:val, or qid:val\n");
528 return -EINVAL;
529 }
530
531 if (first > last || first < 0 ||
532 last >= DLB2_MAX_NUM_QUEUES(DLB2_HW_V2_5)) {
533 DLB2_LOG_ERR("Error parsing qid depth devarg, invalid qid value\n");
534 return -EINVAL;
535 }
536
537 if (thresh < 0 || thresh > DLB2_MAX_QUEUE_DEPTH_THRESHOLD) {
538 DLB2_LOG_ERR("Error parsing qid depth devarg, threshold > %d\n",
539 DLB2_MAX_QUEUE_DEPTH_THRESHOLD);
540 return -EINVAL;
541 }
542
543 for (i = first; i <= last; i++)
544 qid_thresh->val[i] = thresh; /* indexed by qid */
545
546 return 0;
547 }
548
549 static void
dlb2_eventdev_info_get(struct rte_eventdev *dev,
551 struct rte_event_dev_info *dev_info)
552 {
553 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
554 int ret;
555
556 ret = dlb2_hw_query_resources(dlb2);
557 if (ret) {
558 const struct rte_eventdev_data *data = dev->data;
559
560 DLB2_LOG_ERR("get resources err=%d, devid=%d\n",
561 ret, data->dev_id);
562 /* fn is void, so fall through and return values set up in
563 * probe
564 */
565 }
566
	/* Add the resources currently owned by this domain. These would
	 * become available if the scheduling domain were reset as a result of
	 * the application calling rte_event_dev_configure() again to
	 * *reconfigure* the domain.
	 */
572 evdev_dlb2_default_info.max_event_ports += dlb2->num_ldb_ports;
573 evdev_dlb2_default_info.max_event_queues += dlb2->num_ldb_queues;
574 if (dlb2->version == DLB2_HW_V2_5) {
575 evdev_dlb2_default_info.max_num_events +=
576 dlb2->max_credits;
577 } else {
578 evdev_dlb2_default_info.max_num_events +=
579 dlb2->max_ldb_credits;
580 }
581 evdev_dlb2_default_info.max_event_queues =
582 RTE_MIN(evdev_dlb2_default_info.max_event_queues,
583 RTE_EVENT_MAX_QUEUES_PER_DEV);
584
585 evdev_dlb2_default_info.max_num_events =
586 RTE_MIN(evdev_dlb2_default_info.max_num_events,
587 dlb2->max_num_events_override);
588
589 *dev_info = evdev_dlb2_default_info;
590 }
591
592 static int
dlb2_hw_create_sched_domain(struct dlb2_hw_dev *handle,
594 const struct dlb2_hw_rsrcs *resources_asked,
595 uint8_t device_version)
596 {
597 int ret = 0;
598 struct dlb2_create_sched_domain_args *cfg;
599
600 if (resources_asked == NULL) {
601 DLB2_LOG_ERR("dlb2: dlb2_create NULL parameter\n");
		ret = -EINVAL;
603 goto error_exit;
604 }
605
606 /* Map generic qm resources to dlb2 resources */
607 cfg = &handle->cfg.resources;
608
609 /* DIR ports and queues */
610
611 cfg->num_dir_ports = resources_asked->num_dir_ports;
612 if (device_version == DLB2_HW_V2_5)
613 cfg->num_credits = resources_asked->num_credits;
614 else
615 cfg->num_dir_credits = resources_asked->num_dir_credits;
616
617 /* LDB queues */
618
619 cfg->num_ldb_queues = resources_asked->num_ldb_queues;
620
621 /* LDB ports */
622
623 cfg->cos_strict = 0; /* Best effort */
624 cfg->num_cos_ldb_ports[0] = 0;
625 cfg->num_cos_ldb_ports[1] = 0;
626 cfg->num_cos_ldb_ports[2] = 0;
627 cfg->num_cos_ldb_ports[3] = 0;
628
629 switch (handle->cos_id) {
630 case DLB2_COS_0:
631 cfg->num_ldb_ports = 0; /* no don't care ports */
632 cfg->num_cos_ldb_ports[0] =
633 resources_asked->num_ldb_ports;
634 break;
635 case DLB2_COS_1:
636 cfg->num_ldb_ports = 0; /* no don't care ports */
637 cfg->num_cos_ldb_ports[1] = resources_asked->num_ldb_ports;
638 break;
639 case DLB2_COS_2:
640 cfg->num_ldb_ports = 0; /* no don't care ports */
641 cfg->num_cos_ldb_ports[2] = resources_asked->num_ldb_ports;
642 break;
643 case DLB2_COS_3:
644 cfg->num_ldb_ports = 0; /* no don't care ports */
645 cfg->num_cos_ldb_ports[3] =
646 resources_asked->num_ldb_ports;
647 break;
648 case DLB2_COS_DEFAULT:
649 /* all ldb ports are don't care ports from a cos perspective */
650 cfg->num_ldb_ports =
651 resources_asked->num_ldb_ports;
652 break;
653 }
654
655 if (device_version == DLB2_HW_V2)
656 cfg->num_ldb_credits = resources_asked->num_ldb_credits;
657
658 cfg->num_atomic_inflights =
659 DLB2_NUM_ATOMIC_INFLIGHTS_PER_QUEUE *
660 cfg->num_ldb_queues;
661
662 cfg->num_hist_list_entries = resources_asked->num_ldb_ports *
663 DLB2_NUM_HIST_LIST_ENTRIES_PER_LDB_PORT;
664
665 if (device_version == DLB2_HW_V2_5) {
666 DLB2_LOG_DBG("sched domain create - ldb_qs=%d, ldb_ports=%d, dir_ports=%d, atomic_inflights=%d, hist_list_entries=%d, credits=%d\n",
667 cfg->num_ldb_queues,
668 resources_asked->num_ldb_ports,
669 cfg->num_dir_ports,
670 cfg->num_atomic_inflights,
671 cfg->num_hist_list_entries,
672 cfg->num_credits);
673 } else {
674 DLB2_LOG_DBG("sched domain create - ldb_qs=%d, ldb_ports=%d, dir_ports=%d, atomic_inflights=%d, hist_list_entries=%d, ldb_credits=%d, dir_credits=%d\n",
675 cfg->num_ldb_queues,
676 resources_asked->num_ldb_ports,
677 cfg->num_dir_ports,
678 cfg->num_atomic_inflights,
679 cfg->num_hist_list_entries,
680 cfg->num_ldb_credits,
681 cfg->num_dir_credits);
682 }
683
684 /* Configure the QM */
685
686 ret = dlb2_iface_sched_domain_create(handle, cfg);
687 if (ret < 0) {
688 DLB2_LOG_ERR("dlb2: domain create failed, ret = %d, extra status: %s\n",
689 ret,
690 dlb2_error_strings[cfg->response.status]);
691
692 goto error_exit;
693 }
694
695 handle->domain_id = cfg->response.id;
696 handle->cfg.configured = true;
697
698 error_exit:
699
700 return ret;
701 }
702
703 static void
dlb2_hw_reset_sched_domain(const struct rte_eventdev *dev, bool reconfig)
705 {
706 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
707 enum dlb2_configuration_state config_state;
708 int i, j;
709
710 dlb2_iface_domain_reset(dlb2);
711
712 /* Free all dynamically allocated port memory */
713 for (i = 0; i < dlb2->num_ports; i++)
714 dlb2_free_qe_mem(&dlb2->ev_ports[i].qm_port);
715
716 /* If reconfiguring, mark the device's queues and ports as "previously
717 * configured." If the user doesn't reconfigure them, the PMD will
718 * reapply their previous configuration when the device is started.
719 */
720 config_state = (reconfig) ? DLB2_PREV_CONFIGURED :
721 DLB2_NOT_CONFIGURED;
722
723 for (i = 0; i < dlb2->num_ports; i++) {
724 dlb2->ev_ports[i].qm_port.config_state = config_state;
725 /* Reset setup_done so ports can be reconfigured */
726 dlb2->ev_ports[i].setup_done = false;
727 for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++)
728 dlb2->ev_ports[i].link[j].mapped = false;
729 }
730
731 for (i = 0; i < dlb2->num_queues; i++)
732 dlb2->ev_queues[i].qm_queue.config_state = config_state;
733
734 for (i = 0; i < DLB2_MAX_NUM_QUEUES(DLB2_HW_V2_5); i++)
735 dlb2->ev_queues[i].setup_done = false;
736
737 dlb2->num_ports = 0;
738 dlb2->num_ldb_ports = 0;
739 dlb2->num_dir_ports = 0;
740 dlb2->num_queues = 0;
741 dlb2->num_ldb_queues = 0;
742 dlb2->num_dir_queues = 0;
743 dlb2->configured = false;
744 }
745
746 /* Note: 1 QM instance per QM device, QM instance/device == event device */
747 static int
dlb2_eventdev_configure(const struct rte_eventdev *dev)
749 {
750 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
751 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
752 struct dlb2_hw_rsrcs *rsrcs = &handle->info.hw_rsrc_max;
753 const struct rte_eventdev_data *data = dev->data;
754 const struct rte_event_dev_config *config = &data->dev_conf;
755 int ret;
756
757 /* If this eventdev is already configured, we must release the current
758 * scheduling domain before attempting to configure a new one.
759 */
760 if (dlb2->configured) {
761 dlb2_hw_reset_sched_domain(dev, true);
762 ret = dlb2_hw_query_resources(dlb2);
763 if (ret) {
764 DLB2_LOG_ERR("get resources err=%d, devid=%d\n",
765 ret, data->dev_id);
766 return ret;
767 }
768 }
769
770 if (config->nb_event_queues > rsrcs->num_queues) {
771 DLB2_LOG_ERR("nb_event_queues parameter (%d) exceeds the QM device's capabilities (%d).\n",
772 config->nb_event_queues,
773 rsrcs->num_queues);
774 return -EINVAL;
775 }
776 if (config->nb_event_ports > (rsrcs->num_ldb_ports
777 + rsrcs->num_dir_ports)) {
778 DLB2_LOG_ERR("nb_event_ports parameter (%d) exceeds the QM device's capabilities (%d).\n",
779 config->nb_event_ports,
780 (rsrcs->num_ldb_ports + rsrcs->num_dir_ports));
781 return -EINVAL;
782 }
783 if (config->nb_events_limit > rsrcs->nb_events_limit) {
784 DLB2_LOG_ERR("nb_events_limit parameter (%d) exceeds the QM device's capabilities (%d).\n",
785 config->nb_events_limit,
786 rsrcs->nb_events_limit);
787 return -EINVAL;
788 }
789
790 if (config->event_dev_cfg & RTE_EVENT_DEV_CFG_PER_DEQUEUE_TIMEOUT)
791 dlb2->global_dequeue_wait = false;
792 else {
793 uint32_t timeout32;
794
795 dlb2->global_dequeue_wait = true;
796
797 /* note size mismatch of timeout vals in eventdev lib. */
798 timeout32 = config->dequeue_timeout_ns;
799
800 dlb2->global_dequeue_wait_ticks =
801 timeout32 * (rte_get_timer_hz() / 1E9);
802 }
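	/*
	 * Example (illustrative): with a 2 GHz timer clock the conversion
	 * above turns dequeue_timeout_ns = 1000 into
	 * 1000 * (2e9 / 1e9) = 2000 wait ticks.
	 */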
803
804 /* Does this platform support umonitor/umwait? */
805 if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_WAITPKG))
806 dlb2->umwait_allowed = true;
807
808 rsrcs->num_dir_ports = config->nb_single_link_event_port_queues;
809 rsrcs->num_ldb_ports = config->nb_event_ports - rsrcs->num_dir_ports;
810 /* 1 dir queue per dir port */
811 rsrcs->num_ldb_queues = config->nb_event_queues - rsrcs->num_dir_ports;
812
813 if (dlb2->version == DLB2_HW_V2_5) {
814 rsrcs->num_credits = 0;
815 if (rsrcs->num_ldb_queues || rsrcs->num_dir_ports)
816 rsrcs->num_credits = config->nb_events_limit;
817 } else {
		/* Request fewer directed credits than load-balanced credits,
		 * since the hardware provides a smaller directed credit pool.
		 * The directed request defaults to half of nb_events_limit.
		 */
821 rsrcs->num_ldb_credits = 0;
822 rsrcs->num_dir_credits = 0;
823
824 if (rsrcs->num_ldb_queues)
825 rsrcs->num_ldb_credits = config->nb_events_limit;
826 if (rsrcs->num_dir_ports)
827 rsrcs->num_dir_credits = config->nb_events_limit / 2;
828 if (dlb2->num_dir_credits_override != -1)
829 rsrcs->num_dir_credits = dlb2->num_dir_credits_override;
830 }
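	/*
	 * Illustrative credit split (v2.0 path above): with
	 * nb_events_limit = 2048 and both queue types present, the domain
	 * asks for 2048 load-balanced credits and 2048 / 2 = 1024 directed
	 * credits, unless the num_dir_credits devarg (assumed key name)
	 * overrides the directed amount.
	 */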
831
832 if (dlb2_hw_create_sched_domain(handle, rsrcs, dlb2->version) < 0) {
833 DLB2_LOG_ERR("dlb2_hw_create_sched_domain failed\n");
834 return -ENODEV;
835 }
836
837 dlb2->new_event_limit = config->nb_events_limit;
838 __atomic_store_n(&dlb2->inflights, 0, __ATOMIC_SEQ_CST);
839
840 /* Save number of ports/queues for this event dev */
841 dlb2->num_ports = config->nb_event_ports;
842 dlb2->num_queues = config->nb_event_queues;
843 dlb2->num_dir_ports = rsrcs->num_dir_ports;
844 dlb2->num_ldb_ports = dlb2->num_ports - dlb2->num_dir_ports;
845 dlb2->num_ldb_queues = dlb2->num_queues - dlb2->num_dir_ports;
846 dlb2->num_dir_queues = dlb2->num_dir_ports;
847 if (dlb2->version == DLB2_HW_V2_5) {
848 dlb2->credit_pool = rsrcs->num_credits;
849 dlb2->max_credits = rsrcs->num_credits;
850 } else {
851 dlb2->ldb_credit_pool = rsrcs->num_ldb_credits;
852 dlb2->max_ldb_credits = rsrcs->num_ldb_credits;
853 dlb2->dir_credit_pool = rsrcs->num_dir_credits;
854 dlb2->max_dir_credits = rsrcs->num_dir_credits;
855 }
856
857 dlb2->configured = true;
858
859 return 0;
860 }
861
862 static void
dlb2_eventdev_port_default_conf_get(struct rte_eventdev *dev,
864 uint8_t port_id,
865 struct rte_event_port_conf *port_conf)
866 {
867 RTE_SET_USED(port_id);
868 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
869
870 port_conf->new_event_threshold = dlb2->new_event_limit;
871 port_conf->dequeue_depth = 32;
872 port_conf->enqueue_depth = DLB2_MAX_ENQUEUE_DEPTH;
873 port_conf->event_port_cfg = 0;
874 }
875
876 static void
dlb2_eventdev_queue_default_conf_get(struct rte_eventdev *dev,
878 uint8_t queue_id,
879 struct rte_event_queue_conf *queue_conf)
880 {
881 RTE_SET_USED(dev);
882 RTE_SET_USED(queue_id);
883
884 queue_conf->nb_atomic_flows = 1024;
885 queue_conf->nb_atomic_order_sequences = 64;
886 queue_conf->event_queue_cfg = 0;
887 queue_conf->priority = 0;
888 }
889
890 static int32_t
dlb2_get_sn_allocation(struct dlb2_eventdev *dlb2, int group)
892 {
893 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
894 struct dlb2_get_sn_allocation_args cfg;
895 int ret;
896
897 cfg.group = group;
898
899 ret = dlb2_iface_get_sn_allocation(handle, &cfg);
900 if (ret < 0) {
901 DLB2_LOG_ERR("dlb2: get_sn_allocation ret=%d (driver status: %s)\n",
902 ret, dlb2_error_strings[cfg.response.status]);
903 return ret;
904 }
905
906 return cfg.response.id;
907 }
908
909 static int
dlb2_set_sn_allocation(struct dlb2_eventdev *dlb2, int group, int num)
911 {
912 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
913 struct dlb2_set_sn_allocation_args cfg;
914 int ret;
915
916 cfg.num = num;
917 cfg.group = group;
918
919 ret = dlb2_iface_set_sn_allocation(handle, &cfg);
920 if (ret < 0) {
921 DLB2_LOG_ERR("dlb2: set_sn_allocation ret=%d (driver status: %s)\n",
922 ret, dlb2_error_strings[cfg.response.status]);
923 return ret;
924 }
925
926 return ret;
927 }
928
929 static int32_t
dlb2_get_sn_occupancy(struct dlb2_eventdev *dlb2, int group)
931 {
932 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
933 struct dlb2_get_sn_occupancy_args cfg;
934 int ret;
935
936 cfg.group = group;
937
938 ret = dlb2_iface_get_sn_occupancy(handle, &cfg);
939 if (ret < 0) {
940 DLB2_LOG_ERR("dlb2: get_sn_occupancy ret=%d (driver status: %s)\n",
941 ret, dlb2_error_strings[cfg.response.status]);
942 return ret;
943 }
944
945 return cfg.response.id;
946 }
947
/* Query the current sequence number allocations and, if they conflict with the
 * requested LDB queue configuration, attempt to re-allocate sequence numbers.
 * This is best-effort; if it fails, the subsequent load-balanced queue
 * configuration will fail and report the error.
 */
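
/*
 * Worked example (illustrative numbers only): if a group currently hands out
 * 1024-sequence-number allocations and DLB2_MAX_LDB_SN_ALLOC were 2048, that
 * group has 2048 / 1024 = 2 slots. A queue requesting 64 sequence numbers
 * cannot use it, so the code below looks for an unoccupied group and, if one
 * exists, reprograms it to 64.
 */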
953 static void
dlb2_program_sn_allocation(struct dlb2_eventdev *dlb2,
955 const struct rte_event_queue_conf *queue_conf)
956 {
957 int grp_occupancy[DLB2_NUM_SN_GROUPS];
958 int grp_alloc[DLB2_NUM_SN_GROUPS];
959 int i, sequence_numbers;
960
961 sequence_numbers = (int)queue_conf->nb_atomic_order_sequences;
962
963 for (i = 0; i < DLB2_NUM_SN_GROUPS; i++) {
964 int total_slots;
965
966 grp_alloc[i] = dlb2_get_sn_allocation(dlb2, i);
967 if (grp_alloc[i] < 0)
968 return;
969
970 total_slots = DLB2_MAX_LDB_SN_ALLOC / grp_alloc[i];
971
972 grp_occupancy[i] = dlb2_get_sn_occupancy(dlb2, i);
973 if (grp_occupancy[i] < 0)
974 return;
975
976 /* DLB has at least one available slot for the requested
977 * sequence numbers, so no further configuration required.
978 */
979 if (grp_alloc[i] == sequence_numbers &&
980 grp_occupancy[i] < total_slots)
981 return;
982 }
983
984 /* None of the sequence number groups are configured for the requested
985 * sequence numbers, so we have to reconfigure one of them. This is
986 * only possible if a group is not in use.
987 */
988 for (i = 0; i < DLB2_NUM_SN_GROUPS; i++) {
989 if (grp_occupancy[i] == 0)
990 break;
991 }
992
993 if (i == DLB2_NUM_SN_GROUPS) {
994 DLB2_LOG_ERR("[%s()] No groups with %d sequence_numbers are available or have free slots\n",
995 __func__, sequence_numbers);
996 return;
997 }
998
999 /* Attempt to configure slot i with the requested number of sequence
1000 * numbers. Ignore the return value -- if this fails, the error will be
1001 * caught during subsequent queue configuration.
1002 */
1003 dlb2_set_sn_allocation(dlb2, i, sequence_numbers);
1004 }
1005
1006 static int32_t
dlb2_hw_create_ldb_queue(struct dlb2_eventdev *dlb2,
1008 struct dlb2_eventdev_queue *ev_queue,
1009 const struct rte_event_queue_conf *evq_conf)
1010 {
1011 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
1012 struct dlb2_queue *queue = &ev_queue->qm_queue;
1013 struct dlb2_create_ldb_queue_args cfg;
1014 int32_t ret;
1015 uint32_t qm_qid;
1016 int sched_type = -1;
1017
1018 if (evq_conf == NULL)
1019 return -EINVAL;
1020
1021 if (evq_conf->event_queue_cfg & RTE_EVENT_QUEUE_CFG_ALL_TYPES) {
1022 if (evq_conf->nb_atomic_order_sequences != 0)
1023 sched_type = RTE_SCHED_TYPE_ORDERED;
1024 else
1025 sched_type = RTE_SCHED_TYPE_PARALLEL;
1026 } else
1027 sched_type = evq_conf->schedule_type;
1028
1029 cfg.num_atomic_inflights = DLB2_NUM_ATOMIC_INFLIGHTS_PER_QUEUE;
1030 cfg.num_sequence_numbers = evq_conf->nb_atomic_order_sequences;
1031 cfg.num_qid_inflights = evq_conf->nb_atomic_order_sequences;
1032
1033 if (sched_type != RTE_SCHED_TYPE_ORDERED) {
1034 cfg.num_sequence_numbers = 0;
1035 cfg.num_qid_inflights = 2048;
1036 }
1037
	/* The application should set this to the number of hardware flows it
	 * wants, not the overall number of flows it is going to use. E.g. if
	 * the app uses 64 flows and sets compression to 64, it gets at best
	 * 64 unique hashed flows in hardware.
	 */
1043 switch (evq_conf->nb_atomic_flows) {
1044 /* Valid DLB2 compression levels */
1045 case 64:
1046 case 128:
1047 case 256:
1048 case 512:
1049 case (1 * 1024): /* 1K */
1050 case (2 * 1024): /* 2K */
1051 case (4 * 1024): /* 4K */
1052 case (64 * 1024): /* 64K */
1053 cfg.lock_id_comp_level = evq_conf->nb_atomic_flows;
1054 break;
1055 default:
1056 /* Invalid compression level */
1057 cfg.lock_id_comp_level = 0; /* no compression */
1058 }
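	/*
	 * For instance, nb_atomic_flows = 1024 selects the 1K compression
	 * level above, while an unsupported value such as 1000 falls through
	 * to the default case and disables lock ID compression.
	 */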
1059
1060 if (ev_queue->depth_threshold == 0) {
1061 cfg.depth_threshold = dlb2->default_depth_thresh;
1062 ev_queue->depth_threshold =
1063 dlb2->default_depth_thresh;
1064 } else
1065 cfg.depth_threshold = ev_queue->depth_threshold;
1066
1067 ret = dlb2_iface_ldb_queue_create(handle, &cfg);
1068 if (ret < 0) {
1069 DLB2_LOG_ERR("dlb2: create LB event queue error, ret=%d (driver status: %s)\n",
1070 ret, dlb2_error_strings[cfg.response.status]);
1071 return -EINVAL;
1072 }
1073
1074 qm_qid = cfg.response.id;
1075
1076 /* Save off queue config for debug, resource lookups, and reconfig */
1077 queue->num_qid_inflights = cfg.num_qid_inflights;
1078 queue->num_atm_inflights = cfg.num_atomic_inflights;
1079
1080 queue->sched_type = sched_type;
1081 queue->config_state = DLB2_CONFIGURED;
1082
1083 DLB2_LOG_DBG("Created LB event queue %d, nb_inflights=%d, nb_seq=%d, qid inflights=%d\n",
1084 qm_qid,
1085 cfg.num_atomic_inflights,
1086 cfg.num_sequence_numbers,
1087 cfg.num_qid_inflights);
1088
1089 return qm_qid;
1090 }
1091
1092 static int
dlb2_eventdev_ldb_queue_setup(struct rte_eventdev *dev,
1094 struct dlb2_eventdev_queue *ev_queue,
1095 const struct rte_event_queue_conf *queue_conf)
1096 {
1097 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
1098 int32_t qm_qid;
1099
1100 if (queue_conf->nb_atomic_order_sequences)
1101 dlb2_program_sn_allocation(dlb2, queue_conf);
1102
1103 qm_qid = dlb2_hw_create_ldb_queue(dlb2, ev_queue, queue_conf);
1104 if (qm_qid < 0) {
1105 DLB2_LOG_ERR("Failed to create the load-balanced queue\n");
1106
1107 return qm_qid;
1108 }
1109
1110 dlb2->qm_ldb_to_ev_queue_id[qm_qid] = ev_queue->id;
1111
1112 ev_queue->qm_queue.id = qm_qid;
1113
1114 return 0;
1115 }
1116
static int dlb2_num_dir_queues_setup(struct dlb2_eventdev *dlb2)
1118 {
1119 int i, num = 0;
1120
1121 for (i = 0; i < dlb2->num_queues; i++) {
1122 if (dlb2->ev_queues[i].setup_done &&
1123 dlb2->ev_queues[i].qm_queue.is_directed)
1124 num++;
1125 }
1126
1127 return num;
1128 }
1129
1130 static void
dlb2_queue_link_teardown(struct dlb2_eventdev *dlb2,
1132 struct dlb2_eventdev_queue *ev_queue)
1133 {
1134 struct dlb2_eventdev_port *ev_port;
1135 int i, j;
1136
1137 for (i = 0; i < dlb2->num_ports; i++) {
1138 ev_port = &dlb2->ev_ports[i];
1139
1140 for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++) {
1141 if (!ev_port->link[j].valid ||
1142 ev_port->link[j].queue_id != ev_queue->id)
1143 continue;
1144
1145 ev_port->link[j].valid = false;
1146 ev_port->num_links--;
1147 }
1148 }
1149
1150 ev_queue->num_links = 0;
1151 }
1152
1153 static int
dlb2_eventdev_queue_setup(struct rte_eventdev *dev,
1155 uint8_t ev_qid,
1156 const struct rte_event_queue_conf *queue_conf)
1157 {
1158 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
1159 struct dlb2_eventdev_queue *ev_queue;
1160 int ret;
1161
1162 if (queue_conf == NULL)
1163 return -EINVAL;
1164
1165 if (ev_qid >= dlb2->num_queues)
1166 return -EINVAL;
1167
1168 ev_queue = &dlb2->ev_queues[ev_qid];
1169
1170 ev_queue->qm_queue.is_directed = queue_conf->event_queue_cfg &
1171 RTE_EVENT_QUEUE_CFG_SINGLE_LINK;
1172 ev_queue->id = ev_qid;
1173 ev_queue->conf = *queue_conf;
1174
1175 if (!ev_queue->qm_queue.is_directed) {
1176 ret = dlb2_eventdev_ldb_queue_setup(dev, ev_queue, queue_conf);
1177 } else {
		/* The directed queue isn't set up until link time, at which
		 * point we know its directed port ID. Directed queue setup
		 * will only fail if this queue is already set up or there are
		 * no directed queues left to configure.
		 */
1183 ret = 0;
1184
1185 ev_queue->qm_queue.config_state = DLB2_NOT_CONFIGURED;
1186
1187 if (ev_queue->setup_done ||
1188 dlb2_num_dir_queues_setup(dlb2) == dlb2->num_dir_queues)
1189 ret = -EINVAL;
1190 }
1191
1192 /* Tear down pre-existing port->queue links */
1193 if (!ret && dlb2->run_state == DLB2_RUN_STATE_STOPPED)
1194 dlb2_queue_link_teardown(dlb2, ev_queue);
1195
1196 if (!ret)
1197 ev_queue->setup_done = true;
1198
1199 return ret;
1200 }
1201
1202 static int
dlb2_init_consume_qe(struct dlb2_port *qm_port, char *mz_name)
1204 {
1205 struct dlb2_cq_pop_qe *qe;
1206
1207 qe = rte_zmalloc(mz_name,
1208 DLB2_NUM_QES_PER_CACHE_LINE *
1209 sizeof(struct dlb2_cq_pop_qe),
1210 RTE_CACHE_LINE_SIZE);
1211
1212 if (qe == NULL) {
1213 DLB2_LOG_ERR("dlb2: no memory for consume_qe\n");
1214 return -ENOMEM;
1215 }
1216 qm_port->consume_qe = qe;
1217
1218 qe->qe_valid = 0;
1219 qe->qe_frag = 0;
1220 qe->qe_comp = 0;
1221 qe->cq_token = 1;
1222 /* Tokens value is 0-based; i.e. '0' returns 1 token, '1' returns 2,
1223 * and so on.
1224 */
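	/* For example, a run-time pop of four tokens is expressed as
	 * qe->tokens = 3.
	 */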
1225 qe->tokens = 0; /* set at run time */
1226 qe->meas_lat = 0;
1227 qe->no_dec = 0;
1228 /* Completion IDs are disabled */
1229 qe->cmp_id = 0;
1230
1231 return 0;
1232 }
1233
1234 static int
dlb2_init_int_arm_qe(struct dlb2_port *qm_port, char *mz_name)
1236 {
1237 struct dlb2_enqueue_qe *qe;
1238
1239 qe = rte_zmalloc(mz_name,
1240 DLB2_NUM_QES_PER_CACHE_LINE *
1241 sizeof(struct dlb2_enqueue_qe),
1242 RTE_CACHE_LINE_SIZE);
1243
1244 if (qe == NULL) {
		DLB2_LOG_ERR("dlb2: no memory for int_arm_qe\n");
1246 return -ENOMEM;
1247 }
1248 qm_port->int_arm_qe = qe;
1249
1250 /* V2 - INT ARM is CQ_TOKEN + FRAG */
1251 qe->qe_valid = 0;
1252 qe->qe_frag = 1;
1253 qe->qe_comp = 0;
1254 qe->cq_token = 1;
1255 qe->meas_lat = 0;
1256 qe->no_dec = 0;
1257 /* Completion IDs are disabled */
1258 qe->cmp_id = 0;
1259
1260 return 0;
1261 }
1262
1263 static int
dlb2_init_qe_mem(struct dlb2_port *qm_port, char *mz_name)
1265 {
1266 int ret, sz;
1267
1268 sz = DLB2_NUM_QES_PER_CACHE_LINE * sizeof(struct dlb2_enqueue_qe);
1269
1270 qm_port->qe4 = rte_zmalloc(mz_name, sz, RTE_CACHE_LINE_SIZE);
1271
1272 if (qm_port->qe4 == NULL) {
1273 DLB2_LOG_ERR("dlb2: no qe4 memory\n");
1274 ret = -ENOMEM;
1275 goto error_exit;
1276 }
1277
1278 ret = dlb2_init_int_arm_qe(qm_port, mz_name);
1279 if (ret < 0) {
1280 DLB2_LOG_ERR("dlb2: dlb2_init_int_arm_qe ret=%d\n", ret);
1281 goto error_exit;
1282 }
1283
1284 ret = dlb2_init_consume_qe(qm_port, mz_name);
1285 if (ret < 0) {
1286 DLB2_LOG_ERR("dlb2: dlb2_init_consume_qe ret=%d\n", ret);
1287 goto error_exit;
1288 }
1289
1290 return 0;
1291
1292 error_exit:
1293
1294 dlb2_free_qe_mem(qm_port);
1295
1296 return ret;
1297 }
1298
1299 static inline uint16_t
1300 dlb2_event_enqueue_delayed(void *event_port,
1301 const struct rte_event events[]);
1302
1303 static inline uint16_t
1304 dlb2_event_enqueue_burst_delayed(void *event_port,
1305 const struct rte_event events[],
1306 uint16_t num);
1307
1308 static inline uint16_t
1309 dlb2_event_enqueue_new_burst_delayed(void *event_port,
1310 const struct rte_event events[],
1311 uint16_t num);
1312
1313 static inline uint16_t
1314 dlb2_event_enqueue_forward_burst_delayed(void *event_port,
1315 const struct rte_event events[],
1316 uint16_t num);
1317
1318 /* Generate the required bitmask for rotate-style expected QE gen bits.
1319 * This requires a pattern of 1's and zeros, starting with expected as
1320 * 1 bits, so when hardware writes 0's they're "new". This requires the
1321 * ring size to be powers of 2 to wrap correctly.
1322 */
1323 static void
dlb2_hw_cq_bitmask_init(struct dlb2_port *qm_port, uint32_t cq_depth)
1325 {
1326 uint64_t cq_build_mask = 0;
1327 uint32_t i;
1328
1329 if (cq_depth > 64)
1330 return; /* need to fall back to scalar code */
1331
1332 /*
1333 * all 1's in first u64, all zeros in second is correct bit pattern to
1334 * start. Special casing == 64 easier than adapting complex loop logic.
1335 */
1336 if (cq_depth == 64) {
1337 qm_port->cq_rolling_mask = 0;
1338 qm_port->cq_rolling_mask_2 = -1;
1339 return;
1340 }
1341
1342 for (i = 0; i < 64; i += (cq_depth * 2))
1343 cq_build_mask |= ((1ULL << cq_depth) - 1) << (i + cq_depth);
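	/*
	 * Worked example: for cq_depth == 8 the loop above sets bits 8-15,
	 * 24-31, 40-47 and 56-63, producing 0xFF00FF00FF00FF00 -- alternating
	 * cq_depth-sized runs of zeros and ones.
	 */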
1344
1345 qm_port->cq_rolling_mask = cq_build_mask;
1346 qm_port->cq_rolling_mask_2 = cq_build_mask;
1347 }
1348
1349 static int
dlb2_hw_create_ldb_port(struct dlb2_eventdev *dlb2,
1351 struct dlb2_eventdev_port *ev_port,
1352 uint32_t dequeue_depth,
1353 uint32_t enqueue_depth)
1354 {
1355 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
1356 struct dlb2_create_ldb_port_args cfg = { {0} };
1357 int ret;
1358 struct dlb2_port *qm_port = NULL;
1359 char mz_name[RTE_MEMZONE_NAMESIZE];
1360 uint32_t qm_port_id;
1361 uint16_t ldb_credit_high_watermark = 0;
1362 uint16_t dir_credit_high_watermark = 0;
1363 uint16_t credit_high_watermark = 0;
1364
1365 if (handle == NULL)
1366 return -EINVAL;
1367
1368 if (dequeue_depth < DLB2_MIN_CQ_DEPTH) {
		DLB2_LOG_ERR("dlb2: invalid dequeue_depth, must be at least %d\n",
			     DLB2_MIN_CQ_DEPTH);
1371 return -EINVAL;
1372 }
1373
1374 if (enqueue_depth < DLB2_MIN_ENQUEUE_DEPTH) {
1375 DLB2_LOG_ERR("dlb2: invalid enqueue_depth, must be at least %d\n",
1376 DLB2_MIN_ENQUEUE_DEPTH);
1377 return -EINVAL;
1378 }
1379
1380 rte_spinlock_lock(&handle->resource_lock);
1381
1382 /* We round up to the next power of 2 if necessary */
1383 cfg.cq_depth = rte_align32pow2(dequeue_depth);
1384 cfg.cq_depth_threshold = 1;
1385
1386 cfg.cq_history_list_size = DLB2_NUM_HIST_LIST_ENTRIES_PER_LDB_PORT;
1387
1388 if (handle->cos_id == DLB2_COS_DEFAULT)
1389 cfg.cos_id = 0;
1390 else
1391 cfg.cos_id = handle->cos_id;
1392
1393 cfg.cos_strict = 0;
1394
1395 /* User controls the LDB high watermark via enqueue depth. The DIR high
1396 * watermark is equal, unless the directed credit pool is too small.
1397 */
1398 if (dlb2->version == DLB2_HW_V2) {
1399 ldb_credit_high_watermark = enqueue_depth;
1400 /* If there are no directed ports, the kernel driver will
1401 * ignore this port's directed credit settings. Don't use
1402 * enqueue_depth if it would require more directed credits
1403 * than are available.
1404 */
1405 dir_credit_high_watermark =
1406 RTE_MIN(enqueue_depth,
1407 handle->cfg.num_dir_credits / dlb2->num_ports);
1408 } else
1409 credit_high_watermark = enqueue_depth;
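	/*
	 * Illustrative numbers: with enqueue_depth = 128 and 4096 directed
	 * credits shared across 64 ports, the directed watermark above is
	 * RTE_MIN(128, 4096 / 64) = 64.
	 */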
1410
1411 /* Per QM values */
1412
1413 ret = dlb2_iface_ldb_port_create(handle, &cfg, dlb2->poll_mode);
1414 if (ret < 0) {
1415 DLB2_LOG_ERR("dlb2: dlb2_ldb_port_create error, ret=%d (driver status: %s)\n",
1416 ret, dlb2_error_strings[cfg.response.status]);
1417 goto error_exit;
1418 }
1419
1420 qm_port_id = cfg.response.id;
1421
1422 DLB2_LOG_DBG("dlb2: ev_port %d uses qm LB port %d <<<<<\n",
1423 ev_port->id, qm_port_id);
1424
1425 qm_port = &ev_port->qm_port;
1426 qm_port->ev_port = ev_port; /* back ptr */
1427 qm_port->dlb2 = dlb2; /* back ptr */
1428 /*
1429 * Allocate and init local qe struct(s).
1430 * Note: MOVDIR64 requires the enqueue QE (qe4) to be aligned.
1431 */
1432
1433 snprintf(mz_name, sizeof(mz_name), "dlb2_ldb_port%d",
1434 ev_port->id);
1435
1436 ret = dlb2_init_qe_mem(qm_port, mz_name);
1437 if (ret < 0) {
1438 DLB2_LOG_ERR("dlb2: init_qe_mem failed, ret=%d\n", ret);
1439 goto error_exit;
1440 }
1441
1442 qm_port->id = qm_port_id;
1443
1444 if (dlb2->version == DLB2_HW_V2) {
1445 qm_port->cached_ldb_credits = 0;
1446 qm_port->cached_dir_credits = 0;
1447 } else
1448 qm_port->cached_credits = 0;
1449
1450 /* CQs with depth < 8 use an 8-entry queue, but withhold credits so
1451 * the effective depth is smaller.
1452 */
1453 qm_port->cq_depth = cfg.cq_depth <= 8 ? 8 : cfg.cq_depth;
1454 qm_port->cq_idx = 0;
1455 qm_port->cq_idx_unmasked = 0;
1456
1457 if (dlb2->poll_mode == DLB2_CQ_POLL_MODE_SPARSE)
1458 qm_port->cq_depth_mask = (qm_port->cq_depth * 4) - 1;
1459 else
1460 qm_port->cq_depth_mask = qm_port->cq_depth - 1;
1461
1462 qm_port->gen_bit_shift = __builtin_popcount(qm_port->cq_depth_mask);
1463 /* starting value of gen bit - it toggles at wrap time */
1464 qm_port->gen_bit = 1;
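	/*
	 * Example (illustrative): a CQ depth of 32 in sparse poll mode gives
	 * cq_depth_mask = 127 and gen_bit_shift = 7; in dense mode the mask
	 * is 31 and the shift is 5.
	 */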
1465
1466 dlb2_hw_cq_bitmask_init(qm_port, qm_port->cq_depth);
1467
1468 qm_port->int_armed = false;
1469
1470 /* Save off for later use in info and lookup APIs. */
1471 qm_port->qid_mappings = &dlb2->qm_ldb_to_ev_queue_id[0];
1472
1473 qm_port->dequeue_depth = dequeue_depth;
1474 qm_port->token_pop_thresh = dequeue_depth;
1475
1476 /* The default enqueue functions do not include delayed-pop support for
1477 * performance reasons.
1478 */
1479 if (qm_port->token_pop_mode == DELAYED_POP) {
1480 dlb2->event_dev->enqueue = dlb2_event_enqueue_delayed;
1481 dlb2->event_dev->enqueue_burst =
1482 dlb2_event_enqueue_burst_delayed;
1483 dlb2->event_dev->enqueue_new_burst =
1484 dlb2_event_enqueue_new_burst_delayed;
1485 dlb2->event_dev->enqueue_forward_burst =
1486 dlb2_event_enqueue_forward_burst_delayed;
1487 }
1488
1489 qm_port->owed_tokens = 0;
1490 qm_port->issued_releases = 0;
1491
1492 /* Save config message too. */
1493 rte_memcpy(&qm_port->cfg.ldb, &cfg, sizeof(qm_port->cfg.ldb));
1494
1495 /* update state */
1496 qm_port->state = PORT_STARTED; /* enabled at create time */
1497 qm_port->config_state = DLB2_CONFIGURED;
1498
1499 if (dlb2->version == DLB2_HW_V2) {
1500 qm_port->dir_credits = dir_credit_high_watermark;
1501 qm_port->ldb_credits = ldb_credit_high_watermark;
1502 qm_port->credit_pool[DLB2_DIR_QUEUE] = &dlb2->dir_credit_pool;
1503 qm_port->credit_pool[DLB2_LDB_QUEUE] = &dlb2->ldb_credit_pool;
1504
1505 DLB2_LOG_DBG("dlb2: created ldb port %d, depth = %d, ldb credits=%d, dir credits=%d\n",
1506 qm_port_id,
1507 dequeue_depth,
1508 qm_port->ldb_credits,
1509 qm_port->dir_credits);
1510 } else {
1511 qm_port->credits = credit_high_watermark;
1512 qm_port->credit_pool[DLB2_COMBINED_POOL] = &dlb2->credit_pool;
1513
1514 DLB2_LOG_DBG("dlb2: created ldb port %d, depth = %d, credits=%d\n",
1515 qm_port_id,
1516 dequeue_depth,
1517 qm_port->credits);
1518 }
1519
1520 qm_port->use_scalar = false;
1521
1522 #if (!defined RTE_ARCH_X86_64)
1523 qm_port->use_scalar = true;
1524 #else
1525 if ((qm_port->cq_depth > 64) ||
1526 (!rte_is_power_of_2(qm_port->cq_depth)) ||
1527 (dlb2->vector_opts_enabled == false))
1528 qm_port->use_scalar = true;
1529 #endif
1530
1531 rte_spinlock_unlock(&handle->resource_lock);
1532
1533 return 0;
1534
1535 error_exit:
1536
1537 if (qm_port)
1538 dlb2_free_qe_mem(qm_port);
1539
1540 rte_spinlock_unlock(&handle->resource_lock);
1541
1542 DLB2_LOG_ERR("dlb2: create ldb port failed!\n");
1543
1544 return ret;
1545 }
1546
1547 static void
dlb2_port_link_teardown(struct dlb2_eventdev *dlb2,
1549 struct dlb2_eventdev_port *ev_port)
1550 {
1551 struct dlb2_eventdev_queue *ev_queue;
1552 int i;
1553
1554 for (i = 0; i < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; i++) {
1555 if (!ev_port->link[i].valid)
1556 continue;
1557
1558 ev_queue = &dlb2->ev_queues[ev_port->link[i].queue_id];
1559
1560 ev_port->link[i].valid = false;
1561 ev_port->num_links--;
1562 ev_queue->num_links--;
1563 }
1564 }
1565
1566 static int
dlb2_hw_create_dir_port(struct dlb2_eventdev *dlb2,
1568 struct dlb2_eventdev_port *ev_port,
1569 uint32_t dequeue_depth,
1570 uint32_t enqueue_depth)
1571 {
1572 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
1573 struct dlb2_create_dir_port_args cfg = { {0} };
1574 int ret;
1575 struct dlb2_port *qm_port = NULL;
1576 char mz_name[RTE_MEMZONE_NAMESIZE];
1577 uint32_t qm_port_id;
1578 uint16_t ldb_credit_high_watermark = 0;
1579 uint16_t dir_credit_high_watermark = 0;
1580 uint16_t credit_high_watermark = 0;
1581
1582 if (dlb2 == NULL || handle == NULL)
1583 return -EINVAL;
1584
1585 if (dequeue_depth < DLB2_MIN_CQ_DEPTH) {
1586 DLB2_LOG_ERR("dlb2: invalid dequeue_depth, must be %d-%d\n",
1587 DLB2_MIN_CQ_DEPTH, DLB2_MAX_INPUT_QUEUE_DEPTH);
1588 return -EINVAL;
1589 }
1590
1591 if (enqueue_depth < DLB2_MIN_ENQUEUE_DEPTH) {
1592 DLB2_LOG_ERR("dlb2: invalid enqueue_depth, must be at least %d\n",
1593 DLB2_MIN_ENQUEUE_DEPTH);
1594 return -EINVAL;
1595 }
1596
1597 rte_spinlock_lock(&handle->resource_lock);
1598
1599 /* Directed queues are configured at link time. */
1600 cfg.queue_id = -1;
1601
1602 /* We round up to the next power of 2 if necessary */
1603 cfg.cq_depth = rte_align32pow2(dequeue_depth);
1604 cfg.cq_depth_threshold = 1;
1605
1606 /* User controls the LDB high watermark via enqueue depth. The DIR high
1607 * watermark is equal, unless the directed credit pool is too small.
1608 */
1609 if (dlb2->version == DLB2_HW_V2) {
1610 ldb_credit_high_watermark = enqueue_depth;
1611 /* Don't use enqueue_depth if it would require more directed
1612 * credits than are available.
1613 */
1614 dir_credit_high_watermark =
1615 RTE_MIN(enqueue_depth,
1616 handle->cfg.num_dir_credits / dlb2->num_ports);
1617 } else
1618 credit_high_watermark = enqueue_depth;
1619
1620 /* Per QM values */
1621
1622 ret = dlb2_iface_dir_port_create(handle, &cfg, dlb2->poll_mode);
1623 if (ret < 0) {
1624 DLB2_LOG_ERR("dlb2: dlb2_dir_port_create error, ret=%d (driver status: %s)\n",
1625 ret, dlb2_error_strings[cfg.response.status]);
1626 goto error_exit;
1627 }
1628
1629 qm_port_id = cfg.response.id;
1630
1631 DLB2_LOG_DBG("dlb2: ev_port %d uses qm DIR port %d <<<<<\n",
1632 ev_port->id, qm_port_id);
1633
1634 qm_port = &ev_port->qm_port;
1635 qm_port->ev_port = ev_port; /* back ptr */
1636 qm_port->dlb2 = dlb2; /* back ptr */
1637
1638 /*
1639 * Init local qe struct(s).
1640 * Note: MOVDIR64 requires the enqueue QE to be aligned
1641 */
1642
1643 snprintf(mz_name, sizeof(mz_name), "dlb2_dir_port%d",
1644 ev_port->id);
1645
1646 ret = dlb2_init_qe_mem(qm_port, mz_name);
1647
1648 if (ret < 0) {
1649 DLB2_LOG_ERR("dlb2: init_qe_mem failed, ret=%d\n", ret);
1650 goto error_exit;
1651 }
1652
1653 qm_port->id = qm_port_id;
1654
1655 if (dlb2->version == DLB2_HW_V2) {
1656 qm_port->cached_ldb_credits = 0;
1657 qm_port->cached_dir_credits = 0;
1658 } else
1659 qm_port->cached_credits = 0;
1660
1661 /* CQs with depth < 8 use an 8-entry queue, but withhold credits so
1662 * the effective depth is smaller.
1663 */
1664 qm_port->cq_depth = cfg.cq_depth <= 8 ? 8 : cfg.cq_depth;
1665 qm_port->cq_idx = 0;
1666 qm_port->cq_idx_unmasked = 0;
1667
1668 if (dlb2->poll_mode == DLB2_CQ_POLL_MODE_SPARSE)
1669 qm_port->cq_depth_mask = (cfg.cq_depth * 4) - 1;
1670 else
1671 qm_port->cq_depth_mask = cfg.cq_depth - 1;
1672
1673 qm_port->gen_bit_shift = __builtin_popcount(qm_port->cq_depth_mask);
1674 /* starting value of gen bit - it toggles at wrap time */
1675 qm_port->gen_bit = 1;
1676 dlb2_hw_cq_bitmask_init(qm_port, qm_port->cq_depth);
1677
1678 qm_port->int_armed = false;
1679
1680 /* Save off for later use in info and lookup APIs. */
1681 qm_port->qid_mappings = &dlb2->qm_dir_to_ev_queue_id[0];
1682
1683 qm_port->dequeue_depth = dequeue_depth;
1684
1685 /* Directed ports are auto-pop, by default. */
1686 qm_port->token_pop_mode = AUTO_POP;
1687 qm_port->owed_tokens = 0;
1688 qm_port->issued_releases = 0;
1689
1690 /* Save config message too. */
1691 rte_memcpy(&qm_port->cfg.dir, &cfg, sizeof(qm_port->cfg.dir));
1692
1693 /* update state */
1694 qm_port->state = PORT_STARTED; /* enabled at create time */
1695 qm_port->config_state = DLB2_CONFIGURED;
1696
1697 if (dlb2->version == DLB2_HW_V2) {
1698 qm_port->dir_credits = dir_credit_high_watermark;
1699 qm_port->ldb_credits = ldb_credit_high_watermark;
1700 qm_port->credit_pool[DLB2_DIR_QUEUE] = &dlb2->dir_credit_pool;
1701 qm_port->credit_pool[DLB2_LDB_QUEUE] = &dlb2->ldb_credit_pool;
1702
1703 DLB2_LOG_DBG("dlb2: created dir port %d, depth = %d cr=%d,%d\n",
1704 qm_port_id,
1705 dequeue_depth,
1706 dir_credit_high_watermark,
1707 ldb_credit_high_watermark);
1708 } else {
1709 qm_port->credits = credit_high_watermark;
1710 qm_port->credit_pool[DLB2_COMBINED_POOL] = &dlb2->credit_pool;
1711
1712 DLB2_LOG_DBG("dlb2: created dir port %d, depth = %d cr=%d\n",
1713 qm_port_id,
1714 dequeue_depth,
1715 credit_high_watermark);
1716 }
1717
1718 #if (!defined RTE_ARCH_X86_64)
1719 qm_port->use_scalar = true;
1720 #else
1721 if ((qm_port->cq_depth > 64) ||
1722 (!rte_is_power_of_2(qm_port->cq_depth)) ||
1723 (dlb2->vector_opts_enabled == false))
1724 qm_port->use_scalar = true;
1725 #endif
1726
1727 rte_spinlock_unlock(&handle->resource_lock);
1728
1729 return 0;
1730
1731 error_exit:
1732
1733 if (qm_port)
1734 dlb2_free_qe_mem(qm_port);
1735
1736 rte_spinlock_unlock(&handle->resource_lock);
1737
1738 DLB2_LOG_ERR("dlb2: create dir port failed!\n");
1739
1740 return ret;
1741 }
1742
1743 static int
dlb2_eventdev_port_setup(struct rte_eventdev *dev,
1745 uint8_t ev_port_id,
1746 const struct rte_event_port_conf *port_conf)
1747 {
1748 struct dlb2_eventdev *dlb2;
1749 struct dlb2_eventdev_port *ev_port;
1750 int ret;
1751 uint32_t hw_credit_quanta, sw_credit_quanta;
1752
1753 if (dev == NULL || port_conf == NULL) {
1754 DLB2_LOG_ERR("Null parameter\n");
1755 return -EINVAL;
1756 }
1757
1758 dlb2 = dlb2_pmd_priv(dev);
1759
1760 if (ev_port_id >= DLB2_MAX_NUM_PORTS(dlb2->version))
1761 return -EINVAL;
1762
1763 if (port_conf->dequeue_depth >
1764 evdev_dlb2_default_info.max_event_port_dequeue_depth ||
1765 port_conf->enqueue_depth >
1766 evdev_dlb2_default_info.max_event_port_enqueue_depth)
1767 return -EINVAL;
1768
1769 ev_port = &dlb2->ev_ports[ev_port_id];
1770 /* configured? */
1771 if (ev_port->setup_done) {
1772 DLB2_LOG_ERR("evport %d is already configured\n", ev_port_id);
1773 return -EINVAL;
1774 }
1775
1776 ev_port->qm_port.is_directed = port_conf->event_port_cfg &
1777 RTE_EVENT_PORT_CFG_SINGLE_LINK;
1778
1779 if (!ev_port->qm_port.is_directed) {
1780 ret = dlb2_hw_create_ldb_port(dlb2,
1781 ev_port,
1782 port_conf->dequeue_depth,
1783 port_conf->enqueue_depth);
1784 if (ret < 0) {
			DLB2_LOG_ERR("Failed to create the LB port, ev_port_id=%d\n",
				     ev_port_id);
1787
1788 return ret;
1789 }
1790 } else {
1791 ret = dlb2_hw_create_dir_port(dlb2,
1792 ev_port,
1793 port_conf->dequeue_depth,
1794 port_conf->enqueue_depth);
1795 if (ret < 0) {
1796 DLB2_LOG_ERR("Failed to create the DIR port\n");
1797 return ret;
1798 }
1799 }
1800
1801 /* Save off port config for reconfig */
1802 ev_port->conf = *port_conf;
1803
1804 ev_port->id = ev_port_id;
1805 ev_port->enq_configured = true;
1806 ev_port->setup_done = true;
1807 ev_port->inflight_max = port_conf->new_event_threshold;
1808 ev_port->implicit_release = !(port_conf->event_port_cfg &
1809 RTE_EVENT_PORT_CFG_DISABLE_IMPL_REL);
1810 ev_port->outstanding_releases = 0;
1811 ev_port->inflight_credits = 0;
1812 ev_port->dlb2 = dlb2; /* reverse link */
1813
1814 /* Default for worker ports */
1815 sw_credit_quanta = dlb2->sw_credit_quanta;
1816 hw_credit_quanta = dlb2->hw_credit_quanta;
1817
1818 if (port_conf->event_port_cfg & RTE_EVENT_PORT_CFG_HINT_PRODUCER) {
1819 /* Producer type ports. Mostly enqueue */
1820 sw_credit_quanta = DLB2_SW_CREDIT_P_QUANTA_DEFAULT;
1821 hw_credit_quanta = DLB2_SW_CREDIT_P_BATCH_SZ;
1822 }
1823 if (port_conf->event_port_cfg & RTE_EVENT_PORT_CFG_HINT_CONSUMER) {
1824 /* Consumer type ports. Mostly dequeue */
1825 sw_credit_quanta = DLB2_SW_CREDIT_C_QUANTA_DEFAULT;
1826 hw_credit_quanta = DLB2_SW_CREDIT_C_BATCH_SZ;
1827 }
1828 ev_port->credit_update_quanta = sw_credit_quanta;
1829 ev_port->qm_port.hw_credit_quanta = hw_credit_quanta;
1830
1831 /* Tear down pre-existing port->queue links */
1832 if (dlb2->run_state == DLB2_RUN_STATE_STOPPED)
1833 dlb2_port_link_teardown(dlb2, &dlb2->ev_ports[ev_port_id]);
1834
1835 dev->data->ports[ev_port_id] = &dlb2->ev_ports[ev_port_id];
1836
1837 return 0;
1838 }
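
/*
 * Illustrative usage sketch (not part of the driver): applications reach the
 * setup path above through the public eventdev API. The ids and depths below
 * are arbitrary example values.
 *
 *	struct rte_event_port_conf conf = {
 *		.new_event_threshold = 1024,
 *		.dequeue_depth = 32,
 *		.enqueue_depth = 64,
 *		.event_port_cfg = RTE_EVENT_PORT_CFG_HINT_PRODUCER,
 *	};
 *	if (rte_event_port_setup(dev_id, port_id, &conf) < 0)
 *		rte_panic("port setup failed\n");
 *
 * The HINT_PRODUCER/HINT_CONSUMER flags only select the credit quanta used
 * above; RTE_EVENT_PORT_CFG_SINGLE_LINK is what makes the port directed.
 */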
1839
1840 static int16_t
1841 dlb2_hw_map_ldb_qid_to_port(struct dlb2_hw_dev *handle,
1842 uint32_t qm_port_id,
1843 uint16_t qm_qid,
1844 uint8_t priority)
1845 {
1846 struct dlb2_map_qid_args cfg;
1847 int32_t ret;
1848
1849 if (handle == NULL)
1850 return -EINVAL;
1851
1852 /* Build message */
1853 cfg.port_id = qm_port_id;
1854 cfg.qid = qm_qid;
1855 cfg.priority = EV_TO_DLB2_PRIO(priority);
1856
1857 ret = dlb2_iface_map_qid(handle, &cfg);
1858 if (ret < 0) {
1859 DLB2_LOG_ERR("dlb2: map qid error, ret=%d (driver status: %s)\n",
1860 ret, dlb2_error_strings[cfg.response.status]);
1861 DLB2_LOG_ERR("dlb2: grp=%d, qm_port=%d, qm_qid=%d prio=%d\n",
1862 handle->domain_id, cfg.port_id,
1863 cfg.qid,
1864 cfg.priority);
1865 } else {
1866 DLB2_LOG_DBG("dlb2: mapped queue %d to qm_port %d\n",
1867 qm_qid, qm_port_id);
1868 }
1869
1870 return ret;
1871 }
1872
1873 static int
1874 dlb2_event_queue_join_ldb(struct dlb2_eventdev *dlb2,
1875 struct dlb2_eventdev_port *ev_port,
1876 struct dlb2_eventdev_queue *ev_queue,
1877 uint8_t priority)
1878 {
1879 int first_avail = -1;
1880 int ret, i;
1881
1882 for (i = 0; i < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; i++) {
1883 if (ev_port->link[i].valid) {
1884 if (ev_port->link[i].queue_id == ev_queue->id &&
1885 ev_port->link[i].priority == priority) {
1886 if (ev_port->link[i].mapped)
1887 return 0; /* already mapped */
1888 first_avail = i;
1889 }
1890 } else if (first_avail == -1)
1891 first_avail = i;
1892 }
1893 if (first_avail == -1) {
1894 DLB2_LOG_ERR("dlb2: qm_port %d has no available QID slots.\n",
1895 ev_port->qm_port.id);
1896 return -EINVAL;
1897 }
1898
1899 ret = dlb2_hw_map_ldb_qid_to_port(&dlb2->qm_instance,
1900 ev_port->qm_port.id,
1901 ev_queue->qm_queue.id,
1902 priority);
1903
1904 if (!ret)
1905 ev_port->link[first_avail].mapped = true;
1906
1907 return ret;
1908 }
1909
1910 static int32_t
1911 dlb2_hw_create_dir_queue(struct dlb2_eventdev *dlb2,
1912 struct dlb2_eventdev_queue *ev_queue,
1913 int32_t qm_port_id)
1914 {
1915 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
1916 struct dlb2_create_dir_queue_args cfg;
1917 int32_t ret;
1918
1919 /* The directed port is always configured before its queue */
1920 cfg.port_id = qm_port_id;
1921
1922 if (ev_queue->depth_threshold == 0) {
1923 cfg.depth_threshold = dlb2->default_depth_thresh;
1924 ev_queue->depth_threshold =
1925 dlb2->default_depth_thresh;
1926 } else
1927 cfg.depth_threshold = ev_queue->depth_threshold;
1928
1929 ret = dlb2_iface_dir_queue_create(handle, &cfg);
1930 if (ret < 0) {
1931 DLB2_LOG_ERR("dlb2: create DIR event queue error, ret=%d (driver status: %s)\n",
1932 ret, dlb2_error_strings[cfg.response.status]);
1933 return -EINVAL;
1934 }
1935
1936 return cfg.response.id;
1937 }
1938
1939 static int
1940 dlb2_eventdev_dir_queue_setup(struct dlb2_eventdev *dlb2,
1941 struct dlb2_eventdev_queue *ev_queue,
1942 struct dlb2_eventdev_port *ev_port)
1943 {
1944 int32_t qm_qid;
1945
1946 qm_qid = dlb2_hw_create_dir_queue(dlb2, ev_queue, ev_port->qm_port.id);
1947
1948 if (qm_qid < 0) {
1949 DLB2_LOG_ERR("Failed to create the DIR queue\n");
1950 return qm_qid;
1951 }
1952
1953 dlb2->qm_dir_to_ev_queue_id[qm_qid] = ev_queue->id;
1954
1955 ev_queue->qm_queue.id = qm_qid;
1956
1957 return 0;
1958 }
1959
1960 static int
1961 dlb2_do_port_link(struct rte_eventdev *dev,
1962 struct dlb2_eventdev_queue *ev_queue,
1963 struct dlb2_eventdev_port *ev_port,
1964 uint8_t prio)
1965 {
1966 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
1967 int err;
1968
1969 /* Don't link until start time. */
1970 if (dlb2->run_state == DLB2_RUN_STATE_STOPPED)
1971 return 0;
1972
1973 if (ev_queue->qm_queue.is_directed)
1974 err = dlb2_eventdev_dir_queue_setup(dlb2, ev_queue, ev_port);
1975 else
1976 err = dlb2_event_queue_join_ldb(dlb2, ev_port, ev_queue, prio);
1977
1978 if (err) {
1979 DLB2_LOG_ERR("port link failure for %s ev_q %d, ev_port %d\n",
1980 ev_queue->qm_queue.is_directed ? "DIR" : "LDB",
1981 ev_queue->id, ev_port->id);
1982
1983 rte_errno = err;
1984 return -1;
1985 }
1986
1987 return 0;
1988 }
1989
1990 static int
1991 dlb2_validate_port_link(struct dlb2_eventdev_port *ev_port,
1992 uint8_t queue_id,
1993 bool link_exists,
1994 int index)
1995 {
1996 struct dlb2_eventdev *dlb2 = ev_port->dlb2;
1997 struct dlb2_eventdev_queue *ev_queue;
1998 bool port_is_dir, queue_is_dir;
1999
2000 if (queue_id > dlb2->num_queues) {
2001 rte_errno = -EINVAL;
2002 return -1;
2003 }
2004
2005 ev_queue = &dlb2->ev_queues[queue_id];
2006
2007 if (!ev_queue->setup_done &&
2008 ev_queue->qm_queue.config_state != DLB2_PREV_CONFIGURED) {
2009 rte_errno = -EINVAL;
2010 return -1;
2011 }
2012
2013 port_is_dir = ev_port->qm_port.is_directed;
2014 queue_is_dir = ev_queue->qm_queue.is_directed;
2015
2016 if (port_is_dir != queue_is_dir) {
2017 DLB2_LOG_ERR("%s queue %u can't link to %s port %u\n",
2018 queue_is_dir ? "DIR" : "LDB", ev_queue->id,
2019 port_is_dir ? "DIR" : "LDB", ev_port->id);
2020
2021 rte_errno = -EINVAL;
2022 return -1;
2023 }
2024
2025 /* Check if there is space for the requested link */
2026 if (!link_exists && index == -1) {
2027 DLB2_LOG_ERR("no space for new link\n");
2028 rte_errno = -ENOSPC;
2029 return -1;
2030 }
2031
2032 /* Check if the directed port is already linked */
2033 if (ev_port->qm_port.is_directed && ev_port->num_links > 0 &&
2034 !link_exists) {
2035 DLB2_LOG_ERR("Can't link DIR port %d to >1 queues\n",
2036 ev_port->id);
2037 rte_errno = -EINVAL;
2038 return -1;
2039 }
2040
2041 /* Check if the directed queue is already linked */
2042 if (ev_queue->qm_queue.is_directed && ev_queue->num_links > 0 &&
2043 !link_exists) {
2044 DLB2_LOG_ERR("Can't link DIR queue %d to >1 ports\n",
2045 ev_queue->id);
2046 rte_errno = -EINVAL;
2047 return -1;
2048 }
2049
2050 return 0;
2051 }
2052
2053 static int
2054 dlb2_eventdev_port_link(struct rte_eventdev *dev, void *event_port,
2055 const uint8_t queues[], const uint8_t priorities[],
2056 uint16_t nb_links)
2057
2058 {
2059 struct dlb2_eventdev_port *ev_port = event_port;
2060 struct dlb2_eventdev *dlb2;
2061 int i, j;
2062
2063 RTE_SET_USED(dev);
2064
2065 if (ev_port == NULL) {
2066 DLB2_LOG_ERR("dlb2: evport not setup\n");
2067 rte_errno = -EINVAL;
2068 return 0;
2069 }
2070
2071 if (!ev_port->setup_done &&
2072 ev_port->qm_port.config_state != DLB2_PREV_CONFIGURED) {
2073 DLB2_LOG_ERR("dlb2: evport not setup\n");
2074 rte_errno = -EINVAL;
2075 return 0;
2076 }
2077
2078 /* Note: rte_event_port_link() ensures the PMD won't receive a NULL
2079 * queues pointer.
2080 */
2081 if (nb_links == 0) {
2082 DLB2_LOG_DBG("dlb2: nb_links is 0\n");
2083 return 0; /* Ignore and return success */
2084 }
2085
2086 dlb2 = ev_port->dlb2;
2087
2088 DLB2_LOG_DBG("Linking %u queues to %s port %d\n",
2089 nb_links,
2090 ev_port->qm_port.is_directed ? "DIR" : "LDB",
2091 ev_port->id);
2092
2093 for (i = 0; i < nb_links; i++) {
2094 struct dlb2_eventdev_queue *ev_queue;
2095 uint8_t queue_id, prio;
2096 bool found = false;
2097 int index = -1;
2098
2099 queue_id = queues[i];
2100 prio = priorities[i];
2101
2102 /* Check if the link already exists. */
2103 for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++)
2104 if (ev_port->link[j].valid) {
2105 if (ev_port->link[j].queue_id == queue_id) {
2106 found = true;
2107 index = j;
2108 break;
2109 }
2110 } else if (index == -1) {
2111 index = j;
2112 }
2113
2114 /* could not link */
2115 if (index == -1)
2116 break;
2117
2118 /* Check if already linked at the requested priority */
2119 if (found && ev_port->link[j].priority == prio)
2120 continue;
2121
2122 if (dlb2_validate_port_link(ev_port, queue_id, found, index))
2123 break; /* return index of offending queue */
2124
2125 ev_queue = &dlb2->ev_queues[queue_id];
2126
2127 if (dlb2_do_port_link(dev, ev_queue, ev_port, prio))
2128 break; /* return index of offending queue */
2129
2130 ev_queue->num_links++;
2131
2132 ev_port->link[index].queue_id = queue_id;
2133 ev_port->link[index].priority = prio;
2134 ev_port->link[index].valid = true;
2135 		/* Only count new links; an existing entry was just a prio change */
2136 if (!found)
2137 ev_port->num_links++;
2138 }
2139 return i;
2140 }
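
/*
 * Illustrative usage sketch (not part of the driver): the link handler above
 * is reached through rte_event_port_link(). The ids below are arbitrary
 * examples; passing a NULL priorities array selects
 * RTE_EVENT_DEV_PRIORITY_NORMAL for every queue.
 *
 *	uint8_t queues[2] = {0, 1};
 *	uint8_t prios[2] = {RTE_EVENT_DEV_PRIORITY_HIGHEST,
 *			    RTE_EVENT_DEV_PRIORITY_NORMAL};
 *	int n = rte_event_port_link(dev_id, port_id, queues, prios, 2);
 *	if (n != 2)
 *		rte_panic("linked %d of 2 queues, rte_errno=%d\n", n, rte_errno);
 *
 * Links requested while the device is stopped are only recorded here and are
 * applied to hardware later by dlb2_eventdev_apply_port_links().
 */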
2141
2142 static int16_t
2143 dlb2_hw_unmap_ldb_qid_from_port(struct dlb2_hw_dev *handle,
2144 uint32_t qm_port_id,
2145 uint16_t qm_qid)
2146 {
2147 struct dlb2_unmap_qid_args cfg;
2148 int32_t ret;
2149
2150 if (handle == NULL)
2151 return -EINVAL;
2152
2153 cfg.port_id = qm_port_id;
2154 cfg.qid = qm_qid;
2155
2156 ret = dlb2_iface_unmap_qid(handle, &cfg);
2157 if (ret < 0)
2158 DLB2_LOG_ERR("dlb2: unmap qid error, ret=%d (driver status: %s)\n",
2159 ret, dlb2_error_strings[cfg.response.status]);
2160
2161 return ret;
2162 }
2163
2164 static int
2165 dlb2_event_queue_detach_ldb(struct dlb2_eventdev *dlb2,
2166 struct dlb2_eventdev_port *ev_port,
2167 struct dlb2_eventdev_queue *ev_queue)
2168 {
2169 int ret, i;
2170
2171 /* Don't unlink until start time. */
2172 if (dlb2->run_state == DLB2_RUN_STATE_STOPPED)
2173 return 0;
2174
2175 for (i = 0; i < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; i++) {
2176 if (ev_port->link[i].valid &&
2177 ev_port->link[i].queue_id == ev_queue->id)
2178 break; /* found */
2179 }
2180
2181 	/* This is expected with the eventdev API, which may attempt to
2182 	 * unmap queues that were never linked to this port.
2183 */
2184 if (i == DLB2_MAX_NUM_QIDS_PER_LDB_CQ) {
2185 DLB2_LOG_DBG("dlb2: ignoring LB QID %d not mapped for qm_port %d.\n",
2186 ev_queue->qm_queue.id,
2187 ev_port->qm_port.id);
2188 return 0;
2189 }
2190
2191 ret = dlb2_hw_unmap_ldb_qid_from_port(&dlb2->qm_instance,
2192 ev_port->qm_port.id,
2193 ev_queue->qm_queue.id);
2194 if (!ret)
2195 ev_port->link[i].mapped = false;
2196
2197 return ret;
2198 }
2199
2200 static int
2201 dlb2_eventdev_port_unlink(struct rte_eventdev *dev, void *event_port,
2202 uint8_t queues[], uint16_t nb_unlinks)
2203 {
2204 struct dlb2_eventdev_port *ev_port = event_port;
2205 struct dlb2_eventdev *dlb2;
2206 int i;
2207
2208 RTE_SET_USED(dev);
2209
2210 if (!ev_port->setup_done) {
2211 DLB2_LOG_ERR("dlb2: evport %d is not configured\n",
2212 ev_port->id);
2213 rte_errno = -EINVAL;
2214 return 0;
2215 }
2216
2217 if (queues == NULL || nb_unlinks == 0) {
2218 DLB2_LOG_DBG("dlb2: queues is NULL or nb_unlinks is 0\n");
2219 return 0; /* Ignore and return success */
2220 }
2221
2222 if (ev_port->qm_port.is_directed) {
2223 DLB2_LOG_DBG("dlb2: ignore unlink from dir port %d\n",
2224 ev_port->id);
2225 rte_errno = 0;
2226 return nb_unlinks; /* as if success */
2227 }
2228
2229 dlb2 = ev_port->dlb2;
2230
2231 for (i = 0; i < nb_unlinks; i++) {
2232 struct dlb2_eventdev_queue *ev_queue;
2233 int ret, j;
2234
2235 if (queues[i] >= dlb2->num_queues) {
2236 DLB2_LOG_ERR("dlb2: invalid queue id %d\n", queues[i]);
2237 rte_errno = -EINVAL;
2238 return i; /* return index of offending queue */
2239 }
2240
2241 ev_queue = &dlb2->ev_queues[queues[i]];
2242
2243 /* Does a link exist? */
2244 for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++)
2245 if (ev_port->link[j].queue_id == queues[i] &&
2246 ev_port->link[j].valid)
2247 break;
2248
2249 if (j == DLB2_MAX_NUM_QIDS_PER_LDB_CQ)
2250 continue;
2251
2252 ret = dlb2_event_queue_detach_ldb(dlb2, ev_port, ev_queue);
2253 if (ret) {
2254 DLB2_LOG_ERR("unlink err=%d for port %d queue %d\n",
2255 ret, ev_port->id, queues[i]);
2256 rte_errno = -ENOENT;
2257 return i; /* return index of offending queue */
2258 }
2259
2260 ev_port->link[j].valid = false;
2261 ev_port->num_links--;
2262 ev_queue->num_links--;
2263 }
2264
2265 return nb_unlinks;
2266 }
2267
2268 static int
2269 dlb2_eventdev_port_unlinks_in_progress(struct rte_eventdev *dev,
2270 void *event_port)
2271 {
2272 struct dlb2_eventdev_port *ev_port = event_port;
2273 struct dlb2_eventdev *dlb2;
2274 struct dlb2_hw_dev *handle;
2275 struct dlb2_pending_port_unmaps_args cfg;
2276 int ret;
2277
2278 RTE_SET_USED(dev);
2279
2280 if (!ev_port->setup_done) {
2281 DLB2_LOG_ERR("dlb2: evport %d is not configured\n",
2282 ev_port->id);
2283 rte_errno = -EINVAL;
2284 return 0;
2285 }
2286
2287 cfg.port_id = ev_port->qm_port.id;
2288 dlb2 = ev_port->dlb2;
2289 handle = &dlb2->qm_instance;
2290 ret = dlb2_iface_pending_port_unmaps(handle, &cfg);
2291
2292 if (ret < 0) {
2293 DLB2_LOG_ERR("dlb2: num_unlinks_in_progress ret=%d (driver status: %s)\n",
2294 ret, dlb2_error_strings[cfg.response.status]);
2295 return ret;
2296 }
2297
2298 return cfg.response.id;
2299 }
2300
2301 static int
2302 dlb2_eventdev_reapply_configuration(struct rte_eventdev *dev)
2303 {
2304 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
2305 int ret, i;
2306
2307 /* If an event queue or port was previously configured, but hasn't been
2308 * reconfigured, reapply its original configuration.
2309 */
2310 for (i = 0; i < dlb2->num_queues; i++) {
2311 struct dlb2_eventdev_queue *ev_queue;
2312
2313 ev_queue = &dlb2->ev_queues[i];
2314
2315 if (ev_queue->qm_queue.config_state != DLB2_PREV_CONFIGURED)
2316 continue;
2317
2318 ret = dlb2_eventdev_queue_setup(dev, i, &ev_queue->conf);
2319 if (ret < 0) {
2320 DLB2_LOG_ERR("dlb2: failed to reconfigure queue %d", i);
2321 return ret;
2322 }
2323 }
2324
2325 for (i = 0; i < dlb2->num_ports; i++) {
2326 struct dlb2_eventdev_port *ev_port = &dlb2->ev_ports[i];
2327
2328 if (ev_port->qm_port.config_state != DLB2_PREV_CONFIGURED)
2329 continue;
2330
2331 ret = dlb2_eventdev_port_setup(dev, i, &ev_port->conf);
2332 if (ret < 0) {
2333 DLB2_LOG_ERR("dlb2: failed to reconfigure ev_port %d",
2334 i);
2335 return ret;
2336 }
2337 }
2338
2339 return 0;
2340 }
2341
2342 static int
2343 dlb2_eventdev_apply_port_links(struct rte_eventdev *dev)
2344 {
2345 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
2346 int i;
2347
2348 /* Perform requested port->queue links */
2349 for (i = 0; i < dlb2->num_ports; i++) {
2350 struct dlb2_eventdev_port *ev_port = &dlb2->ev_ports[i];
2351 int j;
2352
2353 for (j = 0; j < DLB2_MAX_NUM_QIDS_PER_LDB_CQ; j++) {
2354 struct dlb2_eventdev_queue *ev_queue;
2355 uint8_t prio, queue_id;
2356
2357 if (!ev_port->link[j].valid)
2358 continue;
2359
2360 prio = ev_port->link[j].priority;
2361 queue_id = ev_port->link[j].queue_id;
2362
2363 if (dlb2_validate_port_link(ev_port, queue_id, true, j))
2364 return -EINVAL;
2365
2366 ev_queue = &dlb2->ev_queues[queue_id];
2367
2368 if (dlb2_do_port_link(dev, ev_queue, ev_port, prio))
2369 return -EINVAL;
2370 }
2371 }
2372
2373 return 0;
2374 }
2375
2376 static int
2377 dlb2_eventdev_start(struct rte_eventdev *dev)
2378 {
2379 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
2380 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
2381 struct dlb2_start_domain_args cfg;
2382 int ret, i;
2383
2384 rte_spinlock_lock(&dlb2->qm_instance.resource_lock);
2385 if (dlb2->run_state != DLB2_RUN_STATE_STOPPED) {
2386 DLB2_LOG_ERR("bad state %d for dev_start\n",
2387 (int)dlb2->run_state);
2388 rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
2389 return -EINVAL;
2390 }
2391 dlb2->run_state = DLB2_RUN_STATE_STARTING;
2392 rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
2393
2394 /* If the device was configured more than once, some event ports and/or
2395 * queues may need to be reconfigured.
2396 */
2397 ret = dlb2_eventdev_reapply_configuration(dev);
2398 if (ret)
2399 return ret;
2400
2401 /* The DLB PMD delays port links until the device is started. */
2402 ret = dlb2_eventdev_apply_port_links(dev);
2403 if (ret)
2404 return ret;
2405
2406 for (i = 0; i < dlb2->num_ports; i++) {
2407 if (!dlb2->ev_ports[i].setup_done) {
2408 DLB2_LOG_ERR("dlb2: port %d not setup", i);
2409 return -ESTALE;
2410 }
2411 }
2412
2413 for (i = 0; i < dlb2->num_queues; i++) {
2414 if (dlb2->ev_queues[i].num_links == 0) {
2415 DLB2_LOG_ERR("dlb2: queue %d is not linked", i);
2416 return -ENOLINK;
2417 }
2418 }
2419
2420 ret = dlb2_iface_sched_domain_start(handle, &cfg);
2421 if (ret < 0) {
2422 DLB2_LOG_ERR("dlb2: sched_domain_start ret=%d (driver status: %s)\n",
2423 ret, dlb2_error_strings[cfg.response.status]);
2424 return ret;
2425 }
2426
2427 dlb2->run_state = DLB2_RUN_STATE_STARTED;
2428 DLB2_LOG_DBG("dlb2: sched_domain_start completed OK\n");
2429
2430 return 0;
2431 }
2432
2433 static uint8_t cmd_byte_map[DLB2_NUM_PORT_TYPES][DLB2_NUM_HW_SCHED_TYPES] = {
2434 {
2435 /* Load-balanced cmd bytes */
2436 [RTE_EVENT_OP_NEW] = DLB2_NEW_CMD_BYTE,
2437 [RTE_EVENT_OP_FORWARD] = DLB2_FWD_CMD_BYTE,
2438 [RTE_EVENT_OP_RELEASE] = DLB2_COMP_CMD_BYTE,
2439 },
2440 {
2441 /* Directed cmd bytes */
2442 [RTE_EVENT_OP_NEW] = DLB2_NEW_CMD_BYTE,
2443 [RTE_EVENT_OP_FORWARD] = DLB2_NEW_CMD_BYTE,
2444 [RTE_EVENT_OP_RELEASE] = DLB2_NOOP_CMD_BYTE,
2445 },
2446 };
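
/*
 * Example lookup (illustrative only): a load-balanced port enqueueing an
 * RTE_EVENT_OP_FORWARD event uses cmd_byte_map[0][RTE_EVENT_OP_FORWARD],
 * i.e. DLB2_FWD_CMD_BYTE, while the same op on a directed port maps to
 * DLB2_NEW_CMD_BYTE and a directed RTE_EVENT_OP_RELEASE becomes a no-op.
 */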
2447
2448 static inline uint32_t
2449 dlb2_port_credits_get(struct dlb2_port *qm_port,
2450 enum dlb2_hw_queue_types type)
2451 {
2452 uint32_t credits = *qm_port->credit_pool[type];
2453 /* By default hw_credit_quanta is DLB2_SW_CREDIT_BATCH_SZ */
2454 uint32_t batch_size = qm_port->hw_credit_quanta;
2455
2456 if (unlikely(credits < batch_size))
2457 batch_size = credits;
2458
2459 if (likely(credits &&
2460 __atomic_compare_exchange_n(
2461 qm_port->credit_pool[type],
2462 &credits, credits - batch_size, false,
2463 __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)))
2464 return batch_size;
2465 else
2466 return 0;
2467 }
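
/*
 * Credit reservation sketch (illustrative numbers): with hw_credit_quanta ==
 * 32 and 100 credits left in the shared pool, the compare-and-exchange above
 * tries to move the pool from 100 to 68 and, on success, hands 32 credits to
 * the port to cache locally. If fewer than 32 credits remain, only the
 * remainder is taken; if another port wins the race, the CAS fails and 0 is
 * returned, so the caller reports the port as out of credits for this
 * attempt.
 */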
2468
2469 static inline void
2470 dlb2_replenish_sw_credits(struct dlb2_eventdev *dlb2,
2471 struct dlb2_eventdev_port *ev_port)
2472 {
2473 uint16_t quanta = ev_port->credit_update_quanta;
2474
2475 if (ev_port->inflight_credits >= quanta * 2) {
2476 /* Replenish credits, saving one quanta for enqueues */
2477 uint16_t val = ev_port->inflight_credits - quanta;
2478
2479 __atomic_fetch_sub(&dlb2->inflights, val, __ATOMIC_SEQ_CST);
2480 ev_port->inflight_credits -= val;
2481 }
2482 }
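
/*
 * Worked example (illustrative): with credit_update_quanta == 32, a port
 * holding 70 cached inflight credits is over the 2 * quanta threshold, so
 * 70 - 32 = 38 credits are returned to dlb2->inflights and the port keeps
 * one quanta (32) for upcoming enqueues. A port holding 63 keeps them all.
 */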
2483
2484 static inline int
2485 dlb2_check_enqueue_sw_credits(struct dlb2_eventdev *dlb2,
2486 struct dlb2_eventdev_port *ev_port)
2487 {
2488 uint32_t sw_inflights = __atomic_load_n(&dlb2->inflights,
2489 __ATOMIC_SEQ_CST);
2490 const int num = 1;
2491
2492 if (unlikely(ev_port->inflight_max < sw_inflights)) {
2493 DLB2_INC_STAT(ev_port->stats.traffic.tx_nospc_inflight_max, 1);
2494 rte_errno = -ENOSPC;
2495 return 1;
2496 }
2497
2498 if (ev_port->inflight_credits < num) {
2499 /* check if event enqueue brings ev_port over max threshold */
2500 uint32_t credit_update_quanta = ev_port->credit_update_quanta;
2501
2502 if (sw_inflights + credit_update_quanta >
2503 dlb2->new_event_limit) {
2504 DLB2_INC_STAT(
2505 ev_port->stats.traffic.tx_nospc_new_event_limit,
2506 1);
2507 rte_errno = -ENOSPC;
2508 return 1;
2509 }
2510
2511 __atomic_fetch_add(&dlb2->inflights, credit_update_quanta,
2512 __ATOMIC_SEQ_CST);
2513 ev_port->inflight_credits += (credit_update_quanta);
2514
2515 if (ev_port->inflight_credits < num) {
2516 DLB2_INC_STAT(
2517 ev_port->stats.traffic.tx_nospc_inflight_credits,
2518 1);
2519 rte_errno = -ENOSPC;
2520 return 1;
2521 }
2522 }
2523
2524 return 0;
2525 }
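
/*
 * Worked example (illustrative numbers): with new_event_limit == 1024,
 * credit_update_quanta == 32, no cached port credits and 1010 events already
 * inflight, acquiring another quanta would raise the total to 1042 > 1024,
 * so the enqueue fails with -ENOSPC and tx_nospc_new_event_limit is
 * incremented. With 900 inflight, the port takes a quanta of 32 software
 * credits and the enqueue proceeds.
 */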
2526
2527 static inline int
2528 dlb2_check_enqueue_hw_ldb_credits(struct dlb2_port *qm_port)
2529 {
2530 if (unlikely(qm_port->cached_ldb_credits == 0)) {
2531 qm_port->cached_ldb_credits =
2532 dlb2_port_credits_get(qm_port,
2533 DLB2_LDB_QUEUE);
2534 if (unlikely(qm_port->cached_ldb_credits == 0)) {
2535 DLB2_INC_STAT(
2536 qm_port->ev_port->stats.traffic.tx_nospc_ldb_hw_credits,
2537 1);
2538 DLB2_LOG_DBG("ldb credits exhausted\n");
2539 return 1; /* credits exhausted */
2540 }
2541 }
2542
2543 return 0;
2544 }
2545
2546 static inline int
2547 dlb2_check_enqueue_hw_dir_credits(struct dlb2_port *qm_port)
2548 {
2549 if (unlikely(qm_port->cached_dir_credits == 0)) {
2550 qm_port->cached_dir_credits =
2551 dlb2_port_credits_get(qm_port,
2552 DLB2_DIR_QUEUE);
2553 if (unlikely(qm_port->cached_dir_credits == 0)) {
2554 DLB2_INC_STAT(
2555 qm_port->ev_port->stats.traffic.tx_nospc_dir_hw_credits,
2556 1);
2557 DLB2_LOG_DBG("dir credits exhausted\n");
2558 return 1; /* credits exhausted */
2559 }
2560 }
2561
2562 return 0;
2563 }
2564
2565 static inline int
2566 dlb2_check_enqueue_hw_credits(struct dlb2_port *qm_port)
2567 {
2568 if (unlikely(qm_port->cached_credits == 0)) {
2569 qm_port->cached_credits =
2570 dlb2_port_credits_get(qm_port,
2571 DLB2_COMBINED_POOL);
2572 if (unlikely(qm_port->cached_credits == 0)) {
2573 DLB2_INC_STAT(
2574 qm_port->ev_port->stats.traffic.tx_nospc_hw_credits, 1);
2575 DLB2_LOG_DBG("credits exhausted\n");
2576 return 1; /* credits exhausted */
2577 }
2578 }
2579
2580 return 0;
2581 }
2582
2583 static __rte_always_inline void
2584 dlb2_pp_write(struct dlb2_enqueue_qe *qe4,
2585 struct process_local_port_data *port_data)
2586 {
2587 dlb2_movdir64b(port_data->pp_addr, qe4);
2588 }
2589
2590 static inline int
2591 dlb2_consume_qe_immediate(struct dlb2_port *qm_port, int num)
2592 {
2593 struct process_local_port_data *port_data;
2594 struct dlb2_cq_pop_qe *qe;
2595
2596 RTE_ASSERT(qm_port->config_state == DLB2_CONFIGURED);
2597
2598 qe = qm_port->consume_qe;
2599
2600 qe->tokens = num - 1;
2601
2602 /* No store fence needed since no pointer is being sent, and CQ token
2603 * pops can be safely reordered with other HCWs.
2604 */
2605 port_data = &dlb2_port[qm_port->id][PORT_TYPE(qm_port)];
2606
2607 dlb2_movntdq_single(port_data->pp_addr, qe);
2608
2609 DLB2_LOG_DBG("dlb2: consume immediate - %d QEs\n", num);
2610
2611 qm_port->owed_tokens = 0;
2612
2613 return 0;
2614 }
2615
2616 static inline void
2617 dlb2_hw_do_enqueue(struct dlb2_port *qm_port,
2618 bool do_sfence,
2619 struct process_local_port_data *port_data)
2620 {
2621 /* Since MOVDIR64B is weakly-ordered, use an SFENCE to ensure that
2622 * application writes complete before enqueueing the QE.
2623 */
2624 if (do_sfence)
2625 rte_wmb();
2626
2627 dlb2_pp_write(qm_port->qe4, port_data);
2628 }
2629
2630 static inline void
2631 dlb2_construct_token_pop_qe(struct dlb2_port *qm_port, int idx)
2632 {
2633 struct dlb2_cq_pop_qe *qe = (void *)qm_port->qe4;
2634 int num = qm_port->owed_tokens;
2635
2636 qe[idx].cmd_byte = DLB2_POP_CMD_BYTE;
2637 qe[idx].tokens = num - 1;
2638
2639 qm_port->owed_tokens = 0;
2640 }
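
/*
 * Illustrative note: the pop QE encodes the token count minus one, so a port
 * owing four CQ tokens builds a pop QE with tokens == 3. The same convention
 * appears in dlb2_consume_qe_immediate() above.
 */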
2641
2642 static inline void
2643 dlb2_event_build_hcws(struct dlb2_port *qm_port,
2644 const struct rte_event ev[],
2645 int num,
2646 uint8_t *sched_type,
2647 uint8_t *queue_id)
2648 {
2649 struct dlb2_enqueue_qe *qe;
2650 uint16_t sched_word[4];
2651 __m128i sse_qe[2];
2652 int i;
2653
2654 qe = qm_port->qe4;
2655
2656 sse_qe[0] = _mm_setzero_si128();
2657 sse_qe[1] = _mm_setzero_si128();
2658
2659 switch (num) {
2660 case 4:
2661 /* Construct the metadata portion of two HCWs in one 128b SSE
2662 * register. HCW metadata is constructed in the SSE registers
2663 * like so:
2664 * sse_qe[0][63:0]: qe[0]'s metadata
2665 * sse_qe[0][127:64]: qe[1]'s metadata
2666 * sse_qe[1][63:0]: qe[2]'s metadata
2667 * sse_qe[1][127:64]: qe[3]'s metadata
2668 */
2669
2670 /* Convert the event operation into a command byte and store it
2671 * in the metadata:
2672 * sse_qe[0][63:56] = cmd_byte_map[is_directed][ev[0].op]
2673 * sse_qe[0][127:120] = cmd_byte_map[is_directed][ev[1].op]
2674 * sse_qe[1][63:56] = cmd_byte_map[is_directed][ev[2].op]
2675 * sse_qe[1][127:120] = cmd_byte_map[is_directed][ev[3].op]
2676 */
2677 #define DLB2_QE_CMD_BYTE 7
2678 sse_qe[0] = _mm_insert_epi8(sse_qe[0],
2679 cmd_byte_map[qm_port->is_directed][ev[0].op],
2680 DLB2_QE_CMD_BYTE);
2681 sse_qe[0] = _mm_insert_epi8(sse_qe[0],
2682 cmd_byte_map[qm_port->is_directed][ev[1].op],
2683 DLB2_QE_CMD_BYTE + 8);
2684 sse_qe[1] = _mm_insert_epi8(sse_qe[1],
2685 cmd_byte_map[qm_port->is_directed][ev[2].op],
2686 DLB2_QE_CMD_BYTE);
2687 sse_qe[1] = _mm_insert_epi8(sse_qe[1],
2688 cmd_byte_map[qm_port->is_directed][ev[3].op],
2689 DLB2_QE_CMD_BYTE + 8);
2690
2691 /* Store priority, scheduling type, and queue ID in the sched
2692 * word array because these values are re-used when the
2693 * destination is a directed queue.
2694 */
2695 sched_word[0] = EV_TO_DLB2_PRIO(ev[0].priority) << 10 |
2696 sched_type[0] << 8 |
2697 queue_id[0];
2698 sched_word[1] = EV_TO_DLB2_PRIO(ev[1].priority) << 10 |
2699 sched_type[1] << 8 |
2700 queue_id[1];
2701 sched_word[2] = EV_TO_DLB2_PRIO(ev[2].priority) << 10 |
2702 sched_type[2] << 8 |
2703 queue_id[2];
2704 sched_word[3] = EV_TO_DLB2_PRIO(ev[3].priority) << 10 |
2705 sched_type[3] << 8 |
2706 queue_id[3];
2707
2708 /* Store the event priority, scheduling type, and queue ID in
2709 * the metadata:
2710 * sse_qe[0][31:16] = sched_word[0]
2711 * sse_qe[0][95:80] = sched_word[1]
2712 * sse_qe[1][31:16] = sched_word[2]
2713 * sse_qe[1][95:80] = sched_word[3]
2714 */
2715 #define DLB2_QE_QID_SCHED_WORD 1
2716 sse_qe[0] = _mm_insert_epi16(sse_qe[0],
2717 sched_word[0],
2718 DLB2_QE_QID_SCHED_WORD);
2719 sse_qe[0] = _mm_insert_epi16(sse_qe[0],
2720 sched_word[1],
2721 DLB2_QE_QID_SCHED_WORD + 4);
2722 sse_qe[1] = _mm_insert_epi16(sse_qe[1],
2723 sched_word[2],
2724 DLB2_QE_QID_SCHED_WORD);
2725 sse_qe[1] = _mm_insert_epi16(sse_qe[1],
2726 sched_word[3],
2727 DLB2_QE_QID_SCHED_WORD + 4);
2728
2729 /* If the destination is a load-balanced queue, store the lock
2730 * ID. If it is a directed queue, DLB places this field in
2731 * bytes 10-11 of the received QE, so we format it accordingly:
2732 * sse_qe[0][47:32] = dir queue ? sched_word[0] : flow_id[0]
2733 * sse_qe[0][111:96] = dir queue ? sched_word[1] : flow_id[1]
2734 * sse_qe[1][47:32] = dir queue ? sched_word[2] : flow_id[2]
2735 * sse_qe[1][111:96] = dir queue ? sched_word[3] : flow_id[3]
2736 */
2737 #define DLB2_QE_LOCK_ID_WORD 2
2738 sse_qe[0] = _mm_insert_epi16(sse_qe[0],
2739 (sched_type[0] == DLB2_SCHED_DIRECTED) ?
2740 sched_word[0] : ev[0].flow_id,
2741 DLB2_QE_LOCK_ID_WORD);
2742 sse_qe[0] = _mm_insert_epi16(sse_qe[0],
2743 (sched_type[1] == DLB2_SCHED_DIRECTED) ?
2744 sched_word[1] : ev[1].flow_id,
2745 DLB2_QE_LOCK_ID_WORD + 4);
2746 sse_qe[1] = _mm_insert_epi16(sse_qe[1],
2747 (sched_type[2] == DLB2_SCHED_DIRECTED) ?
2748 sched_word[2] : ev[2].flow_id,
2749 DLB2_QE_LOCK_ID_WORD);
2750 sse_qe[1] = _mm_insert_epi16(sse_qe[1],
2751 (sched_type[3] == DLB2_SCHED_DIRECTED) ?
2752 sched_word[3] : ev[3].flow_id,
2753 DLB2_QE_LOCK_ID_WORD + 4);
2754
2755 /* Store the event type and sub event type in the metadata:
2756 * sse_qe[0][15:0] = flow_id[0]
2757 * sse_qe[0][79:64] = flow_id[1]
2758 * sse_qe[1][15:0] = flow_id[2]
2759 * sse_qe[1][79:64] = flow_id[3]
2760 */
2761 #define DLB2_QE_EV_TYPE_WORD 0
2762 sse_qe[0] = _mm_insert_epi16(sse_qe[0],
2763 ev[0].sub_event_type << 8 |
2764 ev[0].event_type,
2765 DLB2_QE_EV_TYPE_WORD);
2766 sse_qe[0] = _mm_insert_epi16(sse_qe[0],
2767 ev[1].sub_event_type << 8 |
2768 ev[1].event_type,
2769 DLB2_QE_EV_TYPE_WORD + 4);
2770 sse_qe[1] = _mm_insert_epi16(sse_qe[1],
2771 ev[2].sub_event_type << 8 |
2772 ev[2].event_type,
2773 DLB2_QE_EV_TYPE_WORD);
2774 sse_qe[1] = _mm_insert_epi16(sse_qe[1],
2775 ev[3].sub_event_type << 8 |
2776 ev[3].event_type,
2777 DLB2_QE_EV_TYPE_WORD + 4);
2778
2779 /* Store the metadata to memory (use the double-precision
2780 * _mm_storeh_pd because there is no integer function for
2781 * storing the upper 64b):
2782 * qe[0] metadata = sse_qe[0][63:0]
2783 * qe[1] metadata = sse_qe[0][127:64]
2784 * qe[2] metadata = sse_qe[1][63:0]
2785 * qe[3] metadata = sse_qe[1][127:64]
2786 */
2787 _mm_storel_epi64((__m128i *)&qe[0].u.opaque_data, sse_qe[0]);
2788 _mm_storeh_pd((double *)&qe[1].u.opaque_data,
2789 (__m128d)sse_qe[0]);
2790 _mm_storel_epi64((__m128i *)&qe[2].u.opaque_data, sse_qe[1]);
2791 _mm_storeh_pd((double *)&qe[3].u.opaque_data,
2792 (__m128d)sse_qe[1]);
2793
2794 qe[0].data = ev[0].u64;
2795 qe[1].data = ev[1].u64;
2796 qe[2].data = ev[2].u64;
2797 qe[3].data = ev[3].u64;
2798
2799 break;
2800 case 3:
2801 case 2:
2802 case 1:
2803 for (i = 0; i < num; i++) {
2804 qe[i].cmd_byte =
2805 cmd_byte_map[qm_port->is_directed][ev[i].op];
2806 qe[i].sched_type = sched_type[i];
2807 qe[i].data = ev[i].u64;
2808 qe[i].qid = queue_id[i];
2809 qe[i].priority = EV_TO_DLB2_PRIO(ev[i].priority);
2810 qe[i].lock_id = ev[i].flow_id;
2811 if (sched_type[i] == DLB2_SCHED_DIRECTED) {
2812 struct dlb2_msg_info *info =
2813 (struct dlb2_msg_info *)&qe[i].lock_id;
2814
2815 info->qid = queue_id[i];
2816 info->sched_type = DLB2_SCHED_DIRECTED;
2817 info->priority = qe[i].priority;
2818 }
2819 qe[i].u.event_type.major = ev[i].event_type;
2820 qe[i].u.event_type.sub = ev[i].sub_event_type;
2821 }
2822 break;
2823 case 0:
2824 break;
2825 }
2826 }
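
/*
 * Illustrative note (not part of the driver): the SSE path above packs, four
 * events at a time, the same per-event fields that the scalar case 3/2/1
 * path writes individually:
 *
 *	qe[i].cmd_byte   = cmd_byte_map[qm_port->is_directed][ev[i].op];
 *	qe[i].sched_type = sched_type[i];
 *	qe[i].qid        = queue_id[i];
 *	qe[i].priority   = EV_TO_DLB2_PRIO(ev[i].priority);
 *	qe[i].lock_id    = ev[i].flow_id;
 *	qe[i].data       = ev[i].u64;
 *
 * For directed destinations the lock_id bytes are overwritten with the
 * qid/sched/priority info, as both paths above do. The DLB2_QE_* insert
 * offsets simply select where each field lives inside the 16-byte HCW.
 */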
2827
2828 static inline int
2829 dlb2_event_enqueue_prep(struct dlb2_eventdev_port *ev_port,
2830 struct dlb2_port *qm_port,
2831 const struct rte_event ev[],
2832 uint8_t *sched_type,
2833 uint8_t *queue_id)
2834 {
2835 struct dlb2_eventdev *dlb2 = ev_port->dlb2;
2836 struct dlb2_eventdev_queue *ev_queue;
2837 uint16_t *cached_credits = NULL;
2838 struct dlb2_queue *qm_queue;
2839
2840 ev_queue = &dlb2->ev_queues[ev->queue_id];
2841 qm_queue = &ev_queue->qm_queue;
2842 *queue_id = qm_queue->id;
2843
2844 /* Ignore sched_type and hardware credits on release events */
2845 if (ev->op == RTE_EVENT_OP_RELEASE)
2846 goto op_check;
2847
2848 if (!qm_queue->is_directed) {
2849 /* Load balanced destination queue */
2850
2851 if (dlb2->version == DLB2_HW_V2) {
2852 if (dlb2_check_enqueue_hw_ldb_credits(qm_port)) {
2853 rte_errno = -ENOSPC;
2854 return 1;
2855 }
2856 cached_credits = &qm_port->cached_ldb_credits;
2857 } else {
2858 if (dlb2_check_enqueue_hw_credits(qm_port)) {
2859 rte_errno = -ENOSPC;
2860 return 1;
2861 }
2862 cached_credits = &qm_port->cached_credits;
2863 }
2864 switch (ev->sched_type) {
2865 case RTE_SCHED_TYPE_ORDERED:
2866 DLB2_LOG_DBG("dlb2: put_qe: RTE_SCHED_TYPE_ORDERED\n");
2867 if (qm_queue->sched_type != RTE_SCHED_TYPE_ORDERED) {
2868 DLB2_LOG_ERR("dlb2: tried to send ordered event to unordered queue %d\n",
2869 *queue_id);
2870 rte_errno = -EINVAL;
2871 return 1;
2872 }
2873 *sched_type = DLB2_SCHED_ORDERED;
2874 break;
2875 case RTE_SCHED_TYPE_ATOMIC:
2876 DLB2_LOG_DBG("dlb2: put_qe: RTE_SCHED_TYPE_ATOMIC\n");
2877 *sched_type = DLB2_SCHED_ATOMIC;
2878 break;
2879 case RTE_SCHED_TYPE_PARALLEL:
2880 DLB2_LOG_DBG("dlb2: put_qe: RTE_SCHED_TYPE_PARALLEL\n");
2881 if (qm_queue->sched_type == RTE_SCHED_TYPE_ORDERED)
2882 *sched_type = DLB2_SCHED_ORDERED;
2883 else
2884 *sched_type = DLB2_SCHED_UNORDERED;
2885 break;
2886 default:
2887 DLB2_LOG_ERR("Unsupported LDB sched type in put_qe\n");
2888 DLB2_INC_STAT(ev_port->stats.tx_invalid, 1);
2889 rte_errno = -EINVAL;
2890 return 1;
2891 }
2892 } else {
2893 /* Directed destination queue */
2894
2895 if (dlb2->version == DLB2_HW_V2) {
2896 if (dlb2_check_enqueue_hw_dir_credits(qm_port)) {
2897 rte_errno = -ENOSPC;
2898 return 1;
2899 }
2900 cached_credits = &qm_port->cached_dir_credits;
2901 } else {
2902 if (dlb2_check_enqueue_hw_credits(qm_port)) {
2903 rte_errno = -ENOSPC;
2904 return 1;
2905 }
2906 cached_credits = &qm_port->cached_credits;
2907 }
2908 DLB2_LOG_DBG("dlb2: put_qe: RTE_SCHED_TYPE_DIRECTED\n");
2909
2910 *sched_type = DLB2_SCHED_DIRECTED;
2911 }
2912
2913 op_check:
2914 switch (ev->op) {
2915 case RTE_EVENT_OP_NEW:
2916 /* Check that a sw credit is available */
2917 if (dlb2_check_enqueue_sw_credits(dlb2, ev_port)) {
2918 rte_errno = -ENOSPC;
2919 return 1;
2920 }
2921 ev_port->inflight_credits--;
2922 (*cached_credits)--;
2923 break;
2924 case RTE_EVENT_OP_FORWARD:
2925 /* Check for outstanding_releases underflow. If this occurs,
2926 * the application is not using the EVENT_OPs correctly; for
2927 * example, forwarding or releasing events that were not
2928 * dequeued.
2929 */
2930 RTE_ASSERT(ev_port->outstanding_releases > 0);
2931 ev_port->outstanding_releases--;
2932 qm_port->issued_releases++;
2933 (*cached_credits)--;
2934 break;
2935 case RTE_EVENT_OP_RELEASE:
2936 ev_port->inflight_credits++;
2937 /* Check for outstanding_releases underflow. If this occurs,
2938 * the application is not using the EVENT_OPs correctly; for
2939 * example, forwarding or releasing events that were not
2940 * dequeued.
2941 */
2942 RTE_ASSERT(ev_port->outstanding_releases > 0);
2943 ev_port->outstanding_releases--;
2944 qm_port->issued_releases++;
2945
2946 /* Replenish s/w credits if enough are cached */
2947 dlb2_replenish_sw_credits(dlb2, ev_port);
2948 break;
2949 }
2950
2951 DLB2_INC_STAT(ev_port->stats.tx_op_cnt[ev->op], 1);
2952 DLB2_INC_STAT(ev_port->stats.traffic.tx_ok, 1);
2953
2954 #ifndef RTE_LIBRTE_PMD_DLB_QUELL_STATS
2955 if (ev->op != RTE_EVENT_OP_RELEASE) {
2956 DLB2_INC_STAT(ev_port->stats.queue[ev->queue_id].enq_ok, 1);
2957 DLB2_INC_STAT(ev_port->stats.tx_sched_cnt[*sched_type], 1);
2958 }
2959 #endif
2960
2961 return 0;
2962 }
2963
2964 static inline uint16_t
2965 __dlb2_event_enqueue_burst(void *event_port,
2966 const struct rte_event events[],
2967 uint16_t num,
2968 bool use_delayed)
2969 {
2970 struct dlb2_eventdev_port *ev_port = event_port;
2971 struct dlb2_port *qm_port = &ev_port->qm_port;
2972 struct process_local_port_data *port_data;
2973 int i;
2974
2975 RTE_ASSERT(ev_port->enq_configured);
2976 RTE_ASSERT(events != NULL);
2977
2978 i = 0;
2979
2980 port_data = &dlb2_port[qm_port->id][PORT_TYPE(qm_port)];
2981
2982 while (i < num) {
2983 uint8_t sched_types[DLB2_NUM_QES_PER_CACHE_LINE];
2984 uint8_t queue_ids[DLB2_NUM_QES_PER_CACHE_LINE];
2985 int pop_offs = 0;
2986 int j = 0;
2987
2988 memset(qm_port->qe4,
2989 0,
2990 DLB2_NUM_QES_PER_CACHE_LINE *
2991 sizeof(struct dlb2_enqueue_qe));
2992
2993 for (; j < DLB2_NUM_QES_PER_CACHE_LINE && (i + j) < num; j++) {
2994 const struct rte_event *ev = &events[i + j];
2995 int16_t thresh = qm_port->token_pop_thresh;
2996
2997 if (use_delayed &&
2998 qm_port->token_pop_mode == DELAYED_POP &&
2999 (ev->op == RTE_EVENT_OP_FORWARD ||
3000 ev->op == RTE_EVENT_OP_RELEASE) &&
3001 qm_port->issued_releases >= thresh - 1) {
3002 /* Insert the token pop QE and break out. This
3003 * may result in a partial HCW, but that is
3004 * simpler than supporting arbitrary QE
3005 * insertion.
3006 */
3007 dlb2_construct_token_pop_qe(qm_port, j);
3008
3009 /* Reset the releases for the next QE batch */
3010 qm_port->issued_releases -= thresh;
3011
3012 pop_offs = 1;
3013 j++;
3014 break;
3015 }
3016
3017 if (dlb2_event_enqueue_prep(ev_port, qm_port, ev,
3018 &sched_types[j],
3019 &queue_ids[j]))
3020 break;
3021 }
3022
3023 if (j == 0)
3024 break;
3025
3026 dlb2_event_build_hcws(qm_port, &events[i], j - pop_offs,
3027 sched_types, queue_ids);
3028
3029 dlb2_hw_do_enqueue(qm_port, i == 0, port_data);
3030
3031 /* Don't include the token pop QE in the enqueue count */
3032 i += j - pop_offs;
3033
3034 /* Don't interpret j < DLB2_NUM_... as out-of-credits if
3035 * pop_offs != 0
3036 */
3037 if (j < DLB2_NUM_QES_PER_CACHE_LINE && pop_offs == 0)
3038 break;
3039 }
3040
3041 return i;
3042 }
3043
3044 static uint16_t
3045 dlb2_event_enqueue_burst(void *event_port,
3046 const struct rte_event events[],
3047 uint16_t num)
3048 {
3049 return __dlb2_event_enqueue_burst(event_port, events, num, false);
3050 }
3051
3052 static uint16_t
3053 dlb2_event_enqueue_burst_delayed(void *event_port,
3054 const struct rte_event events[],
3055 uint16_t num)
3056 {
3057 return __dlb2_event_enqueue_burst(event_port, events, num, true);
3058 }
3059
3060 static inline uint16_t
3061 dlb2_event_enqueue(void *event_port,
3062 const struct rte_event events[])
3063 {
3064 return __dlb2_event_enqueue_burst(event_port, events, 1, false);
3065 }
3066
3067 static inline uint16_t
3068 dlb2_event_enqueue_delayed(void *event_port,
3069 const struct rte_event events[])
3070 {
3071 return __dlb2_event_enqueue_burst(event_port, events, 1, true);
3072 }
3073
3074 static uint16_t
3075 dlb2_event_enqueue_new_burst(void *event_port,
3076 const struct rte_event events[],
3077 uint16_t num)
3078 {
3079 return __dlb2_event_enqueue_burst(event_port, events, num, false);
3080 }
3081
3082 static uint16_t
3083 dlb2_event_enqueue_new_burst_delayed(void *event_port,
3084 const struct rte_event events[],
3085 uint16_t num)
3086 {
3087 return __dlb2_event_enqueue_burst(event_port, events, num, true);
3088 }
3089
3090 static uint16_t
3091 dlb2_event_enqueue_forward_burst(void *event_port,
3092 const struct rte_event events[],
3093 uint16_t num)
3094 {
3095 return __dlb2_event_enqueue_burst(event_port, events, num, false);
3096 }
3097
3098 static uint16_t
3099 dlb2_event_enqueue_forward_burst_delayed(void *event_port,
3100 const struct rte_event events[],
3101 uint16_t num)
3102 {
3103 return __dlb2_event_enqueue_burst(event_port, events, num, true);
3104 }
3105
3106 static void
3107 dlb2_event_release(struct dlb2_eventdev *dlb2,
3108 uint8_t port_id,
3109 int n)
3110 {
3111 struct process_local_port_data *port_data;
3112 struct dlb2_eventdev_port *ev_port;
3113 struct dlb2_port *qm_port;
3114 int i;
3115
3116 if (port_id > dlb2->num_ports) {
3117 DLB2_LOG_ERR("Invalid port id %d in dlb2-event_release\n",
3118 port_id);
3119 rte_errno = -EINVAL;
3120 return;
3121 }
3122
3123 ev_port = &dlb2->ev_ports[port_id];
3124 qm_port = &ev_port->qm_port;
3125 port_data = &dlb2_port[qm_port->id][PORT_TYPE(qm_port)];
3126
3127 i = 0;
3128
3129 if (qm_port->is_directed) {
3130 i = n;
3131 goto sw_credit_update;
3132 }
3133
3134 while (i < n) {
3135 int pop_offs = 0;
3136 int j = 0;
3137
3138 /* Zero-out QEs */
3139 _mm_storeu_si128((void *)&qm_port->qe4[0], _mm_setzero_si128());
3140 _mm_storeu_si128((void *)&qm_port->qe4[1], _mm_setzero_si128());
3141 _mm_storeu_si128((void *)&qm_port->qe4[2], _mm_setzero_si128());
3142 _mm_storeu_si128((void *)&qm_port->qe4[3], _mm_setzero_si128());
3143
3144
3145 for (; j < DLB2_NUM_QES_PER_CACHE_LINE && (i + j) < n; j++) {
3146 int16_t thresh = qm_port->token_pop_thresh;
3147
3148 if (qm_port->token_pop_mode == DELAYED_POP &&
3149 qm_port->issued_releases >= thresh - 1) {
3150 /* Insert the token pop QE */
3151 dlb2_construct_token_pop_qe(qm_port, j);
3152
3153 /* Reset the releases for the next QE batch */
3154 qm_port->issued_releases -= thresh;
3155
3156 pop_offs = 1;
3157 j++;
3158 break;
3159 }
3160
3161 qm_port->qe4[j].cmd_byte = DLB2_COMP_CMD_BYTE;
3162 qm_port->issued_releases++;
3163 }
3164
3165 dlb2_hw_do_enqueue(qm_port, i == 0, port_data);
3166
3167 /* Don't include the token pop QE in the release count */
3168 i += j - pop_offs;
3169 }
3170
3171 sw_credit_update:
3172 /* each release returns one credit */
3173 if (unlikely(!ev_port->outstanding_releases)) {
3174 DLB2_LOG_ERR("%s: Outstanding releases underflowed.\n",
3175 __func__);
3176 return;
3177 }
3178 ev_port->outstanding_releases -= i;
3179 ev_port->inflight_credits += i;
3180
3181 /* Replenish s/w credits if enough releases are performed */
3182 dlb2_replenish_sw_credits(dlb2, ev_port);
3183 }
3184
3185 static inline void
3186 dlb2_port_credits_inc(struct dlb2_port *qm_port, int num)
3187 {
3188 uint32_t batch_size = qm_port->hw_credit_quanta;
3189
3190 /* increment port credits, and return to pool if exceeds threshold */
3191 if (!qm_port->is_directed) {
3192 if (qm_port->dlb2->version == DLB2_HW_V2) {
3193 qm_port->cached_ldb_credits += num;
3194 if (qm_port->cached_ldb_credits >= 2 * batch_size) {
3195 __atomic_fetch_add(
3196 qm_port->credit_pool[DLB2_LDB_QUEUE],
3197 batch_size, __ATOMIC_SEQ_CST);
3198 qm_port->cached_ldb_credits -= batch_size;
3199 }
3200 } else {
3201 qm_port->cached_credits += num;
3202 if (qm_port->cached_credits >= 2 * batch_size) {
3203 __atomic_fetch_add(
3204 qm_port->credit_pool[DLB2_COMBINED_POOL],
3205 batch_size, __ATOMIC_SEQ_CST);
3206 qm_port->cached_credits -= batch_size;
3207 }
3208 }
3209 } else {
3210 if (qm_port->dlb2->version == DLB2_HW_V2) {
3211 qm_port->cached_dir_credits += num;
3212 if (qm_port->cached_dir_credits >= 2 * batch_size) {
3213 __atomic_fetch_add(
3214 qm_port->credit_pool[DLB2_DIR_QUEUE],
3215 batch_size, __ATOMIC_SEQ_CST);
3216 qm_port->cached_dir_credits -= batch_size;
3217 }
3218 } else {
3219 qm_port->cached_credits += num;
3220 if (qm_port->cached_credits >= 2 * batch_size) {
3221 __atomic_fetch_add(
3222 qm_port->credit_pool[DLB2_COMBINED_POOL],
3223 batch_size, __ATOMIC_SEQ_CST);
3224 qm_port->cached_credits -= batch_size;
3225 }
3226 }
3227 }
3228 }
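
/*
 * Worked example (illustrative): with hw_credit_quanta == 32, a DLB 2.0
 * load-balanced port that returns 8 credits while already caching 60
 * reaches 68 cached credits, which is >= 2 * 32, so one batch of 32 is
 * pushed back to the shared DLB2_LDB_QUEUE pool and the port keeps 36.
 * Below that threshold the credits simply stay cached for future enqueues.
 */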
3229
3230 #define CLB_MASK_IDX 0
3231 #define CLB_VAL_IDX 1
3232 static int
3233 dlb2_monitor_callback(const uint64_t val,
3234 const uint64_t opaque[RTE_POWER_MONITOR_OPAQUE_SZ])
3235 {
3236 /* abort if the value matches */
3237 return (val & opaque[CLB_MASK_IDX]) == opaque[CLB_VAL_IDX] ? -1 : 0;
3238 }
3239
3240 static inline int
3241 dlb2_dequeue_wait(struct dlb2_eventdev *dlb2,
3242 struct dlb2_eventdev_port *ev_port,
3243 struct dlb2_port *qm_port,
3244 uint64_t timeout,
3245 uint64_t start_ticks)
3246 {
3247 struct process_local_port_data *port_data;
3248 uint64_t elapsed_ticks;
3249
3250 port_data = &dlb2_port[qm_port->id][PORT_TYPE(qm_port)];
3251
3252 elapsed_ticks = rte_get_timer_cycles() - start_ticks;
3253
3254 /* Wait/poll time expired */
3255 if (elapsed_ticks >= timeout) {
3256 return 1;
3257 } else if (dlb2->umwait_allowed) {
3258 struct rte_power_monitor_cond pmc;
3259 volatile struct dlb2_dequeue_qe *cq_base;
3260 union {
3261 uint64_t raw_qe[2];
3262 struct dlb2_dequeue_qe qe;
3263 } qe_mask;
3264 uint64_t expected_value;
3265 volatile uint64_t *monitor_addr;
3266
3267 qe_mask.qe.cq_gen = 1; /* set mask */
3268
3269 cq_base = port_data->cq_base;
3270 monitor_addr = (volatile uint64_t *)(volatile void *)
3271 &cq_base[qm_port->cq_idx];
3272 monitor_addr++; /* cq_gen bit is in second 64bit location */
3273
3274 if (qm_port->gen_bit)
3275 expected_value = qe_mask.raw_qe[1];
3276 else
3277 expected_value = 0;
3278
3279 pmc.addr = monitor_addr;
3280 /* store expected value and comparison mask in opaque data */
3281 pmc.opaque[CLB_VAL_IDX] = expected_value;
3282 pmc.opaque[CLB_MASK_IDX] = qe_mask.raw_qe[1];
3283 /* set up callback */
3284 pmc.fn = dlb2_monitor_callback;
3285 pmc.size = sizeof(uint64_t);
3286
3287 rte_power_monitor(&pmc, timeout + start_ticks);
3288
3289 DLB2_INC_STAT(ev_port->stats.traffic.rx_umonitor_umwait, 1);
3290 } else {
3291 uint64_t poll_interval = dlb2->poll_interval;
3292 uint64_t curr_ticks = rte_get_timer_cycles();
3293 uint64_t init_ticks = curr_ticks;
3294
3295 while ((curr_ticks - start_ticks < timeout) &&
3296 (curr_ticks - init_ticks < poll_interval))
3297 curr_ticks = rte_get_timer_cycles();
3298 }
3299
3300 return 0;
3301 }
3302
3303 static __rte_noinline int
3304 dlb2_process_dequeue_qes(struct dlb2_eventdev_port *ev_port,
3305 struct dlb2_port *qm_port,
3306 struct rte_event *events,
3307 struct dlb2_dequeue_qe *qes,
3308 int cnt)
3309 {
3310 uint8_t *qid_mappings = qm_port->qid_mappings;
3311 int i, num, evq_id;
3312
3313 for (i = 0, num = 0; i < cnt; i++) {
3314 struct dlb2_dequeue_qe *qe = &qes[i];
3315 int sched_type_map[DLB2_NUM_HW_SCHED_TYPES] = {
3316 [DLB2_SCHED_ATOMIC] = RTE_SCHED_TYPE_ATOMIC,
3317 [DLB2_SCHED_UNORDERED] = RTE_SCHED_TYPE_PARALLEL,
3318 [DLB2_SCHED_ORDERED] = RTE_SCHED_TYPE_ORDERED,
3319 [DLB2_SCHED_DIRECTED] = RTE_SCHED_TYPE_ATOMIC,
3320 };
3321
3322 /* Fill in event information.
3323 * Note that flow_id must be embedded in the data by
3324 * the app, such as the mbuf RSS hash field if the data
3325 * buffer is a mbuf.
3326 */
3327 if (unlikely(qe->error)) {
3328 DLB2_LOG_ERR("QE error bit ON\n");
3329 DLB2_INC_STAT(ev_port->stats.traffic.rx_drop, 1);
3330 dlb2_consume_qe_immediate(qm_port, 1);
3331 continue; /* Ignore */
3332 }
3333
3334 events[num].u64 = qe->data;
3335 events[num].flow_id = qe->flow_id;
3336 events[num].priority = DLB2_TO_EV_PRIO((uint8_t)qe->priority);
3337 events[num].event_type = qe->u.event_type.major;
3338 events[num].sub_event_type = qe->u.event_type.sub;
3339 events[num].sched_type = sched_type_map[qe->sched_type];
3340 events[num].impl_opaque = qe->qid_depth;
3341
3342 /* qid not preserved for directed queues */
3343 if (qm_port->is_directed)
3344 evq_id = ev_port->link[0].queue_id;
3345 else
3346 evq_id = qid_mappings[qe->qid];
3347
3348 events[num].queue_id = evq_id;
3349 DLB2_INC_STAT(
3350 ev_port->stats.queue[evq_id].qid_depth[qe->qid_depth],
3351 1);
3352 DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qe->sched_type], 1);
3353 num++;
3354 }
3355
3356 DLB2_INC_STAT(ev_port->stats.traffic.rx_ok, num);
3357
3358 return num;
3359 }
3360
3361 static inline int
3362 dlb2_process_dequeue_four_qes(struct dlb2_eventdev_port *ev_port,
3363 struct dlb2_port *qm_port,
3364 struct rte_event *events,
3365 struct dlb2_dequeue_qe *qes)
3366 {
3367 int sched_type_map[] = {
3368 [DLB2_SCHED_ATOMIC] = RTE_SCHED_TYPE_ATOMIC,
3369 [DLB2_SCHED_UNORDERED] = RTE_SCHED_TYPE_PARALLEL,
3370 [DLB2_SCHED_ORDERED] = RTE_SCHED_TYPE_ORDERED,
3371 [DLB2_SCHED_DIRECTED] = RTE_SCHED_TYPE_ATOMIC,
3372 };
3373 const int num_events = DLB2_NUM_QES_PER_CACHE_LINE;
3374 uint8_t *qid_mappings = qm_port->qid_mappings;
3375 __m128i sse_evt[2];
3376
3377 /* In the unlikely case that any of the QE error bits are set, process
3378 * them one at a time.
3379 */
3380 if (unlikely(qes[0].error || qes[1].error ||
3381 qes[2].error || qes[3].error))
3382 return dlb2_process_dequeue_qes(ev_port, qm_port, events,
3383 qes, num_events);
3384
3385 events[0].u64 = qes[0].data;
3386 events[1].u64 = qes[1].data;
3387 events[2].u64 = qes[2].data;
3388 events[3].u64 = qes[3].data;
3389
3390 /* Construct the metadata portion of two struct rte_events
3391 * in one 128b SSE register. Event metadata is constructed in the SSE
3392 * registers like so:
3393 * sse_evt[0][63:0]: event[0]'s metadata
3394 * sse_evt[0][127:64]: event[1]'s metadata
3395 * sse_evt[1][63:0]: event[2]'s metadata
3396 * sse_evt[1][127:64]: event[3]'s metadata
3397 */
3398 sse_evt[0] = _mm_setzero_si128();
3399 sse_evt[1] = _mm_setzero_si128();
3400
3401 /* Convert the hardware queue ID to an event queue ID and store it in
3402 * the metadata:
3403 * sse_evt[0][47:40] = qid_mappings[qes[0].qid]
3404 * sse_evt[0][111:104] = qid_mappings[qes[1].qid]
3405 * sse_evt[1][47:40] = qid_mappings[qes[2].qid]
3406 * sse_evt[1][111:104] = qid_mappings[qes[3].qid]
3407 */
3408 #define DLB_EVENT_QUEUE_ID_BYTE 5
3409 sse_evt[0] = _mm_insert_epi8(sse_evt[0],
3410 qid_mappings[qes[0].qid],
3411 DLB_EVENT_QUEUE_ID_BYTE);
3412 sse_evt[0] = _mm_insert_epi8(sse_evt[0],
3413 qid_mappings[qes[1].qid],
3414 DLB_EVENT_QUEUE_ID_BYTE + 8);
3415 sse_evt[1] = _mm_insert_epi8(sse_evt[1],
3416 qid_mappings[qes[2].qid],
3417 DLB_EVENT_QUEUE_ID_BYTE);
3418 sse_evt[1] = _mm_insert_epi8(sse_evt[1],
3419 qid_mappings[qes[3].qid],
3420 DLB_EVENT_QUEUE_ID_BYTE + 8);
3421
3422 /* Convert the hardware priority to an event priority and store it in
3423 * the metadata, while also returning the queue depth status
3424 * value captured by the hardware, storing it in impl_opaque, which can
3425 * be read by the application but not modified
3426 * sse_evt[0][55:48] = DLB2_TO_EV_PRIO(qes[0].priority)
3427 * sse_evt[0][63:56] = qes[0].qid_depth
3428 * sse_evt[0][119:112] = DLB2_TO_EV_PRIO(qes[1].priority)
3429 * sse_evt[0][127:120] = qes[1].qid_depth
3430 * sse_evt[1][55:48] = DLB2_TO_EV_PRIO(qes[2].priority)
3431 * sse_evt[1][63:56] = qes[2].qid_depth
3432 * sse_evt[1][119:112] = DLB2_TO_EV_PRIO(qes[3].priority)
3433 * sse_evt[1][127:120] = qes[3].qid_depth
3434 */
3435 #define DLB_EVENT_PRIO_IMPL_OPAQUE_WORD 3
3436 #define DLB_BYTE_SHIFT 8
3437 sse_evt[0] =
3438 _mm_insert_epi16(sse_evt[0],
3439 DLB2_TO_EV_PRIO((uint8_t)qes[0].priority) |
3440 (qes[0].qid_depth << DLB_BYTE_SHIFT),
3441 DLB_EVENT_PRIO_IMPL_OPAQUE_WORD);
3442 sse_evt[0] =
3443 _mm_insert_epi16(sse_evt[0],
3444 DLB2_TO_EV_PRIO((uint8_t)qes[1].priority) |
3445 (qes[1].qid_depth << DLB_BYTE_SHIFT),
3446 DLB_EVENT_PRIO_IMPL_OPAQUE_WORD + 4);
3447 sse_evt[1] =
3448 _mm_insert_epi16(sse_evt[1],
3449 DLB2_TO_EV_PRIO((uint8_t)qes[2].priority) |
3450 (qes[2].qid_depth << DLB_BYTE_SHIFT),
3451 DLB_EVENT_PRIO_IMPL_OPAQUE_WORD);
3452 sse_evt[1] =
3453 _mm_insert_epi16(sse_evt[1],
3454 DLB2_TO_EV_PRIO((uint8_t)qes[3].priority) |
3455 (qes[3].qid_depth << DLB_BYTE_SHIFT),
3456 DLB_EVENT_PRIO_IMPL_OPAQUE_WORD + 4);
3457
3458 /* Write the event type, sub event type, and flow_id to the event
3459 * metadata.
3460 * sse_evt[0][31:0] = qes[0].flow_id |
3461 * qes[0].u.event_type.major << 28 |
3462 * qes[0].u.event_type.sub << 20;
3463 * sse_evt[0][95:64] = qes[1].flow_id |
3464 * qes[1].u.event_type.major << 28 |
3465 * qes[1].u.event_type.sub << 20;
3466 * sse_evt[1][31:0] = qes[2].flow_id |
3467 * qes[2].u.event_type.major << 28 |
3468 * qes[2].u.event_type.sub << 20;
3469 * sse_evt[1][95:64] = qes[3].flow_id |
3470 * qes[3].u.event_type.major << 28 |
3471 * qes[3].u.event_type.sub << 20;
3472 */
3473 #define DLB_EVENT_EV_TYPE_DW 0
3474 #define DLB_EVENT_EV_TYPE_SHIFT 28
3475 #define DLB_EVENT_SUB_EV_TYPE_SHIFT 20
3476 sse_evt[0] = _mm_insert_epi32(sse_evt[0],
3477 qes[0].flow_id |
3478 qes[0].u.event_type.major << DLB_EVENT_EV_TYPE_SHIFT |
3479 qes[0].u.event_type.sub << DLB_EVENT_SUB_EV_TYPE_SHIFT,
3480 DLB_EVENT_EV_TYPE_DW);
3481 sse_evt[0] = _mm_insert_epi32(sse_evt[0],
3482 qes[1].flow_id |
3483 qes[1].u.event_type.major << DLB_EVENT_EV_TYPE_SHIFT |
3484 qes[1].u.event_type.sub << DLB_EVENT_SUB_EV_TYPE_SHIFT,
3485 DLB_EVENT_EV_TYPE_DW + 2);
3486 sse_evt[1] = _mm_insert_epi32(sse_evt[1],
3487 qes[2].flow_id |
3488 qes[2].u.event_type.major << DLB_EVENT_EV_TYPE_SHIFT |
3489 qes[2].u.event_type.sub << DLB_EVENT_SUB_EV_TYPE_SHIFT,
3490 DLB_EVENT_EV_TYPE_DW);
3491 sse_evt[1] = _mm_insert_epi32(sse_evt[1],
3492 qes[3].flow_id |
3493 qes[3].u.event_type.major << DLB_EVENT_EV_TYPE_SHIFT |
3494 qes[3].u.event_type.sub << DLB_EVENT_SUB_EV_TYPE_SHIFT,
3495 DLB_EVENT_EV_TYPE_DW + 2);
3496
3497 /* Write the sched type to the event metadata. 'op' and 'rsvd' are not
3498 * set:
3499 * sse_evt[0][39:32] = sched_type_map[qes[0].sched_type] << 6
3500 * sse_evt[0][103:96] = sched_type_map[qes[1].sched_type] << 6
3501 * sse_evt[1][39:32] = sched_type_map[qes[2].sched_type] << 6
3502 * sse_evt[1][103:96] = sched_type_map[qes[3].sched_type] << 6
3503 */
3504 #define DLB_EVENT_SCHED_TYPE_BYTE 4
3505 #define DLB_EVENT_SCHED_TYPE_SHIFT 6
3506 sse_evt[0] = _mm_insert_epi8(sse_evt[0],
3507 sched_type_map[qes[0].sched_type] << DLB_EVENT_SCHED_TYPE_SHIFT,
3508 DLB_EVENT_SCHED_TYPE_BYTE);
3509 sse_evt[0] = _mm_insert_epi8(sse_evt[0],
3510 sched_type_map[qes[1].sched_type] << DLB_EVENT_SCHED_TYPE_SHIFT,
3511 DLB_EVENT_SCHED_TYPE_BYTE + 8);
3512 sse_evt[1] = _mm_insert_epi8(sse_evt[1],
3513 sched_type_map[qes[2].sched_type] << DLB_EVENT_SCHED_TYPE_SHIFT,
3514 DLB_EVENT_SCHED_TYPE_BYTE);
3515 sse_evt[1] = _mm_insert_epi8(sse_evt[1],
3516 sched_type_map[qes[3].sched_type] << DLB_EVENT_SCHED_TYPE_SHIFT,
3517 DLB_EVENT_SCHED_TYPE_BYTE + 8);
3518
3519 /* Store the metadata to the event (use the double-precision
3520 * _mm_storeh_pd because there is no integer function for storing the
3521 * upper 64b):
3522 * events[0].event = sse_evt[0][63:0]
3523 * events[1].event = sse_evt[0][127:64]
3524 * events[2].event = sse_evt[1][63:0]
3525 * events[3].event = sse_evt[1][127:64]
3526 */
3527 _mm_storel_epi64((__m128i *)&events[0].event, sse_evt[0]);
3528 _mm_storeh_pd((double *)&events[1].event, (__m128d) sse_evt[0]);
3529 _mm_storel_epi64((__m128i *)&events[2].event, sse_evt[1]);
3530 _mm_storeh_pd((double *)&events[3].event, (__m128d) sse_evt[1]);
3531
3532 DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qes[0].sched_type], 1);
3533 DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qes[1].sched_type], 1);
3534 DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qes[2].sched_type], 1);
3535 DLB2_INC_STAT(ev_port->stats.rx_sched_cnt[qes[3].sched_type], 1);
3536
3537 DLB2_INC_STAT(
3538 ev_port->stats.queue[events[0].queue_id].
3539 qid_depth[qes[0].qid_depth],
3540 1);
3541 DLB2_INC_STAT(
3542 ev_port->stats.queue[events[1].queue_id].
3543 qid_depth[qes[1].qid_depth],
3544 1);
3545 DLB2_INC_STAT(
3546 ev_port->stats.queue[events[2].queue_id].
3547 qid_depth[qes[2].qid_depth],
3548 1);
3549 DLB2_INC_STAT(
3550 ev_port->stats.queue[events[3].queue_id].
3551 qid_depth[qes[3].qid_depth],
3552 1);
3553
3554 DLB2_INC_STAT(ev_port->stats.traffic.rx_ok, num_events);
3555
3556 return num_events;
3557 }
3558
3559 static __rte_always_inline int
3560 dlb2_recv_qe_sparse(struct dlb2_port *qm_port, struct dlb2_dequeue_qe *qe)
3561 {
3562 volatile struct dlb2_dequeue_qe *cq_addr;
3563 uint8_t xor_mask[2] = {0x0F, 0x00};
3564 const uint8_t and_mask = 0x0F;
3565 __m128i *qes = (__m128i *)qe;
3566 uint8_t gen_bits, gen_bit;
3567 uintptr_t addr[4];
3568 uint16_t idx;
3569
3570 cq_addr = dlb2_port[qm_port->id][PORT_TYPE(qm_port)].cq_base;
3571
3572 idx = qm_port->cq_idx_unmasked & qm_port->cq_depth_mask;
3573 /* Load the next 4 QEs */
3574 addr[0] = (uintptr_t)&cq_addr[idx];
3575 addr[1] = (uintptr_t)&cq_addr[(idx + 4) & qm_port->cq_depth_mask];
3576 addr[2] = (uintptr_t)&cq_addr[(idx + 8) & qm_port->cq_depth_mask];
3577 addr[3] = (uintptr_t)&cq_addr[(idx + 12) & qm_port->cq_depth_mask];
3578
3579 /* Prefetch next batch of QEs (all CQs occupy minimum 8 cache lines) */
3580 rte_prefetch0(&cq_addr[(idx + 16) & qm_port->cq_depth_mask]);
3581 rte_prefetch0(&cq_addr[(idx + 20) & qm_port->cq_depth_mask]);
3582 rte_prefetch0(&cq_addr[(idx + 24) & qm_port->cq_depth_mask]);
3583 rte_prefetch0(&cq_addr[(idx + 28) & qm_port->cq_depth_mask]);
3584
3585 /* Correct the xor_mask for wrap-around QEs */
3586 gen_bit = qm_port->gen_bit;
3587 xor_mask[gen_bit] ^= !!((idx + 4) > qm_port->cq_depth_mask) << 1;
3588 xor_mask[gen_bit] ^= !!((idx + 8) > qm_port->cq_depth_mask) << 2;
3589 xor_mask[gen_bit] ^= !!((idx + 12) > qm_port->cq_depth_mask) << 3;
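	/* For example, assuming a ring with cq_depth_mask = 63 and idx = 56:
	 * idx + 4 = 60 stays in range, but idx + 8 = 64 and idx + 12 = 68
	 * wrap, so bits 2 and 3 of the mask are flipped because those QEs
	 * were written after the CQ pointer wrapped and carry the inverted
	 * gen bit.
	 */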
3590
3591 /* Read the cache lines backwards to ensure that if QE[N] (N > 0) is
3592 * valid, then QEs[0:N-1] are too.
3593 */
3594 qes[3] = _mm_load_si128((__m128i *)(void *)addr[3]);
3595 rte_compiler_barrier();
3596 qes[2] = _mm_load_si128((__m128i *)(void *)addr[2]);
3597 rte_compiler_barrier();
3598 qes[1] = _mm_load_si128((__m128i *)(void *)addr[1]);
3599 rte_compiler_barrier();
3600 qes[0] = _mm_load_si128((__m128i *)(void *)addr[0]);
3601
3602 /* Extract and combine the gen bits */
3603 gen_bits = ((_mm_extract_epi8(qes[0], 15) & 0x1) << 0) |
3604 ((_mm_extract_epi8(qes[1], 15) & 0x1) << 1) |
3605 ((_mm_extract_epi8(qes[2], 15) & 0x1) << 2) |
3606 ((_mm_extract_epi8(qes[3], 15) & 0x1) << 3);
3607
3608 /* XOR the combined bits such that a 1 represents a valid QE */
3609 gen_bits ^= xor_mask[gen_bit];
3610
3611 /* Mask off gen bits we don't care about */
3612 gen_bits &= and_mask;
3613
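	/* The reverse-order loads above guarantee that any valid bits are
	 * contiguous from bit 0, so the popcount below equals the number of
	 * QEs that can be consumed in order.
	 */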
3614 return __builtin_popcount(gen_bits);
3615 }
3616
3617 static inline void
3618 _process_deq_qes_vec_impl(struct dlb2_port *qm_port,
3619 struct rte_event *events,
3620 __m128i v_qe_3,
3621 __m128i v_qe_2,
3622 __m128i v_qe_1,
3623 __m128i v_qe_0,
3624 __m128i v_qe_meta,
3625 __m128i v_qe_status,
3626 uint32_t valid_events)
3627 {
3628 /* Look up the event QIDs, using the hardware QIDs to index the
3629 * port's QID mapping.
3630 *
3631 * Each v_qe_[0-4] is just a 16-byte load of the whole QE. It is
3632 * passed along in registers as the QE data is required later.
3633 *
3634 	 * v_qe_meta is a u32 unpack of all 4x QEs: it contains one 32-bit
3635 	 * slice of each QE, so it fills a full SSE register. This allows
3636 	 * parallel processing of 4x QEs in a single register.
3637 */
3638
3639 __m128i v_qid_done = {0};
3640 int hw_qid0 = _mm_extract_epi8(v_qe_meta, 2);
3641 int hw_qid1 = _mm_extract_epi8(v_qe_meta, 6);
3642 int hw_qid2 = _mm_extract_epi8(v_qe_meta, 10);
3643 int hw_qid3 = _mm_extract_epi8(v_qe_meta, 14);
3644
3645 int ev_qid0 = qm_port->qid_mappings[hw_qid0];
3646 int ev_qid1 = qm_port->qid_mappings[hw_qid1];
3647 int ev_qid2 = qm_port->qid_mappings[hw_qid2];
3648 int ev_qid3 = qm_port->qid_mappings[hw_qid3];
3649
3650 int hw_sched0 = _mm_extract_epi8(v_qe_meta, 3) & 3ul;
3651 int hw_sched1 = _mm_extract_epi8(v_qe_meta, 7) & 3ul;
3652 int hw_sched2 = _mm_extract_epi8(v_qe_meta, 11) & 3ul;
3653 int hw_sched3 = _mm_extract_epi8(v_qe_meta, 15) & 3ul;
3654
3655 v_qid_done = _mm_insert_epi8(v_qid_done, ev_qid0, 2);
3656 v_qid_done = _mm_insert_epi8(v_qid_done, ev_qid1, 6);
3657 v_qid_done = _mm_insert_epi8(v_qid_done, ev_qid2, 10);
3658 v_qid_done = _mm_insert_epi8(v_qid_done, ev_qid3, 14);
3659
3660 /* Schedule field remapping using byte shuffle
3661 * - Full byte containing sched field handled here (op, rsvd are zero)
3662 * - Note sanitizing the register requires two masking ANDs:
3663 * 1) to strip prio/msg_type from byte for correct shuffle lookup
3664 * 2) to strip any non-sched-field lanes from any results to OR later
3665 	 * - The final byte result is shifted >> 10 into a different bit
3666 	 *   position inside the u32, which simplifies the final OR that
3666 	 *   assembles the rte_event.
3667 */
3668 __m128i v_sched_done;
3669 __m128i v_sched_bits;
3670 {
3671 static const uint8_t sched_type_map[16] = {
3672 [DLB2_SCHED_ATOMIC] = RTE_SCHED_TYPE_ATOMIC,
3673 [DLB2_SCHED_UNORDERED] = RTE_SCHED_TYPE_PARALLEL,
3674 [DLB2_SCHED_ORDERED] = RTE_SCHED_TYPE_ORDERED,
3675 [DLB2_SCHED_DIRECTED] = RTE_SCHED_TYPE_ATOMIC,
3676 };
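		/* Note: directed (single-link) traffic has no eventdev sched
		 * type of its own, so it is surfaced to the application as
		 * atomic here.
		 */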
3677 static const uint8_t sched_and_mask[16] = {
3678 0x00, 0x00, 0x00, 0x03,
3679 0x00, 0x00, 0x00, 0x03,
3680 0x00, 0x00, 0x00, 0x03,
3681 0x00, 0x00, 0x00, 0x03,
3682 };
3683 const __m128i v_sched_map = _mm_loadu_si128(
3684 (const __m128i *)sched_type_map);
3685 __m128i v_sched_mask = _mm_loadu_si128(
3686 (const __m128i *)&sched_and_mask);
3687 v_sched_bits = _mm_and_si128(v_qe_meta, v_sched_mask);
3688 __m128i v_sched_remapped = _mm_shuffle_epi8(v_sched_map,
3689 v_sched_bits);
3690 __m128i v_preshift = _mm_and_si128(v_sched_remapped,
3691 v_sched_mask);
3692 v_sched_done = _mm_srli_epi32(v_preshift, 10);
3693 }
3694
3695 /* Priority handling
3696 * - QE provides 3 bits of priority
3697 * - Shift << 3 to move to MSBs for byte-prio in rte_event
3698 * - Mask bits to avoid pollution, leaving only 3 prio MSBs in reg
3699 */
3700 __m128i v_prio_done;
3701 {
3702 static const uint8_t prio_mask[16] = {
3703 0x00, 0x00, 0x00, 0x07 << 5,
3704 0x00, 0x00, 0x00, 0x07 << 5,
3705 0x00, 0x00, 0x00, 0x07 << 5,
3706 0x00, 0x00, 0x00, 0x07 << 5,
3707 };
3708 __m128i v_prio_mask = _mm_loadu_si128(
3709 (const __m128i *)prio_mask);
3710 __m128i v_prio_shifted = _mm_slli_epi32(v_qe_meta, 3);
3711 v_prio_done = _mm_and_si128(v_prio_shifted, v_prio_mask);
3712 }
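	/* After the shift and mask, a hardware priority of p lands in the
	 * top three bits of the rte_event priority byte, i.e. the reported
	 * priority is p << 5 (0, 32, 64, ... 224).
	 */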
3713
3714 /* Event Sub/Type handling:
3715 * we want to keep the lower 12 bits of each QE. Shift up by 20 bits
3716 * to get the sub/ev type data into rte_event location, clearing the
3717 * lower 20 bits in the process.
3718 */
3719 __m128i v_types_done;
3720 {
3721 static const uint8_t event_mask[16] = {
3722 0x0f, 0x00, 0x00, 0x00,
3723 0x0f, 0x00, 0x00, 0x00,
3724 0x0f, 0x00, 0x00, 0x00,
3725 0x0f, 0x00, 0x00, 0x00,
3726 };
3727 static const uint8_t sub_event_mask[16] = {
3728 0xff, 0x00, 0x00, 0x00,
3729 0xff, 0x00, 0x00, 0x00,
3730 0xff, 0x00, 0x00, 0x00,
3731 0xff, 0x00, 0x00, 0x00,
3732 };
3733 static const uint8_t flow_mask[16] = {
3734 0xff, 0xff, 0x00, 0x00,
3735 0xff, 0xff, 0x00, 0x00,
3736 0xff, 0xff, 0x00, 0x00,
3737 0xff, 0xff, 0x00, 0x00,
3738 };
3739 __m128i v_event_mask = _mm_loadu_si128(
3740 (const __m128i *)event_mask);
3741 __m128i v_sub_event_mask = _mm_loadu_si128(
3742 (const __m128i *)sub_event_mask);
3743 __m128i v_flow_mask = _mm_loadu_si128(
3744 (const __m128i *)flow_mask);
3745 __m128i v_sub = _mm_srli_epi32(v_qe_meta, 8);
3746 v_sub = _mm_and_si128(v_sub, v_sub_event_mask);
3747 __m128i v_type = _mm_and_si128(v_qe_meta, v_event_mask);
3748 v_type = _mm_slli_epi32(v_type, 8);
3749 v_types_done = _mm_or_si128(v_type, v_sub);
3750 v_types_done = _mm_slli_epi32(v_types_done, 20);
3751 __m128i v_flow = _mm_and_si128(v_qe_status, v_flow_mask);
3752 v_types_done = _mm_or_si128(v_types_done, v_flow);
3753 }
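	/* At this point v_types_done holds, per event, the low half of the
	 * rte_event metadata word: the 16-bit flow id in the low bits,
	 * sub_event_type at bits 27:20 and event_type at bits 31:28, as
	 * produced by the shifts above.
	 */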
3754
3755 /* Combine QID, Sched and Prio fields, then Shift >> 8 bits to align
3756 * with the rte_event, allowing unpacks to move/blend with payload.
3757 */
3758 __m128i v_q_s_p_done;
3759 {
3760 __m128i v_qid_sched = _mm_or_si128(v_qid_done, v_sched_done);
3761 __m128i v_q_s_prio = _mm_or_si128(v_qid_sched, v_prio_done);
3762 v_q_s_p_done = _mm_srli_epi32(v_q_s_prio, 8);
3763 }
3764
3765 __m128i v_unpk_ev_23, v_unpk_ev_01, v_ev_2, v_ev_3, v_ev_0, v_ev_1;
3766
3767 /* Unpack evs into u64 metadata, then indiv events */
3768 v_unpk_ev_23 = _mm_unpackhi_epi32(v_types_done, v_q_s_p_done);
3769 v_unpk_ev_01 = _mm_unpacklo_epi32(v_types_done, v_q_s_p_done);
3770
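	/* The cases below intentionally fall through: exactly 'valid_events'
	 * events are assembled and stored, from the highest valid index down
	 * to events[0].
	 */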
3771 switch (valid_events) {
3772 case 4:
3773 v_ev_3 = _mm_blend_epi16(v_unpk_ev_23, v_qe_3, 0x0F);
3774 v_ev_3 = _mm_alignr_epi8(v_ev_3, v_ev_3, 8);
3775 _mm_storeu_si128((__m128i *)&events[3], v_ev_3);
3776 DLB2_INC_STAT(qm_port->ev_port->stats.rx_sched_cnt[hw_sched3],
3777 1);
3778 /* fallthrough */
3779 case 3:
3780 v_ev_2 = _mm_unpacklo_epi64(v_unpk_ev_23, v_qe_2);
3781 _mm_storeu_si128((__m128i *)&events[2], v_ev_2);
3782 DLB2_INC_STAT(qm_port->ev_port->stats.rx_sched_cnt[hw_sched2],
3783 1);
3784 /* fallthrough */
3785 case 2:
3786 v_ev_1 = _mm_blend_epi16(v_unpk_ev_01, v_qe_1, 0x0F);
3787 v_ev_1 = _mm_alignr_epi8(v_ev_1, v_ev_1, 8);
3788 _mm_storeu_si128((__m128i *)&events[1], v_ev_1);
3789 DLB2_INC_STAT(qm_port->ev_port->stats.rx_sched_cnt[hw_sched1],
3790 1);
3791 /* fallthrough */
3792 case 1:
3793 v_ev_0 = _mm_unpacklo_epi64(v_unpk_ev_01, v_qe_0);
3794 _mm_storeu_si128((__m128i *)&events[0], v_ev_0);
3795 DLB2_INC_STAT(qm_port->ev_port->stats.rx_sched_cnt[hw_sched0],
3796 1);
3797 }
3798 }
3799
3800 static __rte_always_inline int
3801 dlb2_recv_qe_sparse_vec(struct dlb2_port *qm_port, void *events,
3802 uint32_t max_events)
3803 {
3804 /* Using unmasked idx for perf, and masking manually */
3805 uint16_t idx = qm_port->cq_idx_unmasked;
3806 volatile struct dlb2_dequeue_qe *cq_addr;
3807
3808 cq_addr = dlb2_port[qm_port->id][PORT_TYPE(qm_port)].cq_base;
3809
3810 uintptr_t qe_ptr_3 = (uintptr_t)&cq_addr[(idx + 12) &
3811 qm_port->cq_depth_mask];
3812 uintptr_t qe_ptr_2 = (uintptr_t)&cq_addr[(idx + 8) &
3813 qm_port->cq_depth_mask];
3814 uintptr_t qe_ptr_1 = (uintptr_t)&cq_addr[(idx + 4) &
3815 qm_port->cq_depth_mask];
3816 uintptr_t qe_ptr_0 = (uintptr_t)&cq_addr[(idx + 0) &
3817 qm_port->cq_depth_mask];
3818
3819 /* Load QEs from CQ: use compiler barriers to avoid load reordering */
3820 __m128i v_qe_3 = _mm_loadu_si128((const __m128i *)qe_ptr_3);
3821 rte_compiler_barrier();
3822 __m128i v_qe_2 = _mm_loadu_si128((const __m128i *)qe_ptr_2);
3823 rte_compiler_barrier();
3824 __m128i v_qe_1 = _mm_loadu_si128((const __m128i *)qe_ptr_1);
3825 rte_compiler_barrier();
3826 __m128i v_qe_0 = _mm_loadu_si128((const __m128i *)qe_ptr_0);
3827
3828 	/* Generate the pkt_shuffle mask:
3829 * - Avoids load in otherwise load-heavy section of code
3830 * - Moves bytes 3,7,11,15 (gen bit bytes) to LSB bytes in XMM
3831 */
3832 const uint32_t stat_shuf_bytes = (15 << 24) | (11 << 16) | (7 << 8) | 3;
3833 __m128i v_zeros = _mm_setzero_si128();
3834 __m128i v_ffff = _mm_cmpeq_epi8(v_zeros, v_zeros);
3835 __m128i v_stat_shuf_mask = _mm_insert_epi32(v_ffff, stat_shuf_bytes, 0);
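	/* Bytes 4-15 of the shuffle mask are 0xFF; _mm_shuffle_epi8 writes
	 * zero for any mask byte with its high bit set, so only lanes 0-3 of
	 * the shuffled result are populated.
	 */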
3836
3837 /* Extract u32 components required from the QE
3838 * - QE[64 to 95 ] for metadata (qid, sched, prio, event type, ...)
3839 * - QE[96 to 127] for status (cq gen bit, error)
3840 *
3841 * Note that stage 1 of the unpacking is re-used for both u32 extracts
3842 */
3843 __m128i v_qe_02 = _mm_unpackhi_epi32(v_qe_0, v_qe_2);
3844 __m128i v_qe_13 = _mm_unpackhi_epi32(v_qe_1, v_qe_3);
3845 __m128i v_qe_status = _mm_unpackhi_epi32(v_qe_02, v_qe_13);
3846 __m128i v_qe_meta = _mm_unpacklo_epi32(v_qe_02, v_qe_13);
3847
3848 	/* Status byte (gen_bit, error) handling:
3849 	 * - Shuffle the status bytes to lanes 0,1,2,3, clear all others
3850 	 * - Shift each lane left by 7 to move the gen bit into the byte MSB,
3851 	 *   then movemask to a scalar (the error bit is not inspected here)
3852 	 */
3853 __m128i v_qe_shuffled = _mm_shuffle_epi8(v_qe_status, v_stat_shuf_mask);
3854 __m128i v_qes_shift_gen_bit = _mm_slli_epi32(v_qe_shuffled, 7);
3855 int32_t qe_gen_bits = _mm_movemask_epi8(v_qes_shift_gen_bit) & 0xf;
3856
3857 /* Expected vs Reality of QE Gen bits
3858 * - cq_rolling_mask provides expected bits
3859 * - QE loads, unpacks/shuffle and movemask provides reality
3860 * - XOR of the two gives bitmask of new packets
3861 * - POPCNT to get the number of new events
3862 */
3863 uint64_t rolling = qm_port->cq_rolling_mask & 0xF;
3864 uint64_t qe_xor_bits = (qe_gen_bits ^ rolling);
3865 uint32_t count_new = __builtin_popcount(qe_xor_bits);
3866 count_new = RTE_MIN(count_new, max_events);
3867 if (!count_new)
3868 return 0;
3869
3870 /* emulate a 128 bit rotate using 2x 64-bit numbers and bit-shifts */
3871
3872 uint64_t m_rshift = qm_port->cq_rolling_mask >> count_new;
3873 uint64_t m_lshift = qm_port->cq_rolling_mask << (64 - count_new);
3874 uint64_t m2_rshift = qm_port->cq_rolling_mask_2 >> count_new;
3875 uint64_t m2_lshift = qm_port->cq_rolling_mask_2 << (64 - count_new);
3876
3877 /* shifted out of m2 into MSB of m */
3878 qm_port->cq_rolling_mask = (m_rshift | m2_lshift);
3879
3880 /* shifted out of m "looped back" into MSB of m2 */
3881 qm_port->cq_rolling_mask_2 = (m2_rshift | m_lshift);
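	/* The rolling mask holds one expected gen bit per CQ slot; rotating
	 * the 128-bit {cq_rolling_mask_2, cq_rolling_mask} pair right by
	 * count_new advances that expectation window past the QEs consumed
	 * in this call.
	 */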
3882
3883 	/* Prefetch the next QEs so the loads overlap with useful work (IPC) rather than costing stall cycles */
3884 rte_prefetch0(&cq_addr[(idx + 16) & qm_port->cq_depth_mask]);
3885 rte_prefetch0(&cq_addr[(idx + 20) & qm_port->cq_depth_mask]);
3886 rte_prefetch0(&cq_addr[(idx + 24) & qm_port->cq_depth_mask]);
3887 rte_prefetch0(&cq_addr[(idx + 28) & qm_port->cq_depth_mask]);
3888
3889 /* Convert QEs from XMM regs to events and store events directly */
3890 _process_deq_qes_vec_impl(qm_port, events, v_qe_3, v_qe_2, v_qe_1,
3891 v_qe_0, v_qe_meta, v_qe_status, count_new);
3892
3893 return count_new;
3894 }
3895
3896 static inline void
3897 dlb2_inc_cq_idx(struct dlb2_port *qm_port, int cnt)
3898 {
3899 uint16_t idx = qm_port->cq_idx_unmasked + cnt;
3900
3901 qm_port->cq_idx_unmasked = idx;
3902 qm_port->cq_idx = idx & qm_port->cq_depth_mask;
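	/* The gen bit inverts each time the unmasked index completes a full
	 * pass of the CQ (every 1 << gen_bit_shift entries), which is what
	 * the expected-gen-bit comparisons in the receive paths rely on.
	 */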
3903 qm_port->gen_bit = (~(idx >> qm_port->gen_bit_shift)) & 0x1;
3904 }
3905
3906 static inline int16_t
3907 dlb2_hw_dequeue_sparse(struct dlb2_eventdev *dlb2,
3908 struct dlb2_eventdev_port *ev_port,
3909 struct rte_event *events,
3910 uint16_t max_num,
3911 uint64_t dequeue_timeout_ticks)
3912 {
3913 uint64_t start_ticks = 0ULL;
3914 struct dlb2_port *qm_port;
3915 int num = 0;
3916 bool use_scalar;
3917 uint64_t timeout;
3918
3919 qm_port = &ev_port->qm_port;
3920 use_scalar = qm_port->use_scalar;
3921
3922 if (!dlb2->global_dequeue_wait)
3923 timeout = dequeue_timeout_ticks;
3924 else
3925 timeout = dlb2->global_dequeue_wait_ticks;
3926
3927 start_ticks = rte_get_timer_cycles();
3928
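	/* The vector path consumes CQ entries four at a time; fall back to
	 * the scalar path when the caller's burst size is not a multiple of
	 * four (max_num & 0x3).
	 */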
3929 use_scalar = use_scalar || (max_num & 0x3);
3930
3931 while (num < max_num) {
3932 struct dlb2_dequeue_qe qes[DLB2_NUM_QES_PER_CACHE_LINE];
3933 int num_avail;
3934
3935 if (use_scalar) {
3936 int n_iter = 0;
3937 uint64_t m_rshift, m_lshift, m2_rshift, m2_lshift;
3938
3939 num_avail = dlb2_recv_qe_sparse(qm_port, qes);
3940 num_avail = RTE_MIN(num_avail, max_num - num);
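			/* In sparse mode each QE occupies four 16B CQ slots
			 * (one cache line), so the index advances by
			 * num_avail * 4.
			 */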
3941 dlb2_inc_cq_idx(qm_port, num_avail << 2);
3942 if (num_avail == DLB2_NUM_QES_PER_CACHE_LINE)
3943 n_iter = dlb2_process_dequeue_four_qes(ev_port,
3944 qm_port,
3945 &events[num],
3946 &qes[0]);
3947 else if (num_avail)
3948 n_iter = dlb2_process_dequeue_qes(ev_port,
3949 qm_port,
3950 &events[num],
3951 &qes[0],
3952 num_avail);
3953 if (n_iter != 0) {
3954 num += n_iter;
3955 /* update rolling_mask for vector code support */
3956 m_rshift = qm_port->cq_rolling_mask >> n_iter;
3957 m_lshift = qm_port->cq_rolling_mask << (64 - n_iter);
3958 m2_rshift = qm_port->cq_rolling_mask_2 >> n_iter;
3959 m2_lshift = qm_port->cq_rolling_mask_2 <<
3960 (64 - n_iter);
3961 qm_port->cq_rolling_mask = (m_rshift | m2_lshift);
3962 qm_port->cq_rolling_mask_2 = (m2_rshift | m_lshift);
3963 }
3964 } else { /* !use_scalar */
3965 num_avail = dlb2_recv_qe_sparse_vec(qm_port,
3966 &events[num],
3967 max_num - num);
3968 dlb2_inc_cq_idx(qm_port, num_avail << 2);
3969 num += num_avail;
3970 DLB2_INC_STAT(ev_port->stats.traffic.rx_ok, num_avail);
3971 }
3972 if (!num_avail) {
3973 if ((timeout == 0) || (num > 0))
3974 				/* Not waiting in any form, or 1+ events received */
3975 break;
3976 else if (dlb2_dequeue_wait(dlb2, ev_port, qm_port,
3977 timeout, start_ticks))
3978 break;
3979 }
3980 }
3981
3982 qm_port->owed_tokens += num;
3983
3984 if (num) {
3985 if (qm_port->token_pop_mode == AUTO_POP)
3986 dlb2_consume_qe_immediate(qm_port, num);
3987
3988 ev_port->outstanding_releases += num;
3989
3990 dlb2_port_credits_inc(qm_port, num);
3991 }
3992
3993 return num;
3994 }
3995
3996 static __rte_always_inline int
3997 dlb2_recv_qe(struct dlb2_port *qm_port, struct dlb2_dequeue_qe *qe,
3998 uint8_t *offset)
3999 {
4000 uint8_t xor_mask[2][4] = { {0x0F, 0x0E, 0x0C, 0x08},
4001 {0x00, 0x01, 0x03, 0x07} };
4002 uint8_t and_mask[4] = {0x0F, 0x0E, 0x0C, 0x08};
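	/* and_mask[offset] keeps only the gen bits of the QEs at or after the
	 * current offset within the cache line; xor_mask[gen_bit][offset]
	 * converts an expected gen-bit value into a 1 for those positions.
	 */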
4003 volatile struct dlb2_dequeue_qe *cq_addr;
4004 __m128i *qes = (__m128i *)qe;
4005 uint64_t *cache_line_base;
4006 uint8_t gen_bits;
4007
4008 cq_addr = dlb2_port[qm_port->id][PORT_TYPE(qm_port)].cq_base;
4009 cq_addr = &cq_addr[qm_port->cq_idx];
4010
4011 cache_line_base = (void *)(((uintptr_t)cq_addr) & ~0x3F);
4012 *offset = ((uintptr_t)cq_addr & 0x30) >> 4;
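	/* Bits 5:4 of the CQ address select which of the four 16B QEs in the
	 * 64B cache line the current index points at, giving offset 0-3.
	 */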
4013
4014 /* Load the next CQ cache line from memory. Pack these reads as tight
4015 * as possible to reduce the chance that DLB invalidates the line while
4016 * the CPU is reading it. Read the cache line backwards to ensure that
4017 * if QE[N] (N > 0) is valid, then QEs[0:N-1] are too.
4018 *
4019 * (Valid QEs start at &qe[offset])
4020 */
4021 qes[3] = _mm_load_si128((__m128i *)&cache_line_base[6]);
4022 qes[2] = _mm_load_si128((__m128i *)&cache_line_base[4]);
4023 qes[1] = _mm_load_si128((__m128i *)&cache_line_base[2]);
4024 qes[0] = _mm_load_si128((__m128i *)&cache_line_base[0]);
4025
4026 /* Evict the cache line ASAP */
4027 rte_cldemote(cache_line_base);
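	/* cldemote is only a hint: it asks the core to push the line toward
	 * a more distant cache level so the device's next write to this CQ
	 * line is cheaper.
	 */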
4028
4029 /* Extract and combine the gen bits */
4030 gen_bits = ((_mm_extract_epi8(qes[0], 15) & 0x1) << 0) |
4031 ((_mm_extract_epi8(qes[1], 15) & 0x1) << 1) |
4032 ((_mm_extract_epi8(qes[2], 15) & 0x1) << 2) |
4033 ((_mm_extract_epi8(qes[3], 15) & 0x1) << 3);
4034
4035 /* XOR the combined bits such that a 1 represents a valid QE */
4036 gen_bits ^= xor_mask[qm_port->gen_bit][*offset];
4037
4038 /* Mask off gen bits we don't care about */
4039 gen_bits &= and_mask[*offset];
4040
4041 return __builtin_popcount(gen_bits);
4042 }
4043
4044 static inline int16_t
4045 dlb2_hw_dequeue(struct dlb2_eventdev *dlb2,
4046 struct dlb2_eventdev_port *ev_port,
4047 struct rte_event *events,
4048 uint16_t max_num,
4049 uint64_t dequeue_timeout_ticks)
4050 {
4051 uint64_t timeout;
4052 uint64_t start_ticks = 0ULL;
4053 struct dlb2_port *qm_port;
4054 int num = 0;
4055
4056 qm_port = &ev_port->qm_port;
4057
4058 /* We have a special implementation for waiting. Wait can be:
4059 * 1) no waiting at all
4060 * 2) busy poll only
4061 	 * 3) wait for an interrupt; if woken up and the poll time
4062 	 *    has expired, return to the caller
4063 * 4) umonitor/umwait repeatedly up to poll time
4064 */
4065
4066 /* If configured for per dequeue wait, then use wait value provided
4067 * to this API. Otherwise we must use the global
4068 * value from eventdev config time.
4069 */
4070 if (!dlb2->global_dequeue_wait)
4071 timeout = dequeue_timeout_ticks;
4072 else
4073 timeout = dlb2->global_dequeue_wait_ticks;
4074
4075 start_ticks = rte_get_timer_cycles();
4076
4077 while (num < max_num) {
4078 struct dlb2_dequeue_qe qes[DLB2_NUM_QES_PER_CACHE_LINE];
4079 uint8_t offset;
4080 int num_avail;
4081
4082 /* Copy up to 4 QEs from the current cache line into qes */
4083 num_avail = dlb2_recv_qe(qm_port, qes, &offset);
4084
4085 /* But don't process more than the user requested */
4086 num_avail = RTE_MIN(num_avail, max_num - num);
4087
4088 dlb2_inc_cq_idx(qm_port, num_avail);
4089
4090 if (num_avail == DLB2_NUM_QES_PER_CACHE_LINE)
4091 num += dlb2_process_dequeue_four_qes(ev_port,
4092 qm_port,
4093 &events[num],
4094 &qes[offset]);
4095 else if (num_avail)
4096 num += dlb2_process_dequeue_qes(ev_port,
4097 qm_port,
4098 &events[num],
4099 &qes[offset],
4100 num_avail);
4101 else if ((timeout == 0) || (num > 0))
4102 			/* Not waiting in any form, or 1+ events received */
4103 break;
4104 else if (dlb2_dequeue_wait(dlb2, ev_port, qm_port,
4105 timeout, start_ticks))
4106 break;
4107 }
4108
4109 qm_port->owed_tokens += num;
4110
4111 if (num) {
4112 if (qm_port->token_pop_mode == AUTO_POP)
4113 dlb2_consume_qe_immediate(qm_port, num);
4114
4115 ev_port->outstanding_releases += num;
4116
4117 dlb2_port_credits_inc(qm_port, num);
4118 }
4119
4120 return num;
4121 }
4122
4123 static uint16_t
4124 dlb2_event_dequeue_burst(void *event_port, struct rte_event *ev, uint16_t num,
4125 uint64_t wait)
4126 {
4127 struct dlb2_eventdev_port *ev_port = event_port;
4128 struct dlb2_port *qm_port = &ev_port->qm_port;
4129 struct dlb2_eventdev *dlb2 = ev_port->dlb2;
4130 uint16_t cnt;
4131
4132 RTE_ASSERT(ev_port->setup_done);
4133 RTE_ASSERT(ev != NULL);
4134
4135 if (ev_port->implicit_release && ev_port->outstanding_releases > 0) {
4136 uint16_t out_rels = ev_port->outstanding_releases;
4137
4138 dlb2_event_release(dlb2, ev_port->id, out_rels);
4139
4140 DLB2_INC_STAT(ev_port->stats.tx_implicit_rel, out_rels);
4141 }
4142
4143 if (qm_port->token_pop_mode == DEFERRED_POP && qm_port->owed_tokens)
4144 dlb2_consume_qe_immediate(qm_port, qm_port->owed_tokens);
4145
4146 cnt = dlb2_hw_dequeue(dlb2, ev_port, ev, num, wait);
4147
4148 DLB2_INC_STAT(ev_port->stats.traffic.total_polls, 1);
4149 DLB2_INC_STAT(ev_port->stats.traffic.zero_polls, ((cnt == 0) ? 1 : 0));
4150
4151 return cnt;
4152 }
4153
4154 static uint16_t
4155 dlb2_event_dequeue(void *event_port, struct rte_event *ev, uint64_t wait)
4156 {
4157 return dlb2_event_dequeue_burst(event_port, ev, 1, wait);
4158 }
4159
4160 static uint16_t
4161 dlb2_event_dequeue_burst_sparse(void *event_port, struct rte_event *ev,
4162 uint16_t num, uint64_t wait)
4163 {
4164 struct dlb2_eventdev_port *ev_port = event_port;
4165 struct dlb2_port *qm_port = &ev_port->qm_port;
4166 struct dlb2_eventdev *dlb2 = ev_port->dlb2;
4167 uint16_t cnt;
4168
4169 RTE_ASSERT(ev_port->setup_done);
4170 RTE_ASSERT(ev != NULL);
4171
4172 if (ev_port->implicit_release && ev_port->outstanding_releases > 0) {
4173 uint16_t out_rels = ev_port->outstanding_releases;
4174
4175 dlb2_event_release(dlb2, ev_port->id, out_rels);
4176
4177 DLB2_INC_STAT(ev_port->stats.tx_implicit_rel, out_rels);
4178 }
4179
4180 if (qm_port->token_pop_mode == DEFERRED_POP && qm_port->owed_tokens)
4181 dlb2_consume_qe_immediate(qm_port, qm_port->owed_tokens);
4182
4183 cnt = dlb2_hw_dequeue_sparse(dlb2, ev_port, ev, num, wait);
4184
4185 DLB2_INC_STAT(ev_port->stats.traffic.total_polls, 1);
4186 DLB2_INC_STAT(ev_port->stats.traffic.zero_polls, ((cnt == 0) ? 1 : 0));
4187 return cnt;
4188 }
4189
4190 static uint16_t
4191 dlb2_event_dequeue_sparse(void *event_port, struct rte_event *ev,
4192 uint64_t wait)
4193 {
4194 return dlb2_event_dequeue_burst_sparse(event_port, ev, 1, wait);
4195 }
4196
4197 static void
4198 dlb2_flush_port(struct rte_eventdev *dev, int port_id)
4199 {
4200 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
4201 eventdev_stop_flush_t flush;
4202 struct rte_event ev;
4203 uint8_t dev_id;
4204 void *arg;
4205 int i;
4206
4207 flush = dev->dev_ops->dev_stop_flush;
4208 dev_id = dev->data->dev_id;
4209 arg = dev->data->dev_stop_flush_arg;
4210
4211 while (rte_event_dequeue_burst(dev_id, port_id, &ev, 1, 0)) {
4212 if (flush)
4213 flush(dev_id, ev, arg);
4214
4215 if (dlb2->ev_ports[port_id].qm_port.is_directed)
4216 continue;
4217
4218 ev.op = RTE_EVENT_OP_RELEASE;
4219
4220 rte_event_enqueue_burst(dev_id, port_id, &ev, 1);
4221 }
4222
4223 /* Enqueue any additional outstanding releases */
4224 ev.op = RTE_EVENT_OP_RELEASE;
4225
4226 for (i = dlb2->ev_ports[port_id].outstanding_releases; i > 0; i--)
4227 rte_event_enqueue_burst(dev_id, port_id, &ev, 1);
4228 }
4229
4230 static uint32_t
4231 dlb2_get_ldb_queue_depth(struct dlb2_eventdev *dlb2,
4232 struct dlb2_eventdev_queue *queue)
4233 {
4234 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
4235 struct dlb2_get_ldb_queue_depth_args cfg;
4236 int ret;
4237
4238 cfg.queue_id = queue->qm_queue.id;
4239
4240 ret = dlb2_iface_get_ldb_queue_depth(handle, &cfg);
4241 if (ret < 0) {
4242 DLB2_LOG_ERR("dlb2: get_ldb_queue_depth ret=%d (driver status: %s)\n",
4243 ret, dlb2_error_strings[cfg.response.status]);
4244 return ret;
4245 }
4246
4247 return cfg.response.id;
4248 }
4249
4250 static uint32_t
4251 dlb2_get_dir_queue_depth(struct dlb2_eventdev *dlb2,
4252 struct dlb2_eventdev_queue *queue)
4253 {
4254 struct dlb2_hw_dev *handle = &dlb2->qm_instance;
4255 struct dlb2_get_dir_queue_depth_args cfg;
4256 int ret;
4257
4258 cfg.queue_id = queue->qm_queue.id;
4259
4260 ret = dlb2_iface_get_dir_queue_depth(handle, &cfg);
4261 if (ret < 0) {
4262 DLB2_LOG_ERR("dlb2: get_dir_queue_depth ret=%d (driver status: %s)\n",
4263 ret, dlb2_error_strings[cfg.response.status]);
4264 return ret;
4265 }
4266
4267 return cfg.response.id;
4268 }
4269
4270 uint32_t
4271 dlb2_get_queue_depth(struct dlb2_eventdev *dlb2,
4272 struct dlb2_eventdev_queue *queue)
4273 {
4274 if (queue->qm_queue.is_directed)
4275 return dlb2_get_dir_queue_depth(dlb2, queue);
4276 else
4277 return dlb2_get_ldb_queue_depth(dlb2, queue);
4278 }
4279
4280 static bool
4281 dlb2_queue_is_empty(struct dlb2_eventdev *dlb2,
4282 struct dlb2_eventdev_queue *queue)
4283 {
4284 return dlb2_get_queue_depth(dlb2, queue) == 0;
4285 }
4286
4287 static bool
4288 dlb2_linked_queues_empty(struct dlb2_eventdev *dlb2)
4289 {
4290 int i;
4291
4292 for (i = 0; i < dlb2->num_queues; i++) {
4293 if (dlb2->ev_queues[i].num_links == 0)
4294 continue;
4295 if (!dlb2_queue_is_empty(dlb2, &dlb2->ev_queues[i]))
4296 return false;
4297 }
4298
4299 return true;
4300 }
4301
4302 static bool
4303 dlb2_queues_empty(struct dlb2_eventdev *dlb2)
4304 {
4305 int i;
4306
4307 for (i = 0; i < dlb2->num_queues; i++) {
4308 if (!dlb2_queue_is_empty(dlb2, &dlb2->ev_queues[i]))
4309 return false;
4310 }
4311
4312 return true;
4313 }
4314
4315 static void
4316 dlb2_drain(struct rte_eventdev *dev)
4317 {
4318 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
4319 struct dlb2_eventdev_port *ev_port = NULL;
4320 uint8_t dev_id;
4321 int i;
4322
4323 dev_id = dev->data->dev_id;
4324
4325 while (!dlb2_linked_queues_empty(dlb2)) {
4326 /* Flush all the ev_ports, which will drain all their connected
4327 * queues.
4328 */
4329 for (i = 0; i < dlb2->num_ports; i++)
4330 dlb2_flush_port(dev, i);
4331 }
4332
4333 /* The queues are empty, but there may be events left in the ports. */
4334 for (i = 0; i < dlb2->num_ports; i++)
4335 dlb2_flush_port(dev, i);
4336
4337 /* If the domain's queues are empty, we're done. */
4338 if (dlb2_queues_empty(dlb2))
4339 return;
4340
4341 /* Else, there must be at least one unlinked load-balanced queue.
4342 * Select a load-balanced port with which to drain the unlinked
4343 * queue(s).
4344 */
4345 for (i = 0; i < dlb2->num_ports; i++) {
4346 ev_port = &dlb2->ev_ports[i];
4347
4348 if (!ev_port->qm_port.is_directed)
4349 break;
4350 }
4351
4352 if (i == dlb2->num_ports) {
4353 DLB2_LOG_ERR("internal error: no LDB ev_ports\n");
4354 return;
4355 }
4356
4357 rte_errno = 0;
4358 rte_event_port_unlink(dev_id, ev_port->id, NULL, 0);
4359
4360 if (rte_errno) {
4361 DLB2_LOG_ERR("internal error: failed to unlink ev_port %d\n",
4362 ev_port->id);
4363 return;
4364 }
4365
4366 for (i = 0; i < dlb2->num_queues; i++) {
4367 uint8_t qid, prio;
4368 int ret;
4369
4370 if (dlb2_queue_is_empty(dlb2, &dlb2->ev_queues[i]))
4371 continue;
4372
4373 qid = i;
4374 prio = 0;
4375
4376 /* Link the ev_port to the queue */
4377 ret = rte_event_port_link(dev_id, ev_port->id, &qid, &prio, 1);
4378 if (ret != 1) {
4379 DLB2_LOG_ERR("internal error: failed to link ev_port %d to queue %d\n",
4380 ev_port->id, qid);
4381 return;
4382 }
4383
4384 /* Flush the queue */
4385 while (!dlb2_queue_is_empty(dlb2, &dlb2->ev_queues[i]))
4386 dlb2_flush_port(dev, ev_port->id);
4387
4388 /* Drain any extant events in the ev_port. */
4389 dlb2_flush_port(dev, ev_port->id);
4390
4391 /* Unlink the ev_port from the queue */
4392 ret = rte_event_port_unlink(dev_id, ev_port->id, &qid, 1);
4393 if (ret != 1) {
4394 			DLB2_LOG_ERR("internal error: failed to unlink ev_port %d from queue %d\n",
4395 ev_port->id, qid);
4396 return;
4397 }
4398 }
4399 }
4400
4401 static void
4402 dlb2_eventdev_stop(struct rte_eventdev *dev)
4403 {
4404 struct dlb2_eventdev *dlb2 = dlb2_pmd_priv(dev);
4405
4406 rte_spinlock_lock(&dlb2->qm_instance.resource_lock);
4407
4408 if (dlb2->run_state == DLB2_RUN_STATE_STOPPED) {
4409 DLB2_LOG_DBG("Internal error: already stopped\n");
4410 rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
4411 return;
4412 } else if (dlb2->run_state != DLB2_RUN_STATE_STARTED) {
4413 DLB2_LOG_ERR("Internal error: bad state %d for dev_stop\n",
4414 (int)dlb2->run_state);
4415 rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
4416 return;
4417 }
4418
4419 dlb2->run_state = DLB2_RUN_STATE_STOPPING;
4420
4421 rte_spinlock_unlock(&dlb2->qm_instance.resource_lock);
4422
4423 dlb2_drain(dev);
4424
4425 dlb2->run_state = DLB2_RUN_STATE_STOPPED;
4426 }
4427
4428 static int
4429 dlb2_eventdev_close(struct rte_eventdev *dev)
4430 {
4431 dlb2_hw_reset_sched_domain(dev, false);
4432
4433 return 0;
4434 }
4435
4436 static void
4437 dlb2_eventdev_queue_release(struct rte_eventdev *dev, uint8_t id)
4438 {
4439 RTE_SET_USED(dev);
4440 RTE_SET_USED(id);
4441
4442 /* This function intentionally left blank. */
4443 }
4444
4445 static void
4446 dlb2_eventdev_port_release(void *port)
4447 {
4448 struct dlb2_eventdev_port *ev_port = port;
4449 struct dlb2_port *qm_port;
4450
4451 if (ev_port) {
4452 qm_port = &ev_port->qm_port;
4453 if (qm_port->config_state == DLB2_CONFIGURED)
4454 dlb2_free_qe_mem(qm_port);
4455 }
4456 }
4457
4458 static int
4459 dlb2_eventdev_timeout_ticks(struct rte_eventdev *dev, uint64_t ns,
4460 uint64_t *timeout_ticks)
4461 {
4462 RTE_SET_USED(dev);
4463 uint64_t cycles_per_ns = rte_get_timer_hz() / 1E9;
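	/* Note: the integer conversion truncates, e.g. a 2.5 GHz timer gives
	 * cycles_per_ns = 2, so the computed tick count can slightly
	 * under-estimate the requested timeout.
	 */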
4464
4465 *timeout_ticks = ns * cycles_per_ns;
4466
4467 return 0;
4468 }
4469
4470 static void
4471 dlb2_entry_points_init(struct rte_eventdev *dev)
4472 {
4473 struct dlb2_eventdev *dlb2;
4474
4475 /* Expose PMD's eventdev interface */
4476 static struct eventdev_ops dlb2_eventdev_entry_ops = {
4477 .dev_infos_get = dlb2_eventdev_info_get,
4478 .dev_configure = dlb2_eventdev_configure,
4479 .dev_start = dlb2_eventdev_start,
4480 .dev_stop = dlb2_eventdev_stop,
4481 .dev_close = dlb2_eventdev_close,
4482 .queue_def_conf = dlb2_eventdev_queue_default_conf_get,
4483 .queue_setup = dlb2_eventdev_queue_setup,
4484 .queue_release = dlb2_eventdev_queue_release,
4485 .port_def_conf = dlb2_eventdev_port_default_conf_get,
4486 .port_setup = dlb2_eventdev_port_setup,
4487 .port_release = dlb2_eventdev_port_release,
4488 .port_link = dlb2_eventdev_port_link,
4489 .port_unlink = dlb2_eventdev_port_unlink,
4490 .port_unlinks_in_progress =
4491 dlb2_eventdev_port_unlinks_in_progress,
4492 .timeout_ticks = dlb2_eventdev_timeout_ticks,
4493 .dump = dlb2_eventdev_dump,
4494 .xstats_get = dlb2_eventdev_xstats_get,
4495 .xstats_get_names = dlb2_eventdev_xstats_get_names,
4496 .xstats_get_by_name = dlb2_eventdev_xstats_get_by_name,
4497 .xstats_reset = dlb2_eventdev_xstats_reset,
4498 .dev_selftest = test_dlb2_eventdev,
4499 };
4500
4501 	/* Install the eventdev ops and fast-path entry points */
4502
4503 dev->dev_ops = &dlb2_eventdev_entry_ops;
4504 dev->enqueue = dlb2_event_enqueue;
4505 dev->enqueue_burst = dlb2_event_enqueue_burst;
4506 dev->enqueue_new_burst = dlb2_event_enqueue_new_burst;
4507 dev->enqueue_forward_burst = dlb2_event_enqueue_forward_burst;
4508
4509 dlb2 = dev->data->dev_private;
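	/* Sparse CQ mode writes one QE per cache line, so it needs the
	 * *_sparse dequeue variants that stride the CQ accordingly; the
	 * dense variants are used otherwise.
	 */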
4510 if (dlb2->poll_mode == DLB2_CQ_POLL_MODE_SPARSE) {
4511 dev->dequeue = dlb2_event_dequeue_sparse;
4512 dev->dequeue_burst = dlb2_event_dequeue_burst_sparse;
4513 } else {
4514 dev->dequeue = dlb2_event_dequeue;
4515 dev->dequeue_burst = dlb2_event_dequeue_burst;
4516 }
4517 }
4518
4519 int
4520 dlb2_primary_eventdev_probe(struct rte_eventdev *dev,
4521 const char *name,
4522 struct dlb2_devargs *dlb2_args)
4523 {
4524 struct dlb2_eventdev *dlb2;
4525 int err, i;
4526
4527 dlb2 = dev->data->dev_private;
4528
4529 dlb2->event_dev = dev; /* backlink */
4530
4531 evdev_dlb2_default_info.driver_name = name;
4532
4533 dlb2->max_num_events_override = dlb2_args->max_num_events;
4534 dlb2->num_dir_credits_override = dlb2_args->num_dir_credits_override;
4535 dlb2->qm_instance.cos_id = dlb2_args->cos_id;
4536 dlb2->poll_interval = dlb2_args->poll_interval;
4537 dlb2->sw_credit_quanta = dlb2_args->sw_credit_quanta;
4538 dlb2->hw_credit_quanta = dlb2_args->hw_credit_quanta;
4539 dlb2->default_depth_thresh = dlb2_args->default_depth_thresh;
4540 dlb2->vector_opts_enabled = dlb2_args->vector_opts_enabled;
4541 dlb2->max_cq_depth = dlb2_args->max_cq_depth;
4542
4543 err = dlb2_iface_open(&dlb2->qm_instance, name);
4544 if (err < 0) {
4545 DLB2_LOG_ERR("could not open event hardware device, err=%d\n",
4546 err);
4547 return err;
4548 }
4549
4550 err = dlb2_iface_get_device_version(&dlb2->qm_instance,
4551 &dlb2->revision);
4552 if (err < 0) {
4553 DLB2_LOG_ERR("dlb2: failed to get the device version, err=%d\n",
4554 err);
4555 return err;
4556 }
4557
4558 err = dlb2_hw_query_resources(dlb2);
4559 if (err) {
4560 DLB2_LOG_ERR("get resources err=%d for %s\n",
4561 err, name);
4562 return err;
4563 }
4564
4565 dlb2_iface_hardware_init(&dlb2->qm_instance);
4566
4567 err = dlb2_iface_get_cq_poll_mode(&dlb2->qm_instance, &dlb2->poll_mode);
4568 if (err < 0) {
4569 DLB2_LOG_ERR("dlb2: failed to get the poll mode, err=%d\n",
4570 err);
4571 return err;
4572 }
4573
4574 	/* Complete xstats runtime initialization */
4575 err = dlb2_xstats_init(dlb2);
4576 if (err) {
4577 DLB2_LOG_ERR("dlb2: failed to init xstats, err=%d\n", err);
4578 return err;
4579 }
4580
4581 /* Initialize each port's token pop mode */
4582 for (i = 0; i < DLB2_MAX_NUM_PORTS(dlb2->version); i++)
4583 dlb2->ev_ports[i].qm_port.token_pop_mode = AUTO_POP;
4584
4585 rte_spinlock_init(&dlb2->qm_instance.resource_lock);
4586
4587 dlb2_iface_low_level_io_init();
4588
4589 dlb2_entry_points_init(dev);
4590
4591 dlb2_init_queue_depth_thresholds(dlb2,
4592 dlb2_args->qid_depth_thresholds.val);
4593
4594 return 0;
4595 }
4596
4597 int
4598 dlb2_secondary_eventdev_probe(struct rte_eventdev *dev,
4599 const char *name)
4600 {
4601 struct dlb2_eventdev *dlb2;
4602 int err;
4603
4604 dlb2 = dev->data->dev_private;
4605
4606 evdev_dlb2_default_info.driver_name = name;
4607
4608 err = dlb2_iface_open(&dlb2->qm_instance, name);
4609 if (err < 0) {
4610 DLB2_LOG_ERR("could not open event hardware device, err=%d\n",
4611 err);
4612 return err;
4613 }
4614
4615 err = dlb2_hw_query_resources(dlb2);
4616 if (err) {
4617 DLB2_LOG_ERR("get resources err=%d for %s\n",
4618 err, name);
4619 return err;
4620 }
4621
4622 dlb2_iface_low_level_io_init();
4623
4624 dlb2_entry_points_init(dev);
4625
4626 return 0;
4627 }
4628
4629 int
4630 dlb2_parse_params(const char *params,
4631 const char *name,
4632 struct dlb2_devargs *dlb2_args,
4633 uint8_t version)
4634 {
4635 int ret = 0;
4636 static const char * const args[] = { NUMA_NODE_ARG,
4637 DLB2_MAX_NUM_EVENTS,
4638 DLB2_NUM_DIR_CREDITS,
4639 DEV_ID_ARG,
4640 DLB2_QID_DEPTH_THRESH_ARG,
4641 DLB2_COS_ARG,
4642 DLB2_POLL_INTERVAL_ARG,
4643 DLB2_SW_CREDIT_QUANTA_ARG,
4644 DLB2_HW_CREDIT_QUANTA_ARG,
4645 DLB2_DEPTH_THRESH_ARG,
4646 DLB2_VECTOR_OPTS_ENAB_ARG,
4647 DLB2_MAX_CQ_DEPTH,
4648 NULL };
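	/* A hypothetical devargs string built from the keys above might look
	 * like "max_num_events=2048,dev_id=0,poll_interval=1000"; each key
	 * is the literal value of the corresponding *_ARG macro.
	 */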
4649
4650 if (params != NULL && params[0] != '\0') {
4651 struct rte_kvargs *kvlist = rte_kvargs_parse(params, args);
4652
4653 if (kvlist == NULL) {
4654 RTE_LOG(INFO, PMD,
4655 "Ignoring unsupported parameters when creating device '%s'\n",
4656 name);
4657 } else {
4658 int ret = rte_kvargs_process(kvlist, NUMA_NODE_ARG,
4659 set_numa_node,
4660 &dlb2_args->socket_id);
4661 if (ret != 0) {
4662 DLB2_LOG_ERR("%s: Error parsing numa node parameter",
4663 name);
4664 rte_kvargs_free(kvlist);
4665 return ret;
4666 }
4667
4668 ret = rte_kvargs_process(kvlist, DLB2_MAX_NUM_EVENTS,
4669 set_max_num_events,
4670 &dlb2_args->max_num_events);
4671 if (ret != 0) {
4672 DLB2_LOG_ERR("%s: Error parsing max_num_events parameter",
4673 name);
4674 rte_kvargs_free(kvlist);
4675 return ret;
4676 }
4677
4678 if (version == DLB2_HW_V2) {
4679 ret = rte_kvargs_process(kvlist,
4680 DLB2_NUM_DIR_CREDITS,
4681 set_num_dir_credits,
4682 &dlb2_args->num_dir_credits_override);
4683 if (ret != 0) {
4684 DLB2_LOG_ERR("%s: Error parsing num_dir_credits parameter",
4685 name);
4686 rte_kvargs_free(kvlist);
4687 return ret;
4688 }
4689 }
4690 ret = rte_kvargs_process(kvlist, DEV_ID_ARG,
4691 set_dev_id,
4692 &dlb2_args->dev_id);
4693 if (ret != 0) {
4694 DLB2_LOG_ERR("%s: Error parsing dev_id parameter",
4695 name);
4696 rte_kvargs_free(kvlist);
4697 return ret;
4698 }
4699
4700 if (version == DLB2_HW_V2) {
4701 ret = rte_kvargs_process(
4702 kvlist,
4703 DLB2_QID_DEPTH_THRESH_ARG,
4704 set_qid_depth_thresh,
4705 &dlb2_args->qid_depth_thresholds);
4706 } else {
4707 ret = rte_kvargs_process(
4708 kvlist,
4709 DLB2_QID_DEPTH_THRESH_ARG,
4710 set_qid_depth_thresh_v2_5,
4711 &dlb2_args->qid_depth_thresholds);
4712 }
4713 if (ret != 0) {
4714 DLB2_LOG_ERR("%s: Error parsing qid_depth_thresh parameter",
4715 name);
4716 rte_kvargs_free(kvlist);
4717 return ret;
4718 }
4719
4720 ret = rte_kvargs_process(kvlist, DLB2_COS_ARG,
4721 set_cos,
4722 &dlb2_args->cos_id);
4723 if (ret != 0) {
4724 DLB2_LOG_ERR("%s: Error parsing cos parameter",
4725 name);
4726 rte_kvargs_free(kvlist);
4727 return ret;
4728 }
4729
4730 ret = rte_kvargs_process(kvlist, DLB2_POLL_INTERVAL_ARG,
4731 set_poll_interval,
4732 &dlb2_args->poll_interval);
4733 if (ret != 0) {
4734 DLB2_LOG_ERR("%s: Error parsing poll interval parameter",
4735 name);
4736 rte_kvargs_free(kvlist);
4737 return ret;
4738 }
4739
4740 ret = rte_kvargs_process(kvlist,
4741 DLB2_SW_CREDIT_QUANTA_ARG,
4742 set_sw_credit_quanta,
4743 &dlb2_args->sw_credit_quanta);
4744 if (ret != 0) {
4745 DLB2_LOG_ERR("%s: Error parsing sw credit quanta parameter",
4746 name);
4747 rte_kvargs_free(kvlist);
4748 return ret;
4749 }
4750
4751 ret = rte_kvargs_process(kvlist,
4752 DLB2_HW_CREDIT_QUANTA_ARG,
4753 set_hw_credit_quanta,
4754 &dlb2_args->hw_credit_quanta);
4755 if (ret != 0) {
4756 DLB2_LOG_ERR("%s: Error parsing hw credit quanta parameter",
4757 name);
4758 rte_kvargs_free(kvlist);
4759 return ret;
4760 }
4761
4762 ret = rte_kvargs_process(kvlist, DLB2_DEPTH_THRESH_ARG,
4763 set_default_depth_thresh,
4764 &dlb2_args->default_depth_thresh);
4765 if (ret != 0) {
4766 				DLB2_LOG_ERR("%s: Error parsing default depth thresh parameter",
4767 name);
4768 rte_kvargs_free(kvlist);
4769 return ret;
4770 }
4771
4772 ret = rte_kvargs_process(kvlist,
4773 DLB2_VECTOR_OPTS_ENAB_ARG,
4774 set_vector_opts_enab,
4775 &dlb2_args->vector_opts_enabled);
4776 if (ret != 0) {
4777 DLB2_LOG_ERR("%s: Error parsing vector opts enabled",
4778 name);
4779 rte_kvargs_free(kvlist);
4780 return ret;
4781 }
4782
4783 ret = rte_kvargs_process(kvlist,
4784 DLB2_MAX_CQ_DEPTH,
4785 set_max_cq_depth,
4786 &dlb2_args->max_cq_depth);
4787 if (ret != 0) {
4788 				DLB2_LOG_ERR("%s: Error parsing max cq depth parameter",
4789 name);
4790 rte_kvargs_free(kvlist);
4791 return ret;
4792 }
4793
4794 rte_kvargs_free(kvlist);
4795 }
4796 }
4797 return ret;
4798 }
4799 RTE_LOG_REGISTER_DEFAULT(eventdev_dlb2_log_level, NOTICE);
4800