1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2020 Mellanox Technologies, Ltd
3 */
4
5 #include <stddef.h>
6 #include <errno.h>
7 #include <string.h>
8 #include <stdint.h>
9 #include <unistd.h>
10 #include <inttypes.h>
11 #include <sys/queue.h>
12
13 #include "mlx5_autoconf.h"
14
15 #include <rte_mbuf.h>
16 #include <rte_malloc.h>
17 #include <rte_ethdev_driver.h>
18 #include <rte_common.h>
19
20 #include <mlx5_glue.h>
21 #include <mlx5_common.h>
22 #include <mlx5_common_mr.h>
23 #include <mlx5_rxtx.h>
24 #include <mlx5_verbs.h>
25 #include <mlx5_utils.h>
26 #include <mlx5_malloc.h>
27
/**
 * Register a memory region through the common Verbs helper.
 *
 * Thin adapter: the arguments are forwarded unchanged to
 * mlx5_common_verbs_reg_mr() so that this function can be installed as the
 * reg_mr callback of mlx5_verbs_ops.
 *
 * @param[in] pd
 *   Pointer to protection domain context.
 * @param[in] addr
 *   Pointer to memory start address.
 * @param[in] length
 *   Length of the memory to register.
 * @param[out] pmd_mr
 *   PMD MR descriptor handed to the common helper (lkey, address, length
 *   and pointer to the MR object).
 *
 * @return
 *   Result of mlx5_common_verbs_reg_mr(): 0 on successful registration,
 *   -1 otherwise.
 */
static int
mlx5_reg_mr(void *pd, void *addr, size_t length,
	    struct mlx5_pmd_mr *pmd_mr)
{
	const int rc = mlx5_common_verbs_reg_mr(pd, addr, length, pmd_mr);

	return rc;
}
50
/**
 * Deregister a memory region through the common Verbs helper.
 *
 * Thin adapter around mlx5_common_verbs_dereg_mr(), installed as the
 * dereg_mr callback of mlx5_verbs_ops.
 *
 * @param[in] pmd_mr
 *   PMD MR descriptor (lkey, address, length and pointer to the MR object)
 *   to hand over to the common helper.
 */
static void
mlx5_dereg_mr(struct mlx5_pmd_mr *pmd_mr)
{
	mlx5_common_verbs_dereg_mr(pmd_mr);
}
63
64 /* verbs operations. */
65 const struct mlx5_verbs_ops mlx5_verbs_ops = {
66 .reg_mr = mlx5_reg_mr,
67 .dereg_mr = mlx5_dereg_mr,
68 };
69
70 /**
71 * Modify Rx WQ vlan stripping offload
72 *
73 * @param rxq_obj
74 * Rx queue object.
75 *
76 * @return 0 on success, non-0 otherwise
77 */
78 static int
mlx5_rxq_obj_modify_wq_vlan_strip(struct mlx5_rxq_obj * rxq_obj,int on)79 mlx5_rxq_obj_modify_wq_vlan_strip(struct mlx5_rxq_obj *rxq_obj, int on)
80 {
81 uint16_t vlan_offloads =
82 (on ? IBV_WQ_FLAGS_CVLAN_STRIPPING : 0) |
83 0;
84 struct ibv_wq_attr mod;
85 mod = (struct ibv_wq_attr){
86 .attr_mask = IBV_WQ_ATTR_FLAGS,
87 .flags_mask = IBV_WQ_FLAGS_CVLAN_STRIPPING,
88 .flags = vlan_offloads,
89 };
90
91 return mlx5_glue->modify_wq(rxq_obj->wq, &mod);
92 }
93
94 /**
95 * Modifies the attributes for the specified WQ.
96 *
97 * @param rxq_obj
98 * Verbs Rx queue object.
99 * @param type
100 * Type of change queue state.
101 *
102 * @return
103 * 0 on success, a negative errno value otherwise and rte_errno is set.
104 */
105 static int
mlx5_ibv_modify_wq(struct mlx5_rxq_obj * rxq_obj,uint8_t type)106 mlx5_ibv_modify_wq(struct mlx5_rxq_obj *rxq_obj, uint8_t type)
107 {
108 struct ibv_wq_attr mod = {
109 .attr_mask = IBV_WQ_ATTR_STATE,
110 .wq_state = (enum ibv_wq_state)type,
111 };
112
113 return mlx5_glue->modify_wq(rxq_obj->wq, &mod);
114 }
115
116 /**
117 * Modify QP using Verbs API.
118 *
119 * @param txq_obj
120 * Verbs Tx queue object.
121 * @param type
122 * Type of change queue state.
123 * @param dev_port
124 * IB device port number.
125 *
126 * @return
127 * 0 on success, a negative errno value otherwise and rte_errno is set.
128 */
129 static int
mlx5_ibv_modify_qp(struct mlx5_txq_obj * obj,enum mlx5_txq_modify_type type,uint8_t dev_port)130 mlx5_ibv_modify_qp(struct mlx5_txq_obj *obj, enum mlx5_txq_modify_type type,
131 uint8_t dev_port)
132 {
133 struct ibv_qp_attr mod = {
134 .qp_state = IBV_QPS_RESET,
135 .port_num = dev_port,
136 };
137 int attr_mask = (IBV_QP_STATE | IBV_QP_PORT);
138 int ret;
139
140 if (type != MLX5_TXQ_MOD_RST2RDY) {
141 ret = mlx5_glue->modify_qp(obj->qp, &mod, IBV_QP_STATE);
142 if (ret) {
143 DRV_LOG(ERR, "Cannot change Tx QP state to RESET %s",
144 strerror(errno));
145 rte_errno = errno;
146 return ret;
147 }
148 if (type == MLX5_TXQ_MOD_RDY2RST)
149 return 0;
150 }
151 if (type == MLX5_TXQ_MOD_ERR2RDY)
152 attr_mask = IBV_QP_STATE;
153 mod.qp_state = IBV_QPS_INIT;
154 ret = mlx5_glue->modify_qp(obj->qp, &mod, attr_mask);
155 if (ret) {
156 DRV_LOG(ERR, "Cannot change Tx QP state to INIT %s",
157 strerror(errno));
158 rte_errno = errno;
159 return ret;
160 }
161 mod.qp_state = IBV_QPS_RTR;
162 ret = mlx5_glue->modify_qp(obj->qp, &mod, IBV_QP_STATE);
163 if (ret) {
164 DRV_LOG(ERR, "Cannot change Tx QP state to RTR %s",
165 strerror(errno));
166 rte_errno = errno;
167 return ret;
168 }
169 mod.qp_state = IBV_QPS_RTS;
170 ret = mlx5_glue->modify_qp(obj->qp, &mod, IBV_QP_STATE);
171 if (ret) {
172 DRV_LOG(ERR, "Cannot change Tx QP state to RTS %s",
173 strerror(errno));
174 rte_errno = errno;
175 return ret;
176 }
177 return 0;
178 }
179
180 /**
181 * Create a CQ Verbs object.
182 *
183 * @param dev
184 * Pointer to Ethernet device.
185 * @param idx
186 * Queue index in DPDK Rx queue array.
187 *
188 * @return
189 * The Verbs CQ object initialized, NULL otherwise and rte_errno is set.
190 */
191 static struct ibv_cq *
mlx5_rxq_ibv_cq_create(struct rte_eth_dev * dev,uint16_t idx)192 mlx5_rxq_ibv_cq_create(struct rte_eth_dev *dev, uint16_t idx)
193 {
194 struct mlx5_priv *priv = dev->data->dev_private;
195 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
196 struct mlx5_rxq_ctrl *rxq_ctrl =
197 container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
198 struct mlx5_rxq_obj *rxq_obj = rxq_ctrl->obj;
199 unsigned int cqe_n = mlx5_rxq_cqe_num(rxq_data);
200 struct {
201 struct ibv_cq_init_attr_ex ibv;
202 struct mlx5dv_cq_init_attr mlx5;
203 } cq_attr;
204
205 cq_attr.ibv = (struct ibv_cq_init_attr_ex){
206 .cqe = cqe_n,
207 .channel = rxq_obj->ibv_channel,
208 .comp_mask = 0,
209 };
210 cq_attr.mlx5 = (struct mlx5dv_cq_init_attr){
211 .comp_mask = 0,
212 };
213 if (priv->config.cqe_comp && !rxq_data->hw_timestamp) {
214 cq_attr.mlx5.comp_mask |=
215 MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE;
216 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
217 cq_attr.mlx5.cqe_comp_res_format =
218 mlx5_rxq_mprq_enabled(rxq_data) ?
219 MLX5DV_CQE_RES_FORMAT_CSUM_STRIDX :
220 MLX5DV_CQE_RES_FORMAT_HASH;
221 #else
222 cq_attr.mlx5.cqe_comp_res_format = MLX5DV_CQE_RES_FORMAT_HASH;
223 #endif
224 /*
225 * For vectorized Rx, it must not be doubled in order to
226 * make cq_ci and rq_ci aligned.
227 */
228 if (mlx5_rxq_check_vec_support(rxq_data) < 0)
229 cq_attr.ibv.cqe *= 2;
230 } else if (priv->config.cqe_comp && rxq_data->hw_timestamp) {
231 DRV_LOG(DEBUG,
232 "Port %u Rx CQE compression is disabled for HW"
233 " timestamp.",
234 dev->data->port_id);
235 }
236 #ifdef HAVE_IBV_MLX5_MOD_CQE_128B_PAD
237 if (priv->config.cqe_pad) {
238 cq_attr.mlx5.comp_mask |= MLX5DV_CQ_INIT_ATTR_MASK_FLAGS;
239 cq_attr.mlx5.flags |= MLX5DV_CQ_INIT_ATTR_FLAGS_CQE_PAD;
240 }
241 #endif
242 return mlx5_glue->cq_ex_to_cq(mlx5_glue->dv_create_cq(priv->sh->ctx,
243 &cq_attr.ibv,
244 &cq_attr.mlx5));
245 }
246
247 /**
248 * Create a WQ Verbs object.
249 *
250 * @param dev
251 * Pointer to Ethernet device.
252 * @param idx
253 * Queue index in DPDK Rx queue array.
254 *
255 * @return
256 * The Verbs WQ object initialized, NULL otherwise and rte_errno is set.
257 */
258 static struct ibv_wq *
mlx5_rxq_ibv_wq_create(struct rte_eth_dev * dev,uint16_t idx)259 mlx5_rxq_ibv_wq_create(struct rte_eth_dev *dev, uint16_t idx)
260 {
261 struct mlx5_priv *priv = dev->data->dev_private;
262 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
263 struct mlx5_rxq_ctrl *rxq_ctrl =
264 container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
265 struct mlx5_rxq_obj *rxq_obj = rxq_ctrl->obj;
266 unsigned int wqe_n = 1 << rxq_data->elts_n;
267 struct {
268 struct ibv_wq_init_attr ibv;
269 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
270 struct mlx5dv_wq_init_attr mlx5;
271 #endif
272 } wq_attr;
273
274 wq_attr.ibv = (struct ibv_wq_init_attr){
275 .wq_context = NULL, /* Could be useful in the future. */
276 .wq_type = IBV_WQT_RQ,
277 /* Max number of outstanding WRs. */
278 .max_wr = wqe_n >> rxq_data->sges_n,
279 /* Max number of scatter/gather elements in a WR. */
280 .max_sge = 1 << rxq_data->sges_n,
281 .pd = priv->sh->pd,
282 .cq = rxq_obj->ibv_cq,
283 .comp_mask = IBV_WQ_FLAGS_CVLAN_STRIPPING | 0,
284 .create_flags = (rxq_data->vlan_strip ?
285 IBV_WQ_FLAGS_CVLAN_STRIPPING : 0),
286 };
287 /* By default, FCS (CRC) is stripped by hardware. */
288 if (rxq_data->crc_present) {
289 wq_attr.ibv.create_flags |= IBV_WQ_FLAGS_SCATTER_FCS;
290 wq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
291 }
292 if (priv->config.hw_padding) {
293 #if defined(HAVE_IBV_WQ_FLAG_RX_END_PADDING)
294 wq_attr.ibv.create_flags |= IBV_WQ_FLAG_RX_END_PADDING;
295 wq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
296 #elif defined(HAVE_IBV_WQ_FLAGS_PCI_WRITE_END_PADDING)
297 wq_attr.ibv.create_flags |= IBV_WQ_FLAGS_PCI_WRITE_END_PADDING;
298 wq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
299 #endif
300 }
301 #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
302 wq_attr.mlx5 = (struct mlx5dv_wq_init_attr){
303 .comp_mask = 0,
304 };
305 if (mlx5_rxq_mprq_enabled(rxq_data)) {
306 struct mlx5dv_striding_rq_init_attr *mprq_attr =
307 &wq_attr.mlx5.striding_rq_attrs;
308
309 wq_attr.mlx5.comp_mask |= MLX5DV_WQ_INIT_ATTR_MASK_STRIDING_RQ;
310 *mprq_attr = (struct mlx5dv_striding_rq_init_attr){
311 .single_stride_log_num_of_bytes = rxq_data->strd_sz_n,
312 .single_wqe_log_num_of_strides = rxq_data->strd_num_n,
313 .two_byte_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT,
314 };
315 }
316 rxq_obj->wq = mlx5_glue->dv_create_wq(priv->sh->ctx, &wq_attr.ibv,
317 &wq_attr.mlx5);
318 #else
319 rxq_obj->wq = mlx5_glue->create_wq(priv->sh->ctx, &wq_attr.ibv);
320 #endif
321 if (rxq_obj->wq) {
322 /*
323 * Make sure number of WRs*SGEs match expectations since a queue
324 * cannot allocate more than "desc" buffers.
325 */
326 if (wq_attr.ibv.max_wr != (wqe_n >> rxq_data->sges_n) ||
327 wq_attr.ibv.max_sge != (1u << rxq_data->sges_n)) {
328 DRV_LOG(ERR,
329 "Port %u Rx queue %u requested %u*%u but got"
330 " %u*%u WRs*SGEs.",
331 dev->data->port_id, idx,
332 wqe_n >> rxq_data->sges_n,
333 (1 << rxq_data->sges_n),
334 wq_attr.ibv.max_wr, wq_attr.ibv.max_sge);
335 claim_zero(mlx5_glue->destroy_wq(rxq_obj->wq));
336 rxq_obj->wq = NULL;
337 rte_errno = EINVAL;
338 }
339 }
340 return rxq_obj->wq;
341 }
342
343 /**
344 * Create the Rx queue Verbs object.
345 *
346 * @param dev
347 * Pointer to Ethernet device.
348 * @param idx
349 * Queue index in DPDK Rx queue array.
350 *
351 * @return
352 * 0 on success, a negative errno value otherwise and rte_errno is set.
353 */
354 static int
mlx5_rxq_ibv_obj_new(struct rte_eth_dev * dev,uint16_t idx)355 mlx5_rxq_ibv_obj_new(struct rte_eth_dev *dev, uint16_t idx)
356 {
357 struct mlx5_priv *priv = dev->data->dev_private;
358 struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
359 struct mlx5_rxq_ctrl *rxq_ctrl =
360 container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
361 struct mlx5_rxq_obj *tmpl = rxq_ctrl->obj;
362 struct mlx5dv_cq cq_info;
363 struct mlx5dv_rwq rwq;
364 int ret = 0;
365 struct mlx5dv_obj obj;
366
367 MLX5_ASSERT(rxq_data);
368 MLX5_ASSERT(tmpl);
369 priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_RX_QUEUE;
370 priv->verbs_alloc_ctx.obj = rxq_ctrl;
371 tmpl->rxq_ctrl = rxq_ctrl;
372 if (rxq_ctrl->irq) {
373 tmpl->ibv_channel =
374 mlx5_glue->create_comp_channel(priv->sh->ctx);
375 if (!tmpl->ibv_channel) {
376 DRV_LOG(ERR, "Port %u: comp channel creation failure.",
377 dev->data->port_id);
378 rte_errno = ENOMEM;
379 goto error;
380 }
381 tmpl->fd = ((struct ibv_comp_channel *)(tmpl->ibv_channel))->fd;
382 }
383 /* Create CQ using Verbs API. */
384 tmpl->ibv_cq = mlx5_rxq_ibv_cq_create(dev, idx);
385 if (!tmpl->ibv_cq) {
386 DRV_LOG(ERR, "Port %u Rx queue %u CQ creation failure.",
387 dev->data->port_id, idx);
388 rte_errno = ENOMEM;
389 goto error;
390 }
391 obj.cq.in = tmpl->ibv_cq;
392 obj.cq.out = &cq_info;
393 ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_CQ);
394 if (ret) {
395 rte_errno = ret;
396 goto error;
397 }
398 if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
399 DRV_LOG(ERR,
400 "Port %u wrong MLX5_CQE_SIZE environment "
401 "variable value: it should be set to %u.",
402 dev->data->port_id, RTE_CACHE_LINE_SIZE);
403 rte_errno = EINVAL;
404 goto error;
405 }
406 /* Fill the rings. */
407 rxq_data->cqe_n = log2above(cq_info.cqe_cnt);
408 rxq_data->cq_db = cq_info.dbrec;
409 rxq_data->cqes = (volatile struct mlx5_cqe (*)[])(uintptr_t)cq_info.buf;
410 rxq_data->cq_uar = cq_info.cq_uar;
411 rxq_data->cqn = cq_info.cqn;
412 /* Create WQ (RQ) using Verbs API. */
413 tmpl->wq = mlx5_rxq_ibv_wq_create(dev, idx);
414 if (!tmpl->wq) {
415 DRV_LOG(ERR, "Port %u Rx queue %u WQ creation failure.",
416 dev->data->port_id, idx);
417 rte_errno = ENOMEM;
418 goto error;
419 }
420 /* Change queue state to ready. */
421 ret = mlx5_ibv_modify_wq(tmpl, IBV_WQS_RDY);
422 if (ret) {
423 DRV_LOG(ERR,
424 "Port %u Rx queue %u WQ state to IBV_WQS_RDY failed.",
425 dev->data->port_id, idx);
426 rte_errno = ret;
427 goto error;
428 }
429 obj.rwq.in = tmpl->wq;
430 obj.rwq.out = &rwq;
431 ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_RWQ);
432 if (ret) {
433 rte_errno = ret;
434 goto error;
435 }
436 rxq_data->wqes = rwq.buf;
437 rxq_data->rq_db = rwq.dbrec;
438 rxq_data->cq_arm_sn = 0;
439 mlx5_rxq_initialize(rxq_data);
440 rxq_data->cq_ci = 0;
441 priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
442 dev->data->rx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STARTED;
443 rxq_ctrl->wqn = ((struct ibv_wq *)(tmpl->wq))->wq_num;
444 return 0;
445 error:
446 ret = rte_errno; /* Save rte_errno before cleanup. */
447 if (tmpl->wq)
448 claim_zero(mlx5_glue->destroy_wq(tmpl->wq));
449 if (tmpl->ibv_cq)
450 claim_zero(mlx5_glue->destroy_cq(tmpl->ibv_cq));
451 if (tmpl->ibv_channel)
452 claim_zero(mlx5_glue->destroy_comp_channel(tmpl->ibv_channel));
453 rte_errno = ret; /* Restore rte_errno. */
454 priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
455 return -rte_errno;
456 }
457
458 /**
459 * Release an Rx verbs queue object.
460 *
461 * @param rxq_obj
462 * Verbs Rx queue object.
463 */
464 static void
mlx5_rxq_ibv_obj_release(struct mlx5_rxq_obj * rxq_obj)465 mlx5_rxq_ibv_obj_release(struct mlx5_rxq_obj *rxq_obj)
466 {
467 MLX5_ASSERT(rxq_obj);
468 MLX5_ASSERT(rxq_obj->wq);
469 MLX5_ASSERT(rxq_obj->ibv_cq);
470 claim_zero(mlx5_glue->destroy_wq(rxq_obj->wq));
471 claim_zero(mlx5_glue->destroy_cq(rxq_obj->ibv_cq));
472 if (rxq_obj->ibv_channel)
473 claim_zero(mlx5_glue->destroy_comp_channel
474 (rxq_obj->ibv_channel));
475 }
476
477 /**
478 * Get event for an Rx verbs queue object.
479 *
480 * @param rxq_obj
481 * Verbs Rx queue object.
482 *
483 * @return
484 * 0 on success, a negative errno value otherwise and rte_errno is set.
485 */
486 static int
mlx5_rx_ibv_get_event(struct mlx5_rxq_obj * rxq_obj)487 mlx5_rx_ibv_get_event(struct mlx5_rxq_obj *rxq_obj)
488 {
489 struct ibv_cq *ev_cq;
490 void *ev_ctx;
491 int ret = mlx5_glue->get_cq_event(rxq_obj->ibv_channel,
492 &ev_cq, &ev_ctx);
493
494 if (ret < 0 || ev_cq != rxq_obj->ibv_cq)
495 goto exit;
496 mlx5_glue->ack_cq_events(rxq_obj->ibv_cq, 1);
497 return 0;
498 exit:
499 if (ret < 0)
500 rte_errno = errno;
501 else
502 rte_errno = EINVAL;
503 return -rte_errno;
504 }
505
506 /**
507 * Creates a receive work queue as a filed of indirection table.
508 *
509 * @param dev
510 * Pointer to Ethernet device.
511 * @param log_n
512 * Log of number of queues in the array.
513 * @param ind_tbl
514 * Verbs indirection table object.
515 *
516 * @return
517 * 0 on success, a negative errno value otherwise and rte_errno is set.
518 */
519 static int
mlx5_ibv_ind_table_new(struct rte_eth_dev * dev,const unsigned int log_n,struct mlx5_ind_table_obj * ind_tbl)520 mlx5_ibv_ind_table_new(struct rte_eth_dev *dev, const unsigned int log_n,
521 struct mlx5_ind_table_obj *ind_tbl)
522 {
523 struct mlx5_priv *priv = dev->data->dev_private;
524 struct ibv_wq *wq[1 << log_n];
525 unsigned int i, j;
526
527 MLX5_ASSERT(ind_tbl);
528 for (i = 0; i != ind_tbl->queues_n; ++i) {
529 struct mlx5_rxq_data *rxq = (*priv->rxqs)[ind_tbl->queues[i]];
530 struct mlx5_rxq_ctrl *rxq_ctrl =
531 container_of(rxq, struct mlx5_rxq_ctrl, rxq);
532
533 wq[i] = rxq_ctrl->obj->wq;
534 }
535 MLX5_ASSERT(i > 0);
536 /* Finalise indirection table. */
537 for (j = 0; i != (unsigned int)(1 << log_n); ++j, ++i)
538 wq[i] = wq[j];
539 ind_tbl->ind_table = mlx5_glue->create_rwq_ind_table(priv->sh->ctx,
540 &(struct ibv_rwq_ind_table_init_attr){
541 .log_ind_tbl_size = log_n,
542 .ind_tbl = wq,
543 .comp_mask = 0,
544 });
545 if (!ind_tbl->ind_table) {
546 rte_errno = errno;
547 return -rte_errno;
548 }
549 return 0;
550 }
551
552 /**
553 * Destroys the specified Indirection Table.
554 *
555 * @param ind_table
556 * Indirection table to release.
557 */
558 static void
mlx5_ibv_ind_table_destroy(struct mlx5_ind_table_obj * ind_tbl)559 mlx5_ibv_ind_table_destroy(struct mlx5_ind_table_obj *ind_tbl)
560 {
561 claim_zero(mlx5_glue->destroy_rwq_ind_table(ind_tbl->ind_table));
562 }
563
564 /**
565 * Create an Rx Hash queue.
566 *
567 * @param dev
568 * Pointer to Ethernet device.
569 * @param hrxq
570 * Pointer to Rx Hash queue.
571 * @param tunnel
572 * Tunnel type.
573 *
574 * @return
575 * 0 on success, a negative errno value otherwise and rte_errno is set.
576 */
577 static int
mlx5_ibv_hrxq_new(struct rte_eth_dev * dev,struct mlx5_hrxq * hrxq,int tunnel __rte_unused)578 mlx5_ibv_hrxq_new(struct rte_eth_dev *dev, struct mlx5_hrxq *hrxq,
579 int tunnel __rte_unused)
580 {
581 struct mlx5_priv *priv = dev->data->dev_private;
582 struct ibv_qp *qp = NULL;
583 struct mlx5_ind_table_obj *ind_tbl = hrxq->ind_table;
584 const uint8_t *rss_key = hrxq->rss_key;
585 uint64_t hash_fields = hrxq->hash_fields;
586 int err;
587 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
588 struct mlx5dv_qp_init_attr qp_init_attr;
589
590 memset(&qp_init_attr, 0, sizeof(qp_init_attr));
591 if (tunnel) {
592 qp_init_attr.comp_mask =
593 MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS;
594 qp_init_attr.create_flags = MLX5DV_QP_CREATE_TUNNEL_OFFLOADS;
595 }
596 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
597 if (dev->data->dev_conf.lpbk_mode) {
598 /* Allow packet sent from NIC loop back w/o source MAC check. */
599 qp_init_attr.comp_mask |=
600 MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS;
601 qp_init_attr.create_flags |=
602 MLX5DV_QP_CREATE_TIR_ALLOW_SELF_LOOPBACK_UC;
603 }
604 #endif
605 qp = mlx5_glue->dv_create_qp
606 (priv->sh->ctx,
607 &(struct ibv_qp_init_attr_ex){
608 .qp_type = IBV_QPT_RAW_PACKET,
609 .comp_mask =
610 IBV_QP_INIT_ATTR_PD |
611 IBV_QP_INIT_ATTR_IND_TABLE |
612 IBV_QP_INIT_ATTR_RX_HASH,
613 .rx_hash_conf = (struct ibv_rx_hash_conf){
614 .rx_hash_function =
615 IBV_RX_HASH_FUNC_TOEPLITZ,
616 .rx_hash_key_len = hrxq->rss_key_len,
617 .rx_hash_key =
618 (void *)(uintptr_t)rss_key,
619 .rx_hash_fields_mask = hash_fields,
620 },
621 .rwq_ind_tbl = ind_tbl->ind_table,
622 .pd = priv->sh->pd,
623 },
624 &qp_init_attr);
625 #else
626 qp = mlx5_glue->create_qp_ex
627 (priv->sh->ctx,
628 &(struct ibv_qp_init_attr_ex){
629 .qp_type = IBV_QPT_RAW_PACKET,
630 .comp_mask =
631 IBV_QP_INIT_ATTR_PD |
632 IBV_QP_INIT_ATTR_IND_TABLE |
633 IBV_QP_INIT_ATTR_RX_HASH,
634 .rx_hash_conf = (struct ibv_rx_hash_conf){
635 .rx_hash_function =
636 IBV_RX_HASH_FUNC_TOEPLITZ,
637 .rx_hash_key_len = hrxq->rss_key_len,
638 .rx_hash_key =
639 (void *)(uintptr_t)rss_key,
640 .rx_hash_fields_mask = hash_fields,
641 },
642 .rwq_ind_tbl = ind_tbl->ind_table,
643 .pd = priv->sh->pd,
644 });
645 #endif
646 if (!qp) {
647 rte_errno = errno;
648 goto error;
649 }
650 hrxq->qp = qp;
651 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
652 hrxq->action = mlx5_glue->dv_create_flow_action_dest_ibv_qp(hrxq->qp);
653 if (!hrxq->action) {
654 rte_errno = errno;
655 goto error;
656 }
657 #endif
658 return 0;
659 error:
660 err = rte_errno; /* Save rte_errno before cleanup. */
661 if (qp)
662 claim_zero(mlx5_glue->destroy_qp(qp));
663 rte_errno = err; /* Restore rte_errno. */
664 return -rte_errno;
665 }
666
667 /**
668 * Destroy a Verbs queue pair.
669 *
670 * @param hrxq
671 * Hash Rx queue to release its qp.
672 */
673 static void
mlx5_ibv_qp_destroy(struct mlx5_hrxq * hrxq)674 mlx5_ibv_qp_destroy(struct mlx5_hrxq *hrxq)
675 {
676 claim_zero(mlx5_glue->destroy_qp(hrxq->qp));
677 }
678
679 /**
680 * Release a drop Rx queue Verbs object.
681 *
682 * @param dev
683 * Pointer to Ethernet device.
684 */
685 static void
mlx5_rxq_ibv_obj_drop_release(struct rte_eth_dev * dev)686 mlx5_rxq_ibv_obj_drop_release(struct rte_eth_dev *dev)
687 {
688 struct mlx5_priv *priv = dev->data->dev_private;
689 struct mlx5_rxq_obj *rxq = priv->drop_queue.rxq;
690
691 if (rxq->wq)
692 claim_zero(mlx5_glue->destroy_wq(rxq->wq));
693 if (rxq->ibv_cq)
694 claim_zero(mlx5_glue->destroy_cq(rxq->ibv_cq));
695 mlx5_free(rxq);
696 priv->drop_queue.rxq = NULL;
697 }
698
699 /**
700 * Create a drop Rx queue Verbs object.
701 *
702 * @param dev
703 * Pointer to Ethernet device.
704 *
705 * @return
706 * 0 on success, a negative errno value otherwise and rte_errno is set.
707 */
708 static int
mlx5_rxq_ibv_obj_drop_create(struct rte_eth_dev * dev)709 mlx5_rxq_ibv_obj_drop_create(struct rte_eth_dev *dev)
710 {
711 struct mlx5_priv *priv = dev->data->dev_private;
712 struct ibv_context *ctx = priv->sh->ctx;
713 struct mlx5_rxq_obj *rxq = priv->drop_queue.rxq;
714
715 if (rxq)
716 return 0;
717 rxq = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*rxq), 0, SOCKET_ID_ANY);
718 if (!rxq) {
719 DEBUG("Port %u cannot allocate drop Rx queue memory.",
720 dev->data->port_id);
721 rte_errno = ENOMEM;
722 return -rte_errno;
723 }
724 priv->drop_queue.rxq = rxq;
725 rxq->ibv_cq = mlx5_glue->create_cq(ctx, 1, NULL, NULL, 0);
726 if (!rxq->ibv_cq) {
727 DEBUG("Port %u cannot allocate CQ for drop queue.",
728 dev->data->port_id);
729 rte_errno = errno;
730 goto error;
731 }
732 rxq->wq = mlx5_glue->create_wq(ctx, &(struct ibv_wq_init_attr){
733 .wq_type = IBV_WQT_RQ,
734 .max_wr = 1,
735 .max_sge = 1,
736 .pd = priv->sh->pd,
737 .cq = rxq->ibv_cq,
738 });
739 if (!rxq->wq) {
740 DEBUG("Port %u cannot allocate WQ for drop queue.",
741 dev->data->port_id);
742 rte_errno = errno;
743 goto error;
744 }
745 priv->drop_queue.rxq = rxq;
746 return 0;
747 error:
748 mlx5_rxq_ibv_obj_drop_release(dev);
749 return -rte_errno;
750 }
751
752 /**
753 * Create a Verbs drop action for Rx Hash queue.
754 *
755 * @param dev
756 * Pointer to Ethernet device.
757 *
758 * @return
759 * 0 on success, a negative errno value otherwise and rte_errno is set.
760 */
761 static int
mlx5_ibv_drop_action_create(struct rte_eth_dev * dev)762 mlx5_ibv_drop_action_create(struct rte_eth_dev *dev)
763 {
764 struct mlx5_priv *priv = dev->data->dev_private;
765 struct mlx5_hrxq *hrxq = priv->drop_queue.hrxq;
766 struct ibv_rwq_ind_table *ind_tbl = NULL;
767 struct mlx5_rxq_obj *rxq;
768 int ret;
769
770 MLX5_ASSERT(hrxq && hrxq->ind_table);
771 ret = mlx5_rxq_ibv_obj_drop_create(dev);
772 if (ret < 0)
773 goto error;
774 rxq = priv->drop_queue.rxq;
775 ind_tbl = mlx5_glue->create_rwq_ind_table
776 (priv->sh->ctx,
777 &(struct ibv_rwq_ind_table_init_attr){
778 .log_ind_tbl_size = 0,
779 .ind_tbl = (struct ibv_wq **)&rxq->wq,
780 .comp_mask = 0,
781 });
782 if (!ind_tbl) {
783 DEBUG("Port %u cannot allocate indirection table for drop"
784 " queue.", dev->data->port_id);
785 rte_errno = errno;
786 goto error;
787 }
788 hrxq->qp = mlx5_glue->create_qp_ex(priv->sh->ctx,
789 &(struct ibv_qp_init_attr_ex){
790 .qp_type = IBV_QPT_RAW_PACKET,
791 .comp_mask = IBV_QP_INIT_ATTR_PD |
792 IBV_QP_INIT_ATTR_IND_TABLE |
793 IBV_QP_INIT_ATTR_RX_HASH,
794 .rx_hash_conf = (struct ibv_rx_hash_conf){
795 .rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,
796 .rx_hash_key_len = MLX5_RSS_HASH_KEY_LEN,
797 .rx_hash_key = rss_hash_default_key,
798 .rx_hash_fields_mask = 0,
799 },
800 .rwq_ind_tbl = ind_tbl,
801 .pd = priv->sh->pd
802 });
803 if (!hrxq->qp) {
804 DEBUG("Port %u cannot allocate QP for drop queue.",
805 dev->data->port_id);
806 rte_errno = errno;
807 goto error;
808 }
809 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
810 hrxq->action = mlx5_glue->dv_create_flow_action_dest_ibv_qp(hrxq->qp);
811 if (!hrxq->action) {
812 rte_errno = errno;
813 goto error;
814 }
815 #endif
816 hrxq->ind_table->ind_table = ind_tbl;
817 return 0;
818 error:
819 if (hrxq->qp)
820 claim_zero(mlx5_glue->destroy_qp(hrxq->qp));
821 if (ind_tbl)
822 claim_zero(mlx5_glue->destroy_rwq_ind_table(ind_tbl));
823 if (priv->drop_queue.rxq)
824 mlx5_rxq_ibv_obj_drop_release(dev);
825 return -rte_errno;
826 }
827
828 /**
829 * Release a drop hash Rx queue.
830 *
831 * @param dev
832 * Pointer to Ethernet device.
833 */
834 static void
mlx5_ibv_drop_action_destroy(struct rte_eth_dev * dev)835 mlx5_ibv_drop_action_destroy(struct rte_eth_dev *dev)
836 {
837 struct mlx5_priv *priv = dev->data->dev_private;
838 struct mlx5_hrxq *hrxq = priv->drop_queue.hrxq;
839 struct ibv_rwq_ind_table *ind_tbl = hrxq->ind_table->ind_table;
840
841 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
842 claim_zero(mlx5_glue->destroy_flow_action(hrxq->action));
843 #endif
844 claim_zero(mlx5_glue->destroy_qp(hrxq->qp));
845 claim_zero(mlx5_glue->destroy_rwq_ind_table(ind_tbl));
846 mlx5_rxq_ibv_obj_drop_release(dev);
847 }
848
849 /**
850 * Create a QP Verbs object.
851 *
852 * @param dev
853 * Pointer to Ethernet device.
854 * @param idx
855 * Queue index in DPDK Tx queue array.
856 *
857 * @return
858 * The QP Verbs object, NULL otherwise and rte_errno is set.
859 */
860 static struct ibv_qp *
mlx5_txq_ibv_qp_create(struct rte_eth_dev * dev,uint16_t idx)861 mlx5_txq_ibv_qp_create(struct rte_eth_dev *dev, uint16_t idx)
862 {
863 struct mlx5_priv *priv = dev->data->dev_private;
864 struct mlx5_txq_data *txq_data = (*priv->txqs)[idx];
865 struct mlx5_txq_ctrl *txq_ctrl =
866 container_of(txq_data, struct mlx5_txq_ctrl, txq);
867 struct ibv_qp *qp_obj = NULL;
868 struct ibv_qp_init_attr_ex qp_attr = { 0 };
869 const int desc = 1 << txq_data->elts_n;
870
871 MLX5_ASSERT(txq_ctrl->obj->cq);
872 /* CQ to be associated with the send queue. */
873 qp_attr.send_cq = txq_ctrl->obj->cq;
874 /* CQ to be associated with the receive queue. */
875 qp_attr.recv_cq = txq_ctrl->obj->cq;
876 /* Max number of outstanding WRs. */
877 qp_attr.cap.max_send_wr = ((priv->sh->device_attr.max_qp_wr < desc) ?
878 priv->sh->device_attr.max_qp_wr : desc);
879 /*
880 * Max number of scatter/gather elements in a WR, must be 1 to prevent
881 * libmlx5 from trying to affect must be 1 to prevent libmlx5 from
882 * trying to affect too much memory. TX gather is not impacted by the
883 * device_attr.max_sge limit and will still work properly.
884 */
885 qp_attr.cap.max_send_sge = 1;
886 qp_attr.qp_type = IBV_QPT_RAW_PACKET,
887 /* Do *NOT* enable this, completions events are managed per Tx burst. */
888 qp_attr.sq_sig_all = 0;
889 qp_attr.pd = priv->sh->pd;
890 qp_attr.comp_mask = IBV_QP_INIT_ATTR_PD;
891 if (txq_data->inlen_send)
892 qp_attr.cap.max_inline_data = txq_ctrl->max_inline_data;
893 if (txq_data->tso_en) {
894 qp_attr.max_tso_header = txq_ctrl->max_tso_header;
895 qp_attr.comp_mask |= IBV_QP_INIT_ATTR_MAX_TSO_HEADER;
896 }
897 qp_obj = mlx5_glue->create_qp_ex(priv->sh->ctx, &qp_attr);
898 if (qp_obj == NULL) {
899 DRV_LOG(ERR, "Port %u Tx queue %u QP creation failure.",
900 dev->data->port_id, idx);
901 rte_errno = errno;
902 }
903 return qp_obj;
904 }
905
906 /**
907 * Create the Tx queue Verbs object.
908 *
909 * @param dev
910 * Pointer to Ethernet device.
911 * @param idx
912 * Queue index in DPDK Tx queue array.
913 *
914 * @return
915 * 0 on success, a negative errno value otherwise and rte_errno is set.
916 */
917 int
mlx5_txq_ibv_obj_new(struct rte_eth_dev * dev,uint16_t idx)918 mlx5_txq_ibv_obj_new(struct rte_eth_dev *dev, uint16_t idx)
919 {
920 struct mlx5_priv *priv = dev->data->dev_private;
921 struct mlx5_txq_data *txq_data = (*priv->txqs)[idx];
922 struct mlx5_txq_ctrl *txq_ctrl =
923 container_of(txq_data, struct mlx5_txq_ctrl, txq);
924 struct mlx5_txq_obj *txq_obj = txq_ctrl->obj;
925 unsigned int cqe_n;
926 struct mlx5dv_qp qp;
927 struct mlx5dv_cq cq_info;
928 struct mlx5dv_obj obj;
929 const int desc = 1 << txq_data->elts_n;
930 int ret = 0;
931
932 MLX5_ASSERT(txq_data);
933 MLX5_ASSERT(txq_obj);
934 txq_obj->txq_ctrl = txq_ctrl;
935 priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_TX_QUEUE;
936 priv->verbs_alloc_ctx.obj = txq_ctrl;
937 if (mlx5_getenv_int("MLX5_ENABLE_CQE_COMPRESSION")) {
938 DRV_LOG(ERR, "Port %u MLX5_ENABLE_CQE_COMPRESSION "
939 "must never be set.", dev->data->port_id);
940 rte_errno = EINVAL;
941 return -rte_errno;
942 }
943 cqe_n = desc / MLX5_TX_COMP_THRESH +
944 1 + MLX5_TX_COMP_THRESH_INLINE_DIV;
945 txq_obj->cq = mlx5_glue->create_cq(priv->sh->ctx, cqe_n, NULL, NULL, 0);
946 if (txq_obj->cq == NULL) {
947 DRV_LOG(ERR, "Port %u Tx queue %u CQ creation failure.",
948 dev->data->port_id, idx);
949 rte_errno = errno;
950 goto error;
951 }
952 txq_obj->qp = mlx5_txq_ibv_qp_create(dev, idx);
953 if (txq_obj->qp == NULL) {
954 rte_errno = errno;
955 goto error;
956 }
957 ret = mlx5_ibv_modify_qp(txq_obj, MLX5_TXQ_MOD_RST2RDY,
958 (uint8_t)priv->dev_port);
959 if (ret) {
960 DRV_LOG(ERR, "Port %u Tx queue %u QP state modifying failed.",
961 dev->data->port_id, idx);
962 rte_errno = errno;
963 goto error;
964 }
965 qp.comp_mask = MLX5DV_QP_MASK_UAR_MMAP_OFFSET;
966 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
967 /* If using DevX, need additional mask to read tisn value. */
968 if (priv->sh->devx && !priv->sh->tdn)
969 qp.comp_mask |= MLX5DV_QP_MASK_RAW_QP_HANDLES;
970 #endif
971 obj.cq.in = txq_obj->cq;
972 obj.cq.out = &cq_info;
973 obj.qp.in = txq_obj->qp;
974 obj.qp.out = &qp;
975 ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_QP);
976 if (ret != 0) {
977 rte_errno = errno;
978 goto error;
979 }
980 if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
981 DRV_LOG(ERR,
982 "Port %u wrong MLX5_CQE_SIZE environment variable"
983 " value: it should be set to %u.",
984 dev->data->port_id, RTE_CACHE_LINE_SIZE);
985 rte_errno = EINVAL;
986 goto error;
987 }
988 txq_data->cqe_n = log2above(cq_info.cqe_cnt);
989 txq_data->cqe_s = 1 << txq_data->cqe_n;
990 txq_data->cqe_m = txq_data->cqe_s - 1;
991 txq_data->qp_num_8s = ((struct ibv_qp *)txq_obj->qp)->qp_num << 8;
992 txq_data->wqes = qp.sq.buf;
993 txq_data->wqe_n = log2above(qp.sq.wqe_cnt);
994 txq_data->wqe_s = 1 << txq_data->wqe_n;
995 txq_data->wqe_m = txq_data->wqe_s - 1;
996 txq_data->wqes_end = txq_data->wqes + txq_data->wqe_s;
997 txq_data->qp_db = &qp.dbrec[MLX5_SND_DBR];
998 txq_data->cq_db = cq_info.dbrec;
999 txq_data->cqes = (volatile struct mlx5_cqe *)cq_info.buf;
1000 txq_data->cq_ci = 0;
1001 txq_data->cq_pi = 0;
1002 txq_data->wqe_ci = 0;
1003 txq_data->wqe_pi = 0;
1004 txq_data->wqe_comp = 0;
1005 txq_data->wqe_thres = txq_data->wqe_s / MLX5_TX_COMP_THRESH_INLINE_DIV;
1006 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
1007 /*
1008 * If using DevX need to query and store TIS transport domain value.
1009 * This is done once per port.
1010 * Will use this value on Rx, when creating matching TIR.
1011 */
1012 if (priv->sh->devx && !priv->sh->tdn) {
1013 ret = mlx5_devx_cmd_qp_query_tis_td(txq_obj->qp, qp.tisn,
1014 &priv->sh->tdn);
1015 if (ret) {
1016 DRV_LOG(ERR, "Fail to query port %u Tx queue %u QP TIS "
1017 "transport domain.", dev->data->port_id, idx);
1018 rte_errno = EINVAL;
1019 goto error;
1020 } else {
1021 DRV_LOG(DEBUG, "Port %u Tx queue %u TIS number %d "
1022 "transport domain %d.", dev->data->port_id,
1023 idx, qp.tisn, priv->sh->tdn);
1024 }
1025 }
1026 #endif
1027 txq_ctrl->bf_reg = qp.bf.reg;
1028 if (qp.comp_mask & MLX5DV_QP_MASK_UAR_MMAP_OFFSET) {
1029 txq_ctrl->uar_mmap_offset = qp.uar_mmap_offset;
1030 DRV_LOG(DEBUG, "Port %u: uar_mmap_offset 0x%" PRIx64 ".",
1031 dev->data->port_id, txq_ctrl->uar_mmap_offset);
1032 } else {
1033 DRV_LOG(ERR,
1034 "Port %u failed to retrieve UAR info, invalid"
1035 " libmlx5.so",
1036 dev->data->port_id);
1037 rte_errno = EINVAL;
1038 goto error;
1039 }
1040 txq_uar_init(txq_ctrl);
1041 dev->data->tx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STARTED;
1042 priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
1043 return 0;
1044 error:
1045 ret = rte_errno; /* Save rte_errno before cleanup. */
1046 if (txq_obj->cq)
1047 claim_zero(mlx5_glue->destroy_cq(txq_obj->cq));
1048 if (txq_obj->qp)
1049 claim_zero(mlx5_glue->destroy_qp(txq_obj->qp));
1050 priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
1051 rte_errno = ret; /* Restore rte_errno. */
1052 return -rte_errno;
1053 }
1054
1055 /**
1056 * Release an Tx verbs queue object.
1057 *
1058 * @param txq_obj
1059 * Verbs Tx queue object..
1060 */
1061 void
mlx5_txq_ibv_obj_release(struct mlx5_txq_obj * txq_obj)1062 mlx5_txq_ibv_obj_release(struct mlx5_txq_obj *txq_obj)
1063 {
1064 MLX5_ASSERT(txq_obj);
1065 claim_zero(mlx5_glue->destroy_qp(txq_obj->qp));
1066 claim_zero(mlx5_glue->destroy_cq(txq_obj->cq));
1067 }
1068
1069 struct mlx5_obj_ops ibv_obj_ops = {
1070 .rxq_obj_modify_vlan_strip = mlx5_rxq_obj_modify_wq_vlan_strip,
1071 .rxq_obj_new = mlx5_rxq_ibv_obj_new,
1072 .rxq_event_get = mlx5_rx_ibv_get_event,
1073 .rxq_obj_modify = mlx5_ibv_modify_wq,
1074 .rxq_obj_release = mlx5_rxq_ibv_obj_release,
1075 .ind_table_new = mlx5_ibv_ind_table_new,
1076 .ind_table_destroy = mlx5_ibv_ind_table_destroy,
1077 .hrxq_new = mlx5_ibv_hrxq_new,
1078 .hrxq_destroy = mlx5_ibv_qp_destroy,
1079 .drop_action_create = mlx5_ibv_drop_action_create,
1080 .drop_action_destroy = mlx5_ibv_drop_action_destroy,
1081 .txq_obj_new = mlx5_txq_ibv_obj_new,
1082 .txq_obj_modify = mlx5_ibv_modify_qp,
1083 .txq_obj_release = mlx5_txq_ibv_obj_release,
1084 };
1085