1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2015 6WIND S.A.
3 * Copyright 2015 Mellanox Technologies, Ltd
4 */
5
6 #include <unistd.h>
7
8 #include <rte_ether.h>
9 #include <ethdev_driver.h>
10 #include <rte_interrupts.h>
11 #include <rte_alarm.h>
12 #include <rte_cycles.h>
13
14 #include <mlx5_malloc.h>
15
16 #include "mlx5.h"
17 #include "mlx5_flow.h"
18 #include "mlx5_rx.h"
19 #include "mlx5_tx.h"
20 #include "mlx5_utils.h"
21 #include "rte_pmd_mlx5.h"
22
23 /**
24 * Stop traffic on Tx queues.
25 *
26 * @param dev
27 * Pointer to Ethernet device structure.
28 */
29 static void
30 mlx5_txq_stop(struct rte_eth_dev *dev)
31 {
32 struct mlx5_priv *priv = dev->data->dev_private;
33 unsigned int i;
34
35 for (i = 0; i != priv->txqs_n; ++i)
36 mlx5_txq_release(dev, i);
37 }
38
39 /**
40 * Start traffic on Tx queues.
41 *
42 * @param dev
43 * Pointer to Ethernet device structure.
44 *
45 * @return
46 * 0 on success, a negative errno value otherwise and rte_errno is set.
47 */
48 static int
49 mlx5_txq_start(struct rte_eth_dev *dev)
50 {
51 struct mlx5_priv *priv = dev->data->dev_private;
52 unsigned int i;
53 int ret;
54
55 for (i = 0; i != priv->txqs_n; ++i) {
56 struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
57 struct mlx5_txq_data *txq_data = &txq_ctrl->txq;
58 uint32_t flags = MLX5_MEM_RTE | MLX5_MEM_ZERO;
59
60 if (!txq_ctrl)
61 continue;
62 if (!txq_ctrl->is_hairpin)
63 txq_alloc_elts(txq_ctrl);
64 MLX5_ASSERT(!txq_ctrl->obj);
65 txq_ctrl->obj = mlx5_malloc(flags, sizeof(struct mlx5_txq_obj),
66 0, txq_ctrl->socket);
67 if (!txq_ctrl->obj) {
68 DRV_LOG(ERR, "Port %u Tx queue %u cannot allocate "
69 "memory resources.", dev->data->port_id,
70 txq_data->idx);
71 rte_errno = ENOMEM;
72 goto error;
73 }
74 ret = priv->obj_ops.txq_obj_new(dev, i);
75 if (ret < 0) {
76 mlx5_free(txq_ctrl->obj);
77 txq_ctrl->obj = NULL;
78 goto error;
79 }
80 if (!txq_ctrl->is_hairpin) {
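/*
 * Free-completion-queue array: one slot per CQE, used on the Tx
 * completion path to track how far the elts ring can be freed.
 */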
81 size_t size = txq_data->cqe_s * sizeof(*txq_data->fcqs);
82
83 txq_data->fcqs = mlx5_malloc(flags, size,
84 RTE_CACHE_LINE_SIZE,
85 txq_ctrl->socket);
86 if (!txq_data->fcqs) {
87 DRV_LOG(ERR, "Port %u Tx queue %u cannot "
88 "allocate memory (FCQ).",
89 dev->data->port_id, i);
90 rte_errno = ENOMEM;
91 goto error;
92 }
93 }
94 DRV_LOG(DEBUG, "Port %u txq %u updated with %p.",
95 dev->data->port_id, i, (void *)&txq_ctrl->obj);
96 LIST_INSERT_HEAD(&priv->txqsobj, txq_ctrl->obj, next);
97 }
98 return 0;
99 error:
100 ret = rte_errno; /* Save rte_errno before cleanup. */
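/* Release the queue that failed (index i) and every queue started before it. */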
101 do {
102 mlx5_txq_release(dev, i);
103 } while (i-- != 0);
104 rte_errno = ret; /* Restore rte_errno. */
105 return -rte_errno;
106 }
107
108 /**
109 * Register Rx queue mempools and fill the Rx queue cache.
110 * This function tolerates repeated mempool registration.
111 *
112 * @param[in] rxq_ctrl
113 * Rx queue control data.
114 *
115 * @return
116 * 0 on success, (-1) on failure and rte_errno is set.
117 */
118 static int
119 mlx5_rxq_mempool_register(struct mlx5_rxq_ctrl *rxq_ctrl)
120 {
121 struct rte_mempool *mp;
122 uint32_t s;
123 int ret = 0;
124
125 mlx5_mr_flush_local_cache(&rxq_ctrl->rxq.mr_ctrl);
126 /* MPRQ mempool is registered on creation, just fill the cache. */
127 if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq))
128 return mlx5_mr_mempool_populate_cache(&rxq_ctrl->rxq.mr_ctrl,
129 rxq_ctrl->rxq.mprq_mp);
130 for (s = 0; s < rxq_ctrl->rxq.rxseg_n; s++) {
131 bool is_extmem;
132
133 mp = rxq_ctrl->rxq.rxseg[s].mp;
134 is_extmem = (rte_pktmbuf_priv_flags(mp) &
135 RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF) != 0;
136 ret = mlx5_mr_mempool_register(rxq_ctrl->sh->cdev, mp,
137 is_extmem);
138 if (ret < 0 && rte_errno != EEXIST)
139 return ret;
140 ret = mlx5_mr_mempool_populate_cache(&rxq_ctrl->rxq.mr_ctrl,
141 mp);
142 if (ret < 0)
143 return ret;
144 }
145 return 0;
146 }
147
148 /**
149 * Stop traffic on Rx queues.
150 *
151 * @param dev
152 * Pointer to Ethernet device structure.
153 */
154 static void
155 mlx5_rxq_stop(struct rte_eth_dev *dev)
156 {
157 struct mlx5_priv *priv = dev->data->dev_private;
158 unsigned int i;
159
160 for (i = 0; i != priv->rxqs_n; ++i)
161 mlx5_rxq_release(dev, i);
162 }
163
164 static int
165 mlx5_rxq_ctrl_prepare(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl,
166 unsigned int idx)
167 {
168 int ret = 0;
169
170 if (!rxq_ctrl->is_hairpin) {
171 /*
172 * Pre-register the mempools. Regardless of whether
173 * the implicit registration is enabled or not,
174 * Rx mempool destruction is tracked to free MRs.
175 */
176 if (mlx5_rxq_mempool_register(rxq_ctrl) < 0)
177 return -rte_errno;
178 ret = rxq_alloc_elts(rxq_ctrl);
179 if (ret)
180 return ret;
181 }
182 MLX5_ASSERT(!rxq_ctrl->obj);
183 rxq_ctrl->obj = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
184 sizeof(*rxq_ctrl->obj), 0,
185 rxq_ctrl->socket);
186 if (!rxq_ctrl->obj) {
187 DRV_LOG(ERR, "Port %u Rx queue %u can't allocate resources.",
188 dev->data->port_id, idx);
189 rte_errno = ENOMEM;
190 return -rte_errno;
191 }
192 DRV_LOG(DEBUG, "Port %u rxq %u updated with %p.", dev->data->port_id,
193 idx, (void *)&rxq_ctrl->obj);
194 return 0;
195 }
196
197 /**
198 * Start traffic on Rx queues.
199 *
200 * @param dev
201 * Pointer to Ethernet device structure.
202 *
203 * @return
204 * 0 on success, a negative errno value otherwise and rte_errno is set.
205 */
206 static int
207 mlx5_rxq_start(struct rte_eth_dev *dev)
208 {
209 struct mlx5_priv *priv = dev->data->dev_private;
210 unsigned int i;
211 int ret = 0;
212
213 /* Allocate/reuse/resize mempool for Multi-Packet RQ. */
214 if (mlx5_mprq_alloc_mp(dev)) {
215 /* Do not release Rx queues on failure, just return immediately. */
216 return -rte_errno;
217 }
218 DRV_LOG(DEBUG, "Port %u dev_cap.max_qp_wr is %d.",
219 dev->data->port_id, priv->sh->dev_cap.max_qp_wr);
220 DRV_LOG(DEBUG, "Port %u dev_cap.max_sge is %d.",
221 dev->data->port_id, priv->sh->dev_cap.max_sge);
222 for (i = 0; i != priv->rxqs_n; ++i) {
223 struct mlx5_rxq_priv *rxq = mlx5_rxq_ref(dev, i);
224 struct mlx5_rxq_ctrl *rxq_ctrl;
225
226 if (rxq == NULL)
227 continue;
228 rxq_ctrl = rxq->ctrl;
229 if (!rxq_ctrl->started) {
230 if (mlx5_rxq_ctrl_prepare(dev, rxq_ctrl, i) < 0)
231 goto error;
232 LIST_INSERT_HEAD(&priv->rxqsobj, rxq_ctrl->obj, next);
233 }
234 ret = priv->obj_ops.rxq_obj_new(rxq);
235 if (ret) {
236 mlx5_free(rxq_ctrl->obj);
237 rxq_ctrl->obj = NULL;
238 goto error;
239 }
240 rxq_ctrl->started = true;
241 }
242 return 0;
243 error:
244 ret = rte_errno; /* Save rte_errno before cleanup. */
245 do {
246 mlx5_rxq_release(dev, i);
247 } while (i-- != 0);
248 rte_errno = ret; /* Restore rte_errno. */
249 return -rte_errno;
250 }
251
252 /**
253 * Binds Tx queues to Rx queues for hairpin.
254 *
255 * Binds each hairpin Tx queue to its peer Rx queue on the same device (auto-bind mode only).
256 *
257 * @param dev
258 * Pointer to Ethernet device structure.
259 *
260 * @return
261 * 0 on success, a negative errno value otherwise and rte_errno is set.
262 */
263 static int
264 mlx5_hairpin_auto_bind(struct rte_eth_dev *dev)
265 {
266 struct mlx5_priv *priv = dev->data->dev_private;
267 struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
268 struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
269 struct mlx5_txq_ctrl *txq_ctrl;
270 struct mlx5_rxq_priv *rxq;
271 struct mlx5_rxq_ctrl *rxq_ctrl;
272 struct mlx5_devx_obj *sq;
273 struct mlx5_devx_obj *rq;
274 unsigned int i;
275 int ret = 0;
276 bool need_auto = false;
277 uint16_t self_port = dev->data->port_id;
278
279 for (i = 0; i != priv->txqs_n; ++i) {
280 txq_ctrl = mlx5_txq_get(dev, i);
281 if (!txq_ctrl)
282 continue;
283 if (!txq_ctrl->is_hairpin ||
284 txq_ctrl->hairpin_conf.peers[0].port != self_port) {
285 mlx5_txq_release(dev, i);
286 continue;
287 }
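/* If any same-device hairpin queue uses manual binding, skip auto binding entirely. */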
288 if (txq_ctrl->hairpin_conf.manual_bind) {
289 mlx5_txq_release(dev, i);
290 return 0;
291 }
292 need_auto = true;
293 mlx5_txq_release(dev, i);
294 }
295 if (!need_auto)
296 return 0;
297 for (i = 0; i != priv->txqs_n; ++i) {
298 txq_ctrl = mlx5_txq_get(dev, i);
299 if (!txq_ctrl)
300 continue;
301 /* Skip hairpin queues with other peer ports. */
302 if (!txq_ctrl->is_hairpin ||
303 txq_ctrl->hairpin_conf.peers[0].port != self_port) {
304 mlx5_txq_release(dev, i);
305 continue;
306 }
307 if (!txq_ctrl->obj) {
308 rte_errno = ENOMEM;
309 DRV_LOG(ERR, "port %u no txq object found: %d",
310 dev->data->port_id, i);
311 mlx5_txq_release(dev, i);
312 return -rte_errno;
313 }
314 sq = txq_ctrl->obj->sq;
315 rxq = mlx5_rxq_get(dev, txq_ctrl->hairpin_conf.peers[0].queue);
316 if (rxq == NULL) {
317 mlx5_txq_release(dev, i);
318 rte_errno = EINVAL;
319 DRV_LOG(ERR, "port %u no rxq object found: %d",
320 dev->data->port_id,
321 txq_ctrl->hairpin_conf.peers[0].queue);
322 return -rte_errno;
323 }
324 rxq_ctrl = rxq->ctrl;
325 if (!rxq_ctrl->is_hairpin ||
326 rxq->hairpin_conf.peers[0].queue != i) {
327 rte_errno = ENOMEM;
328 DRV_LOG(ERR, "port %u Tx queue %d can't be bound to "
329 "Rx queue %d", dev->data->port_id,
330 i, txq_ctrl->hairpin_conf.peers[0].queue);
331 goto error;
332 }
333 rq = rxq_ctrl->obj->rq;
334 if (!rq) {
335 rte_errno = ENOMEM;
336 DRV_LOG(ERR, "port %u hairpin no matching rxq: %d",
337 dev->data->port_id,
338 txq_ctrl->hairpin_conf.peers[0].queue);
339 goto error;
340 }
341 sq_attr.state = MLX5_SQC_STATE_RDY;
342 sq_attr.sq_state = MLX5_SQC_STATE_RST;
343 sq_attr.hairpin_peer_rq = rq->id;
344 sq_attr.hairpin_peer_vhca =
345 priv->sh->cdev->config.hca_attr.vhca_id;
346 ret = mlx5_devx_cmd_modify_sq(sq, &sq_attr);
347 if (ret)
348 goto error;
349 rq_attr.state = MLX5_SQC_STATE_RDY;
350 rq_attr.rq_state = MLX5_SQC_STATE_RST;
351 rq_attr.hairpin_peer_sq = sq->id;
352 rq_attr.hairpin_peer_vhca =
353 priv->sh->cdev->config.hca_attr.vhca_id;
354 ret = mlx5_devx_cmd_modify_rq(rq, &rq_attr);
355 if (ret)
356 goto error;
357 /* Queues bound automatically here will be destroyed directly, no explicit unbind is needed. */
358 rxq->hairpin_status = 1;
359 txq_ctrl->hairpin_status = 1;
360 mlx5_txq_release(dev, i);
361 }
362 return 0;
363 error:
364 mlx5_txq_release(dev, i);
365 return -rte_errno;
366 }
367
368 /*
369 * Fetch the peer queue's SW & HW information.
370 *
371 * @param dev
372 * Pointer to Ethernet device structure.
373 * @param peer_queue
374 * Index of the queue to fetch the information from.
375 * @param current_info
376 * Pointer to the input peer information, not used currently.
377 * @param peer_info
378 * Pointer to the structure to store the information, output.
379 * @param direction
380 * Positive to get the RxQ information, zero to get the TxQ information.
381 *
382 * @return
383 * 0 on success, a negative errno value otherwise and rte_errno is set.
384 */
385 int
386 mlx5_hairpin_queue_peer_update(struct rte_eth_dev *dev, uint16_t peer_queue,
387 struct rte_hairpin_peer_info *current_info,
388 struct rte_hairpin_peer_info *peer_info,
389 uint32_t direction)
390 {
391 struct mlx5_priv *priv = dev->data->dev_private;
392 RTE_SET_USED(current_info);
393
394 if (dev->data->dev_started == 0) {
395 rte_errno = EBUSY;
396 DRV_LOG(ERR, "peer port %u is not started",
397 dev->data->port_id);
398 return -rte_errno;
399 }
400 /*
401 * Peer port used as egress. In the current design, a hairpin Tx queue
402 * is bound to its peer Rx queue, so here the Tx queue information of
403 * this (egress) device is collected and returned to the caller.
404 */
405 if (direction == 0) {
406 struct mlx5_txq_ctrl *txq_ctrl;
407
408 txq_ctrl = mlx5_txq_get(dev, peer_queue);
409 if (txq_ctrl == NULL) {
410 rte_errno = EINVAL;
411 DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
412 dev->data->port_id, peer_queue);
413 return -rte_errno;
414 }
415 if (!txq_ctrl->is_hairpin) {
416 rte_errno = EINVAL;
417 DRV_LOG(ERR, "port %u queue %d is not a hairpin Txq",
418 dev->data->port_id, peer_queue);
419 mlx5_txq_release(dev, peer_queue);
420 return -rte_errno;
421 }
422 if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
423 rte_errno = ENOMEM;
424 DRV_LOG(ERR, "port %u no Txq object found: %d",
425 dev->data->port_id, peer_queue);
426 mlx5_txq_release(dev, peer_queue);
427 return -rte_errno;
428 }
429 peer_info->qp_id = txq_ctrl->obj->sq->id;
430 peer_info->vhca_id = priv->sh->cdev->config.hca_attr.vhca_id;
431 /* 1-to-1 mapping, only the first one is used. */
432 peer_info->peer_q = txq_ctrl->hairpin_conf.peers[0].queue;
433 peer_info->tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
434 peer_info->manual_bind = txq_ctrl->hairpin_conf.manual_bind;
435 mlx5_txq_release(dev, peer_queue);
436 } else { /* Peer port used as ingress. */
437 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, peer_queue);
438 struct mlx5_rxq_ctrl *rxq_ctrl;
439
440 if (rxq == NULL) {
441 rte_errno = EINVAL;
442 DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
443 dev->data->port_id, peer_queue);
444 return -rte_errno;
445 }
446 rxq_ctrl = rxq->ctrl;
447 if (!rxq_ctrl->is_hairpin) {
448 rte_errno = EINVAL;
449 DRV_LOG(ERR, "port %u queue %d is not a hairpin Rxq",
450 dev->data->port_id, peer_queue);
451 return -rte_errno;
452 }
453 if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
454 rte_errno = ENOMEM;
455 DRV_LOG(ERR, "port %u no Rxq object found: %d",
456 dev->data->port_id, peer_queue);
457 return -rte_errno;
458 }
459 peer_info->qp_id = rxq_ctrl->obj->rq->id;
460 peer_info->vhca_id = priv->sh->cdev->config.hca_attr.vhca_id;
461 peer_info->peer_q = rxq->hairpin_conf.peers[0].queue;
462 peer_info->tx_explicit = rxq->hairpin_conf.tx_explicit;
463 peer_info->manual_bind = rxq->hairpin_conf.manual_bind;
464 }
465 return 0;
466 }
467
468 /*
469 * Bind the hairpin queue with the peer HW information.
470 * This needs to be called twice, once for the Tx and once for the Rx queue of a pair.
471 * If the queue is already bound, it is considered successful.
472 *
473 * @param dev
474 * Pointer to Ethernet device structure.
475 * @param cur_queue
476 * Index of the queue to change the HW configuration to bind.
477 * @param peer_info
478 * Pointer to information of the peer queue.
479 * @param direction
480 * Positive to configure the TxQ, zero to configure the RxQ.
481 *
482 * @return
483 * 0 on success, a negative errno value otherwise and rte_errno is set.
484 */
485 int
486 mlx5_hairpin_queue_peer_bind(struct rte_eth_dev *dev, uint16_t cur_queue,
487 struct rte_hairpin_peer_info *peer_info,
488 uint32_t direction)
489 {
490 int ret = 0;
491
492 /*
493 * Consistency check of the peer queue: the peer info was fetched by the
494 * ethdev port ID with the opposite direction, so the port needs no check.
495 */
496 if (peer_info->peer_q != cur_queue) {
497 rte_errno = EINVAL;
498 DRV_LOG(ERR, "port %u queue %d and peer queue %d mismatch",
499 dev->data->port_id, cur_queue, peer_info->peer_q);
500 return -rte_errno;
501 }
502 if (direction != 0) {
503 struct mlx5_txq_ctrl *txq_ctrl;
504 struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
505
506 txq_ctrl = mlx5_txq_get(dev, cur_queue);
507 if (txq_ctrl == NULL) {
508 rte_errno = EINVAL;
509 DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
510 dev->data->port_id, cur_queue);
511 return -rte_errno;
512 }
513 if (!txq_ctrl->is_hairpin) {
514 rte_errno = EINVAL;
515 DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
516 dev->data->port_id, cur_queue);
517 mlx5_txq_release(dev, cur_queue);
518 return -rte_errno;
519 }
520 if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
521 rte_errno = ENOMEM;
522 DRV_LOG(ERR, "port %u no Txq object found: %d",
523 dev->data->port_id, cur_queue);
524 mlx5_txq_release(dev, cur_queue);
525 return -rte_errno;
526 }
527 if (txq_ctrl->hairpin_status != 0) {
528 DRV_LOG(DEBUG, "port %u Tx queue %d is already bound",
529 dev->data->port_id, cur_queue);
530 mlx5_txq_release(dev, cur_queue);
531 return 0;
532 }
533 /*
534 * Consistency checking across all queues of one port is done in the
535 * bind() function, and it is optional.
536 */
537 if (peer_info->tx_explicit !=
538 txq_ctrl->hairpin_conf.tx_explicit) {
539 rte_errno = EINVAL;
540 DRV_LOG(ERR, "port %u Tx queue %d and peer Tx rule mode"
541 " mismatch", dev->data->port_id, cur_queue);
542 mlx5_txq_release(dev, cur_queue);
543 return -rte_errno;
544 }
545 if (peer_info->manual_bind !=
546 txq_ctrl->hairpin_conf.manual_bind) {
547 rte_errno = EINVAL;
548 DRV_LOG(ERR, "port %u Tx queue %d and peer binding mode"
549 " mismatch", dev->data->port_id, cur_queue);
550 mlx5_txq_release(dev, cur_queue);
551 return -rte_errno;
552 }
553 sq_attr.state = MLX5_SQC_STATE_RDY;
554 sq_attr.sq_state = MLX5_SQC_STATE_RST;
555 sq_attr.hairpin_peer_rq = peer_info->qp_id;
556 sq_attr.hairpin_peer_vhca = peer_info->vhca_id;
557 ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
558 if (ret == 0)
559 txq_ctrl->hairpin_status = 1;
560 mlx5_txq_release(dev, cur_queue);
561 } else {
562 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, cur_queue);
563 struct mlx5_rxq_ctrl *rxq_ctrl;
564 struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
565
566 if (rxq == NULL) {
567 rte_errno = EINVAL;
568 DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
569 dev->data->port_id, cur_queue);
570 return -rte_errno;
571 }
572 rxq_ctrl = rxq->ctrl;
573 if (!rxq_ctrl->is_hairpin) {
574 rte_errno = EINVAL;
575 DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
576 dev->data->port_id, cur_queue);
577 return -rte_errno;
578 }
579 if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
580 rte_errno = ENOMEM;
581 DRV_LOG(ERR, "port %u no Rxq object found: %d",
582 dev->data->port_id, cur_queue);
583 return -rte_errno;
584 }
585 if (rxq->hairpin_status != 0) {
586 DRV_LOG(DEBUG, "port %u Rx queue %d is already bound",
587 dev->data->port_id, cur_queue);
588 return 0;
589 }
590 if (peer_info->tx_explicit !=
591 rxq->hairpin_conf.tx_explicit) {
592 rte_errno = EINVAL;
593 DRV_LOG(ERR, "port %u Rx queue %d and peer Tx rule mode"
594 " mismatch", dev->data->port_id, cur_queue);
595 return -rte_errno;
596 }
597 if (peer_info->manual_bind !=
598 rxq->hairpin_conf.manual_bind) {
599 rte_errno = EINVAL;
600 DRV_LOG(ERR, "port %u Rx queue %d and peer binding mode"
601 " mismatch", dev->data->port_id, cur_queue);
602 return -rte_errno;
603 }
604 rq_attr.state = MLX5_SQC_STATE_RDY;
605 rq_attr.rq_state = MLX5_SQC_STATE_RST;
606 rq_attr.hairpin_peer_sq = peer_info->qp_id;
607 rq_attr.hairpin_peer_vhca = peer_info->vhca_id;
608 ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
609 if (ret == 0)
610 rxq->hairpin_status = 1;
611 }
612 return ret;
613 }
614
615 /*
616 * Unbind the hairpin queue and reset its HW configuration.
617 * This needs to be called twice, once for the Tx and once for the Rx queue of a pair.
618 * If the queue is already unbound, it is considered successful.
619 *
620 * @param dev
621 * Pointer to Ethernet device structure.
622 * @param cur_queue
623 * Index of the queue to change the HW configuration to unbind.
624 * @param direction
625 * Positive to reset the TxQ, zero to reset the RxQ.
626 *
627 * @return
628 * 0 on success, a negative errno value otherwise and rte_errno is set.
629 */
630 int
631 mlx5_hairpin_queue_peer_unbind(struct rte_eth_dev *dev, uint16_t cur_queue,
632 uint32_t direction)
633 {
634 int ret = 0;
635
636 if (direction != 0) {
637 struct mlx5_txq_ctrl *txq_ctrl;
638 struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
639
640 txq_ctrl = mlx5_txq_get(dev, cur_queue);
641 if (txq_ctrl == NULL) {
642 rte_errno = EINVAL;
643 DRV_LOG(ERR, "Failed to get port %u Tx queue %d",
644 dev->data->port_id, cur_queue);
645 return -rte_errno;
646 }
647 if (!txq_ctrl->is_hairpin) {
648 rte_errno = EINVAL;
649 DRV_LOG(ERR, "port %u queue %d not a hairpin Txq",
650 dev->data->port_id, cur_queue);
651 mlx5_txq_release(dev, cur_queue);
652 return -rte_errno;
653 }
654 /* Already unbound, return success before obj checking. */
655 if (txq_ctrl->hairpin_status == 0) {
656 DRV_LOG(DEBUG, "port %u Tx queue %d is already unbound",
657 dev->data->port_id, cur_queue);
658 mlx5_txq_release(dev, cur_queue);
659 return 0;
660 }
661 if (!txq_ctrl->obj || !txq_ctrl->obj->sq) {
662 rte_errno = ENOMEM;
663 DRV_LOG(ERR, "port %u no Txq object found: %d",
664 dev->data->port_id, cur_queue);
665 mlx5_txq_release(dev, cur_queue);
666 return -rte_errno;
667 }
668 sq_attr.state = MLX5_SQC_STATE_RST;
669 sq_attr.sq_state = MLX5_SQC_STATE_RST;
670 ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
671 if (ret == 0)
672 txq_ctrl->hairpin_status = 0;
673 mlx5_txq_release(dev, cur_queue);
674 } else {
675 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, cur_queue);
676 struct mlx5_rxq_ctrl *rxq_ctrl;
677 struct mlx5_devx_modify_rq_attr rq_attr = { 0 };
678
679 if (rxq == NULL) {
680 rte_errno = EINVAL;
681 DRV_LOG(ERR, "Failed to get port %u Rx queue %d",
682 dev->data->port_id, cur_queue);
683 return -rte_errno;
684 }
685 rxq_ctrl = rxq->ctrl;
686 if (!rxq_ctrl->is_hairpin) {
687 rte_errno = EINVAL;
688 DRV_LOG(ERR, "port %u queue %d not a hairpin Rxq",
689 dev->data->port_id, cur_queue);
690 return -rte_errno;
691 }
692 if (rxq->hairpin_status == 0) {
693 DRV_LOG(DEBUG, "port %u Rx queue %d is already unbound",
694 dev->data->port_id, cur_queue);
695 return 0;
696 }
697 if (rxq_ctrl->obj == NULL || rxq_ctrl->obj->rq == NULL) {
698 rte_errno = ENOMEM;
699 DRV_LOG(ERR, "port %u no Rxq object found: %d",
700 dev->data->port_id, cur_queue);
701 return -rte_errno;
702 }
703 rq_attr.state = MLX5_SQC_STATE_RST;
704 rq_attr.rq_state = MLX5_SQC_STATE_RST;
705 ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, &rq_attr);
706 if (ret == 0)
707 rxq->hairpin_status = 0;
708 }
709 return ret;
710 }
711
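/*
 * Summary of the binding flow implemented by the helpers above (kept as an
 * illustrative note): for each hairpin queue, the peer HW information is
 * first fetched with the peer_update callback, then peer_bind moves the
 * local SQ/RQ from RST to RDY with the peer object ID and vhca_id set.
 * Unbinding moves the objects back to the RST state.
 */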
712 /*
713 * Bind the hairpin port pairs, from the Tx to the peer Rx.
714 * This function only supports binding the Tx port to one Rx port.
715 *
716 * @param dev
717 * Pointer to Ethernet device structure.
718 * @param rx_port
719 * Port identifier of the Rx port.
720 *
721 * @return
722 * 0 on success, a negative errno value otherwise and rte_errno is set.
723 */
724 static int
725 mlx5_hairpin_bind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
726 {
727 struct mlx5_priv *priv = dev->data->dev_private;
728 int ret = 0;
729 struct mlx5_txq_ctrl *txq_ctrl;
730 uint32_t i;
731 struct rte_hairpin_peer_info peer = {0xffffff};
732 struct rte_hairpin_peer_info cur;
733 const struct rte_eth_hairpin_conf *conf;
734 uint16_t num_q = 0;
735 uint16_t local_port = priv->dev_data->port_id;
736 uint32_t manual;
737 uint32_t explicit;
738 uint16_t rx_queue;
739
740 if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
741 rte_errno = ENODEV;
742 DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
743 return -rte_errno;
744 }
745 /*
746 * Before binding TxQs to peer RxQs, a first pass over the queues checks
747 * their configuration consistency. This costs a little time but is
748 * better than having to roll back afterwards.
749 */
750 for (i = 0; i != priv->txqs_n; i++) {
751 txq_ctrl = mlx5_txq_get(dev, i);
752 if (txq_ctrl == NULL)
753 continue;
754 if (!txq_ctrl->is_hairpin) {
755 mlx5_txq_release(dev, i);
756 continue;
757 }
758 /*
759 * All hairpin Tx queues of a single port that connected to the
760 * same peer Rx port should have the same "auto binding" and
761 * "implicit Tx flow" modes.
762 * Peer consistency checking will be done in per queue binding.
763 */
764 conf = &txq_ctrl->hairpin_conf;
765 if (conf->peers[0].port == rx_port) {
766 if (num_q == 0) {
767 manual = conf->manual_bind;
768 explicit = conf->tx_explicit;
769 } else {
770 if (manual != conf->manual_bind ||
771 explicit != conf->tx_explicit) {
772 rte_errno = EINVAL;
773 DRV_LOG(ERR, "port %u queue %d mode"
774 " mismatch: %u %u, %u %u",
775 local_port, i, manual,
776 conf->manual_bind, explicit,
777 conf->tx_explicit);
778 mlx5_txq_release(dev, i);
779 return -rte_errno;
780 }
781 }
782 num_q++;
783 }
784 mlx5_txq_release(dev, i);
785 }
786 /* If no queue is configured, return success directly. */
787 if (num_q == 0)
788 return ret;
789 /* All the hairpin TX queues need to be traversed again. */
790 for (i = 0; i != priv->txqs_n; i++) {
791 txq_ctrl = mlx5_txq_get(dev, i);
792 if (txq_ctrl == NULL)
793 continue;
794 if (!txq_ctrl->is_hairpin) {
795 mlx5_txq_release(dev, i);
796 continue;
797 }
798 if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
799 mlx5_txq_release(dev, i);
800 continue;
801 }
802 rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
803 /*
804 * Fetch peer RxQ's information.
805 * No need to pass the information of the current queue.
806 */
807 ret = rte_eth_hairpin_queue_peer_update(rx_port, rx_queue,
808 NULL, &peer, 1);
809 if (ret != 0) {
810 mlx5_txq_release(dev, i);
811 goto error;
812 }
813 /* Accessing its own device, inside mlx5 PMD. */
814 ret = mlx5_hairpin_queue_peer_bind(dev, i, &peer, 1);
815 if (ret != 0) {
816 mlx5_txq_release(dev, i);
817 goto error;
818 }
819 /* Pass TxQ's information to peer RxQ and try binding. */
820 cur.peer_q = rx_queue;
821 cur.qp_id = txq_ctrl->obj->sq->id;
822 cur.vhca_id = priv->sh->cdev->config.hca_attr.vhca_id;
823 cur.tx_explicit = txq_ctrl->hairpin_conf.tx_explicit;
824 cur.manual_bind = txq_ctrl->hairpin_conf.manual_bind;
825 /*
826 * Accessing another device must go through the RTE-level (ethdev)
827 * private function.
828 */
829 ret = rte_eth_hairpin_queue_peer_bind(rx_port, rx_queue,
830 &cur, 0);
831 if (ret != 0) {
832 mlx5_txq_release(dev, i);
833 goto error;
834 }
835 mlx5_txq_release(dev, i);
836 }
837 return 0;
838 error:
839 /*
840 * Roll back the queues that were already bound.
841 * No need to check the return value of the queue unbind function.
842 */
843 do {
844 /* No validation is needed here. */
845 txq_ctrl = mlx5_txq_get(dev, i);
846 if (txq_ctrl == NULL)
847 continue;
848 rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
849 rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
850 mlx5_hairpin_queue_peer_unbind(dev, i, 1);
851 mlx5_txq_release(dev, i);
852 } while (i--);
853 return ret;
854 }
855
856 /*
857 * Unbind the hairpin port pair; the HW configuration of both devices will be
858 * cleared and the status reset for all the queues used between them.
859 * This function only supports unbinding the Tx port from one Rx port.
860 *
861 * @param dev
862 * Pointer to Ethernet device structure.
863 * @param rx_port
864 * Port identifier of the Rx port.
865 *
866 * @return
867 * 0 on success, a negative errno value otherwise and rte_errno is set.
868 */
869 static int
870 mlx5_hairpin_unbind_single_port(struct rte_eth_dev *dev, uint16_t rx_port)
871 {
872 struct mlx5_priv *priv = dev->data->dev_private;
873 struct mlx5_txq_ctrl *txq_ctrl;
874 uint32_t i;
875 int ret;
876 uint16_t cur_port = priv->dev_data->port_id;
877
878 if (mlx5_eth_find_next(rx_port, dev->device) != rx_port) {
879 rte_errno = ENODEV;
880 DRV_LOG(ERR, "Rx port %u does not belong to mlx5", rx_port);
881 return -rte_errno;
882 }
883 for (i = 0; i != priv->txqs_n; i++) {
884 uint16_t rx_queue;
885
886 txq_ctrl = mlx5_txq_get(dev, i);
887 if (txq_ctrl == NULL)
888 continue;
889 if (!txq_ctrl->is_hairpin) {
890 mlx5_txq_release(dev, i);
891 continue;
892 }
893 if (txq_ctrl->hairpin_conf.peers[0].port != rx_port) {
894 mlx5_txq_release(dev, i);
895 continue;
896 }
897 /* Indeed, only the first used queue needs to be checked. */
898 if (txq_ctrl->hairpin_conf.manual_bind == 0) {
899 if (cur_port != rx_port) {
900 rte_errno = EINVAL;
901 DRV_LOG(ERR, "port %u and port %u are in"
902 " auto-bind mode", cur_port, rx_port);
903 mlx5_txq_release(dev, i);
904 return -rte_errno;
905 } else {
906 return 0;
907 }
908 }
909 rx_queue = txq_ctrl->hairpin_conf.peers[0].queue;
910 mlx5_txq_release(dev, i);
911 ret = rte_eth_hairpin_queue_peer_unbind(rx_port, rx_queue, 0);
912 if (ret) {
913 DRV_LOG(ERR, "port %u Rx queue %d unbind - failure",
914 rx_port, rx_queue);
915 return ret;
916 }
917 ret = mlx5_hairpin_queue_peer_unbind(dev, i, 1);
918 if (ret) {
919 DRV_LOG(ERR, "port %u Tx queue %d unbind - failure",
920 cur_port, i);
921 return ret;
922 }
923 }
924 return 0;
925 }
926
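/*
 * Illustrative application-side usage (not part of this driver): binding is
 * normally triggered through the generic ethdev API, which dispatches to
 * mlx5_hairpin_bind() below, e.g.:
 *
 *	int ret = rte_eth_hairpin_bind(tx_port, rx_port);
 *	if (ret != 0)
 *		printf("hairpin bind failed: %s\n", rte_strerror(-ret));
 */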
927 /*
928 * Bind hairpin ports, Rx could be all ports when using RTE_MAX_ETHPORTS.
929 * @see mlx5_hairpin_bind_single_port()
930 */
931 int
932 mlx5_hairpin_bind(struct rte_eth_dev *dev, uint16_t rx_port)
933 {
934 int ret = 0;
935 uint16_t p, pp;
936
937 /*
938 * If the Rx port has no hairpin configuration with the current port,
939 * the binding is skipped inside the single-port helper.
940 * The device started status is checked only right before the queue
941 * information is updated.
942 */
943 if (rx_port == RTE_MAX_ETHPORTS) {
944 MLX5_ETH_FOREACH_DEV(p, dev->device) {
945 ret = mlx5_hairpin_bind_single_port(dev, p);
946 if (ret != 0)
947 goto unbind;
948 }
949 return ret;
950 } else {
951 return mlx5_hairpin_bind_single_port(dev, rx_port);
952 }
953 unbind:
954 MLX5_ETH_FOREACH_DEV(pp, dev->device)
955 if (pp < p)
956 mlx5_hairpin_unbind_single_port(dev, pp);
957 return ret;
958 }
959
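/*
 * Illustrative counterpart (not part of this driver):
 *
 *	rte_eth_hairpin_unbind(tx_port, RTE_MAX_ETHPORTS);
 *
 * unbinds the Tx port from all of its peer Rx ports.
 */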
960 /*
961 * Unbind hairpin ports, Rx could be all ports when using RTE_MAX_ETHPORTS.
962 * @see mlx5_hairpin_unbind_single_port()
963 */
964 int
965 mlx5_hairpin_unbind(struct rte_eth_dev *dev, uint16_t rx_port)
966 {
967 int ret = 0;
968 uint16_t p;
969
970 if (rx_port == RTE_MAX_ETHPORTS)
971 MLX5_ETH_FOREACH_DEV(p, dev->device) {
972 ret = mlx5_hairpin_unbind_single_port(dev, p);
973 if (ret != 0)
974 return ret;
975 }
976 else
977 ret = mlx5_hairpin_unbind_single_port(dev, rx_port);
978 return ret;
979 }
980
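/*
 * Illustrative query from the application side (not part of this driver):
 *
 *	uint16_t peers[RTE_MAX_ETHPORTS];
 *	int n = rte_eth_hairpin_get_peer_ports(port_id, peers,
 *					       RTE_MAX_ETHPORTS, 1);
 *
 * fills "peers" with the Rx peer ports of a Tx port when direction is 1.
 */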
981 /*
982 * DPDK callback to get the hairpin peer ports list.
983 * This returns the actual number of peer ports and saves the identifiers
984 * into the array (sorted, which may differ from the order used when setting
985 * up the hairpin peer queues).
986 * The peer port ID could be the same as the port ID of the current device.
987 *
988 * @param dev
989 * Pointer to Ethernet device structure.
990 * @param peer_ports
991 * Pointer to array to save the port identifiers.
992 * @param len
993 * The length of the array.
994 * @param direction
995 * Current port to peer port direction.
996 * positive - current used as Tx to get all peer Rx ports.
997 * zero - current used as Rx to get all peer Tx ports.
998 *
999 * @return
1000 * 0 or a positive value on success (the actual number of peer ports),
1001 * a negative errno value otherwise and rte_errno is set.
1002 */
1003 int
1004 mlx5_hairpin_get_peer_ports(struct rte_eth_dev *dev, uint16_t *peer_ports,
1005 size_t len, uint32_t direction)
1006 {
1007 struct mlx5_priv *priv = dev->data->dev_private;
1008 struct mlx5_txq_ctrl *txq_ctrl;
1009 uint32_t i;
1010 uint16_t pp;
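/* One bit per possible peer port ID, used to de-duplicate and report the ports in sorted order. */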
1011 uint32_t bits[(RTE_MAX_ETHPORTS + 31) / 32] = {0};
1012 int ret = 0;
1013
1014 if (direction) {
1015 for (i = 0; i < priv->txqs_n; i++) {
1016 txq_ctrl = mlx5_txq_get(dev, i);
1017 if (!txq_ctrl)
1018 continue;
1019 if (!txq_ctrl->is_hairpin) {
1020 mlx5_txq_release(dev, i);
1021 continue;
1022 }
1023 pp = txq_ctrl->hairpin_conf.peers[0].port;
1024 if (pp >= RTE_MAX_ETHPORTS) {
1025 rte_errno = ERANGE;
1026 mlx5_txq_release(dev, i);
1027 DRV_LOG(ERR, "port %hu queue %u peer port "
1028 "out of range %hu",
1029 priv->dev_data->port_id, i, pp);
1030 return -rte_errno;
1031 }
1032 bits[pp / 32] |= 1 << (pp % 32);
1033 mlx5_txq_release(dev, i);
1034 }
1035 } else {
1036 for (i = 0; i < priv->rxqs_n; i++) {
1037 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, i);
1038 struct mlx5_rxq_ctrl *rxq_ctrl;
1039
1040 if (rxq == NULL)
1041 continue;
1042 rxq_ctrl = rxq->ctrl;
1043 if (!rxq_ctrl->is_hairpin)
1044 continue;
1045 pp = rxq->hairpin_conf.peers[0].port;
1046 if (pp >= RTE_MAX_ETHPORTS) {
1047 rte_errno = ERANGE;
1048 DRV_LOG(ERR, "port %hu queue %u peer port "
1049 "out of range %hu",
1050 priv->dev_data->port_id, i, pp);
1051 return -rte_errno;
1052 }
1053 bits[pp / 32] |= 1 << (pp % 32);
1054 }
1055 }
1056 for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
1057 if (bits[i / 32] & (1 << (i % 32))) {
1058 if ((size_t)ret >= len) {
1059 rte_errno = E2BIG;
1060 return -rte_errno;
1061 }
1062 peer_ports[ret++] = i;
1063 }
1064 }
1065 return ret;
1066 }
1067
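/*
 * Illustrative start sequence from the application point of view (assumed
 * usage, not part of this file): after rte_eth_dev_configure() and the
 * queue setup calls, rte_eth_dev_start() ends up in mlx5_dev_start() below.
 *
 *	if (rte_eth_dev_start(port_id) < 0)
 *		rte_exit(EXIT_FAILURE, "cannot start port %u\n", port_id);
 */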
1068 /**
1069 * DPDK callback to start the device.
1070 *
1071 * Simulate device start by attaching all configured flows.
1072 *
1073 * @param dev
1074 * Pointer to Ethernet device structure.
1075 *
1076 * @return
1077 * 0 on success, a negative errno value otherwise and rte_errno is set.
1078 */
1079 int
1080 mlx5_dev_start(struct rte_eth_dev *dev)
1081 {
1082 struct mlx5_priv *priv = dev->data->dev_private;
1083 int ret;
1084 int fine_inline;
1085
1086 DRV_LOG(DEBUG, "port %u starting device", dev->data->port_id);
1087 fine_inline = rte_mbuf_dynflag_lookup
1088 (RTE_PMD_MLX5_FINE_GRANULARITY_INLINE, NULL);
1089 if (fine_inline >= 0)
1090 rte_net_mlx5_dynf_inline_mask = 1UL << fine_inline;
1091 else
1092 rte_net_mlx5_dynf_inline_mask = 0;
1093 if (dev->data->nb_rx_queues > 0) {
1094 ret = mlx5_dev_configure_rss_reta(dev);
1095 if (ret) {
1096 DRV_LOG(ERR, "port %u reta config failed: %s",
1097 dev->data->port_id, strerror(rte_errno));
1098 return -rte_errno;
1099 }
1100 }
1101 ret = mlx5_txpp_start(dev);
1102 if (ret) {
1103 DRV_LOG(ERR, "port %u Tx packet pacing init failed: %s",
1104 dev->data->port_id, strerror(rte_errno));
1105 goto error;
1106 }
1107 if (mlx5_devx_obj_ops_en(priv->sh) &&
1108 priv->obj_ops.lb_dummy_queue_create) {
1109 ret = priv->obj_ops.lb_dummy_queue_create(dev);
1110 if (ret)
1111 goto error;
1112 }
1113 ret = mlx5_txq_start(dev);
1114 if (ret) {
1115 DRV_LOG(ERR, "port %u Tx queue allocation failed: %s",
1116 dev->data->port_id, strerror(rte_errno));
1117 goto error;
1118 }
1119 if (priv->config.std_delay_drop || priv->config.hp_delay_drop) {
1120 if (!priv->sh->dev_cap.vf && !priv->sh->dev_cap.sf &&
1121 !priv->representor) {
1122 ret = mlx5_get_flag_dropless_rq(dev);
1123 if (ret < 0)
1124 DRV_LOG(WARNING,
1125 "port %u cannot query dropless flag",
1126 dev->data->port_id);
1127 else if (!ret)
1128 DRV_LOG(WARNING,
1129 "port %u dropless_rq OFF, no rearming",
1130 dev->data->port_id);
1131 } else {
1132 DRV_LOG(DEBUG,
1133 "port %u doesn't support dropless_rq flag",
1134 dev->data->port_id);
1135 }
1136 }
1137 ret = mlx5_rxq_start(dev);
1138 if (ret) {
1139 DRV_LOG(ERR, "port %u Rx queue allocation failed: %s",
1140 dev->data->port_id, strerror(rte_errno));
1141 goto error;
1142 }
1143 /*
1144 * This step is skipped if no hairpin Tx queue is configured with an Rx
1145 * peer queue on the same device.
1146 */
1147 ret = mlx5_hairpin_auto_bind(dev);
1148 if (ret) {
1149 DRV_LOG(ERR, "port %u hairpin auto binding failed: %s",
1150 dev->data->port_id, strerror(rte_errno));
1151 goto error;
1152 }
1153 /* Set started flag here for the following steps like control flow. */
1154 dev->data->dev_started = 1;
1155 ret = mlx5_rx_intr_vec_enable(dev);
1156 if (ret) {
1157 DRV_LOG(ERR, "port %u Rx interrupt vector creation failed",
1158 dev->data->port_id);
1159 goto error;
1160 }
1161 mlx5_os_stats_init(dev);
1162 /*
1163 * Attach indirection table objects detached on port stop.
1164 * They may be needed to create RSS in non-isolated mode.
1165 */
1166 ret = mlx5_action_handle_attach(dev);
1167 if (ret) {
1168 DRV_LOG(ERR,
1169 "port %u failed to attach indirect actions: %s",
1170 dev->data->port_id, rte_strerror(rte_errno));
1171 goto error;
1172 }
1173 ret = mlx5_traffic_enable(dev);
1174 if (ret) {
1175 DRV_LOG(ERR, "port %u failed to set defaults flows",
1176 dev->data->port_id);
1177 goto error;
1178 }
1179 /* Set a mask and offset of dynamic metadata flows into Rx queues. */
1180 mlx5_flow_rxq_dynf_metadata_set(dev);
1181 /* Set flags and context to convert Rx timestamps. */
1182 mlx5_rxq_timestamp_set(dev);
1183 /* Set a mask and offset of scheduling on timestamp into Tx queues. */
1184 mlx5_txq_dynf_timestamp_set(dev);
1185 /*
1186 * In non-cached mode, only the default mreg copy action needs to be
1187 * started, since no application-created flow exists anymore.
1188 * It is still worth wrapping the interface for further usage.
1189 */
1190 ret = mlx5_flow_start_default(dev);
1191 if (ret) {
1192 DRV_LOG(DEBUG, "port %u failed to start default actions: %s",
1193 dev->data->port_id, strerror(rte_errno));
1194 goto error;
1195 }
1196 if (mlx5_dev_ctx_shared_mempool_subscribe(dev) != 0) {
1197 DRV_LOG(ERR, "port %u failed to subscribe for mempool life cycle: %s",
1198 dev->data->port_id, rte_strerror(rte_errno));
1199 goto error;
1200 }
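/* Ensure all the configuration above is visible before the real burst functions are published. */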
1201 rte_wmb();
1202 dev->tx_pkt_burst = mlx5_select_tx_function(dev);
1203 dev->rx_pkt_burst = mlx5_select_rx_function(dev);
1204 /* Enable datapath on secondary process. */
1205 mlx5_mp_os_req_start_rxtx(dev);
1206 if (rte_intr_fd_get(priv->sh->intr_handle) >= 0) {
1207 priv->sh->port[priv->dev_port - 1].ih_port_id =
1208 (uint32_t)dev->data->port_id;
1209 } else {
1210 DRV_LOG(INFO, "port %u starts without RMV interrupts.",
1211 dev->data->port_id);
1212 dev->data->dev_conf.intr_conf.rmv = 0;
1213 }
1214 if (rte_intr_fd_get(priv->sh->intr_handle_nl) >= 0) {
1215 priv->sh->port[priv->dev_port - 1].nl_ih_port_id =
1216 (uint32_t)dev->data->port_id;
1217 } else {
1218 DRV_LOG(INFO, "port %u starts without LSC interrupts.",
1219 dev->data->port_id);
1220 dev->data->dev_conf.intr_conf.lsc = 0;
1221 }
1222 if (rte_intr_fd_get(priv->sh->intr_handle_devx) >= 0)
1223 priv->sh->port[priv->dev_port - 1].devx_ih_port_id =
1224 (uint32_t)dev->data->port_id;
1225 return 0;
1226 error:
1227 ret = rte_errno; /* Save rte_errno before cleanup. */
1228 /* Rollback. */
1229 dev->data->dev_started = 0;
1230 mlx5_flow_stop_default(dev);
1231 mlx5_traffic_disable(dev);
1232 mlx5_txq_stop(dev);
1233 mlx5_rxq_stop(dev);
1234 if (priv->obj_ops.lb_dummy_queue_release)
1235 priv->obj_ops.lb_dummy_queue_release(dev);
1236 mlx5_txpp_stop(dev); /* Stop last. */
1237 rte_errno = ret; /* Restore rte_errno. */
1238 return -rte_errno;
1239 }
1240
1241 /**
1242 * DPDK callback to stop the device.
1243 *
1244 * Simulate device stop by detaching all configured flows.
1245 *
1246 * @param dev
1247 * Pointer to Ethernet device structure.
1248 */
1249 int
1250 mlx5_dev_stop(struct rte_eth_dev *dev)
1251 {
1252 struct mlx5_priv *priv = dev->data->dev_private;
1253
1254 dev->data->dev_started = 0;
1255 /* Prevent crashes when queues are still in use. */
1256 dev->rx_pkt_burst = rte_eth_pkt_burst_dummy;
1257 dev->tx_pkt_burst = rte_eth_pkt_burst_dummy;
1258 rte_wmb();
1259 /* Disable datapath on secondary process. */
1260 mlx5_mp_os_req_stop_rxtx(dev);
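/* Give datapath threads still inside the old burst functions time to exit them, roughly 1 ms per Rx queue. */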
1261 rte_delay_us_sleep(1000 * priv->rxqs_n);
1262 DRV_LOG(DEBUG, "port %u stopping device", dev->data->port_id);
1263 mlx5_flow_stop_default(dev);
1264 /* Control flows for default traffic can be removed first. */
1265 mlx5_traffic_disable(dev);
1266 /* All RX queue flags will be cleared in the flush interface. */
1267 mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_GEN, true);
1268 mlx5_flow_meter_rxq_flush(dev);
1269 mlx5_action_handle_detach(dev);
1270 mlx5_rx_intr_vec_disable(dev);
1271 priv->sh->port[priv->dev_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
1272 priv->sh->port[priv->dev_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS;
1273 priv->sh->port[priv->dev_port - 1].nl_ih_port_id = RTE_MAX_ETHPORTS;
1274 mlx5_txq_stop(dev);
1275 mlx5_rxq_stop(dev);
1276 if (priv->obj_ops.lb_dummy_queue_release)
1277 priv->obj_ops.lb_dummy_queue_release(dev);
1278 mlx5_txpp_stop(dev);
1279
1280 return 0;
1281 }
1282
1283 /**
1284 * Enable traffic flows configured by control plane
1285 *
1286 * @param dev
1287 * Pointer to Ethernet device structure.
1288 *
1289 * @return
1290 * 0 on success, a negative errno value otherwise and rte_errno is set.
1291 */
1292 int
1293 mlx5_traffic_enable(struct rte_eth_dev *dev)
1294 {
1295 struct mlx5_priv *priv = dev->data->dev_private;
1296 struct rte_flow_item_eth bcast = {
1297 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1298 };
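/* 33:33:xx:xx:xx:xx is the Ethernet prefix of IPv6 multicast addresses (RFC 2464). */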
1299 struct rte_flow_item_eth ipv6_multi_spec = {
1300 .dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
1301 };
1302 struct rte_flow_item_eth ipv6_multi_mask = {
1303 .dst.addr_bytes = "\xff\xff\x00\x00\x00\x00",
1304 };
1305 struct rte_flow_item_eth unicast = {
1306 .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1307 };
1308 struct rte_flow_item_eth unicast_mask = {
1309 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1310 };
1311 const unsigned int vlan_filter_n = priv->vlan_filter_n;
1312 const struct rte_ether_addr cmp = {
1313 .addr_bytes = "\x00\x00\x00\x00\x00\x00",
1314 };
1315 unsigned int i;
1316 unsigned int j;
1317 int ret;
1318
1319 /*
1320 * The hairpin Tx queue default flow must be created regardless of the
1321 * isolation mode. Otherwise, all packets to be sent would go out
1322 * directly without the Tx flow actions, e.g. encapsulation.
1323 */
1324 for (i = 0; i != priv->txqs_n; ++i) {
1325 struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
1326 if (!txq_ctrl)
1327 continue;
1328 /* Only Tx implicit mode requires the default Tx flow. */
1329 if (txq_ctrl->is_hairpin &&
1330 txq_ctrl->hairpin_conf.tx_explicit == 0 &&
1331 txq_ctrl->hairpin_conf.peers[0].port ==
1332 priv->dev_data->port_id) {
1333 ret = mlx5_ctrl_flow_source_queue(dev, i);
1334 if (ret) {
1335 mlx5_txq_release(dev, i);
1336 goto error;
1337 }
1338 }
1339 if (priv->sh->config.dv_esw_en) {
1340 if (mlx5_flow_create_devx_sq_miss_flow(dev, i) == 0) {
1341 DRV_LOG(ERR,
1342 "Port %u Tx queue %u SQ create representor devx default miss rule failed.",
1343 dev->data->port_id, i);
1344 goto error;
1345 }
1346 }
1347 mlx5_txq_release(dev, i);
1348 }
1349 if (priv->sh->config.dv_esw_en) {
1350 if (mlx5_flow_create_esw_table_zero_flow(dev))
1351 priv->fdb_def_rule = 1;
1352 else
1353 DRV_LOG(INFO, "port %u FDB default rule cannot be"
1354 " configured - only Eswitch group 0 flows are"
1355 " supported.", dev->data->port_id);
1356 }
1357 if (!priv->sh->config.lacp_by_user && priv->pf_bond >= 0) {
1358 ret = mlx5_flow_lacp_miss(dev);
1359 if (ret)
1360 DRV_LOG(INFO, "port %u LACP rule cannot be created - "
1361 "forward LACP to kernel.", dev->data->port_id);
1362 else
1363 DRV_LOG(INFO, "LACP traffic will be missed in port %u."
1364 , dev->data->port_id);
1365 }
1366 if (priv->isolated)
1367 return 0;
1368 if (dev->data->promiscuous) {
1369 struct rte_flow_item_eth promisc = {
1370 .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1371 .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1372 .type = 0,
1373 };
1374
1375 ret = mlx5_ctrl_flow(dev, &promisc, &promisc);
1376 if (ret)
1377 goto error;
1378 }
1379 if (dev->data->all_multicast) {
1380 struct rte_flow_item_eth multicast = {
1381 .dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
1382 .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1383 .type = 0,
1384 };
1385
1386 ret = mlx5_ctrl_flow(dev, &multicast, &multicast);
1387 if (ret)
1388 goto error;
1389 } else {
1390 /* Add broadcast/multicast flows. */
1391 for (i = 0; i != vlan_filter_n; ++i) {
1392 uint16_t vlan = priv->vlan_filter[i];
1393
1394 struct rte_flow_item_vlan vlan_spec = {
1395 .tci = rte_cpu_to_be_16(vlan),
1396 };
1397 struct rte_flow_item_vlan vlan_mask =
1398 rte_flow_item_vlan_mask;
1399
1400 ret = mlx5_ctrl_flow_vlan(dev, &bcast, &bcast,
1401 &vlan_spec, &vlan_mask);
1402 if (ret)
1403 goto error;
1404 ret = mlx5_ctrl_flow_vlan(dev, &ipv6_multi_spec,
1405 &ipv6_multi_mask,
1406 &vlan_spec, &vlan_mask);
1407 if (ret)
1408 goto error;
1409 }
1410 if (!vlan_filter_n) {
1411 ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
1412 if (ret)
1413 goto error;
1414 ret = mlx5_ctrl_flow(dev, &ipv6_multi_spec,
1415 &ipv6_multi_mask);
1416 if (ret) {
1417 /* Do not fail on IPv6 broadcast creation failure. */
1418 DRV_LOG(WARNING,
1419 "IPv6 broadcast is not supported");
1420 ret = 0;
1421 }
1422 }
1423 }
1424 /* Add MAC address flows. */
1425 for (i = 0; i != MLX5_MAX_MAC_ADDRESSES; ++i) {
1426 struct rte_ether_addr *mac = &dev->data->mac_addrs[i];
1427
1428 if (!memcmp(mac, &cmp, sizeof(*mac)))
1429 continue;
1430 memcpy(&unicast.dst.addr_bytes,
1431 mac->addr_bytes,
1432 RTE_ETHER_ADDR_LEN);
1433 for (j = 0; j != vlan_filter_n; ++j) {
1434 uint16_t vlan = priv->vlan_filter[j];
1435
1436 struct rte_flow_item_vlan vlan_spec = {
1437 .tci = rte_cpu_to_be_16(vlan),
1438 };
1439 struct rte_flow_item_vlan vlan_mask =
1440 rte_flow_item_vlan_mask;
1441
1442 ret = mlx5_ctrl_flow_vlan(dev, &unicast,
1443 &unicast_mask,
1444 &vlan_spec,
1445 &vlan_mask);
1446 if (ret)
1447 goto error;
1448 }
1449 if (!vlan_filter_n) {
1450 ret = mlx5_ctrl_flow(dev, &unicast, &unicast_mask);
1451 if (ret)
1452 goto error;
1453 }
1454 }
1455 return 0;
1456 error:
1457 ret = rte_errno; /* Save rte_errno before cleanup. */
1458 mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
1459 rte_errno = ret; /* Restore rte_errno. */
1460 return -rte_errno;
1461 }
1462
1463
1464 /**
1465 * Disable traffic flows configured by control plane
1466 *
1467 * @param dev
1468 * Pointer to Ethernet device private data.
1469 */
1470 void
1471 mlx5_traffic_disable(struct rte_eth_dev *dev)
1472 {
1473 mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_CTL, false);
1474 }
1475
1476 /**
1477 * Restart traffic flows configured by control plane
1478 *
1479 * @param dev
1480 * Pointer to Ethernet device private data.
1481 *
1482 * @return
1483 * 0 on success, a negative errno value otherwise and rte_errno is set.
1484 */
1485 int
1486 mlx5_traffic_restart(struct rte_eth_dev *dev)
1487 {
1488 if (dev->data->dev_started) {
1489 mlx5_traffic_disable(dev);
1490 return mlx5_traffic_enable(dev);
1491 }
1492 return 0;
1493 }
1494