/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2015 Intel Corporation
 */

#include <sys/queue.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <stdint.h>
#include <stdarg.h>
#include <unistd.h>
#include <inttypes.h>

#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_interrupts.h>
#include <rte_pci.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_mempool.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_ether.h>
#include <ethdev_driver.h>
#include <rte_prefetch.h>
#include <rte_ip.h>
#include <rte_udp.h>
#include <rte_tcp.h>
#include <rte_sctp.h>
#include <rte_string_fns.h>
#include <rte_errno.h>
#include <rte_net.h>

#include "base/vmxnet3_defs.h"
#include "vmxnet3_ring.h"

#include "vmxnet3_logs.h"
#include "vmxnet3_ethdev.h"

#define VMXNET3_TX_OFFLOAD_MASK (RTE_MBUF_F_TX_VLAN | \
		RTE_MBUF_F_TX_IPV6 | \
		RTE_MBUF_F_TX_IPV4 | \
		RTE_MBUF_F_TX_L4_MASK | \
		RTE_MBUF_F_TX_TCP_SEG)

#define VMXNET3_TX_OFFLOAD_NOTSUP_MASK \
	(RTE_MBUF_F_TX_OFFLOAD_MASK ^ VMXNET3_TX_OFFLOAD_MASK)

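/* BAR0 producer registers for the two Rx command rings (ring 0 and ring 1). */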
static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};

static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t);
static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
#endif

#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
static void
vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
{
	uint32_t avail = 0;

	if (rxq == NULL)
		return;

	PMD_RX_LOG(DEBUG,
		   "RXQ: cmd0 base : %p cmd1 base : %p comp ring base : %p.",
		   rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
	PMD_RX_LOG(DEBUG,
		   "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
		   (unsigned long)rxq->cmd_ring[0].basePA,
		   (unsigned long)rxq->cmd_ring[1].basePA,
		   (unsigned long)rxq->comp_ring.basePA);

	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
	PMD_RX_LOG(DEBUG,
		   "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[0].size, avail,
		   rxq->comp_ring.next2proc,
		   rxq->cmd_ring[0].size - avail);

	avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
	PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
		   rxq->cmd_ring[1].size - avail);
}

static void
vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
{
	uint32_t avail = 0;

	if (txq == NULL)
		return;

	PMD_TX_LOG(DEBUG, "TXQ: cmd base : %p comp ring base : %p data ring base : %p.",
		   txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
	PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
		   (unsigned long)txq->cmd_ring.basePA,
		   (unsigned long)txq->comp_ring.basePA,
		   (unsigned long)txq->data_ring.basePA);

	avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
	PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
		   (uint32_t)txq->cmd_ring.size, avail,
		   txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
}
#endif

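/*
 * Walk the Tx command ring from next2comp to next2fill and free any mbufs
 * still attached to the descriptors. Only used once the device is quiesced.
 */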
static void
vmxnet3_tx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
{
	while (ring->next2comp != ring->next2fill) {
		/* No need to worry about desc ownership, device is quiesced by now. */
		vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;

		if (buf_info->m) {
			rte_pktmbuf_free(buf_info->m);
			buf_info->m = NULL;
			buf_info->bufPA = 0;
			buf_info->len = 0;
		}
		vmxnet3_cmd_ring_adv_next2comp(ring);
	}
}

static void
vmxnet3_rx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
{
	uint32_t i;

	for (i = 0; i < ring->size; i++) {
		/* No need to worry about desc ownership, device is quiesced by now. */
		vmxnet3_buf_info_t *buf_info = &ring->buf_info[i];

		if (buf_info->m) {
			rte_pktmbuf_free_seg(buf_info->m);
			buf_info->m = NULL;
			buf_info->bufPA = 0;
			buf_info->len = 0;
		}
		vmxnet3_cmd_ring_adv_next2comp(ring);
	}
}

static void
vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
{
	rte_free(ring->buf_info);
	ring->buf_info = NULL;
}

void
vmxnet3_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
{
	vmxnet3_tx_queue_t *tq = dev->data->tx_queues[qid];

	if (tq != NULL) {
		/* Release mbufs */
		vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
		/* Release the cmd_ring */
		vmxnet3_cmd_ring_release(&tq->cmd_ring);
		/* Release the memzone */
		rte_memzone_free(tq->mz);
		/* Release the queue */
		rte_free(tq);
	}
}

void
vmxnet3_dev_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
{
	int i;
	vmxnet3_rx_queue_t *rq = dev->data->rx_queues[qid];

	if (rq != NULL) {
		/* Release mbufs */
		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
			vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);

		/* Release both the cmd_rings */
		for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
			vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);

		/* Release the memzone */
		rte_memzone_free(rq->mz);

		/* Release the queue */
		rte_free(rq);
	}
}

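/*
 * Reset a Tx queue to its post-setup state: free any queued mbufs, rewind
 * the ring indices, restore the initial generation bits and zero the
 * descriptor memory shared with the device.
 */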
static void
vmxnet3_dev_tx_queue_reset(void *txq)
{
	vmxnet3_tx_queue_t *tq = txq;
	struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
	struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
	struct vmxnet3_data_ring *data_ring = &tq->data_ring;
	int size;

	if (tq != NULL) {
		/* Release the cmd_ring mbufs */
		vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
	}

	/* Tx vmxnet rings structure initialization */
	ring->next2fill = 0;
	ring->next2comp = 0;
	ring->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
	size += tq->txdata_desc_size * data_ring->size;

	memset(ring->base, 0, size);
}

static void
vmxnet3_dev_rx_queue_reset(void *rxq)
{
	int i;
	vmxnet3_rx_queue_t *rq = rxq;
	struct vmxnet3_hw *hw = rq->hw;
	struct vmxnet3_cmd_ring *ring0, *ring1;
	struct vmxnet3_comp_ring *comp_ring;
	struct vmxnet3_rx_data_ring *data_ring = &rq->data_ring;
	int size;

	/* Release both the cmd_rings mbufs */
	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
		vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);

	ring0 = &rq->cmd_ring[0];
	ring1 = &rq->cmd_ring[1];
	comp_ring = &rq->comp_ring;

	/* Rx vmxnet rings structure initialization */
	ring0->next2fill = 0;
	ring1->next2fill = 0;
	ring0->next2comp = 0;
	ring1->next2comp = 0;
	ring0->gen = VMXNET3_INIT_GEN;
	ring1->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
	if (VMXNET3_VERSION_GE_3(hw) && rq->data_desc_size)
		size += rq->data_desc_size * data_ring->size;

	memset(ring0->base, 0, size);
}

void
vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
{
	unsigned i;

	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

		if (txq != NULL) {
			txq->stopped = TRUE;
			vmxnet3_dev_tx_queue_reset(txq);
		}
	}

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];

		if (rxq != NULL) {
			rxq->stopped = TRUE;
			vmxnet3_dev_rx_queue_reset(rxq);
		}
	}
}

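/*
 * Free the mbuf referenced by the EOP descriptor and advance next2comp past
 * every descriptor belonging to that packet. Returns the number of command
 * ring descriptors released.
 */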
static int
vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
{
	int completed = 0;
	struct rte_mbuf *mbuf;

	/* Release cmd_ring descriptor and free mbuf */
	RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);

	mbuf = txq->cmd_ring.buf_info[eop_idx].m;
	if (mbuf == NULL)
		rte_panic("EOP desc does not point to a valid mbuf");
	rte_pktmbuf_free(mbuf);

	txq->cmd_ring.buf_info[eop_idx].m = NULL;

	while (txq->cmd_ring.next2comp != eop_idx) {
		/* no out-of-order completion */
		RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
		vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
		completed++;
	}

	/* Mark the txd for which tcd was generated as completed */
	vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);

	return completed + 1;
}

static void
vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
{
	int completed = 0;
	vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
	struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
		(comp_ring->base + comp_ring->next2proc);

	while (tcd->gen == comp_ring->gen) {
		completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);

		vmxnet3_comp_ring_adv_next2proc(comp_ring);
		tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
						    comp_ring->next2proc);
	}

	PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);

	/* To avoid compiler warnings when not in DEBUG mode. */
	RTE_SET_USED(completed);
}

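/*
 * Tx prepare callback: check that each mbuf fits the device limits
 * (segment count, supported offload flags) and fix up checksum fields
 * before the burst is handed to vmxnet3_xmit_pkts().
 */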
uint16_t
vmxnet3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
		  uint16_t nb_pkts)
{
	int32_t ret;
	uint32_t i;
	uint64_t ol_flags;
	struct rte_mbuf *m;

	for (i = 0; i != nb_pkts; i++) {
		m = tx_pkts[i];
		ol_flags = m->ol_flags;

		/* Non-TSO packet cannot occupy more than
		 * VMXNET3_MAX_TXD_PER_PKT TX descriptors.
		 */
		if ((ol_flags & RTE_MBUF_F_TX_TCP_SEG) == 0 &&
		    m->nb_segs > VMXNET3_MAX_TXD_PER_PKT) {
			rte_errno = EINVAL;
			return i;
		}

		/* check that only supported TX offloads are requested. */
		if ((ol_flags & VMXNET3_TX_OFFLOAD_NOTSUP_MASK) != 0 ||
		    (ol_flags & RTE_MBUF_F_TX_L4_MASK) ==
		    RTE_MBUF_F_TX_SCTP_CKSUM) {
			rte_errno = ENOTSUP;
			return i;
		}

#ifdef RTE_LIBRTE_ETHDEV_DEBUG
		ret = rte_validate_tx_offload(m);
		if (ret != 0) {
			rte_errno = -ret;
			return i;
		}
#endif
		ret = rte_net_intel_cksum_prepare(m);
		if (ret != 0) {
			rte_errno = -ret;
			return i;
		}
	}

	return i;
}

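/*
 * Burst transmit: reclaim completed descriptors, then build one SOP..EOP
 * descriptor chain per packet. Small single-segment packets are copied into
 * the Tx data ring; the SOP generation bit is flipped last so the device
 * only ever sees a fully written chain. The doorbell is rung once
 * txNumDeferred reaches the txThreshold advertised in the shared queue
 * control block.
 */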
uint16_t
vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
		  uint16_t nb_pkts)
{
	uint16_t nb_tx;
	vmxnet3_tx_queue_t *txq = tx_queue;
	struct vmxnet3_hw *hw = txq->hw;
	Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
	uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);

	if (unlikely(txq->stopped)) {
		PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
		return 0;
	}

	/* Free up the comp_descriptors aggressively */
	vmxnet3_tq_tx_complete(txq);

	nb_tx = 0;
	while (nb_tx < nb_pkts) {
		Vmxnet3_GenericDesc *gdesc;
		vmxnet3_buf_info_t *tbi;
		uint32_t first2fill, avail, dw2;
		struct rte_mbuf *txm = tx_pkts[nb_tx];
		struct rte_mbuf *m_seg = txm;
		int copy_size = 0;
		bool tso = (txm->ol_flags & RTE_MBUF_F_TX_TCP_SEG) != 0;
		/* # of descriptors needed for a packet. */
		unsigned count = txm->nb_segs;

		avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
		if (count > avail) {
			/* Is command ring full? */
			if (unlikely(avail == 0)) {
				PMD_TX_LOG(DEBUG, "No free ring descriptors");
				txq->stats.tx_ring_full++;
				txq->stats.drop_total += (nb_pkts - nb_tx);
				break;
			}

			/* Command ring is not full but cannot handle the
			 * multi-segmented packet. Let's try the next packet
			 * in this case.
			 */
			PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
				   "(avail %d needed %d)", avail, count);
			txq->stats.drop_total++;
			if (tso)
				txq->stats.drop_tso++;
			rte_pktmbuf_free(txm);
			nb_tx++;
			continue;
		}

		/* Drop non-TSO packet that is excessively fragmented */
		if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
			PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
				   "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
			txq->stats.drop_too_many_segs++;
			txq->stats.drop_total++;
			rte_pktmbuf_free(txm);
			nb_tx++;
			continue;
		}

		if (txm->nb_segs == 1 &&
		    rte_pktmbuf_pkt_len(txm) <= txq->txdata_desc_size) {
			struct Vmxnet3_TxDataDesc *tdd;

			/* Skip empty packets */
			if (unlikely(rte_pktmbuf_pkt_len(txm) == 0)) {
				txq->stats.drop_total++;
				rte_pktmbuf_free(txm);
				nb_tx++;
				continue;
			}

			tdd = (struct Vmxnet3_TxDataDesc *)
				((uint8 *)txq->data_ring.base +
				 txq->cmd_ring.next2fill *
				 txq->txdata_desc_size);
			copy_size = rte_pktmbuf_pkt_len(txm);
			rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
		}

		/* use the previous gen bit for the SOP desc */
		dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
		first2fill = txq->cmd_ring.next2fill;
		do {
			/* Remember the transmit buffer for cleanup */
			tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;

			/* NB: the following assumes that VMXNET3 maximum
			 * transmit buffer size (16K) is greater than
			 * maximum size of mbuf segment size.
			 */
			gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;

			/* Skip empty segments */
			if (unlikely(m_seg->data_len == 0))
				continue;

			if (copy_size) {
				uint64 offset =
					(uint64)txq->cmd_ring.next2fill *
						txq->txdata_desc_size;
				gdesc->txd.addr =
					rte_cpu_to_le_64(txq->data_ring.basePA +
							 offset);
			} else {
				gdesc->txd.addr = rte_mbuf_data_iova(m_seg);
			}

			gdesc->dword[2] = dw2 | m_seg->data_len;
			gdesc->dword[3] = 0;

			/* move to the next2fill descriptor */
			vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);

			/* use the right gen for non-SOP desc */
			dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
		} while ((m_seg = m_seg->next) != NULL);

		/* set the last buf_info for the pkt */
		tbi->m = txm;
		/* Update the EOP descriptor */
		gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;

		/* Add VLAN tag if present */
		gdesc = txq->cmd_ring.base + first2fill;
		if (txm->ol_flags & RTE_MBUF_F_TX_VLAN) {
			gdesc->txd.ti = 1;
			gdesc->txd.tci = txm->vlan_tci;
		}

		if (tso) {
			uint16_t mss = txm->tso_segsz;

			RTE_ASSERT(mss > 0);

			gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
			gdesc->txd.om = VMXNET3_OM_TSO;
			gdesc->txd.msscof = mss;

			deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
		} else if (txm->ol_flags & RTE_MBUF_F_TX_L4_MASK) {
			gdesc->txd.om = VMXNET3_OM_CSUM;
			gdesc->txd.hlen = txm->l2_len + txm->l3_len;

			switch (txm->ol_flags & RTE_MBUF_F_TX_L4_MASK) {
			case RTE_MBUF_F_TX_TCP_CKSUM:
				gdesc->txd.msscof = gdesc->txd.hlen +
					offsetof(struct rte_tcp_hdr, cksum);
				break;
			case RTE_MBUF_F_TX_UDP_CKSUM:
				gdesc->txd.msscof = gdesc->txd.hlen +
					offsetof(struct rte_udp_hdr,
						 dgram_cksum);
				break;
			default:
				PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx",
					   txm->ol_flags & RTE_MBUF_F_TX_L4_MASK);
				abort();
			}
			deferred++;
		} else {
			gdesc->txd.hlen = 0;
			gdesc->txd.om = VMXNET3_OM_NONE;
			gdesc->txd.msscof = 0;
			deferred++;
		}

		/* flip the GEN bit on the SOP */
		rte_compiler_barrier();
		gdesc->dword[2] ^= VMXNET3_TXD_GEN;

		txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
		nb_tx++;
	}

	PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));

	if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
		txq_ctrl->txNumDeferred = 0;
		/* Notify vSwitch that packets are available. */
		VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
				       txq->cmd_ring.next2fill);
	}

	return nb_tx;
}

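/*
 * Attach a fresh mbuf to the next free Rx descriptor of the given command
 * ring and hand it to the device by writing the ring's generation bit last.
 */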
static inline void
vmxnet3_renew_desc(vmxnet3_rx_queue_t *rxq, uint8_t ring_id,
		   struct rte_mbuf *mbuf)
{
	uint32_t val;
	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
	struct Vmxnet3_RxDesc *rxd =
		(struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
	vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];

	if (ring_id == 0) {
		/* Usually: One HEAD type buf per packet
		 * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
		 * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
		 */

		/* We use single packet buffer so all heads here */
		val = VMXNET3_RXD_BTYPE_HEAD;
	} else {
		/* All BODY type buffers for 2nd ring */
		val = VMXNET3_RXD_BTYPE_BODY;
	}

	/*
	 * Load the mbuf pointer into the buf_info entry for this descriptor;
	 * buf_info is the equivalent of the virtio-virtqueue cookie.
	 */
	buf_info->m = mbuf;
	buf_info->len = (uint16_t)(mbuf->buf_len - RTE_PKTMBUF_HEADROOM);
	buf_info->bufPA = rte_mbuf_data_iova_default(mbuf);

	/* Load Rx Descriptor with the buffer's GPA */
	rxd->addr = buf_info->bufPA;

	/* After this point rxd->addr MUST not be NULL */
	rxd->btype = val;
	rxd->len = buf_info->len;
	/* Flip gen bit at the end to change ownership */
	rxd->gen = ring->gen;

	vmxnet3_cmd_ring_adv_next2fill(ring);
}
/*
 * Allocate mbufs and post Rx descriptors with the buffer details so that
 * the device can receive packets into those buffers.
 * Ring layout:
 * Among the two rings, the 1st ring contains buffers of type 0 and type 1.
 * bufs_per_pkt is set such that for non-LRO cases all the buffers required
 * by a frame fit in the 1st ring (1st buf of type 0 and the rest of type 1).
 * The 2nd ring contains only type 1 buffers and is mostly used for LRO.
 */
static int
vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
{
	int err = 0;
	uint32_t i = 0;
	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];

	while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
		struct rte_mbuf *mbuf;

		/* Allocate blank mbuf for the current Rx Descriptor */
		mbuf = rte_mbuf_raw_alloc(rxq->mp);
		if (unlikely(mbuf == NULL)) {
			PMD_RX_LOG(ERR, "Error allocating mbuf");
			rxq->stats.rx_buf_alloc_failure++;
			err = ENOMEM;
			break;
		}

		vmxnet3_renew_desc(rxq, ring_id, mbuf);
		i++;
	}

	/* Return error only if no buffers are posted at present */
	if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
		return -err;
	else
		return i;
}

/* MSS not provided by vmxnet3, guess one with available information */
static uint16_t
vmxnet3_guess_mss(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
		  struct rte_mbuf *rxm)
{
	uint32_t hlen, slen;
	struct rte_ipv4_hdr *ipv4_hdr;
	struct rte_ipv6_hdr *ipv6_hdr;
	struct rte_tcp_hdr *tcp_hdr;
	char *ptr;
	uint8_t segs;

	RTE_ASSERT(rcd->tcp);

	ptr = rte_pktmbuf_mtod(rxm, char *);
	slen = rte_pktmbuf_data_len(rxm);
	hlen = sizeof(struct rte_ether_hdr);

	if (rcd->v4) {
		if (unlikely(slen < hlen + sizeof(struct rte_ipv4_hdr)))
			return hw->mtu - sizeof(struct rte_ipv4_hdr)
				- sizeof(struct rte_tcp_hdr);

		ipv4_hdr = (struct rte_ipv4_hdr *)(ptr + hlen);
		hlen += rte_ipv4_hdr_len(ipv4_hdr);
	} else if (rcd->v6) {
		if (unlikely(slen < hlen + sizeof(struct rte_ipv6_hdr)))
			return hw->mtu - sizeof(struct rte_ipv6_hdr) -
			       sizeof(struct rte_tcp_hdr);

		ipv6_hdr = (struct rte_ipv6_hdr *)(ptr + hlen);
		hlen += sizeof(struct rte_ipv6_hdr);
		if (unlikely(ipv6_hdr->proto != IPPROTO_TCP)) {
			int frag;

			rte_net_skip_ip6_ext(ipv6_hdr->proto, rxm,
					     &hlen, &frag);
		}
	}

	if (unlikely(slen < hlen + sizeof(struct rte_tcp_hdr)))
		return hw->mtu - hlen - sizeof(struct rte_tcp_hdr) +
		       sizeof(struct rte_ether_hdr);

	tcp_hdr = (struct rte_tcp_hdr *)(ptr + hlen);
	hlen += (tcp_hdr->data_off & 0xf0) >> 2;

	segs = *vmxnet3_segs_dynfield(rxm);
	if (segs > 1)
		return (rte_pktmbuf_pkt_len(rxm) - hlen + segs - 1) / segs;
	else
		return hw->mtu - hlen + sizeof(struct rte_ether_hdr);
}

/* Receive side checksum and other offloads */
static inline void
vmxnet3_rx_offload(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
		   struct rte_mbuf *rxm, const uint8_t sop)
{
	uint64_t ol_flags = rxm->ol_flags;
	uint32_t packet_type = rxm->packet_type;

	/* Offloads set in sop */
	if (sop) {
		/* Set packet type */
		packet_type |= RTE_PTYPE_L2_ETHER;

		/* Check large packet receive */
		if (VMXNET3_VERSION_GE_2(hw) &&
		    rcd->type == VMXNET3_CDTYPE_RXCOMP_LRO) {
			const Vmxnet3_RxCompDescExt *rcde =
				(const Vmxnet3_RxCompDescExt *)rcd;

			rxm->tso_segsz = rcde->mss;
			*vmxnet3_segs_dynfield(rxm) = rcde->segCnt;
			ol_flags |= RTE_MBUF_F_RX_LRO;
		}
	} else { /* Offloads set in eop */
		/* Check for RSS */
		if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
			ol_flags |= RTE_MBUF_F_RX_RSS_HASH;
			rxm->hash.rss = rcd->rssHash;
		}

		/* Check for hardware stripped VLAN tag */
		if (rcd->ts) {
			ol_flags |= (RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED);
			rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
		}

		/* Check packet type, checksum errors, etc. */
		if (rcd->cnc) {
			ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_UNKNOWN;
		} else {
			if (rcd->v4) {
				packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;

				if (rcd->ipc)
					ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD;
				else
					ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD;

				if (rcd->tuc) {
					ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;
					if (rcd->tcp)
						packet_type |= RTE_PTYPE_L4_TCP;
					else
						packet_type |= RTE_PTYPE_L4_UDP;
				} else {
					if (rcd->tcp) {
						packet_type |= RTE_PTYPE_L4_TCP;
						ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
					} else if (rcd->udp) {
						packet_type |= RTE_PTYPE_L4_UDP;
						ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
					}
				}
			} else if (rcd->v6) {
				packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;

				if (rcd->tuc) {
					ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;
					if (rcd->tcp)
						packet_type |= RTE_PTYPE_L4_TCP;
					else
						packet_type |= RTE_PTYPE_L4_UDP;
				} else {
					if (rcd->tcp) {
						packet_type |= RTE_PTYPE_L4_TCP;
						ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
					} else if (rcd->udp) {
						packet_type |= RTE_PTYPE_L4_UDP;
						ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
					}
				}
			} else {
				packet_type |= RTE_PTYPE_UNKNOWN;
			}

			/* Old variants of vmxnet3 do not provide MSS */
			if ((ol_flags & RTE_MBUF_F_RX_LRO) && rxm->tso_segsz == 0)
				rxm->tso_segsz = vmxnet3_guess_mss(hw,
								   rcd, rxm);
		}
	}

	rxm->ol_flags = ol_flags;
	rxm->packet_type = packet_type;
}

/*
 * Process the Rx Completion Ring of given vmxnet3_rx_queue
 * for nb_pkts burst and return the number of packets received
 */
uint16_t
vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
	uint16_t nb_rx;
	uint32_t nb_rxd, idx;
	uint8_t ring_idx;
	vmxnet3_rx_queue_t *rxq;
	Vmxnet3_RxCompDesc *rcd;
	vmxnet3_buf_info_t *rbi;
	Vmxnet3_RxDesc *rxd;
	struct rte_mbuf *rxm = NULL;
	struct vmxnet3_hw *hw;

	nb_rx = 0;
	ring_idx = 0;
	nb_rxd = 0;
	idx = 0;

	rxq = rx_queue;
	hw = rxq->hw;

	rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;

	if (unlikely(rxq->stopped)) {
		PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
		return 0;
	}

	while (rcd->gen == rxq->comp_ring.gen) {
		struct rte_mbuf *newm;

		if (nb_rx >= nb_pkts)
			break;

		newm = rte_mbuf_raw_alloc(rxq->mp);
		if (unlikely(newm == NULL)) {
			PMD_RX_LOG(ERR, "Error allocating mbuf");
			rxq->stats.rx_buf_alloc_failure++;
			break;
		}

		idx = rcd->rxdIdx;
		ring_idx = vmxnet3_get_ring_idx(hw, rcd->rqID);
		rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
		RTE_SET_USED(rxd); /* used only for assert when enabled */
		rbi = rxq->cmd_ring[ring_idx].buf_info + idx;

		PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);

		RTE_ASSERT(rcd->len <= rxd->len);
		RTE_ASSERT(rbi->m);

		/* Get the packet buffer pointer from buf_info */
		rxm = rbi->m;

		/* Clear descriptor associated buf_info to be reused */
		rbi->m = NULL;
		rbi->bufPA = 0;

		/* Update the index that we received a packet */
		rxq->cmd_ring[ring_idx].next2comp = idx;

		/* For RCD with EOP set, check if there is frame error */
		if (unlikely(rcd->eop && rcd->err)) {
			rxq->stats.drop_total++;
			rxq->stats.drop_err++;

			if (!rcd->fcs) {
				rxq->stats.drop_fcs++;
				PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
			}
			PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
				   (int)(rcd - (struct Vmxnet3_RxCompDesc *)
					 rxq->comp_ring.base), rcd->rxdIdx);
			rte_pktmbuf_free_seg(rxm);
			if (rxq->start_seg) {
				struct rte_mbuf *start = rxq->start_seg;

				rxq->start_seg = NULL;
				rte_pktmbuf_free(start);
			}
			goto rcd_done;
		}

		/* Initialize newly received packet buffer */
		rxm->port = rxq->port_id;
		rxm->nb_segs = 1;
		rxm->next = NULL;
		rxm->pkt_len = (uint16_t)rcd->len;
		rxm->data_len = (uint16_t)rcd->len;
		rxm->data_off = RTE_PKTMBUF_HEADROOM;
		rxm->ol_flags = 0;
		rxm->vlan_tci = 0;
		rxm->packet_type = 0;

		/*
		 * If this is the first buffer of the received packet,
		 * set the pointer to the first mbuf of the packet
		 * Otherwise, update the total length and the number of segments
		 * of the current scattered packet, and update the pointer to
		 * the last mbuf of the current packet.
		 */
		if (rcd->sop) {
			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);

			if (unlikely(rcd->len == 0)) {
				RTE_ASSERT(rcd->eop);

				PMD_RX_LOG(DEBUG,
					   "Rx buf was skipped. rxring[%d][%d])",
					   ring_idx, idx);
				rte_pktmbuf_free_seg(rxm);
				goto rcd_done;
			}

			if (vmxnet3_rx_data_ring(hw, rcd->rqID)) {
				uint8_t *rdd = rxq->data_ring.base +
					idx * rxq->data_desc_size;

				RTE_ASSERT(VMXNET3_VERSION_GE_3(hw));
				rte_memcpy(rte_pktmbuf_mtod(rxm, char *),
					   rdd, rcd->len);
			}

			rxq->start_seg = rxm;
			rxq->last_seg = rxm;
			vmxnet3_rx_offload(hw, rcd, rxm, 1);
		} else {
			struct rte_mbuf *start = rxq->start_seg;

			RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);

			if (likely(start && rxm->data_len > 0)) {
				start->pkt_len += rxm->data_len;
				start->nb_segs++;

				rxq->last_seg->next = rxm;
				rxq->last_seg = rxm;
			} else {
				PMD_RX_LOG(ERR, "Error received empty or out of order frame.");
				rxq->stats.drop_total++;
				rxq->stats.drop_err++;

				rte_pktmbuf_free_seg(rxm);
			}
		}

		if (rcd->eop) {
			struct rte_mbuf *start = rxq->start_seg;

			vmxnet3_rx_offload(hw, rcd, start, 0);
			rx_pkts[nb_rx++] = start;
			rxq->start_seg = NULL;
		}

rcd_done:
		rxq->cmd_ring[ring_idx].next2comp = idx;
		VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp,
					  rxq->cmd_ring[ring_idx].size);

		/* It's time to renew descriptors */
		vmxnet3_renew_desc(rxq, ring_idx, newm);
		if (unlikely(rxq->shared->ctrl.updateRxProd)) {
			VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
					       rxq->cmd_ring[ring_idx].next2fill);
		}

		/* Advance to the next descriptor in comp_ring */
		vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);

		rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
		nb_rxd++;
		if (nb_rxd > rxq->cmd_ring[0].size) {
			PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
				   " relinquish control.");
			break;
		}
	}

	if (unlikely(nb_rxd == 0)) {
		uint32_t avail;
		for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
			avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[ring_idx]);
			if (unlikely(avail > 0)) {
				/* try to alloc new buf and renew descriptors */
				vmxnet3_post_rx_bufs(rxq, ring_idx);
			}
		}
		if (unlikely(rxq->shared->ctrl.updateRxProd)) {
			for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
				VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
						       rxq->cmd_ring[ring_idx].next2fill);
			}
		}
	}

	return nb_rx;
}

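/*
 * Set up a Tx queue: one contiguous memzone holds the command ring, the
 * completion ring and the data ring back to back, with a separate buf_info
 * array tracking the mbuf attached to each command descriptor.
 */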
int
vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
			   uint16_t queue_idx,
			   uint16_t nb_desc,
			   unsigned int socket_id,
			   const struct rte_eth_txconf *tx_conf __rte_unused)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;
	const struct rte_memzone *mz;
	struct vmxnet3_tx_queue *txq;
	struct vmxnet3_cmd_ring *ring;
	struct vmxnet3_comp_ring *comp_ring;
	struct vmxnet3_data_ring *data_ring;
	int size;

	PMD_INIT_FUNC_TRACE();

	txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue),
			  RTE_CACHE_LINE_SIZE);
	if (txq == NULL) {
		PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
		return -ENOMEM;
	}

	txq->queue_id = queue_idx;
	txq->port_id = dev->data->port_id;
	txq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
	txq->hw = hw;
	txq->qid = queue_idx;
	txq->stopped = TRUE;
	txq->txdata_desc_size = hw->txdata_desc_size;

	ring = &txq->cmd_ring;
	comp_ring = &txq->comp_ring;
	data_ring = &txq->data_ring;

	/* Tx vmxnet ring length should be between 512-4096 */
	if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
			     VMXNET3_DEF_TX_RING_SIZE);
		return -EINVAL;
	} else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
			     VMXNET3_TX_RING_MAX_SIZE);
		return -EINVAL;
	} else {
		ring->size = nb_desc;
		ring->size &= ~VMXNET3_RING_SIZE_MASK;
	}
	comp_ring->size = data_ring->size = ring->size;

	/* Tx vmxnet rings structure initialization */
	ring->next2fill = 0;
	ring->next2comp = 0;
	ring->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
	size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
	size += txq->txdata_desc_size * data_ring->size;

	mz = rte_eth_dma_zone_reserve(dev, "txdesc", queue_idx, size,
				      VMXNET3_RING_BA_ALIGN, socket_id);
	if (mz == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
		return -ENOMEM;
	}
	txq->mz = mz;
	memset(mz->addr, 0, mz->len);

	/* cmd_ring initialization */
	ring->base = mz->addr;
	ring->basePA = mz->iova;

	/* comp_ring initialization */
	comp_ring->base = ring->base + ring->size;
	comp_ring->basePA = ring->basePA +
		(sizeof(struct Vmxnet3_TxDesc) * ring->size);

	/* data_ring initialization */
	data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
	data_ring->basePA = comp_ring->basePA +
		(sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);

	/* cmd_ring0 buf_info allocation */
	ring->buf_info = rte_zmalloc("tx_ring_buf_info",
				     ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
	if (ring->buf_info == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
		return -ENOMEM;
	}

	/* Update the data portion with txq */
	dev->data->tx_queues[queue_idx] = txq;

	return 0;
}

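/*
 * Set up an Rx queue: the two command rings, the completion ring and, on
 * hardware version 3+, the Rx data ring share one memzone; each command
 * ring gets its own buf_info array for the mbufs posted to the device.
 */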
int
vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
			   uint16_t queue_idx,
			   uint16_t nb_desc,
			   unsigned int socket_id,
			   __rte_unused const struct rte_eth_rxconf *rx_conf,
			   struct rte_mempool *mp)
{
	const struct rte_memzone *mz;
	struct vmxnet3_rx_queue *rxq;
	struct vmxnet3_hw *hw = dev->data->dev_private;
	struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
	struct vmxnet3_comp_ring *comp_ring;
	struct vmxnet3_rx_data_ring *data_ring;
	int size;
	uint8_t i;
	char mem_name[32];

	PMD_INIT_FUNC_TRACE();

	rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue),
			  RTE_CACHE_LINE_SIZE);
	if (rxq == NULL) {
		PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
		return -ENOMEM;
	}

	rxq->mp = mp;
	rxq->queue_id = queue_idx;
	rxq->port_id = dev->data->port_id;
	rxq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
	rxq->hw = hw;
	rxq->qid1 = queue_idx;
	rxq->qid2 = queue_idx + hw->num_rx_queues;
	rxq->data_ring_qid = queue_idx + 2 * hw->num_rx_queues;
	rxq->data_desc_size = hw->rxdata_desc_size;
	rxq->stopped = TRUE;

	ring0 = &rxq->cmd_ring[0];
	ring1 = &rxq->cmd_ring[1];
	comp_ring = &rxq->comp_ring;
	data_ring = &rxq->data_ring;

	/* Rx vmxnet rings length should be between 256-4096 */
	if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
		return -EINVAL;
	} else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
		PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
		return -EINVAL;
	} else {
		ring0->size = nb_desc;
		ring0->size &= ~VMXNET3_RING_SIZE_MASK;
		ring1->size = ring0->size;
	}

	comp_ring->size = ring0->size + ring1->size;
	data_ring->size = ring0->size;

	/* Rx vmxnet rings structure initialization */
	ring0->next2fill = 0;
	ring1->next2fill = 0;
	ring0->next2comp = 0;
	ring1->next2comp = 0;
	ring0->gen = VMXNET3_INIT_GEN;
	ring1->gen = VMXNET3_INIT_GEN;
	comp_ring->next2proc = 0;
	comp_ring->gen = VMXNET3_INIT_GEN;

	size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
	size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
	if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size)
		size += rxq->data_desc_size * data_ring->size;

	mz = rte_eth_dma_zone_reserve(dev, "rxdesc", queue_idx, size,
				      VMXNET3_RING_BA_ALIGN, socket_id);
	if (mz == NULL) {
		PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
		return -ENOMEM;
	}
	rxq->mz = mz;
	memset(mz->addr, 0, mz->len);

	/* cmd_ring0 initialization */
	ring0->base = mz->addr;
	ring0->basePA = mz->iova;

	/* cmd_ring1 initialization */
	ring1->base = ring0->base + ring0->size;
	ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;

	/* comp_ring initialization */
	comp_ring->base = ring1->base + ring1->size;
	comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
		ring1->size;

	/* data_ring initialization */
	if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size) {
		data_ring->base =
			(uint8_t *)(comp_ring->base + comp_ring->size);
		data_ring->basePA = comp_ring->basePA +
			sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
	}

	/* cmd_ring0-cmd_ring1 buf_info allocation */
	for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
		ring = &rxq->cmd_ring[i];
		ring->rid = i;
		snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);

		ring->buf_info = rte_zmalloc(mem_name,
					     ring->size * sizeof(vmxnet3_buf_info_t),
					     RTE_CACHE_LINE_SIZE);
		if (ring->buf_info == NULL) {
			PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
			return -ENOMEM;
		}
	}

	/* Update the data portion with rxq */
	dev->data->rx_queues[queue_idx] = rxq;

	return 0;
}

/*
 * Initialize the Receive Unit: load mbufs into the Rx queues in advance.
 */
int
vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;

	int i, ret;
	uint8_t j;

	PMD_INIT_FUNC_TRACE();

	for (i = 0; i < hw->num_rx_queues; i++) {
		vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];

		for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
			/* Passing 0 as alloc_num will allocate full ring */
			ret = vmxnet3_post_rx_bufs(rxq, j);
			if (ret <= 0) {
				PMD_INIT_LOG(ERR,
					     "ERROR: Posting Rxq: %d buffers ring: %d",
					     i, j);
				return -ret;
			}
			/*
			 * Update the device with index next2fill so it can
			 * fill mbufs for incoming packets.
			 */
			if (unlikely(rxq->shared->ctrl.updateRxProd)) {
				VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
						       rxq->cmd_ring[j].next2fill);
			}
		}
		rxq->stopped = FALSE;
		rxq->start_seg = NULL;
	}

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];

		txq->stopped = FALSE;
	}

	return 0;
}

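/* Default Toeplitz RSS hash key, used when the application supplies none. */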
static uint8_t rss_intel_key[40] = {
	0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
	0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
	0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
	0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
	0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
};

/*
 * Additional RSS configurations based on vmxnet v4+ APIs
 */
int
vmxnet3_v4_rss_configure(struct rte_eth_dev *dev)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;
	Vmxnet3_DriverShared *shared = hw->shared;
	Vmxnet3_CmdInfo *cmdInfo = &shared->cu.cmdInfo;
	struct rte_eth_rss_conf *port_rss_conf;
	uint64_t rss_hf;
	uint32_t ret;

	PMD_INIT_FUNC_TRACE();

	cmdInfo->setRSSFields = 0;
	port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;

	if ((port_rss_conf->rss_hf & VMXNET3_MANDATORY_V4_RSS) !=
	    VMXNET3_MANDATORY_V4_RSS) {
		PMD_INIT_LOG(WARNING, "RSS: IPv4/6 TCP is required for vmxnet3 v4 RSS, "
			     "automatically setting it");
		port_rss_conf->rss_hf |= VMXNET3_MANDATORY_V4_RSS;
	}

	rss_hf = port_rss_conf->rss_hf &
		(VMXNET3_V4_RSS_MASK | VMXNET3_RSS_OFFLOAD_ALL);

	if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV4_TCP)
		cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_TCPIP4;
	if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV6_TCP)
		cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_TCPIP6;
	if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV4_UDP)
		cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_UDPIP4;
	if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV6_UDP)
		cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_UDPIP6;

	VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD,
			       VMXNET3_CMD_SET_RSS_FIELDS);
	ret = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_CMD);

	if (ret != VMXNET3_SUCCESS) {
		PMD_DRV_LOG(ERR, "Set RSS fields (v4) failed: %d", ret);
	}

	return ret;
}

/*
 * Configure RSS feature
 */
int
vmxnet3_rss_configure(struct rte_eth_dev *dev)
{
	struct vmxnet3_hw *hw = dev->data->dev_private;
	struct VMXNET3_RSSConf *dev_rss_conf;
	struct rte_eth_rss_conf *port_rss_conf;
	uint64_t rss_hf;
	uint8_t i, j;

	PMD_INIT_FUNC_TRACE();

	dev_rss_conf = hw->rss_conf;
	port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;

	/* loading hashFunc */
	dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
	/* loading hashKeySize */
	dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
	/* loading indTableSize: Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128) */
	dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);

	if (port_rss_conf->rss_key == NULL) {
		/* Default hash key */
		port_rss_conf->rss_key = rss_intel_key;
	}

	/* loading hashKey */
	memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key,
	       dev_rss_conf->hashKeySize);

	/* loading indTable */
	for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
		if (j == dev->data->nb_rx_queues)
			j = 0;
		dev_rss_conf->indTable[i] = j;
	}

	/* loading hashType */
	dev_rss_conf->hashType = 0;
	rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
	if (rss_hf & RTE_ETH_RSS_IPV4)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
	if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV4_TCP)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
	if (rss_hf & RTE_ETH_RSS_IPV6)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
	if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV6_TCP)
		dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;

	return VMXNET3_SUCCESS;
}