1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2015 Intel Corporation
3 */
4
5 #include <sys/queue.h>
6
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <errno.h>
11 #include <stdint.h>
12 #include <stdarg.h>
13 #include <unistd.h>
14 #include <inttypes.h>
15
16 #include <rte_byteorder.h>
17 #include <rte_common.h>
18 #include <rte_cycles.h>
19 #include <rte_log.h>
20 #include <rte_debug.h>
21 #include <rte_interrupts.h>
22 #include <rte_pci.h>
23 #include <rte_memory.h>
24 #include <rte_memzone.h>
25 #include <rte_launch.h>
26 #include <rte_eal.h>
27 #include <rte_per_lcore.h>
28 #include <rte_lcore.h>
29 #include <rte_atomic.h>
30 #include <rte_branch_prediction.h>
31 #include <rte_mempool.h>
32 #include <rte_malloc.h>
33 #include <rte_mbuf.h>
34 #include <rte_ether.h>
35 #include <rte_ethdev_driver.h>
36 #include <rte_prefetch.h>
37 #include <rte_ip.h>
38 #include <rte_udp.h>
39 #include <rte_tcp.h>
40 #include <rte_sctp.h>
41 #include <rte_string_fns.h>
42 #include <rte_errno.h>
43 #include <rte_net.h>
44
45 #include "base/vmxnet3_defs.h"
46 #include "vmxnet3_ring.h"
47
48 #include "vmxnet3_logs.h"
49 #include "vmxnet3_ethdev.h"
50
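/*
 * Tx offloads handled by this PMD: anything set in PKT_TX_OFFLOAD_MASK but
 * not listed in VMXNET3_TX_OFFLOAD_MASK is rejected in vmxnet3_prep_pkts()
 * with rte_errno = ENOTSUP (SCTP checksum offload is rejected there as well).
 */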
51 #define VMXNET3_TX_OFFLOAD_MASK ( \
52 PKT_TX_VLAN_PKT | \
53 PKT_TX_IPV6 | \
54 PKT_TX_IPV4 | \
55 PKT_TX_L4_MASK | \
56 PKT_TX_TCP_SEG)
57
58 #define VMXNET3_TX_OFFLOAD_NOTSUP_MASK \
59 (PKT_TX_OFFLOAD_MASK ^ VMXNET3_TX_OFFLOAD_MASK)
60
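/*
 * Producer-index ("doorbell") registers for the two Rx command rings,
 * indexed by ring id: ring 0 uses VMXNET3_REG_RXPROD, ring 1 uses
 * VMXNET3_REG_RXPROD2. They are written in the Rx path and in
 * vmxnet3_dev_rxtx_init() after new buffers have been posted.
 */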
61 static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
62
63 static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t);
64 static void vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *);
65 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
66 static void vmxnet3_rxq_dump(struct vmxnet3_rx_queue *);
67 static void vmxnet3_txq_dump(struct vmxnet3_tx_queue *);
68 #endif
69
70 #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER_NOT_USED
71 static void
72 vmxnet3_rxq_dump(struct vmxnet3_rx_queue *rxq)
73 {
74 uint32_t avail = 0;
75
76 if (rxq == NULL)
77 return;
78
79 PMD_RX_LOG(DEBUG,
80 "RXQ: cmd0 base : %p cmd1 base : %p comp ring base : %p.",
81 rxq->cmd_ring[0].base, rxq->cmd_ring[1].base, rxq->comp_ring.base);
82 PMD_RX_LOG(DEBUG,
83 "RXQ: cmd0 basePA : 0x%lx cmd1 basePA : 0x%lx comp ring basePA : 0x%lx.",
84 (unsigned long)rxq->cmd_ring[0].basePA,
85 (unsigned long)rxq->cmd_ring[1].basePA,
86 (unsigned long)rxq->comp_ring.basePA);
87
88 avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[0]);
89 PMD_RX_LOG(DEBUG,
90 "RXQ:cmd0: size=%u; free=%u; next2proc=%u; queued=%u",
91 (uint32_t)rxq->cmd_ring[0].size, avail,
92 rxq->comp_ring.next2proc,
93 rxq->cmd_ring[0].size - avail);
94
95 avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[1]);
96 PMD_RX_LOG(DEBUG, "RXQ:cmd1 size=%u; free=%u; next2proc=%u; queued=%u",
97 (uint32_t)rxq->cmd_ring[1].size, avail, rxq->comp_ring.next2proc,
98 rxq->cmd_ring[1].size - avail);
99
100 }
101
102 static void
103 vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq)
104 {
105 uint32_t avail = 0;
106
107 if (txq == NULL)
108 return;
109
110 PMD_TX_LOG(DEBUG, "TXQ: cmd base : %p comp ring base : %p data ring base : %p.",
111 txq->cmd_ring.base, txq->comp_ring.base, txq->data_ring.base);
112 PMD_TX_LOG(DEBUG, "TXQ: cmd basePA : 0x%lx comp ring basePA : 0x%lx data ring basePA : 0x%lx.",
113 (unsigned long)txq->cmd_ring.basePA,
114 (unsigned long)txq->comp_ring.basePA,
115 (unsigned long)txq->data_ring.basePA);
116
117 avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
118 PMD_TX_LOG(DEBUG, "TXQ: size=%u; free=%u; next2proc=%u; queued=%u",
119 (uint32_t)txq->cmd_ring.size, avail,
120 txq->comp_ring.next2proc, txq->cmd_ring.size - avail);
121 }
122 #endif
123
124 static void
125 vmxnet3_tx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
126 {
127 while (ring->next2comp != ring->next2fill) {
128 /* No need to worry about desc ownership, device is quiesced by now. */
129 vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp;
130
131 if (buf_info->m) {
132 rte_pktmbuf_free(buf_info->m);
133 buf_info->m = NULL;
134 buf_info->bufPA = 0;
135 buf_info->len = 0;
136 }
137 vmxnet3_cmd_ring_adv_next2comp(ring);
138 }
139 }
140
141 static void
142 vmxnet3_rx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring)
143 {
144 uint32_t i;
145
146 for (i = 0; i < ring->size; i++) {
147 /* No need to worry about desc ownership, device is quiesced by now. */
148 vmxnet3_buf_info_t *buf_info = &ring->buf_info[i];
149
150 if (buf_info->m) {
151 rte_pktmbuf_free_seg(buf_info->m);
152 buf_info->m = NULL;
153 buf_info->bufPA = 0;
154 buf_info->len = 0;
155 }
156 vmxnet3_cmd_ring_adv_next2comp(ring);
157 }
158 }
159
160 static void
161 vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring)
162 {
163 rte_free(ring->buf_info);
164 ring->buf_info = NULL;
165 }
166
167 void
168 vmxnet3_dev_tx_queue_release(void *txq)
169 {
170 vmxnet3_tx_queue_t *tq = txq;
171
172 if (tq != NULL) {
173 /* Release mbufs */
174 vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
175 /* Release the cmd_ring */
176 vmxnet3_cmd_ring_release(&tq->cmd_ring);
177 /* Release the memzone */
178 rte_memzone_free(tq->mz);
179 /* Release the queue */
180 rte_free(tq);
181 }
182 }
183
184 void
185 vmxnet3_dev_rx_queue_release(void *rxq)
186 {
187 int i;
188 vmxnet3_rx_queue_t *rq = rxq;
189
190 if (rq != NULL) {
191 /* Release mbufs */
192 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
193 vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
194
195 /* Release both the cmd_rings */
196 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
197 vmxnet3_cmd_ring_release(&rq->cmd_ring[i]);
198
199 /* Release the memzone */
200 rte_memzone_free(rq->mz);
201
202 /* Release the queue */
203 rte_free(rq);
204 }
205 }
206
207 static void
208 vmxnet3_dev_tx_queue_reset(void *txq)
209 {
210 vmxnet3_tx_queue_t *tq = txq;
211 struct vmxnet3_cmd_ring *ring = &tq->cmd_ring;
212 struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
213 struct vmxnet3_data_ring *data_ring = &tq->data_ring;
214 int size;
215
216 if (tq != NULL) {
217 /* Release the cmd_ring mbufs */
218 vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring);
219 }
220
221 /* Tx vmxnet rings structure initialization */
222 ring->next2fill = 0;
223 ring->next2comp = 0;
224 ring->gen = VMXNET3_INIT_GEN;
225 comp_ring->next2proc = 0;
226 comp_ring->gen = VMXNET3_INIT_GEN;
227
228 size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
229 size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
230 size += tq->txdata_desc_size * data_ring->size;
231
232 memset(ring->base, 0, size);
233 }
234
235 static void
236 vmxnet3_dev_rx_queue_reset(void *rxq)
237 {
238 int i;
239 vmxnet3_rx_queue_t *rq = rxq;
240 struct vmxnet3_hw *hw = rq->hw;
241 struct vmxnet3_cmd_ring *ring0, *ring1;
242 struct vmxnet3_comp_ring *comp_ring;
243 struct vmxnet3_rx_data_ring *data_ring = &rq->data_ring;
244 int size;
245
246 /* Release both the cmd_rings mbufs */
247 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++)
248 vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]);
249
250 ring0 = &rq->cmd_ring[0];
251 ring1 = &rq->cmd_ring[1];
252 comp_ring = &rq->comp_ring;
253
254 /* Rx vmxnet rings structure initialization */
255 ring0->next2fill = 0;
256 ring1->next2fill = 0;
257 ring0->next2comp = 0;
258 ring1->next2comp = 0;
259 ring0->gen = VMXNET3_INIT_GEN;
260 ring1->gen = VMXNET3_INIT_GEN;
261 comp_ring->next2proc = 0;
262 comp_ring->gen = VMXNET3_INIT_GEN;
263
264 size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
265 size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
266 if (VMXNET3_VERSION_GE_3(hw) && rq->data_desc_size)
267 size += rq->data_desc_size * data_ring->size;
268
269 memset(ring0->base, 0, size);
270 }
271
272 void
273 vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
274 {
275 unsigned i;
276
277 PMD_INIT_FUNC_TRACE();
278
279 for (i = 0; i < dev->data->nb_tx_queues; i++) {
280 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
281
282 if (txq != NULL) {
283 txq->stopped = TRUE;
284 vmxnet3_dev_tx_queue_reset(txq);
285 }
286 }
287
288 for (i = 0; i < dev->data->nb_rx_queues; i++) {
289 struct vmxnet3_rx_queue *rxq = dev->data->rx_queues[i];
290
291 if (rxq != NULL) {
292 rxq->stopped = TRUE;
293 vmxnet3_dev_rx_queue_reset(rxq);
294 }
295 }
296 }
297
298 static int
299 vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
300 {
301 int completed = 0;
302 struct rte_mbuf *mbuf;
303
304 /* Release cmd_ring descriptor and free mbuf */
305 RTE_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);
306
307 mbuf = txq->cmd_ring.buf_info[eop_idx].m;
308 if (mbuf == NULL)
309 rte_panic("EOP desc does not point to a valid mbuf");
310 rte_pktmbuf_free(mbuf);
311
312 txq->cmd_ring.buf_info[eop_idx].m = NULL;
313
314 while (txq->cmd_ring.next2comp != eop_idx) {
315 /* no out-of-order completion */
316 RTE_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
317 vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
318 completed++;
319 }
320
321 /* Mark the txd for which tcd was generated as completed */
322 vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
323
324 return completed + 1;
325 }
326
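/*
 * Walk the Tx completion ring while its generation bit matches ours. Each
 * completion descriptor carries the index of the EOP command descriptor of a
 * finished packet; vmxnet3_unmap_pkt() frees that packet's mbuf chain and
 * retires every command descriptor up to and including the EOP one.
 */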
327 static void
328 vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
329 {
330 int completed = 0;
331 vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
332 struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
333 (comp_ring->base + comp_ring->next2proc);
334
335 while (tcd->gen == comp_ring->gen) {
336 completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);
337
338 vmxnet3_comp_ring_adv_next2proc(comp_ring);
339 tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
340 comp_ring->next2proc);
341 }
342
343 PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
344 }
345
346 uint16_t
347 vmxnet3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
348 uint16_t nb_pkts)
349 {
350 int32_t ret;
351 uint32_t i;
352 uint64_t ol_flags;
353 struct rte_mbuf *m;
354
355 for (i = 0; i != nb_pkts; i++) {
356 m = tx_pkts[i];
357 ol_flags = m->ol_flags;
358
359 /* Non-TSO packet cannot occupy more than
360 * VMXNET3_MAX_TXD_PER_PKT TX descriptors.
361 */
362 if ((ol_flags & PKT_TX_TCP_SEG) == 0 &&
363 m->nb_segs > VMXNET3_MAX_TXD_PER_PKT) {
364 rte_errno = EINVAL;
365 return i;
366 }
367
368 /* check that only supported TX offloads are requested. */
369 if ((ol_flags & VMXNET3_TX_OFFLOAD_NOTSUP_MASK) != 0 ||
370 (ol_flags & PKT_TX_L4_MASK) ==
371 PKT_TX_SCTP_CKSUM) {
372 rte_errno = ENOTSUP;
373 return i;
374 }
375
376 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
377 ret = rte_validate_tx_offload(m);
378 if (ret != 0) {
379 rte_errno = -ret;
380 return i;
381 }
382 #endif
383 ret = rte_net_intel_cksum_prepare(m);
384 if (ret != 0) {
385 rte_errno = -ret;
386 return i;
387 }
388 }
389
390 return i;
391 }
392
393 uint16_t
394 vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
395 uint16_t nb_pkts)
396 {
397 uint16_t nb_tx;
398 vmxnet3_tx_queue_t *txq = tx_queue;
399 struct vmxnet3_hw *hw = txq->hw;
400 Vmxnet3_TxQueueCtrl *txq_ctrl = &txq->shared->ctrl;
401 uint32_t deferred = rte_le_to_cpu_32(txq_ctrl->txNumDeferred);
402
403 if (unlikely(txq->stopped)) {
404 PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
405 return 0;
406 }
407
408 /* Free up the comp_descriptors aggressively */
409 vmxnet3_tq_tx_complete(txq);
410
411 nb_tx = 0;
412 while (nb_tx < nb_pkts) {
413 Vmxnet3_GenericDesc *gdesc;
414 vmxnet3_buf_info_t *tbi;
415 uint32_t first2fill, avail, dw2;
416 struct rte_mbuf *txm = tx_pkts[nb_tx];
417 struct rte_mbuf *m_seg = txm;
418 int copy_size = 0;
419 bool tso = (txm->ol_flags & PKT_TX_TCP_SEG) != 0;
420 /* # of descriptors needed for a packet. */
421 unsigned count = txm->nb_segs;
422
423 avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
424 if (count > avail) {
425 /* Is command ring full? */
426 if (unlikely(avail == 0)) {
427 PMD_TX_LOG(DEBUG, "No free ring descriptors");
428 txq->stats.tx_ring_full++;
429 txq->stats.drop_total += (nb_pkts - nb_tx);
430 break;
431 }
432
433 /* Command ring is not full but cannot handle the
434 * multi-segmented packet. Let's try the next packet
435 * in this case.
436 */
437 PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
438 "(avail %d needed %d)", avail, count);
439 txq->stats.drop_total++;
440 if (tso)
441 txq->stats.drop_tso++;
442 rte_pktmbuf_free(txm);
443 nb_tx++;
444 continue;
445 }
446
447 /* Drop non-TSO packet that is excessively fragmented */
448 if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
449 PMD_TX_LOG(ERR, "Non-TSO packet cannot occupy more than %d tx "
450 "descriptors. Packet dropped.", VMXNET3_MAX_TXD_PER_PKT);
451 txq->stats.drop_too_many_segs++;
452 txq->stats.drop_total++;
453 rte_pktmbuf_free(txm);
454 nb_tx++;
455 continue;
456 }
457
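/*
 * Small single-segment packets that fit in one data-ring entry are copied
 * into the per-queue Tx data ring; the SOP descriptor is then pointed at
 * that entry (see the copy_size branch below) instead of at the mbuf's own
 * buffer.
 */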
458 if (txm->nb_segs == 1 &&
459 rte_pktmbuf_pkt_len(txm) <= txq->txdata_desc_size) {
460 struct Vmxnet3_TxDataDesc *tdd;
461
462 /* Skip empty packets */
463 if (unlikely(rte_pktmbuf_pkt_len(txm) == 0)) {
464 txq->stats.drop_total++;
465 rte_pktmbuf_free(txm);
466 nb_tx++;
467 continue;
468 }
469
470 tdd = (struct Vmxnet3_TxDataDesc *)
471 ((uint8 *)txq->data_ring.base +
472 txq->cmd_ring.next2fill *
473 txq->txdata_desc_size);
474 copy_size = rte_pktmbuf_pkt_len(txm);
475 rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
476 }
477
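/*
 * Descriptor ownership is conveyed by the generation bit: the SOP descriptor
 * is first written with the *previous* generation so the device ignores it,
 * and is only flipped to the current generation (after the compiler barrier
 * further below) once every descriptor of the packet has been filled in.
 */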
478 /* use the previous gen bit for the SOP desc */
479 dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
480 first2fill = txq->cmd_ring.next2fill;
481 do {
482 /* Remember the transmit buffer for cleanup */
483 tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
484
485 /* NB: the following assumes that VMXNET3 maximum
486 * transmit buffer size (16K) is greater than
487 * maximum size of mbuf segment size.
488 */
489 gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
490
491 /* Skip empty segments */
492 if (unlikely(m_seg->data_len == 0))
493 continue;
494
495 if (copy_size) {
496 uint64 offset =
497 (uint64)txq->cmd_ring.next2fill *
498 txq->txdata_desc_size;
499 gdesc->txd.addr =
500 rte_cpu_to_le_64(txq->data_ring.basePA +
501 offset);
502 } else {
503 gdesc->txd.addr = rte_mbuf_data_iova(m_seg);
504 }
505
506 gdesc->dword[2] = dw2 | m_seg->data_len;
507 gdesc->dword[3] = 0;
508
509 /* move to the next2fill descriptor */
510 vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
511
512 /* use the right gen for non-SOP desc */
513 dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
514 } while ((m_seg = m_seg->next) != NULL);
515
516 /* set the last buf_info for the pkt */
517 tbi->m = txm;
518 /* Update the EOP descriptor */
519 gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;
520
521 /* Add VLAN tag if present */
522 gdesc = txq->cmd_ring.base + first2fill;
523 if (txm->ol_flags & PKT_TX_VLAN_PKT) {
524 gdesc->txd.ti = 1;
525 gdesc->txd.tci = txm->vlan_tci;
526 }
527
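/*
 * txNumDeferred accounts for the work handed to the device since the last
 * doorbell: one unit per plain packet, or, for a TSO packet, the number of
 * segments the device will produce, i.e. roughly ceil((pkt_len - hlen) / mss).
 */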
528 if (tso) {
529 uint16_t mss = txm->tso_segsz;
530
531 RTE_ASSERT(mss > 0);
532
533 gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
534 gdesc->txd.om = VMXNET3_OM_TSO;
535 gdesc->txd.msscof = mss;
536
537 deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
538 } else if (txm->ol_flags & PKT_TX_L4_MASK) {
539 gdesc->txd.om = VMXNET3_OM_CSUM;
540 gdesc->txd.hlen = txm->l2_len + txm->l3_len;
541
542 switch (txm->ol_flags & PKT_TX_L4_MASK) {
543 case PKT_TX_TCP_CKSUM:
544 gdesc->txd.msscof = gdesc->txd.hlen +
545 offsetof(struct rte_tcp_hdr, cksum);
546 break;
547 case PKT_TX_UDP_CKSUM:
548 gdesc->txd.msscof = gdesc->txd.hlen +
549 offsetof(struct rte_udp_hdr,
550 dgram_cksum);
551 break;
552 default:
553 PMD_TX_LOG(WARNING, "requested cksum offload not supported %#llx",
554 txm->ol_flags & PKT_TX_L4_MASK);
555 abort();
556 }
557 deferred++;
558 } else {
559 gdesc->txd.hlen = 0;
560 gdesc->txd.om = VMXNET3_OM_NONE;
561 gdesc->txd.msscof = 0;
562 deferred++;
563 }
564
565 /* flip the GEN bit on the SOP */
566 rte_compiler_barrier();
567 gdesc->dword[2] ^= VMXNET3_TXD_GEN;
568
569 txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
570 nb_tx++;
571 }
572
573 PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", rte_le_to_cpu_32(txq_ctrl->txThreshold));
574
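/*
 * Doorbell writes are batched: the TXPROD register is only updated once the
 * deferred count reaches the txThreshold value in the shared queue control
 * area, which keeps the number of comparatively expensive register writes
 * down.
 */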
575 if (deferred >= rte_le_to_cpu_32(txq_ctrl->txThreshold)) {
576 txq_ctrl->txNumDeferred = 0;
577 /* Notify vSwitch that packets are available. */
578 VMXNET3_WRITE_BAR0_REG(hw, (VMXNET3_REG_TXPROD + txq->queue_id * VMXNET3_REG_ALIGN),
579 txq->cmd_ring.next2fill);
580 }
581
582 return nb_tx;
583 }
584
585 static inline void
586 vmxnet3_renew_desc(vmxnet3_rx_queue_t *rxq, uint8_t ring_id,
587 struct rte_mbuf *mbuf)
588 {
589 uint32_t val;
590 struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
591 struct Vmxnet3_RxDesc *rxd =
592 (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
593 vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
594
595 if (ring_id == 0) {
596 /* Usually: One HEAD type buf per packet
597 * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?
598 * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;
599 */
600
601 /* We use single packet buffer so all heads here */
602 val = VMXNET3_RXD_BTYPE_HEAD;
603 } else {
604 /* All BODY type buffers for 2nd ring */
605 val = VMXNET3_RXD_BTYPE_BODY;
606 }
607
608 /*
609 * Load mbuf pointer into buf_info[ring_size]
610 * buf_info structure is equivalent to cookie for virtio-virtqueue
611 */
612 buf_info->m = mbuf;
613 buf_info->len = (uint16_t)(mbuf->buf_len - RTE_PKTMBUF_HEADROOM);
614 buf_info->bufPA = rte_mbuf_data_iova_default(mbuf);
615
616 /* Load Rx Descriptor with the buffer's GPA */
617 rxd->addr = buf_info->bufPA;
618
619 /* After this point rxd->addr MUST not be NULL */
620 rxd->btype = val;
621 rxd->len = buf_info->len;
622 /* Flip gen bit at the end to change ownership */
623 rxd->gen = ring->gen;
624
625 vmxnet3_cmd_ring_adv_next2fill(ring);
626 }
627 /*
628 * Allocates mbufs and clusters. Post rx descriptors with buffer details
629 * so that device can receive packets in those buffers.
630 * Ring layout:
631 * Among the two rings, 1st ring contains buffers of type 0 and type 1.
632 * bufs_per_pkt is set such that for non-LRO cases all the buffers required
633 * by a frame will fit in 1st ring (1st buf of type0 and rest of type1).
634 * 2nd ring contains buffers of type 1 alone. The second ring is mostly used
635 * only for LRO.
636 */
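/*
 * Returns the number of descriptors posted by this call; if the ring is left
 * with no buffers at all it returns 0 or -ENOMEM instead, which callers such
 * as vmxnet3_dev_rxtx_init() treat as a failure.
 */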
637 static int
638 vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)
639 {
640 int err = 0;
641 uint32_t i = 0;
642 struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
643
644 while (vmxnet3_cmd_ring_desc_avail(ring) > 0) {
645 struct rte_mbuf *mbuf;
646
647 /* Allocate blank mbuf for the current Rx Descriptor */
648 mbuf = rte_mbuf_raw_alloc(rxq->mp);
649 if (unlikely(mbuf == NULL)) {
650 PMD_RX_LOG(ERR, "Error allocating mbuf");
651 rxq->stats.rx_buf_alloc_failure++;
652 err = ENOMEM;
653 break;
654 }
655
656 vmxnet3_renew_desc(rxq, ring_id, mbuf);
657 i++;
658 }
659
660 /* Return error only if no buffers are posted at present */
661 if (vmxnet3_cmd_ring_desc_avail(ring) >= (ring->size - 1))
662 return -err;
663 else
664 return i;
665 }
666
667 /* MSS not provided by vmxnet3, guess one with available information */
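/*
 * The headers of the first segment are parsed (Ethernet, then IPv4 or IPv6
 * including extension headers, then TCP) to find the header length. If the
 * completion descriptor supplied a segment count, the MSS is estimated as
 * ceil((pkt_len - hlen) / segs); otherwise an MTU-based value is used.
 */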
668 static uint16_t
669 vmxnet3_guess_mss(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
670 struct rte_mbuf *rxm)
671 {
672 uint32_t hlen, slen;
673 struct rte_ipv4_hdr *ipv4_hdr;
674 struct rte_ipv6_hdr *ipv6_hdr;
675 struct rte_tcp_hdr *tcp_hdr;
676 char *ptr;
677 uint8_t segs;
678
679 RTE_ASSERT(rcd->tcp);
680
681 ptr = rte_pktmbuf_mtod(rxm, char *);
682 slen = rte_pktmbuf_data_len(rxm);
683 hlen = sizeof(struct rte_ether_hdr);
684
685 if (rcd->v4) {
686 if (unlikely(slen < hlen + sizeof(struct rte_ipv4_hdr)))
687 return hw->mtu - sizeof(struct rte_ipv4_hdr)
688 - sizeof(struct rte_tcp_hdr);
689
690 ipv4_hdr = (struct rte_ipv4_hdr *)(ptr + hlen);
691 hlen += rte_ipv4_hdr_len(ipv4_hdr);
692 } else if (rcd->v6) {
693 if (unlikely(slen < hlen + sizeof(struct rte_ipv6_hdr)))
694 return hw->mtu - sizeof(struct rte_ipv6_hdr) -
695 sizeof(struct rte_tcp_hdr);
696
697 ipv6_hdr = (struct rte_ipv6_hdr *)(ptr + hlen);
698 hlen += sizeof(struct rte_ipv6_hdr);
699 if (unlikely(ipv6_hdr->proto != IPPROTO_TCP)) {
700 int frag;
701
702 rte_net_skip_ip6_ext(ipv6_hdr->proto, rxm,
703 &hlen, &frag);
704 }
705 }
706
707 if (unlikely(slen < hlen + sizeof(struct rte_tcp_hdr)))
708 return hw->mtu - hlen - sizeof(struct rte_tcp_hdr) +
709 sizeof(struct rte_ether_hdr);
710
711 tcp_hdr = (struct rte_tcp_hdr *)(ptr + hlen);
712 hlen += (tcp_hdr->data_off & 0xf0) >> 2;
713
714 segs = *vmxnet3_segs_dynfield(rxm);
715 if (segs > 1)
716 return (rte_pktmbuf_pkt_len(rxm) - hlen + segs - 1) / segs;
717 else
718 return hw->mtu - hlen + sizeof(struct rte_ether_hdr);
719 }
720
721 /* Receive side checksum and other offloads */
722 static inline void
723 vmxnet3_rx_offload(struct vmxnet3_hw *hw, const Vmxnet3_RxCompDesc *rcd,
724 struct rte_mbuf *rxm, const uint8_t sop)
725 {
726 uint64_t ol_flags = rxm->ol_flags;
727 uint32_t packet_type = rxm->packet_type;
728
729 /* Offloads set in sop */
730 if (sop) {
731 /* Set packet type */
732 packet_type |= RTE_PTYPE_L2_ETHER;
733
734 /* Check large packet receive */
735 if (VMXNET3_VERSION_GE_2(hw) &&
736 rcd->type == VMXNET3_CDTYPE_RXCOMP_LRO) {
737 const Vmxnet3_RxCompDescExt *rcde =
738 (const Vmxnet3_RxCompDescExt *)rcd;
739
740 rxm->tso_segsz = rcde->mss;
741 *vmxnet3_segs_dynfield(rxm) = rcde->segCnt;
742 ol_flags |= PKT_RX_LRO;
743 }
744 } else { /* Offloads set in eop */
745 /* Check for RSS */
746 if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE) {
747 ol_flags |= PKT_RX_RSS_HASH;
748 rxm->hash.rss = rcd->rssHash;
749 }
750
751 /* Check for hardware stripped VLAN tag */
752 if (rcd->ts) {
753 ol_flags |= (PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED);
754 rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);
755 }
756
757 /* Check packet type, checksum errors, etc. */
758 if (rcd->cnc) {
759 ol_flags |= PKT_RX_L4_CKSUM_UNKNOWN;
760 } else {
761 if (rcd->v4) {
762 packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
763
764 if (rcd->ipc)
765 ol_flags |= PKT_RX_IP_CKSUM_GOOD;
766 else
767 ol_flags |= PKT_RX_IP_CKSUM_BAD;
768
769 if (rcd->tuc) {
770 ol_flags |= PKT_RX_L4_CKSUM_GOOD;
771 if (rcd->tcp)
772 packet_type |= RTE_PTYPE_L4_TCP;
773 else
774 packet_type |= RTE_PTYPE_L4_UDP;
775 } else {
776 if (rcd->tcp) {
777 packet_type |= RTE_PTYPE_L4_TCP;
778 ol_flags |= PKT_RX_L4_CKSUM_BAD;
779 } else if (rcd->udp) {
780 packet_type |= RTE_PTYPE_L4_UDP;
781 ol_flags |= PKT_RX_L4_CKSUM_BAD;
782 }
783 }
784 } else if (rcd->v6) {
785 packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
786
787 if (rcd->tuc) {
788 ol_flags |= PKT_RX_L4_CKSUM_GOOD;
789 if (rcd->tcp)
790 packet_type |= RTE_PTYPE_L4_TCP;
791 else
792 packet_type |= RTE_PTYPE_L4_UDP;
793 } else {
794 if (rcd->tcp) {
795 packet_type |= RTE_PTYPE_L4_TCP;
796 ol_flags |= PKT_RX_L4_CKSUM_BAD;
797 } else if (rcd->udp) {
798 packet_type |= RTE_PTYPE_L4_UDP;
799 ol_flags |= PKT_RX_L4_CKSUM_BAD;
800 }
801 }
802 } else {
803 packet_type |= RTE_PTYPE_UNKNOWN;
804 }
805
806 /* Old variants of vmxnet3 do not provide MSS */
807 if ((ol_flags & PKT_RX_LRO) && rxm->tso_segsz == 0)
808 rxm->tso_segsz = vmxnet3_guess_mss(hw,
809 rcd, rxm);
810 }
811 }
812
813 rxm->ol_flags = ol_flags;
814 rxm->packet_type = packet_type;
815 }
816
817 /*
818 * Process the Rx Completion Ring of given vmxnet3_rx_queue
819 * for nb_pkts burst and return the number of packets received
820 */
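/*
 * Completion descriptors are consumed while their generation bit matches the
 * ring's. A SOP descriptor starts a new mbuf chain (rxq->start_seg), BODY
 * descriptors are appended to it, and on EOP the offload flags are applied
 * and the chain is handed to the caller. Each consumed buffer is replaced
 * with a freshly allocated mbuf and, when updateRxProd is set, the ring's
 * producer register is updated.
 */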
821 uint16_t
822 vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
823 {
824 uint16_t nb_rx;
825 uint32_t nb_rxd, idx;
826 uint8_t ring_idx;
827 vmxnet3_rx_queue_t *rxq;
828 Vmxnet3_RxCompDesc *rcd;
829 vmxnet3_buf_info_t *rbi;
830 Vmxnet3_RxDesc *rxd;
831 struct rte_mbuf *rxm = NULL;
832 struct vmxnet3_hw *hw;
833
834 nb_rx = 0;
835 ring_idx = 0;
836 nb_rxd = 0;
837 idx = 0;
838
839 rxq = rx_queue;
840 hw = rxq->hw;
841
842 rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
843
844 if (unlikely(rxq->stopped)) {
845 PMD_RX_LOG(DEBUG, "Rx queue is stopped.");
846 return 0;
847 }
848
849 while (rcd->gen == rxq->comp_ring.gen) {
850 struct rte_mbuf *newm;
851
852 if (nb_rx >= nb_pkts)
853 break;
854
855 newm = rte_mbuf_raw_alloc(rxq->mp);
856 if (unlikely(newm == NULL)) {
857 PMD_RX_LOG(ERR, "Error allocating mbuf");
858 rxq->stats.rx_buf_alloc_failure++;
859 break;
860 }
861
862 idx = rcd->rxdIdx;
863 ring_idx = vmxnet3_get_ring_idx(hw, rcd->rqID);
864 rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
865 RTE_SET_USED(rxd); /* used only for assert when enabled */
866 rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
867
868 PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
869
870 RTE_ASSERT(rcd->len <= rxd->len);
871 RTE_ASSERT(rbi->m);
872
873 /* Get the packet buffer pointer from buf_info */
874 rxm = rbi->m;
875
876 /* Clear descriptor associated buf_info to be reused */
877 rbi->m = NULL;
878 rbi->bufPA = 0;
879
880 /* Update the index that we received a packet */
881 rxq->cmd_ring[ring_idx].next2comp = idx;
882
883 /* For RCD with EOP set, check if there is frame error */
884 if (unlikely(rcd->eop && rcd->err)) {
885 rxq->stats.drop_total++;
886 rxq->stats.drop_err++;
887
888 if (!rcd->fcs) {
889 rxq->stats.drop_fcs++;
890 PMD_RX_LOG(ERR, "Recv packet dropped due to frame err.");
891 }
892 PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d",
893 (int)(rcd - (struct Vmxnet3_RxCompDesc *)
894 rxq->comp_ring.base), rcd->rxdIdx);
895 rte_pktmbuf_free_seg(rxm);
896 if (rxq->start_seg) {
897 struct rte_mbuf *start = rxq->start_seg;
898
899 rxq->start_seg = NULL;
900 rte_pktmbuf_free(start);
901 }
902 goto rcd_done;
903 }
904
905 /* Initialize newly received packet buffer */
906 rxm->port = rxq->port_id;
907 rxm->nb_segs = 1;
908 rxm->next = NULL;
909 rxm->pkt_len = (uint16_t)rcd->len;
910 rxm->data_len = (uint16_t)rcd->len;
911 rxm->data_off = RTE_PKTMBUF_HEADROOM;
912 rxm->ol_flags = 0;
913 rxm->vlan_tci = 0;
914 rxm->packet_type = 0;
915
916 /*
917 * If this is the first buffer of the received packet,
918 * set the pointer to the first mbuf of the packet
919 * Otherwise, update the total length and the number of segments
920 * of the current scattered packet, and update the pointer to
921 * the last mbuf of the current packet.
922 */
923 if (rcd->sop) {
924 RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
925
926 if (unlikely(rcd->len == 0)) {
927 RTE_ASSERT(rcd->eop);
928
929 PMD_RX_LOG(DEBUG,
930 "Rx buf was skipped. rxring[%d][%d])",
931 ring_idx, idx);
932 rte_pktmbuf_free_seg(rxm);
933 goto rcd_done;
934 }
935
936 if (vmxnet3_rx_data_ring(hw, rcd->rqID)) {
937 uint8_t *rdd = rxq->data_ring.base +
938 idx * rxq->data_desc_size;
939
940 RTE_ASSERT(VMXNET3_VERSION_GE_3(hw));
941 rte_memcpy(rte_pktmbuf_mtod(rxm, char *),
942 rdd, rcd->len);
943 }
944
945 rxq->start_seg = rxm;
946 rxq->last_seg = rxm;
947 vmxnet3_rx_offload(hw, rcd, rxm, 1);
948 } else {
949 struct rte_mbuf *start = rxq->start_seg;
950
951 RTE_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);
952
953 if (likely(start && rxm->data_len > 0)) {
954 start->pkt_len += rxm->data_len;
955 start->nb_segs++;
956
957 rxq->last_seg->next = rxm;
958 rxq->last_seg = rxm;
959 } else {
960 PMD_RX_LOG(ERR, "Error received empty or out of order frame.");
961 rxq->stats.drop_total++;
962 rxq->stats.drop_err++;
963
964 rte_pktmbuf_free_seg(rxm);
965 }
966 }
967
968 if (rcd->eop) {
969 struct rte_mbuf *start = rxq->start_seg;
970
971 vmxnet3_rx_offload(hw, rcd, start, 0);
972 rx_pkts[nb_rx++] = start;
973 rxq->start_seg = NULL;
974 }
975
976 rcd_done:
977 rxq->cmd_ring[ring_idx].next2comp = idx;
978 VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp,
979 rxq->cmd_ring[ring_idx].size);
980
981 /* It's time to renew descriptors */
982 vmxnet3_renew_desc(rxq, ring_idx, newm);
983 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
984 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
985 rxq->cmd_ring[ring_idx].next2fill);
986 }
987
988 /* Advance to the next descriptor in comp_ring */
989 vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);
990
991 rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;
992 nb_rxd++;
993 if (nb_rxd > rxq->cmd_ring[0].size) {
994 PMD_RX_LOG(ERR, "Used up quota of receiving packets,"
995 " relinquish control.");
996 break;
997 }
998 }
999
1000 if (unlikely(nb_rxd == 0)) {
1001 uint32_t avail;
1002 for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
1003 avail = vmxnet3_cmd_ring_desc_avail(&rxq->cmd_ring[ring_idx]);
1004 if (unlikely(avail > 0)) {
1005 /* try to alloc new buf and renew descriptors */
1006 vmxnet3_post_rx_bufs(rxq, ring_idx);
1007 }
1008 }
1009 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1010 for (ring_idx = 0; ring_idx < VMXNET3_RX_CMDRING_SIZE; ring_idx++) {
1011 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1012 rxq->cmd_ring[ring_idx].next2fill);
1013 }
1014 }
1015 }
1016
1017 return nb_rx;
1018 }
1019
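/*
 * The Tx queue setup callback below is reached through the ethdev API. An
 * illustrative application sequence (not part of this file, sizes chosen to
 * satisfy the checks below) would be:
 *
 *	rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &port_conf);
 *	rte_eth_tx_queue_setup(port_id, 0, 512, rte_eth_dev_socket_id(port_id),
 *			       NULL);
 *	rte_eth_dev_start(port_id);
 *
 * where nb_desc (512 here) must lie between VMXNET3_DEF_TX_RING_SIZE and
 * VMXNET3_TX_RING_MAX_SIZE.
 */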
1020 int
1021 vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev,
1022 uint16_t queue_idx,
1023 uint16_t nb_desc,
1024 unsigned int socket_id,
1025 const struct rte_eth_txconf *tx_conf __rte_unused)
1026 {
1027 struct vmxnet3_hw *hw = dev->data->dev_private;
1028 const struct rte_memzone *mz;
1029 struct vmxnet3_tx_queue *txq;
1030 struct vmxnet3_cmd_ring *ring;
1031 struct vmxnet3_comp_ring *comp_ring;
1032 struct vmxnet3_data_ring *data_ring;
1033 int size;
1034
1035 PMD_INIT_FUNC_TRACE();
1036
1037 txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue),
1038 RTE_CACHE_LINE_SIZE);
1039 if (txq == NULL) {
1040 PMD_INIT_LOG(ERR, "Can not allocate tx queue structure");
1041 return -ENOMEM;
1042 }
1043
1044 txq->queue_id = queue_idx;
1045 txq->port_id = dev->data->port_id;
1046 txq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
1047 txq->hw = hw;
1048 txq->qid = queue_idx;
1049 txq->stopped = TRUE;
1050 txq->txdata_desc_size = hw->txdata_desc_size;
1051
1052 ring = &txq->cmd_ring;
1053 comp_ring = &txq->comp_ring;
1054 data_ring = &txq->data_ring;
1055
1056 /* Tx vmxnet ring length should be between 512-4096 */
1057 if (nb_desc < VMXNET3_DEF_TX_RING_SIZE) {
1058 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Min: %u",
1059 VMXNET3_DEF_TX_RING_SIZE);
1060 return -EINVAL;
1061 } else if (nb_desc > VMXNET3_TX_RING_MAX_SIZE) {
1062 PMD_INIT_LOG(ERR, "VMXNET3 Tx Ring Size Max: %u",
1063 VMXNET3_TX_RING_MAX_SIZE);
1064 return -EINVAL;
1065 } else {
1066 ring->size = nb_desc;
1067 ring->size &= ~VMXNET3_RING_SIZE_MASK;
1068 }
1069 comp_ring->size = data_ring->size = ring->size;
1070
1071 /* Tx vmxnet rings structure initialization */
1072 ring->next2fill = 0;
1073 ring->next2comp = 0;
1074 ring->gen = VMXNET3_INIT_GEN;
1075 comp_ring->next2proc = 0;
1076 comp_ring->gen = VMXNET3_INIT_GEN;
1077
1078 size = sizeof(struct Vmxnet3_TxDesc) * ring->size;
1079 size += sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size;
1080 size += txq->txdata_desc_size * data_ring->size;
1081
1082 mz = rte_eth_dma_zone_reserve(dev, "txdesc", queue_idx, size,
1083 VMXNET3_RING_BA_ALIGN, socket_id);
1084 if (mz == NULL) {
1085 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
1086 return -ENOMEM;
1087 }
1088 txq->mz = mz;
1089 memset(mz->addr, 0, mz->len);
1090
1091 /* cmd_ring initialization */
1092 ring->base = mz->addr;
1093 ring->basePA = mz->iova;
1094
1095 /* comp_ring initialization */
1096 comp_ring->base = ring->base + ring->size;
1097 comp_ring->basePA = ring->basePA +
1098 (sizeof(struct Vmxnet3_TxDesc) * ring->size);
1099
1100 /* data_ring initialization */
1101 data_ring->base = (Vmxnet3_TxDataDesc *)(comp_ring->base + comp_ring->size);
1102 data_ring->basePA = comp_ring->basePA +
1103 (sizeof(struct Vmxnet3_TxCompDesc) * comp_ring->size);
1104
1105 /* cmd_ring0 buf_info allocation */
1106 ring->buf_info = rte_zmalloc("tx_ring_buf_info",
1107 ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE);
1108 if (ring->buf_info == NULL) {
1109 PMD_INIT_LOG(ERR, "ERROR: Creating tx_buf_info structure");
1110 return -ENOMEM;
1111 }
1112
1113 /* Update the data portion with txq */
1114 dev->data->tx_queues[queue_idx] = txq;
1115
1116 return 0;
1117 }
1118
1119 int
1120 vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
1121 uint16_t queue_idx,
1122 uint16_t nb_desc,
1123 unsigned int socket_id,
1124 __rte_unused const struct rte_eth_rxconf *rx_conf,
1125 struct rte_mempool *mp)
1126 {
1127 const struct rte_memzone *mz;
1128 struct vmxnet3_rx_queue *rxq;
1129 struct vmxnet3_hw *hw = dev->data->dev_private;
1130 struct vmxnet3_cmd_ring *ring0, *ring1, *ring;
1131 struct vmxnet3_comp_ring *comp_ring;
1132 struct vmxnet3_rx_data_ring *data_ring;
1133 int size;
1134 uint8_t i;
1135 char mem_name[32];
1136
1137 PMD_INIT_FUNC_TRACE();
1138
1139 rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue),
1140 RTE_CACHE_LINE_SIZE);
1141 if (rxq == NULL) {
1142 PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
1143 return -ENOMEM;
1144 }
1145
1146 rxq->mp = mp;
1147 rxq->queue_id = queue_idx;
1148 rxq->port_id = dev->data->port_id;
1149 rxq->shared = NULL; /* set in vmxnet3_setup_driver_shared() */
1150 rxq->hw = hw;
1151 rxq->qid1 = queue_idx;
1152 rxq->qid2 = queue_idx + hw->num_rx_queues;
1153 rxq->data_ring_qid = queue_idx + 2 * hw->num_rx_queues;
1154 rxq->data_desc_size = hw->rxdata_desc_size;
1155 rxq->stopped = TRUE;
1156
1157 ring0 = &rxq->cmd_ring[0];
1158 ring1 = &rxq->cmd_ring[1];
1159 comp_ring = &rxq->comp_ring;
1160 data_ring = &rxq->data_ring;
1161
1162 /* Rx vmxnet rings length should be between 256-4096 */
1163 if (nb_desc < VMXNET3_DEF_RX_RING_SIZE) {
1164 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Min: 256");
1165 return -EINVAL;
1166 } else if (nb_desc > VMXNET3_RX_RING_MAX_SIZE) {
1167 PMD_INIT_LOG(ERR, "VMXNET3 Rx Ring Size Max: 4096");
1168 return -EINVAL;
1169 } else {
1170 ring0->size = nb_desc;
1171 ring0->size &= ~VMXNET3_RING_SIZE_MASK;
1172 ring1->size = ring0->size;
1173 }
1174
1175 comp_ring->size = ring0->size + ring1->size;
1176 data_ring->size = ring0->size;
1177
1178 /* Rx vmxnet rings structure initialization */
1179 ring0->next2fill = 0;
1180 ring1->next2fill = 0;
1181 ring0->next2comp = 0;
1182 ring1->next2comp = 0;
1183 ring0->gen = VMXNET3_INIT_GEN;
1184 ring1->gen = VMXNET3_INIT_GEN;
1185 comp_ring->next2proc = 0;
1186 comp_ring->gen = VMXNET3_INIT_GEN;
1187
1188 size = sizeof(struct Vmxnet3_RxDesc) * (ring0->size + ring1->size);
1189 size += sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1190 if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size)
1191 size += rxq->data_desc_size * data_ring->size;
1192
1193 mz = rte_eth_dma_zone_reserve(dev, "rxdesc", queue_idx, size,
1194 VMXNET3_RING_BA_ALIGN, socket_id);
1195 if (mz == NULL) {
1196 PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone");
1197 return -ENOMEM;
1198 }
1199 rxq->mz = mz;
1200 memset(mz->addr, 0, mz->len);
1201
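/*
 * The single memzone reserved above is carved up in order: ring0 Rx
 * descriptors, ring1 Rx descriptors, the completion descriptors, and (for
 * device version 3+ with a data descriptor size configured) the Rx data
 * ring. The virtual and IOVA bases below are derived by offsetting from the
 * previous ring.
 */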
1202 /* cmd_ring0 initialization */
1203 ring0->base = mz->addr;
1204 ring0->basePA = mz->iova;
1205
1206 /* cmd_ring1 initialization */
1207 ring1->base = ring0->base + ring0->size;
1208 ring1->basePA = ring0->basePA + sizeof(struct Vmxnet3_RxDesc) * ring0->size;
1209
1210 /* comp_ring initialization */
1211 comp_ring->base = ring1->base + ring1->size;
1212 comp_ring->basePA = ring1->basePA + sizeof(struct Vmxnet3_RxDesc) *
1213 ring1->size;
1214
1215 /* data_ring initialization */
1216 if (VMXNET3_VERSION_GE_3(hw) && rxq->data_desc_size) {
1217 data_ring->base =
1218 (uint8_t *)(comp_ring->base + comp_ring->size);
1219 data_ring->basePA = comp_ring->basePA +
1220 sizeof(struct Vmxnet3_RxCompDesc) * comp_ring->size;
1221 }
1222
1223 /* cmd_ring0-cmd_ring1 buf_info allocation */
1224 for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) {
1225
1226 ring = &rxq->cmd_ring[i];
1227 ring->rid = i;
1228 snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i);
1229
1230 ring->buf_info = rte_zmalloc(mem_name,
1231 ring->size * sizeof(vmxnet3_buf_info_t),
1232 RTE_CACHE_LINE_SIZE);
1233 if (ring->buf_info == NULL) {
1234 PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure");
1235 return -ENOMEM;
1236 }
1237 }
1238
1239 /* Update the data portion with rxq */
1240 dev->data->rx_queues[queue_idx] = rxq;
1241
1242 return 0;
1243 }
1244
1245 /*
1246 * Initializes Receive Unit
1247 * Load mbufs in rx queue in advance
1248 */
1249 int
1250 vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
1251 {
1252 struct vmxnet3_hw *hw = dev->data->dev_private;
1253
1254 int i, ret;
1255 uint8_t j;
1256
1257 PMD_INIT_FUNC_TRACE();
1258
1259 for (i = 0; i < hw->num_rx_queues; i++) {
1260 vmxnet3_rx_queue_t *rxq = dev->data->rx_queues[i];
1261
1262 for (j = 0; j < VMXNET3_RX_CMDRING_SIZE; j++) {
1263 /* Passing 0 as alloc_num will allocate full ring */
1264 ret = vmxnet3_post_rx_bufs(rxq, j);
1265 if (ret <= 0) {
1266 PMD_INIT_LOG(ERR,
1267 "ERROR: Posting Rxq: %d buffers ring: %d",
1268 i, j);
1269 return -ret;
1270 }
1271 /*
1272 * Updating device with the index:next2fill to fill the
1273 * mbufs for coming packets.
1274 */
1275 if (unlikely(rxq->shared->ctrl.updateRxProd)) {
1276 VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN),
1277 rxq->cmd_ring[j].next2fill);
1278 }
1279 }
1280 rxq->stopped = FALSE;
1281 rxq->start_seg = NULL;
1282 }
1283
1284 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1285 struct vmxnet3_tx_queue *txq = dev->data->tx_queues[i];
1286
1287 txq->stopped = FALSE;
1288 }
1289
1290 return 0;
1291 }
1292
1293 static uint8_t rss_intel_key[40] = {
1294 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
1295 0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
1296 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
1297 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
1298 0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
1299 };
1300
1301 /*
1302 * Additional RSS configurations based on vmxnet v4+ APIs
1303 */
1304 int
1305 vmxnet3_v4_rss_configure(struct rte_eth_dev *dev)
1306 {
1307 struct vmxnet3_hw *hw = dev->data->dev_private;
1308 Vmxnet3_DriverShared *shared = hw->shared;
1309 Vmxnet3_CmdInfo *cmdInfo = &shared->cu.cmdInfo;
1310 struct rte_eth_rss_conf *port_rss_conf;
1311 uint64_t rss_hf;
1312 uint32_t ret;
1313
1314 PMD_INIT_FUNC_TRACE();
1315
1316 cmdInfo->setRSSFields = 0;
1317 port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1318
1319 if ((port_rss_conf->rss_hf & VMXNET3_MANDATORY_V4_RSS) !=
1320 VMXNET3_MANDATORY_V4_RSS) {
1321 PMD_INIT_LOG(WARNING, "RSS: IPv4/6 TCP is required for vmxnet3 v4 RSS,"
1322 "automatically setting it");
1323 port_rss_conf->rss_hf |= VMXNET3_MANDATORY_V4_RSS;
1324 }
1325
1326 rss_hf = port_rss_conf->rss_hf &
1327 (VMXNET3_V4_RSS_MASK | VMXNET3_RSS_OFFLOAD_ALL);
1328
1329 if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1330 cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_TCPIP4;
1331 if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
1332 cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_TCPIP6;
1333 if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
1334 cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_UDPIP4;
1335 if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
1336 cmdInfo->setRSSFields |= VMXNET3_RSS_FIELDS_UDPIP6;
1337
1338 VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD,
1339 VMXNET3_CMD_SET_RSS_FIELDS);
1340 ret = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_CMD);
1341
1342 if (ret != VMXNET3_SUCCESS) {
1343 PMD_DRV_LOG(ERR, "Set RSS fields (v4) failed: %d", ret);
1344 }
1345
1346 return ret;
1347 }
1348
1349 /*
1350 * Configure RSS feature
1351 */
1352 int
1353 vmxnet3_rss_configure(struct rte_eth_dev *dev)
1354 {
1355 struct vmxnet3_hw *hw = dev->data->dev_private;
1356 struct VMXNET3_RSSConf *dev_rss_conf;
1357 struct rte_eth_rss_conf *port_rss_conf;
1358 uint64_t rss_hf;
1359 uint8_t i, j;
1360
1361 PMD_INIT_FUNC_TRACE();
1362
1363 dev_rss_conf = hw->rss_conf;
1364 port_rss_conf = &dev->data->dev_conf.rx_adv_conf.rss_conf;
1365
1366 /* loading hashFunc */
1367 dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ;
1368 /* loading hashKeySize */
1369 dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE;
1370 /* loading indTableSize: Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128)*/
1371 dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4);
1372
1373 if (port_rss_conf->rss_key == NULL) {
1374 /* Default hash key */
1375 port_rss_conf->rss_key = rss_intel_key;
1376 }
1377
1378 /* loading hashKey */
1379 memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key,
1380 dev_rss_conf->hashKeySize);
1381
1382 /* loading indTable */
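/*
 * Entries round-robin over the configured Rx queues; e.g. with two Rx queues
 * and indTableSize == 8 the table becomes 0,1,0,1,0,1,0,1.
 */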
1383 for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) {
1384 if (j == dev->data->nb_rx_queues)
1385 j = 0;
1386 dev_rss_conf->indTable[i] = j;
1387 }
1388
1389 /* loading hashType */
1390 dev_rss_conf->hashType = 0;
1391 rss_hf = port_rss_conf->rss_hf & VMXNET3_RSS_OFFLOAD_ALL;
1392 if (rss_hf & ETH_RSS_IPV4)
1393 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV4;
1394 if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1395 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV4;
1396 if (rss_hf & ETH_RSS_IPV6)
1397 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_IPV6;
1398 if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
1399 dev_rss_conf->hashType |= VMXNET3_RSS_HASH_TYPE_TCP_IPV6;
1400
1401 return VMXNET3_SUCCESS;
1402 }
1403