1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2017 Intel Corporation
3 */
4
5 #include <linux/vhost.h>
6 #include <linux/virtio_net.h>
7 #include <stdint.h>
8 #include <stdlib.h>
9 #ifdef RTE_LIBRTE_VHOST_NUMA
10 #include <numa.h>
11 #include <numaif.h>
12 #endif
13
14 #include <rte_errno.h>
15 #include <rte_log.h>
16 #include <rte_memory.h>
17 #include <rte_malloc.h>
18 #include <rte_vhost.h>
19
20 #include "iotlb.h"
21 #include "vhost.h"
22 #include "vhost_user.h"
23
/* Table of all vhost devices, indexed by vid; a NULL slot is free. */
struct virtio_net *vhost_devices[RTE_MAX_VHOST_DEVICE];
/* Serializes slot allocation in vhost_devices[] (see vhost_new_device()). */
pthread_mutex_t vhost_dev_lock = PTHREAD_MUTEX_INITIALIZER;
26
/*
 * Translate a guest IOVA into a host virtual address using the
 * per-virtqueue IOTLB cache.
 *
 * Called with iotlb_lock read-locked.
 *
 * @param size
 *   In: number of bytes to translate starting at iova.
 *   Out: unchanged on a full hit; on a partial hit it is the length
 *   actually covered by the cache (tmp_size below).
 * @return
 *   The host virtual address on a full-range hit, 0 otherwise. On a
 *   miss (full or partial), an IOTLB miss request is sent to the
 *   frontend for the first untranslated IOVA.
 */
uint64_t
__vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
		uint64_t iova, uint64_t *size, uint8_t perm)
{
	uint64_t vva, tmp_size;

	if (unlikely(!*size))
		return 0;

	tmp_size = *size;

	vva = vhost_user_iotlb_cache_find(vq, iova, &tmp_size, perm);
	if (tmp_size == *size)
		return vva;

	/* Partial (or no) hit: ask for the first IOVA not covered. */
	iova += tmp_size;

	/* Only send a miss request if one is not already pending. */
	if (!vhost_user_iotlb_pending_miss(vq, iova, perm)) {
		/*
		 * iotlb_lock is read-locked for a full burst,
		 * but it only protects the iotlb cache.
		 * In case of IOTLB miss, we might block on the socket,
		 * which could cause a deadlock with QEMU if an IOTLB update
		 * is being handled. We can safely unlock here to avoid it.
		 */
		vhost_user_iotlb_rd_unlock(vq);

		vhost_user_iotlb_pending_insert(dev, vq, iova, perm);
		if (vhost_user_iotlb_miss(dev, iova, perm)) {
			VHOST_LOG_DATA(ERR, "(%s) IOTLB miss req failed for IOVA 0x%" PRIx64 "\n",
				dev->ifname, iova);
			/* Request failed: drop the pending entry we inserted. */
			vhost_user_iotlb_pending_remove(vq, iova, 1, perm);
		}

		vhost_user_iotlb_rd_lock(vq);
	}

	return 0;
}
67
/* Granularity of the dirty log: one bit per 4 KiB guest page. */
#define VHOST_LOG_PAGE 4096

/*
 * Atomically set a bit in memory.
 * nr is expected to be < 8 since addr points at a single byte
 * (see vhost_log_page(), which passes page % 8).
 */
static __rte_always_inline void
vhost_set_bit(unsigned int nr, volatile uint8_t *addr)
{
#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
	/*
	 * __sync_ built-ins are deprecated, but __atomic_ ones
	 * are sub-optimized in older GCC versions.
	 */
	__sync_fetch_and_or_1(addr, (1U << nr));
#else
	__atomic_fetch_or(addr, (1U << nr), __ATOMIC_RELAXED);
#endif
}
86
87 static __rte_always_inline void
vhost_log_page(uint8_t * log_base,uint64_t page)88 vhost_log_page(uint8_t *log_base, uint64_t page)
89 {
90 vhost_set_bit(page % 8, &log_base[page / 8]);
91 }
92
93 void
__vhost_log_write(struct virtio_net * dev,uint64_t addr,uint64_t len)94 __vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len)
95 {
96 uint64_t page;
97
98 if (unlikely(!dev->log_base || !len))
99 return;
100
101 if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
102 return;
103
104 /* To make sure guest memory updates are committed before logging */
105 rte_atomic_thread_fence(__ATOMIC_RELEASE);
106
107 page = addr / VHOST_LOG_PAGE;
108 while (page * VHOST_LOG_PAGE < addr + len) {
109 vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
110 page += 1;
111 }
112 }
113
114 void
__vhost_log_write_iova(struct virtio_net * dev,struct vhost_virtqueue * vq,uint64_t iova,uint64_t len)115 __vhost_log_write_iova(struct virtio_net *dev, struct vhost_virtqueue *vq,
116 uint64_t iova, uint64_t len)
117 {
118 uint64_t hva, gpa, map_len;
119 map_len = len;
120
121 hva = __vhost_iova_to_vva(dev, vq, iova, &map_len, VHOST_ACCESS_RW);
122 if (map_len != len) {
123 VHOST_LOG_DATA(ERR,
124 "(%s) failed to write log for IOVA 0x%" PRIx64 ". No IOTLB entry found\n",
125 dev->ifname, iova);
126 return;
127 }
128
129 gpa = hva_to_gpa(dev, hva, len);
130 if (gpa)
131 __vhost_log_write(dev, gpa, len);
132 }
133
/*
 * Flush the per-virtqueue dirty-log cache into the shared log bitmap.
 *
 * Each cache entry covers one unsigned-long-sized word of the bitmap
 * (word offset + accumulated bit mask), so the whole entry is merged
 * with a single atomic OR.
 */
void
__vhost_log_cache_sync(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
	unsigned long *log_base;
	int i;

	if (unlikely(!dev->log_base))
		return;

	/* No cache, nothing to sync */
	if (unlikely(!vq->log_cache))
		return;

	/* Make sure guest memory updates are committed before logging them */
	rte_atomic_thread_fence(__ATOMIC_RELEASE);

	log_base = (unsigned long *)(uintptr_t)dev->log_base;

	for (i = 0; i < vq->log_cache_nb_elem; i++) {
		struct log_cache_entry *elem = vq->log_cache + i;

#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
		/*
		 * '__sync' builtins are deprecated, but '__atomic' ones
		 * are sub-optimized in older GCC versions.
		 */
		__sync_fetch_and_or(log_base + elem->offset, elem->val);
#else
		__atomic_fetch_or(log_base + elem->offset, elem->val,
				__ATOMIC_RELAXED);
#endif
	}

	/* Publish the bitmap updates before the cache is declared empty. */
	rte_atomic_thread_fence(__ATOMIC_RELEASE);

	vq->log_cache_nb_elem = 0;
}
170
/*
 * Mark one log page dirty, batching updates through the per-virtqueue
 * cache when one is allocated; falls back to a direct (atomic) bitmap
 * write when there is no cache or the cache is full.
 */
static __rte_always_inline void
vhost_log_cache_page(struct virtio_net *dev, struct vhost_virtqueue *vq,
		uint64_t page)
{
	/* Position of this page's bit within an unsigned-long bitmap word. */
	uint32_t bit_nr = page % (sizeof(unsigned long) << 3);
	uint32_t offset = page / (sizeof(unsigned long) << 3);
	int i;

	if (unlikely(!vq->log_cache)) {
		/* No logging cache allocated, write dirty log map directly */
		rte_atomic_thread_fence(__ATOMIC_RELEASE);
		vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);

		return;
	}

	/* Merge into an existing entry covering the same bitmap word. */
	for (i = 0; i < vq->log_cache_nb_elem; i++) {
		struct log_cache_entry *elem = vq->log_cache + i;

		if (elem->offset == offset) {
			elem->val |= (1UL << bit_nr);
			return;
		}
	}

	if (unlikely(i >= VHOST_LOG_CACHE_NR)) {
		/*
		 * No more room for a new log cache entry,
		 * so write the dirty log map directly.
		 */
		rte_atomic_thread_fence(__ATOMIC_RELEASE);
		vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);

		return;
	}

	/* Start a new cache entry for this bitmap word. */
	vq->log_cache[i].offset = offset;
	vq->log_cache[i].val = (1UL << bit_nr);
	vq->log_cache_nb_elem++;
}
211
212 void
__vhost_log_cache_write(struct virtio_net * dev,struct vhost_virtqueue * vq,uint64_t addr,uint64_t len)213 __vhost_log_cache_write(struct virtio_net *dev, struct vhost_virtqueue *vq,
214 uint64_t addr, uint64_t len)
215 {
216 uint64_t page;
217
218 if (unlikely(!dev->log_base || !len))
219 return;
220
221 if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
222 return;
223
224 page = addr / VHOST_LOG_PAGE;
225 while (page * VHOST_LOG_PAGE < addr + len) {
226 vhost_log_cache_page(dev, vq, page);
227 page += 1;
228 }
229 }
230
231 void
__vhost_log_cache_write_iova(struct virtio_net * dev,struct vhost_virtqueue * vq,uint64_t iova,uint64_t len)232 __vhost_log_cache_write_iova(struct virtio_net *dev, struct vhost_virtqueue *vq,
233 uint64_t iova, uint64_t len)
234 {
235 uint64_t hva, gpa, map_len;
236 map_len = len;
237
238 hva = __vhost_iova_to_vva(dev, vq, iova, &map_len, VHOST_ACCESS_RW);
239 if (map_len != len) {
240 VHOST_LOG_DATA(ERR,
241 "(%s) failed to write log for IOVA 0x%" PRIx64 ". No IOTLB entry found\n",
242 dev->ifname, iova);
243 return;
244 }
245
246 gpa = hva_to_gpa(dev, hva, len);
247 if (gpa)
248 __vhost_log_cache_write(dev, vq, gpa, len);
249 }
250
251 void *
vhost_alloc_copy_ind_table(struct virtio_net * dev,struct vhost_virtqueue * vq,uint64_t desc_addr,uint64_t desc_len)252 vhost_alloc_copy_ind_table(struct virtio_net *dev, struct vhost_virtqueue *vq,
253 uint64_t desc_addr, uint64_t desc_len)
254 {
255 void *idesc;
256 uint64_t src, dst;
257 uint64_t len, remain = desc_len;
258
259 idesc = rte_malloc_socket(__func__, desc_len, 0, vq->numa_node);
260 if (unlikely(!idesc))
261 return NULL;
262
263 dst = (uint64_t)(uintptr_t)idesc;
264
265 while (remain) {
266 len = remain;
267 src = vhost_iova_to_vva(dev, vq, desc_addr, &len,
268 VHOST_ACCESS_RO);
269 if (unlikely(!src || !len)) {
270 rte_free(idesc);
271 return NULL;
272 }
273
274 rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len);
275
276 remain -= len;
277 dst += len;
278 desc_addr += len;
279 }
280
281 return idesc;
282 }
283
284 void
cleanup_vq(struct vhost_virtqueue * vq,int destroy)285 cleanup_vq(struct vhost_virtqueue *vq, int destroy)
286 {
287 if ((vq->callfd >= 0) && (destroy != 0))
288 close(vq->callfd);
289 if (vq->kickfd >= 0)
290 close(vq->kickfd);
291 }
292
293 void
cleanup_vq_inflight(struct virtio_net * dev,struct vhost_virtqueue * vq)294 cleanup_vq_inflight(struct virtio_net *dev, struct vhost_virtqueue *vq)
295 {
296 if (!(dev->protocol_features &
297 (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)))
298 return;
299
300 if (vq_is_packed(dev)) {
301 if (vq->inflight_packed)
302 vq->inflight_packed = NULL;
303 } else {
304 if (vq->inflight_split)
305 vq->inflight_split = NULL;
306 }
307
308 if (vq->resubmit_inflight) {
309 if (vq->resubmit_inflight->resubmit_list) {
310 rte_free(vq->resubmit_inflight->resubmit_list);
311 vq->resubmit_inflight->resubmit_list = NULL;
312 }
313 rte_free(vq->resubmit_inflight);
314 vq->resubmit_inflight = NULL;
315 }
316 }
317
318 /*
319 * Unmap any memory, close any file descriptors and
320 * free any memory owned by a device.
321 */
322 void
cleanup_device(struct virtio_net * dev,int destroy)323 cleanup_device(struct virtio_net *dev, int destroy)
324 {
325 uint32_t i;
326
327 vhost_backend_cleanup(dev);
328
329 for (i = 0; i < dev->nr_vring; i++) {
330 cleanup_vq(dev->virtqueue[i], destroy);
331 cleanup_vq_inflight(dev, dev->virtqueue[i]);
332 }
333 }
334
/* Release all async-datapath metadata attached to a virtqueue. */
static void
vhost_free_async_mem(struct vhost_virtqueue *vq)
{
	if (!vq->async)
		return;

	rte_free(vq->async->pkts_info);
	rte_free(vq->async->pkts_cmpl_flag);

	rte_free(vq->async->buffers_packed);
	vq->async->buffers_packed = NULL;
	rte_free(vq->async->descs_split);
	vq->async->descs_split = NULL;

	rte_free(vq->async);
	vq->async = NULL;
}
352
/*
 * Free a virtqueue and everything it owns: shadow ring, async state,
 * batch-copy scratch area, IOTLB mempool and dirty-log cache.
 */
void
free_vq(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
	/* Only one shadow ring exists, matching the negotiated layout. */
	if (vq_is_packed(dev))
		rte_free(vq->shadow_used_packed);
	else
		rte_free(vq->shadow_used_split);

	vhost_free_async_mem(vq);
	rte_free(vq->batch_copy_elems);
	rte_mempool_free(vq->iotlb_pool);
	rte_free(vq->log_cache);
	rte_free(vq);
}
367
368 /*
369 * Release virtqueues and device memory.
370 */
371 static void
free_device(struct virtio_net * dev)372 free_device(struct virtio_net *dev)
373 {
374 uint32_t i;
375
376 for (i = 0; i < dev->nr_vring; i++)
377 free_vq(dev, dev->virtqueue[i]);
378
379 rte_free(dev);
380 }
381
382 static __rte_always_inline int
log_translate(struct virtio_net * dev,struct vhost_virtqueue * vq)383 log_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
384 {
385 if (likely(!(vq->ring_addrs.flags & (1 << VHOST_VRING_F_LOG))))
386 return 0;
387
388 vq->log_guest_addr = translate_log_addr(dev, vq,
389 vq->ring_addrs.log_guest_addr);
390 if (vq->log_guest_addr == 0)
391 return -1;
392
393 return 0;
394 }
395
396 /*
397 * Converts vring log address to GPA
398 * If IOMMU is enabled, the log address is IOVA
399 * If IOMMU not enabled, the log address is already GPA
400 *
401 * Caller should have iotlb_lock read-locked
402 */
403 uint64_t
translate_log_addr(struct virtio_net * dev,struct vhost_virtqueue * vq,uint64_t log_addr)404 translate_log_addr(struct virtio_net *dev, struct vhost_virtqueue *vq,
405 uint64_t log_addr)
406 {
407 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)) {
408 const uint64_t exp_size = sizeof(uint64_t);
409 uint64_t hva, gpa;
410 uint64_t size = exp_size;
411
412 hva = vhost_iova_to_vva(dev, vq, log_addr,
413 &size, VHOST_ACCESS_RW);
414
415 if (size != exp_size)
416 return 0;
417
418 gpa = hva_to_gpa(dev, hva, exp_size);
419 if (!gpa) {
420 VHOST_LOG_DATA(ERR,
421 "(%s) failed to find GPA for log_addr: 0x%"
422 PRIx64 " hva: 0x%" PRIx64 "\n",
423 dev->ifname, log_addr, hva);
424 return 0;
425 }
426 return gpa;
427
428 } else
429 return log_addr;
430 }
431
/*
 * Translate the three split-ring areas (desc table, avail ring, used
 * ring) through the IOTLB. Each area must be covered by a single
 * contiguous mapping (size == req_size), otherwise -1 is returned.
 *
 * Caller should have iotlb_lock read-locked.
 */
static int
vring_translate_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
	uint64_t req_size, size;

	/* Descriptor table: vq->size fixed-size descriptors. */
	req_size = sizeof(struct vring_desc) * vq->size;
	size = req_size;
	vq->desc = (struct vring_desc *)(uintptr_t)vhost_iova_to_vva(dev, vq,
			vq->ring_addrs.desc_user_addr,
			&size, VHOST_ACCESS_RW);
	if (!vq->desc || size != req_size)
		return -1;

	/* Avail ring: header + indices (+ used_event with EVENT_IDX). */
	req_size = sizeof(struct vring_avail);
	req_size += sizeof(uint16_t) * vq->size;
	if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
		req_size += sizeof(uint16_t);
	size = req_size;
	vq->avail = (struct vring_avail *)(uintptr_t)vhost_iova_to_vva(dev, vq,
			vq->ring_addrs.avail_user_addr,
			&size, VHOST_ACCESS_RW);
	if (!vq->avail || size != req_size)
		return -1;

	/* Used ring: header + elements (+ avail_event with EVENT_IDX). */
	req_size = sizeof(struct vring_used);
	req_size += sizeof(struct vring_used_elem) * vq->size;
	if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
		req_size += sizeof(uint16_t);
	size = req_size;
	vq->used = (struct vring_used *)(uintptr_t)vhost_iova_to_vva(dev, vq,
			vq->ring_addrs.used_user_addr,
			&size, VHOST_ACCESS_RW);
	if (!vq->used || size != req_size)
		return -1;

	return 0;
}
470
/*
 * Translate the packed-ring areas (descriptor ring plus driver/device
 * event suppression structures) through the IOTLB. Each area must be
 * covered by one contiguous mapping, otherwise -1 is returned.
 *
 * Caller should have iotlb_lock read-locked.
 */
static int
vring_translate_packed(struct virtio_net *dev, struct vhost_virtqueue *vq)
{
	uint64_t req_size, size;

	/* Packed descriptor ring: vq->size descriptors. */
	req_size = sizeof(struct vring_packed_desc) * vq->size;
	size = req_size;
	vq->desc_packed = (struct vring_packed_desc *)(uintptr_t)
		vhost_iova_to_vva(dev, vq, vq->ring_addrs.desc_user_addr,
				&size, VHOST_ACCESS_RW);
	if (!vq->desc_packed || size != req_size)
		return -1;

	/* Driver event area (at the split-ring avail address slot). */
	req_size = sizeof(struct vring_packed_desc_event);
	size = req_size;
	vq->driver_event = (struct vring_packed_desc_event *)(uintptr_t)
		vhost_iova_to_vva(dev, vq, vq->ring_addrs.avail_user_addr,
				&size, VHOST_ACCESS_RW);
	if (!vq->driver_event || size != req_size)
		return -1;

	/* Device event area (at the split-ring used address slot). */
	req_size = sizeof(struct vring_packed_desc_event);
	size = req_size;
	vq->device_event = (struct vring_packed_desc_event *)(uintptr_t)
		vhost_iova_to_vva(dev, vq, vq->ring_addrs.used_user_addr,
				&size, VHOST_ACCESS_RW);
	if (!vq->device_event || size != req_size)
		return -1;

	return 0;
}
503
504 int
vring_translate(struct virtio_net * dev,struct vhost_virtqueue * vq)505 vring_translate(struct virtio_net *dev, struct vhost_virtqueue *vq)
506 {
507
508 if (!(dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM)))
509 return -1;
510
511 if (vq_is_packed(dev)) {
512 if (vring_translate_packed(dev, vq) < 0)
513 return -1;
514 } else {
515 if (vring_translate_split(dev, vq) < 0)
516 return -1;
517 }
518
519 if (log_translate(dev, vq) < 0)
520 return -1;
521
522 vq->access_ok = true;
523
524 return 0;
525 }
526
527 void
vring_invalidate(struct virtio_net * dev,struct vhost_virtqueue * vq)528 vring_invalidate(struct virtio_net *dev, struct vhost_virtqueue *vq)
529 {
530 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
531 vhost_user_iotlb_wr_lock(vq);
532
533 vq->access_ok = false;
534 vq->desc = NULL;
535 vq->avail = NULL;
536 vq->used = NULL;
537 vq->log_guest_addr = 0;
538
539 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
540 vhost_user_iotlb_wr_unlock(vq);
541 }
542
/*
 * (Re)initialize a virtqueue to its default state.
 *
 * NOTE: the memset() wipes the entire structure, so any field that
 * must survive a reset (e.g. callfd, see reset_vring_queue()) has to
 * be saved and restored by the caller.
 */
static void
init_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
{
	struct vhost_virtqueue *vq;
	int numa_node = SOCKET_ID_ANY;

	if (vring_idx >= VHOST_MAX_VRING) {
		VHOST_LOG_CONFIG(ERR, "(%s) failed to init vring, out of bound (%d)\n",
			dev->ifname, vring_idx);
		return;
	}

	vq = dev->virtqueue[vring_idx];
	if (!vq) {
		VHOST_LOG_CONFIG(ERR, "(%s) virtqueue not allocated (%d)\n",
			dev->ifname, vring_idx);
		return;
	}

	memset(vq, 0, sizeof(struct vhost_virtqueue));

	vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
	vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD;
	vq->notif_enable = VIRTIO_UNINITIALIZED_NOTIF;

#ifdef RTE_LIBRTE_VHOST_NUMA
	/* Record the NUMA node backing the vq structure itself, so later
	 * per-queue allocations can be placed on the same node. */
	if (get_mempolicy(&numa_node, NULL, 0, vq, MPOL_F_NODE | MPOL_F_ADDR)) {
		VHOST_LOG_CONFIG(ERR, "(%s) failed to query numa node: %s\n",
			dev->ifname, rte_strerror(errno));
		numa_node = SOCKET_ID_ANY;
	}
#endif
	vq->numa_node = numa_node;

	vhost_user_iotlb_init(dev, vring_idx);
}
579
580 static void
reset_vring_queue(struct virtio_net * dev,uint32_t vring_idx)581 reset_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
582 {
583 struct vhost_virtqueue *vq;
584 int callfd;
585
586 if (vring_idx >= VHOST_MAX_VRING) {
587 VHOST_LOG_CONFIG(ERR,
588 "(%s) failed to reset vring, out of bound (%d)\n",
589 dev->ifname, vring_idx);
590 return;
591 }
592
593 vq = dev->virtqueue[vring_idx];
594 if (!vq) {
595 VHOST_LOG_CONFIG(ERR, "(%s) failed to reset vring, virtqueue not allocated (%d)\n",
596 dev->ifname, vring_idx);
597 return;
598 }
599
600 callfd = vq->callfd;
601 init_vring_queue(dev, vring_idx);
602 vq->callfd = callfd;
603 }
604
/*
 * Allocate virtqueue structures up to and including vring_idx and
 * grow dev->nr_vring accordingly.
 *
 * @return 0 on success, -1 on allocation failure (queues allocated so
 * far are kept).
 */
int
alloc_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
{
	struct vhost_virtqueue *vq;
	uint32_t i;

	/* Also allocate holes, if any, up to requested vring index. */
	for (i = 0; i <= vring_idx; i++) {
		if (dev->virtqueue[i])
			continue;

		vq = rte_zmalloc(NULL, sizeof(struct vhost_virtqueue), 0);
		if (vq == NULL) {
			VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate memory for vring %u.\n",
				dev->ifname, i);
			return -1;
		}

		dev->virtqueue[i] = vq;
		init_vring_queue(dev, i);
		/* Must come after init_vring_queue(): it memsets the vq. */
		rte_spinlock_init(&vq->access_lock);
		vq->avail_wrap_counter = 1;
		vq->used_wrap_counter = 1;
		vq->signalled_used_valid = false;
	}

	dev->nr_vring = RTE_MAX(dev->nr_vring, vring_idx + 1);

	return 0;
}
635
636 /*
637 * Reset some variables in device structure, while keeping few
638 * others untouched, such as vid, ifname, nr_vring: they
639 * should be same unless the device is removed.
640 */
641 void
reset_device(struct virtio_net * dev)642 reset_device(struct virtio_net *dev)
643 {
644 uint32_t i;
645
646 dev->features = 0;
647 dev->protocol_features = 0;
648 dev->flags &= VIRTIO_DEV_BUILTIN_VIRTIO_NET;
649
650 for (i = 0; i < dev->nr_vring; i++)
651 reset_vring_queue(dev, i);
652 }
653
654 /*
655 * Invoked when there is a new vhost-user connection established (when
656 * there is a new virtio device being attached).
657 */
658 int
vhost_new_device(void)659 vhost_new_device(void)
660 {
661 struct virtio_net *dev;
662 int i;
663
664 pthread_mutex_lock(&vhost_dev_lock);
665 for (i = 0; i < RTE_MAX_VHOST_DEVICE; i++) {
666 if (vhost_devices[i] == NULL)
667 break;
668 }
669
670 if (i == RTE_MAX_VHOST_DEVICE) {
671 VHOST_LOG_CONFIG(ERR, "failed to find a free slot for new device.\n");
672 pthread_mutex_unlock(&vhost_dev_lock);
673 return -1;
674 }
675
676 dev = rte_zmalloc(NULL, sizeof(struct virtio_net), 0);
677 if (dev == NULL) {
678 VHOST_LOG_CONFIG(ERR, "failed to allocate memory for new device.\n");
679 pthread_mutex_unlock(&vhost_dev_lock);
680 return -1;
681 }
682
683 vhost_devices[i] = dev;
684 pthread_mutex_unlock(&vhost_dev_lock);
685
686 dev->vid = i;
687 dev->flags = VIRTIO_DEV_BUILTIN_VIRTIO_NET;
688 dev->slave_req_fd = -1;
689 dev->postcopy_ufd = -1;
690 rte_spinlock_init(&dev->slave_req_lock);
691
692 return i;
693 }
694
695 void
vhost_destroy_device_notify(struct virtio_net * dev)696 vhost_destroy_device_notify(struct virtio_net *dev)
697 {
698 struct rte_vdpa_device *vdpa_dev;
699
700 if (dev->flags & VIRTIO_DEV_RUNNING) {
701 vdpa_dev = dev->vdpa_dev;
702 if (vdpa_dev)
703 vdpa_dev->ops->dev_close(dev->vid);
704 dev->flags &= ~VIRTIO_DEV_RUNNING;
705 dev->notify_ops->destroy_device(dev->vid);
706 }
707 }
708
/*
 * Invoked when the vhost-user connection is broken (when the virtio
 * device is being detached). Releases all device resources and frees
 * its slot in vhost_devices[].
 */
void
vhost_destroy_device(int vid)
{
	struct virtio_net *dev = get_device(vid);

	if (dev == NULL)
		return;

	/* Notify the application and close the vDPA backend, if running. */
	vhost_destroy_device_notify(dev);

	cleanup_device(dev, 1);
	free_device(dev);

	vhost_devices[vid] = NULL;
}
728
729 void
vhost_attach_vdpa_device(int vid,struct rte_vdpa_device * vdpa_dev)730 vhost_attach_vdpa_device(int vid, struct rte_vdpa_device *vdpa_dev)
731 {
732 struct virtio_net *dev = get_device(vid);
733
734 if (dev == NULL)
735 return;
736
737 dev->vdpa_dev = vdpa_dev;
738 }
739
740 void
vhost_set_ifname(int vid,const char * if_name,unsigned int if_len)741 vhost_set_ifname(int vid, const char *if_name, unsigned int if_len)
742 {
743 struct virtio_net *dev;
744 unsigned int len;
745
746 dev = get_device(vid);
747 if (dev == NULL)
748 return;
749
750 len = if_len > sizeof(dev->ifname) ?
751 sizeof(dev->ifname) : if_len;
752
753 strncpy(dev->ifname, if_name, len);
754 dev->ifname[sizeof(dev->ifname) - 1] = '\0';
755 }
756
757 void
vhost_setup_virtio_net(int vid,bool enable,bool compliant_ol_flags)758 vhost_setup_virtio_net(int vid, bool enable, bool compliant_ol_flags)
759 {
760 struct virtio_net *dev = get_device(vid);
761
762 if (dev == NULL)
763 return;
764
765 if (enable)
766 dev->flags |= VIRTIO_DEV_BUILTIN_VIRTIO_NET;
767 else
768 dev->flags &= ~VIRTIO_DEV_BUILTIN_VIRTIO_NET;
769 if (!compliant_ol_flags)
770 dev->flags |= VIRTIO_DEV_LEGACY_OL_FLAGS;
771 else
772 dev->flags &= ~VIRTIO_DEV_LEGACY_OL_FLAGS;
773 }
774
775 void
vhost_enable_extbuf(int vid)776 vhost_enable_extbuf(int vid)
777 {
778 struct virtio_net *dev = get_device(vid);
779
780 if (dev == NULL)
781 return;
782
783 dev->extbuf = 1;
784 }
785
786 void
vhost_enable_linearbuf(int vid)787 vhost_enable_linearbuf(int vid)
788 {
789 struct virtio_net *dev = get_device(vid);
790
791 if (dev == NULL)
792 return;
793
794 dev->linearbuf = 1;
795 }
796
797 int
rte_vhost_get_mtu(int vid,uint16_t * mtu)798 rte_vhost_get_mtu(int vid, uint16_t *mtu)
799 {
800 struct virtio_net *dev = get_device(vid);
801
802 if (dev == NULL || mtu == NULL)
803 return -ENODEV;
804
805 if (!(dev->flags & VIRTIO_DEV_READY))
806 return -EAGAIN;
807
808 if (!(dev->features & (1ULL << VIRTIO_NET_F_MTU)))
809 return -ENOTSUP;
810
811 *mtu = dev->mtu;
812
813 return 0;
814 }
815
/*
 * Return the NUMA node the device structure resides on, or -1 when
 * unknown or NUMA support is compiled out.
 */
int
rte_vhost_get_numa_node(int vid)
{
#ifdef RTE_LIBRTE_VHOST_NUMA
	struct virtio_net *dev = get_device(vid);
	int node;

	if (dev == NULL || numa_available() != 0)
		return -1;

	if (get_mempolicy(&node, NULL, 0, dev,
			MPOL_F_NODE | MPOL_F_ADDR) < 0) {
		VHOST_LOG_CONFIG(ERR, "(%s) failed to query numa node: %s\n",
			dev->ifname, rte_strerror(errno));
		return -1;
	}

	return node;
#else
	RTE_SET_USED(vid);
	return -1;
#endif
}
841
842 uint32_t
rte_vhost_get_queue_num(int vid)843 rte_vhost_get_queue_num(int vid)
844 {
845 struct virtio_net *dev = get_device(vid);
846
847 if (dev == NULL)
848 return 0;
849
850 return dev->nr_vring / 2;
851 }
852
853 uint16_t
rte_vhost_get_vring_num(int vid)854 rte_vhost_get_vring_num(int vid)
855 {
856 struct virtio_net *dev = get_device(vid);
857
858 if (dev == NULL)
859 return 0;
860
861 return dev->nr_vring;
862 }
863
864 int
rte_vhost_get_ifname(int vid,char * buf,size_t len)865 rte_vhost_get_ifname(int vid, char *buf, size_t len)
866 {
867 struct virtio_net *dev = get_device(vid);
868
869 if (dev == NULL || buf == NULL)
870 return -1;
871
872 len = RTE_MIN(len, sizeof(dev->ifname));
873
874 strncpy(buf, dev->ifname, len);
875 buf[len - 1] = '\0';
876
877 return 0;
878 }
879
880 int
rte_vhost_get_negotiated_features(int vid,uint64_t * features)881 rte_vhost_get_negotiated_features(int vid, uint64_t *features)
882 {
883 struct virtio_net *dev;
884
885 dev = get_device(vid);
886 if (dev == NULL || features == NULL)
887 return -1;
888
889 *features = dev->features;
890 return 0;
891 }
892
893 int
rte_vhost_get_negotiated_protocol_features(int vid,uint64_t * protocol_features)894 rte_vhost_get_negotiated_protocol_features(int vid,
895 uint64_t *protocol_features)
896 {
897 struct virtio_net *dev;
898
899 dev = get_device(vid);
900 if (dev == NULL || protocol_features == NULL)
901 return -1;
902
903 *protocol_features = dev->protocol_features;
904 return 0;
905 }
906
907 int
rte_vhost_get_mem_table(int vid,struct rte_vhost_memory ** mem)908 rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem)
909 {
910 struct virtio_net *dev;
911 struct rte_vhost_memory *m;
912 size_t size;
913
914 dev = get_device(vid);
915 if (dev == NULL || mem == NULL)
916 return -1;
917
918 size = dev->mem->nregions * sizeof(struct rte_vhost_mem_region);
919 m = malloc(sizeof(struct rte_vhost_memory) + size);
920 if (!m)
921 return -1;
922
923 m->nregions = dev->mem->nregions;
924 memcpy(m->regions, dev->mem->regions, size);
925 *mem = m;
926
927 return 0;
928 }
929
930 int
rte_vhost_get_vhost_vring(int vid,uint16_t vring_idx,struct rte_vhost_vring * vring)931 rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx,
932 struct rte_vhost_vring *vring)
933 {
934 struct virtio_net *dev;
935 struct vhost_virtqueue *vq;
936
937 dev = get_device(vid);
938 if (dev == NULL || vring == NULL)
939 return -1;
940
941 if (vring_idx >= VHOST_MAX_VRING)
942 return -1;
943
944 vq = dev->virtqueue[vring_idx];
945 if (!vq)
946 return -1;
947
948 if (vq_is_packed(dev)) {
949 vring->desc_packed = vq->desc_packed;
950 vring->driver_event = vq->driver_event;
951 vring->device_event = vq->device_event;
952 } else {
953 vring->desc = vq->desc;
954 vring->avail = vq->avail;
955 vring->used = vq->used;
956 }
957 vring->log_guest_addr = vq->log_guest_addr;
958
959 vring->callfd = vq->callfd;
960 vring->kickfd = vq->kickfd;
961 vring->size = vq->size;
962
963 return 0;
964 }
965
966 int
rte_vhost_get_vhost_ring_inflight(int vid,uint16_t vring_idx,struct rte_vhost_ring_inflight * vring)967 rte_vhost_get_vhost_ring_inflight(int vid, uint16_t vring_idx,
968 struct rte_vhost_ring_inflight *vring)
969 {
970 struct virtio_net *dev;
971 struct vhost_virtqueue *vq;
972
973 dev = get_device(vid);
974 if (unlikely(!dev))
975 return -1;
976
977 if (vring_idx >= VHOST_MAX_VRING)
978 return -1;
979
980 vq = dev->virtqueue[vring_idx];
981 if (unlikely(!vq))
982 return -1;
983
984 if (vq_is_packed(dev)) {
985 if (unlikely(!vq->inflight_packed))
986 return -1;
987
988 vring->inflight_packed = vq->inflight_packed;
989 } else {
990 if (unlikely(!vq->inflight_split))
991 return -1;
992
993 vring->inflight_split = vq->inflight_split;
994 }
995
996 vring->resubmit_inflight = vq->resubmit_inflight;
997
998 return 0;
999 }
1000
1001 int
rte_vhost_set_inflight_desc_split(int vid,uint16_t vring_idx,uint16_t idx)1002 rte_vhost_set_inflight_desc_split(int vid, uint16_t vring_idx,
1003 uint16_t idx)
1004 {
1005 struct vhost_virtqueue *vq;
1006 struct virtio_net *dev;
1007
1008 dev = get_device(vid);
1009 if (unlikely(!dev))
1010 return -1;
1011
1012 if (unlikely(!(dev->protocol_features &
1013 (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1014 return 0;
1015
1016 if (unlikely(vq_is_packed(dev)))
1017 return -1;
1018
1019 if (unlikely(vring_idx >= VHOST_MAX_VRING))
1020 return -1;
1021
1022 vq = dev->virtqueue[vring_idx];
1023 if (unlikely(!vq))
1024 return -1;
1025
1026 if (unlikely(!vq->inflight_split))
1027 return -1;
1028
1029 if (unlikely(idx >= vq->size))
1030 return -1;
1031
1032 vq->inflight_split->desc[idx].counter = vq->global_counter++;
1033 vq->inflight_split->desc[idx].inflight = 1;
1034 return 0;
1035 }
1036
/*
 * Record a packed-ring descriptor chain [head..last] as inflight in
 * the shared inflight area.
 *
 * Inflight slots form a free list chained through their 'next' field,
 * consumed starting at old_free_head. The header slot accumulates the
 * chain length in 'num' and the index of the chain's current tail in
 * 'last'; its index is returned via *inflight_entry.
 *
 * NOTE(review): free_head is published after every slot while
 * old_free_head is only updated once the whole chain is recorded —
 * presumably so a crash mid-chain can be detected/resubmitted;
 * confirm against the inflight resubmit logic.
 */
int
rte_vhost_set_inflight_desc_packed(int vid, uint16_t vring_idx,
		uint16_t head, uint16_t last,
		uint16_t *inflight_entry)
{
	struct rte_vhost_inflight_info_packed *inflight_info;
	struct virtio_net *dev;
	struct vhost_virtqueue *vq;
	struct vring_packed_desc *desc;
	uint16_t old_free_head, free_head;

	dev = get_device(vid);
	if (unlikely(!dev))
		return -1;

	/* Without the inflight feature there is nothing to record. */
	if (unlikely(!(dev->protocol_features &
		(1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
		return 0;

	if (unlikely(!vq_is_packed(dev)))
		return -1;

	if (unlikely(vring_idx >= VHOST_MAX_VRING))
		return -1;

	vq = dev->virtqueue[vring_idx];
	if (unlikely(!vq))
		return -1;

	inflight_info = vq->inflight_packed;
	if (unlikely(!inflight_info))
		return -1;

	if (unlikely(head >= vq->size))
		return -1;

	desc = vq->desc_packed;
	old_free_head = inflight_info->old_free_head;
	if (unlikely(old_free_head >= vq->size))
		return -1;

	free_head = old_free_head;

	/* init header descriptor */
	inflight_info->desc[old_free_head].num = 0;
	inflight_info->desc[old_free_head].counter = vq->global_counter++;
	inflight_info->desc[old_free_head].inflight = 1;

	/* save desc entry in flight entry */
	while (head != ((last + 1) % vq->size)) {
		inflight_info->desc[old_free_head].num++;
		/* Copy the ring descriptor into the current free slot. */
		inflight_info->desc[free_head].addr = desc[head].addr;
		inflight_info->desc[free_head].len = desc[head].len;
		inflight_info->desc[free_head].flags = desc[head].flags;
		inflight_info->desc[free_head].id = desc[head].id;

		/* Track the chain tail and advance along the free list. */
		inflight_info->desc[old_free_head].last = free_head;
		free_head = inflight_info->desc[free_head].next;
		inflight_info->free_head = free_head;
		head = (head + 1) % vq->size;
	}

	inflight_info->old_free_head = free_head;
	*inflight_entry = old_free_head;

	return 0;
}
1104
/*
 * Mark a split-ring inflight descriptor as completed and record the
 * new used index in the shared inflight area.
 *
 * The fences order the clear against surrounding memory accesses so
 * the frontend never observes used_idx advanced past an entry still
 * flagged inflight.
 */
int
rte_vhost_clr_inflight_desc_split(int vid, uint16_t vring_idx,
		uint16_t last_used_idx, uint16_t idx)
{
	struct virtio_net *dev;
	struct vhost_virtqueue *vq;

	dev = get_device(vid);
	if (unlikely(!dev))
		return -1;

	/* Without the inflight feature there is nothing to clear. */
	if (unlikely(!(dev->protocol_features &
		(1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
		return 0;

	if (unlikely(vq_is_packed(dev)))
		return -1;

	if (unlikely(vring_idx >= VHOST_MAX_VRING))
		return -1;

	vq = dev->virtqueue[vring_idx];
	if (unlikely(!vq))
		return -1;

	if (unlikely(!vq->inflight_split))
		return -1;

	if (unlikely(idx >= vq->size))
		return -1;

	rte_atomic_thread_fence(__ATOMIC_SEQ_CST);

	vq->inflight_split->desc[idx].inflight = 0;

	rte_atomic_thread_fence(__ATOMIC_SEQ_CST);

	vq->inflight_split->used_idx = last_used_idx;
	return 0;
}
1145
/*
 * Mark a packed-ring inflight chain (headed at 'head') as completed
 * and snapshot the free-list/used state into the 'old_*' fields.
 *
 * The fences order the clear against surrounding memory accesses so
 * the shared area stays consistent if the backend crashes in between.
 */
int
rte_vhost_clr_inflight_desc_packed(int vid, uint16_t vring_idx,
		uint16_t head)
{
	struct rte_vhost_inflight_info_packed *inflight_info;
	struct virtio_net *dev;
	struct vhost_virtqueue *vq;

	dev = get_device(vid);
	if (unlikely(!dev))
		return -1;

	/* Without the inflight feature there is nothing to clear. */
	if (unlikely(!(dev->protocol_features &
		(1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
		return 0;

	if (unlikely(!vq_is_packed(dev)))
		return -1;

	if (unlikely(vring_idx >= VHOST_MAX_VRING))
		return -1;

	vq = dev->virtqueue[vring_idx];
	if (unlikely(!vq))
		return -1;

	inflight_info = vq->inflight_packed;
	if (unlikely(!inflight_info))
		return -1;

	if (unlikely(head >= vq->size))
		return -1;

	rte_atomic_thread_fence(__ATOMIC_SEQ_CST);

	inflight_info->desc[head].inflight = 0;

	rte_atomic_thread_fence(__ATOMIC_SEQ_CST);

	/* Commit the current free-list and used state as the new
	 * crash-consistent snapshot. */
	inflight_info->old_free_head = inflight_info->free_head;
	inflight_info->old_used_idx = inflight_info->used_idx;
	inflight_info->old_used_wrap_counter = inflight_info->used_wrap_counter;

	return 0;
}
1191
1192 int
rte_vhost_set_last_inflight_io_split(int vid,uint16_t vring_idx,uint16_t idx)1193 rte_vhost_set_last_inflight_io_split(int vid, uint16_t vring_idx,
1194 uint16_t idx)
1195 {
1196 struct virtio_net *dev;
1197 struct vhost_virtqueue *vq;
1198
1199 dev = get_device(vid);
1200 if (unlikely(!dev))
1201 return -1;
1202
1203 if (unlikely(!(dev->protocol_features &
1204 (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
1205 return 0;
1206
1207 if (unlikely(vq_is_packed(dev)))
1208 return -1;
1209
1210 if (unlikely(vring_idx >= VHOST_MAX_VRING))
1211 return -1;
1212
1213 vq = dev->virtqueue[vring_idx];
1214 if (unlikely(!vq))
1215 return -1;
1216
1217 if (unlikely(!vq->inflight_split))
1218 return -1;
1219
1220 if (unlikely(idx >= vq->size))
1221 return -1;
1222
1223 vq->inflight_split->last_inflight_io = idx;
1224 return 0;
1225 }
1226
/*
 * Return a completed packed-ring descriptor chain to the inflight free
 * list and advance the inflight used index accordingly.
 *
 * Returns 0 on success (or when the inflight shmfd protocol feature was
 * not negotiated), -1 on any validation failure.
 */
int
rte_vhost_set_last_inflight_io_packed(int vid, uint16_t vring_idx,
	uint16_t head)
{
	struct rte_vhost_inflight_info_packed *inflight_info;
	struct virtio_net *dev;
	struct vhost_virtqueue *vq;
	uint16_t last;

	dev = get_device(vid);
	if (unlikely(!dev))
		return -1;

	/* Nothing to record without the inflight shmfd feature. */
	if (unlikely(!(dev->protocol_features &
		(1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))))
		return 0;

	/* Packed-ring variant only. */
	if (unlikely(!vq_is_packed(dev)))
		return -1;

	if (unlikely(vring_idx >= VHOST_MAX_VRING))
		return -1;

	vq = dev->virtqueue[vring_idx];
	if (unlikely(!vq))
		return -1;

	inflight_info = vq->inflight_packed;
	if (unlikely(!inflight_info))
		return -1;

	if (unlikely(head >= vq->size))
		return -1;

	/* 'last' is the tail of the chain starting at 'head'; it must be a
	 * valid index before it is used to splice the free list. */
	last = inflight_info->desc[head].last;
	if (unlikely(last >= vq->size))
		return -1;

	/* Push the whole chain [head..last] onto the free list. */
	inflight_info->desc[last].next = inflight_info->free_head;
	inflight_info->free_head = head;
	/* Advance used_idx by the chain length, wrapping modulo desc_num
	 * and flipping the wrap counter on wrap-around. */
	inflight_info->used_idx += inflight_info->desc[head].num;
	if (inflight_info->used_idx >= inflight_info->desc_num) {
		inflight_info->used_idx -= inflight_info->desc_num;
		inflight_info->used_wrap_counter =
			!inflight_info->used_wrap_counter;
	}

	return 0;
}
1276
1277 int
rte_vhost_vring_call(int vid,uint16_t vring_idx)1278 rte_vhost_vring_call(int vid, uint16_t vring_idx)
1279 {
1280 struct virtio_net *dev;
1281 struct vhost_virtqueue *vq;
1282
1283 dev = get_device(vid);
1284 if (!dev)
1285 return -1;
1286
1287 if (vring_idx >= VHOST_MAX_VRING)
1288 return -1;
1289
1290 vq = dev->virtqueue[vring_idx];
1291 if (!vq)
1292 return -1;
1293
1294 rte_spinlock_lock(&vq->access_lock);
1295
1296 if (vq_is_packed(dev))
1297 vhost_vring_call_packed(dev, vq);
1298 else
1299 vhost_vring_call_split(dev, vq);
1300
1301 rte_spinlock_unlock(&vq->access_lock);
1302
1303 return 0;
1304 }
1305
1306 uint16_t
rte_vhost_avail_entries(int vid,uint16_t queue_id)1307 rte_vhost_avail_entries(int vid, uint16_t queue_id)
1308 {
1309 struct virtio_net *dev;
1310 struct vhost_virtqueue *vq;
1311 uint16_t ret = 0;
1312
1313 dev = get_device(vid);
1314 if (!dev)
1315 return 0;
1316
1317 if (queue_id >= VHOST_MAX_VRING)
1318 return 0;
1319
1320 vq = dev->virtqueue[queue_id];
1321 if (!vq)
1322 return 0;
1323
1324 rte_spinlock_lock(&vq->access_lock);
1325
1326 if (unlikely(!vq->enabled || vq->avail == NULL))
1327 goto out;
1328
1329 ret = *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx;
1330
1331 out:
1332 rte_spinlock_unlock(&vq->access_lock);
1333 return ret;
1334 }
1335
1336 static inline int
vhost_enable_notify_split(struct virtio_net * dev,struct vhost_virtqueue * vq,int enable)1337 vhost_enable_notify_split(struct virtio_net *dev,
1338 struct vhost_virtqueue *vq, int enable)
1339 {
1340 if (vq->used == NULL)
1341 return -1;
1342
1343 if (!(dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))) {
1344 if (enable)
1345 vq->used->flags &= ~VRING_USED_F_NO_NOTIFY;
1346 else
1347 vq->used->flags |= VRING_USED_F_NO_NOTIFY;
1348 } else {
1349 if (enable)
1350 vhost_avail_event(vq) = vq->last_avail_idx;
1351 }
1352 return 0;
1353 }
1354
/*
 * Enable or disable guest notifications on a packed virtqueue via the
 * device event suppression structure. Returns -1 if that structure is
 * not mapped yet.
 */
static inline int
vhost_enable_notify_packed(struct virtio_net *dev,
		struct vhost_virtqueue *vq, int enable)
{
	uint16_t flags;

	if (vq->device_event == NULL)
		return -1;

	if (!enable) {
		vq->device_event->flags = VRING_EVENT_F_DISABLE;
		return 0;
	}

	flags = VRING_EVENT_F_ENABLE;
	if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) {
		/* Descriptor-event mode: off_wrap packs the next avail
		 * index in bits 0-14 and the wrap counter in bit 15. */
		flags = VRING_EVENT_F_DESC;
		vq->device_event->off_wrap = vq->last_avail_idx |
			vq->avail_wrap_counter << 15;
	}

	/* Release fence: off_wrap must be visible to the guest before the
	 * flags write that arms the notification. */
	rte_atomic_thread_fence(__ATOMIC_RELEASE);

	vq->device_event->flags = flags;
	return 0;
}
1381
1382 int
vhost_enable_guest_notification(struct virtio_net * dev,struct vhost_virtqueue * vq,int enable)1383 vhost_enable_guest_notification(struct virtio_net *dev,
1384 struct vhost_virtqueue *vq, int enable)
1385 {
1386 /*
1387 * If the virtqueue is not ready yet, it will be applied
1388 * when it will become ready.
1389 */
1390 if (!vq->ready)
1391 return 0;
1392
1393 if (vq_is_packed(dev))
1394 return vhost_enable_notify_packed(dev, vq, enable);
1395 else
1396 return vhost_enable_notify_split(dev, vq, enable);
1397 }
1398
1399 int
rte_vhost_enable_guest_notification(int vid,uint16_t queue_id,int enable)1400 rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable)
1401 {
1402 struct virtio_net *dev = get_device(vid);
1403 struct vhost_virtqueue *vq;
1404 int ret;
1405
1406 if (!dev)
1407 return -1;
1408
1409 if (queue_id >= VHOST_MAX_VRING)
1410 return -1;
1411
1412 vq = dev->virtqueue[queue_id];
1413 if (!vq)
1414 return -1;
1415
1416 rte_spinlock_lock(&vq->access_lock);
1417
1418 vq->notif_enable = enable;
1419 ret = vhost_enable_guest_notification(dev, vq, enable);
1420
1421 rte_spinlock_unlock(&vq->access_lock);
1422
1423 return ret;
1424 }
1425
1426 void
rte_vhost_log_write(int vid,uint64_t addr,uint64_t len)1427 rte_vhost_log_write(int vid, uint64_t addr, uint64_t len)
1428 {
1429 struct virtio_net *dev = get_device(vid);
1430
1431 if (dev == NULL)
1432 return;
1433
1434 vhost_log_write(dev, addr, len);
1435 }
1436
1437 void
rte_vhost_log_used_vring(int vid,uint16_t vring_idx,uint64_t offset,uint64_t len)1438 rte_vhost_log_used_vring(int vid, uint16_t vring_idx,
1439 uint64_t offset, uint64_t len)
1440 {
1441 struct virtio_net *dev;
1442 struct vhost_virtqueue *vq;
1443
1444 dev = get_device(vid);
1445 if (dev == NULL)
1446 return;
1447
1448 if (vring_idx >= VHOST_MAX_VRING)
1449 return;
1450 vq = dev->virtqueue[vring_idx];
1451 if (!vq)
1452 return;
1453
1454 vhost_log_used_vring(dev, vq, offset, len);
1455 }
1456
1457 uint32_t
rte_vhost_rx_queue_count(int vid,uint16_t qid)1458 rte_vhost_rx_queue_count(int vid, uint16_t qid)
1459 {
1460 struct virtio_net *dev;
1461 struct vhost_virtqueue *vq;
1462 uint32_t ret = 0;
1463
1464 dev = get_device(vid);
1465 if (dev == NULL)
1466 return 0;
1467
1468 if (unlikely(qid >= dev->nr_vring || (qid & 1) == 0)) {
1469 VHOST_LOG_DATA(ERR, "(%s) %s: invalid virtqueue idx %d.\n",
1470 dev->ifname, __func__, qid);
1471 return 0;
1472 }
1473
1474 vq = dev->virtqueue[qid];
1475 if (vq == NULL)
1476 return 0;
1477
1478 rte_spinlock_lock(&vq->access_lock);
1479
1480 if (unlikely(!vq->enabled || vq->avail == NULL))
1481 goto out;
1482
1483 ret = *((volatile uint16_t *)&vq->avail->idx) - vq->last_avail_idx;
1484
1485 out:
1486 rte_spinlock_unlock(&vq->access_lock);
1487 return ret;
1488 }
1489
1490 struct rte_vdpa_device *
rte_vhost_get_vdpa_device(int vid)1491 rte_vhost_get_vdpa_device(int vid)
1492 {
1493 struct virtio_net *dev = get_device(vid);
1494
1495 if (dev == NULL)
1496 return NULL;
1497
1498 return dev->vdpa_dev;
1499 }
1500
1501 int
rte_vhost_get_log_base(int vid,uint64_t * log_base,uint64_t * log_size)1502 rte_vhost_get_log_base(int vid, uint64_t *log_base,
1503 uint64_t *log_size)
1504 {
1505 struct virtio_net *dev = get_device(vid);
1506
1507 if (dev == NULL || log_base == NULL || log_size == NULL)
1508 return -1;
1509
1510 *log_base = dev->log_base;
1511 *log_size = dev->log_size;
1512
1513 return 0;
1514 }
1515
1516 int
rte_vhost_get_vring_base(int vid,uint16_t queue_id,uint16_t * last_avail_idx,uint16_t * last_used_idx)1517 rte_vhost_get_vring_base(int vid, uint16_t queue_id,
1518 uint16_t *last_avail_idx, uint16_t *last_used_idx)
1519 {
1520 struct vhost_virtqueue *vq;
1521 struct virtio_net *dev = get_device(vid);
1522
1523 if (dev == NULL || last_avail_idx == NULL || last_used_idx == NULL)
1524 return -1;
1525
1526 if (queue_id >= VHOST_MAX_VRING)
1527 return -1;
1528
1529 vq = dev->virtqueue[queue_id];
1530 if (!vq)
1531 return -1;
1532
1533 if (vq_is_packed(dev)) {
1534 *last_avail_idx = (vq->avail_wrap_counter << 15) |
1535 vq->last_avail_idx;
1536 *last_used_idx = (vq->used_wrap_counter << 15) |
1537 vq->last_used_idx;
1538 } else {
1539 *last_avail_idx = vq->last_avail_idx;
1540 *last_used_idx = vq->last_used_idx;
1541 }
1542
1543 return 0;
1544 }
1545
1546 int
rte_vhost_set_vring_base(int vid,uint16_t queue_id,uint16_t last_avail_idx,uint16_t last_used_idx)1547 rte_vhost_set_vring_base(int vid, uint16_t queue_id,
1548 uint16_t last_avail_idx, uint16_t last_used_idx)
1549 {
1550 struct vhost_virtqueue *vq;
1551 struct virtio_net *dev = get_device(vid);
1552
1553 if (!dev)
1554 return -1;
1555
1556 if (queue_id >= VHOST_MAX_VRING)
1557 return -1;
1558
1559 vq = dev->virtqueue[queue_id];
1560 if (!vq)
1561 return -1;
1562
1563 if (vq_is_packed(dev)) {
1564 vq->last_avail_idx = last_avail_idx & 0x7fff;
1565 vq->avail_wrap_counter = !!(last_avail_idx & (1 << 15));
1566 vq->last_used_idx = last_used_idx & 0x7fff;
1567 vq->used_wrap_counter = !!(last_used_idx & (1 << 15));
1568 } else {
1569 vq->last_avail_idx = last_avail_idx;
1570 vq->last_used_idx = last_used_idx;
1571 }
1572
1573 return 0;
1574 }
1575
1576 int
rte_vhost_get_vring_base_from_inflight(int vid,uint16_t queue_id,uint16_t * last_avail_idx,uint16_t * last_used_idx)1577 rte_vhost_get_vring_base_from_inflight(int vid,
1578 uint16_t queue_id,
1579 uint16_t *last_avail_idx,
1580 uint16_t *last_used_idx)
1581 {
1582 struct rte_vhost_inflight_info_packed *inflight_info;
1583 struct vhost_virtqueue *vq;
1584 struct virtio_net *dev = get_device(vid);
1585
1586 if (dev == NULL || last_avail_idx == NULL || last_used_idx == NULL)
1587 return -1;
1588
1589 if (queue_id >= VHOST_MAX_VRING)
1590 return -1;
1591
1592 vq = dev->virtqueue[queue_id];
1593 if (!vq)
1594 return -1;
1595
1596 if (!vq_is_packed(dev))
1597 return -1;
1598
1599 inflight_info = vq->inflight_packed;
1600 if (!inflight_info)
1601 return -1;
1602
1603 *last_avail_idx = (inflight_info->old_used_wrap_counter << 15) |
1604 inflight_info->old_used_idx;
1605 *last_used_idx = *last_avail_idx;
1606
1607 return 0;
1608 }
1609
1610 int
rte_vhost_extern_callback_register(int vid,struct rte_vhost_user_extern_ops const * const ops,void * ctx)1611 rte_vhost_extern_callback_register(int vid,
1612 struct rte_vhost_user_extern_ops const * const ops, void *ctx)
1613 {
1614 struct virtio_net *dev = get_device(vid);
1615
1616 if (dev == NULL || ops == NULL)
1617 return -1;
1618
1619 dev->extern_ops = *ops;
1620 dev->extern_data = ctx;
1621 return 0;
1622 }
1623
1624 static __rte_always_inline int
async_channel_register(int vid,uint16_t queue_id)1625 async_channel_register(int vid, uint16_t queue_id)
1626 {
1627 struct virtio_net *dev = get_device(vid);
1628 struct vhost_virtqueue *vq = dev->virtqueue[queue_id];
1629 struct vhost_async *async;
1630 int node = vq->numa_node;
1631
1632 if (unlikely(vq->async)) {
1633 VHOST_LOG_CONFIG(ERR,
1634 "(%s) async register failed: already registered (qid: %d)\n",
1635 dev->ifname, queue_id);
1636 return -1;
1637 }
1638
1639 async = rte_zmalloc_socket(NULL, sizeof(struct vhost_async), 0, node);
1640 if (!async) {
1641 VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate async metadata (qid: %d)\n",
1642 dev->ifname, queue_id);
1643 return -1;
1644 }
1645
1646 async->pkts_info = rte_malloc_socket(NULL, vq->size * sizeof(struct async_inflight_info),
1647 RTE_CACHE_LINE_SIZE, node);
1648 if (!async->pkts_info) {
1649 VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate async_pkts_info (qid: %d)\n",
1650 dev->ifname, queue_id);
1651 goto out_free_async;
1652 }
1653
1654 async->pkts_cmpl_flag = rte_zmalloc_socket(NULL, vq->size * sizeof(bool),
1655 RTE_CACHE_LINE_SIZE, node);
1656 if (!async->pkts_cmpl_flag) {
1657 VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate async pkts_cmpl_flag (qid: %d)\n",
1658 dev->ifname, queue_id);
1659 goto out_free_async;
1660 }
1661
1662 if (vq_is_packed(dev)) {
1663 async->buffers_packed = rte_malloc_socket(NULL,
1664 vq->size * sizeof(struct vring_used_elem_packed),
1665 RTE_CACHE_LINE_SIZE, node);
1666 if (!async->buffers_packed) {
1667 VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate async buffers (qid: %d)\n",
1668 dev->ifname, queue_id);
1669 goto out_free_inflight;
1670 }
1671 } else {
1672 async->descs_split = rte_malloc_socket(NULL,
1673 vq->size * sizeof(struct vring_used_elem),
1674 RTE_CACHE_LINE_SIZE, node);
1675 if (!async->descs_split) {
1676 VHOST_LOG_CONFIG(ERR, "(%s) failed to allocate async descs (qid: %d)\n",
1677 dev->ifname, queue_id);
1678 goto out_free_inflight;
1679 }
1680 }
1681
1682 vq->async = async;
1683
1684 return 0;
1685 out_free_inflight:
1686 rte_free(async->pkts_info);
1687 out_free_async:
1688 rte_free(async);
1689
1690 return -1;
1691 }
1692
1693 int
rte_vhost_async_channel_register(int vid,uint16_t queue_id)1694 rte_vhost_async_channel_register(int vid, uint16_t queue_id)
1695 {
1696 struct vhost_virtqueue *vq;
1697 struct virtio_net *dev = get_device(vid);
1698 int ret;
1699
1700 if (dev == NULL)
1701 return -1;
1702
1703 if (queue_id >= VHOST_MAX_VRING)
1704 return -1;
1705
1706 vq = dev->virtqueue[queue_id];
1707
1708 if (unlikely(vq == NULL || !dev->async_copy))
1709 return -1;
1710
1711 rte_spinlock_lock(&vq->access_lock);
1712 ret = async_channel_register(vid, queue_id);
1713 rte_spinlock_unlock(&vq->access_lock);
1714
1715 return ret;
1716 }
1717
1718 int
rte_vhost_async_channel_register_thread_unsafe(int vid,uint16_t queue_id)1719 rte_vhost_async_channel_register_thread_unsafe(int vid, uint16_t queue_id)
1720 {
1721 struct vhost_virtqueue *vq;
1722 struct virtio_net *dev = get_device(vid);
1723
1724 if (dev == NULL)
1725 return -1;
1726
1727 if (queue_id >= VHOST_MAX_VRING)
1728 return -1;
1729
1730 vq = dev->virtqueue[queue_id];
1731
1732 if (unlikely(vq == NULL || !dev->async_copy))
1733 return -1;
1734
1735 return async_channel_register(vid, queue_id);
1736 }
1737
1738 int
rte_vhost_async_channel_unregister(int vid,uint16_t queue_id)1739 rte_vhost_async_channel_unregister(int vid, uint16_t queue_id)
1740 {
1741 struct vhost_virtqueue *vq;
1742 struct virtio_net *dev = get_device(vid);
1743 int ret = -1;
1744
1745 if (dev == NULL)
1746 return ret;
1747
1748 if (queue_id >= VHOST_MAX_VRING)
1749 return ret;
1750
1751 vq = dev->virtqueue[queue_id];
1752
1753 if (vq == NULL)
1754 return ret;
1755
1756 ret = 0;
1757
1758 if (!vq->async)
1759 return ret;
1760
1761 if (!rte_spinlock_trylock(&vq->access_lock)) {
1762 VHOST_LOG_CONFIG(ERR, "(%s) failed to unregister async channel, virtqueue busy.\n",
1763 dev->ifname);
1764 return -1;
1765 }
1766
1767 if (vq->async->pkts_inflight_n) {
1768 VHOST_LOG_CONFIG(ERR, "(%s) failed to unregister async channel.\n", dev->ifname);
1769 VHOST_LOG_CONFIG(ERR, "(%s) inflight packets must be completed before unregistration.\n",
1770 dev->ifname);
1771 ret = -1;
1772 goto out;
1773 }
1774
1775 vhost_free_async_mem(vq);
1776 out:
1777 rte_spinlock_unlock(&vq->access_lock);
1778
1779 return ret;
1780 }
1781
1782 int
rte_vhost_async_channel_unregister_thread_unsafe(int vid,uint16_t queue_id)1783 rte_vhost_async_channel_unregister_thread_unsafe(int vid, uint16_t queue_id)
1784 {
1785 struct vhost_virtqueue *vq;
1786 struct virtio_net *dev = get_device(vid);
1787
1788 if (dev == NULL)
1789 return -1;
1790
1791 if (queue_id >= VHOST_MAX_VRING)
1792 return -1;
1793
1794 vq = dev->virtqueue[queue_id];
1795
1796 if (vq == NULL)
1797 return -1;
1798
1799 if (!vq->async)
1800 return 0;
1801
1802 if (vq->async->pkts_inflight_n) {
1803 VHOST_LOG_CONFIG(ERR, "(%s) failed to unregister async channel.\n", dev->ifname);
1804 VHOST_LOG_CONFIG(ERR, "(%s) inflight packets must be completed before unregistration.\n",
1805 dev->ifname);
1806 return -1;
1807 }
1808
1809 vhost_free_async_mem(vq);
1810
1811 return 0;
1812 }
1813
/*
 * Register a DMA device virtual channel for use by the vhost async
 * datapath, allocating the per-device vchan tracking array on first use
 * and a per-vchan completion-flag ring sized to the device's max_desc
 * (rounded up to a power of two so a simple mask can index it).
 *
 * Idempotent per (dma_id, vchan_id): re-registration returns 0 without
 * reallocating. Returns -1 on invalid DMA device/vchan or allocation
 * failure.
 */
int
rte_vhost_async_dma_configure(int16_t dma_id, uint16_t vchan_id)
{
	struct rte_dma_info info;
	void *pkts_cmpl_flag_addr;
	uint16_t max_desc;

	if (!rte_dma_is_valid(dma_id)) {
		VHOST_LOG_CONFIG(ERR, "DMA %d is not found.\n", dma_id);
		return -1;
	}

	rte_dma_info_get(dma_id, &info);
	if (vchan_id >= info.max_vchans) {
		VHOST_LOG_CONFIG(ERR, "Invalid DMA %d vChannel %u.\n", dma_id, vchan_id);
		return -1;
	}

	/* First vchan for this DMA device: allocate the tracking array
	 * covering all of its vchans at once. */
	if (!dma_copy_track[dma_id].vchans) {
		struct async_dma_vchan_info *vchans;

		vchans = rte_zmalloc(NULL, sizeof(struct async_dma_vchan_info) * info.max_vchans,
				RTE_CACHE_LINE_SIZE);
		if (vchans == NULL) {
			VHOST_LOG_CONFIG(ERR, "Failed to allocate vchans for DMA %d vChannel %u.\n",
					dma_id, vchan_id);
			return -1;
		}

		dma_copy_track[dma_id].vchans = vchans;
	}

	/* Already configured: nothing more to do. */
	if (dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr) {
		VHOST_LOG_CONFIG(INFO, "DMA %d vChannel %u already registered.\n", dma_id,
				vchan_id);
		return 0;
	}

	/* Power-of-two ring size enables masked indexing below. */
	max_desc = info.max_desc;
	if (!rte_is_power_of_2(max_desc))
		max_desc = rte_align32pow2(max_desc);

	pkts_cmpl_flag_addr = rte_zmalloc(NULL, sizeof(bool *) * max_desc, RTE_CACHE_LINE_SIZE);
	if (!pkts_cmpl_flag_addr) {
		VHOST_LOG_CONFIG(ERR, "Failed to allocate pkts_cmpl_flag_addr for DMA %d "
				"vChannel %u.\n", dma_id, vchan_id);

		/* Roll back the vchans array only if no other vchan of this
		 * DMA device is using it yet. */
		if (dma_copy_track[dma_id].nr_vchans == 0) {
			rte_free(dma_copy_track[dma_id].vchans);
			dma_copy_track[dma_id].vchans = NULL;
		}
		return -1;
	}

	dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr = pkts_cmpl_flag_addr;
	dma_copy_track[dma_id].vchans[vchan_id].ring_size = max_desc;
	dma_copy_track[dma_id].vchans[vchan_id].ring_mask = max_desc - 1;
	dma_copy_track[dma_id].nr_vchans++;

	return 0;
}
1875
1876 int
rte_vhost_async_get_inflight(int vid,uint16_t queue_id)1877 rte_vhost_async_get_inflight(int vid, uint16_t queue_id)
1878 {
1879 struct vhost_virtqueue *vq;
1880 struct virtio_net *dev = get_device(vid);
1881 int ret = -1;
1882
1883 if (dev == NULL)
1884 return ret;
1885
1886 if (queue_id >= VHOST_MAX_VRING)
1887 return ret;
1888
1889 vq = dev->virtqueue[queue_id];
1890
1891 if (vq == NULL)
1892 return ret;
1893
1894 if (!vq->async)
1895 return ret;
1896
1897 if (!rte_spinlock_trylock(&vq->access_lock)) {
1898 VHOST_LOG_CONFIG(DEBUG,
1899 "(%s) failed to check in-flight packets. virtqueue busy.\n",
1900 dev->ifname);
1901 return ret;
1902 }
1903
1904 ret = vq->async->pkts_inflight_n;
1905 rte_spinlock_unlock(&vq->access_lock);
1906
1907 return ret;
1908 }
1909
1910 int
rte_vhost_async_get_inflight_thread_unsafe(int vid,uint16_t queue_id)1911 rte_vhost_async_get_inflight_thread_unsafe(int vid, uint16_t queue_id)
1912 {
1913 struct vhost_virtqueue *vq;
1914 struct virtio_net *dev = get_device(vid);
1915 int ret = -1;
1916
1917 if (dev == NULL)
1918 return ret;
1919
1920 if (queue_id >= VHOST_MAX_VRING)
1921 return ret;
1922
1923 vq = dev->virtqueue[queue_id];
1924
1925 if (vq == NULL)
1926 return ret;
1927
1928 if (!vq->async)
1929 return ret;
1930
1931 ret = vq->async->pkts_inflight_n;
1932
1933 return ret;
1934 }
1935
1936 int
rte_vhost_get_monitor_addr(int vid,uint16_t queue_id,struct rte_vhost_power_monitor_cond * pmc)1937 rte_vhost_get_monitor_addr(int vid, uint16_t queue_id,
1938 struct rte_vhost_power_monitor_cond *pmc)
1939 {
1940 struct virtio_net *dev = get_device(vid);
1941 struct vhost_virtqueue *vq;
1942
1943 if (dev == NULL)
1944 return -1;
1945 if (queue_id >= VHOST_MAX_VRING)
1946 return -1;
1947
1948 vq = dev->virtqueue[queue_id];
1949 if (vq == NULL)
1950 return -1;
1951
1952 if (vq_is_packed(dev)) {
1953 struct vring_packed_desc *desc;
1954 desc = vq->desc_packed;
1955 pmc->addr = &desc[vq->last_avail_idx].flags;
1956 if (vq->avail_wrap_counter)
1957 pmc->val = VRING_DESC_F_AVAIL;
1958 else
1959 pmc->val = VRING_DESC_F_USED;
1960 pmc->mask = VRING_DESC_F_AVAIL | VRING_DESC_F_USED;
1961 pmc->size = sizeof(desc[vq->last_avail_idx].flags);
1962 pmc->match = 1;
1963 } else {
1964 pmc->addr = &vq->avail->idx;
1965 pmc->val = vq->last_avail_idx & (vq->size - 1);
1966 pmc->mask = vq->size - 1;
1967 pmc->size = sizeof(vq->avail->idx);
1968 pmc->match = 0;
1969 }
1970
1971 return 0;
1972 }
1973
/* Register the vhost log types: config messages default to INFO,
 * datapath messages to WARNING to keep the hot path quiet. */
RTE_LOG_REGISTER_SUFFIX(vhost_config_log_level, config, INFO);
RTE_LOG_REGISTER_SUFFIX(vhost_data_log_level, data, WARNING);
1976