1 /*- 2 * BSD LICENSE 3 * 4 * Copyright(c) 2016 Intel Corporation. All rights reserved. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <sys/types.h> 35 #include <sys/stat.h> 36 #include <fcntl.h> 37 #include <unistd.h> 38 39 #include <rte_memory.h> 40 #include <rte_eal_memconfig.h> 41 42 #include "vhost.h" 43 #include "virtio_user_dev.h" 44 #include "vhost_kernel_tap.h" 45 46 struct vhost_memory_kernel { 47 uint32_t nregions; 48 uint32_t padding; 49 struct vhost_memory_region regions[0]; 50 }; 51 52 /* vhost kernel ioctls */ 53 #define VHOST_VIRTIO 0xAF 54 #define VHOST_GET_FEATURES _IOR(VHOST_VIRTIO, 0x00, __u64) 55 #define VHOST_SET_FEATURES _IOW(VHOST_VIRTIO, 0x00, __u64) 56 #define VHOST_SET_OWNER _IO(VHOST_VIRTIO, 0x01) 57 #define VHOST_RESET_OWNER _IO(VHOST_VIRTIO, 0x02) 58 #define VHOST_SET_MEM_TABLE _IOW(VHOST_VIRTIO, 0x03, struct vhost_memory_kernel) 59 #define VHOST_SET_LOG_BASE _IOW(VHOST_VIRTIO, 0x04, __u64) 60 #define VHOST_SET_LOG_FD _IOW(VHOST_VIRTIO, 0x07, int) 61 #define VHOST_SET_VRING_NUM _IOW(VHOST_VIRTIO, 0x10, struct vhost_vring_state) 62 #define VHOST_SET_VRING_ADDR _IOW(VHOST_VIRTIO, 0x11, struct vhost_vring_addr) 63 #define VHOST_SET_VRING_BASE _IOW(VHOST_VIRTIO, 0x12, struct vhost_vring_state) 64 #define VHOST_GET_VRING_BASE _IOWR(VHOST_VIRTIO, 0x12, struct vhost_vring_state) 65 #define VHOST_SET_VRING_KICK _IOW(VHOST_VIRTIO, 0x20, struct vhost_vring_file) 66 #define VHOST_SET_VRING_CALL _IOW(VHOST_VIRTIO, 0x21, struct vhost_vring_file) 67 #define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct vhost_vring_file) 68 #define VHOST_NET_SET_BACKEND _IOW(VHOST_VIRTIO, 0x30, struct vhost_vring_file) 69 70 static uint64_t max_regions = 64; 71 72 static void 73 get_vhost_kernel_max_regions(void) 74 { 75 int fd; 76 char buf[20] = {'\0'}; 77 78 fd = open("/sys/module/vhost/parameters/max_mem_regions", O_RDONLY); 79 if (fd < 0) 80 return; 81 82 if (read(fd, buf, sizeof(buf) - 1) > 0) 83 max_regions = strtoull(buf, NULL, 10); 84 85 close(fd); 86 } 87 88 static uint64_t vhost_req_user_to_kernel[] = { 89 [VHOST_USER_SET_OWNER] = VHOST_SET_OWNER, 90 [VHOST_USER_RESET_OWNER] = VHOST_RESET_OWNER, 91 [VHOST_USER_SET_FEATURES] = VHOST_SET_FEATURES, 92 [VHOST_USER_GET_FEATURES] = VHOST_GET_FEATURES, 93 [VHOST_USER_SET_VRING_CALL] = VHOST_SET_VRING_CALL, 94 [VHOST_USER_SET_VRING_NUM] = VHOST_SET_VRING_NUM, 95 [VHOST_USER_SET_VRING_BASE] = VHOST_SET_VRING_BASE, 96 [VHOST_USER_GET_VRING_BASE] = VHOST_GET_VRING_BASE, 97 [VHOST_USER_SET_VRING_ADDR] = VHOST_SET_VRING_ADDR, 98 [VHOST_USER_SET_VRING_KICK] = VHOST_SET_VRING_KICK, 99 [VHOST_USER_SET_MEM_TABLE] = VHOST_SET_MEM_TABLE, 100 }; 101 102 /* By default, vhost kernel module allows 64 regions, but DPDK allows 103 * 256 segments. As a relief, below function merges those virtually 104 * adjacent memsegs into one region. 105 */ 106 static struct vhost_memory_kernel * 107 prepare_vhost_memory_kernel(void) 108 { 109 uint32_t i, j, k = 0; 110 struct rte_memseg *seg; 111 struct vhost_memory_region *mr; 112 struct vhost_memory_kernel *vm; 113 114 vm = malloc(sizeof(struct vhost_memory_kernel) + 115 max_regions * 116 sizeof(struct vhost_memory_region)); 117 118 for (i = 0; i < RTE_MAX_MEMSEG; ++i) { 119 seg = &rte_eal_get_configuration()->mem_config->memseg[i]; 120 if (!seg->addr) 121 break; 122 123 int new_region = 1; 124 125 for (j = 0; j < k; ++j) { 126 mr = &vm->regions[j]; 127 128 if (mr->userspace_addr + mr->memory_size == 129 (uint64_t)(uintptr_t)seg->addr) { 130 mr->memory_size += seg->len; 131 new_region = 0; 132 break; 133 } 134 135 if ((uint64_t)(uintptr_t)seg->addr + seg->len == 136 mr->userspace_addr) { 137 mr->guest_phys_addr = 138 (uint64_t)(uintptr_t)seg->addr; 139 mr->userspace_addr = 140 (uint64_t)(uintptr_t)seg->addr; 141 mr->memory_size += seg->len; 142 new_region = 0; 143 break; 144 } 145 } 146 147 if (new_region == 0) 148 continue; 149 150 mr = &vm->regions[k++]; 151 /* use vaddr here! */ 152 mr->guest_phys_addr = (uint64_t)(uintptr_t)seg->addr; 153 mr->userspace_addr = (uint64_t)(uintptr_t)seg->addr; 154 mr->memory_size = seg->len; 155 mr->mmap_offset = 0; 156 157 if (k >= max_regions) { 158 free(vm); 159 return NULL; 160 } 161 } 162 163 vm->nregions = k; 164 vm->padding = 0; 165 return vm; 166 } 167 168 /* with below features, vhost kernel does not need to do the checksum and TSO, 169 * these info will be passed to virtio_user through virtio net header. 170 */ 171 #define VHOST_KERNEL_GUEST_OFFLOADS_MASK \ 172 ((1ULL << VIRTIO_NET_F_GUEST_CSUM) | \ 173 (1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ 174 (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \ 175 (1ULL << VIRTIO_NET_F_GUEST_ECN) | \ 176 (1ULL << VIRTIO_NET_F_GUEST_UFO)) 177 178 /* with below features, when flows from virtio_user to vhost kernel 179 * (1) if flows goes up through the kernel networking stack, it does not need 180 * to verify checksum, which can save CPU cycles; 181 * (2) if flows goes through a Linux bridge and outside from an interface 182 * (kernel driver), checksum and TSO will be done by GSO in kernel or even 183 * offloaded into real physical device. 184 */ 185 #define VHOST_KERNEL_HOST_OFFLOADS_MASK \ 186 ((1ULL << VIRTIO_NET_F_HOST_TSO4) | \ 187 (1ULL << VIRTIO_NET_F_HOST_TSO6) | \ 188 (1ULL << VIRTIO_NET_F_CSUM)) 189 190 static int 191 tap_supporte_mq(void) 192 { 193 int tapfd; 194 unsigned int tap_features; 195 196 tapfd = open(PATH_NET_TUN, O_RDWR); 197 if (tapfd < 0) { 198 PMD_DRV_LOG(ERR, "fail to open %s: %s", 199 PATH_NET_TUN, strerror(errno)); 200 return -1; 201 } 202 203 if (ioctl(tapfd, TUNGETFEATURES, &tap_features) == -1) { 204 PMD_DRV_LOG(ERR, "TUNGETFEATURES failed: %s", strerror(errno)); 205 close(tapfd); 206 return -1; 207 } 208 209 close(tapfd); 210 return tap_features & IFF_MULTI_QUEUE; 211 } 212 213 static int 214 vhost_kernel_ioctl(struct virtio_user_dev *dev, 215 enum vhost_user_request req, 216 void *arg) 217 { 218 int ret = -1; 219 unsigned int i; 220 uint64_t req_kernel; 221 struct vhost_memory_kernel *vm = NULL; 222 int vhostfd; 223 unsigned int queue_sel; 224 225 PMD_DRV_LOG(INFO, "%s", vhost_msg_strings[req]); 226 227 req_kernel = vhost_req_user_to_kernel[req]; 228 229 if (req_kernel == VHOST_SET_MEM_TABLE) { 230 vm = prepare_vhost_memory_kernel(); 231 if (!vm) 232 return -1; 233 arg = (void *)vm; 234 } 235 236 if (req_kernel == VHOST_SET_FEATURES) { 237 /* We don't need memory protection here */ 238 *(uint64_t *)arg &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM); 239 240 /* VHOST kernel does not know about below flags */ 241 *(uint64_t *)arg &= ~VHOST_KERNEL_GUEST_OFFLOADS_MASK; 242 *(uint64_t *)arg &= ~VHOST_KERNEL_HOST_OFFLOADS_MASK; 243 244 *(uint64_t *)arg &= ~(1ULL << VIRTIO_NET_F_MQ); 245 } 246 247 switch (req_kernel) { 248 case VHOST_SET_VRING_NUM: 249 case VHOST_SET_VRING_ADDR: 250 case VHOST_SET_VRING_BASE: 251 case VHOST_GET_VRING_BASE: 252 case VHOST_SET_VRING_KICK: 253 case VHOST_SET_VRING_CALL: 254 queue_sel = *(unsigned int *)arg; 255 vhostfd = dev->vhostfds[queue_sel / 2]; 256 *(unsigned int *)arg = queue_sel % 2; 257 PMD_DRV_LOG(DEBUG, "vhostfd=%d, index=%u", 258 vhostfd, *(unsigned int *)arg); 259 break; 260 default: 261 vhostfd = -1; 262 } 263 if (vhostfd == -1) { 264 for (i = 0; i < dev->max_queue_pairs; ++i) { 265 if (dev->vhostfds[i] < 0) 266 continue; 267 268 ret = ioctl(dev->vhostfds[i], req_kernel, arg); 269 if (ret < 0) 270 break; 271 } 272 } else { 273 ret = ioctl(vhostfd, req_kernel, arg); 274 } 275 276 if (!ret && req_kernel == VHOST_GET_FEATURES) { 277 /* with tap as the backend, all these features are supported 278 * but not claimed by vhost-net, so we add them back when 279 * reporting to upper layer. 280 */ 281 *((uint64_t *)arg) |= VHOST_KERNEL_GUEST_OFFLOADS_MASK; 282 *((uint64_t *)arg) |= VHOST_KERNEL_HOST_OFFLOADS_MASK; 283 284 /* vhost_kernel will not declare this feature, but it does 285 * support multi-queue. 286 */ 287 if (tap_supporte_mq()) 288 *(uint64_t *)arg |= (1ull << VIRTIO_NET_F_MQ); 289 } 290 291 if (vm) 292 free(vm); 293 294 if (ret < 0) 295 PMD_DRV_LOG(ERR, "%s failed: %s", 296 vhost_msg_strings[req], strerror(errno)); 297 298 return ret; 299 } 300 301 /** 302 * Set up environment to talk with a vhost kernel backend. 303 * 304 * @return 305 * - (-1) if fail to set up; 306 * - (>=0) if successful. 307 */ 308 static int 309 vhost_kernel_setup(struct virtio_user_dev *dev) 310 { 311 int vhostfd; 312 uint32_t i; 313 314 get_vhost_kernel_max_regions(); 315 316 for (i = 0; i < dev->max_queue_pairs; ++i) { 317 vhostfd = open(dev->path, O_RDWR); 318 if (vhostfd < 0) { 319 PMD_DRV_LOG(ERR, "fail to open %s, %s", 320 dev->path, strerror(errno)); 321 return -1; 322 } 323 324 dev->vhostfds[i] = vhostfd; 325 } 326 327 return 0; 328 } 329 330 static int 331 vhost_kernel_set_backend(int vhostfd, int tapfd) 332 { 333 struct vhost_vring_file f; 334 335 f.fd = tapfd; 336 f.index = 0; 337 if (ioctl(vhostfd, VHOST_NET_SET_BACKEND, &f) < 0) { 338 PMD_DRV_LOG(ERR, "VHOST_NET_SET_BACKEND fails, %s", 339 strerror(errno)); 340 return -1; 341 } 342 343 f.index = 1; 344 if (ioctl(vhostfd, VHOST_NET_SET_BACKEND, &f) < 0) { 345 PMD_DRV_LOG(ERR, "VHOST_NET_SET_BACKEND fails, %s", 346 strerror(errno)); 347 return -1; 348 } 349 350 return 0; 351 } 352 353 static int 354 vhost_kernel_enable_queue_pair(struct virtio_user_dev *dev, 355 uint16_t pair_idx, 356 int enable) 357 { 358 int hdr_size; 359 int vhostfd; 360 int tapfd; 361 int req_mq = (dev->max_queue_pairs > 1); 362 363 vhostfd = dev->vhostfds[pair_idx]; 364 365 if (!enable) { 366 if (dev->tapfds[pair_idx]) { 367 close(dev->tapfds[pair_idx]); 368 dev->tapfds[pair_idx] = -1; 369 } 370 return vhost_kernel_set_backend(vhostfd, -1); 371 } else if (dev->tapfds[pair_idx] >= 0) { 372 return 0; 373 } 374 375 if ((dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF)) || 376 (dev->features & (1ULL << VIRTIO_F_VERSION_1))) 377 hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf); 378 else 379 hdr_size = sizeof(struct virtio_net_hdr); 380 381 tapfd = vhost_kernel_open_tap(&dev->ifname, hdr_size, req_mq); 382 if (tapfd < 0) { 383 PMD_DRV_LOG(ERR, "fail to open tap for vhost kernel"); 384 return -1; 385 } 386 387 if (vhost_kernel_set_backend(vhostfd, tapfd) < 0) { 388 PMD_DRV_LOG(ERR, "fail to set backend for vhost kernel"); 389 close(tapfd); 390 return -1; 391 } 392 393 dev->tapfds[pair_idx] = tapfd; 394 return 0; 395 } 396 397 struct virtio_user_backend_ops ops_kernel = { 398 .setup = vhost_kernel_setup, 399 .send_request = vhost_kernel_ioctl, 400 .enable_qp = vhost_kernel_enable_queue_pair 401 }; 402