1d30ea906Sjfb8856606 /* SPDX-License-Identifier: BSD-3-Clause
2d30ea906Sjfb8856606  * Copyright(c) 2016 Intel Corporation
32bfe3f2eSlogwang  */
42bfe3f2eSlogwang 
52bfe3f2eSlogwang #include <unistd.h>
62bfe3f2eSlogwang #include <sys/types.h>
72bfe3f2eSlogwang #include <sys/stat.h>
82bfe3f2eSlogwang #include <fcntl.h>
92bfe3f2eSlogwang #include <net/if.h>
10d30ea906Sjfb8856606 #include <net/if_arp.h>
112bfe3f2eSlogwang #include <errno.h>
122bfe3f2eSlogwang #include <string.h>
132bfe3f2eSlogwang #include <limits.h>
142bfe3f2eSlogwang 
15d30ea906Sjfb8856606 #include <rte_ether.h>
16d30ea906Sjfb8856606 
172bfe3f2eSlogwang #include "vhost_kernel_tap.h"
182bfe3f2eSlogwang #include "../virtio_logs.h"
19d30ea906Sjfb8856606 #include "../virtio_pci.h"
20d30ea906Sjfb8856606 
214418919fSjohnjiang int
vhost_kernel_tap_set_offload(int fd,uint64_t features)22d30ea906Sjfb8856606 vhost_kernel_tap_set_offload(int fd, uint64_t features)
23d30ea906Sjfb8856606 {
24d30ea906Sjfb8856606 	unsigned int offload = 0;
25d30ea906Sjfb8856606 
26d30ea906Sjfb8856606 	if (features & (1ULL << VIRTIO_NET_F_GUEST_CSUM)) {
27d30ea906Sjfb8856606 		offload |= TUN_F_CSUM;
28d30ea906Sjfb8856606 		if (features & (1ULL << VIRTIO_NET_F_GUEST_TSO4))
29d30ea906Sjfb8856606 			offload |= TUN_F_TSO4;
30d30ea906Sjfb8856606 		if (features & (1ULL << VIRTIO_NET_F_GUEST_TSO6))
31d30ea906Sjfb8856606 			offload |= TUN_F_TSO6;
32d30ea906Sjfb8856606 		if (features & ((1ULL << VIRTIO_NET_F_GUEST_TSO4) |
33d30ea906Sjfb8856606 			(1ULL << VIRTIO_NET_F_GUEST_TSO6)) &&
34d30ea906Sjfb8856606 			(features & (1ULL << VIRTIO_NET_F_GUEST_ECN)))
35d30ea906Sjfb8856606 			offload |= TUN_F_TSO_ECN;
36d30ea906Sjfb8856606 		if (features & (1ULL << VIRTIO_NET_F_GUEST_UFO))
37d30ea906Sjfb8856606 			offload |= TUN_F_UFO;
38d30ea906Sjfb8856606 	}
39d30ea906Sjfb8856606 
40d30ea906Sjfb8856606 	/* Check if our kernel supports TUNSETOFFLOAD */
41d30ea906Sjfb8856606 	if (ioctl(fd, TUNSETOFFLOAD, 0) != 0 && errno == EINVAL) {
42*0c6bd470Sfengbojiang 		PMD_DRV_LOG(ERR, "Kernel doesn't support TUNSETOFFLOAD\n");
43d30ea906Sjfb8856606 		return -ENOTSUP;
44d30ea906Sjfb8856606 	}
45d30ea906Sjfb8856606 
46d30ea906Sjfb8856606 	if (ioctl(fd, TUNSETOFFLOAD, offload) != 0) {
47d30ea906Sjfb8856606 		offload &= ~TUN_F_UFO;
48d30ea906Sjfb8856606 		if (ioctl(fd, TUNSETOFFLOAD, offload) != 0) {
49d30ea906Sjfb8856606 			PMD_DRV_LOG(ERR, "TUNSETOFFLOAD ioctl() failed: %s\n",
50d30ea906Sjfb8856606 				strerror(errno));
51d30ea906Sjfb8856606 			return -1;
52d30ea906Sjfb8856606 		}
53d30ea906Sjfb8856606 	}
54d30ea906Sjfb8856606 
55d30ea906Sjfb8856606 	return 0;
56d30ea906Sjfb8856606 }
572bfe3f2eSlogwang 
582bfe3f2eSlogwang int
vhost_kernel_tap_set_queue(int fd,bool attach)594418919fSjohnjiang vhost_kernel_tap_set_queue(int fd, bool attach)
604418919fSjohnjiang {
614418919fSjohnjiang 	struct ifreq ifr = {
624418919fSjohnjiang 		.ifr_flags = attach ? IFF_ATTACH_QUEUE : IFF_DETACH_QUEUE,
634418919fSjohnjiang 	};
644418919fSjohnjiang 
654418919fSjohnjiang 	return ioctl(fd, TUNSETQUEUE, &ifr);
664418919fSjohnjiang }
674418919fSjohnjiang 
684418919fSjohnjiang int
vhost_kernel_open_tap(char ** p_ifname,int hdr_size,int req_mq,const char * mac,uint64_t features)69d30ea906Sjfb8856606 vhost_kernel_open_tap(char **p_ifname, int hdr_size, int req_mq,
70d30ea906Sjfb8856606 			 const char *mac, uint64_t features)
712bfe3f2eSlogwang {
722bfe3f2eSlogwang 	unsigned int tap_features;
731646932aSjfb8856606 	char *tap_name = NULL;
742bfe3f2eSlogwang 	int sndbuf = INT_MAX;
752bfe3f2eSlogwang 	struct ifreq ifr;
762bfe3f2eSlogwang 	int tapfd;
774418919fSjohnjiang 	int ret;
782bfe3f2eSlogwang 
792bfe3f2eSlogwang 	/* TODO:
802bfe3f2eSlogwang 	 * 1. verify we can get/set vnet_hdr_len, tap_probe_vnet_hdr_len
812bfe3f2eSlogwang 	 * 2. get number of memory regions from vhost module parameter
822bfe3f2eSlogwang 	 * max_mem_regions, supported in newer version linux kernel
832bfe3f2eSlogwang 	 */
842bfe3f2eSlogwang 	tapfd = open(PATH_NET_TUN, O_RDWR);
852bfe3f2eSlogwang 	if (tapfd < 0) {
862bfe3f2eSlogwang 		PMD_DRV_LOG(ERR, "fail to open %s: %s",
872bfe3f2eSlogwang 			    PATH_NET_TUN, strerror(errno));
882bfe3f2eSlogwang 		return -1;
892bfe3f2eSlogwang 	}
902bfe3f2eSlogwang 
912bfe3f2eSlogwang 	/* Construct ifr */
922bfe3f2eSlogwang 	memset(&ifr, 0, sizeof(ifr));
932bfe3f2eSlogwang 	ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
942bfe3f2eSlogwang 
952bfe3f2eSlogwang 	if (ioctl(tapfd, TUNGETFEATURES, &tap_features) == -1) {
962bfe3f2eSlogwang 		PMD_DRV_LOG(ERR, "TUNGETFEATURES failed: %s", strerror(errno));
972bfe3f2eSlogwang 		goto error;
982bfe3f2eSlogwang 	}
992bfe3f2eSlogwang 	if (tap_features & IFF_ONE_QUEUE)
1002bfe3f2eSlogwang 		ifr.ifr_flags |= IFF_ONE_QUEUE;
1012bfe3f2eSlogwang 
1022bfe3f2eSlogwang 	/* Let tap instead of vhost-net handle vnet header, as the latter does
1032bfe3f2eSlogwang 	 * not support offloading. And in this case, we should not set feature
1042bfe3f2eSlogwang 	 * bit VHOST_NET_F_VIRTIO_NET_HDR.
1052bfe3f2eSlogwang 	 */
1062bfe3f2eSlogwang 	if (tap_features & IFF_VNET_HDR) {
1072bfe3f2eSlogwang 		ifr.ifr_flags |= IFF_VNET_HDR;
1082bfe3f2eSlogwang 	} else {
1092bfe3f2eSlogwang 		PMD_DRV_LOG(ERR, "TAP does not support IFF_VNET_HDR");
1102bfe3f2eSlogwang 		goto error;
1112bfe3f2eSlogwang 	}
1122bfe3f2eSlogwang 
1132bfe3f2eSlogwang 	if (req_mq)
1142bfe3f2eSlogwang 		ifr.ifr_flags |= IFF_MULTI_QUEUE;
1152bfe3f2eSlogwang 
1162bfe3f2eSlogwang 	if (*p_ifname)
1172bfe3f2eSlogwang 		strncpy(ifr.ifr_name, *p_ifname, IFNAMSIZ - 1);
1182bfe3f2eSlogwang 	else
1192bfe3f2eSlogwang 		strncpy(ifr.ifr_name, "tap%d", IFNAMSIZ - 1);
1202bfe3f2eSlogwang 	if (ioctl(tapfd, TUNSETIFF, (void *)&ifr) == -1) {
1212bfe3f2eSlogwang 		PMD_DRV_LOG(ERR, "TUNSETIFF failed: %s", strerror(errno));
1222bfe3f2eSlogwang 		goto error;
1232bfe3f2eSlogwang 	}
1242bfe3f2eSlogwang 
1251646932aSjfb8856606 	tap_name = strdup(ifr.ifr_name);
1261646932aSjfb8856606 	if (!tap_name) {
1271646932aSjfb8856606 		PMD_DRV_LOG(ERR, "strdup ifname failed: %s", strerror(errno));
1281646932aSjfb8856606 		goto error;
1291646932aSjfb8856606 	}
1301646932aSjfb8856606 
131*0c6bd470Sfengbojiang 	if (fcntl(tapfd, F_SETFL, O_NONBLOCK) < 0) {
132*0c6bd470Sfengbojiang 		PMD_DRV_LOG(ERR, "fcntl tapfd failed: %s", strerror(errno));
133*0c6bd470Sfengbojiang 		goto error;
134*0c6bd470Sfengbojiang 	}
1352bfe3f2eSlogwang 
1362bfe3f2eSlogwang 	if (ioctl(tapfd, TUNSETVNETHDRSZ, &hdr_size) < 0) {
1372bfe3f2eSlogwang 		PMD_DRV_LOG(ERR, "TUNSETVNETHDRSZ failed: %s", strerror(errno));
1382bfe3f2eSlogwang 		goto error;
1392bfe3f2eSlogwang 	}
1402bfe3f2eSlogwang 
1412bfe3f2eSlogwang 	if (ioctl(tapfd, TUNSETSNDBUF, &sndbuf) < 0) {
1422bfe3f2eSlogwang 		PMD_DRV_LOG(ERR, "TUNSETSNDBUF failed: %s", strerror(errno));
1432bfe3f2eSlogwang 		goto error;
1442bfe3f2eSlogwang 	}
1452bfe3f2eSlogwang 
1464418919fSjohnjiang 	ret = vhost_kernel_tap_set_offload(tapfd, features);
1474418919fSjohnjiang 	if (ret < 0 && ret != -ENOTSUP)
1484418919fSjohnjiang 		goto error;
149d30ea906Sjfb8856606 
150d30ea906Sjfb8856606 	memset(&ifr, 0, sizeof(ifr));
151d30ea906Sjfb8856606 	ifr.ifr_hwaddr.sa_family = ARPHRD_ETHER;
1524418919fSjohnjiang 	memcpy(ifr.ifr_hwaddr.sa_data, mac, RTE_ETHER_ADDR_LEN);
153d30ea906Sjfb8856606 	if (ioctl(tapfd, SIOCSIFHWADDR, (void *)&ifr) == -1) {
154d30ea906Sjfb8856606 		PMD_DRV_LOG(ERR, "SIOCSIFHWADDR failed: %s", strerror(errno));
155d30ea906Sjfb8856606 		goto error;
156d30ea906Sjfb8856606 	}
1572bfe3f2eSlogwang 
1581646932aSjfb8856606 	free(*p_ifname);
1591646932aSjfb8856606 	*p_ifname = tap_name;
1602bfe3f2eSlogwang 
1612bfe3f2eSlogwang 	return tapfd;
1622bfe3f2eSlogwang error:
1631646932aSjfb8856606 	free(tap_name);
1642bfe3f2eSlogwang 	close(tapfd);
1652bfe3f2eSlogwang 	return -1;
1662bfe3f2eSlogwang }
167