/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2017 Intel Corporation
 */

#include <arpa/inet.h>
#include <getopt.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/virtio_net.h>
#include <linux/virtio_ring.h>
#include <signal.h>
#include <stdint.h>
#include <sys/eventfd.h>
#include <sys/param.h>
#include <unistd.h>

#include <rte_atomic.h>
#include <rte_cycles.h>
#include <rte_ethdev.h>
#include <rte_log.h>
#include <rte_string_fns.h>
#include <rte_malloc.h>
#include <rte_vhost.h>
#include <rte_ip.h>
#include <rte_tcp.h>
#include <rte_pause.h>

#include "ioat.h"
#include "main.h"

#ifndef MAX_QUEUES
#define MAX_QUEUES 128
#endif

/* the maximum number of external ports supported */
#define MAX_SUP_PORTS 1

#define MBUF_CACHE_SIZE 128
#define MBUF_DATA_SIZE RTE_MBUF_DEFAULT_BUF_SIZE

#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */

#define BURST_RX_WAIT_US 15 /* Defines how long we wait between retries on RX */
#define BURST_RX_RETRIES 4 /* Number of retries on RX. */

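/* 0x2600 = 9728 bytes, a common upper bound for jumbo frames. */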
#define JUMBO_FRAME_MAX_SIZE 0x2600

/* State of virtio device. */
#define DEVICE_MAC_LEARNING 0
#define DEVICE_RX 1
#define DEVICE_SAFE_REMOVE 2

/* Configurable number of RX/TX ring descriptors */
#define RTE_TEST_RX_DESC_DEFAULT 1024
#define RTE_TEST_TX_DESC_DEFAULT 512

#define INVALID_PORT_ID 0xFF

/* Maximum long option length for option parsing. */
#define MAX_LONG_OPT_SZ 64

/* mask of enabled ports */
static uint32_t enabled_port_mask = 0;

/* Promiscuous mode */
static uint32_t promiscuous;

/* number of devices/queues to support */
static uint32_t num_queues = 0;
static uint32_t num_devices;

static struct rte_mempool *mbuf_pool;
static int mergeable;

/* Enable VM2VM communications. If this is disabled then the MAC address compare is skipped. */
typedef enum {
	VM2VM_DISABLED = 0,
	VM2VM_SOFTWARE = 1,
	VM2VM_HARDWARE = 2,
	VM2VM_LAST
} vm2vm_type;
static vm2vm_type vm2vm_mode = VM2VM_SOFTWARE;

/* Enable stats. */
static uint32_t enable_stats = 0;
/* Enable retries on RX. */
static uint32_t enable_retry = 1;

/* Disable TX checksum offload */
static uint32_t enable_tx_csum;

/* Disable TSO offload */
static uint32_t enable_tso;

static int client_mode;

static int builtin_net_driver;

static int async_vhost_driver;

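/* Set via --dma-type; open_dma() below dispatches on it ("ioat" today). */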
static char dma_type[MAX_LONG_OPT_SZ];

/* Specify timeout (in microseconds) between retries on RX. */
static uint32_t burst_rx_delay_time = BURST_RX_WAIT_US;
/* Specify the number of retries on RX. */
static uint32_t burst_rx_retry_num = BURST_RX_RETRIES;

/* Socket file paths. Can be set by user */
static char *socket_files;
static int nb_sockets;

/* Empty VMDQ configuration structure. Filled in programmatically. */
static struct rte_eth_conf vmdq_conf_default = {
	.rxmode = {
		.mq_mode        = ETH_MQ_RX_VMDQ_ONLY,
		.split_hdr_size = 0,
		/*
		 * VLAN strip is necessary for 1G NICs such as the I350;
		 * without it, IPv4 forwarding in the guest cannot forward
		 * packets from one virtio device to another.
		 */
		.offloads = DEV_RX_OFFLOAD_VLAN_STRIP,
	},

	.txmode = {
		.mq_mode = ETH_MQ_TX_NONE,
		.offloads = (DEV_TX_OFFLOAD_IPV4_CKSUM |
			     DEV_TX_OFFLOAD_TCP_CKSUM |
			     DEV_TX_OFFLOAD_VLAN_INSERT |
			     DEV_TX_OFFLOAD_MULTI_SEGS |
			     DEV_TX_OFFLOAD_TCP_TSO),
	},
	.rx_adv_conf = {
		/*
		 * should be overridden separately in code with
		 * appropriate values
		 */
		.vmdq_rx_conf = {
			.nb_queue_pools = ETH_8_POOLS,
			.enable_default_pool = 0,
			.default_pool = 0,
			.nb_pool_maps = 0,
			.pool_map = {{0, 0},},
		},
	},
};


static unsigned lcore_ids[RTE_MAX_LCORE];
static uint16_t ports[RTE_MAX_ETHPORTS];
static unsigned num_ports = 0; /**< The number of ports specified in command line */
static uint16_t num_pf_queues, num_vmdq_queues;
static uint16_t vmdq_pool_base, vmdq_queue_base;
static uint16_t queues_per_pool;

const uint16_t vlan_tags[] = {
	1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007,
	1008, 1009, 1010, 1011, 1012, 1013, 1014, 1015,
	1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023,
	1024, 1025, 1026, 1027, 1028, 1029, 1030, 1031,
	1032, 1033, 1034, 1035, 1036, 1037, 1038, 1039,
	1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047,
	1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055,
	1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063,
};

/* ethernet addresses of ports */
static struct rte_ether_addr vmdq_ports_eth_addr[RTE_MAX_ETHPORTS];

static struct vhost_dev_tailq_list vhost_dev_list =
	TAILQ_HEAD_INITIALIZER(vhost_dev_list);

static struct lcore_info lcore_info[RTE_MAX_LCORE];

/* Used for queueing bursts of TX packets. */
struct mbuf_table {
	unsigned len;
	unsigned txq_id;
	struct rte_mbuf *m_table[MAX_PKT_BURST];
};

/* TX queue for each data core. */
struct mbuf_table lcore_tx_queue[RTE_MAX_LCORE];

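/*
 * Drain period in TSC cycles: cycles per microsecond (rounded up)
 * times BURST_TX_DRAIN_US, i.e. roughly 100us worth of TSC ticks.
 */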
#define MBUF_TABLE_DRAIN_TSC ((rte_get_tsc_hz() + US_PER_S - 1) \
			      / US_PER_S * BURST_TX_DRAIN_US)
#define VLAN_HLEN 4

static inline int
open_dma(const char *value)
{
	if (strncmp(dma_type, "ioat", 4) == 0)
		return open_ioat(value);

	return -1;
}

/*
 * Builds up the correct configuration for VMDQ VLAN pool map
 * according to the pool & queue limits.
 */
static inline int
get_eth_conf(struct rte_eth_conf *eth_conf, uint32_t num_devices)
{
	struct rte_eth_vmdq_rx_conf conf;
	struct rte_eth_vmdq_rx_conf *def_conf =
		&vmdq_conf_default.rx_adv_conf.vmdq_rx_conf;
	unsigned i;

	memset(&conf, 0, sizeof(conf));
	conf.nb_queue_pools = (enum rte_eth_nb_pools)num_devices;
	conf.nb_pool_maps = num_devices;
	conf.enable_loop_back = def_conf->enable_loop_back;
	conf.rx_mode = def_conf->rx_mode;

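	/*
	 * One VLAN tag per pool: packets tagged vlan_tags[i] are steered
	 * to VMDQ pool i (bit i of the pool mask).
	 */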
	for (i = 0; i < conf.nb_pool_maps; i++) {
		conf.pool_map[i].vlan_id = vlan_tags[i];
		conf.pool_map[i].pools = (1UL << i);
	}

	(void)(rte_memcpy(eth_conf, &vmdq_conf_default, sizeof(*eth_conf)));
	(void)(rte_memcpy(&eth_conf->rx_adv_conf.vmdq_rx_conf, &conf,
		   sizeof(eth_conf->rx_adv_conf.vmdq_rx_conf)));
	return 0;
}

/*
 * Initialises a given port using global settings, with the Rx buffers
 * coming from the global mbuf_pool.
 */
static inline int
port_init(uint16_t port)
{
	struct rte_eth_dev_info dev_info;
	struct rte_eth_conf port_conf;
	struct rte_eth_rxconf *rxconf;
	struct rte_eth_txconf *txconf;
	int16_t rx_rings, tx_rings;
	uint16_t rx_ring_size, tx_ring_size;
	int retval;
	uint16_t q;

	/* The max pool number from dev_info will be used to validate the pool number specified in cmd line */
	retval = rte_eth_dev_info_get(port, &dev_info);
	if (retval != 0) {
		RTE_LOG(ERR, VHOST_PORT,
			"Error during getting device (port %u) info: %s\n",
			port, strerror(-retval));

		return retval;
	}

	rxconf = &dev_info.default_rxconf;
	txconf = &dev_info.default_txconf;
	rxconf->rx_drop_en = 1;

	/* Configure the number of supported virtio devices based on VMDQ limits. */
	num_devices = dev_info.max_vmdq_pools;

	rx_ring_size = RTE_TEST_RX_DESC_DEFAULT;
	tx_ring_size = RTE_TEST_TX_DESC_DEFAULT;

	tx_rings = (uint16_t)rte_lcore_count();

	/* Get port configuration. */
	retval = get_eth_conf(&port_conf, num_devices);
	if (retval < 0)
		return retval;
	/* NIC queues are divided into pf queues and vmdq queues. */
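	/*
	 * Illustrative split (actual numbers vary per NIC): with 128 Rx
	 * queues, 64 of them VMDQ queues spread over 32 pools, each pool
	 * gets 64 / 32 = 2 queues and the other 64 queues stay with the PF.
	 */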
	num_pf_queues = dev_info.max_rx_queues - dev_info.vmdq_queue_num;
	queues_per_pool = dev_info.vmdq_queue_num / dev_info.max_vmdq_pools;
	num_vmdq_queues = num_devices * queues_per_pool;
	num_queues = num_pf_queues + num_vmdq_queues;
	vmdq_queue_base = dev_info.vmdq_queue_base;
	vmdq_pool_base = dev_info.vmdq_pool_base;
	printf("pf queue num: %u, configured vmdq pool num: %u, each vmdq pool has %u queues\n",
		num_pf_queues, num_devices, queues_per_pool);

	if (!rte_eth_dev_is_valid_port(port))
		return -1;

	rx_rings = (uint16_t)dev_info.max_rx_queues;
	if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
		port_conf.txmode.offloads |=
			DEV_TX_OFFLOAD_MBUF_FAST_FREE;
	/* Configure ethernet device. */
	retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);
	if (retval != 0) {
		RTE_LOG(ERR, VHOST_PORT, "Failed to configure port %u: %s.\n",
			port, strerror(-retval));
		return retval;
	}

	retval = rte_eth_dev_adjust_nb_rx_tx_desc(port, &rx_ring_size,
		&tx_ring_size);
	if (retval != 0) {
		RTE_LOG(ERR, VHOST_PORT, "Failed to adjust number of descriptors "
			"for port %u: %s.\n", port, strerror(-retval));
		return retval;
	}
	if (rx_ring_size > RTE_TEST_RX_DESC_DEFAULT) {
		RTE_LOG(ERR, VHOST_PORT, "Mbuf pool has an insufficient size "
			"for Rx queues on port %u.\n", port);
		return -1;
	}

	/* Setup the queues. */
	rxconf->offloads = port_conf.rxmode.offloads;
	for (q = 0; q < rx_rings; q++) {
		retval = rte_eth_rx_queue_setup(port, q, rx_ring_size,
						rte_eth_dev_socket_id(port),
						rxconf,
						mbuf_pool);
		if (retval < 0) {
			RTE_LOG(ERR, VHOST_PORT,
				"Failed to setup rx queue %u of port %u: %s.\n",
				q, port, strerror(-retval));
			return retval;
		}
	}
	txconf->offloads = port_conf.txmode.offloads;
	for (q = 0; q < tx_rings; q++) {
		retval = rte_eth_tx_queue_setup(port, q, tx_ring_size,
						rte_eth_dev_socket_id(port),
						txconf);
		if (retval < 0) {
			RTE_LOG(ERR, VHOST_PORT,
				"Failed to setup tx queue %u of port %u: %s.\n",
				q, port, strerror(-retval));
			return retval;
		}
	}

	/* Start the device. */
	retval = rte_eth_dev_start(port);
	if (retval < 0) {
		RTE_LOG(ERR, VHOST_PORT, "Failed to start port %u: %s\n",
			port, strerror(-retval));
		return retval;
	}

	if (promiscuous) {
		retval = rte_eth_promiscuous_enable(port);
		if (retval != 0) {
			RTE_LOG(ERR, VHOST_PORT,
				"Failed to enable promiscuous mode on port %u: %s\n",
				port, rte_strerror(-retval));
			return retval;
		}
	}

	retval = rte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]);
	if (retval < 0) {
		RTE_LOG(ERR, VHOST_PORT,
			"Failed to get MAC address on port %u: %s\n",
			port, rte_strerror(-retval));
		return retval;
	}

	RTE_LOG(INFO, VHOST_PORT, "Max virtio devices supported: %u\n", num_devices);
	RTE_LOG(INFO, VHOST_PORT, "Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
		" %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
		port,
		vmdq_ports_eth_addr[port].addr_bytes[0],
		vmdq_ports_eth_addr[port].addr_bytes[1],
		vmdq_ports_eth_addr[port].addr_bytes[2],
		vmdq_ports_eth_addr[port].addr_bytes[3],
		vmdq_ports_eth_addr[port].addr_bytes[4],
		vmdq_ports_eth_addr[port].addr_bytes[5]);

	return 0;
}

/*
 * Set socket file path.
 */
static int
us_vhost_parse_socket_path(const char *q_arg)
{
	char *old;

	/* Reject paths that are too long. */
	if (strnlen(q_arg, PATH_MAX) == PATH_MAX)
		return -1;

	old = socket_files;
	socket_files = realloc(socket_files, PATH_MAX * (nb_sockets + 1));
	if (socket_files == NULL) {
		free(old);
		return -1;
	}

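	/* socket_files is a flat array of PATH_MAX-byte slots; slot i holds path i. */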
	strlcpy(socket_files + nb_sockets * PATH_MAX, q_arg, PATH_MAX);
	nb_sockets++;

	return 0;
}

/*
 * Parse the portmask provided at run time.
 */
static int
parse_portmask(const char *portmask)
{
	char *end = NULL;
	unsigned long pm;

	errno = 0;

	/* parse hexadecimal string */
	pm = strtoul(portmask, &end, 16);
	if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0') || (errno != 0))
		return 0;

	return pm;

}

/*
 * Parse num options at run time.
 */
static int
parse_num_opt(const char *q_arg, uint32_t max_valid_value)
{
	char *end = NULL;
	unsigned long num;

	errno = 0;

	/* parse unsigned int string */
	num = strtoul(q_arg, &end, 10);
	if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0') || (errno != 0))
		return -1;

	if (num > max_valid_value)
		return -1;

	return num;

}

/*
 * Display usage
 */
static void
us_vhost_usage(const char *prgname)
{
	RTE_LOG(INFO, VHOST_CONFIG, "%s [EAL options] -- -p PORTMASK\n"
	" --vm2vm [0|1|2]\n"
	" --rx_retry [0|1] --mergeable [0|1] --stats [0-N]\n"
	" --socket-file <path>\n"
	" --nb-devices ND\n"
	" -p PORTMASK: Set mask for ports to be used by application\n"
	" --vm2vm [0|1|2]: disable/software(default)/hardware vm2vm comms\n"
	" --rx-retry [0|1]: disable/enable(default) retries on Rx. Enable retry if destination queue is full\n"
	" --rx-retry-delay [0-N]: timeout (in microseconds) between retries on Rx. Takes effect only if Rx retries are enabled\n"
	" --rx-retry-num [0-N]: the number of retries on Rx. Takes effect only if Rx retries are enabled\n"
	" --mergeable [0|1]: disable(default)/enable RX mergeable buffers\n"
	" --stats [0-N]: 0: Disable stats, N: Time in seconds to print stats\n"
	" --socket-file: The path of the socket file.\n"
	" --tx-csum [0|1] disable/enable TX checksum offload.\n"
	" --tso [0|1] disable/enable TCP segment offload.\n"
	" --client register a vhost-user socket as client mode.\n"
	" --dma-type register dma type for your vhost async driver. For example \"ioat\" for now.\n"
	" --dmas register dma channel for specific vhost device.\n",
	       prgname);
}
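
/*
 * Illustrative invocation (binary name and EAL options depend on the
 * build and setup):
 *   ./vhost-switch -l 0-3 -n 4 -- -p 0x1 \
 *       --socket-file /tmp/vhost.sock --client --stats 1
 */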

/*
 * Parse the arguments given in the command line of the application.
 */
static int
us_vhost_parse_args(int argc, char **argv)
{
	int opt, ret;
	int option_index;
	unsigned i;
	const char *prgname = argv[0];
	static struct option long_option[] = {
		{"vm2vm", required_argument, NULL, 0},
		{"rx-retry", required_argument, NULL, 0},
		{"rx-retry-delay", required_argument, NULL, 0},
		{"rx-retry-num", required_argument, NULL, 0},
		{"mergeable", required_argument, NULL, 0},
		{"stats", required_argument, NULL, 0},
		{"socket-file", required_argument, NULL, 0},
		{"tx-csum", required_argument, NULL, 0},
		{"tso", required_argument, NULL, 0},
		{"client", no_argument, &client_mode, 1},
		{"builtin-net-driver", no_argument, &builtin_net_driver, 1},
		{"dma-type", required_argument, NULL, 0},
		{"dmas", required_argument, NULL, 0},
		{NULL, 0, 0, 0},
	};

	/* Parse command line */
	while ((opt = getopt_long(argc, argv, "p:P",
			long_option, &option_index)) != EOF) {
		switch (opt) {
		/* Portmask */
		case 'p':
			enabled_port_mask = parse_portmask(optarg);
			if (enabled_port_mask == 0) {
				RTE_LOG(INFO, VHOST_CONFIG, "Invalid portmask\n");
				us_vhost_usage(prgname);
				return -1;
			}
			break;

		case 'P':
			promiscuous = 1;
			vmdq_conf_default.rx_adv_conf.vmdq_rx_conf.rx_mode =
				ETH_VMDQ_ACCEPT_BROADCAST |
				ETH_VMDQ_ACCEPT_MULTICAST;

			break;

		case 0:
			/* Enable/disable vm2vm comms. */
			if (!strncmp(long_option[option_index].name, "vm2vm",
				MAX_LONG_OPT_SZ)) {
				ret = parse_num_opt(optarg, (VM2VM_LAST - 1));
				if (ret == -1) {
					RTE_LOG(INFO, VHOST_CONFIG,
						"Invalid argument for "
						"vm2vm [0|1|2]\n");
					us_vhost_usage(prgname);
					return -1;
				} else {
					vm2vm_mode = (vm2vm_type)ret;
				}
			}

			/* Enable/disable retries on RX. */
			if (!strncmp(long_option[option_index].name, "rx-retry", MAX_LONG_OPT_SZ)) {
				ret = parse_num_opt(optarg, 1);
				if (ret == -1) {
					RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for rx-retry [0|1]\n");
					us_vhost_usage(prgname);
					return -1;
				} else {
					enable_retry = ret;
				}
			}

			/* Enable/disable TX checksum offload. */
			if (!strncmp(long_option[option_index].name, "tx-csum", MAX_LONG_OPT_SZ)) {
				ret = parse_num_opt(optarg, 1);
				if (ret == -1) {
					RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for tx-csum [0|1]\n");
					us_vhost_usage(prgname);
					return -1;
				} else
					enable_tx_csum = ret;
			}

			/* Enable/disable TSO offload. */
			if (!strncmp(long_option[option_index].name, "tso", MAX_LONG_OPT_SZ)) {
				ret = parse_num_opt(optarg, 1);
				if (ret == -1) {
					RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for tso [0|1]\n");
					us_vhost_usage(prgname);
					return -1;
				} else
					enable_tso = ret;
			}

			/* Specify the retries delay time (in microseconds) on RX. */
			if (!strncmp(long_option[option_index].name, "rx-retry-delay", MAX_LONG_OPT_SZ)) {
				ret = parse_num_opt(optarg, INT32_MAX);
				if (ret == -1) {
					RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for rx-retry-delay [0-N]\n");
					us_vhost_usage(prgname);
					return -1;
				} else {
					burst_rx_delay_time = ret;
				}
			}

			/* Specify the retries number on RX. */
			if (!strncmp(long_option[option_index].name, "rx-retry-num", MAX_LONG_OPT_SZ)) {
				ret = parse_num_opt(optarg, INT32_MAX);
				if (ret == -1) {
					RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for rx-retry-num [0-N]\n");
					us_vhost_usage(prgname);
					return -1;
				} else {
					burst_rx_retry_num = ret;
				}
			}

			/* Enable/disable RX mergeable buffers. */
			if (!strncmp(long_option[option_index].name, "mergeable", MAX_LONG_OPT_SZ)) {
				ret = parse_num_opt(optarg, 1);
				if (ret == -1) {
					RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for mergeable [0|1]\n");
					us_vhost_usage(prgname);
					return -1;
				} else {
					mergeable = !!ret;
					if (ret) {
						vmdq_conf_default.rxmode.offloads |=
							DEV_RX_OFFLOAD_JUMBO_FRAME;
						vmdq_conf_default.rxmode.max_rx_pkt_len
							= JUMBO_FRAME_MAX_SIZE;
					}
				}
			}

			/* Enable/disable stats. */
			if (!strncmp(long_option[option_index].name, "stats", MAX_LONG_OPT_SZ)) {
				ret = parse_num_opt(optarg, INT32_MAX);
				if (ret == -1) {
					RTE_LOG(INFO, VHOST_CONFIG,
						"Invalid argument for stats [0..N]\n");
					us_vhost_usage(prgname);
					return -1;
				} else {
					enable_stats = ret;
				}
			}

			/* Set socket file path. */
			if (!strncmp(long_option[option_index].name,
						"socket-file", MAX_LONG_OPT_SZ)) {
				if (us_vhost_parse_socket_path(optarg) == -1) {
					RTE_LOG(INFO, VHOST_CONFIG,
					"Invalid argument for socket name (Max %d characters)\n",
					PATH_MAX);
					us_vhost_usage(prgname);
					return -1;
				}
			}

			if (!strncmp(long_option[option_index].name,
						"dma-type", MAX_LONG_OPT_SZ)) {
				if (strlen(optarg) >= MAX_LONG_OPT_SZ) {
					RTE_LOG(INFO, VHOST_CONFIG,
						"Wrong DMA type\n");
					us_vhost_usage(prgname);
					return -1;
				}
				strcpy(dma_type, optarg);
			}

			if (!strncmp(long_option[option_index].name,
						"dmas", MAX_LONG_OPT_SZ)) {
				if (open_dma(optarg) == -1) {
					RTE_LOG(INFO, VHOST_CONFIG,
						"Wrong DMA args\n");
					us_vhost_usage(prgname);
					return -1;
				}
				async_vhost_driver = 1;
			}

			break;

		/* Invalid option - print options. */
		default:
			us_vhost_usage(prgname);
			return -1;
		}
	}

	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (enabled_port_mask & (1 << i))
			ports[num_ports++] = i;
	}

	if ((num_ports == 0) || (num_ports > MAX_SUP_PORTS)) {
		RTE_LOG(INFO, VHOST_PORT, "Current enabled port number is %u, "
			"but only %u port can be enabled\n", num_ports, MAX_SUP_PORTS);
		return -1;
	}

	return 0;
}

/*
 * Update the global var NUM_PORTS and array PORTS according to system ports number
 * and return valid ports number
 */
static unsigned check_ports_num(unsigned nb_ports)
{
	unsigned valid_num_ports = num_ports;
	unsigned portid;

	if (num_ports > nb_ports) {
		RTE_LOG(INFO, VHOST_PORT, "\nSpecified port number(%u) exceeds total system port number(%u)\n",
			num_ports, nb_ports);
		num_ports = nb_ports;
	}

	for (portid = 0; portid < num_ports; portid++) {
		if (!rte_eth_dev_is_valid_port(ports[portid])) {
			RTE_LOG(INFO, VHOST_PORT,
				"\nSpecified port ID(%u) is not valid\n",
				ports[portid]);
			ports[portid] = INVALID_PORT_ID;
			valid_num_ports--;
		}
	}
	return valid_num_ports;
}

static __rte_always_inline struct vhost_dev *
find_vhost_dev(struct rte_ether_addr *mac)
{
	struct vhost_dev *vdev;

	TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
		if (vdev->ready == DEVICE_RX &&
		    rte_is_same_ether_addr(mac, &vdev->mac_address))
			return vdev;
	}

	return NULL;
}

/*
 * This function learns the MAC address of the device and registers this along with a
 * vlan tag to a VMDQ.
 */
static int
link_vmdq(struct vhost_dev *vdev, struct rte_mbuf *m)
{
	struct rte_ether_hdr *pkt_hdr;
	int i, ret;

	/* Learn MAC address of guest device from packet */
	pkt_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);

	if (find_vhost_dev(&pkt_hdr->s_addr)) {
		RTE_LOG(ERR, VHOST_DATA,
			"(%d) device is using a registered MAC!\n",
			vdev->vid);
		return -1;
	}

	for (i = 0; i < RTE_ETHER_ADDR_LEN; i++)
		vdev->mac_address.addr_bytes[i] = pkt_hdr->s_addr.addr_bytes[i];

	/* vlan_tag currently uses the device_id. */
	vdev->vlan_tag = vlan_tags[vdev->vid];

	/* Print out VMDQ registration info. */
	RTE_LOG(INFO, VHOST_DATA,
		"(%d) mac %02x:%02x:%02x:%02x:%02x:%02x and vlan %d registered\n",
		vdev->vid,
		vdev->mac_address.addr_bytes[0], vdev->mac_address.addr_bytes[1],
		vdev->mac_address.addr_bytes[2], vdev->mac_address.addr_bytes[3],
		vdev->mac_address.addr_bytes[4], vdev->mac_address.addr_bytes[5],
		vdev->vlan_tag);

	/* Register the MAC address. */
	ret = rte_eth_dev_mac_addr_add(ports[0], &vdev->mac_address,
				(uint32_t)vdev->vid + vmdq_pool_base);
	if (ret)
		RTE_LOG(ERR, VHOST_DATA,
			"(%d) failed to add device MAC address to VMDQ\n",
			vdev->vid);

	rte_eth_dev_set_vlan_strip_on_queue(ports[0], vdev->vmdq_rx_q, 1);

	/* Set device as ready for RX. */
	vdev->ready = DEVICE_RX;

	return 0;
}

/*
 * Removes MAC address and vlan tag from VMDQ. Ensures that nothing is adding buffers to the RX
 * queue before disabling RX on the device.
 */
static inline void
unlink_vmdq(struct vhost_dev *vdev)
{
	unsigned i = 0;
	unsigned rx_count;
	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];

	if (vdev->ready == DEVICE_RX) {
		/* Clear MAC and VLAN settings. */
		rte_eth_dev_mac_addr_remove(ports[0], &vdev->mac_address);
		for (i = 0; i < 6; i++)
			vdev->mac_address.addr_bytes[i] = 0;

		vdev->vlan_tag = 0;

		/* Clear out the receive buffers. */
		rx_count = rte_eth_rx_burst(ports[0],
			(uint16_t)vdev->vmdq_rx_q, pkts_burst, MAX_PKT_BURST);

		while (rx_count) {
			for (i = 0; i < rx_count; i++)
				rte_pktmbuf_free(pkts_burst[i]);

			rx_count = rte_eth_rx_burst(ports[0],
				(uint16_t)vdev->vmdq_rx_q, pkts_burst, MAX_PKT_BURST);
		}

		vdev->ready = DEVICE_MAC_LEARNING;
	}
}

static __rte_always_inline void
virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
	    struct rte_mbuf *m)
{
	uint16_t ret;
	struct rte_mbuf *m_cpl[1];

	if (builtin_net_driver) {
		ret = vs_enqueue_pkts(dst_vdev, VIRTIO_RXQ, &m, 1);
	} else if (async_vhost_driver) {
		ret = rte_vhost_submit_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ,
						&m, 1);

		if (likely(ret))
			dst_vdev->nr_async_pkts++;

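		/*
		 * Busy-poll until the DMA copy of this packet completes,
		 * so the enqueue is finished before the stats are updated.
		 */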
		while (likely(dst_vdev->nr_async_pkts)) {
			if (rte_vhost_poll_enqueue_completed(dst_vdev->vid,
					VIRTIO_RXQ, m_cpl, 1))
				dst_vdev->nr_async_pkts--;
		}
	} else {
		ret = rte_vhost_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ, &m, 1);
	}

	if (enable_stats) {
		rte_atomic64_inc(&dst_vdev->stats.rx_total_atomic);
		rte_atomic64_add(&dst_vdev->stats.rx_atomic, ret);
		src_vdev->stats.tx_total++;
		src_vdev->stats.tx += ret;
	}
}

/*
 * Check if the packet destination MAC address is for a local device. If so then put
 * the packet on that device's RX queue. If not then return.
 */
static __rte_always_inline int
virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf *m)
{
	struct rte_ether_hdr *pkt_hdr;
	struct vhost_dev *dst_vdev;

	pkt_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);

	dst_vdev = find_vhost_dev(&pkt_hdr->d_addr);
	if (!dst_vdev)
		return -1;

	if (vdev->vid == dst_vdev->vid) {
		RTE_LOG_DP(DEBUG, VHOST_DATA,
			"(%d) TX: src and dst MAC is same. Dropping packet.\n",
			vdev->vid);
		return 0;
	}

	RTE_LOG_DP(DEBUG, VHOST_DATA,
		"(%d) TX: MAC address is local\n", dst_vdev->vid);

	if (unlikely(dst_vdev->remove)) {
		RTE_LOG_DP(DEBUG, VHOST_DATA,
			"(%d) device is marked for removal\n", dst_vdev->vid);
		return 0;
	}

	virtio_xmit(dst_vdev, vdev, m);
	return 0;
}

/*
 * Check if the destination MAC of a packet is one local VM,
 * and get its vlan tag, and offset if it is.
 */
static __rte_always_inline int
find_local_dest(struct vhost_dev *vdev, struct rte_mbuf *m,
	uint32_t *offset, uint16_t *vlan_tag)
{
	struct vhost_dev *dst_vdev;
	struct rte_ether_hdr *pkt_hdr =
		rte_pktmbuf_mtod(m, struct rte_ether_hdr *);

	dst_vdev = find_vhost_dev(&pkt_hdr->d_addr);
	if (!dst_vdev)
		return 0;

	if (vdev->vid == dst_vdev->vid) {
		RTE_LOG_DP(DEBUG, VHOST_DATA,
			"(%d) TX: src and dst MAC is same. Dropping packet.\n",
			vdev->vid);
		return -1;
	}

	/*
	 * HW VLAN strip shortens the packet by the length of the VLAN tag,
	 * so restore the packet length by adding it back.
	 */
	*offset = VLAN_HLEN;
	*vlan_tag = vlan_tags[vdev->vid];

	RTE_LOG_DP(DEBUG, VHOST_DATA,
		"(%d) TX: pkt to local VM device id: (%d), vlan tag: %u.\n",
		vdev->vid, dst_vdev->vid, *vlan_tag);

	return 0;
}

static uint16_t
get_psd_sum(void *l3_hdr, uint64_t ol_flags)
{
	if (ol_flags & PKT_TX_IPV4)
		return rte_ipv4_phdr_cksum(l3_hdr, ol_flags);
	else /* assume ethertype == RTE_ETHER_TYPE_IPV6 */
		return rte_ipv6_phdr_cksum(l3_hdr, ol_flags);
}

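/*
 * Prepare a TSO packet for the NIC: zero the IPv4 header checksum (the
 * hardware recomputes it) and seed the TCP checksum with the pseudo-header
 * sum, following the DPDK Tx offload convention.
 */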
static void virtio_tx_offload(struct rte_mbuf *m)
{
	void *l3_hdr;
	struct rte_ipv4_hdr *ipv4_hdr = NULL;
	struct rte_tcp_hdr *tcp_hdr = NULL;
	struct rte_ether_hdr *eth_hdr =
		rte_pktmbuf_mtod(m, struct rte_ether_hdr *);

	l3_hdr = (char *)eth_hdr + m->l2_len;

	if (m->ol_flags & PKT_TX_IPV4) {
		ipv4_hdr = l3_hdr;
		ipv4_hdr->hdr_checksum = 0;
		m->ol_flags |= PKT_TX_IP_CKSUM;
	}

	tcp_hdr = (struct rte_tcp_hdr *)((char *)l3_hdr + m->l3_len);
	tcp_hdr->cksum = get_psd_sum(l3_hdr, m->ol_flags);
}

static inline void
free_pkts(struct rte_mbuf **pkts, uint16_t n)
{
	while (n--)
		rte_pktmbuf_free(pkts[n]);
}

static __rte_always_inline void
do_drain_mbuf_table(struct mbuf_table *tx_q)
{
	uint16_t count;

	count = rte_eth_tx_burst(ports[0], tx_q->txq_id,
				 tx_q->m_table, tx_q->len);
	if (unlikely(count < tx_q->len))
		free_pkts(&tx_q->m_table[count], tx_q->len - count);

	tx_q->len = 0;
}

/*
 * This function routes the TX packet to the correct interface. This
 * may be a local device or the physical port.
 */
static __rte_always_inline void
virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m, uint16_t vlan_tag)
{
	struct mbuf_table *tx_q;
	unsigned offset = 0;
	const uint16_t lcore_id = rte_lcore_id();
	struct rte_ether_hdr *nh;


	nh = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
	if (unlikely(rte_is_broadcast_ether_addr(&nh->d_addr))) {
		struct vhost_dev *vdev2;

		TAILQ_FOREACH(vdev2, &vhost_dev_list, global_vdev_entry) {
			if (vdev2 != vdev)
				virtio_xmit(vdev2, vdev, m);
		}
		goto queue2nic;
	}

	/* Check if destination is a local VM. */
	if ((vm2vm_mode == VM2VM_SOFTWARE) && (virtio_tx_local(vdev, m) == 0)) {
		rte_pktmbuf_free(m);
		return;
	}

	if (unlikely(vm2vm_mode == VM2VM_HARDWARE)) {
		if (unlikely(find_local_dest(vdev, m, &offset,
					     &vlan_tag) != 0)) {
			rte_pktmbuf_free(m);
			return;
		}
	}

	RTE_LOG_DP(DEBUG, VHOST_DATA,
		"(%d) TX: MAC address is external\n", vdev->vid);

queue2nic:

	/* Add packet to the port tx queue. */
	tx_q = &lcore_tx_queue[lcore_id];

	nh = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
	if (unlikely(nh->ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN))) {
		/* Guest has inserted the vlan tag. */
		struct rte_vlan_hdr *vh = (struct rte_vlan_hdr *) (nh + 1);
		uint16_t vlan_tag_be = rte_cpu_to_be_16(vlan_tag);
		if ((vm2vm_mode == VM2VM_HARDWARE) &&
			(vh->vlan_tci != vlan_tag_be))
			vh->vlan_tci = vlan_tag_be;
	} else {
		m->ol_flags |= PKT_TX_VLAN_PKT;

		/*
		 * Find the right seg to adjust the data len when offset is
		 * bigger than tail room size.
		 */
		if (unlikely(vm2vm_mode == VM2VM_HARDWARE)) {
			if (likely(offset <= rte_pktmbuf_tailroom(m)))
				m->data_len += offset;
			else {
				struct rte_mbuf *seg = m;

				while ((seg->next != NULL) &&
					(offset > rte_pktmbuf_tailroom(seg)))
					seg = seg->next;

				seg->data_len += offset;
			}
			m->pkt_len += offset;
		}

		m->vlan_tci = vlan_tag;
	}

	if (m->ol_flags & PKT_TX_TCP_SEG)
		virtio_tx_offload(m);

	tx_q->m_table[tx_q->len++] = m;
	if (enable_stats) {
		vdev->stats.tx_total++;
		vdev->stats.tx++;
	}

	if (unlikely(tx_q->len == MAX_PKT_BURST))
		do_drain_mbuf_table(tx_q);
}


static __rte_always_inline void
drain_mbuf_table(struct mbuf_table *tx_q)
{
	static uint64_t prev_tsc;
	uint64_t cur_tsc;

	if (tx_q->len == 0)
		return;

	cur_tsc = rte_rdtsc();
	if (unlikely(cur_tsc - prev_tsc > MBUF_TABLE_DRAIN_TSC)) {
		prev_tsc = cur_tsc;

		RTE_LOG_DP(DEBUG, VHOST_DATA,
			"TX queue drained after timeout with burst size %u\n",
			tx_q->len);
		do_drain_mbuf_table(tx_q);
	}
}

static __rte_always_inline void
complete_async_pkts(struct vhost_dev *vdev, uint16_t qid)
{
	struct rte_mbuf *p_cpl[MAX_PKT_BURST];
	uint16_t complete_count;

	complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
					qid, p_cpl, MAX_PKT_BURST);
	vdev->nr_async_pkts -= complete_count;
	if (complete_count)
		free_pkts(p_cpl, complete_count);
}

static __rte_always_inline void
drain_eth_rx(struct vhost_dev *vdev)
{
	uint16_t rx_count, enqueue_count;
	struct rte_mbuf *pkts[MAX_PKT_BURST];

	rx_count = rte_eth_rx_burst(ports[0], vdev->vmdq_rx_q,
				    pkts, MAX_PKT_BURST);

	while (likely(vdev->nr_async_pkts))
		complete_async_pkts(vdev, VIRTIO_RXQ);

	if (!rx_count)
		return;

	/*
	 * When "enable_retry" is set, wait and retry when there are not
	 * enough free slots in the queue to hold @rx_count packets,
	 * to reduce packet loss.
	 */
	if (enable_retry &&
	    unlikely(rx_count > rte_vhost_avail_entries(vdev->vid,
			VIRTIO_RXQ))) {
		uint32_t retry;

		for (retry = 0; retry < burst_rx_retry_num; retry++) {
			rte_delay_us(burst_rx_delay_time);
			if (rx_count <= rte_vhost_avail_entries(vdev->vid,
					VIRTIO_RXQ))
				break;
		}
	}

	if (builtin_net_driver) {
		enqueue_count = vs_enqueue_pkts(vdev, VIRTIO_RXQ,
						pkts, rx_count);
	} else if (async_vhost_driver) {
		enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid,
					VIRTIO_RXQ, pkts, rx_count);
		vdev->nr_async_pkts += enqueue_count;
	} else {
		enqueue_count = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
						pkts, rx_count);
	}

	if (enable_stats) {
		rte_atomic64_add(&vdev->stats.rx_total_atomic, rx_count);
		rte_atomic64_add(&vdev->stats.rx_atomic, enqueue_count);
	}

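	/*
	 * In the async path the DMA engine still owns the mbufs; they are
	 * freed in complete_async_pkts() once the copies finish, so only
	 * the synchronous paths free them here.
	 */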
	if (!async_vhost_driver)
		free_pkts(pkts, rx_count);
}

static __rte_always_inline void
drain_virtio_tx(struct vhost_dev *vdev)
{
	struct rte_mbuf *pkts[MAX_PKT_BURST];
	uint16_t count;
	uint16_t i;

	if (builtin_net_driver) {
		count = vs_dequeue_pkts(vdev, VIRTIO_TXQ, mbuf_pool,
					pkts, MAX_PKT_BURST);
	} else {
		count = rte_vhost_dequeue_burst(vdev->vid, VIRTIO_TXQ,
					mbuf_pool, pkts, MAX_PKT_BURST);
	}

	/* setup VMDq for the first packet */
	if (unlikely(vdev->ready == DEVICE_MAC_LEARNING) && count) {
		if (vdev->remove || link_vmdq(vdev, pkts[0]) == -1)
			free_pkts(pkts, count);
	}

	for (i = 0; i < count; ++i)
		virtio_tx_route(vdev, pkts[i], vlan_tags[vdev->vid]);
}

/*
 * Main function of vhost-switch. It basically does:
 *
 * for each vhost device {
 *    - drain_eth_rx()
 *
 *      Which drains the host eth Rx queue linked to the vhost device,
 *      and deliver all of them to guest virtio Rx ring associated with
 *      this vhost device.
 *
 *    - drain_virtio_tx()
 *
 *      Which drains the guest virtio Tx queue and deliver all of them
 *      to the target, which could be another vhost device, or the
 *      physical eth dev. The route is done in function "virtio_tx_route".
 * }
 */
static int
switch_worker(void *arg __rte_unused)
{
	unsigned i;
	unsigned lcore_id = rte_lcore_id();
	struct vhost_dev *vdev;
	struct mbuf_table *tx_q;

	RTE_LOG(INFO, VHOST_DATA, "Processing on Core %u started\n", lcore_id);

	tx_q = &lcore_tx_queue[lcore_id];
	for (i = 0; i < rte_lcore_count(); i++) {
		if (lcore_ids[i] == lcore_id) {
			tx_q->txq_id = i;
			break;
		}
	}

	while(1) {
		drain_mbuf_table(tx_q);

		/*
		 * Inform the configuration core that we have exited the
		 * linked list and that no devices are in use if requested.
		 */
		if (lcore_info[lcore_id].dev_removal_flag == REQUEST_DEV_REMOVAL)
			lcore_info[lcore_id].dev_removal_flag = ACK_DEV_REMOVAL;

		/*
		 * Process vhost devices
		 */
		TAILQ_FOREACH(vdev, &lcore_info[lcore_id].vdev_list,
			      lcore_vdev_entry) {
			if (unlikely(vdev->remove)) {
				unlink_vmdq(vdev);
				vdev->ready = DEVICE_SAFE_REMOVE;
				continue;
			}

			if (likely(vdev->ready == DEVICE_RX))
				drain_eth_rx(vdev);

			if (likely(!vdev->remove))
				drain_virtio_tx(vdev);
		}
	}

	return 0;
}
1234a9643ea8Slogwang
1235a9643ea8Slogwang /*
1236a9643ea8Slogwang * Remove a device from the specific data core linked list and from the
1237a9643ea8Slogwang * main linked list. Synchronization occurs through the use of the
1238a9643ea8Slogwang * lcore dev_removal_flag. The device is accessed as volatile here to avoid
1239a9643ea8Slogwang * re-ordering of dev->remove=1, which could cause an infinite rte_pause loop.
1240a9643ea8Slogwang */
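/*
 * Summary of the removal handshake implemented below (added for clarity;
 * it restates what the code does, step by step):
 *
 *   control thread                      worker lcore
 *   --------------                      ------------
 *   vdev->remove = 1
 *   spin until ready ==                 sees remove, unlink_vmdq(),
 *     DEVICE_SAFE_REMOVE                sets ready = DEVICE_SAFE_REMOVE
 *   unlink vdev from both lists
 *   flag = REQUEST_DEV_REMOVAL          finishes its iteration,
 *   spin until ACK_DEV_REMOVAL          sets flag = ACK_DEV_REMOVAL
 */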
1241a9643ea8Slogwang static void
1242a9643ea8Slogwang destroy_device(int vid)
1243a9643ea8Slogwang {
1244a9643ea8Slogwang struct vhost_dev *vdev = NULL;
1245a9643ea8Slogwang int lcore;
1246a9643ea8Slogwang
1247a9643ea8Slogwang TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
1248a9643ea8Slogwang if (vdev->vid == vid)
1249a9643ea8Slogwang break;
1250a9643ea8Slogwang }
1251a9643ea8Slogwang if (!vdev)
1252a9643ea8Slogwang return;
1253a9643ea8Slogwang /* Set the remove flag. */
1254a9643ea8Slogwang vdev->remove = 1;
1255a9643ea8Slogwang while(vdev->ready != DEVICE_SAFE_REMOVE) {
1256a9643ea8Slogwang rte_pause();
1257a9643ea8Slogwang }
1258a9643ea8Slogwang
12592bfe3f2eSlogwang if (builtin_net_driver)
12602bfe3f2eSlogwang vs_vhost_net_remove(vdev);
12612bfe3f2eSlogwang
1262a9643ea8Slogwang TAILQ_REMOVE(&lcore_info[vdev->coreid].vdev_list, vdev,
1263a9643ea8Slogwang lcore_vdev_entry);
1264a9643ea8Slogwang TAILQ_REMOVE(&vhost_dev_list, vdev, global_vdev_entry);
1265a9643ea8Slogwang
1266a9643ea8Slogwang
1267a9643ea8Slogwang /* Set the dev_removal_flag on each lcore. */
1268*2d9fd380Sjfb8856606 RTE_LCORE_FOREACH_WORKER(lcore)
1269a9643ea8Slogwang lcore_info[lcore].dev_removal_flag = REQUEST_DEV_REMOVAL;
1270a9643ea8Slogwang
1271a9643ea8Slogwang /*
1272a9643ea8Slogwang * Once each core has set the dev_removal_flag to ACK_DEV_REMOVAL
1273a9643ea8Slogwang * we can be sure that they can no longer access the device removed
1274a9643ea8Slogwang * from the linked lists and that the devices are no longer in use.
1275a9643ea8Slogwang */
1276*2d9fd380Sjfb8856606 RTE_LCORE_FOREACH_WORKER(lcore) {
1277a9643ea8Slogwang while (lcore_info[lcore].dev_removal_flag != ACK_DEV_REMOVAL)
1278a9643ea8Slogwang rte_pause();
1279a9643ea8Slogwang }
1280a9643ea8Slogwang
1281a9643ea8Slogwang lcore_info[vdev->coreid].device_num--;
1282a9643ea8Slogwang
1283a9643ea8Slogwang RTE_LOG(INFO, VHOST_DATA,
1284a9643ea8Slogwang "(%d) device has been removed from data core\n",
1285a9643ea8Slogwang vdev->vid);
1286a9643ea8Slogwang
1287*2d9fd380Sjfb8856606 if (async_vhost_driver)
1288*2d9fd380Sjfb8856606 rte_vhost_async_channel_unregister(vid, VIRTIO_RXQ);
1289*2d9fd380Sjfb8856606
1290a9643ea8Slogwang rte_free(vdev);
1291a9643ea8Slogwang }
1292a9643ea8Slogwang
1293a9643ea8Slogwang /*
1294a9643ea8Slogwang * A new device is added to a data core. First the device is added to the main linked list
12951646932aSjfb8856606 * and then allocated to a specific data core.
1296a9643ea8Slogwang */
1297a9643ea8Slogwang static int
1298a9643ea8Slogwang new_device(int vid)
1299a9643ea8Slogwang {
1300a9643ea8Slogwang int lcore, core_add = 0;
1301a9643ea8Slogwang uint32_t device_num_min = num_devices;
1302a9643ea8Slogwang struct vhost_dev *vdev;
1303a9643ea8Slogwang vdev = rte_zmalloc("vhost device", sizeof(*vdev), RTE_CACHE_LINE_SIZE);
1304a9643ea8Slogwang if (vdev == NULL) {
1305a9643ea8Slogwang RTE_LOG(INFO, VHOST_DATA,
1306a9643ea8Slogwang "(%d) couldn't allocate memory for vhost dev\n",
1307a9643ea8Slogwang vid);
1308a9643ea8Slogwang return -1;
1309a9643ea8Slogwang }
1310a9643ea8Slogwang vdev->vid = vid;
1311a9643ea8Slogwang
13122bfe3f2eSlogwang if (builtin_net_driver)
13132bfe3f2eSlogwang vs_vhost_net_setup(vdev);
13142bfe3f2eSlogwang
1315a9643ea8Slogwang TAILQ_INSERT_TAIL(&vhost_dev_list, vdev, global_vdev_entry);
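/*
 * Each vhost device maps to its own VMDq pool; queues_per_pool and
 * vmdq_queue_base come from the port's VMDq configuration set up in
 * port_init().
 */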
1316a9643ea8Slogwang vdev->vmdq_rx_q = vid * queues_per_pool + vmdq_queue_base;
1317a9643ea8Slogwang
1318a9643ea8Slogwang /* Reset the ready flag. */
1319a9643ea8Slogwang vdev->ready = DEVICE_MAC_LEARNING;
1320a9643ea8Slogwang vdev->remove = 0;
1321a9643ea8Slogwang
1322a9643ea8Slogwang /* Find a suitable lcore to add the device. */
1323*2d9fd380Sjfb8856606 RTE_LCORE_FOREACH_WORKER(lcore) {
1324a9643ea8Slogwang if (lcore_info[lcore].device_num < device_num_min) {
1325a9643ea8Slogwang device_num_min = lcore_info[lcore].device_num;
1326a9643ea8Slogwang core_add = lcore;
1327a9643ea8Slogwang }
1328a9643ea8Slogwang }
1329a9643ea8Slogwang vdev->coreid = core_add;
1330a9643ea8Slogwang
1331a9643ea8Slogwang TAILQ_INSERT_TAIL(&lcore_info[vdev->coreid].vdev_list, vdev,
1332a9643ea8Slogwang lcore_vdev_entry);
1333a9643ea8Slogwang lcore_info[vdev->coreid].device_num++;
1334a9643ea8Slogwang
1335a9643ea8Slogwang /* Disable guest notifications: the data cores busy-poll the virtio rings, so there is no need for the guest to kick the host. */
1336a9643ea8Slogwang rte_vhost_enable_guest_notification(vid, VIRTIO_RXQ, 0);
1337a9643ea8Slogwang rte_vhost_enable_guest_notification(vid, VIRTIO_TXQ, 0);
1338a9643ea8Slogwang
1339a9643ea8Slogwang RTE_LOG(INFO, VHOST_DATA,
1340a9643ea8Slogwang "(%d) device has been added to data core %d\n",
1341a9643ea8Slogwang vid, vdev->coreid);
1342a9643ea8Slogwang
1343*2d9fd380Sjfb8856606 if (async_vhost_driver) {
1344*2d9fd380Sjfb8856606 struct rte_vhost_async_features f;
1345*2d9fd380Sjfb8856606 struct rte_vhost_async_channel_ops channel_ops;
1346*2d9fd380Sjfb8856606 if (strncmp(dma_type, "ioat", 4) == 0) {
1347*2d9fd380Sjfb8856606 channel_ops.transfer_data = ioat_transfer_data_cb;
1348*2d9fd380Sjfb8856606 channel_ops.check_completed_copies =
1349*2d9fd380Sjfb8856606 ioat_check_completed_copies_cb;
1350*2d9fd380Sjfb8856606 f.async_inorder = 1;
1351*2d9fd380Sjfb8856606 f.async_threshold = 256;
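/*
 * Note: f.intval aliases the async_inorder/async_threshold bit-fields set
 * above through the union inside struct rte_vhost_async_features, so the
 * packed value is what the register call below receives.
 */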
1352*2d9fd380Sjfb8856606 return rte_vhost_async_channel_register(vid, VIRTIO_RXQ,
1353*2d9fd380Sjfb8856606 f.intval, &channel_ops);
1354*2d9fd380Sjfb8856606 }
1355*2d9fd380Sjfb8856606 }
1356*2d9fd380Sjfb8856606
1357a9643ea8Slogwang return 0;
1358a9643ea8Slogwang }
1359a9643ea8Slogwang
1360a9643ea8Slogwang /*
1361a9643ea8Slogwang * These callbacks allow devices to be added to or removed from a data core
1362a9643ea8Slogwang * when their configuration is fully complete.
1363a9643ea8Slogwang */
13642bfe3f2eSlogwang static const struct vhost_device_ops virtio_net_device_ops =
1365a9643ea8Slogwang {
1366a9643ea8Slogwang .new_device = new_device,
1367a9643ea8Slogwang .destroy_device = destroy_device,
1368a9643ea8Slogwang };
1369a9643ea8Slogwang
1370a9643ea8Slogwang /*
1371a9643ea8Slogwang * This thread wakes up periodically to print statistics, if the user has
1372a9643ea8Slogwang * enabled them.
1373a9643ea8Slogwang */
1374d30ea906Sjfb8856606 static void *
1375d30ea906Sjfb8856606 print_stats(__rte_unused void *arg)
1376a9643ea8Slogwang {
1377a9643ea8Slogwang struct vhost_dev *vdev;
1378a9643ea8Slogwang uint64_t tx_dropped, rx_dropped;
1379a9643ea8Slogwang uint64_t tx, tx_total, rx, rx_total;
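/* The two byte arrays below are ANSI escape sequences: "ESC[2J" clears the
 * screen and "ESC[1;1H" moves the cursor to the top-left corner. */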
1380a9643ea8Slogwang const char clr[] = { 27, '[', '2', 'J', '\0' };
1381a9643ea8Slogwang const char top_left[] = { 27, '[', '1', ';', '1', 'H','\0' };
1382a9643ea8Slogwang
1383a9643ea8Slogwang while(1) {
1384a9643ea8Slogwang sleep(enable_stats);
1385a9643ea8Slogwang
1386a9643ea8Slogwang /* Clear screen and move to top left */
1387a9643ea8Slogwang printf("%s%s\n", clr, top_left);
1388a9643ea8Slogwang printf("Device statistics =================================\n");
1389a9643ea8Slogwang
1390a9643ea8Slogwang TAILQ_FOREACH(vdev, &vhost_dev_list, global_vdev_entry) {
1391a9643ea8Slogwang tx_total = vdev->stats.tx_total;
1392a9643ea8Slogwang tx = vdev->stats.tx;
1393a9643ea8Slogwang tx_dropped = tx_total - tx;
1394a9643ea8Slogwang
1395a9643ea8Slogwang rx_total = rte_atomic64_read(&vdev->stats.rx_total_atomic);
1396a9643ea8Slogwang rx = rte_atomic64_read(&vdev->stats.rx_atomic);
1397a9643ea8Slogwang rx_dropped = rx_total - rx;
1398a9643ea8Slogwang
1399a9643ea8Slogwang printf("Statistics for device %d\n"
1400a9643ea8Slogwang "-----------------------\n"
1401a9643ea8Slogwang "TX total: %" PRIu64 "\n"
1402a9643ea8Slogwang "TX dropped: %" PRIu64 "\n"
1403a9643ea8Slogwang "TX successful: %" PRIu64 "\n"
1404a9643ea8Slogwang "RX total: %" PRIu64 "\n"
1405a9643ea8Slogwang "RX dropped: %" PRIu64 "\n"
1406a9643ea8Slogwang "RX successful: %" PRIu64 "\n",
1407a9643ea8Slogwang vdev->vid,
1408a9643ea8Slogwang tx_total, tx_dropped, tx,
1409a9643ea8Slogwang rx_total, rx_dropped, rx);
1410a9643ea8Slogwang }
1411a9643ea8Slogwang
1412a9643ea8Slogwang printf("===================================================\n");
14130c6bd470Sfengbojiang
14140c6bd470Sfengbojiang fflush(stdout);
1415a9643ea8Slogwang }
1416d30ea906Sjfb8856606
1417d30ea906Sjfb8856606 return NULL;
1418a9643ea8Slogwang }
1419a9643ea8Slogwang
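/*
 * socket_files is a single flat buffer holding nb_sockets socket paths,
 * each occupying a PATH_MAX-sized slot; hence the "i * PATH_MAX" indexing
 * below.
 */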
14202bfe3f2eSlogwang static void
14212bfe3f2eSlogwang unregister_drivers(int socket_num)
14222bfe3f2eSlogwang {
14232bfe3f2eSlogwang int i, ret;
14242bfe3f2eSlogwang
14252bfe3f2eSlogwang for (i = 0; i < socket_num; i++) {
14262bfe3f2eSlogwang ret = rte_vhost_driver_unregister(socket_files + i * PATH_MAX);
14272bfe3f2eSlogwang if (ret != 0)
14282bfe3f2eSlogwang RTE_LOG(ERR, VHOST_CONFIG,
14292bfe3f2eSlogwang "Fail to unregister vhost driver for %s.\n",
14302bfe3f2eSlogwang socket_files + i * PATH_MAX);
14312bfe3f2eSlogwang }
14322bfe3f2eSlogwang }
14332bfe3f2eSlogwang
1434a9643ea8Slogwang /* When we receive an INT signal, unregister the vhost driver. */
1435a9643ea8Slogwang static void
1436a9643ea8Slogwang sigint_handler(__rte_unused int signum)
1437a9643ea8Slogwang {
1438a9643ea8Slogwang /* Unregister vhost driver. */
14392bfe3f2eSlogwang unregister_drivers(nb_sockets);
14402bfe3f2eSlogwang
1441a9643ea8Slogwang exit(0);
1442a9643ea8Slogwang }
1443a9643ea8Slogwang
1444a9643ea8Slogwang /*
1445a9643ea8Slogwang * While creating an mbuf pool, one key thing is to figure out how
1446a9643ea8Slogwang * many mbuf entries are enough for our use. Here are some
1447a9643ea8Slogwang * guidelines:
1448a9643ea8Slogwang *
1449a9643ea8Slogwang * - Each rx queue would reserve @nr_rx_desc mbufs at queue setup stage
1450a9643ea8Slogwang *
1451a9643ea8Slogwang * - For each switch core (a CPU core that does the packet switching), we
1452a9643ea8Slogwang *   also need to reserve some mbufs for receiving packets from the virtio
1453a9643ea8Slogwang *   Tx queue. How many are enough depends on the usage; it is normally
1454a9643ea8Slogwang *   a simple calculation like the following:
1455a9643ea8Slogwang *
1456a9643ea8Slogwang * MAX_PKT_BURST * max packet size / mbuf size
1457a9643ea8Slogwang *
1458a9643ea8Slogwang * So, we definitely need to allocate more mbufs when TSO is enabled.
1459a9643ea8Slogwang *
1460a9643ea8Slogwang * - Similarly, for each switch core, we should reserve @nr_rx_desc
1461a9643ea8Slogwang *   mbufs for receiving packets from the physical NIC device.
1462a9643ea8Slogwang *
1463a9643ea8Slogwang * - We also need to make sure that, for each switch core, we have
1464a9643ea8Slogwang *   allocated enough mbufs to fill up the mbuf cache.
1465a9643ea8Slogwang */
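/*
 * Worked example (illustrative only; assumes MAX_PKT_BURST is 32 and
 * MBUF_DATA_SIZE is 2176 bytes, i.e. RTE_MBUF_DEFAULT_BUF_SIZE with a
 * 128-byte headroom), with mergeable buffers and TSO disabled (mtu 1500):
 *
 *   nr_mbufs_per_core  = (1500 + 2176) * 32 / (2176 - 128)  =   57
 *   nr_mbufs_per_core += 1024 (nr_rx_desc)                  = 1081
 *   nr_mbufs = nr_queues * 1024 + 1081 * nr_switch_core, per port
 */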
1466a9643ea8Slogwang static void
1467a9643ea8Slogwang create_mbuf_pool(uint16_t nr_port, uint32_t nr_switch_core, uint32_t mbuf_size,
1468a9643ea8Slogwang uint32_t nr_queues, uint32_t nr_rx_desc, uint32_t nr_mbuf_cache)
1469a9643ea8Slogwang {
1470a9643ea8Slogwang uint32_t nr_mbufs;
1471a9643ea8Slogwang uint32_t nr_mbufs_per_core;
1472a9643ea8Slogwang uint32_t mtu = 1500;
1473a9643ea8Slogwang
1474a9643ea8Slogwang if (mergeable)
1475a9643ea8Slogwang mtu = 9000;
1476a9643ea8Slogwang if (enable_tso)
1477a9643ea8Slogwang mtu = 64 * 1024;
1478a9643ea8Slogwang
1479a9643ea8Slogwang nr_mbufs_per_core = (mtu + mbuf_size) * MAX_PKT_BURST /
14802bfe3f2eSlogwang (mbuf_size - RTE_PKTMBUF_HEADROOM);
1481a9643ea8Slogwang nr_mbufs_per_core += nr_rx_desc;
1482a9643ea8Slogwang nr_mbufs_per_core = RTE_MAX(nr_mbufs_per_core, nr_mbuf_cache);
1483a9643ea8Slogwang
1484a9643ea8Slogwang nr_mbufs = nr_queues * nr_rx_desc;
1485a9643ea8Slogwang nr_mbufs += nr_mbufs_per_core * nr_switch_core;
1486a9643ea8Slogwang nr_mbufs *= nr_port;
1487a9643ea8Slogwang
1488a9643ea8Slogwang mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", nr_mbufs,
1489a9643ea8Slogwang nr_mbuf_cache, 0, mbuf_size,
1490a9643ea8Slogwang rte_socket_id());
1491a9643ea8Slogwang if (mbuf_pool == NULL)
1492a9643ea8Slogwang rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
1493a9643ea8Slogwang }
1494a9643ea8Slogwang
1495a9643ea8Slogwang /*
14962bfe3f2eSlogwang * Main function, does initialisation and calls the per-lcore functions.
1497a9643ea8Slogwang */
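/*
 * Illustrative invocation (a sketch only; the exact option spellings are
 * defined in us_vhost_parse_args(), and the EAL core/port arguments must
 * match the target machine):
 *
 *   ./dpdk-vhost -l 0-3 -n 4 -- -p 0x1 \
 *       --socket-file /tmp/vhost-net.sock --stats 1
 */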
1498a9643ea8Slogwang int
1499a9643ea8Slogwang main(int argc, char *argv[])
1500a9643ea8Slogwang {
1501a9643ea8Slogwang unsigned lcore_id, core_id = 0;
1502a9643ea8Slogwang unsigned nb_ports, valid_num_ports;
15032bfe3f2eSlogwang int ret, i;
15042bfe3f2eSlogwang uint16_t portid;
1505a9643ea8Slogwang static pthread_t tid;
1506a9643ea8Slogwang uint64_t flags = 0;
1507a9643ea8Slogwang
1508a9643ea8Slogwang signal(SIGINT, sigint_handler);
1509a9643ea8Slogwang
1510a9643ea8Slogwang /* init EAL */
1511a9643ea8Slogwang ret = rte_eal_init(argc, argv);
1512a9643ea8Slogwang if (ret < 0)
1513a9643ea8Slogwang rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
1514a9643ea8Slogwang argc -= ret;
1515a9643ea8Slogwang argv += ret;
1516a9643ea8Slogwang
1517a9643ea8Slogwang /* parse app arguments */
1518a9643ea8Slogwang ret = us_vhost_parse_args(argc, argv);
1519a9643ea8Slogwang if (ret < 0)
1520a9643ea8Slogwang rte_exit(EXIT_FAILURE, "Invalid argument\n");
1521a9643ea8Slogwang
15222bfe3f2eSlogwang for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
1523a9643ea8Slogwang TAILQ_INIT(&lcore_info[lcore_id].vdev_list);
1524a9643ea8Slogwang
1525a9643ea8Slogwang if (rte_lcore_is_enabled(lcore_id))
1526a9643ea8Slogwang lcore_ids[core_id++] = lcore_id;
15272bfe3f2eSlogwang }
1528a9643ea8Slogwang
1529a9643ea8Slogwang if (rte_lcore_count() > RTE_MAX_LCORE)
1530a9643ea8Slogwang rte_exit(EXIT_FAILURE, "Not enough cores\n");
1531a9643ea8Slogwang
1532a9643ea8Slogwang /* Get the number of physical ports. */
1533d30ea906Sjfb8856606 nb_ports = rte_eth_dev_count_avail();
1534a9643ea8Slogwang
1535a9643ea8Slogwang /*
1536a9643ea8Slogwang * Update the global var NUM_PORTS and global array PORTS
1537a9643ea8Slogwang * and get value of var VALID_NUM_PORTS according to system ports number
1538a9643ea8Slogwang */
1539a9643ea8Slogwang valid_num_ports = check_ports_num(nb_ports);
1540a9643ea8Slogwang
1541a9643ea8Slogwang if ((valid_num_ports == 0) || (valid_num_ports > MAX_SUP_PORTS)) {
1542a9643ea8Slogwang RTE_LOG(INFO, VHOST_PORT, "Current enabled port number is %u, "
1543a9643ea8Slogwang "but only %u port can be enabled\n", num_ports, MAX_SUP_PORTS);
1544a9643ea8Slogwang return -1;
1545a9643ea8Slogwang }
1546a9643ea8Slogwang
1547a9643ea8Slogwang /*
1548a9643ea8Slogwang * FIXME: here we are trying to allocate mbufs big enough for
1549a9643ea8Slogwang * @MAX_QUEUES, but the truth is we're never going to use that
1550a9643ea8Slogwang * many queues here. We probably should only do allocation for
1551a9643ea8Slogwang * those queues we are going to use.
1552a9643ea8Slogwang */
1553a9643ea8Slogwang create_mbuf_pool(valid_num_ports, rte_lcore_count() - 1, MBUF_DATA_SIZE,
1554a9643ea8Slogwang MAX_QUEUES, RTE_TEST_RX_DESC_DEFAULT, MBUF_CACHE_SIZE);
1555a9643ea8Slogwang
1556a9643ea8Slogwang if (vm2vm_mode == VM2VM_HARDWARE) {
1557a9643ea8Slogwang /* Enable VT loopback so the NIC's embedded L2 switch can forward VM-to-VM traffic. */
1558a9643ea8Slogwang vmdq_conf_default.rx_adv_conf.vmdq_rx_conf.enable_loop_back = 1;
1559a9643ea8Slogwang RTE_LOG(DEBUG, VHOST_CONFIG,
1560a9643ea8Slogwang "Enable loop back for L2 switch in vmdq.\n");
1561a9643ea8Slogwang }
1562a9643ea8Slogwang
1563a9643ea8Slogwang /* initialize all ports */
1564d30ea906Sjfb8856606 RTE_ETH_FOREACH_DEV(portid) {
1565a9643ea8Slogwang /* skip ports that are not enabled */
1566a9643ea8Slogwang if ((enabled_port_mask & (1 << portid)) == 0) {
1567a9643ea8Slogwang RTE_LOG(INFO, VHOST_PORT,
1568a9643ea8Slogwang "Skipping disabled port %d\n", portid);
1569a9643ea8Slogwang continue;
1570a9643ea8Slogwang }
1571a9643ea8Slogwang if (port_init(portid) != 0)
1572a9643ea8Slogwang rte_exit(EXIT_FAILURE,
1573a9643ea8Slogwang "Cannot initialize network ports\n");
1574a9643ea8Slogwang }
1575a9643ea8Slogwang
1576a9643ea8Slogwang /* Enable stats if the user option is set. */
1577a9643ea8Slogwang if (enable_stats) {
1578d30ea906Sjfb8856606 ret = rte_ctrl_thread_create(&tid, "print-stats", NULL,
1579d30ea906Sjfb8856606 print_stats, NULL);
1580d30ea906Sjfb8856606 if (ret < 0)
1581a9643ea8Slogwang rte_exit(EXIT_FAILURE,
1582a9643ea8Slogwang "Cannot create print-stats thread\n");
1583a9643ea8Slogwang }
1584a9643ea8Slogwang
1585a9643ea8Slogwang /* Launch all data cores. */
1586*2d9fd380Sjfb8856606 RTE_LCORE_FOREACH_WORKER(lcore_id)
1587a9643ea8Slogwang rte_eal_remote_launch(switch_worker, NULL, lcore_id);
1588a9643ea8Slogwang
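/*
 * In client mode the application connects to a socket created by the
 * front end (e.g. QEMU) instead of creating and listening on one itself.
 */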
1589a9643ea8Slogwang if (client_mode)
1590a9643ea8Slogwang flags |= RTE_VHOST_USER_CLIENT;
1591a9643ea8Slogwang
15922bfe3f2eSlogwang /* Register vhost user driver to handle vhost messages. */
15932bfe3f2eSlogwang for (i = 0; i < nb_sockets; i++) {
15942bfe3f2eSlogwang char *file = socket_files + i * PATH_MAX;
1595*2d9fd380Sjfb8856606 if (async_vhost_driver)
1596*2d9fd380Sjfb8856606 flags = flags | RTE_VHOST_USER_ASYNC_COPY;
1597*2d9fd380Sjfb8856606
15982bfe3f2eSlogwang ret = rte_vhost_driver_register(file, flags);
15992bfe3f2eSlogwang if (ret != 0) {
16002bfe3f2eSlogwang unregister_drivers(i);
16012bfe3f2eSlogwang rte_exit(EXIT_FAILURE,
16022bfe3f2eSlogwang "vhost driver register failure.\n");
16032bfe3f2eSlogwang }
1604a9643ea8Slogwang
16052bfe3f2eSlogwang if (builtin_net_driver)
16062bfe3f2eSlogwang rte_vhost_driver_set_features(file, VIRTIO_NET_FEATURES);
16072bfe3f2eSlogwang
16082bfe3f2eSlogwang if (mergeable == 0) {
16092bfe3f2eSlogwang rte_vhost_driver_disable_features(file,
16102bfe3f2eSlogwang 1ULL << VIRTIO_NET_F_MRG_RXBUF);
16112bfe3f2eSlogwang }
16122bfe3f2eSlogwang
16132bfe3f2eSlogwang if (enable_tx_csum == 0) {
16142bfe3f2eSlogwang rte_vhost_driver_disable_features(file,
16152bfe3f2eSlogwang 1ULL << VIRTIO_NET_F_CSUM);
16162bfe3f2eSlogwang }
16172bfe3f2eSlogwang
16182bfe3f2eSlogwang if (enable_tso == 0) {
16192bfe3f2eSlogwang rte_vhost_driver_disable_features(file,
16202bfe3f2eSlogwang 1ULL << VIRTIO_NET_F_HOST_TSO4);
16212bfe3f2eSlogwang rte_vhost_driver_disable_features(file,
16222bfe3f2eSlogwang 1ULL << VIRTIO_NET_F_HOST_TSO6);
16232bfe3f2eSlogwang rte_vhost_driver_disable_features(file,
16242bfe3f2eSlogwang 1ULL << VIRTIO_NET_F_GUEST_TSO4);
16252bfe3f2eSlogwang rte_vhost_driver_disable_features(file,
16262bfe3f2eSlogwang 1ULL << VIRTIO_NET_F_GUEST_TSO6);
16272bfe3f2eSlogwang }
16282bfe3f2eSlogwang
16292bfe3f2eSlogwang if (promiscuous) {
16302bfe3f2eSlogwang rte_vhost_driver_enable_features(file,
16312bfe3f2eSlogwang 1ULL << VIRTIO_NET_F_CTRL_RX);
16322bfe3f2eSlogwang }
16332bfe3f2eSlogwang
16342bfe3f2eSlogwang ret = rte_vhost_driver_callback_register(file,
16352bfe3f2eSlogwang &virtio_net_device_ops);
16362bfe3f2eSlogwang if (ret != 0) {
16372bfe3f2eSlogwang rte_exit(EXIT_FAILURE,
16382bfe3f2eSlogwang "failed to register vhost driver callbacks.\n");
16392bfe3f2eSlogwang }
16402bfe3f2eSlogwang
16412bfe3f2eSlogwang if (rte_vhost_driver_start(file) < 0) {
16422bfe3f2eSlogwang rte_exit(EXIT_FAILURE,
16432bfe3f2eSlogwang "failed to start vhost driver.\n");
16442bfe3f2eSlogwang }
16452bfe3f2eSlogwang }
16462bfe3f2eSlogwang
1647*2d9fd380Sjfb8856606 RTE_LCORE_FOREACH_WORKER(lcore_id)
16482bfe3f2eSlogwang rte_eal_wait_lcore(lcore_id);
16492bfe3f2eSlogwang
1650a9643ea8Slogwang return 0;
1651a9643ea8Slogwang
1652a9643ea8Slogwang }