xref: /f-stack/dpdk/drivers/bus/vmbus/vmbus_channel.c (revision 2d9fd380)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright (c) 2018, Microsoft Corporation.
3  * All Rights Reserved.
4  */
5 
6 #include <unistd.h>
7 #include <stdint.h>
8 #include <string.h>
9 #include <sys/uio.h>
10 
11 #include <rte_eal.h>
12 #include <rte_tailq.h>
13 #include <rte_log.h>
14 #include <rte_malloc.h>
15 #include <rte_bus.h>
16 #include <rte_atomic.h>
17 #include <rte_memory.h>
18 #include <rte_bus_vmbus.h>
19 
20 #include "private.h"
21 
/* Atomically OR the bits of mask into the 32-bit word at addr. */
static inline void
vmbus_sync_set_bit(volatile uint32_t *addr, uint32_t mask)
{
	/* GCC __sync builtin: atomic read-modify-write OR; result is unused */
	(void)__sync_fetch_and_or(addr, mask);
}
28 
29 static inline void
vmbus_set_monitor(const struct rte_vmbus_device * dev,uint32_t monitor_id)30 vmbus_set_monitor(const struct rte_vmbus_device *dev, uint32_t monitor_id)
31 {
32 	uint32_t *monitor_addr, monitor_mask;
33 	unsigned int trigger_index;
34 
35 	trigger_index = monitor_id / HV_MON_TRIG_LEN;
36 	monitor_mask = 1u << (monitor_id % HV_MON_TRIG_LEN);
37 
38 	monitor_addr = &dev->monitor_page->trigs[trigger_index].pending;
39 	vmbus_sync_set_bit(monitor_addr, monitor_mask);
40 }
41 
42 static void
vmbus_set_event(const struct rte_vmbus_device * dev,const struct vmbus_channel * chan)43 vmbus_set_event(const struct rte_vmbus_device *dev,
44 		const struct vmbus_channel *chan)
45 {
46 	vmbus_set_monitor(dev, chan->monitor_id);
47 }
48 
49 /*
50  * Set the wait between when hypervisor examines the trigger.
51  */
52 void
rte_vmbus_set_latency(const struct rte_vmbus_device * dev,const struct vmbus_channel * chan,uint32_t latency)53 rte_vmbus_set_latency(const struct rte_vmbus_device *dev,
54 		      const struct vmbus_channel *chan,
55 		      uint32_t latency)
56 {
57 	uint32_t trig_idx = chan->monitor_id / VMBUS_MONTRIG_LEN;
58 	uint32_t trig_offs = chan->monitor_id % VMBUS_MONTRIG_LEN;
59 
60 	if (latency >= UINT16_MAX * 100) {
61 		VMBUS_LOG(ERR, "invalid latency value %u", latency);
62 		return;
63 	}
64 
65 	if (trig_idx >= VMBUS_MONTRIGS_MAX) {
66 		VMBUS_LOG(ERR, "invalid monitor trigger %u",
67 			  trig_idx);
68 		return;
69 	}
70 
71 	/* Host value is expressed in 100 nanosecond units */
72 	dev->monitor_page->lat[trig_idx][trig_offs] = latency / 100;
73 }
74 
75 /*
76  * Notify host that there are data pending on our TX bufring.
77  *
78  * Since this in userspace, rely on the monitor page.
79  * Can't do a hypercall from userspace.
80  */
81 void
rte_vmbus_chan_signal_tx(const struct vmbus_channel * chan)82 rte_vmbus_chan_signal_tx(const struct vmbus_channel *chan)
83 {
84 	const struct rte_vmbus_device *dev = chan->device;
85 	const struct vmbus_br *tbr = &chan->txbr;
86 
87 	/* Make sure all updates are done before signaling host */
88 	rte_smp_wmb();
89 
90 	/* If host is ignoring interrupts? */
91 	if (tbr->vbr->imask)
92 		return;
93 
94 	vmbus_set_event(dev, chan);
95 }
96 
97 
98 /* Do a simple send directly using transmit ring. */
rte_vmbus_chan_send(struct vmbus_channel * chan,uint16_t type,void * data,uint32_t dlen,uint64_t xactid,uint32_t flags,bool * need_sig)99 int rte_vmbus_chan_send(struct vmbus_channel *chan, uint16_t type,
100 			void *data, uint32_t dlen,
101 			uint64_t xactid, uint32_t flags, bool *need_sig)
102 {
103 	struct vmbus_chanpkt pkt;
104 	unsigned int pktlen, pad_pktlen;
105 	const uint32_t hlen = sizeof(pkt);
106 	bool send_evt = false;
107 	uint64_t pad = 0;
108 	struct iovec iov[3];
109 	int error;
110 
111 	pktlen = hlen + dlen;
112 	pad_pktlen = RTE_ALIGN(pktlen, sizeof(uint64_t));
113 
114 	pkt.hdr.type = type;
115 	pkt.hdr.flags = flags;
116 	pkt.hdr.hlen = hlen >> VMBUS_CHANPKT_SIZE_SHIFT;
117 	pkt.hdr.tlen = pad_pktlen >> VMBUS_CHANPKT_SIZE_SHIFT;
118 	pkt.hdr.xactid = xactid;
119 
120 	iov[0].iov_base = &pkt;
121 	iov[0].iov_len = hlen;
122 	iov[1].iov_base = data;
123 	iov[1].iov_len = dlen;
124 	iov[2].iov_base = &pad;
125 	iov[2].iov_len = pad_pktlen - pktlen;
126 
127 	error = vmbus_txbr_write(&chan->txbr, iov, 3, &send_evt);
128 
129 	/*
130 	 * caller sets need_sig to non-NULL if it will handle
131 	 * signaling if required later.
132 	 * if need_sig is NULL, signal now if needed.
133 	 */
134 	if (need_sig)
135 		*need_sig |= send_evt;
136 	else if (error == 0 && send_evt)
137 		rte_vmbus_chan_signal_tx(chan);
138 	return error;
139 }
140 
141 /* Do a scatter/gather send where the descriptor points to data. */
rte_vmbus_chan_send_sglist(struct vmbus_channel * chan,struct vmbus_gpa sg[],uint32_t sglen,void * data,uint32_t dlen,uint64_t xactid,bool * need_sig)142 int rte_vmbus_chan_send_sglist(struct vmbus_channel *chan,
143 			       struct vmbus_gpa sg[], uint32_t sglen,
144 			       void *data, uint32_t dlen,
145 			       uint64_t xactid, bool *need_sig)
146 {
147 	struct vmbus_chanpkt_sglist pkt;
148 	unsigned int pktlen, pad_pktlen, hlen;
149 	bool send_evt = false;
150 	struct iovec iov[4];
151 	uint64_t pad = 0;
152 	int error;
153 
154 	hlen = offsetof(struct vmbus_chanpkt_sglist, gpa[sglen]);
155 	pktlen = hlen + dlen;
156 	pad_pktlen = RTE_ALIGN(pktlen, sizeof(uint64_t));
157 
158 	pkt.hdr.type = VMBUS_CHANPKT_TYPE_GPA;
159 	pkt.hdr.flags = VMBUS_CHANPKT_FLAG_RC;
160 	pkt.hdr.hlen = hlen >> VMBUS_CHANPKT_SIZE_SHIFT;
161 	pkt.hdr.tlen = pad_pktlen >> VMBUS_CHANPKT_SIZE_SHIFT;
162 	pkt.hdr.xactid = xactid;
163 	pkt.rsvd = 0;
164 	pkt.gpa_cnt = sglen;
165 
166 	iov[0].iov_base = &pkt;
167 	iov[0].iov_len = sizeof(pkt);
168 	iov[1].iov_base = sg;
169 	iov[1].iov_len = sizeof(struct vmbus_gpa) * sglen;
170 	iov[2].iov_base = data;
171 	iov[2].iov_len = dlen;
172 	iov[3].iov_base = &pad;
173 	iov[3].iov_len = pad_pktlen - pktlen;
174 
175 	error = vmbus_txbr_write(&chan->txbr, iov, 4, &send_evt);
176 
177 	/* if caller is batching, just propagate the status */
178 	if (need_sig)
179 		*need_sig |= send_evt;
180 	else if (error == 0 && send_evt)
181 		rte_vmbus_chan_signal_tx(chan);
182 	return error;
183 }
184 
rte_vmbus_chan_rx_empty(const struct vmbus_channel * channel)185 bool rte_vmbus_chan_rx_empty(const struct vmbus_channel *channel)
186 {
187 	const struct vmbus_br *br = &channel->rxbr;
188 
189 	rte_smp_rmb();
190 	return br->vbr->rindex == br->vbr->windex;
191 }
192 
193 /* Signal host after reading N bytes */
rte_vmbus_chan_signal_read(struct vmbus_channel * chan,uint32_t bytes_read)194 void rte_vmbus_chan_signal_read(struct vmbus_channel *chan, uint32_t bytes_read)
195 {
196 	struct vmbus_br *rbr = &chan->rxbr;
197 	uint32_t write_sz, pending_sz;
198 
199 	/* No need for signaling on older versions */
200 	if (!rbr->vbr->feature_bits.feat_pending_send_sz)
201 		return;
202 
203 	/* Make sure reading of pending happens after new read index */
204 	rte_smp_mb();
205 
206 	pending_sz = rbr->vbr->pending_send;
207 	if (!pending_sz)
208 		return;
209 
210 	rte_smp_rmb();
211 	write_sz = vmbus_br_availwrite(rbr, rbr->vbr->windex);
212 
213 	/* If there was space before then host was not blocked */
214 	if (write_sz - bytes_read > pending_sz)
215 		return;
216 
217 	/* If pending write will not fit */
218 	if (write_sz <= pending_sz)
219 		return;
220 
221 	vmbus_set_event(chan->device, chan);
222 }
223 
rte_vmbus_chan_recv(struct vmbus_channel * chan,void * data,uint32_t * len,uint64_t * request_id)224 int rte_vmbus_chan_recv(struct vmbus_channel *chan, void *data, uint32_t *len,
225 			uint64_t *request_id)
226 {
227 	struct vmbus_chanpkt_hdr pkt;
228 	uint32_t dlen, hlen, bufferlen = *len;
229 	int error;
230 
231 	*len = 0;
232 
233 	error = vmbus_rxbr_peek(&chan->rxbr, &pkt, sizeof(pkt));
234 	if (error)
235 		return error;
236 
237 	if (unlikely(pkt.hlen < VMBUS_CHANPKT_HLEN_MIN)) {
238 		VMBUS_LOG(ERR, "VMBUS recv, invalid hlen %u", pkt.hlen);
239 		/* XXX this channel is dead actually. */
240 		return -EIO;
241 	}
242 
243 	if (unlikely(pkt.hlen > pkt.tlen)) {
244 		VMBUS_LOG(ERR, "VMBUS recv,invalid hlen %u and tlen %u",
245 			  pkt.hlen, pkt.tlen);
246 		return -EIO;
247 	}
248 
249 	/* Length are in quad words */
250 	hlen = pkt.hlen << VMBUS_CHANPKT_SIZE_SHIFT;
251 	dlen = (pkt.tlen << VMBUS_CHANPKT_SIZE_SHIFT) - hlen;
252 	*len = dlen;
253 
254 	/* If caller buffer is not large enough */
255 	if (unlikely(dlen > bufferlen))
256 		return -ENOBUFS;
257 
258 	if (request_id)
259 		*request_id = pkt.xactid;
260 
261 	/* Read data and skip packet header */
262 	error = vmbus_rxbr_read(&chan->rxbr, data, dlen, hlen);
263 	if (error)
264 		return error;
265 
266 	rte_vmbus_chan_signal_read(chan, dlen + hlen + sizeof(uint64_t));
267 	return 0;
268 }
269 
270 /* TODO: replace this with inplace ring buffer (no copy) */
rte_vmbus_chan_recv_raw(struct vmbus_channel * chan,void * data,uint32_t * len)271 int rte_vmbus_chan_recv_raw(struct vmbus_channel *chan,
272 			    void *data, uint32_t *len)
273 {
274 	struct vmbus_chanpkt_hdr pkt;
275 	uint32_t dlen, bufferlen = *len;
276 	int error;
277 
278 	error = vmbus_rxbr_peek(&chan->rxbr, &pkt, sizeof(pkt));
279 	if (error)
280 		return error;
281 
282 	if (unlikely(pkt.hlen < VMBUS_CHANPKT_HLEN_MIN)) {
283 		VMBUS_LOG(ERR, "VMBUS recv, invalid hlen %u", pkt.hlen);
284 		/* XXX this channel is dead actually. */
285 		return -EIO;
286 	}
287 
288 	if (unlikely(pkt.hlen > pkt.tlen)) {
289 		VMBUS_LOG(ERR, "VMBUS recv,invalid hlen %u and tlen %u",
290 			pkt.hlen, pkt.tlen);
291 		return -EIO;
292 	}
293 
294 	/* Length are in quad words */
295 	dlen = pkt.tlen << VMBUS_CHANPKT_SIZE_SHIFT;
296 	*len = dlen;
297 
298 	/* If caller buffer is not large enough */
299 	if (unlikely(dlen > bufferlen))
300 		return -ENOBUFS;
301 
302 	/* Read data and skip packet header */
303 	error = vmbus_rxbr_read(&chan->rxbr, data, dlen, 0);
304 	if (error)
305 		return error;
306 
307 	/* Return the number of bytes read */
308 	return dlen + sizeof(uint64_t);
309 }
310 
vmbus_chan_create(const struct rte_vmbus_device * device,uint16_t relid,uint16_t subid,uint8_t monitor_id,struct vmbus_channel ** new_chan)311 int vmbus_chan_create(const struct rte_vmbus_device *device,
312 		      uint16_t relid, uint16_t subid, uint8_t monitor_id,
313 		      struct vmbus_channel **new_chan)
314 {
315 	struct vmbus_channel *chan;
316 	int err;
317 
318 	chan = rte_zmalloc_socket("VMBUS", sizeof(*chan), RTE_CACHE_LINE_SIZE,
319 				  device->device.numa_node);
320 	if (!chan)
321 		return -ENOMEM;
322 
323 	STAILQ_INIT(&chan->subchannel_list);
324 	chan->device = device;
325 	chan->subchannel_id = subid;
326 	chan->relid = relid;
327 	chan->monitor_id = monitor_id;
328 	*new_chan = chan;
329 
330 	err = vmbus_uio_map_rings(chan);
331 	if (err) {
332 		rte_free(chan);
333 		return err;
334 	}
335 
336 	return 0;
337 }
338 
339 /* Setup the primary channel */
rte_vmbus_chan_open(struct rte_vmbus_device * device,struct vmbus_channel ** new_chan)340 int rte_vmbus_chan_open(struct rte_vmbus_device *device,
341 			struct vmbus_channel **new_chan)
342 {
343 	struct mapped_vmbus_resource *uio_res;
344 	int err;
345 
346 	uio_res = vmbus_uio_find_resource(device);
347 	if (!uio_res) {
348 		VMBUS_LOG(ERR, "can't find uio resource");
349 		return -EINVAL;
350 	}
351 
352 	err = vmbus_chan_create(device, device->relid, 0,
353 				device->monitor_id, new_chan);
354 	if (!err) {
355 		device->primary = *new_chan;
356 		uio_res->primary = *new_chan;
357 	}
358 
359 	return err;
360 }
361 
rte_vmbus_max_channels(const struct rte_vmbus_device * device)362 int rte_vmbus_max_channels(const struct rte_vmbus_device *device)
363 {
364 	if (vmbus_uio_subchannels_supported(device, device->primary))
365 		return VMBUS_MAX_CHANNELS;
366 	else
367 		return 1;
368 }
369 
370 /* Setup secondary channel */
rte_vmbus_subchan_open(struct vmbus_channel * primary,struct vmbus_channel ** new_chan)371 int rte_vmbus_subchan_open(struct vmbus_channel *primary,
372 			   struct vmbus_channel **new_chan)
373 {
374 	struct vmbus_channel *chan;
375 	int err;
376 
377 	err = vmbus_uio_get_subchan(primary, &chan);
378 	if (err)
379 		return err;
380 
381 	STAILQ_INSERT_TAIL(&primary->subchannel_list, chan, next);
382 	*new_chan = chan;
383 	return 0;
384 }
385 
rte_vmbus_sub_channel_index(const struct vmbus_channel * chan)386 uint16_t rte_vmbus_sub_channel_index(const struct vmbus_channel *chan)
387 {
388 	return chan->subchannel_id;
389 }
390 
rte_vmbus_chan_close(struct vmbus_channel * chan)391 void rte_vmbus_chan_close(struct vmbus_channel *chan)
392 {
393 	const struct rte_vmbus_device *device = chan->device;
394 	struct vmbus_channel *primary = device->primary;
395 
396 	/*
397 	 * intentionally leak primary channel because
398 	 * secondary may still reference it
399 	 */
400 	if (chan != primary) {
401 		STAILQ_REMOVE(&primary->subchannel_list, chan,
402 			      vmbus_channel, next);
403 		rte_free(chan);
404 	}
405 
406 }
407 
vmbus_dump_ring(FILE * f,const char * id,const struct vmbus_br * br)408 static void vmbus_dump_ring(FILE *f, const char *id, const struct vmbus_br *br)
409 {
410 	const struct vmbus_bufring *vbr = br->vbr;
411 	struct vmbus_chanpkt_hdr pkt;
412 
413 	fprintf(f, "%s windex=%u rindex=%u mask=%u pending=%u feature=%#x\n",
414 		id, vbr->windex, vbr->rindex, vbr->imask,
415 		vbr->pending_send, vbr->feature_bits.value);
416 	fprintf(f, " size=%u avail write=%u read=%u\n",
417 		br->dsize, vmbus_br_availwrite(br, vbr->windex),
418 		vmbus_br_availread(br));
419 
420 	if (vmbus_rxbr_peek(br, &pkt, sizeof(pkt)) == 0)
421 		fprintf(f, "  pkt type %#x len %u flags %#x xactid %#"PRIx64"\n",
422 			pkt.type,
423 			pkt.tlen << VMBUS_CHANPKT_SIZE_SHIFT,
424 			pkt.flags, pkt.xactid);
425 }
426 
rte_vmbus_chan_dump(FILE * f,const struct vmbus_channel * chan)427 void rte_vmbus_chan_dump(FILE *f, const struct vmbus_channel *chan)
428 {
429 	fprintf(f, "channel[%u] relid=%u monitor=%u\n",
430 		chan->subchannel_id, chan->relid, chan->monitor_id);
431 	vmbus_dump_ring(f, "rxbr", &chan->rxbr);
432 	vmbus_dump_ring(f, "txbr", &chan->txbr);
433 }
434