/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011, Bryan Venteicher <[email protected]>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Driver for VirtIO block devices. */

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bio.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/msan.h>
#include <sys/sglist.h>
#include <sys/sysctl.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/queue.h>

#include <geom/geom.h>
#include <geom/geom_disk.h>

#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>

#include <dev/virtio/virtio.h>
#include <dev/virtio/virtqueue.h>
#include <dev/virtio/block/virtio_blk.h>

#include "virtio_if.h"

struct vtblk_request {
	struct vtblk_softc *vbr_sc;
	bus_dmamap_t vbr_mapp;

	/* Fields after this point are zeroed for each request. */
	struct virtio_blk_outhdr vbr_hdr;
	struct bio *vbr_bp;
	uint8_t vbr_ack;
	uint8_t vbr_requeue_on_error;
	uint8_t vbr_busdma_wait;
	int vbr_error;
	TAILQ_ENTRY(vtblk_request) vbr_link;
};

enum vtblk_cache_mode {
	VTBLK_CACHE_WRITETHROUGH,
	VTBLK_CACHE_WRITEBACK,
	VTBLK_CACHE_MAX
};

struct vtblk_softc {
	device_t vtblk_dev;
	struct mtx vtblk_mtx;
	uint64_t vtblk_features;
	uint32_t vtblk_flags;
#define VTBLK_FLAG_INDIRECT	0x0001
#define VTBLK_FLAG_DETACH	0x0002
#define VTBLK_FLAG_SUSPEND	0x0004
#define VTBLK_FLAG_BARRIER	0x0008
#define VTBLK_FLAG_WCE_CONFIG	0x0010
#define VTBLK_FLAG_BUSDMA_WAIT	0x0020
#define VTBLK_FLAG_BUSDMA_ALIGN	0x0040

	struct virtqueue *vtblk_vq;
	struct sglist *vtblk_sglist;
	bus_dma_tag_t vtblk_dmat;
	struct disk *vtblk_disk;

	struct bio_queue_head vtblk_bioq;
	TAILQ_HEAD(, vtblk_request)
			vtblk_req_free;
	TAILQ_HEAD(, vtblk_request)
			vtblk_req_ready;
	struct vtblk_request *vtblk_req_ordered;

	int vtblk_max_nsegs;
	int vtblk_request_count;
	enum vtblk_cache_mode vtblk_write_cache;

	struct bio_queue vtblk_dump_queue;
	struct vtblk_request vtblk_dump_request;
};

static struct virtio_feature_desc vtblk_feature_desc[] = {
	{ VIRTIO_BLK_F_BARRIER,		"HostBarrier"	},
	{ VIRTIO_BLK_F_SIZE_MAX,	"MaxSegSize"	},
	{ VIRTIO_BLK_F_SEG_MAX,		"MaxNumSegs"	},
	{ VIRTIO_BLK_F_GEOMETRY,	"DiskGeometry"	},
	{ VIRTIO_BLK_F_RO,		"ReadOnly"	},
	{ VIRTIO_BLK_F_BLK_SIZE,	"BlockSize"	},
	{ VIRTIO_BLK_F_SCSI,		"SCSICmds"	},
	{ VIRTIO_BLK_F_FLUSH,		"FlushCmd"	},
	{ VIRTIO_BLK_F_TOPOLOGY,	"Topology"	},
	{ VIRTIO_BLK_F_CONFIG_WCE,	"ConfigWCE"	},
	{ VIRTIO_BLK_F_MQ,		"Multiqueue"	},
	{ VIRTIO_BLK_F_DISCARD,		"Discard"	},
	{ VIRTIO_BLK_F_WRITE_ZEROES,	"WriteZeros"	},

	{ 0, NULL }
};

static int	vtblk_modevent(module_t, int, void *);

static int	vtblk_probe(device_t);
static int	vtblk_attach(device_t);
static int	vtblk_detach(device_t);
static int	vtblk_suspend(device_t);
static int	vtblk_resume(device_t);
static int	vtblk_shutdown(device_t);
static int	vtblk_attach_completed(device_t);
static int	vtblk_config_change(device_t);

static int	vtblk_open(struct disk *);
static int	vtblk_close(struct disk *);
static int	vtblk_ioctl(struct disk *, u_long, void *, int,
		    struct thread *);
static int	vtblk_dump(void *, void *, off_t, size_t);
static void	vtblk_strategy(struct bio *);

static int	vtblk_negotiate_features(struct vtblk_softc *);
static int	vtblk_setup_features(struct vtblk_softc *);
static int	vtblk_maximum_segments(struct vtblk_softc *,
		    struct virtio_blk_config *);
static int	vtblk_alloc_virtqueue(struct vtblk_softc *);
static void	vtblk_resize_disk(struct vtblk_softc *, uint64_t);
static void	vtblk_alloc_disk(struct vtblk_softc *,
		    struct virtio_blk_config *);
static void	vtblk_create_disk(struct vtblk_softc *);

static int	vtblk_request_prealloc(struct vtblk_softc *);
static void	vtblk_request_free(struct vtblk_softc *);
static struct vtblk_request *
		vtblk_request_dequeue(struct vtblk_softc *);
static void	vtblk_request_enqueue(struct vtblk_softc *,
		    struct vtblk_request *);
static struct vtblk_request *
		vtblk_request_next_ready(struct vtblk_softc *);
static void	vtblk_request_requeue_ready(struct vtblk_softc *,
		    struct vtblk_request *);
static struct vtblk_request *
		vtblk_request_next(struct vtblk_softc *);
static struct vtblk_request *
		vtblk_request_bio(struct vtblk_softc *);
static int	vtblk_request_execute(struct vtblk_request *, int);
static void	vtblk_request_execute_cb(void *,
		    bus_dma_segment_t *, int, int);
static int	vtblk_request_error(struct vtblk_request *);

static void	vtblk_queue_completed(struct vtblk_softc *,
		    struct bio_queue *);
static void	vtblk_done_completed(struct vtblk_softc *,
		    struct bio_queue *);
static void	vtblk_drain_vq(struct vtblk_softc *);
static void	vtblk_drain(struct vtblk_softc *);

static void	vtblk_startio(struct vtblk_softc *);
static void	vtblk_bio_done(struct vtblk_softc *, struct bio *, int);

static void	vtblk_read_config(struct vtblk_softc *,
		    struct virtio_blk_config *);
static void	vtblk_ident(struct vtblk_softc *);
static int	vtblk_poll_request(struct vtblk_softc *,
		    struct vtblk_request *);
static int	vtblk_quiesce(struct vtblk_softc *);
static void	vtblk_vq_intr(void *);
static void	vtblk_stop(struct vtblk_softc *);

static void	vtblk_dump_quiesce(struct vtblk_softc *);
static int	vtblk_dump_write(struct vtblk_softc *, void *, off_t, size_t);
static int	vtblk_dump_flush(struct vtblk_softc *);
static void	vtblk_dump_complete(struct vtblk_softc *);

static void	vtblk_set_write_cache(struct vtblk_softc *, int);
static int	vtblk_write_cache_enabled(struct vtblk_softc *sc,
		    struct virtio_blk_config *);
static int	vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS);

static void	vtblk_setup_sysctl(struct vtblk_softc *);
static int	vtblk_tunable_int(struct vtblk_softc *, const char *, int);

#define vtblk_modern(_sc) (((_sc)->vtblk_features & VIRTIO_F_VERSION_1) != 0)
#define vtblk_htog16(_sc, _val)	virtio_htog16(vtblk_modern(_sc), _val)
#define vtblk_htog32(_sc, _val)	virtio_htog32(vtblk_modern(_sc), _val)
#define vtblk_htog64(_sc, _val)	virtio_htog64(vtblk_modern(_sc), _val)
#define vtblk_gtoh16(_sc, _val)	virtio_gtoh16(vtblk_modern(_sc), _val)
#define vtblk_gtoh32(_sc, _val)	virtio_gtoh32(vtblk_modern(_sc), _val)
#define vtblk_gtoh64(_sc, _val)	virtio_gtoh64(vtblk_modern(_sc), _val)

/* Tunables. */
static int vtblk_no_ident = 0;
TUNABLE_INT("hw.vtblk.no_ident", &vtblk_no_ident);
static int vtblk_writecache_mode = -1;
TUNABLE_INT("hw.vtblk.writecache_mode", &vtblk_writecache_mode);

#define VTBLK_COMMON_FEATURES \
    (VIRTIO_BLK_F_SIZE_MAX		| \
     VIRTIO_BLK_F_SEG_MAX		| \
     VIRTIO_BLK_F_GEOMETRY		| \
     VIRTIO_BLK_F_RO			| \
     VIRTIO_BLK_F_BLK_SIZE		| \
     VIRTIO_BLK_F_FLUSH			| \
     VIRTIO_BLK_F_TOPOLOGY		| \
     VIRTIO_BLK_F_CONFIG_WCE		| \
     VIRTIO_BLK_F_DISCARD		| \
     VIRTIO_RING_F_INDIRECT_DESC)

#define VTBLK_MODERN_FEATURES	(VTBLK_COMMON_FEATURES)
#define VTBLK_LEGACY_FEATURES	(VIRTIO_BLK_F_BARRIER | VTBLK_COMMON_FEATURES)

#define VTBLK_MTX(_sc)		&(_sc)->vtblk_mtx
#define VTBLK_LOCK_INIT(_sc, _name) \
				mtx_init(VTBLK_MTX((_sc)), (_name), \
				    "VirtIO Block Lock", MTX_DEF)
#define VTBLK_LOCK(_sc)		mtx_lock(VTBLK_MTX((_sc)))
#define VTBLK_UNLOCK(_sc)	mtx_unlock(VTBLK_MTX((_sc)))
#define VTBLK_LOCK_DESTROY(_sc)	mtx_destroy(VTBLK_MTX((_sc)))
#define VTBLK_LOCK_ASSERT(_sc)	mtx_assert(VTBLK_MTX((_sc)), MA_OWNED)
#define VTBLK_LOCK_ASSERT_NOTOWNED(_sc) \
				mtx_assert(VTBLK_MTX((_sc)), MA_NOTOWNED)

#define VTBLK_DISK_NAME		"vtbd"
#define VTBLK_QUIESCE_TIMEOUT	(30 * hz)
#define VTBLK_BSIZE		512

/*
 * Each block request uses at least two segments - one for the header
 * and one for the status.
 */
#define VTBLK_MIN_SEGMENTS	2

static device_method_t vtblk_methods[] = {
	/* Device methods. */
	DEVMETHOD(device_probe,		vtblk_probe),
	DEVMETHOD(device_attach,	vtblk_attach),
	DEVMETHOD(device_detach,	vtblk_detach),
	DEVMETHOD(device_suspend,	vtblk_suspend),
	DEVMETHOD(device_resume,	vtblk_resume),
	DEVMETHOD(device_shutdown,	vtblk_shutdown),

	/* VirtIO methods. */
	DEVMETHOD(virtio_attach_completed, vtblk_attach_completed),
	DEVMETHOD(virtio_config_change,	vtblk_config_change),

	DEVMETHOD_END
};

static driver_t vtblk_driver = {
	"vtblk",
	vtblk_methods,
	sizeof(struct vtblk_softc)
};

VIRTIO_DRIVER_MODULE(virtio_blk, vtblk_driver, vtblk_modevent, NULL);
MODULE_VERSION(virtio_blk, 1);
MODULE_DEPEND(virtio_blk, virtio, 1, 1, 1);

VIRTIO_SIMPLE_PNPINFO(virtio_blk, VIRTIO_ID_BLOCK, "VirtIO Block Adapter");

static int
vtblk_modevent(module_t mod, int type, void *unused)
{
	int error;

	error = 0;

	switch (type) {
	case MOD_LOAD:
	case MOD_QUIESCE:
	case MOD_UNLOAD:
	case MOD_SHUTDOWN:
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}

	return (error);
}

static int
vtblk_probe(device_t dev)
{
	return (VIRTIO_SIMPLE_PROBE(dev, virtio_blk));
}

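/*
 * Attach: negotiate features, size the scatter/gather list, create the
 * busdma tag and virtqueue, preallocate requests, and set up the disk.
 * Any failure falls through to vtblk_detach() for cleanup.
 */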
static int
vtblk_attach(device_t dev)
{
	struct vtblk_softc *sc;
	struct virtio_blk_config blkcfg;
	int error;

	sc = device_get_softc(dev);
	sc->vtblk_dev = dev;
	virtio_set_feature_desc(dev, vtblk_feature_desc);

	VTBLK_LOCK_INIT(sc, device_get_nameunit(dev));
	bioq_init(&sc->vtblk_bioq);
	TAILQ_INIT(&sc->vtblk_dump_queue);
	TAILQ_INIT(&sc->vtblk_req_free);
	TAILQ_INIT(&sc->vtblk_req_ready);

	vtblk_setup_sysctl(sc);

	error = vtblk_setup_features(sc);
	if (error) {
		device_printf(dev, "cannot setup features\n");
		goto fail;
	}

	vtblk_read_config(sc, &blkcfg);

	/*
	 * With the current sglist(9) implementation, it is not easy
	 * for us to support a maximum segment size as adjacent
	 * segments are coalesced. For now, just make sure it's larger
	 * than the maximum supported transfer size.
	 */
	if (virtio_with_feature(dev, VIRTIO_BLK_F_SIZE_MAX)) {
		if (blkcfg.size_max < maxphys) {
			error = ENOTSUP;
			device_printf(dev, "host requires unsupported "
			    "maximum segment size feature\n");
			goto fail;
		}
	}

	sc->vtblk_max_nsegs = vtblk_maximum_segments(sc, &blkcfg);
	if (sc->vtblk_max_nsegs <= VTBLK_MIN_SEGMENTS) {
		error = EINVAL;
		device_printf(dev, "fewer than minimum number of segments "
		    "allowed: %d\n", sc->vtblk_max_nsegs);
		goto fail;
	}

	sc->vtblk_sglist = sglist_alloc(sc->vtblk_max_nsegs, M_NOWAIT);
	if (sc->vtblk_sglist == NULL) {
		error = ENOMEM;
		device_printf(dev, "cannot allocate sglist\n");
		goto fail;
	}

	/*
	 * If vtblk_max_nsegs == VTBLK_MIN_SEGMENTS + 1, the device only
	 * supports a single data segment; in that case we need busdma to
	 * align to a page boundary so we can send a *contiguous* page size
	 * request to the host.
	 */
	if (sc->vtblk_max_nsegs == VTBLK_MIN_SEGMENTS + 1)
		sc->vtblk_flags |= VTBLK_FLAG_BUSDMA_ALIGN;
	error = bus_dma_tag_create(
	    bus_get_dma_tag(dev),			/* parent */
	    (sc->vtblk_flags & VTBLK_FLAG_BUSDMA_ALIGN) ? PAGE_SIZE : 1,
	    0,						/* boundary */
	    BUS_SPACE_MAXADDR,				/* lowaddr */
	    BUS_SPACE_MAXADDR,				/* highaddr */
	    NULL, NULL,					/* filter, filterarg */
	    maxphys,					/* max request size */
	    sc->vtblk_max_nsegs - VTBLK_MIN_SEGMENTS,	/* max # segments */
	    maxphys,					/* maxsegsize */
	    0,						/* flags */
	    busdma_lock_mutex,				/* lockfunc */
	    &sc->vtblk_mtx,				/* lockarg */
	    &sc->vtblk_dmat);
	if (error) {
		device_printf(dev, "cannot create bus dma tag\n");
		goto fail;
	}

#ifdef __powerpc__
	/*
	 * Virtio uses physical addresses rather than bus addresses, so we
	 * need to ask busdma to skip the iommu physical->bus mapping. At
	 * present, this is only a thing on the powerpc architectures.
	 */
	bus_dma_tag_set_iommu(sc->vtblk_dmat, NULL, NULL);
#endif

	error = vtblk_alloc_virtqueue(sc);
	if (error) {
		device_printf(dev, "cannot allocate virtqueue\n");
		goto fail;
	}

	error = vtblk_request_prealloc(sc);
	if (error) {
		device_printf(dev, "cannot preallocate requests\n");
		goto fail;
	}

	vtblk_alloc_disk(sc, &blkcfg);

	error = virtio_setup_intr(dev, INTR_TYPE_BIO | INTR_ENTROPY);
	if (error) {
		device_printf(dev, "cannot setup virtqueue interrupt\n");
		goto fail;
	}

	virtqueue_enable_intr(sc->vtblk_vq);

fail:
	if (error)
		vtblk_detach(dev);

	return (error);
}

static int
vtblk_detach(device_t dev)
{
	struct vtblk_softc *sc;

	sc = device_get_softc(dev);

	VTBLK_LOCK(sc);
	sc->vtblk_flags |= VTBLK_FLAG_DETACH;
	if (device_is_attached(dev))
		vtblk_stop(sc);
	VTBLK_UNLOCK(sc);

	vtblk_drain(sc);

	if (sc->vtblk_disk != NULL) {
		disk_destroy(sc->vtblk_disk);
		sc->vtblk_disk = NULL;
	}

	if (sc->vtblk_dmat != NULL) {
		bus_dma_tag_destroy(sc->vtblk_dmat);
		sc->vtblk_dmat = NULL;
	}

	if (sc->vtblk_sglist != NULL) {
		sglist_free(sc->vtblk_sglist);
		sc->vtblk_sglist = NULL;
	}

	VTBLK_LOCK_DESTROY(sc);

	return (0);
}

static int
vtblk_suspend(device_t dev)
{
	struct vtblk_softc *sc;
	int error;

	sc = device_get_softc(dev);

	VTBLK_LOCK(sc);
	sc->vtblk_flags |= VTBLK_FLAG_SUSPEND;
	/* XXX BMV: virtio_stop(), etc needed here? */
	error = vtblk_quiesce(sc);
	if (error)
		sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND;
	VTBLK_UNLOCK(sc);

	return (error);
}

static int
vtblk_resume(device_t dev)
{
	struct vtblk_softc *sc;

	sc = device_get_softc(dev);

	VTBLK_LOCK(sc);
	/* XXX BMV: virtio_reinit(), etc needed here? */
	sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND;
	vtblk_startio(sc);
	VTBLK_UNLOCK(sc);

	return (0);
}

static int
vtblk_shutdown(device_t dev)
{

	return (0);
}

static int
vtblk_attach_completed(device_t dev)
{
	struct vtblk_softc *sc;

	sc = device_get_softc(dev);

	/*
	 * Create disk after attach as VIRTIO_BLK_T_GET_ID can only be
	 * processed after the device acknowledged
	 * VIRTIO_CONFIG_STATUS_DRIVER_OK.
	 */
	vtblk_create_disk(sc);
	return (0);
}

static int
vtblk_config_change(device_t dev)
{
	struct vtblk_softc *sc;
	struct virtio_blk_config blkcfg;
	uint64_t capacity;

	sc = device_get_softc(dev);

	vtblk_read_config(sc, &blkcfg);

	/* Capacity is always in 512-byte units. */
	capacity = blkcfg.capacity * VTBLK_BSIZE;

	if (sc->vtblk_disk->d_mediasize != capacity)
		vtblk_resize_disk(sc, capacity);

	return (0);
}

static int
vtblk_open(struct disk *dp)
{
	struct vtblk_softc *sc;

	if ((sc = dp->d_drv1) == NULL)
		return (ENXIO);

	return (sc->vtblk_flags & VTBLK_FLAG_DETACH ? ENXIO : 0);
}

static int
vtblk_close(struct disk *dp)
{
	struct vtblk_softc *sc;

	if ((sc = dp->d_drv1) == NULL)
		return (ENXIO);

	return (0);
}

static int
vtblk_ioctl(struct disk *dp, u_long cmd, void *addr, int flag,
    struct thread *td)
{
	struct vtblk_softc *sc;

	if ((sc = dp->d_drv1) == NULL)
		return (ENXIO);

	return (ENOTTY);
}

static int
vtblk_dump(void *arg, void *virtual, off_t offset, size_t length)
{
	struct disk *dp;
	struct vtblk_softc *sc;
	int error;

	dp = arg;
	error = 0;

	if ((sc = dp->d_drv1) == NULL)
		return (ENXIO);

	VTBLK_LOCK(sc);

	vtblk_dump_quiesce(sc);

	if (length > 0)
		error = vtblk_dump_write(sc, virtual, offset, length);
	if (error || (virtual == NULL && offset == 0))
		vtblk_dump_complete(sc);

	VTBLK_UNLOCK(sc);

	return (error);
}

static void
vtblk_strategy(struct bio *bp)
{
	struct vtblk_softc *sc;

	if ((sc = bp->bio_disk->d_drv1) == NULL) {
		vtblk_bio_done(NULL, bp, EINVAL);
		return;
	}

	if ((bp->bio_cmd != BIO_READ) && (bp->bio_cmd != BIO_WRITE) &&
	    (bp->bio_cmd != BIO_FLUSH) && (bp->bio_cmd != BIO_DELETE)) {
		vtblk_bio_done(sc, bp, EOPNOTSUPP);
		return;
	}

	VTBLK_LOCK(sc);

	if (sc->vtblk_flags & VTBLK_FLAG_DETACH) {
		VTBLK_UNLOCK(sc);
		vtblk_bio_done(sc, bp, ENXIO);
		return;
	}

	bioq_insert_tail(&sc->vtblk_bioq, bp);
	vtblk_startio(sc);

	VTBLK_UNLOCK(sc);
}

static int
vtblk_negotiate_features(struct vtblk_softc *sc)
{
	device_t dev;
	uint64_t features;

	dev = sc->vtblk_dev;
	features = virtio_bus_is_modern(dev) ? VTBLK_MODERN_FEATURES :
	    VTBLK_LEGACY_FEATURES;

	sc->vtblk_features = virtio_negotiate_features(dev, features);
	return (virtio_finalize_features(dev));
}

static int
vtblk_setup_features(struct vtblk_softc *sc)
{
	device_t dev;
	int error;

	dev = sc->vtblk_dev;

	error = vtblk_negotiate_features(sc);
	if (error)
		return (error);

	if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
		sc->vtblk_flags |= VTBLK_FLAG_INDIRECT;
	if (virtio_with_feature(dev, VIRTIO_BLK_F_CONFIG_WCE))
		sc->vtblk_flags |= VTBLK_FLAG_WCE_CONFIG;

	/* Legacy. */
	if (virtio_with_feature(dev, VIRTIO_BLK_F_BARRIER))
		sc->vtblk_flags |= VTBLK_FLAG_BARRIER;

	return (0);
}

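/*
 * Determine how many descriptors a single request may use: the two fixed
 * header/status segments plus the negotiated data segment limit, capped by
 * the indirect descriptor limit when indirect descriptors are available.
 */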
static int
vtblk_maximum_segments(struct vtblk_softc *sc,
    struct virtio_blk_config *blkcfg)
{
	device_t dev;
	int nsegs;

	dev = sc->vtblk_dev;
	nsegs = VTBLK_MIN_SEGMENTS;

	if (virtio_with_feature(dev, VIRTIO_BLK_F_SEG_MAX)) {
		nsegs += MIN(blkcfg->seg_max, maxphys / PAGE_SIZE + 1);
		if (sc->vtblk_flags & VTBLK_FLAG_INDIRECT)
			nsegs = MIN(nsegs, VIRTIO_MAX_INDIRECT);
	} else
		nsegs += 1;

	return (nsegs);
}

static int
vtblk_alloc_virtqueue(struct vtblk_softc *sc)
{
	device_t dev;
	struct vq_alloc_info vq_info;

	dev = sc->vtblk_dev;

	VQ_ALLOC_INFO_INIT(&vq_info, sc->vtblk_max_nsegs,
	    vtblk_vq_intr, sc, &sc->vtblk_vq,
	    "%s request", device_get_nameunit(dev));

	return (virtio_alloc_virtqueues(dev, 0, 1, &vq_info));
}

static void
vtblk_resize_disk(struct vtblk_softc *sc, uint64_t new_capacity)
{
	device_t dev;
	struct disk *dp;
	int error;

	dev = sc->vtblk_dev;
	dp = sc->vtblk_disk;

	dp->d_mediasize = new_capacity;
	if (bootverbose) {
		device_printf(dev, "resized to %juMB (%ju %u byte sectors)\n",
		    (uintmax_t) dp->d_mediasize >> 20,
		    (uintmax_t) dp->d_mediasize / dp->d_sectorsize,
		    dp->d_sectorsize);
	}

	error = disk_resize(dp, M_NOWAIT);
	if (error) {
		device_printf(dev,
		    "disk_resize(9) failed, error: %d\n", error);
	}
}

static void
vtblk_alloc_disk(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
{
	device_t dev;
	struct disk *dp;

	dev = sc->vtblk_dev;

	sc->vtblk_disk = dp = disk_alloc();
	dp->d_open = vtblk_open;
	dp->d_close = vtblk_close;
	dp->d_ioctl = vtblk_ioctl;
	dp->d_strategy = vtblk_strategy;
	dp->d_name = VTBLK_DISK_NAME;
	dp->d_unit = device_get_unit(dev);
	dp->d_drv1 = sc;
	dp->d_flags = DISKFLAG_UNMAPPED_BIO | DISKFLAG_DIRECT_COMPLETION;
	dp->d_hba_vendor = virtio_get_vendor(dev);
	dp->d_hba_device = virtio_get_device(dev);
	dp->d_hba_subvendor = virtio_get_subvendor(dev);
	dp->d_hba_subdevice = virtio_get_subdevice(dev);

	if (virtio_with_feature(dev, VIRTIO_BLK_F_RO))
		dp->d_flags |= DISKFLAG_WRITE_PROTECT;
	else {
		if (virtio_with_feature(dev, VIRTIO_BLK_F_FLUSH))
			dp->d_flags |= DISKFLAG_CANFLUSHCACHE;
		dp->d_dump = vtblk_dump;
	}

	/* Capacity is always in 512-byte units. */
	dp->d_mediasize = blkcfg->capacity * VTBLK_BSIZE;

	if (virtio_with_feature(dev, VIRTIO_BLK_F_BLK_SIZE))
		dp->d_sectorsize = blkcfg->blk_size;
	else
		dp->d_sectorsize = VTBLK_BSIZE;

	/*
	 * The VirtIO maximum I/O size is given in terms of segments.
	 * However, FreeBSD limits I/O size by logical buffer size, not
	 * by physically contiguous pages. Therefore, we have to assume
	 * no pages are contiguous. This may impose an artificially low
	 * maximum I/O size. But in practice, since QEMU advertises 128
	 * segments, this gives us a maximum IO size of 125 * PAGE_SIZE,
	 * which is typically greater than maxphys. Eventually we should
	 * just advertise maxphys and split buffers that are too big.
	 *
	 * If we're not asking busdma to align data to page boundaries, the
	 * maximum I/O size is reduced by PAGE_SIZE in order to accommodate
	 * unaligned I/Os.
	 */
	dp->d_maxsize = (sc->vtblk_max_nsegs - VTBLK_MIN_SEGMENTS) *
	    PAGE_SIZE;
	if ((sc->vtblk_flags & VTBLK_FLAG_BUSDMA_ALIGN) == 0)
		dp->d_maxsize -= PAGE_SIZE;

	if (virtio_with_feature(dev, VIRTIO_BLK_F_GEOMETRY)) {
		dp->d_fwsectors = blkcfg->geometry.sectors;
		dp->d_fwheads = blkcfg->geometry.heads;
	}

	if (virtio_with_feature(dev, VIRTIO_BLK_F_TOPOLOGY) &&
	    blkcfg->topology.physical_block_exp > 0) {
		dp->d_stripesize = dp->d_sectorsize *
		    (1 << blkcfg->topology.physical_block_exp);
		dp->d_stripeoffset = (dp->d_stripesize -
		    blkcfg->topology.alignment_offset * dp->d_sectorsize) %
		    dp->d_stripesize;
	}

	if (virtio_with_feature(dev, VIRTIO_BLK_F_DISCARD)) {
		dp->d_flags |= DISKFLAG_CANDELETE;
		dp->d_delmaxsize = blkcfg->max_discard_sectors * VTBLK_BSIZE;
	}

	if (vtblk_write_cache_enabled(sc, blkcfg) != 0)
		sc->vtblk_write_cache = VTBLK_CACHE_WRITEBACK;
	else
		sc->vtblk_write_cache = VTBLK_CACHE_WRITETHROUGH;
}

static void
vtblk_create_disk(struct vtblk_softc *sc)
{
	struct disk *dp;

	dp = sc->vtblk_disk;

	vtblk_ident(sc);

	device_printf(sc->vtblk_dev, "%juMB (%ju %u byte sectors)\n",
	    (uintmax_t) dp->d_mediasize >> 20,
	    (uintmax_t) dp->d_mediasize / dp->d_sectorsize,
	    dp->d_sectorsize);

	disk_create(dp, DISK_VERSION);
}

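/*
 * Allocate the request pool up front; each request keeps its own busdma map
 * and softc pointer, which persist across reuse of the request.
 */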
static int
vtblk_request_prealloc(struct vtblk_softc *sc)
{
	struct vtblk_request *req;
	int i, nreqs;

	nreqs = virtqueue_size(sc->vtblk_vq);

	/*
	 * Preallocate sufficient requests to keep the virtqueue full. Each
	 * request consumes VTBLK_MIN_SEGMENTS or more descriptors so reduce
	 * the number allocated when indirect descriptors are not available.
	 */
	if ((sc->vtblk_flags & VTBLK_FLAG_INDIRECT) == 0)
		nreqs /= VTBLK_MIN_SEGMENTS;

	for (i = 0; i < nreqs; i++) {
		req = malloc(sizeof(struct vtblk_request), M_DEVBUF, M_NOWAIT);
		if (req == NULL)
			return (ENOMEM);

		req->vbr_sc = sc;
		if (bus_dmamap_create(sc->vtblk_dmat, 0, &req->vbr_mapp)) {
			free(req, M_DEVBUF);
			return (ENOMEM);
		}

		MPASS(sglist_count(&req->vbr_hdr, sizeof(req->vbr_hdr)) == 1);
		MPASS(sglist_count(&req->vbr_ack, sizeof(req->vbr_ack)) == 1);

		sc->vtblk_request_count++;
		vtblk_request_enqueue(sc, req);
	}

	return (0);
}

static void
vtblk_request_free(struct vtblk_softc *sc)
{
	struct vtblk_request *req;

	MPASS(TAILQ_EMPTY(&sc->vtblk_req_ready));

	while ((req = vtblk_request_dequeue(sc)) != NULL) {
		sc->vtblk_request_count--;
		bus_dmamap_destroy(sc->vtblk_dmat, req->vbr_mapp);
		free(req, M_DEVBUF);
	}

	KASSERT(sc->vtblk_request_count == 0,
	    ("%s: leaked %d requests", __func__, sc->vtblk_request_count));
}

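/*
 * Take a request from the free list, zeroing everything from vbr_hdr
 * onward; the preserved softc pointer and busdma map are left intact.
 */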
static struct vtblk_request *
vtblk_request_dequeue(struct vtblk_softc *sc)
{
	struct vtblk_request *req;

	req = TAILQ_FIRST(&sc->vtblk_req_free);
	if (req != NULL) {
		TAILQ_REMOVE(&sc->vtblk_req_free, req, vbr_link);
		bzero(&req->vbr_hdr, sizeof(struct vtblk_request) -
		    offsetof(struct vtblk_request, vbr_hdr));
	}

	return (req);
}

static void
vtblk_request_enqueue(struct vtblk_softc *sc, struct vtblk_request *req)
{

	TAILQ_INSERT_HEAD(&sc->vtblk_req_free, req, vbr_link);
}

static struct vtblk_request *
vtblk_request_next_ready(struct vtblk_softc *sc)
{
	struct vtblk_request *req;

	req = TAILQ_FIRST(&sc->vtblk_req_ready);
	if (req != NULL)
		TAILQ_REMOVE(&sc->vtblk_req_ready, req, vbr_link);

	return (req);
}

static void
vtblk_request_requeue_ready(struct vtblk_softc *sc, struct vtblk_request *req)
{

	/* NOTE: Currently, there will be at most one request in the queue. */
	TAILQ_INSERT_HEAD(&sc->vtblk_req_ready, req, vbr_link);
}

static struct vtblk_request *
vtblk_request_next(struct vtblk_softc *sc)
{
	struct vtblk_request *req;

	req = vtblk_request_next_ready(sc);
	if (req != NULL)
		return (req);

	return (vtblk_request_bio(sc));
}

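/*
 * Pop the next bio from the queue and translate it into a virtio block
 * request header (flush, read, write, or discard).
 */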
static struct vtblk_request *
vtblk_request_bio(struct vtblk_softc *sc)
{
	struct bio_queue_head *bioq;
	struct vtblk_request *req;
	struct bio *bp;

	bioq = &sc->vtblk_bioq;

	if (bioq_first(bioq) == NULL)
		return (NULL);

	req = vtblk_request_dequeue(sc);
	if (req == NULL)
		return (NULL);

	bp = bioq_takefirst(bioq);
	req->vbr_bp = bp;
	req->vbr_ack = -1;
	req->vbr_hdr.ioprio = vtblk_gtoh32(sc, 1);

	switch (bp->bio_cmd) {
	case BIO_FLUSH:
		req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_FLUSH);
		req->vbr_hdr.sector = 0;
		break;
	case BIO_READ:
		req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_IN);
		req->vbr_hdr.sector = vtblk_gtoh64(sc, bp->bio_offset / VTBLK_BSIZE);
		break;
	case BIO_WRITE:
		req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_OUT);
		req->vbr_hdr.sector = vtblk_gtoh64(sc, bp->bio_offset / VTBLK_BSIZE);
		break;
	case BIO_DELETE:
		req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_DISCARD);
		req->vbr_hdr.sector = vtblk_gtoh64(sc, bp->bio_offset / VTBLK_BSIZE);
		break;
	default:
		panic("%s: bio with unhandled cmd: %d", __func__, bp->bio_cmd);
	}

	if (bp->bio_flags & BIO_ORDERED)
		req->vbr_hdr.type |= vtblk_gtoh32(sc, VIRTIO_BLK_T_BARRIER);

	return (req);
}

static int
vtblk_request_execute(struct vtblk_request *req, int flags)
{
	struct vtblk_softc *sc = req->vbr_sc;
	struct bio *bp = req->vbr_bp;
	int error = 0;

	/*
	 * Call via bus_dmamap_load_bio or directly depending on whether we
	 * have a buffer we need to map. If we don't have a busdma map,
	 * try to perform the I/O directly and hope that it works (this will
	 * happen when dumping).
	 */
	if ((req->vbr_mapp != NULL) &&
	    (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE)) {
		error = bus_dmamap_load_bio(sc->vtblk_dmat, req->vbr_mapp,
		    req->vbr_bp, vtblk_request_execute_cb, req, flags);
		if (error == EINPROGRESS) {
			req->vbr_busdma_wait = 1;
			sc->vtblk_flags |= VTBLK_FLAG_BUSDMA_WAIT;
		}
	} else {
		vtblk_request_execute_cb(req, NULL, 0, 0);
	}

	return (error ? error : req->vbr_error);
}

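/*
 * busdma callback: build the scatter/gather list (header, data or discard
 * payload, status byte) and enqueue the request on the virtqueue. Also
 * called directly with nseg == 0 for requests without mapped data, such as
 * flush, discard, and dump requests.
 */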
static void
vtblk_request_execute_cb(void * callback_arg, bus_dma_segment_t * segs,
    int nseg, int error)
{
	struct vtblk_request *req;
	struct vtblk_softc *sc;
	struct virtqueue *vq;
	struct sglist *sg;
	struct bio *bp;
	int ordered, readable, writable, i;

	req = (struct vtblk_request *)callback_arg;
	sc = req->vbr_sc;
	vq = sc->vtblk_vq;
	sg = sc->vtblk_sglist;
	bp = req->vbr_bp;
	ordered = 0;
	writable = 0;

	/*
	 * If we paused request queueing while we waited for busdma to call us
	 * asynchronously, unpause it now; this request made it through so we
	 * don't need to worry about others getting ahead of us. (Note that we
	 * hold the device mutex so nothing will happen until after we return
	 * anyway.)
	 */
	if (req->vbr_busdma_wait)
		sc->vtblk_flags &= ~VTBLK_FLAG_BUSDMA_WAIT;

	/* Fail on errors from busdma. */
	if (error)
		goto out1;

	/*
	 * Some hosts (such as bhyve) do not implement the barrier feature,
	 * so we emulate it in the driver by allowing the barrier request
	 * to be the only one in flight.
	 */
	if ((sc->vtblk_flags & VTBLK_FLAG_BARRIER) == 0) {
		if (sc->vtblk_req_ordered != NULL) {
			error = EBUSY;
			goto out;
		}
		if (bp->bio_flags & BIO_ORDERED) {
			if (!virtqueue_empty(vq)) {
				error = EBUSY;
				goto out;
			}
			ordered = 1;
			req->vbr_hdr.type &= vtblk_gtoh32(sc,
			    ~VIRTIO_BLK_T_BARRIER);
		}
	}

	sglist_reset(sg);
	sglist_append(sg, &req->vbr_hdr, sizeof(struct virtio_blk_outhdr));

	if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
		/*
		 * We cast bus_addr_t to vm_paddr_t here; since we skip the
		 * iommu mapping (see vtblk_attach) this should be safe.
		 */
		for (i = 0; i < nseg; i++) {
			error = sglist_append_phys(sg,
			    (vm_paddr_t)segs[i].ds_addr, segs[i].ds_len);
			if (error || sg->sg_nseg == sg->sg_maxseg) {
				panic("%s: bio %p data buffer too big %d",
				    __func__, bp, error);
			}
		}

		/* Special handling for dump, which bypasses busdma. */
		if (req->vbr_mapp == NULL) {
			error = sglist_append_bio(sg, bp);
			if (error || sg->sg_nseg == sg->sg_maxseg) {
				panic("%s: bio %p data buffer too big %d",
				    __func__, bp, error);
			}
		}

		/* BIO_READ means the host writes into our buffer. */
		if (bp->bio_cmd == BIO_READ)
			writable = sg->sg_nseg - 1;
	} else if (bp->bio_cmd == BIO_DELETE) {
		struct virtio_blk_discard_write_zeroes *discard;

		discard = malloc(sizeof(*discard), M_DEVBUF, M_NOWAIT | M_ZERO);
		if (discard == NULL) {
			error = ENOMEM;
			goto out;
		}

		bp->bio_driver1 = discard;
		discard->sector = vtblk_gtoh64(sc, bp->bio_offset / VTBLK_BSIZE);
		discard->num_sectors = vtblk_gtoh32(sc, bp->bio_bcount / VTBLK_BSIZE);
		error = sglist_append(sg, discard, sizeof(*discard));
		if (error || sg->sg_nseg == sg->sg_maxseg) {
			panic("%s: bio %p data buffer too big %d",
			    __func__, bp, error);
		}
	}

	writable++;
	sglist_append(sg, &req->vbr_ack, sizeof(uint8_t));
	readable = sg->sg_nseg - writable;

	if (req->vbr_mapp != NULL) {
		switch (bp->bio_cmd) {
		case BIO_READ:
			bus_dmamap_sync(sc->vtblk_dmat, req->vbr_mapp,
			    BUS_DMASYNC_PREREAD);
			break;
		case BIO_WRITE:
			bus_dmamap_sync(sc->vtblk_dmat, req->vbr_mapp,
			    BUS_DMASYNC_PREWRITE);
			break;
		}
	}

	error = virtqueue_enqueue(vq, req, sg, readable, writable);
	if (error == 0 && ordered)
		sc->vtblk_req_ordered = req;

	/*
	 * If we were called asynchronously, we need to notify the queue that
	 * we've added a new request, since the notification from startio was
	 * performed already.
	 */
	if (error == 0 && req->vbr_busdma_wait)
		virtqueue_notify(vq);

out:
	if (error && (req->vbr_mapp != NULL))
		bus_dmamap_unload(sc->vtblk_dmat, req->vbr_mapp);
out1:
	if (error && req->vbr_requeue_on_error)
		vtblk_request_requeue_ready(sc, req);
	req->vbr_error = error;
}

static int
vtblk_request_error(struct vtblk_request *req)
{
	int error;

	switch (req->vbr_ack) {
	case VIRTIO_BLK_S_OK:
		error = 0;
		break;
	case VIRTIO_BLK_S_UNSUPP:
		error = ENOTSUP;
		break;
	default:
		error = EIO;
		break;
	}

	return (error);
}

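/*
 * Drain completed requests from the virtqueue: sync and unload their busdma
 * maps, record the request status in the bio, return the request to the
 * free list, and collect the bios on 'queue' for completion later, once the
 * lock has been dropped.
 */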
static void
vtblk_queue_completed(struct vtblk_softc *sc, struct bio_queue *queue)
{
	struct vtblk_request *req;
	struct bio *bp;

	while ((req = virtqueue_dequeue(sc->vtblk_vq, NULL)) != NULL) {
		if (sc->vtblk_req_ordered != NULL) {
			MPASS(sc->vtblk_req_ordered == req);
			sc->vtblk_req_ordered = NULL;
		}

		bp = req->vbr_bp;
		if (req->vbr_mapp != NULL) {
			switch (bp->bio_cmd) {
			case BIO_READ:
				bus_dmamap_sync(sc->vtblk_dmat, req->vbr_mapp,
				    BUS_DMASYNC_POSTREAD);
				bus_dmamap_unload(sc->vtblk_dmat,
				    req->vbr_mapp);
				break;
			case BIO_WRITE:
				bus_dmamap_sync(sc->vtblk_dmat, req->vbr_mapp,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(sc->vtblk_dmat,
				    req->vbr_mapp);
				break;
			}
		}
		bp->bio_error = vtblk_request_error(req);
		TAILQ_INSERT_TAIL(queue, bp, bio_queue);

		vtblk_request_enqueue(sc, req);
	}
}

static void
vtblk_done_completed(struct vtblk_softc *sc, struct bio_queue *queue)
{
	struct bio *bp, *tmp;

	TAILQ_FOREACH_SAFE(bp, queue, bio_queue, tmp) {
		if (bp->bio_error != 0)
			disk_err(bp, "hard error", -1, 1);
		vtblk_bio_done(sc, bp, bp->bio_error);
	}
}

static void
vtblk_drain_vq(struct vtblk_softc *sc)
{
	struct virtqueue *vq;
	struct vtblk_request *req;
	int last;

	vq = sc->vtblk_vq;
	last = 0;

	while ((req = virtqueue_drain(vq, &last)) != NULL) {
		vtblk_bio_done(sc, req->vbr_bp, ENXIO);
		vtblk_request_enqueue(sc, req);
	}

	sc->vtblk_req_ordered = NULL;
	KASSERT(virtqueue_empty(vq), ("virtqueue not empty"));
}

static void
vtblk_drain(struct vtblk_softc *sc)
{
	struct bio_queue_head *bioq;
	struct vtblk_request *req;
	struct bio *bp;

	bioq = &sc->vtblk_bioq;

	if (sc->vtblk_vq != NULL) {
		struct bio_queue queue;

		TAILQ_INIT(&queue);
		vtblk_queue_completed(sc, &queue);
		vtblk_done_completed(sc, &queue);

		vtblk_drain_vq(sc);
	}

	while ((req = vtblk_request_next_ready(sc)) != NULL) {
		vtblk_bio_done(sc, req->vbr_bp, ENXIO);
		vtblk_request_enqueue(sc, req);
	}

	while (bioq_first(bioq) != NULL) {
		bp = bioq_takefirst(bioq);
		vtblk_bio_done(sc, bp, ENXIO);
	}

	vtblk_request_free(sc);
}

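/*
 * Issue as many queued bios as the virtqueue can hold, then notify the host
 * if anything was enqueued. Skipped while suspended or while waiting for a
 * deferred busdma load to complete.
 */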
static void
vtblk_startio(struct vtblk_softc *sc)
{
	struct virtqueue *vq;
	struct vtblk_request *req;
	int enq;

	VTBLK_LOCK_ASSERT(sc);
	vq = sc->vtblk_vq;
	enq = 0;

	if (sc->vtblk_flags & (VTBLK_FLAG_SUSPEND | VTBLK_FLAG_BUSDMA_WAIT))
		return;

	while (!virtqueue_full(vq)) {
		req = vtblk_request_next(sc);
		if (req == NULL)
			break;

		req->vbr_requeue_on_error = 1;
		if (vtblk_request_execute(req, BUS_DMA_WAITOK))
			break;

		enq++;
	}

	if (enq > 0)
		virtqueue_notify(vq);
}

static void
vtblk_bio_done(struct vtblk_softc *sc, struct bio *bp, int error)
{

	/* Because of GEOM direct dispatch, we cannot hold any locks. */
	if (sc != NULL)
		VTBLK_LOCK_ASSERT_NOTOWNED(sc);

	if (error) {
		bp->bio_resid = bp->bio_bcount;
		bp->bio_error = error;
		bp->bio_flags |= BIO_ERROR;
	} else {
		kmsan_mark_bio(bp, KMSAN_STATE_INITED);
	}

	if (bp->bio_driver1 != NULL) {
		free(bp->bio_driver1, M_DEVBUF);
		bp->bio_driver1 = NULL;
	}

	biodone(bp);
}

#define VTBLK_GET_CONFIG(_dev, _feature, _field, _cfg)		\
	if (virtio_with_feature(_dev, _feature)) {		\
		virtio_read_device_config(_dev,			\
		    offsetof(struct virtio_blk_config, _field),	\
		    &(_cfg)->_field, sizeof((_cfg)->_field));	\
	}

static void
vtblk_read_config(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
{
	device_t dev;

	dev = sc->vtblk_dev;

	bzero(blkcfg, sizeof(struct virtio_blk_config));

	/* The capacity is always available. */
	virtio_read_device_config(dev, offsetof(struct virtio_blk_config,
	    capacity), &blkcfg->capacity, sizeof(blkcfg->capacity));

	/* Read the configuration if the feature was negotiated. */
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_SIZE_MAX, size_max, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_SEG_MAX, seg_max, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_GEOMETRY,
	    geometry.cylinders, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_GEOMETRY,
	    geometry.heads, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_GEOMETRY,
	    geometry.sectors, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_BLK_SIZE, blk_size, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY,
	    topology.physical_block_exp, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY,
	    topology.alignment_offset, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY,
	    topology.min_io_size, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY,
	    topology.opt_io_size, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_CONFIG_WCE, wce, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_DISCARD, max_discard_sectors,
	    blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_DISCARD, max_discard_seg, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_DISCARD, discard_sector_alignment,
	    blkcfg);
}

#undef VTBLK_GET_CONFIG

static void
vtblk_ident(struct vtblk_softc *sc)
{
	struct bio buf;
	struct disk *dp;
	struct vtblk_request *req;
	int len, error;

	dp = sc->vtblk_disk;
	len = MIN(VIRTIO_BLK_ID_BYTES, DISK_IDENT_SIZE);

	if (vtblk_tunable_int(sc, "no_ident", vtblk_no_ident) != 0)
		return;

	req = vtblk_request_dequeue(sc);
	if (req == NULL)
		return;

	req->vbr_ack = -1;
	req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_GET_ID);
	req->vbr_hdr.ioprio = vtblk_gtoh32(sc, 1);
	req->vbr_hdr.sector = 0;

	req->vbr_bp = &buf;
	g_reset_bio(&buf);

	buf.bio_cmd = BIO_READ;
	buf.bio_data = dp->d_ident;
	buf.bio_bcount = len;

	VTBLK_LOCK(sc);
	error = vtblk_poll_request(sc, req);
	VTBLK_UNLOCK(sc);

	vtblk_request_enqueue(sc, req);

	if (error) {
		device_printf(sc->vtblk_dev,
		    "error getting device identifier: %d\n", error);
	}
}

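/*
 * Synchronously execute a single request, polling the virtqueue for its
 * completion. Used for the GET_ID ident request and for kernel dumps.
 */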
static int
vtblk_poll_request(struct vtblk_softc *sc, struct vtblk_request *req)
{
	struct virtqueue *vq;
	int error;

	vq = sc->vtblk_vq;

	if (!virtqueue_empty(vq))
		return (EBUSY);

	error = vtblk_request_execute(req, BUS_DMA_NOWAIT);
	if (error)
		return (error);

	virtqueue_notify(vq);
	virtqueue_poll(vq, NULL);

	error = vtblk_request_error(req);
	if (error && bootverbose) {
		device_printf(sc->vtblk_dev,
		    "%s: IO error: %d\n", __func__, error);
	}

	return (error);
}

static int
vtblk_quiesce(struct vtblk_softc *sc)
{
	int error;

	VTBLK_LOCK_ASSERT(sc);
	error = 0;

	while (!virtqueue_empty(sc->vtblk_vq)) {
		if (mtx_sleep(&sc->vtblk_vq, VTBLK_MTX(sc), PRIBIO, "vtblkq",
		    VTBLK_QUIESCE_TIMEOUT) == EWOULDBLOCK) {
			error = EBUSY;
			break;
		}
	}

	return (error);
}

static void
vtblk_vq_intr(void *xsc)
{
	struct vtblk_softc *sc;
	struct virtqueue *vq;
	struct bio_queue queue;

	sc = xsc;
	vq = sc->vtblk_vq;
	TAILQ_INIT(&queue);

	VTBLK_LOCK(sc);

again:
	if (sc->vtblk_flags & VTBLK_FLAG_DETACH)
		goto out;

	vtblk_queue_completed(sc, &queue);
	vtblk_startio(sc);

	if (virtqueue_enable_intr(vq) != 0) {
		virtqueue_disable_intr(vq);
		goto again;
	}

	if (sc->vtblk_flags & VTBLK_FLAG_SUSPEND)
		wakeup(&sc->vtblk_vq);

out:
	VTBLK_UNLOCK(sc);
	vtblk_done_completed(sc, &queue);
}

static void
vtblk_stop(struct vtblk_softc *sc)
{

	virtqueue_disable_intr(sc->vtblk_vq);
	virtio_stop(sc->vtblk_dev);
}

static void
vtblk_dump_quiesce(struct vtblk_softc *sc)
{

	/*
	 * Spin here until all the requests in-flight at the time of the
	 * dump are completed and queued. The queued requests will be
	 * biodone'd once the dump is finished.
	 */
	while (!virtqueue_empty(sc->vtblk_vq))
		vtblk_queue_completed(sc, &sc->vtblk_dump_queue);
}

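/*
 * Write one block of the kernel dump using a polled request built on the
 * preallocated dump request and an on-stack bio.
 */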
static int
vtblk_dump_write(struct vtblk_softc *sc, void *virtual, off_t offset,
    size_t length)
{
	struct bio buf;
	struct vtblk_request *req;

	req = &sc->vtblk_dump_request;
	req->vbr_sc = sc;
	req->vbr_ack = -1;
	req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_OUT);
	req->vbr_hdr.ioprio = vtblk_gtoh32(sc, 1);
	req->vbr_hdr.sector = vtblk_gtoh64(sc, offset / VTBLK_BSIZE);

	req->vbr_bp = &buf;
	g_reset_bio(&buf);

	buf.bio_cmd = BIO_WRITE;
	buf.bio_data = virtual;
	buf.bio_bcount = length;

	return (vtblk_poll_request(sc, req));
}

static int
vtblk_dump_flush(struct vtblk_softc *sc)
{
	struct bio buf;
	struct vtblk_request *req;

	req = &sc->vtblk_dump_request;
	req->vbr_sc = sc;
	req->vbr_ack = -1;
	req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_FLUSH);
	req->vbr_hdr.ioprio = vtblk_gtoh32(sc, 1);
	req->vbr_hdr.sector = 0;

	req->vbr_bp = &buf;
	g_reset_bio(&buf);

	buf.bio_cmd = BIO_FLUSH;

	return (vtblk_poll_request(sc, req));
}

static void
vtblk_dump_complete(struct vtblk_softc *sc)
{

	vtblk_dump_flush(sc);

	VTBLK_UNLOCK(sc);
	vtblk_done_completed(sc, &sc->vtblk_dump_queue);
	VTBLK_LOCK(sc);
}

static void
vtblk_set_write_cache(struct vtblk_softc *sc, int wc)
{

	/* Set either writeback (1) or writethrough (0) mode. */
	virtio_write_dev_config_1(sc->vtblk_dev,
	    offsetof(struct virtio_blk_config, wce), wc);
}

static int
vtblk_write_cache_enabled(struct vtblk_softc *sc,
    struct virtio_blk_config *blkcfg)
{
	int wc;

	if (sc->vtblk_flags & VTBLK_FLAG_WCE_CONFIG) {
		wc = vtblk_tunable_int(sc, "writecache_mode",
		    vtblk_writecache_mode);
		if (wc >= 0 && wc < VTBLK_CACHE_MAX)
			vtblk_set_write_cache(sc, wc);
		else
			wc = blkcfg->wce;
	} else
		wc = virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_FLUSH);

	return (wc);
}

static int
vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct vtblk_softc *sc;
	int wc, error;

	sc = oidp->oid_arg1;
	wc = sc->vtblk_write_cache;

	error = sysctl_handle_int(oidp, &wc, 0, req);
	if (error || req->newptr == NULL)
		return (error);
	if ((sc->vtblk_flags & VTBLK_FLAG_WCE_CONFIG) == 0)
		return (EPERM);
	if (wc < 0 || wc >= VTBLK_CACHE_MAX)
		return (EINVAL);

	VTBLK_LOCK(sc);
	sc->vtblk_write_cache = wc;
	vtblk_set_write_cache(sc, sc->vtblk_write_cache);
	VTBLK_UNLOCK(sc);

	return (0);
}

static void
vtblk_setup_sysctl(struct vtblk_softc *sc)
{
	device_t dev;
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid *tree;
	struct sysctl_oid_list *child;

	dev = sc->vtblk_dev;
	ctx = device_get_sysctl_ctx(dev);
	tree = device_get_sysctl_tree(dev);
	child = SYSCTL_CHILDREN(tree);

	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "writecache_mode",
	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
	    vtblk_write_cache_sysctl, "I",
	    "Write cache mode (writethrough (0) or writeback (1))");
}

static int
vtblk_tunable_int(struct vtblk_softc *sc, const char *knob, int def)
{
	char path[64];

	snprintf(path, sizeof(path),
	    "hw.vtblk.%d.%s", device_get_unit(sc->vtblk_dev), knob);
	TUNABLE_INT_FETCH(path, &def);

	return (def);
}