1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2011 NetApp, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 #include <sys/cdefs.h>
30 #include "opt_bhyve_snapshot.h"
31
32 #include <sys/param.h>
33 #include <sys/kernel.h>
34 #include <sys/jail.h>
35 #include <sys/queue.h>
36 #include <sys/lock.h>
37 #include <sys/mutex.h>
38 #include <sys/malloc.h>
39 #include <sys/conf.h>
40 #include <sys/sysctl.h>
41 #include <sys/libkern.h>
42 #include <sys/ioccom.h>
43 #include <sys/mman.h>
44 #include <sys/uio.h>
45 #include <sys/proc.h>
46
47 #include <vm/vm.h>
48 #include <vm/pmap.h>
49 #include <vm/vm_map.h>
50 #include <vm/vm_object.h>
51
52 #include <machine/vmparam.h>
53 #include <machine/vmm.h>
54 #include <machine/vmm_dev.h>
55 #include <machine/vmm_instruction_emul.h>
56 #include <machine/vmm_snapshot.h>
57 #include <x86/apicreg.h>
58
59 #include "vmm_lapic.h"
60 #include "vmm_stat.h"
61 #include "vmm_mem.h"
62 #include "io/ppt.h"
63 #include "io/vatpic.h"
64 #include "io/vioapic.h"
65 #include "io/vhpet.h"
66 #include "io/vrtc.h"
67
#ifdef COMPAT_FREEBSD13
/*
 * Older layouts of ioctl arguments, compiled in only with COMPAT_FREEBSD13.
 * The structures below are encoded into the ioctl command numbers through
 * _IOWR(), so their field order, types and sizes must stay exactly as they
 * are.
 */

/*
 * Argument of VM_STATS_OLD.  The handler always copies statistics starting
 * at index 0 into 'statbuf' and stamps 'tv' with getmicrotime().
 */
struct vm_stats_old {
	int		cpuid;				/* in */
	int		num_entries;			/* out */
	struct timeval	tv;
	uint64_t	statbuf[MAX_VM_STATS];
};

#define	VM_STATS_OLD							\
	_IOWR('v', IOCNUM_VM_STATS, struct vm_stats_old)

/*
 * Argument of VM_SNAPSHOT_REQ_OLD.  The handler skips the leading 'ctx'
 * pointer and treats the remainder, starting at 'dev_data', as a
 * struct vm_snapshot_meta.
 */
struct vm_snapshot_meta_old {
	void		*ctx;		/* unused */
	void		*dev_data;
	const char	*dev_name;	/* identify userspace devices */
	enum snapshot_req dev_req;	/* identify kernel structs */

	struct vm_snapshot_buffer buffer;

	enum vm_snapshot_op op;
};

#define	VM_SNAPSHOT_REQ_OLD						\
	_IOWR('v', IOCNUM_SNAPSHOT_REQ, struct vm_snapshot_meta_old)

/*
 * IPI exit payload as laid out inside vm_exit_13.u: the destination mask is
 * a fixed 256-bit set embedded in the structure.
 */
struct vm_exit_ipi_13 {
	uint32_t	mode;
	uint8_t		vector;
	__BITSET_DEFINE(, 256) dmask;
};

/* Exit information embedded by value in struct vm_run_13. */
struct vm_exit_13 {
	uint32_t	exitcode;
	int32_t		inst_length;
	uint64_t	rip;
	uint64_t	u[120 / sizeof(uint64_t)];
};

/* Argument of VM_RUN_13; 'cpuid' must stay first (read as the vcpuid). */
struct vm_run_13 {
	int		cpuid;
	struct vm_exit_13 vm_exit;
};

#define	VM_RUN_13							\
	_IOWR('v', IOCNUM_RUN, struct vm_run_13)

#endif /* COMPAT_FREEBSD13 */
115
116 struct devmem_softc {
117 int segid;
118 char *name;
119 struct cdev *cdev;
120 struct vmmdev_softc *sc;
121 SLIST_ENTRY(devmem_softc) link;
122 };
123
124 struct vmmdev_softc {
125 struct vm *vm; /* vm instance cookie */
126 struct cdev *cdev;
127 struct ucred *ucred;
128 SLIST_ENTRY(vmmdev_softc) link;
129 SLIST_HEAD(, devmem_softc) devmem;
130 int flags;
131 };
132 #define VSC_LINKED 0x01
133
134 static SLIST_HEAD(, vmmdev_softc) head;
135
136 static unsigned pr_allow_flag;
137 static struct mtx vmmdev_mtx;
138 MTX_SYSINIT(vmmdev_mtx, &vmmdev_mtx, "vmm device mutex", MTX_DEF);
139
140 static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");
141
142 SYSCTL_DECL(_hw_vmm);
143
144 static int vmm_priv_check(struct ucred *ucred);
145 static int devmem_create_cdev(const char *vmname, int id, char *devmem);
146 static void devmem_destroy(void *arg);
147
148 static int
vmm_priv_check(struct ucred * ucred)149 vmm_priv_check(struct ucred *ucred)
150 {
151
152 if (jailed(ucred) &&
153 !(ucred->cr_prison->pr_allow & pr_allow_flag))
154 return (EPERM);
155
156 return (0);
157 }
158
159 static int
vcpu_lock_one(struct vcpu * vcpu)160 vcpu_lock_one(struct vcpu *vcpu)
161 {
162 return (vcpu_set_state(vcpu, VCPU_FROZEN, true));
163 }
164
165 static void
vcpu_unlock_one(struct vmmdev_softc * sc,int vcpuid,struct vcpu * vcpu)166 vcpu_unlock_one(struct vmmdev_softc *sc, int vcpuid, struct vcpu *vcpu)
167 {
168 enum vcpu_state state;
169
170 state = vcpu_get_state(vcpu, NULL);
171 if (state != VCPU_FROZEN) {
172 panic("vcpu %s(%d) has invalid state %d", vm_name(sc->vm),
173 vcpuid, state);
174 }
175
176 vcpu_set_state(vcpu, VCPU_IDLE, false);
177 }
178
179 static int
vcpu_lock_all(struct vmmdev_softc * sc)180 vcpu_lock_all(struct vmmdev_softc *sc)
181 {
182 struct vcpu *vcpu;
183 int error;
184 uint16_t i, j, maxcpus;
185
186 error = 0;
187 vm_slock_vcpus(sc->vm);
188 maxcpus = vm_get_maxcpus(sc->vm);
189 for (i = 0; i < maxcpus; i++) {
190 vcpu = vm_vcpu(sc->vm, i);
191 if (vcpu == NULL)
192 continue;
193 error = vcpu_lock_one(vcpu);
194 if (error)
195 break;
196 }
197
198 if (error) {
199 for (j = 0; j < i; j++) {
200 vcpu = vm_vcpu(sc->vm, j);
201 if (vcpu == NULL)
202 continue;
203 vcpu_unlock_one(sc, j, vcpu);
204 }
205 vm_unlock_vcpus(sc->vm);
206 }
207
208 return (error);
209 }
210
211 static void
vcpu_unlock_all(struct vmmdev_softc * sc)212 vcpu_unlock_all(struct vmmdev_softc *sc)
213 {
214 struct vcpu *vcpu;
215 uint16_t i, maxcpus;
216
217 maxcpus = vm_get_maxcpus(sc->vm);
218 for (i = 0; i < maxcpus; i++) {
219 vcpu = vm_vcpu(sc->vm, i);
220 if (vcpu == NULL)
221 continue;
222 vcpu_unlock_one(sc, i, vcpu);
223 }
224 vm_unlock_vcpus(sc->vm);
225 }
226
227 static struct vmmdev_softc *
vmmdev_lookup(const char * name)228 vmmdev_lookup(const char *name)
229 {
230 struct vmmdev_softc *sc;
231
232 #ifdef notyet /* XXX kernel is not compiled with invariants */
233 mtx_assert(&vmmdev_mtx, MA_OWNED);
234 #endif
235
236 SLIST_FOREACH(sc, &head, link) {
237 if (strcmp(name, vm_name(sc->vm)) == 0)
238 break;
239 }
240
241 if (sc == NULL)
242 return (NULL);
243
244 if (cr_cansee(curthread->td_ucred, sc->ucred))
245 return (NULL);
246
247 return (sc);
248 }
249
250 static struct vmmdev_softc *
vmmdev_lookup2(struct cdev * cdev)251 vmmdev_lookup2(struct cdev *cdev)
252 {
253
254 return (cdev->si_drv1);
255 }
256
257 static int
vmmdev_rw(struct cdev * cdev,struct uio * uio,int flags)258 vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
259 {
260 int error, off, c, prot;
261 vm_paddr_t gpa, maxaddr;
262 void *hpa, *cookie;
263 struct vmmdev_softc *sc;
264
265 error = vmm_priv_check(curthread->td_ucred);
266 if (error)
267 return (error);
268
269 sc = vmmdev_lookup2(cdev);
270 if (sc == NULL)
271 return (ENXIO);
272
273 /*
274 * Get a read lock on the guest memory map.
275 */
276 vm_slock_memsegs(sc->vm);
277
278 prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
279 maxaddr = vmm_sysmem_maxaddr(sc->vm);
280 while (uio->uio_resid > 0 && error == 0) {
281 gpa = uio->uio_offset;
282 off = gpa & PAGE_MASK;
283 c = min(uio->uio_resid, PAGE_SIZE - off);
284
285 /*
286 * The VM has a hole in its physical memory map. If we want to
287 * use 'dd' to inspect memory beyond the hole we need to
288 * provide bogus data for memory that lies in the hole.
289 *
290 * Since this device does not support lseek(2), dd(1) will
291 * read(2) blocks of data to simulate the lseek(2).
292 */
293 hpa = vm_gpa_hold_global(sc->vm, gpa, c, prot, &cookie);
294 if (hpa == NULL) {
295 if (uio->uio_rw == UIO_READ && gpa < maxaddr)
296 error = uiomove(__DECONST(void *, zero_region),
297 c, uio);
298 else
299 error = EFAULT;
300 } else {
301 error = uiomove(hpa, c, uio);
302 vm_gpa_release(cookie);
303 }
304 }
305 vm_unlock_memsegs(sc->vm);
306 return (error);
307 }
308
309 CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= VM_MAX_SUFFIXLEN + 1);
310
311 static int
get_memseg(struct vmmdev_softc * sc,struct vm_memseg * mseg,size_t len)312 get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
313 {
314 struct devmem_softc *dsc;
315 int error;
316 bool sysmem;
317
318 error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL);
319 if (error || mseg->len == 0)
320 return (error);
321
322 if (!sysmem) {
323 SLIST_FOREACH(dsc, &sc->devmem, link) {
324 if (dsc->segid == mseg->segid)
325 break;
326 }
327 KASSERT(dsc != NULL, ("%s: devmem segment %d not found",
328 __func__, mseg->segid));
329 error = copystr(dsc->name, mseg->name, len, NULL);
330 } else {
331 bzero(mseg->name, len);
332 }
333
334 return (error);
335 }
336
337 static int
alloc_memseg(struct vmmdev_softc * sc,struct vm_memseg * mseg,size_t len)338 alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
339 {
340 char *name;
341 int error;
342 bool sysmem;
343
344 error = 0;
345 name = NULL;
346 sysmem = true;
347
348 /*
349 * The allocation is lengthened by 1 to hold a terminating NUL. It'll
350 * by stripped off when devfs processes the full string.
351 */
352 if (VM_MEMSEG_NAME(mseg)) {
353 sysmem = false;
354 name = malloc(len, M_VMMDEV, M_WAITOK);
355 error = copystr(mseg->name, name, len, NULL);
356 if (error)
357 goto done;
358 }
359
360 error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem);
361 if (error)
362 goto done;
363
364 if (VM_MEMSEG_NAME(mseg)) {
365 error = devmem_create_cdev(vm_name(sc->vm), mseg->segid, name);
366 if (error)
367 vm_free_memseg(sc->vm, mseg->segid);
368 else
369 name = NULL; /* freed when 'cdev' is destroyed */
370 }
371 done:
372 free(name, M_VMMDEV);
373 return (error);
374 }
375
/*
 * Read 'count' registers of 'vcpu'.
 *
 * vcpu    vCPU to read from.
 * count   number of entries in 'regnum' and 'regval'.
 * regnum  register identifiers to read.
 * regval  out: regval[i] receives the value of register regnum[i].
 *
 * Stops at the first failure and returns that error; entries after the
 * failing index are left untouched.  Returns 0 when every read succeeded
 * (including count == 0).
 */
static int
vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
	unsigned int i;
	int error;

	error = 0;
	for (i = 0; i < count; i++) {
		error = vm_get_register(vcpu, regnum[i], &regval[i]);
		if (error)
			break;
	}
	return (error);
}
390
/*
 * Write 'count' registers of 'vcpu': register regnum[i] is set to
 * regval[i], in index order.
 *
 * Stops at the first failure and returns that error; registers written
 * before the failure keep their new values.  Returns 0 when every write
 * succeeded (including count == 0).
 */
static int
vm_set_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
    uint64_t *regval)
{
	unsigned int idx;
	int error;

	error = 0;
	for (idx = 0; idx < count && error == 0; idx++)
		error = vm_set_register(vcpu, regnum[idx], regval[idx]);

	return (error);
}
405
406 static int
vmmdev_ioctl(struct cdev * cdev,u_long cmd,caddr_t data,int fflag,struct thread * td)407 vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
408 struct thread *td)
409 {
410 int error, vcpuid, size;
411 cpuset_t *cpuset;
412 struct vmmdev_softc *sc;
413 struct vcpu *vcpu;
414 struct vm_register *vmreg;
415 struct vm_seg_desc *vmsegdesc;
416 struct vm_register_set *vmregset;
417 struct vm_run *vmrun;
418 #ifdef COMPAT_FREEBSD13
419 struct vm_run_13 *vmrun_13;
420 #endif
421 struct vm_exception *vmexc;
422 struct vm_lapic_irq *vmirq;
423 struct vm_lapic_msi *vmmsi;
424 struct vm_ioapic_irq *ioapic_irq;
425 struct vm_isa_irq *isa_irq;
426 struct vm_isa_irq_trigger *isa_irq_trigger;
427 struct vm_capability *vmcap;
428 struct vm_pptdev *pptdev;
429 struct vm_pptdev_mmio *pptmmio;
430 struct vm_pptdev_msi *pptmsi;
431 struct vm_pptdev_msix *pptmsix;
432 #ifdef COMPAT_FREEBSD13
433 struct vm_stats_old *vmstats_old;
434 #endif
435 struct vm_stats *vmstats;
436 struct vm_stat_desc *statdesc;
437 struct vm_x2apic *x2apic;
438 struct vm_gpa_pte *gpapte;
439 struct vm_suspend *vmsuspend;
440 struct vm_gla2gpa *gg;
441 struct vm_cpuset *vm_cpuset;
442 struct vm_intinfo *vmii;
443 struct vm_rtc_time *rtctime;
444 struct vm_rtc_data *rtcdata;
445 struct vm_memmap *mm;
446 struct vm_munmap *mu;
447 struct vm_cpu_topology *topology;
448 struct vm_readwrite_kernemu_device *kernemu;
449 uint64_t *regvals;
450 int *regnums;
451 enum { NONE, SINGLE, ALL } vcpus_locked;
452 bool memsegs_locked;
453 #ifdef BHYVE_SNAPSHOT
454 struct vm_snapshot_meta *snapshot_meta;
455 #ifdef COMPAT_FREEBSD13
456 struct vm_snapshot_meta_old *snapshot_old;
457 #endif
458 #endif
459
460 error = vmm_priv_check(curthread->td_ucred);
461 if (error)
462 return (error);
463
464 sc = vmmdev_lookup2(cdev);
465 if (sc == NULL)
466 return (ENXIO);
467
468 vcpuid = -1;
469 vcpu = NULL;
470 vcpus_locked = NONE;
471 memsegs_locked = false;
472
473 /*
474 * For VMM ioctls that operate on a single vCPU, lookup the
475 * vcpu. For VMM ioctls which require one or more vCPUs to
476 * not be running, lock necessary vCPUs.
477 *
478 * XXX fragile, handle with care
479 * Most of these assume that the first field of the ioctl data
480 * is the vcpuid.
481 */
482 switch (cmd) {
483 case VM_RUN:
484 #ifdef COMPAT_FREEBSD13
485 case VM_RUN_13:
486 #endif
487 case VM_GET_REGISTER:
488 case VM_SET_REGISTER:
489 case VM_GET_SEGMENT_DESCRIPTOR:
490 case VM_SET_SEGMENT_DESCRIPTOR:
491 case VM_GET_REGISTER_SET:
492 case VM_SET_REGISTER_SET:
493 case VM_INJECT_EXCEPTION:
494 case VM_GET_CAPABILITY:
495 case VM_SET_CAPABILITY:
496 case VM_SET_X2APIC_STATE:
497 case VM_GLA2GPA:
498 case VM_GLA2GPA_NOFAULT:
499 case VM_ACTIVATE_CPU:
500 case VM_SET_INTINFO:
501 case VM_GET_INTINFO:
502 case VM_RESTART_INSTRUCTION:
503 case VM_GET_KERNEMU_DEV:
504 case VM_SET_KERNEMU_DEV:
505 /*
506 * ioctls that can operate only on vcpus that are not running.
507 */
508 vcpuid = *(int *)data;
509 vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
510 if (vcpu == NULL) {
511 error = EINVAL;
512 goto done;
513 }
514 error = vcpu_lock_one(vcpu);
515 if (error)
516 goto done;
517 vcpus_locked = SINGLE;
518 break;
519
520 #ifdef COMPAT_FREEBSD12
521 case VM_ALLOC_MEMSEG_FBSD12:
522 #endif
523 case VM_ALLOC_MEMSEG:
524 case VM_BIND_PPTDEV:
525 case VM_UNBIND_PPTDEV:
526 case VM_MMAP_MEMSEG:
527 case VM_MUNMAP_MEMSEG:
528 case VM_REINIT:
529 /*
530 * ioctls that modify the memory map must lock memory
531 * segments exclusively.
532 */
533 vm_xlock_memsegs(sc->vm);
534 memsegs_locked = true;
535 /* FALLTHROUGH */
536 case VM_MAP_PPTDEV_MMIO:
537 case VM_UNMAP_PPTDEV_MMIO:
538 #ifdef BHYVE_SNAPSHOT
539 case VM_SNAPSHOT_REQ:
540 #ifdef COMPAT_FREEBSD13
541 case VM_SNAPSHOT_REQ_OLD:
542 #endif
543 case VM_RESTORE_TIME:
544 #endif
545 /*
546 * ioctls that operate on the entire virtual machine must
547 * prevent all vcpus from running.
548 */
549 error = vcpu_lock_all(sc);
550 if (error)
551 goto done;
552 vcpus_locked = ALL;
553 break;
554
555 #ifdef COMPAT_FREEBSD12
556 case VM_GET_MEMSEG_FBSD12:
557 #endif
558 case VM_GET_MEMSEG:
559 case VM_MMAP_GETNEXT:
560 /*
561 * Lock the memory map while it is being inspected.
562 */
563 vm_slock_memsegs(sc->vm);
564 memsegs_locked = true;
565 break;
566
567 #ifdef COMPAT_FREEBSD13
568 case VM_STATS_OLD:
569 #endif
570 case VM_STATS:
571 case VM_INJECT_NMI:
572 case VM_LAPIC_IRQ:
573 case VM_GET_X2APIC_STATE:
574 /*
575 * These do not need the vCPU locked but do operate on
576 * a specific vCPU.
577 */
578 vcpuid = *(int *)data;
579 vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
580 if (vcpu == NULL) {
581 error = EINVAL;
582 goto done;
583 }
584 break;
585
586 case VM_LAPIC_LOCAL_IRQ:
587 case VM_SUSPEND_CPU:
588 case VM_RESUME_CPU:
589 /*
590 * These can either operate on all CPUs via a vcpuid of
591 * -1 or on a specific vCPU.
592 */
593 vcpuid = *(int *)data;
594 if (vcpuid == -1)
595 break;
596 vcpu = vm_alloc_vcpu(sc->vm, vcpuid);
597 if (vcpu == NULL) {
598 error = EINVAL;
599 goto done;
600 }
601 break;
602
603 default:
604 break;
605 }
606
607 switch (cmd) {
608 case VM_RUN: {
609 struct vm_exit *vme;
610
611 vmrun = (struct vm_run *)data;
612 vme = vm_exitinfo(vcpu);
613
614 error = vm_run(vcpu);
615 if (error != 0)
616 break;
617
618 error = copyout(vme, vmrun->vm_exit, sizeof(*vme));
619 if (error != 0)
620 break;
621 if (vme->exitcode == VM_EXITCODE_IPI) {
622 error = copyout(vm_exitinfo_cpuset(vcpu),
623 vmrun->cpuset,
624 min(vmrun->cpusetsize, sizeof(cpuset_t)));
625 if (error != 0)
626 break;
627 if (sizeof(cpuset_t) < vmrun->cpusetsize) {
628 uint8_t *p;
629
630 p = (uint8_t *)vmrun->cpuset +
631 sizeof(cpuset_t);
632 while (p < (uint8_t *)vmrun->cpuset +
633 vmrun->cpusetsize) {
634 if (subyte(p++, 0) != 0) {
635 error = EFAULT;
636 break;
637 }
638 }
639 }
640 }
641 break;
642 }
643 #ifdef COMPAT_FREEBSD13
644 case VM_RUN_13: {
645 struct vm_exit *vme;
646 struct vm_exit_13 *vme_13;
647
648 vmrun_13 = (struct vm_run_13 *)data;
649 vme_13 = &vmrun_13->vm_exit;
650 vme = vm_exitinfo(vcpu);
651
652 error = vm_run(vcpu);
653 if (error == 0) {
654 vme_13->exitcode = vme->exitcode;
655 vme_13->inst_length = vme->inst_length;
656 vme_13->rip = vme->rip;
657 memcpy(vme_13->u, &vme->u, sizeof(vme_13->u));
658 if (vme->exitcode == VM_EXITCODE_IPI) {
659 struct vm_exit_ipi_13 *ipi;
660 cpuset_t *dmask;
661 int cpu;
662
663 dmask = vm_exitinfo_cpuset(vcpu);
664 ipi = (struct vm_exit_ipi_13 *)&vme_13->u[0];
665 BIT_ZERO(256, &ipi->dmask);
666 CPU_FOREACH_ISSET(cpu, dmask) {
667 if (cpu >= 256)
668 break;
669 BIT_SET(256, cpu, &ipi->dmask);
670 }
671 }
672 }
673 break;
674 }
675 #endif
676 case VM_SUSPEND:
677 vmsuspend = (struct vm_suspend *)data;
678 error = vm_suspend(sc->vm, vmsuspend->how);
679 break;
680 case VM_REINIT:
681 error = vm_reinit(sc->vm);
682 break;
683 case VM_STAT_DESC: {
684 statdesc = (struct vm_stat_desc *)data;
685 error = vmm_stat_desc_copy(statdesc->index,
686 statdesc->desc, sizeof(statdesc->desc));
687 break;
688 }
689 #ifdef COMPAT_FREEBSD13
690 case VM_STATS_OLD:
691 vmstats_old = (struct vm_stats_old *)data;
692 getmicrotime(&vmstats_old->tv);
693 error = vmm_stat_copy(vcpu, 0,
694 nitems(vmstats_old->statbuf),
695 &vmstats_old->num_entries,
696 vmstats_old->statbuf);
697 break;
698 #endif
699 case VM_STATS: {
700 vmstats = (struct vm_stats *)data;
701 getmicrotime(&vmstats->tv);
702 error = vmm_stat_copy(vcpu, vmstats->index,
703 nitems(vmstats->statbuf),
704 &vmstats->num_entries, vmstats->statbuf);
705 break;
706 }
707 case VM_PPTDEV_MSI:
708 pptmsi = (struct vm_pptdev_msi *)data;
709 error = ppt_setup_msi(sc->vm,
710 pptmsi->bus, pptmsi->slot, pptmsi->func,
711 pptmsi->addr, pptmsi->msg,
712 pptmsi->numvec);
713 break;
714 case VM_PPTDEV_MSIX:
715 pptmsix = (struct vm_pptdev_msix *)data;
716 error = ppt_setup_msix(sc->vm,
717 pptmsix->bus, pptmsix->slot,
718 pptmsix->func, pptmsix->idx,
719 pptmsix->addr, pptmsix->msg,
720 pptmsix->vector_control);
721 break;
722 case VM_PPTDEV_DISABLE_MSIX:
723 pptdev = (struct vm_pptdev *)data;
724 error = ppt_disable_msix(sc->vm, pptdev->bus, pptdev->slot,
725 pptdev->func);
726 break;
727 case VM_MAP_PPTDEV_MMIO:
728 pptmmio = (struct vm_pptdev_mmio *)data;
729 error = ppt_map_mmio(sc->vm, pptmmio->bus, pptmmio->slot,
730 pptmmio->func, pptmmio->gpa, pptmmio->len,
731 pptmmio->hpa);
732 break;
733 case VM_UNMAP_PPTDEV_MMIO:
734 pptmmio = (struct vm_pptdev_mmio *)data;
735 error = ppt_unmap_mmio(sc->vm, pptmmio->bus, pptmmio->slot,
736 pptmmio->func, pptmmio->gpa, pptmmio->len);
737 break;
738 case VM_BIND_PPTDEV:
739 pptdev = (struct vm_pptdev *)data;
740 error = vm_assign_pptdev(sc->vm, pptdev->bus, pptdev->slot,
741 pptdev->func);
742 break;
743 case VM_UNBIND_PPTDEV:
744 pptdev = (struct vm_pptdev *)data;
745 error = vm_unassign_pptdev(sc->vm, pptdev->bus, pptdev->slot,
746 pptdev->func);
747 break;
748 case VM_INJECT_EXCEPTION:
749 vmexc = (struct vm_exception *)data;
750 error = vm_inject_exception(vcpu,
751 vmexc->vector, vmexc->error_code_valid, vmexc->error_code,
752 vmexc->restart_instruction);
753 break;
754 case VM_INJECT_NMI:
755 error = vm_inject_nmi(vcpu);
756 break;
757 case VM_LAPIC_IRQ:
758 vmirq = (struct vm_lapic_irq *)data;
759 error = lapic_intr_edge(vcpu, vmirq->vector);
760 break;
761 case VM_LAPIC_LOCAL_IRQ:
762 vmirq = (struct vm_lapic_irq *)data;
763 error = lapic_set_local_intr(sc->vm, vcpu, vmirq->vector);
764 break;
765 case VM_LAPIC_MSI:
766 vmmsi = (struct vm_lapic_msi *)data;
767 error = lapic_intr_msi(sc->vm, vmmsi->addr, vmmsi->msg);
768 break;
769 case VM_IOAPIC_ASSERT_IRQ:
770 ioapic_irq = (struct vm_ioapic_irq *)data;
771 error = vioapic_assert_irq(sc->vm, ioapic_irq->irq);
772 break;
773 case VM_IOAPIC_DEASSERT_IRQ:
774 ioapic_irq = (struct vm_ioapic_irq *)data;
775 error = vioapic_deassert_irq(sc->vm, ioapic_irq->irq);
776 break;
777 case VM_IOAPIC_PULSE_IRQ:
778 ioapic_irq = (struct vm_ioapic_irq *)data;
779 error = vioapic_pulse_irq(sc->vm, ioapic_irq->irq);
780 break;
781 case VM_IOAPIC_PINCOUNT:
782 *(int *)data = vioapic_pincount(sc->vm);
783 break;
784 case VM_SET_KERNEMU_DEV:
785 case VM_GET_KERNEMU_DEV: {
786 mem_region_write_t mwrite;
787 mem_region_read_t mread;
788 bool arg;
789
790 kernemu = (void *)data;
791
792 if (kernemu->access_width > 0)
793 size = (1u << kernemu->access_width);
794 else
795 size = 1;
796
797 if (kernemu->gpa >= DEFAULT_APIC_BASE && kernemu->gpa < DEFAULT_APIC_BASE + PAGE_SIZE) {
798 mread = lapic_mmio_read;
799 mwrite = lapic_mmio_write;
800 } else if (kernemu->gpa >= VIOAPIC_BASE && kernemu->gpa < VIOAPIC_BASE + VIOAPIC_SIZE) {
801 mread = vioapic_mmio_read;
802 mwrite = vioapic_mmio_write;
803 } else if (kernemu->gpa >= VHPET_BASE && kernemu->gpa < VHPET_BASE + VHPET_SIZE) {
804 mread = vhpet_mmio_read;
805 mwrite = vhpet_mmio_write;
806 } else {
807 error = EINVAL;
808 break;
809 }
810
811 if (cmd == VM_SET_KERNEMU_DEV)
812 error = mwrite(vcpu, kernemu->gpa,
813 kernemu->value, size, &arg);
814 else
815 error = mread(vcpu, kernemu->gpa,
816 &kernemu->value, size, &arg);
817 break;
818 }
819 case VM_ISA_ASSERT_IRQ:
820 isa_irq = (struct vm_isa_irq *)data;
821 error = vatpic_assert_irq(sc->vm, isa_irq->atpic_irq);
822 if (error == 0 && isa_irq->ioapic_irq != -1)
823 error = vioapic_assert_irq(sc->vm,
824 isa_irq->ioapic_irq);
825 break;
826 case VM_ISA_DEASSERT_IRQ:
827 isa_irq = (struct vm_isa_irq *)data;
828 error = vatpic_deassert_irq(sc->vm, isa_irq->atpic_irq);
829 if (error == 0 && isa_irq->ioapic_irq != -1)
830 error = vioapic_deassert_irq(sc->vm,
831 isa_irq->ioapic_irq);
832 break;
833 case VM_ISA_PULSE_IRQ:
834 isa_irq = (struct vm_isa_irq *)data;
835 error = vatpic_pulse_irq(sc->vm, isa_irq->atpic_irq);
836 if (error == 0 && isa_irq->ioapic_irq != -1)
837 error = vioapic_pulse_irq(sc->vm, isa_irq->ioapic_irq);
838 break;
839 case VM_ISA_SET_IRQ_TRIGGER:
840 isa_irq_trigger = (struct vm_isa_irq_trigger *)data;
841 error = vatpic_set_irq_trigger(sc->vm,
842 isa_irq_trigger->atpic_irq, isa_irq_trigger->trigger);
843 break;
844 case VM_MMAP_GETNEXT:
845 mm = (struct vm_memmap *)data;
846 error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid,
847 &mm->segoff, &mm->len, &mm->prot, &mm->flags);
848 break;
849 case VM_MMAP_MEMSEG:
850 mm = (struct vm_memmap *)data;
851 error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff,
852 mm->len, mm->prot, mm->flags);
853 break;
854 case VM_MUNMAP_MEMSEG:
855 mu = (struct vm_munmap *)data;
856 error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len);
857 break;
858 #ifdef COMPAT_FREEBSD12
859 case VM_ALLOC_MEMSEG_FBSD12:
860 error = alloc_memseg(sc, (struct vm_memseg *)data,
861 sizeof(((struct vm_memseg_fbsd12 *)0)->name));
862 break;
863 #endif
864 case VM_ALLOC_MEMSEG:
865 error = alloc_memseg(sc, (struct vm_memseg *)data,
866 sizeof(((struct vm_memseg *)0)->name));
867 break;
868 #ifdef COMPAT_FREEBSD12
869 case VM_GET_MEMSEG_FBSD12:
870 error = get_memseg(sc, (struct vm_memseg *)data,
871 sizeof(((struct vm_memseg_fbsd12 *)0)->name));
872 break;
873 #endif
874 case VM_GET_MEMSEG:
875 error = get_memseg(sc, (struct vm_memseg *)data,
876 sizeof(((struct vm_memseg *)0)->name));
877 break;
878 case VM_GET_REGISTER:
879 vmreg = (struct vm_register *)data;
880 error = vm_get_register(vcpu, vmreg->regnum, &vmreg->regval);
881 break;
882 case VM_SET_REGISTER:
883 vmreg = (struct vm_register *)data;
884 error = vm_set_register(vcpu, vmreg->regnum, vmreg->regval);
885 break;
886 case VM_SET_SEGMENT_DESCRIPTOR:
887 vmsegdesc = (struct vm_seg_desc *)data;
888 error = vm_set_seg_desc(vcpu,
889 vmsegdesc->regnum,
890 &vmsegdesc->desc);
891 break;
892 case VM_GET_SEGMENT_DESCRIPTOR:
893 vmsegdesc = (struct vm_seg_desc *)data;
894 error = vm_get_seg_desc(vcpu,
895 vmsegdesc->regnum,
896 &vmsegdesc->desc);
897 break;
898 case VM_GET_REGISTER_SET:
899 vmregset = (struct vm_register_set *)data;
900 if (vmregset->count > VM_REG_LAST) {
901 error = EINVAL;
902 break;
903 }
904 regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
905 M_WAITOK);
906 regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
907 M_WAITOK);
908 error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
909 vmregset->count);
910 if (error == 0)
911 error = vm_get_register_set(vcpu,
912 vmregset->count, regnums, regvals);
913 if (error == 0)
914 error = copyout(regvals, vmregset->regvals,
915 sizeof(regvals[0]) * vmregset->count);
916 free(regvals, M_VMMDEV);
917 free(regnums, M_VMMDEV);
918 break;
919 case VM_SET_REGISTER_SET:
920 vmregset = (struct vm_register_set *)data;
921 if (vmregset->count > VM_REG_LAST) {
922 error = EINVAL;
923 break;
924 }
925 regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
926 M_WAITOK);
927 regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
928 M_WAITOK);
929 error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
930 vmregset->count);
931 if (error == 0)
932 error = copyin(vmregset->regvals, regvals,
933 sizeof(regvals[0]) * vmregset->count);
934 if (error == 0)
935 error = vm_set_register_set(vcpu,
936 vmregset->count, regnums, regvals);
937 free(regvals, M_VMMDEV);
938 free(regnums, M_VMMDEV);
939 break;
940 case VM_GET_CAPABILITY:
941 vmcap = (struct vm_capability *)data;
942 error = vm_get_capability(vcpu,
943 vmcap->captype,
944 &vmcap->capval);
945 break;
946 case VM_SET_CAPABILITY:
947 vmcap = (struct vm_capability *)data;
948 error = vm_set_capability(vcpu,
949 vmcap->captype,
950 vmcap->capval);
951 break;
952 case VM_SET_X2APIC_STATE:
953 x2apic = (struct vm_x2apic *)data;
954 error = vm_set_x2apic_state(vcpu, x2apic->state);
955 break;
956 case VM_GET_X2APIC_STATE:
957 x2apic = (struct vm_x2apic *)data;
958 error = vm_get_x2apic_state(vcpu, &x2apic->state);
959 break;
960 case VM_GET_GPA_PMAP:
961 gpapte = (struct vm_gpa_pte *)data;
962 pmap_get_mapping(vmspace_pmap(vm_get_vmspace(sc->vm)),
963 gpapte->gpa, gpapte->pte, &gpapte->ptenum);
964 error = 0;
965 break;
966 case VM_GET_HPET_CAPABILITIES:
967 error = vhpet_getcap((struct vm_hpet_cap *)data);
968 break;
969 case VM_GLA2GPA: {
970 CTASSERT(PROT_READ == VM_PROT_READ);
971 CTASSERT(PROT_WRITE == VM_PROT_WRITE);
972 CTASSERT(PROT_EXEC == VM_PROT_EXECUTE);
973 gg = (struct vm_gla2gpa *)data;
974 error = vm_gla2gpa(vcpu, &gg->paging, gg->gla,
975 gg->prot, &gg->gpa, &gg->fault);
976 KASSERT(error == 0 || error == EFAULT,
977 ("%s: vm_gla2gpa unknown error %d", __func__, error));
978 break;
979 }
980 case VM_GLA2GPA_NOFAULT:
981 gg = (struct vm_gla2gpa *)data;
982 error = vm_gla2gpa_nofault(vcpu, &gg->paging, gg->gla,
983 gg->prot, &gg->gpa, &gg->fault);
984 KASSERT(error == 0 || error == EFAULT,
985 ("%s: vm_gla2gpa unknown error %d", __func__, error));
986 break;
987 case VM_ACTIVATE_CPU:
988 error = vm_activate_cpu(vcpu);
989 break;
990 case VM_GET_CPUS:
991 error = 0;
992 vm_cpuset = (struct vm_cpuset *)data;
993 size = vm_cpuset->cpusetsize;
994 if (size < 1 || size > CPU_MAXSIZE / NBBY) {
995 error = ERANGE;
996 break;
997 }
998 cpuset = malloc(max(size, sizeof(cpuset_t)), M_TEMP,
999 M_WAITOK | M_ZERO);
1000 if (vm_cpuset->which == VM_ACTIVE_CPUS)
1001 *cpuset = vm_active_cpus(sc->vm);
1002 else if (vm_cpuset->which == VM_SUSPENDED_CPUS)
1003 *cpuset = vm_suspended_cpus(sc->vm);
1004 else if (vm_cpuset->which == VM_DEBUG_CPUS)
1005 *cpuset = vm_debug_cpus(sc->vm);
1006 else
1007 error = EINVAL;
1008 if (error == 0 && size < howmany(CPU_FLS(cpuset), NBBY))
1009 error = ERANGE;
1010 if (error == 0)
1011 error = copyout(cpuset, vm_cpuset->cpus, size);
1012 free(cpuset, M_TEMP);
1013 break;
1014 case VM_SUSPEND_CPU:
1015 error = vm_suspend_cpu(sc->vm, vcpu);
1016 break;
1017 case VM_RESUME_CPU:
1018 error = vm_resume_cpu(sc->vm, vcpu);
1019 break;
1020 case VM_SET_INTINFO:
1021 vmii = (struct vm_intinfo *)data;
1022 error = vm_exit_intinfo(vcpu, vmii->info1);
1023 break;
1024 case VM_GET_INTINFO:
1025 vmii = (struct vm_intinfo *)data;
1026 error = vm_get_intinfo(vcpu, &vmii->info1, &vmii->info2);
1027 break;
1028 case VM_RTC_WRITE:
1029 rtcdata = (struct vm_rtc_data *)data;
1030 error = vrtc_nvram_write(sc->vm, rtcdata->offset,
1031 rtcdata->value);
1032 break;
1033 case VM_RTC_READ:
1034 rtcdata = (struct vm_rtc_data *)data;
1035 error = vrtc_nvram_read(sc->vm, rtcdata->offset,
1036 &rtcdata->value);
1037 break;
1038 case VM_RTC_SETTIME:
1039 rtctime = (struct vm_rtc_time *)data;
1040 error = vrtc_set_time(sc->vm, rtctime->secs);
1041 break;
1042 case VM_RTC_GETTIME:
1043 error = 0;
1044 rtctime = (struct vm_rtc_time *)data;
1045 rtctime->secs = vrtc_get_time(sc->vm);
1046 break;
1047 case VM_RESTART_INSTRUCTION:
1048 error = vm_restart_instruction(vcpu);
1049 break;
1050 case VM_SET_TOPOLOGY:
1051 topology = (struct vm_cpu_topology *)data;
1052 error = vm_set_topology(sc->vm, topology->sockets,
1053 topology->cores, topology->threads, topology->maxcpus);
1054 break;
1055 case VM_GET_TOPOLOGY:
1056 topology = (struct vm_cpu_topology *)data;
1057 vm_get_topology(sc->vm, &topology->sockets, &topology->cores,
1058 &topology->threads, &topology->maxcpus);
1059 error = 0;
1060 break;
1061 #ifdef BHYVE_SNAPSHOT
1062 case VM_SNAPSHOT_REQ:
1063 snapshot_meta = (struct vm_snapshot_meta *)data;
1064 error = vm_snapshot_req(sc->vm, snapshot_meta);
1065 break;
1066 #ifdef COMPAT_FREEBSD13
1067 case VM_SNAPSHOT_REQ_OLD:
1068 /*
1069 * The old structure just has an additional pointer at
1070 * the start that is ignored.
1071 */
1072 snapshot_old = (struct vm_snapshot_meta_old *)data;
1073 snapshot_meta =
1074 (struct vm_snapshot_meta *)&snapshot_old->dev_data;
1075 error = vm_snapshot_req(sc->vm, snapshot_meta);
1076 break;
1077 #endif
1078 case VM_RESTORE_TIME:
1079 error = vm_restore_time(sc->vm);
1080 break;
1081 #endif
1082 default:
1083 error = ENOTTY;
1084 break;
1085 }
1086
1087 done:
1088 if (vcpus_locked == SINGLE)
1089 vcpu_unlock_one(sc, vcpuid, vcpu);
1090 else if (vcpus_locked == ALL)
1091 vcpu_unlock_all(sc);
1092 if (memsegs_locked)
1093 vm_unlock_memsegs(sc->vm);
1094
1095 /*
1096 * Make sure that no handler returns a kernel-internal
1097 * error value to userspace.
1098 */
1099 KASSERT(error == ERESTART || error >= 0,
1100 ("vmmdev_ioctl: invalid error return %d", error));
1101 return (error);
1102 }
1103
1104 static int
vmmdev_mmap_single(struct cdev * cdev,vm_ooffset_t * offset,vm_size_t mapsize,struct vm_object ** objp,int nprot)1105 vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
1106 struct vm_object **objp, int nprot)
1107 {
1108 struct vmmdev_softc *sc;
1109 vm_paddr_t gpa;
1110 size_t len;
1111 vm_ooffset_t segoff, first, last;
1112 int error, found, segid;
1113 bool sysmem;
1114
1115 error = vmm_priv_check(curthread->td_ucred);
1116 if (error)
1117 return (error);
1118
1119 first = *offset;
1120 last = first + mapsize;
1121 if ((nprot & PROT_EXEC) || first < 0 || first >= last)
1122 return (EINVAL);
1123
1124 sc = vmmdev_lookup2(cdev);
1125 if (sc == NULL) {
1126 /* virtual machine is in the process of being created */
1127 return (EINVAL);
1128 }
1129
1130 /*
1131 * Get a read lock on the guest memory map.
1132 */
1133 vm_slock_memsegs(sc->vm);
1134
1135 gpa = 0;
1136 found = 0;
1137 while (!found) {
1138 error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len,
1139 NULL, NULL);
1140 if (error)
1141 break;
1142
1143 if (first >= gpa && last <= gpa + len)
1144 found = 1;
1145 else
1146 gpa += len;
1147 }
1148
1149 if (found) {
1150 error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp);
1151 KASSERT(error == 0 && *objp != NULL,
1152 ("%s: invalid memory segment %d", __func__, segid));
1153 if (sysmem) {
1154 vm_object_reference(*objp);
1155 *offset = segoff + (first - gpa);
1156 } else {
1157 error = EINVAL;
1158 }
1159 }
1160 vm_unlock_memsegs(sc->vm);
1161 return (error);
1162 }
1163
1164 static void
vmmdev_destroy(void * arg)1165 vmmdev_destroy(void *arg)
1166 {
1167 struct vmmdev_softc *sc = arg;
1168 struct devmem_softc *dsc;
1169 int error __diagused;
1170
1171 vm_disable_vcpu_creation(sc->vm);
1172 error = vcpu_lock_all(sc);
1173 KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error));
1174 vm_unlock_vcpus(sc->vm);
1175
1176 while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) {
1177 KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__));
1178 SLIST_REMOVE_HEAD(&sc->devmem, link);
1179 free(dsc->name, M_VMMDEV);
1180 free(dsc, M_VMMDEV);
1181 }
1182
1183 if (sc->cdev != NULL)
1184 destroy_dev(sc->cdev);
1185
1186 if (sc->vm != NULL)
1187 vm_destroy(sc->vm);
1188
1189 if (sc->ucred != NULL)
1190 crfree(sc->ucred);
1191
1192 if ((sc->flags & VSC_LINKED) != 0) {
1193 mtx_lock(&vmmdev_mtx);
1194 SLIST_REMOVE(&head, sc, vmmdev_softc, link);
1195 mtx_unlock(&vmmdev_mtx);
1196 }
1197
1198 free(sc, M_VMMDEV);
1199 }
1200
1201 static int
sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)1202 sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
1203 {
1204 struct devmem_softc *dsc;
1205 struct vmmdev_softc *sc;
1206 struct cdev *cdev;
1207 char *buf;
1208 int error, buflen;
1209
1210 error = vmm_priv_check(req->td->td_ucred);
1211 if (error)
1212 return (error);
1213
1214 buflen = VM_MAX_NAMELEN + 1;
1215 buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
1216 strlcpy(buf, "beavis", buflen);
1217 error = sysctl_handle_string(oidp, buf, buflen, req);
1218 if (error != 0 || req->newptr == NULL)
1219 goto out;
1220
1221 mtx_lock(&vmmdev_mtx);
1222 sc = vmmdev_lookup(buf);
1223 if (sc == NULL || sc->cdev == NULL) {
1224 mtx_unlock(&vmmdev_mtx);
1225 error = EINVAL;
1226 goto out;
1227 }
1228
1229 /*
1230 * Setting 'sc->cdev' to NULL is used to indicate that the VM
1231 * is scheduled for destruction.
1232 */
1233 cdev = sc->cdev;
1234 sc->cdev = NULL;
1235 mtx_unlock(&vmmdev_mtx);
1236
1237 /*
1238 * Destroy all cdevs:
1239 *
1240 * - any new operations on the 'cdev' will return an error (ENXIO).
1241 *
1242 * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev'
1243 */
1244 SLIST_FOREACH(dsc, &sc->devmem, link) {
1245 KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed"));
1246 destroy_dev(dsc->cdev);
1247 devmem_destroy(dsc);
1248 }
1249 destroy_dev(cdev);
1250 vmmdev_destroy(sc);
1251 error = 0;
1252
1253 out:
1254 free(buf, M_VMMDEV);
1255 return (error);
1256 }
1257 SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy,
1258 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
1259 NULL, 0, sysctl_vmm_destroy, "A",
1260 NULL);
1261
1262 static struct cdevsw vmmdevsw = {
1263 .d_name = "vmmdev",
1264 .d_version = D_VERSION,
1265 .d_ioctl = vmmdev_ioctl,
1266 .d_mmap_single = vmmdev_mmap_single,
1267 .d_read = vmmdev_rw,
1268 .d_write = vmmdev_rw,
1269 };
1270
1271 static int
sysctl_vmm_create(SYSCTL_HANDLER_ARGS)1272 sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
1273 {
1274 struct vm *vm;
1275 struct cdev *cdev;
1276 struct vmmdev_softc *sc, *sc2;
1277 char *buf;
1278 int error, buflen;
1279
1280 error = vmm_priv_check(req->td->td_ucred);
1281 if (error)
1282 return (error);
1283
1284 buflen = VM_MAX_NAMELEN + 1;
1285 buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
1286 strlcpy(buf, "beavis", buflen);
1287 error = sysctl_handle_string(oidp, buf, buflen, req);
1288 if (error != 0 || req->newptr == NULL)
1289 goto out;
1290
1291 mtx_lock(&vmmdev_mtx);
1292 sc = vmmdev_lookup(buf);
1293 mtx_unlock(&vmmdev_mtx);
1294 if (sc != NULL) {
1295 error = EEXIST;
1296 goto out;
1297 }
1298
1299 error = vm_create(buf, &vm);
1300 if (error != 0)
1301 goto out;
1302
1303 sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO);
1304 sc->ucred = crhold(curthread->td_ucred);
1305 sc->vm = vm;
1306 SLIST_INIT(&sc->devmem);
1307
1308 /*
1309 * Lookup the name again just in case somebody sneaked in when we
1310 * dropped the lock.
1311 */
1312 mtx_lock(&vmmdev_mtx);
1313 sc2 = vmmdev_lookup(buf);
1314 if (sc2 == NULL) {
1315 SLIST_INSERT_HEAD(&head, sc, link);
1316 sc->flags |= VSC_LINKED;
1317 }
1318 mtx_unlock(&vmmdev_mtx);
1319
1320 if (sc2 != NULL) {
1321 vmmdev_destroy(sc);
1322 error = EEXIST;
1323 goto out;
1324 }
1325
1326 error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmdevsw, sc->ucred,
1327 UID_ROOT, GID_WHEEL, 0600, "vmm/%s", buf);
1328 if (error != 0) {
1329 vmmdev_destroy(sc);
1330 goto out;
1331 }
1332
1333 mtx_lock(&vmmdev_mtx);
1334 sc->cdev = cdev;
1335 sc->cdev->si_drv1 = sc;
1336 mtx_unlock(&vmmdev_mtx);
1337
1338 out:
1339 free(buf, M_VMMDEV);
1340 return (error);
1341 }
1342 SYSCTL_PROC(_hw_vmm, OID_AUTO, create,
1343 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
1344 NULL, 0, sysctl_vmm_create, "A",
1345 NULL);
1346
1347 void
vmmdev_init(void)1348 vmmdev_init(void)
1349 {
1350 pr_allow_flag = prison_add_allow(NULL, "vmm", NULL,
1351 "Allow use of vmm in a jail.");
1352 }
1353
1354 int
vmmdev_cleanup(void)1355 vmmdev_cleanup(void)
1356 {
1357 int error;
1358
1359 if (SLIST_EMPTY(&head))
1360 error = 0;
1361 else
1362 error = EBUSY;
1363
1364 return (error);
1365 }
1366
1367 static int
devmem_mmap_single(struct cdev * cdev,vm_ooffset_t * offset,vm_size_t len,struct vm_object ** objp,int nprot)1368 devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
1369 struct vm_object **objp, int nprot)
1370 {
1371 struct devmem_softc *dsc;
1372 vm_ooffset_t first, last;
1373 size_t seglen;
1374 int error;
1375 bool sysmem;
1376
1377 dsc = cdev->si_drv1;
1378 if (dsc == NULL) {
1379 /* 'cdev' has been created but is not ready for use */
1380 return (ENXIO);
1381 }
1382
1383 first = *offset;
1384 last = *offset + len;
1385 if ((nprot & PROT_EXEC) || first < 0 || first >= last)
1386 return (EINVAL);
1387
1388 vm_slock_memsegs(dsc->sc->vm);
1389
1390 error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp);
1391 KASSERT(error == 0 && !sysmem && *objp != NULL,
1392 ("%s: invalid devmem segment %d", __func__, dsc->segid));
1393
1394 if (seglen >= last)
1395 vm_object_reference(*objp);
1396 else
1397 error = EINVAL;
1398
1399 vm_unlock_memsegs(dsc->sc->vm);
1400 return (error);
1401 }
1402
1403 static struct cdevsw devmemsw = {
1404 .d_name = "devmem",
1405 .d_version = D_VERSION,
1406 .d_mmap_single = devmem_mmap_single,
1407 };
1408
1409 static int
devmem_create_cdev(const char * vmname,int segid,char * devname)1410 devmem_create_cdev(const char *vmname, int segid, char *devname)
1411 {
1412 struct devmem_softc *dsc;
1413 struct vmmdev_softc *sc;
1414 struct cdev *cdev;
1415 int error;
1416
1417 error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &devmemsw, NULL,
1418 UID_ROOT, GID_WHEEL, 0600, "vmm.io/%s.%s", vmname, devname);
1419 if (error)
1420 return (error);
1421
1422 dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO);
1423
1424 mtx_lock(&vmmdev_mtx);
1425 sc = vmmdev_lookup(vmname);
1426 KASSERT(sc != NULL, ("%s: vm %s softc not found", __func__, vmname));
1427 if (sc->cdev == NULL) {
1428 /* virtual machine is being created or destroyed */
1429 mtx_unlock(&vmmdev_mtx);
1430 free(dsc, M_VMMDEV);
1431 destroy_dev_sched_cb(cdev, NULL, 0);
1432 return (ENODEV);
1433 }
1434
1435 dsc->segid = segid;
1436 dsc->name = devname;
1437 dsc->cdev = cdev;
1438 dsc->sc = sc;
1439 SLIST_INSERT_HEAD(&sc->devmem, dsc, link);
1440 mtx_unlock(&vmmdev_mtx);
1441
1442 /* The 'cdev' is ready for use after 'si_drv1' is initialized */
1443 cdev->si_drv1 = dsc;
1444 return (0);
1445 }
1446
1447 static void
devmem_destroy(void * arg)1448 devmem_destroy(void *arg)
1449 {
1450 struct devmem_softc *dsc = arg;
1451
1452 KASSERT(dsc->cdev, ("%s: devmem cdev already destroyed", __func__));
1453 dsc->cdev = NULL;
1454 dsc->sc = NULL;
1455 }
1456