1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2011 NetApp, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 #include "opt_bhyve_snapshot.h"
30 #include "opt_ddb.h"
31
32 #include <sys/cdefs.h>
33 #include <sys/param.h>
34 #include <sys/sysctl.h>
35 #include <sys/systm.h>
36 #include <sys/pcpu.h>
37
38 #include <vm/vm.h>
39 #include <vm/pmap.h>
40
41 #include <machine/segments.h>
42 #include <machine/vmm.h>
43 #include <machine/vmm_snapshot.h>
44 #include "vmm_host.h"
45 #include "vmx_cpufunc.h"
46 #include "vmcs.h"
47 #include "ept.h"
48 #include "vmx.h"
49
50 #ifdef DDB
51 #include <ddb/ddb.h>
52 #endif
53
54 SYSCTL_DECL(_hw_vmm_vmx);
55
56 static int no_flush_rsb;
57 SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, no_flush_rsb, CTLFLAG_RW,
58 &no_flush_rsb, 0, "Do not flush RSB upon vmexit");
59
60 static uint64_t
vmcs_fix_regval(uint32_t encoding,uint64_t val)61 vmcs_fix_regval(uint32_t encoding, uint64_t val)
62 {
63
64 switch (encoding) {
65 case VMCS_GUEST_CR0:
66 val = vmx_fix_cr0(val);
67 break;
68 case VMCS_GUEST_CR4:
69 val = vmx_fix_cr4(val);
70 break;
71 default:
72 break;
73 }
74 return (val);
75 }
76
77 static uint32_t
vmcs_field_encoding(int ident)78 vmcs_field_encoding(int ident)
79 {
80 switch (ident) {
81 case VM_REG_GUEST_CR0:
82 return (VMCS_GUEST_CR0);
83 case VM_REG_GUEST_CR3:
84 return (VMCS_GUEST_CR3);
85 case VM_REG_GUEST_CR4:
86 return (VMCS_GUEST_CR4);
87 case VM_REG_GUEST_DR7:
88 return (VMCS_GUEST_DR7);
89 case VM_REG_GUEST_RSP:
90 return (VMCS_GUEST_RSP);
91 case VM_REG_GUEST_RIP:
92 return (VMCS_GUEST_RIP);
93 case VM_REG_GUEST_RFLAGS:
94 return (VMCS_GUEST_RFLAGS);
95 case VM_REG_GUEST_ES:
96 return (VMCS_GUEST_ES_SELECTOR);
97 case VM_REG_GUEST_CS:
98 return (VMCS_GUEST_CS_SELECTOR);
99 case VM_REG_GUEST_SS:
100 return (VMCS_GUEST_SS_SELECTOR);
101 case VM_REG_GUEST_DS:
102 return (VMCS_GUEST_DS_SELECTOR);
103 case VM_REG_GUEST_FS:
104 return (VMCS_GUEST_FS_SELECTOR);
105 case VM_REG_GUEST_GS:
106 return (VMCS_GUEST_GS_SELECTOR);
107 case VM_REG_GUEST_TR:
108 return (VMCS_GUEST_TR_SELECTOR);
109 case VM_REG_GUEST_LDTR:
110 return (VMCS_GUEST_LDTR_SELECTOR);
111 case VM_REG_GUEST_EFER:
112 return (VMCS_GUEST_IA32_EFER);
113 case VM_REG_GUEST_PDPTE0:
114 return (VMCS_GUEST_PDPTE0);
115 case VM_REG_GUEST_PDPTE1:
116 return (VMCS_GUEST_PDPTE1);
117 case VM_REG_GUEST_PDPTE2:
118 return (VMCS_GUEST_PDPTE2);
119 case VM_REG_GUEST_PDPTE3:
120 return (VMCS_GUEST_PDPTE3);
121 case VM_REG_GUEST_ENTRY_INST_LENGTH:
122 return (VMCS_ENTRY_INST_LENGTH);
123 case VM_REG_GUEST_FS_BASE:
124 return (VMCS_GUEST_FS_BASE);
125 case VM_REG_GUEST_GS_BASE:
126 return (VMCS_GUEST_GS_BASE);
127 default:
128 return (-1);
129 }
130 }
131
132 static int
vmcs_seg_desc_encoding(int seg,uint32_t * base,uint32_t * lim,uint32_t * acc)133 vmcs_seg_desc_encoding(int seg, uint32_t *base, uint32_t *lim, uint32_t *acc)
134 {
135
136 switch (seg) {
137 case VM_REG_GUEST_ES:
138 *base = VMCS_GUEST_ES_BASE;
139 *lim = VMCS_GUEST_ES_LIMIT;
140 *acc = VMCS_GUEST_ES_ACCESS_RIGHTS;
141 break;
142 case VM_REG_GUEST_CS:
143 *base = VMCS_GUEST_CS_BASE;
144 *lim = VMCS_GUEST_CS_LIMIT;
145 *acc = VMCS_GUEST_CS_ACCESS_RIGHTS;
146 break;
147 case VM_REG_GUEST_SS:
148 *base = VMCS_GUEST_SS_BASE;
149 *lim = VMCS_GUEST_SS_LIMIT;
150 *acc = VMCS_GUEST_SS_ACCESS_RIGHTS;
151 break;
152 case VM_REG_GUEST_DS:
153 *base = VMCS_GUEST_DS_BASE;
154 *lim = VMCS_GUEST_DS_LIMIT;
155 *acc = VMCS_GUEST_DS_ACCESS_RIGHTS;
156 break;
157 case VM_REG_GUEST_FS:
158 *base = VMCS_GUEST_FS_BASE;
159 *lim = VMCS_GUEST_FS_LIMIT;
160 *acc = VMCS_GUEST_FS_ACCESS_RIGHTS;
161 break;
162 case VM_REG_GUEST_GS:
163 *base = VMCS_GUEST_GS_BASE;
164 *lim = VMCS_GUEST_GS_LIMIT;
165 *acc = VMCS_GUEST_GS_ACCESS_RIGHTS;
166 break;
167 case VM_REG_GUEST_TR:
168 *base = VMCS_GUEST_TR_BASE;
169 *lim = VMCS_GUEST_TR_LIMIT;
170 *acc = VMCS_GUEST_TR_ACCESS_RIGHTS;
171 break;
172 case VM_REG_GUEST_LDTR:
173 *base = VMCS_GUEST_LDTR_BASE;
174 *lim = VMCS_GUEST_LDTR_LIMIT;
175 *acc = VMCS_GUEST_LDTR_ACCESS_RIGHTS;
176 break;
177 case VM_REG_GUEST_IDTR:
178 *base = VMCS_GUEST_IDTR_BASE;
179 *lim = VMCS_GUEST_IDTR_LIMIT;
180 *acc = VMCS_INVALID_ENCODING;
181 break;
182 case VM_REG_GUEST_GDTR:
183 *base = VMCS_GUEST_GDTR_BASE;
184 *lim = VMCS_GUEST_GDTR_LIMIT;
185 *acc = VMCS_INVALID_ENCODING;
186 break;
187 default:
188 return (EINVAL);
189 }
190
191 return (0);
192 }
193
/*
 * Read the guest register 'ident' from 'vmcs' into '*retval'.
 *
 * When 'running' is zero the read is bracketed by VMPTRLD(vmcs) and
 * VMCLEAR(vmcs); otherwise the field is read directly.
 *
 * Returns EINVAL if 'ident' does not map to a VMCS field, otherwise the
 * result of vmread().
 */
int
vmcs_getreg(struct vmcs *vmcs, int running, int ident, uint64_t *retval)
{
	uint32_t field;
	int rc;

	/*
	 * If we need to get at vmx-specific state in the VMCS we can bypass
	 * the translation of 'ident' to 'encoding' by simply setting the
	 * sign bit. As it so happens the upper 16 bits are reserved (i.e.
	 * set to 0) in the encodings for the VMCS so we are free to use the
	 * sign bit.
	 */
	field = (ident < 0) ? (uint32_t)(ident & 0x7fffffff) :
	    vmcs_field_encoding(ident);
	if (field == (uint32_t)-1)
		return (EINVAL);

	if (running)
		return (vmread(field, retval));

	VMPTRLD(vmcs);
	rc = vmread(field, retval);
	VMCLEAR(vmcs);

	return (rc);
}
225
/*
 * Write 'val' to the guest register 'ident' in 'vmcs'.
 *
 * A negative 'ident' carries a raw VMCS encoding in its low 31 bits and
 * bypasses the identifier translation.  The value is passed through
 * vmcs_fix_regval() before it is written.
 *
 * When 'running' is zero the write is bracketed by VMPTRLD(vmcs) and
 * VMCLEAR(vmcs); otherwise the field is written directly.
 *
 * Returns EINVAL if 'ident' does not map to a VMCS field, otherwise the
 * result of vmwrite().
 */
int
vmcs_setreg(struct vmcs *vmcs, int running, int ident, uint64_t val)
{
	uint32_t field;
	int rc;

	field = (ident < 0) ? (uint32_t)(ident & 0x7fffffff) :
	    vmcs_field_encoding(ident);
	if (field == (uint32_t)-1)
		return (EINVAL);

	val = vmcs_fix_regval(field, val);

	if (running)
		return (vmwrite(field, val));

	VMPTRLD(vmcs);
	rc = vmwrite(field, val);
	VMCLEAR(vmcs);

	return (rc);
}
252
253 int
vmcs_setdesc(struct vmcs * vmcs,int running,int seg,struct seg_desc * desc)254 vmcs_setdesc(struct vmcs *vmcs, int running, int seg, struct seg_desc *desc)
255 {
256 int error;
257 uint32_t base, limit, access;
258
259 error = vmcs_seg_desc_encoding(seg, &base, &limit, &access);
260 if (error != 0)
261 panic("vmcs_setdesc: invalid segment register %d", seg);
262
263 if (!running)
264 VMPTRLD(vmcs);
265 if ((error = vmwrite(base, desc->base)) != 0)
266 goto done;
267
268 if ((error = vmwrite(limit, desc->limit)) != 0)
269 goto done;
270
271 if (access != VMCS_INVALID_ENCODING) {
272 if ((error = vmwrite(access, desc->access)) != 0)
273 goto done;
274 }
275 done:
276 if (!running)
277 VMCLEAR(vmcs);
278 return (error);
279 }
280
281 int
vmcs_getdesc(struct vmcs * vmcs,int running,int seg,struct seg_desc * desc)282 vmcs_getdesc(struct vmcs *vmcs, int running, int seg, struct seg_desc *desc)
283 {
284 int error;
285 uint32_t base, limit, access;
286 uint64_t u64;
287
288 error = vmcs_seg_desc_encoding(seg, &base, &limit, &access);
289 if (error != 0)
290 panic("vmcs_getdesc: invalid segment register %d", seg);
291
292 if (!running)
293 VMPTRLD(vmcs);
294 if ((error = vmread(base, &u64)) != 0)
295 goto done;
296 desc->base = u64;
297
298 if ((error = vmread(limit, &u64)) != 0)
299 goto done;
300 desc->limit = u64;
301
302 if (access != VMCS_INVALID_ENCODING) {
303 if ((error = vmread(access, &u64)) != 0)
304 goto done;
305 desc->access = u64;
306 }
307 done:
308 if (!running)
309 VMCLEAR(vmcs);
310 return (error);
311 }
312
313 int
vmcs_set_msr_save(struct vmcs * vmcs,u_long g_area,u_int g_count)314 vmcs_set_msr_save(struct vmcs *vmcs, u_long g_area, u_int g_count)
315 {
316 int error;
317
318 VMPTRLD(vmcs);
319
320 /*
321 * Guest MSRs are saved in the VM-exit MSR-store area.
322 * Guest MSRs are loaded from the VM-entry MSR-load area.
323 * Both areas point to the same location in memory.
324 */
325 if ((error = vmwrite(VMCS_EXIT_MSR_STORE, g_area)) != 0)
326 goto done;
327 if ((error = vmwrite(VMCS_EXIT_MSR_STORE_COUNT, g_count)) != 0)
328 goto done;
329
330 if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD, g_area)) != 0)
331 goto done;
332 if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD_COUNT, g_count)) != 0)
333 goto done;
334
335 error = 0;
336 done:
337 VMCLEAR(vmcs);
338 return (error);
339 }
340
341 int
vmcs_init(struct vmcs * vmcs)342 vmcs_init(struct vmcs *vmcs)
343 {
344 int error, codesel, datasel, tsssel;
345 u_long cr0, cr4, efer;
346 uint64_t pat, fsbase, idtrbase;
347
348 codesel = vmm_get_host_codesel();
349 datasel = vmm_get_host_datasel();
350 tsssel = vmm_get_host_tsssel();
351
352 /*
353 * Make sure we have a "current" VMCS to work with.
354 */
355 VMPTRLD(vmcs);
356
357 /* Host state */
358
359 /* Initialize host IA32_PAT MSR */
360 pat = vmm_get_host_pat();
361 if ((error = vmwrite(VMCS_HOST_IA32_PAT, pat)) != 0)
362 goto done;
363
364 /* Load the IA32_EFER MSR */
365 efer = vmm_get_host_efer();
366 if ((error = vmwrite(VMCS_HOST_IA32_EFER, efer)) != 0)
367 goto done;
368
369 /* Load the control registers */
370
371 cr0 = vmm_get_host_cr0();
372 if ((error = vmwrite(VMCS_HOST_CR0, cr0)) != 0)
373 goto done;
374
375 cr4 = vmm_get_host_cr4() | CR4_VMXE;
376 if ((error = vmwrite(VMCS_HOST_CR4, cr4)) != 0)
377 goto done;
378
379 /* Load the segment selectors */
380 if ((error = vmwrite(VMCS_HOST_ES_SELECTOR, datasel)) != 0)
381 goto done;
382
383 if ((error = vmwrite(VMCS_HOST_CS_SELECTOR, codesel)) != 0)
384 goto done;
385
386 if ((error = vmwrite(VMCS_HOST_SS_SELECTOR, datasel)) != 0)
387 goto done;
388
389 if ((error = vmwrite(VMCS_HOST_DS_SELECTOR, datasel)) != 0)
390 goto done;
391
392 if ((error = vmwrite(VMCS_HOST_FS_SELECTOR, datasel)) != 0)
393 goto done;
394
395 if ((error = vmwrite(VMCS_HOST_GS_SELECTOR, datasel)) != 0)
396 goto done;
397
398 if ((error = vmwrite(VMCS_HOST_TR_SELECTOR, tsssel)) != 0)
399 goto done;
400
401 /*
402 * Load the Base-Address for %fs and idtr.
403 *
404 * Note that we exclude %gs, tss and gdtr here because their base
405 * address is pcpu specific.
406 */
407 fsbase = vmm_get_host_fsbase();
408 if ((error = vmwrite(VMCS_HOST_FS_BASE, fsbase)) != 0)
409 goto done;
410
411 idtrbase = vmm_get_host_idtrbase();
412 if ((error = vmwrite(VMCS_HOST_IDTR_BASE, idtrbase)) != 0)
413 goto done;
414
415 /* instruction pointer */
416 if (no_flush_rsb) {
417 if ((error = vmwrite(VMCS_HOST_RIP,
418 (u_long)vmx_exit_guest)) != 0)
419 goto done;
420 } else {
421 if ((error = vmwrite(VMCS_HOST_RIP,
422 (u_long)vmx_exit_guest_flush_rsb)) != 0)
423 goto done;
424 }
425
426 /* link pointer */
427 if ((error = vmwrite(VMCS_LINK_POINTER, ~0)) != 0)
428 goto done;
429 done:
430 VMCLEAR(vmcs);
431 return (error);
432 }
433
434 #ifdef BHYVE_SNAPSHOT
/*
 * Read the VMCS field whose encoding is 'ident' into '*val'.
 *
 * Unlike vmcs_getreg(), 'ident' is handed to vmread() as is, without any
 * identifier translation.  When 'running' is zero the read is bracketed by
 * VMPTRLD(vmcs) and VMCLEAR(vmcs).  Returns the result of vmread().
 */
int
vmcs_getany(struct vmcs *vmcs, int running, int ident, uint64_t *val)
{
	int rc;

	if (running)
		return (vmread(ident, val));

	VMPTRLD(vmcs);
	rc = vmread(ident, val);
	VMCLEAR(vmcs);

	return (rc);
}
450
/*
 * Write 'val' to the VMCS field whose encoding is 'ident'.
 *
 * Unlike vmcs_setreg(), 'ident' is handed to vmwrite() as is and the value
 * is not passed through vmcs_fix_regval().  When 'running' is zero the
 * write is bracketed by VMPTRLD(vmcs) and VMCLEAR(vmcs).  Returns the
 * result of vmwrite().
 */
int
vmcs_setany(struct vmcs *vmcs, int running, int ident, uint64_t val)
{
	int rc;

	if (running)
		return (vmwrite(ident, val));

	VMPTRLD(vmcs);
	rc = vmwrite(ident, val);
	VMCLEAR(vmcs);

	return (rc);
}
466
467 int
vmcs_snapshot_reg(struct vmcs * vmcs,int running,int ident,struct vm_snapshot_meta * meta)468 vmcs_snapshot_reg(struct vmcs *vmcs, int running, int ident,
469 struct vm_snapshot_meta *meta)
470 {
471 int ret;
472 uint64_t val;
473
474 if (meta->op == VM_SNAPSHOT_SAVE) {
475 ret = vmcs_getreg(vmcs, running, ident, &val);
476 if (ret != 0)
477 goto done;
478
479 SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
480 } else if (meta->op == VM_SNAPSHOT_RESTORE) {
481 SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
482
483 ret = vmcs_setreg(vmcs, running, ident, val);
484 if (ret != 0)
485 goto done;
486 } else {
487 ret = EINVAL;
488 goto done;
489 }
490
491 done:
492 return (ret);
493 }
494
495 int
vmcs_snapshot_desc(struct vmcs * vmcs,int running,int seg,struct vm_snapshot_meta * meta)496 vmcs_snapshot_desc(struct vmcs *vmcs, int running, int seg,
497 struct vm_snapshot_meta *meta)
498 {
499 int ret;
500 struct seg_desc desc;
501
502 if (meta->op == VM_SNAPSHOT_SAVE) {
503 ret = vmcs_getdesc(vmcs, running, seg, &desc);
504 if (ret != 0)
505 goto done;
506
507 SNAPSHOT_VAR_OR_LEAVE(desc.base, meta, ret, done);
508 SNAPSHOT_VAR_OR_LEAVE(desc.limit, meta, ret, done);
509 SNAPSHOT_VAR_OR_LEAVE(desc.access, meta, ret, done);
510 } else if (meta->op == VM_SNAPSHOT_RESTORE) {
511 SNAPSHOT_VAR_OR_LEAVE(desc.base, meta, ret, done);
512 SNAPSHOT_VAR_OR_LEAVE(desc.limit, meta, ret, done);
513 SNAPSHOT_VAR_OR_LEAVE(desc.access, meta, ret, done);
514
515 ret = vmcs_setdesc(vmcs, running, seg, &desc);
516 if (ret != 0)
517 goto done;
518 } else {
519 ret = EINVAL;
520 goto done;
521 }
522
523 done:
524 return (ret);
525 }
526
527 int
vmcs_snapshot_any(struct vmcs * vmcs,int running,int ident,struct vm_snapshot_meta * meta)528 vmcs_snapshot_any(struct vmcs *vmcs, int running, int ident,
529 struct vm_snapshot_meta *meta)
530 {
531 int ret;
532 uint64_t val;
533
534 if (meta->op == VM_SNAPSHOT_SAVE) {
535 ret = vmcs_getany(vmcs, running, ident, &val);
536 if (ret != 0)
537 goto done;
538
539 SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
540 } else if (meta->op == VM_SNAPSHOT_RESTORE) {
541 SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
542
543 ret = vmcs_setany(vmcs, running, ident, val);
544 if (ret != 0)
545 goto done;
546 } else {
547 ret = EINVAL;
548 goto done;
549 }
550
551 done:
552 return (ret);
553 }
554 #endif
555
556 #ifdef DDB
557 extern int vmxon_enabled[];
558
DB_SHOW_COMMAND(vmcs,db_show_vmcs)559 DB_SHOW_COMMAND(vmcs, db_show_vmcs)
560 {
561 uint64_t cur_vmcs, val;
562 uint32_t exit;
563
564 if (!vmxon_enabled[curcpu]) {
565 db_printf("VMX not enabled\n");
566 return;
567 }
568
569 if (have_addr) {
570 db_printf("Only current VMCS supported\n");
571 return;
572 }
573
574 vmptrst(&cur_vmcs);
575 if (cur_vmcs == VMCS_INITIAL) {
576 db_printf("No current VM context\n");
577 return;
578 }
579 db_printf("VMCS: %jx\n", cur_vmcs);
580 db_printf("VPID: %lu\n", vmcs_read(VMCS_VPID));
581 db_printf("Activity: ");
582 val = vmcs_read(VMCS_GUEST_ACTIVITY);
583 switch (val) {
584 case 0:
585 db_printf("Active");
586 break;
587 case 1:
588 db_printf("HLT");
589 break;
590 case 2:
591 db_printf("Shutdown");
592 break;
593 case 3:
594 db_printf("Wait for SIPI");
595 break;
596 default:
597 db_printf("Unknown: %#lx", val);
598 }
599 db_printf("\n");
600 exit = vmcs_read(VMCS_EXIT_REASON);
601 if (exit & 0x80000000)
602 db_printf("Entry Failure Reason: %u\n", exit & 0xffff);
603 else
604 db_printf("Exit Reason: %u\n", exit & 0xffff);
605 db_printf("Qualification: %#lx\n", vmcs_exit_qualification());
606 db_printf("Guest Linear Address: %#lx\n",
607 vmcs_read(VMCS_GUEST_LINEAR_ADDRESS));
608 switch (exit & 0x8000ffff) {
609 case EXIT_REASON_EXCEPTION:
610 case EXIT_REASON_EXT_INTR:
611 val = vmcs_read(VMCS_EXIT_INTR_INFO);
612 db_printf("Interrupt Type: ");
613 switch (val >> 8 & 0x7) {
614 case 0:
615 db_printf("external");
616 break;
617 case 2:
618 db_printf("NMI");
619 break;
620 case 3:
621 db_printf("HW exception");
622 break;
623 case 4:
624 db_printf("SW exception");
625 break;
626 default:
627 db_printf("?? %lu", val >> 8 & 0x7);
628 break;
629 }
630 db_printf(" Vector: %lu", val & 0xff);
631 if (val & 0x800)
632 db_printf(" Error Code: %lx",
633 vmcs_read(VMCS_EXIT_INTR_ERRCODE));
634 db_printf("\n");
635 break;
636 case EXIT_REASON_EPT_FAULT:
637 case EXIT_REASON_EPT_MISCONFIG:
638 db_printf("Guest Physical Address: %#lx\n",
639 vmcs_read(VMCS_GUEST_PHYSICAL_ADDRESS));
640 break;
641 }
642 db_printf("VM-instruction error: %#lx\n", vmcs_instruction_error());
643 }
644 #endif
645