/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/malloc.h>

#include <vm/vm.h>
#include <vm/pmap.h>

#include <dev/pci/pcireg.h>

#include <machine/vmparam.h>
#include <contrib/dev/acpica/include/acpi.h>

#include "io/iommu.h"

/*
 * Documented in the "Intel Virtualization Technology for Directed I/O",
 * Architecture Spec, September 2008.
 */

#define	VTD_DRHD_INCLUDE_PCI_ALL(Flags)  (((Flags) >> 0) & 0x1)

/* Section 10.4 "Register Descriptions" */
struct vtdmap {
	volatile uint32_t	version;
	volatile uint32_t	res0;
	volatile uint64_t	cap;
	volatile uint64_t	ext_cap;
	volatile uint32_t	gcr;
	volatile uint32_t	gsr;
	volatile uint64_t	rta;
	volatile uint64_t	ccr;
};

#define	VTD_CAP_SAGAW(cap)	(((cap) >> 8) & 0x1F)
#define	VTD_CAP_ND(cap)		((cap) & 0x7)
#define	VTD_CAP_CM(cap)		(((cap) >> 7) & 0x1)
#define	VTD_CAP_SPS(cap)	(((cap) >> 34) & 0xF)
#define	VTD_CAP_RWBF(cap)	(((cap) >> 4) & 0x1)

#define	VTD_ECAP_DI(ecap)	(((ecap) >> 2) & 0x1)
#define	VTD_ECAP_COHERENCY(ecap) ((ecap) & 0x1)
#define	VTD_ECAP_IRO(ecap)	(((ecap) >> 8) & 0x3FF)

#define	VTD_GCR_WBF		(1 << 27)
#define	VTD_GCR_SRTP		(1 << 30)
#define	VTD_GCR_TE		(1U << 31)

#define	VTD_GSR_WBFS		(1 << 27)
#define	VTD_GSR_RTPS		(1 << 30)
#define	VTD_GSR_TES		(1U << 31)

#define	VTD_CCR_ICC		(1UL << 63)	/* invalidate context cache */
#define	VTD_CCR_CIRG_GLOBAL	(1UL << 61)	/* global invalidation */

#define	VTD_IIR_IVT		(1UL << 63)	/* invalidation IOTLB */
#define	VTD_IIR_IIRG_GLOBAL	(1ULL << 60)	/* global IOTLB invalidation */
#define	VTD_IIR_IIRG_DOMAIN	(2ULL << 60)	/* domain IOTLB invalidation */
#define	VTD_IIR_IIRG_PAGE	(3ULL << 60)	/* page IOTLB invalidation */
#define	VTD_IIR_DRAIN_READS	(1ULL << 49)	/* drain pending DMA reads */
#define	VTD_IIR_DRAIN_WRITES	(1ULL << 48)	/* drain pending DMA writes */
#define	VTD_IIR_DOMAIN_P	32

#define	VTD_ROOT_PRESENT	0x1
#define	VTD_CTX_PRESENT		0x1
#define	VTD_CTX_TT_ALL		(1UL << 2)

#define	VTD_PTE_RD		(1UL << 0)
#define	VTD_PTE_WR		(1UL << 1)
#define	VTD_PTE_SUPERPAGE	(1UL << 7)
#define	VTD_PTE_ADDR_M		(0x000FFFFFFFFFF000UL)

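/*
 * Each context entry is 128 bits wide, i.e. it occupies two consecutive
 * uint64_t slots of a context table.  The macro below therefore doubles
 * the device/function number (the low 8 bits of the PCI RID) to index
 * the entry's low quadword; the high quadword is at index + 1.  For
 * example, RID 0x0010 (bus 0, slot 2, function 0) uses indices 32 and
 * 33 of the bus 0 context table.
 */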
#define	VTD_RID2IDX(rid)	(((rid) & 0xff) * 2)

struct domain {
	uint64_t	*ptp;		/* first level page table page */
	int		pt_levels;	/* number of page table levels */
	int		addrwidth;	/* 'AW' field in context entry */
	int		spsmask;	/* supported super page sizes */
	u_int		id;		/* domain id */
	vm_paddr_t	maxaddr;	/* highest address to be mapped */
	SLIST_ENTRY(domain) next;
};

static SLIST_HEAD(, domain) domhead;

#define	DRHD_MAX_UNITS	16
static ACPI_DMAR_HARDWARE_UNIT	*drhds[DRHD_MAX_UNITS];
static int			drhd_num;
static struct vtdmap		*vtdmaps[DRHD_MAX_UNITS];
static int			max_domains;
typedef int			(*drhd_ident_func_t)(void);

static uint64_t root_table[PAGE_SIZE / sizeof(uint64_t)] __aligned(4096);
static uint64_t ctx_tables[256][PAGE_SIZE / sizeof(uint64_t)] __aligned(4096);
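/*
 * The root table is a single 4KB page holding one 128-bit root entry per
 * PCI bus (256 entries), and each bus gets its own 4KB context table
 * holding one 128-bit context entry per device/function (256 entries).
 * Since every entry is 16 bytes, a page of uint64_t holds all 256 of
 * them as low/high quadword pairs, which is why the tables above are
 * dimensioned as PAGE_SIZE / sizeof(uint64_t).
 */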

static MALLOC_DEFINE(M_VTD, "vtd", "vtd");

static int
vtd_max_domains(struct vtdmap *vtdmap)
{
	int nd;

	nd = VTD_CAP_ND(vtdmap->cap);

	switch (nd) {
	case 0:
		return (16);
	case 1:
		return (64);
	case 2:
		return (256);
	case 3:
		return (1024);
	case 4:
		return (4 * 1024);
	case 5:
		return (16 * 1024);
	case 6:
		return (64 * 1024);
	default:
		panic("vtd_max_domains: invalid value of nd (0x%0x)", nd);
	}
}

static u_int
domain_id(void)
{
	u_int id;
	struct domain *dom;

	/* Skip domain id 0 - it is reserved when Caching Mode field is set */
	for (id = 1; id < max_domains; id++) {
		SLIST_FOREACH(dom, &domhead, next) {
			if (dom->id == id)
				break;
		}
		if (dom == NULL)
			break;		/* found it */
	}

	if (id >= max_domains)
		panic("domain ids exhausted");

	return (id);
}

static struct vtdmap *
vtd_device_scope(uint16_t rid)
{
	int i, remaining, pathremaining;
	char *end, *pathend;
	struct vtdmap *vtdmap;
	ACPI_DMAR_HARDWARE_UNIT *drhd;
	ACPI_DMAR_DEVICE_SCOPE *device_scope;
	ACPI_DMAR_PCI_PATH *path;

	for (i = 0; i < drhd_num; i++) {
		drhd = drhds[i];

		if (VTD_DRHD_INCLUDE_PCI_ALL(drhd->Flags)) {
			/*
			 * From Intel VT-d arch spec, version 3.0:
			 * If a DRHD structure with INCLUDE_PCI_ALL flag Set is reported
			 * for a Segment, it must be enumerated by BIOS after all other
			 * DRHD structures for the same Segment.
			 */
			vtdmap = vtdmaps[i];
			return (vtdmap);
		}

		end = (char *)drhd + drhd->Header.Length;
		remaining = drhd->Header.Length - sizeof(ACPI_DMAR_HARDWARE_UNIT);
		while (remaining > sizeof(ACPI_DMAR_DEVICE_SCOPE)) {
			device_scope = (ACPI_DMAR_DEVICE_SCOPE *)(end - remaining);
			remaining -= device_scope->Length;

			switch (device_scope->EntryType) {
			/* 0x01 and 0x02 are PCI device entries */
			case 0x01:
			case 0x02:
				break;
			default:
				continue;
			}

			if (PCI_RID2BUS(rid) != device_scope->Bus)
				continue;

			pathend = (char *)device_scope + device_scope->Length;
			pathremaining = device_scope->Length - sizeof(ACPI_DMAR_DEVICE_SCOPE);
			while (pathremaining >= sizeof(ACPI_DMAR_PCI_PATH)) {
				path = (ACPI_DMAR_PCI_PATH *)(pathend - pathremaining);
				pathremaining -= sizeof(ACPI_DMAR_PCI_PATH);

				if (PCI_RID2SLOT(rid) != path->Device)
					continue;
				if (PCI_RID2FUNC(rid) != path->Function)
					continue;

				vtdmap = vtdmaps[i];
				return (vtdmap);
			}
		}
	}

	/* No matching scope */
	return (NULL);
}

static void
vtd_wbflush(struct vtdmap *vtdmap)
{

	if (VTD_ECAP_COHERENCY(vtdmap->ext_cap) == 0)
		pmap_invalidate_cache();

	if (VTD_CAP_RWBF(vtdmap->cap)) {
		vtdmap->gcr = VTD_GCR_WBF;
		while ((vtdmap->gsr & VTD_GSR_WBFS) != 0)
			;
	}
}

static void
vtd_ctx_global_invalidate(struct vtdmap *vtdmap)
{

	vtdmap->ccr = VTD_CCR_ICC | VTD_CCR_CIRG_GLOBAL;
	while ((vtdmap->ccr & VTD_CCR_ICC) != 0)
		;
}

static void
vtd_iotlb_global_invalidate(struct vtdmap *vtdmap)
{
	int offset;
	volatile uint64_t *iotlb_reg, val;

	vtd_wbflush(vtdmap);

	offset = VTD_ECAP_IRO(vtdmap->ext_cap) * 16;
	iotlb_reg = (volatile uint64_t *)((caddr_t)vtdmap + offset + 8);
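	/*
	 * ECAP.IRO is the offset of the IOTLB register set, in 16-byte
	 * units, from the base of this unit's register space; 'offset'
	 * therefore points at the 128-bit IOTLB register pair.  The
	 * Invalidate Address register occupies the first 8 bytes and the
	 * IOTLB Invalidate register the next 8, hence the '+ 8' above.
	 */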

	*iotlb_reg = VTD_IIR_IVT | VTD_IIR_IIRG_GLOBAL |
	    VTD_IIR_DRAIN_READS | VTD_IIR_DRAIN_WRITES;

	while (1) {
		val = *iotlb_reg;
		if ((val & VTD_IIR_IVT) == 0)
			break;
	}
}

static void
vtd_translation_enable(struct vtdmap *vtdmap)
{

	vtdmap->gcr = VTD_GCR_TE;
	while ((vtdmap->gsr & VTD_GSR_TES) == 0)
		;
}

static void
vtd_translation_disable(struct vtdmap *vtdmap)
{

	vtdmap->gcr = 0;
	while ((vtdmap->gsr & VTD_GSR_TES) != 0)
		;
}

static int
vtd_init(void)
{
	int i, units, remaining, tmp;
	struct vtdmap *vtdmap;
	vm_paddr_t ctx_paddr;
	char *end, envname[32];
	unsigned long mapaddr;
	ACPI_STATUS status;
	ACPI_TABLE_DMAR *dmar;
	ACPI_DMAR_HEADER *hdr;
	ACPI_DMAR_HARDWARE_UNIT *drhd;

	/*
	 * Allow the user to override the ACPI DMAR table by specifying the
	 * physical address of each remapping unit.
	 *
	 * The following example specifies two remapping units at
	 * physical addresses 0xfed90000 and 0xfeda0000 respectively.
	 * set vtd.regmap.0.addr=0xfed90000
	 * set vtd.regmap.1.addr=0xfeda0000
	 */
	for (units = 0; units < DRHD_MAX_UNITS; units++) {
		snprintf(envname, sizeof(envname), "vtd.regmap.%d.addr", units);
		if (getenv_ulong(envname, &mapaddr) == 0)
			break;
		vtdmaps[units] = (struct vtdmap *)PHYS_TO_DMAP(mapaddr);
	}

	if (units > 0)
		goto skip_dmar;

	/* Search for DMAR table. */
	status = AcpiGetTable(ACPI_SIG_DMAR, 0, (ACPI_TABLE_HEADER **)&dmar);
	if (ACPI_FAILURE(status))
		return (ENXIO);

	end = (char *)dmar + dmar->Header.Length;
	remaining = dmar->Header.Length - sizeof(ACPI_TABLE_DMAR);
	while (remaining > sizeof(ACPI_DMAR_HEADER)) {
		hdr = (ACPI_DMAR_HEADER *)(end - remaining);
		if (hdr->Length > remaining)
			break;
		/*
		 * From Intel VT-d arch spec, version 1.3:
		 * BIOS implementations must report remapping structures
		 * in numerical order, i.e., all remapping structures of
		 * type 0 (DRHD) are enumerated before remapping structures
		 * of type 1 (RMRR), and so forth.
		 */
		if (hdr->Type != ACPI_DMAR_TYPE_HARDWARE_UNIT)
			break;

		drhd = (ACPI_DMAR_HARDWARE_UNIT *)hdr;
		drhds[units] = drhd;
		vtdmaps[units] = (struct vtdmap *)PHYS_TO_DMAP(drhd->Address);
		if (++units >= DRHD_MAX_UNITS)
			break;
		remaining -= hdr->Length;
	}

	if (units <= 0)
		return (ENXIO);

skip_dmar:
	drhd_num = units;

	max_domains = 64 * 1024;	/* maximum valid value */
	for (i = 0; i < drhd_num; i++) {
		vtdmap = vtdmaps[i];

		if (VTD_CAP_CM(vtdmap->cap) != 0)
			panic("vtd_init: invalid caching mode");

		/* take most compatible (minimum) value */
		if ((tmp = vtd_max_domains(vtdmap)) < max_domains)
			max_domains = tmp;
	}

	/*
	 * Set up the root-table to point to the context-entry tables
	 */
	for (i = 0; i < 256; i++) {
		ctx_paddr = vtophys(ctx_tables[i]);
		if (ctx_paddr & PAGE_MASK)
			panic("ctx table (0x%0lx) not page aligned", ctx_paddr);

		root_table[i * 2] = ctx_paddr | VTD_ROOT_PRESENT;
	}

	return (0);
}

static void
vtd_cleanup(void)
{
}

static void
vtd_enable(void)
{
	int i;
	struct vtdmap *vtdmap;

	for (i = 0; i < drhd_num; i++) {
		vtdmap = vtdmaps[i];
		vtd_wbflush(vtdmap);

		/* Update the root table address */
		vtdmap->rta = vtophys(root_table);
		vtdmap->gcr = VTD_GCR_SRTP;
		while ((vtdmap->gsr & VTD_GSR_RTPS) == 0)
			;

		vtd_ctx_global_invalidate(vtdmap);
		vtd_iotlb_global_invalidate(vtdmap);

		vtd_translation_enable(vtdmap);
	}
}

static void
vtd_disable(void)
{
	int i;
	struct vtdmap *vtdmap;

	for (i = 0; i < drhd_num; i++) {
		vtdmap = vtdmaps[i];
		vtd_translation_disable(vtdmap);
	}
}

static void
vtd_add_device(void *arg, uint16_t rid)
{
	int idx;
	uint64_t *ctxp;
	struct domain *dom = arg;
	vm_paddr_t pt_paddr;
	struct vtdmap *vtdmap;
	uint8_t bus;

	KASSERT(dom != NULL, ("domain is NULL"));

	bus = PCI_RID2BUS(rid);
	ctxp = ctx_tables[bus];
	pt_paddr = vtophys(dom->ptp);
	idx = VTD_RID2IDX(rid);

	if (ctxp[idx] & VTD_CTX_PRESENT) {
		panic("vtd_add_device: device %x is already owned by "
		    "domain %d", rid,
		    (uint16_t)(ctxp[idx + 1] >> 8));
	}

	if ((vtdmap = vtd_device_scope(rid)) == NULL)
		panic("vtd_add_device: device %x is not in scope for "
		    "any DMA remapping unit", rid);

	/*
	 * Order is important. The 'present' bit is set only after all fields
	 * of the context pointer are initialized.
	 */
	ctxp[idx + 1] = dom->addrwidth | (dom->id << 8);

	if (VTD_ECAP_DI(vtdmap->ext_cap))
		ctxp[idx] = VTD_CTX_TT_ALL;
	else
		ctxp[idx] = 0;

	ctxp[idx] |= pt_paddr | VTD_CTX_PRESENT;

	/*
	 * 'Not Present' entries are not cached in either the Context Cache
	 * or in the IOTLB, so there is no need to invalidate either of them.
	 */
}

static void
vtd_remove_device(void *arg, uint16_t rid)
{
	int i, idx;
	uint64_t *ctxp;
	struct vtdmap *vtdmap;
	uint8_t bus;

	bus = PCI_RID2BUS(rid);
	ctxp = ctx_tables[bus];
	idx = VTD_RID2IDX(rid);

	/*
	 * Order is important. The 'present' bit must be cleared first.
	 */
	ctxp[idx] = 0;
	ctxp[idx + 1] = 0;

	/*
	 * Invalidate the Context Cache and the IOTLB.
	 *
	 * XXX use device-selective invalidation for Context Cache
	 * XXX use domain-selective invalidation for IOTLB
	 */
	for (i = 0; i < drhd_num; i++) {
		vtdmap = vtdmaps[i];
		vtd_ctx_global_invalidate(vtdmap);
		vtd_iotlb_global_invalidate(vtdmap);
	}
}

#define	CREATE_MAPPING	0
#define	REMOVE_MAPPING	1

static uint64_t
vtd_update_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len,
    int remove)
{
	struct domain *dom;
	int i, spshift, ptpshift, ptpindex, nlevels;
	uint64_t spsize, *ptp;

	dom = arg;
	ptpindex = 0;
	ptpshift = 0;

	KASSERT(gpa + len > gpa, ("%s: invalid gpa range %#lx/%#lx", __func__,
	    gpa, len));
	KASSERT(gpa + len <= dom->maxaddr, ("%s: gpa range %#lx/%#lx beyond "
	    "domain maxaddr %#lx", __func__, gpa, len, dom->maxaddr));

	if (gpa & PAGE_MASK)
		panic("vtd_create_mapping: unaligned gpa 0x%0lx", gpa);

	if (hpa & PAGE_MASK)
		panic("vtd_create_mapping: unaligned hpa 0x%0lx", hpa);

	if (len & PAGE_MASK)
		panic("vtd_create_mapping: unaligned len 0x%0lx", len);

	/*
	 * Compute the size of the mapping that we can accommodate.
	 *
	 * This is based on three factors:
	 * - supported super page size
	 * - alignment of the region starting at 'gpa' and 'hpa'
	 * - length of the region 'len'
	 */
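	/*
	 * The loop below tries the candidate sizes from largest to
	 * smallest: spshift values 48, 39, 30 and 21 correspond to bits
	 * 3..0 of 'spsmask' (derived from CAP.SPS).  If no superpage is
	 * usable the loop falls through with spshift = 12, i.e. a 4KB
	 * mapping.  For example, a 1GB-aligned gpa/hpa with len >= 1GB on
	 * hardware advertising 30-bit superpages would stop at
	 * spshift = 30 and this call would install a single 1GB leaf
	 * entry.  Note that while superpage mappings are disabled (see
	 * the 'notyet' block in vtd_create_domain) 'spsmask' is zero and
	 * every mapping ends up as a 4KB page.
	 */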
	spshift = 48;
	for (i = 3; i >= 0; i--) {
		spsize = 1UL << spshift;
		if ((dom->spsmask & (1 << i)) != 0 &&
		    (gpa & (spsize - 1)) == 0 &&
		    (hpa & (spsize - 1)) == 0 &&
		    (len >= spsize)) {
			break;
		}
		spshift -= 9;
	}

	ptp = dom->ptp;
	nlevels = dom->pt_levels;
	while (--nlevels >= 0) {
		ptpshift = 12 + nlevels * 9;
		ptpindex = (gpa >> ptpshift) & 0x1FF;

		/* We have reached the leaf mapping */
		if (spshift >= ptpshift) {
			break;
		}

		/*
		 * We are working on a non-leaf page table page.
		 *
		 * Create a downstream page table page if necessary and point
		 * to it from the current page table.
		 */
		if (ptp[ptpindex] == 0) {
			void *nlp = malloc(PAGE_SIZE, M_VTD, M_WAITOK | M_ZERO);
			ptp[ptpindex] = vtophys(nlp) | VTD_PTE_RD | VTD_PTE_WR;
		}

		ptp = (uint64_t *)PHYS_TO_DMAP(ptp[ptpindex] & VTD_PTE_ADDR_M);
	}

	if ((gpa & ((1UL << ptpshift) - 1)) != 0)
		panic("gpa 0x%lx and ptpshift %d mismatch", gpa, ptpshift);

	/*
	 * Update the 'gpa' -> 'hpa' mapping
	 */
	if (remove) {
		ptp[ptpindex] = 0;
	} else {
		ptp[ptpindex] = hpa | VTD_PTE_RD | VTD_PTE_WR;

		if (nlevels > 0)
			ptp[ptpindex] |= VTD_PTE_SUPERPAGE;
	}

	return (1UL << ptpshift);
}

static uint64_t
vtd_create_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len)
{

	return (vtd_update_mapping(arg, gpa, hpa, len, CREATE_MAPPING));
}

static uint64_t
vtd_remove_mapping(void *arg, vm_paddr_t gpa, uint64_t len)
{

	return (vtd_update_mapping(arg, gpa, 0, len, REMOVE_MAPPING));
}

static void
vtd_invalidate_tlb(void *dom)
{
	int i;
	struct vtdmap *vtdmap;

	/*
	 * Invalidate the IOTLB.
	 * XXX use domain-selective invalidation for IOTLB
	 */
	for (i = 0; i < drhd_num; i++) {
		vtdmap = vtdmaps[i];
		vtd_iotlb_global_invalidate(vtdmap);
	}
}

static void *
vtd_create_domain(vm_paddr_t maxaddr)
{
	struct domain *dom;
	vm_paddr_t addr;
	int tmp, i, gaw, agaw, sagaw, res, pt_levels, addrwidth;
	struct vtdmap *vtdmap;

	if (drhd_num <= 0)
		panic("vtd_create_domain: no dma remapping hardware available");

	/*
	 * Calculate AGAW.
	 * Section 3.4.2 "Adjusted Guest Address Width", Architecture Spec.
	 */
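	/*
	 * The adjusted guest address width (AGAW) rounds the guest address
	 * width up so that (agaw - 12) is a multiple of 9, i.e. the guest
	 * address space is covered by a whole number of 9-bit page table
	 * levels on top of 4KB pages.  For example, a domain with maxaddr
	 * of 4GB leaves the loop below with gaw = 33, so
	 * res = (33 - 12) % 9 = 3 and agaw = 33 + 9 - 3 = 39, which is
	 * then satisfied by a 3-level page table if the hardware reports
	 * the corresponding SAGAW bit.
	 */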
	addr = 0;
	for (gaw = 0; addr < maxaddr; gaw++)
		addr = 1ULL << gaw;

	res = (gaw - 12) % 9;
	if (res == 0)
		agaw = gaw;
	else
		agaw = gaw + 9 - res;

	if (agaw > 64)
		agaw = 64;

	/*
	 * Select the smallest Supported AGAW and the corresponding number
	 * of page table levels.
	 */
	pt_levels = 2;
	sagaw = 30;
	addrwidth = 0;

	tmp = ~0;
	for (i = 0; i < drhd_num; i++) {
		vtdmap = vtdmaps[i];
		/* take most compatible value */
		tmp &= VTD_CAP_SAGAW(vtdmap->cap);
	}

	for (i = 0; i < 5; i++) {
		if ((tmp & (1 << i)) != 0 && sagaw >= agaw)
			break;
		pt_levels++;
		addrwidth++;
		sagaw += 9;
		if (sagaw > 64)
			sagaw = 64;
	}

	if (i >= 5) {
		panic("vtd_create_domain: SAGAW 0x%x does not support AGAW %d",
		    tmp, agaw);
	}

	dom = malloc(sizeof(struct domain), M_VTD, M_ZERO | M_WAITOK);
	dom->pt_levels = pt_levels;
	dom->addrwidth = addrwidth;
	dom->id = domain_id();
	dom->maxaddr = maxaddr;
	dom->ptp = malloc(PAGE_SIZE, M_VTD, M_ZERO | M_WAITOK);
	if ((uintptr_t)dom->ptp & PAGE_MASK)
		panic("vtd_create_domain: ptp (%p) not page aligned", dom->ptp);

#ifdef notyet
	/*
	 * XXX superpage mappings for the iommu do not work correctly.
	 *
	 * By default all physical memory is mapped into the host_domain.
	 * When a VM is allocated wired memory the pages belonging to it
	 * are removed from the host_domain and added to the vm's domain.
	 *
	 * If the page being removed was mapped using a superpage mapping
	 * in the host_domain then we need to demote the mapping before
	 * removing the page.
	 *
	 * There is no code to deal with the demotion at the moment so
	 * we disable superpage mappings altogether.
	 */
	dom->spsmask = ~0;
	for (i = 0; i < drhd_num; i++) {
		vtdmap = vtdmaps[i];
		/* take most compatible value */
		dom->spsmask &= VTD_CAP_SPS(vtdmap->cap);
	}
#endif

	SLIST_INSERT_HEAD(&domhead, dom, next);

	return (dom);
}

static void
vtd_free_ptp(uint64_t *ptp, int level)
{
	int i;
	uint64_t *nlp;

	if (level > 1) {
		for (i = 0; i < 512; i++) {
			if ((ptp[i] & (VTD_PTE_RD | VTD_PTE_WR)) == 0)
				continue;
			if ((ptp[i] & VTD_PTE_SUPERPAGE) != 0)
				continue;
			nlp = (uint64_t *)PHYS_TO_DMAP(ptp[i] & VTD_PTE_ADDR_M);
			vtd_free_ptp(nlp, level - 1);
		}
	}

	bzero(ptp, PAGE_SIZE);
	free(ptp, M_VTD);
}

static void
vtd_destroy_domain(void *arg)
{
	struct domain *dom;

	dom = arg;

	SLIST_REMOVE(&domhead, dom, domain, next);
	vtd_free_ptp(dom->ptp, dom->pt_levels);
	free(dom, M_VTD);
}

const struct iommu_ops iommu_ops_intel = {
	.init = vtd_init,
	.cleanup = vtd_cleanup,
	.enable = vtd_enable,
	.disable = vtd_disable,
	.create_domain = vtd_create_domain,
	.destroy_domain = vtd_destroy_domain,
	.create_mapping = vtd_create_mapping,
	.remove_mapping = vtd_remove_mapping,
	.add_device = vtd_add_device,
	.remove_device = vtd_remove_device,
	.invalidate_tlb = vtd_invalidate_tlb,
};