// SPDX-License-Identifier: GPL-2.0-only
/*
 * Tegra host1x driver
 *
 * Copyright (c) 2010-2013, NVIDIA Corporation.
 */

#include <linux/clk.h>
#include <linux/delay.h>
#include <linux/dma-mapping.h>
#include <linux/io.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/of_device.h>
#include <linux/of.h>
#include <linux/pm_runtime.h>
#include <linux/slab.h>

#include <soc/tegra/common.h>

#define CREATE_TRACE_POINTS
#include <trace/events/host1x.h>
#undef CREATE_TRACE_POINTS

#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)
#include <asm/dma-iommu.h>
#endif

#include "bus.h"
#include "channel.h"
#include "context.h"
#include "debug.h"
#include "dev.h"
#include "intr.h"

#include "hw/host1x01.h"
#include "hw/host1x02.h"
#include "hw/host1x04.h"
#include "hw/host1x05.h"
#include "hw/host1x06.h"
#include "hw/host1x07.h"

void host1x_common_writel(struct host1x *host1x, u32 v, u32 r)
{
	writel(v, host1x->common_regs + r);
}

void host1x_hypervisor_writel(struct host1x *host1x, u32 v, u32 r)
{
	writel(v, host1x->hv_regs + r);
}

u32 host1x_hypervisor_readl(struct host1x *host1x, u32 r)
{
	return readl(host1x->hv_regs + r);
}

void host1x_sync_writel(struct host1x *host1x, u32 v, u32 r)
{
	void __iomem *sync_regs = host1x->regs + host1x->info->sync_offset;

	writel(v, sync_regs + r);
}

u32 host1x_sync_readl(struct host1x *host1x, u32 r)
{
	void __iomem *sync_regs = host1x->regs + host1x->info->sync_offset;

	return readl(sync_regs + r);
}

void host1x_ch_writel(struct host1x_channel *ch, u32 v, u32 r)
{
	writel(v, ch->regs + r);
}

u32 host1x_ch_readl(struct host1x_channel *ch, u32 r)
{
	return readl(ch->regs + r);
}

static const struct host1x_info host1x01_info = {
	.nb_channels = 8,
	.nb_pts = 32,
	.nb_mlocks = 16,
	.nb_bases = 8,
	.init = host1x01_init,
	.sync_offset = 0x3000,
	.dma_mask = DMA_BIT_MASK(32),
	.has_wide_gather = false,
	.has_hypervisor = false,
	.num_sid_entries = 0,
	.sid_table = NULL,
	.reserve_vblank_syncpts = true,
};

static const struct host1x_info host1x02_info = {
	.nb_channels = 9,
	.nb_pts = 32,
	.nb_mlocks = 16,
	.nb_bases = 12,
	.init = host1x02_init,
	.sync_offset = 0x3000,
	.dma_mask = DMA_BIT_MASK(32),
	.has_wide_gather = false,
	.has_hypervisor = false,
	.num_sid_entries = 0,
	.sid_table = NULL,
	.reserve_vblank_syncpts = true,
};

static const struct host1x_info host1x04_info = {
	.nb_channels = 12,
	.nb_pts = 192,
	.nb_mlocks = 16,
	.nb_bases = 64,
	.init = host1x04_init,
	.sync_offset = 0x2100,
	.dma_mask = DMA_BIT_MASK(34),
	.has_wide_gather = false,
	.has_hypervisor = false,
	.num_sid_entries = 0,
	.sid_table = NULL,
	.reserve_vblank_syncpts = false,
};

static const struct host1x_info host1x05_info = {
	.nb_channels = 14,
	.nb_pts = 192,
	.nb_mlocks = 16,
	.nb_bases = 64,
	.init = host1x05_init,
	.sync_offset = 0x2100,
	.dma_mask = DMA_BIT_MASK(34),
	.has_wide_gather = false,
	.has_hypervisor = false,
	.num_sid_entries = 0,
	.sid_table = NULL,
	.reserve_vblank_syncpts = false,
};

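/*
 * Per-client stream ID register descriptions. Each entry is programmed into
 * the hypervisor register aperture by host1x_setup_virtualization_tables()
 * as an (offset, limit) pair written at the given base register.
 */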
static const struct host1x_sid_entry tegra186_sid_table[] = {
	{
		/* VIC */
		.base = 0x1af0,
		.offset = 0x30,
		.limit = 0x34
	},
	{
		/* NVDEC */
		.base = 0x1b00,
		.offset = 0x30,
		.limit = 0x34
	},
};

static const struct host1x_info host1x06_info = {
	.nb_channels = 63,
	.nb_pts = 576,
	.nb_mlocks = 24,
	.nb_bases = 16,
	.init = host1x06_init,
	.sync_offset = 0x0,
	.dma_mask = DMA_BIT_MASK(40),
	.has_wide_gather = true,
	.has_hypervisor = true,
	.num_sid_entries = ARRAY_SIZE(tegra186_sid_table),
	.sid_table = tegra186_sid_table,
	.reserve_vblank_syncpts = false,
};

static const struct host1x_sid_entry tegra194_sid_table[] = {
	{
		/* VIC */
		.base = 0x1af0,
		.offset = 0x30,
		.limit = 0x34
	},
	{
		/* NVDEC */
		.base = 0x1b00,
		.offset = 0x30,
		.limit = 0x34
	},
	{
		/* NVDEC1 */
		.base = 0x1bc0,
		.offset = 0x30,
		.limit = 0x34
	},
};

static const struct host1x_info host1x07_info = {
	.nb_channels = 63,
	.nb_pts = 704,
	.nb_mlocks = 32,
	.nb_bases = 0,
	.init = host1x07_init,
	.sync_offset = 0x0,
	.dma_mask = DMA_BIT_MASK(40),
	.has_wide_gather = true,
	.has_hypervisor = true,
	.num_sid_entries = ARRAY_SIZE(tegra194_sid_table),
	.sid_table = tegra194_sid_table,
	.reserve_vblank_syncpts = false,
};

static const struct of_device_id host1x_of_match[] = {
	{ .compatible = "nvidia,tegra194-host1x", .data = &host1x07_info, },
	{ .compatible = "nvidia,tegra186-host1x", .data = &host1x06_info, },
	{ .compatible = "nvidia,tegra210-host1x", .data = &host1x05_info, },
	{ .compatible = "nvidia,tegra124-host1x", .data = &host1x04_info, },
	{ .compatible = "nvidia,tegra114-host1x", .data = &host1x02_info, },
	{ .compatible = "nvidia,tegra30-host1x", .data = &host1x01_info, },
	{ .compatible = "nvidia,tegra20-host1x", .data = &host1x01_info, },
	{ },
};
MODULE_DEVICE_TABLE(of, host1x_of_match);

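/*
 * On SoCs with a hypervisor register aperture, program the per-client stream
 * ID registers and the stream ID, class ID and MMIO VM tables: all stream IDs
 * and classes are opened up to every VM, and VM1 is used as the originator
 * VMID for engine MMIO accesses.
 */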
static void host1x_setup_virtualization_tables(struct host1x *host)
{
	const struct host1x_info *info = host->info;
	unsigned int i;

	if (!info->has_hypervisor)
		return;

	for (i = 0; i < info->num_sid_entries; i++) {
		const struct host1x_sid_entry *entry = &info->sid_table[i];

		host1x_hypervisor_writel(host, entry->offset, entry->base);
		host1x_hypervisor_writel(host, entry->limit, entry->base + 4);
	}

	for (i = 0; i < info->streamid_vm_table.count; i++) {
		/* Allow access to all stream IDs to all VMs. */
		host1x_hypervisor_writel(host, 0xff, info->streamid_vm_table.base + 4 * i);
	}

	for (i = 0; i < info->classid_vm_table.count; i++) {
		/* Allow access to all classes to all VMs. */
		host1x_hypervisor_writel(host, 0xff, info->classid_vm_table.base + 4 * i);
	}

	for (i = 0; i < info->mmio_vm_table.count; i++) {
		/* Use VM1 (that's us) as originator VMID for engine MMIO accesses. */
		host1x_hypervisor_writel(host, 0x1, info->mmio_vm_table.base + 4 * i);
	}
}

static bool host1x_wants_iommu(struct host1x *host1x)
{
	/*
	 * If we support addressing a maximum of 32 bits of physical memory
	 * and if the host1x firewall is enabled, there's no need to enable
	 * IOMMU support. This can happen for example on Tegra20, Tegra30
	 * and Tegra114.
	 *
	 * Tegra124 and later can address up to 34 bits of physical memory and
	 * many platforms come equipped with more than 2 GiB of system memory,
	 * which requires crossing the 4 GiB boundary. But there's a catch: on
	 * SoCs before Tegra186 (i.e. Tegra124 and Tegra210), the host1x can
	 * only address up to 32 bits of memory in GATHER opcodes, which means
	 * that command buffers need to either be in the first 2 GiB of system
	 * memory (which could quickly lead to memory exhaustion), or command
	 * buffers need to be treated differently from other buffers (which is
	 * not possible with the current ABI).
	 *
	 * A third option is to use the IOMMU in these cases to make sure all
	 * buffers will be mapped into a 32-bit IOVA space that host1x can
	 * address. This allows all of the system memory to be used and works
	 * within the limitations of the host1x on these SoCs.
	 *
	 * In summary, default to enable IOMMU on Tegra124 and later. For any
	 * of the earlier SoCs, only use the IOMMU for additional safety when
	 * the host1x firewall is disabled.
	 */
	if (host1x->info->dma_mask <= DMA_BIT_MASK(32)) {
		if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
			return false;
	}

	return true;
}

static struct iommu_domain *host1x_iommu_attach(struct host1x *host)
{
	struct iommu_domain *domain = iommu_get_domain_for_dev(host->dev);
	int err;

#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)
	if (host->dev->archdata.mapping) {
		struct dma_iommu_mapping *mapping =
			to_dma_iommu_mapping(host->dev);
		arm_iommu_detach_device(host->dev);
		arm_iommu_release_mapping(mapping);

		domain = iommu_get_domain_for_dev(host->dev);
	}
#endif

	/*
	 * We may not always want to enable IOMMU support (for example if the
	 * host1x firewall is already enabled and we don't support addressing
	 * more than 32 bits of physical memory), so check for that first.
	 *
	 * Similarly, if host1x is already attached to an IOMMU (via the DMA
	 * API), don't try to attach again.
	 */
	if (!host1x_wants_iommu(host) || domain)
		return domain;

	host->group = iommu_group_get(host->dev);
	if (host->group) {
		struct iommu_domain_geometry *geometry;
		dma_addr_t start, end;
		unsigned long order;

		err = iova_cache_get();
		if (err < 0)
			goto put_group;

		host->domain = iommu_domain_alloc(&platform_bus_type);
		if (!host->domain) {
			err = -ENOMEM;
			goto put_cache;
		}

		err = iommu_attach_group(host->domain, host->group);
		if (err) {
			if (err == -ENODEV)
				err = 0;

			goto free_domain;
		}

		geometry = &host->domain->geometry;
		start = geometry->aperture_start & host->info->dma_mask;
		end = geometry->aperture_end & host->info->dma_mask;

		order = __ffs(host->domain->pgsize_bitmap);
		init_iova_domain(&host->iova, 1UL << order, start >> order);
		host->iova_end = end;

		domain = host->domain;
	}

	return domain;

free_domain:
	iommu_domain_free(host->domain);
	host->domain = NULL;
put_cache:
	iova_cache_put();
put_group:
	iommu_group_put(host->group);
	host->group = NULL;

	return ERR_PTR(err);
}

static int host1x_iommu_init(struct host1x *host)
{
	u64 mask = host->info->dma_mask;
	struct iommu_domain *domain;
	int err;

	domain = host1x_iommu_attach(host);
	if (IS_ERR(domain)) {
		err = PTR_ERR(domain);
		dev_err(host->dev, "failed to attach to IOMMU: %d\n", err);
		return err;
	}

	/*
	 * If we're not behind an IOMMU make sure we don't get push buffers
	 * that are allocated outside of the range addressable by the GATHER
	 * opcode.
	 *
	 * Newer generations of Tegra (Tegra186 and later) support a wide
	 * variant of the GATHER opcode that allows addressing more bits.
	 */
	if (!domain && !host->info->has_wide_gather)
		mask = DMA_BIT_MASK(32);

	err = dma_coerce_mask_and_coherent(host->dev, mask);
	if (err < 0) {
		dev_err(host->dev, "failed to set DMA mask: %d\n", err);
		return err;
	}

	return 0;
}

static void host1x_iommu_exit(struct host1x *host)
{
	if (host->domain) {
		put_iova_domain(&host->iova);
		iommu_detach_group(host->domain, host->group);

		iommu_domain_free(host->domain);
		host->domain = NULL;

		iova_cache_put();

		iommu_group_put(host->group);
		host->group = NULL;
	}
}

static int host1x_get_resets(struct host1x *host)
{
	int err;

	host->resets[0].id = "mc";
	host->resets[1].id = "host1x";
	host->nresets = ARRAY_SIZE(host->resets);

	err = devm_reset_control_bulk_get_optional_exclusive_released(
				host->dev, host->nresets, host->resets);
	if (err) {
		dev_err(host->dev, "failed to get reset: %d\n", err);
		return err;
	}

	if (WARN_ON(!host->resets[1].rstc))
		return -ENOENT;

	return 0;
}

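/*
 * Bring-up order: map the register apertures, look up the syncpoint IRQ,
 * clock and resets, set up IOMMU/DMA masks, then initialize the channel,
 * memory context and syncpoint state before registering the host1x bus and
 * populating child devices. The error labels unwind in reverse order.
 */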
static int host1x_probe(struct platform_device *pdev)
{
	struct host1x *host;
	int syncpt_irq;
	int err;

	host = devm_kzalloc(&pdev->dev, sizeof(*host), GFP_KERNEL);
	if (!host)
		return -ENOMEM;

	host->info = of_device_get_match_data(&pdev->dev);

	if (host->info->has_hypervisor) {
		host->regs = devm_platform_ioremap_resource_byname(pdev, "vm");
		if (IS_ERR(host->regs))
			return PTR_ERR(host->regs);

		host->hv_regs = devm_platform_ioremap_resource_byname(pdev, "hypervisor");
		if (IS_ERR(host->hv_regs))
			return PTR_ERR(host->hv_regs);

		if (host->info->has_common) {
			host->common_regs = devm_platform_ioremap_resource_byname(pdev, "common");
			if (IS_ERR(host->common_regs))
				return PTR_ERR(host->common_regs);
		}
	} else {
		host->regs = devm_platform_ioremap_resource(pdev, 0);
		if (IS_ERR(host->regs))
			return PTR_ERR(host->regs);
	}

	syncpt_irq = platform_get_irq(pdev, 0);
	if (syncpt_irq < 0)
		return syncpt_irq;

	mutex_init(&host->devices_lock);
	INIT_LIST_HEAD(&host->devices);
	INIT_LIST_HEAD(&host->list);
	host->dev = &pdev->dev;

	/* set common host1x device data */
	platform_set_drvdata(pdev, host);

	host->dev->dma_parms = &host->dma_parms;
	dma_set_max_seg_size(host->dev, UINT_MAX);

	if (host->info->init) {
		err = host->info->init(host);
		if (err)
			return err;
	}

	host->clk = devm_clk_get(&pdev->dev, NULL);
	if (IS_ERR(host->clk)) {
		err = PTR_ERR(host->clk);

		if (err != -EPROBE_DEFER)
			dev_err(&pdev->dev, "failed to get clock: %d\n", err);

		return err;
	}

	err = host1x_get_resets(host);
	if (err)
		return err;

	host1x_bo_cache_init(&host->cache);

	err = host1x_iommu_init(host);
	if (err < 0) {
		dev_err(&pdev->dev, "failed to setup IOMMU: %d\n", err);
		goto destroy_cache;
	}

	err = host1x_channel_list_init(&host->channel_list,
				       host->info->nb_channels);
	if (err) {
		dev_err(&pdev->dev, "failed to initialize channel list\n");
		goto iommu_exit;
	}

	err = host1x_memory_context_list_init(host);
	if (err) {
		dev_err(&pdev->dev, "failed to initialize context list\n");
		goto free_channels;
	}

	err = host1x_syncpt_init(host);
	if (err) {
		dev_err(&pdev->dev, "failed to initialize syncpts\n");
		goto free_contexts;
	}

	err = host1x_intr_init(host, syncpt_irq);
	if (err) {
		dev_err(&pdev->dev, "failed to initialize interrupts\n");
		goto deinit_syncpt;
	}

	pm_runtime_enable(&pdev->dev);

	err = devm_tegra_core_dev_init_opp_table_common(&pdev->dev);
	if (err)
		goto pm_disable;

	/* the driver's code isn't ready yet for dynamic runtime PM */
	err = pm_runtime_resume_and_get(&pdev->dev);
	if (err)
		goto pm_disable;

	host1x_debug_init(host);

	err = host1x_register(host);
	if (err < 0)
		goto deinit_debugfs;

	err = devm_of_platform_populate(&pdev->dev);
	if (err < 0)
		goto unregister;

	return 0;

unregister:
	host1x_unregister(host);
deinit_debugfs:
	host1x_debug_deinit(host);

	pm_runtime_put_sync_suspend(&pdev->dev);
pm_disable:
	pm_runtime_disable(&pdev->dev);

	host1x_intr_deinit(host);
deinit_syncpt:
	host1x_syncpt_deinit(host);
free_contexts:
	host1x_memory_context_list_free(&host->context_list);
free_channels:
	host1x_channel_list_free(&host->channel_list);
iommu_exit:
	host1x_iommu_exit(host);
destroy_cache:
	host1x_bo_cache_destroy(&host->cache);

	return err;
}

static int host1x_remove(struct platform_device *pdev)
{
	struct host1x *host = platform_get_drvdata(pdev);

	host1x_unregister(host);
	host1x_debug_deinit(host);

	pm_runtime_force_suspend(&pdev->dev);

	host1x_intr_deinit(host);
	host1x_syncpt_deinit(host);
	host1x_memory_context_list_free(&host->context_list);
	host1x_channel_list_free(&host->channel_list);
	host1x_iommu_exit(host);
	host1x_bo_cache_destroy(&host->cache);

	return 0;
}

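/*
 * Runtime PM: suspend stops interrupt handling and saves syncpoint state
 * before asserting the resets and gating the clock; resume re-enables the
 * clock, deasserts the resets and then reprograms the virtualization tables
 * and restores syncpoint and interrupt state.
 */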
static int __maybe_unused host1x_runtime_suspend(struct device *dev)
{
	struct host1x *host = dev_get_drvdata(dev);
	int err;

	host1x_intr_stop(host);
	host1x_syncpt_save(host);

	err = reset_control_bulk_assert(host->nresets, host->resets);
	if (err) {
		dev_err(dev, "failed to assert reset: %d\n", err);
		goto resume_host1x;
	}

	usleep_range(1000, 2000);

	clk_disable_unprepare(host->clk);
	reset_control_bulk_release(host->nresets, host->resets);

	return 0;

resume_host1x:
	host1x_setup_virtualization_tables(host);
	host1x_syncpt_restore(host);
	host1x_intr_start(host);

	return err;
}

static int __maybe_unused host1x_runtime_resume(struct device *dev)
{
	struct host1x *host = dev_get_drvdata(dev);
	int err;

	err = reset_control_bulk_acquire(host->nresets, host->resets);
	if (err) {
		dev_err(dev, "failed to acquire reset: %d\n", err);
		return err;
	}

	err = clk_prepare_enable(host->clk);
	if (err) {
		dev_err(dev, "failed to enable clock: %d\n", err);
		goto release_reset;
	}

	err = reset_control_bulk_deassert(host->nresets, host->resets);
	if (err < 0) {
		dev_err(dev, "failed to deassert reset: %d\n", err);
		goto disable_clk;
	}

	host1x_setup_virtualization_tables(host);
	host1x_syncpt_restore(host);
	host1x_intr_start(host);

	return 0;

disable_clk:
	clk_disable_unprepare(host->clk);
release_reset:
	reset_control_bulk_release(host->nresets, host->resets);

	return err;
}

static const struct dev_pm_ops host1x_pm_ops = {
	SET_RUNTIME_PM_OPS(host1x_runtime_suspend, host1x_runtime_resume,
			   NULL)
	/* TODO: add system suspend/resume once the driver is ready for it */
};

static struct platform_driver tegra_host1x_driver = {
	.driver = {
		.name = "tegra-host1x",
		.of_match_table = host1x_of_match,
		.pm = &host1x_pm_ops,
	},
	.probe = host1x_probe,
	.remove = host1x_remove,
};

static struct platform_driver * const drivers[] = {
	&tegra_host1x_driver,
	&tegra_mipi_driver,
};

static int __init tegra_host1x_init(void)
{
	int err;

	err = bus_register(&host1x_bus_type);
	if (err < 0)
		return err;

	err = platform_register_drivers(drivers, ARRAY_SIZE(drivers));
	if (err < 0)
		bus_unregister(&host1x_bus_type);

	return err;
}
module_init(tegra_host1x_init);

static void __exit tegra_host1x_exit(void)
{
	platform_unregister_drivers(drivers, ARRAY_SIZE(drivers));
	bus_unregister(&host1x_bus_type);
}
module_exit(tegra_host1x_exit);

/**
 * host1x_get_dma_mask() - query the supported DMA mask for host1x
 * @host1x: host1x instance
 *
 * Note that this returns the supported DMA mask for host1x, which can be
 * different from the applicable DMA mask under certain circumstances.
 */
u64 host1x_get_dma_mask(struct host1x *host1x)
{
	return host1x->info->dma_mask;
}
EXPORT_SYMBOL(host1x_get_dma_mask);

MODULE_AUTHOR("Thierry Reding <[email protected]>");
MODULE_AUTHOR("Terje Bergstrom <[email protected]>");
MODULE_DESCRIPTION("Host1x driver for Tegra products");
MODULE_LICENSE("GPL");