// SPDX-License-Identifier: GPL-2.0-only
/*
 * Tegra host1x driver
 *
 * Copyright (c) 2010-2013, NVIDIA Corporation.
 */

#include <linux/clk.h>
#include <linux/delay.h>
#include <linux/dma-mapping.h>
#include <linux/io.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/of_device.h>
#include <linux/of.h>
#include <linux/pm_runtime.h>
#include <linux/slab.h>

#include <soc/tegra/common.h>

#define CREATE_TRACE_POINTS
#include <trace/events/host1x.h>
#undef CREATE_TRACE_POINTS

#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)
#include <asm/dma-iommu.h>
#endif

#include "bus.h"
#include "channel.h"
#include "context.h"
#include "debug.h"
#include "dev.h"
#include "intr.h"

#include "hw/host1x01.h"
#include "hw/host1x02.h"
#include "hw/host1x04.h"
#include "hw/host1x05.h"
#include "hw/host1x06.h"
#include "hw/host1x07.h"

void host1x_common_writel(struct host1x *host1x, u32 v, u32 r)
{
	writel(v, host1x->common_regs + r);
}

void host1x_hypervisor_writel(struct host1x *host1x, u32 v, u32 r)
{
	writel(v, host1x->hv_regs + r);
}

u32 host1x_hypervisor_readl(struct host1x *host1x, u32 r)
{
	return readl(host1x->hv_regs + r);
}

void host1x_sync_writel(struct host1x *host1x, u32 v, u32 r)
{
	void __iomem *sync_regs = host1x->regs + host1x->info->sync_offset;

	writel(v, sync_regs + r);
}

u32 host1x_sync_readl(struct host1x *host1x, u32 r)
{
	void __iomem *sync_regs = host1x->regs + host1x->info->sync_offset;

	return readl(sync_regs + r);
}

void host1x_ch_writel(struct host1x_channel *ch, u32 v, u32 r)
{
	writel(v, ch->regs + r);
}

u32 host1x_ch_readl(struct host1x_channel *ch, u32 r)
{
	return readl(ch->regs + r);
}

static const struct host1x_info host1x01_info = {
	.nb_channels = 8,
	.nb_pts = 32,
	.nb_mlocks = 16,
	.nb_bases = 8,
	.init = host1x01_init,
	.sync_offset = 0x3000,
	.dma_mask = DMA_BIT_MASK(32),
	.has_wide_gather = false,
	.has_hypervisor = false,
	.num_sid_entries = 0,
	.sid_table = NULL,
	.reserve_vblank_syncpts = true,
};

static const struct host1x_info host1x02_info = {
	.nb_channels = 9,
	.nb_pts = 32,
	.nb_mlocks = 16,
	.nb_bases = 12,
	.init = host1x02_init,
	.sync_offset = 0x3000,
	.dma_mask = DMA_BIT_MASK(32),
	.has_wide_gather = false,
	.has_hypervisor = false,
	.num_sid_entries = 0,
	.sid_table = NULL,
	.reserve_vblank_syncpts = true,
};

static const struct host1x_info host1x04_info = {
	.nb_channels = 12,
	.nb_pts = 192,
	.nb_mlocks = 16,
	.nb_bases = 64,
	.init = host1x04_init,
	.sync_offset = 0x2100,
	.dma_mask = DMA_BIT_MASK(34),
	.has_wide_gather = false,
	.has_hypervisor = false,
	.num_sid_entries = 0,
	.sid_table = NULL,
	.reserve_vblank_syncpts = false,
};

static const struct host1x_info host1x05_info = {
	.nb_channels = 14,
	.nb_pts = 192,
	.nb_mlocks = 16,
	.nb_bases = 64,
	.init = host1x05_init,
	.sync_offset = 0x2100,
	.dma_mask = DMA_BIT_MASK(34),
	.has_wide_gather = false,
	.has_hypervisor = false,
	.num_sid_entries = 0,
	.sid_table = NULL,
	.reserve_vblank_syncpts = false,
};
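/*
 * Stream ID tables, one per SoC generation with a hypervisor aperture. Each
 * entry names a register block in that aperture ('base') which
 * host1x_setup_virtualization_tables() programs with the offsets ('offset',
 * 'limit') of the client engine's stream ID registers; the 0x30/0x34 values
 * below appear to correspond to the engines' Falcon THI_STREAMID0/1
 * registers.
 */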
static const struct host1x_sid_entry tegra186_sid_table[] = {
	{
		/* VIC */
		.base = 0x1af0,
		.offset = 0x30,
		.limit = 0x34
	},
	{
		/* NVDEC */
		.base = 0x1b00,
		.offset = 0x30,
		.limit = 0x34
	},
};

static const struct host1x_info host1x06_info = {
	.nb_channels = 63,
	.nb_pts = 576,
	.nb_mlocks = 24,
	.nb_bases = 16,
	.init = host1x06_init,
	.sync_offset = 0x0,
	.dma_mask = DMA_BIT_MASK(40),
	.has_wide_gather = true,
	.has_hypervisor = true,
	.num_sid_entries = ARRAY_SIZE(tegra186_sid_table),
	.sid_table = tegra186_sid_table,
	.reserve_vblank_syncpts = false,
};

static const struct host1x_sid_entry tegra194_sid_table[] = {
	{
		/* VIC */
		.base = 0x1af0,
		.offset = 0x30,
		.limit = 0x34
	},
	{
		/* NVDEC */
		.base = 0x1b00,
		.offset = 0x30,
		.limit = 0x34
	},
	{
		/* NVDEC1 */
		.base = 0x1bc0,
		.offset = 0x30,
		.limit = 0x34
	},
};

static const struct host1x_info host1x07_info = {
	.nb_channels = 63,
	.nb_pts = 704,
	.nb_mlocks = 32,
	.nb_bases = 0,
	.init = host1x07_init,
	.sync_offset = 0x0,
	.dma_mask = DMA_BIT_MASK(40),
	.has_wide_gather = true,
	.has_hypervisor = true,
	.num_sid_entries = ARRAY_SIZE(tegra194_sid_table),
	.sid_table = tegra194_sid_table,
	.reserve_vblank_syncpts = false,
};

static const struct of_device_id host1x_of_match[] = {
	{ .compatible = "nvidia,tegra194-host1x", .data = &host1x07_info, },
	{ .compatible = "nvidia,tegra186-host1x", .data = &host1x06_info, },
	{ .compatible = "nvidia,tegra210-host1x", .data = &host1x05_info, },
	{ .compatible = "nvidia,tegra124-host1x", .data = &host1x04_info, },
	{ .compatible = "nvidia,tegra114-host1x", .data = &host1x02_info, },
	{ .compatible = "nvidia,tegra30-host1x", .data = &host1x01_info, },
	{ .compatible = "nvidia,tegra20-host1x", .data = &host1x01_info, },
	{ },
};
MODULE_DEVICE_TABLE(of, host1x_of_match);
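/*
 * Program the per-SoC virtualization tables into the hypervisor aperture: the
 * stream ID entries above, followed by the optional VM tables. The policy is
 * permissive: all VMs get access to all stream IDs and all classes, and VM1
 * is used as the originator VMID for engine MMIO accesses. For the host1x
 * versions described in this file the VM table counts are zero, so those
 * loops are no-ops.
 */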
static void host1x_setup_virtualization_tables(struct host1x *host)
{
	const struct host1x_info *info = host->info;
	unsigned int i;

	if (!info->has_hypervisor)
		return;

	for (i = 0; i < info->num_sid_entries; i++) {
		const struct host1x_sid_entry *entry = &info->sid_table[i];

		host1x_hypervisor_writel(host, entry->offset, entry->base);
		host1x_hypervisor_writel(host, entry->limit, entry->base + 4);
	}

	for (i = 0; i < info->streamid_vm_table.count; i++) {
		/* Allow access to all stream IDs to all VMs. */
		host1x_hypervisor_writel(host, 0xff, info->streamid_vm_table.base + 4 * i);
	}

	for (i = 0; i < info->classid_vm_table.count; i++) {
		/* Allow access to all classes to all VMs. */
		host1x_hypervisor_writel(host, 0xff, info->classid_vm_table.base + 4 * i);
	}

	for (i = 0; i < info->mmio_vm_table.count; i++) {
		/* Use VM1 (that's us) as originator VMID for engine MMIO accesses. */
		host1x_hypervisor_writel(host, 0x1, info->mmio_vm_table.base + 4 * i);
	}
}

static bool host1x_wants_iommu(struct host1x *host1x)
{
	/*
	 * If we support addressing a maximum of 32 bits of physical memory
	 * and if the host1x firewall is enabled, there's no need to enable
	 * IOMMU support. This can happen for example on Tegra20, Tegra30
	 * and Tegra114.
	 *
	 * Tegra124 and later can address up to 34 bits of physical memory and
	 * many platforms come equipped with more than 2 GiB of system memory,
	 * which requires crossing the 4 GiB boundary. But there's a catch: on
	 * SoCs before Tegra186 (i.e. Tegra124 and Tegra210), the host1x can
	 * only address up to 32 bits of memory in GATHER opcodes, which means
	 * that command buffers need to either be in the first 2 GiB of system
	 * memory (which could quickly lead to memory exhaustion), or command
	 * buffers need to be treated differently from other buffers (which is
	 * not possible with the current ABI).
	 *
	 * A third option is to use the IOMMU in these cases to make sure all
	 * buffers will be mapped into a 32-bit IOVA space that host1x can
	 * address. This allows all of the system memory to be used and works
	 * within the limitations of the host1x on these SoCs.
	 *
	 * In summary, default to enable IOMMU on Tegra124 and later. For any
	 * of the earlier SoCs, only use the IOMMU for additional safety when
	 * the host1x firewall is disabled.
	 */
	if (host1x->info->dma_mask <= DMA_BIT_MASK(32)) {
		if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
			return false;
	}

	return true;
}
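/*
 * Attach host1x to an explicitly managed IOMMU domain when one is wanted and
 * the device is not already attached via the DMA API. On 32-bit ARM, the
 * implicit arm_iommu mapping is torn down first so that an explicit domain
 * can be used instead. Returns the domain in use (possibly NULL when running
 * without an IOMMU) or an ERR_PTR() on failure.
 */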
static struct iommu_domain *host1x_iommu_attach(struct host1x *host)
{
	struct iommu_domain *domain = iommu_get_domain_for_dev(host->dev);
	int err;

#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)
	if (host->dev->archdata.mapping) {
		struct dma_iommu_mapping *mapping =
				to_dma_iommu_mapping(host->dev);
		arm_iommu_detach_device(host->dev);
		arm_iommu_release_mapping(mapping);

		domain = iommu_get_domain_for_dev(host->dev);
	}
#endif

	/*
	 * We may not always want to enable IOMMU support (for example if the
	 * host1x firewall is already enabled and we don't support addressing
	 * more than 32 bits of physical memory), so check for that first.
	 *
	 * Similarly, if host1x is already attached to an IOMMU (via the DMA
	 * API), don't try to attach again.
	 */
	if (!host1x_wants_iommu(host) || domain)
		return domain;

	host->group = iommu_group_get(host->dev);
	if (host->group) {
		struct iommu_domain_geometry *geometry;
		dma_addr_t start, end;
		unsigned long order;

		err = iova_cache_get();
		if (err < 0)
			goto put_group;

		host->domain = iommu_domain_alloc(&platform_bus_type);
		if (!host->domain) {
			err = -ENOMEM;
			goto put_cache;
		}

		err = iommu_attach_group(host->domain, host->group);
		if (err) {
			if (err == -ENODEV)
				err = 0;

			goto free_domain;
		}

		geometry = &host->domain->geometry;
		start = geometry->aperture_start & host->info->dma_mask;
		end = geometry->aperture_end & host->info->dma_mask;

		order = __ffs(host->domain->pgsize_bitmap);
		init_iova_domain(&host->iova, 1UL << order, start >> order);
		host->iova_end = end;

		domain = host->domain;
	}

	return domain;

free_domain:
	iommu_domain_free(host->domain);
	host->domain = NULL;
put_cache:
	iova_cache_put();
put_group:
	iommu_group_put(host->group);
	host->group = NULL;

	return ERR_PTR(err);
}

static int host1x_iommu_init(struct host1x *host)
{
	u64 mask = host->info->dma_mask;
	struct iommu_domain *domain;
	int err;

	domain = host1x_iommu_attach(host);
	if (IS_ERR(domain)) {
		err = PTR_ERR(domain);
		dev_err(host->dev, "failed to attach to IOMMU: %d\n", err);
		return err;
	}

	/*
	 * If we're not behind an IOMMU make sure we don't get push buffers
	 * that are allocated outside of the range addressable by the GATHER
	 * opcode.
	 *
	 * Newer generations of Tegra (Tegra186 and later) support a wide
	 * variant of the GATHER opcode that allows addressing more bits.
	 */
	if (!domain && !host->info->has_wide_gather)
		mask = DMA_BIT_MASK(32);

	err = dma_coerce_mask_and_coherent(host->dev, mask);
	if (err < 0) {
		dev_err(host->dev, "failed to set DMA mask: %d\n", err);
		return err;
	}

	return 0;
}

static void host1x_iommu_exit(struct host1x *host)
{
	if (host->domain) {
		put_iova_domain(&host->iova);
		iommu_detach_group(host->domain, host->group);

		iommu_domain_free(host->domain);
		host->domain = NULL;

		iova_cache_put();

		iommu_group_put(host->group);
		host->group = NULL;
	}
}
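/*
 * The "mc" and "host1x" resets are requested as optional, exclusive and
 * released: they are only acquired across runtime resume and released again
 * on suspend (see host1x_runtime_resume() and host1x_runtime_suspend()
 * below), presumably so that other users of the same reset lines are not
 * blocked while host1x is powered down.
 */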
static int host1x_get_resets(struct host1x *host)
{
	int err;

	host->resets[0].id = "mc";
	host->resets[1].id = "host1x";
	host->nresets = ARRAY_SIZE(host->resets);

	err = devm_reset_control_bulk_get_optional_exclusive_released(
				host->dev, host->nresets, host->resets);
	if (err) {
		dev_err(host->dev, "failed to get reset: %d\n", err);
		return err;
	}

	return 0;
}

static int host1x_probe(struct platform_device *pdev)
{
	struct host1x *host;
	int syncpt_irq;
	int err;

	host = devm_kzalloc(&pdev->dev, sizeof(*host), GFP_KERNEL);
	if (!host)
		return -ENOMEM;

	host->info = of_device_get_match_data(&pdev->dev);

	if (host->info->has_hypervisor) {
		host->regs = devm_platform_ioremap_resource_byname(pdev, "vm");
		if (IS_ERR(host->regs))
			return PTR_ERR(host->regs);

		host->hv_regs = devm_platform_ioremap_resource_byname(pdev, "hypervisor");
		if (IS_ERR(host->hv_regs))
			return PTR_ERR(host->hv_regs);

		if (host->info->has_common) {
			host->common_regs = devm_platform_ioremap_resource_byname(pdev, "common");
			if (IS_ERR(host->common_regs))
				return PTR_ERR(host->common_regs);
		}
	} else {
		host->regs = devm_platform_ioremap_resource(pdev, 0);
		if (IS_ERR(host->regs))
			return PTR_ERR(host->regs);
	}

	syncpt_irq = platform_get_irq(pdev, 0);
	if (syncpt_irq < 0)
		return syncpt_irq;

	mutex_init(&host->devices_lock);
	INIT_LIST_HEAD(&host->devices);
	INIT_LIST_HEAD(&host->list);
	host->dev = &pdev->dev;

	/* set common host1x device data */
	platform_set_drvdata(pdev, host);

	host->dev->dma_parms = &host->dma_parms;
	dma_set_max_seg_size(host->dev, UINT_MAX);

	if (host->info->init) {
		err = host->info->init(host);
		if (err)
			return err;
	}

	host->clk = devm_clk_get(&pdev->dev, NULL);
	if (IS_ERR(host->clk)) {
		err = PTR_ERR(host->clk);

		if (err != -EPROBE_DEFER)
			dev_err(&pdev->dev, "failed to get clock: %d\n", err);

		return err;
	}

	err = host1x_get_resets(host);
	if (err)
		return err;

	host1x_bo_cache_init(&host->cache);

	err = host1x_iommu_init(host);
	if (err < 0) {
		dev_err(&pdev->dev, "failed to setup IOMMU: %d\n", err);
		goto destroy_cache;
	}

	err = host1x_channel_list_init(&host->channel_list,
				       host->info->nb_channels);
	if (err) {
		dev_err(&pdev->dev, "failed to initialize channel list\n");
		goto iommu_exit;
	}

	err = host1x_memory_context_list_init(host);
	if (err) {
		dev_err(&pdev->dev, "failed to initialize context list\n");
		goto free_channels;
	}

	err = host1x_syncpt_init(host);
	if (err) {
		dev_err(&pdev->dev, "failed to initialize syncpts\n");
		goto free_contexts;
	}

	err = host1x_intr_init(host, syncpt_irq);
	if (err) {
		dev_err(&pdev->dev, "failed to initialize interrupts\n");
		goto deinit_syncpt;
	}

	pm_runtime_enable(&pdev->dev);

	err = devm_tegra_core_dev_init_opp_table_common(&pdev->dev);
	if (err)
		goto pm_disable;

	/* the driver's code isn't ready yet for dynamic runtime PM */
	err = pm_runtime_resume_and_get(&pdev->dev);
	if (err)
		goto pm_disable;

	host1x_debug_init(host);

	err = host1x_register(host);
	if (err < 0)
		goto deinit_debugfs;

	err = devm_of_platform_populate(&pdev->dev);
	if (err < 0)
		goto unregister;

	return 0;

unregister:
	host1x_unregister(host);
deinit_debugfs:
	host1x_debug_deinit(host);

	pm_runtime_put_sync_suspend(&pdev->dev);
pm_disable:
	pm_runtime_disable(&pdev->dev);

	host1x_intr_deinit(host);
deinit_syncpt:
	host1x_syncpt_deinit(host);
free_contexts:
	host1x_memory_context_list_free(&host->context_list);
free_channels:
	host1x_channel_list_free(&host->channel_list);
iommu_exit:
	host1x_iommu_exit(host);
destroy_cache:
	host1x_bo_cache_destroy(&host->cache);

	return err;
}

static int host1x_remove(struct platform_device *pdev)
{
	struct host1x *host = platform_get_drvdata(pdev);

	host1x_unregister(host);
	host1x_debug_deinit(host);

	pm_runtime_force_suspend(&pdev->dev);

	host1x_intr_deinit(host);
	host1x_syncpt_deinit(host);
	host1x_memory_context_list_free(&host->context_list);
	host1x_channel_list_free(&host->channel_list);
	host1x_iommu_exit(host);
	host1x_bo_cache_destroy(&host->cache);

	return 0;
}
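/*
 * Runtime PM: suspend stops interrupt processing, saves syncpoint state and
 * asserts the resets before gating the clock; resume reverses this and then
 * reprograms the virtualization tables and restores syncpoint state, which is
 * presumed lost while the power domain is off. The resets are only held
 * (acquired) between runtime resume and runtime suspend.
 */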
static int __maybe_unused host1x_runtime_suspend(struct device *dev)
{
	struct host1x *host = dev_get_drvdata(dev);
	int err;

	host1x_intr_stop(host);
	host1x_syncpt_save(host);

	err = reset_control_bulk_assert(host->nresets, host->resets);
	if (err) {
		dev_err(dev, "failed to assert reset: %d\n", err);
		goto resume_host1x;
	}

	usleep_range(1000, 2000);

	clk_disable_unprepare(host->clk);
	reset_control_bulk_release(host->nresets, host->resets);

	return 0;

resume_host1x:
	host1x_setup_virtualization_tables(host);
	host1x_syncpt_restore(host);
	host1x_intr_start(host);

	return err;
}

static int __maybe_unused host1x_runtime_resume(struct device *dev)
{
	struct host1x *host = dev_get_drvdata(dev);
	int err;

	err = reset_control_bulk_acquire(host->nresets, host->resets);
	if (err) {
		dev_err(dev, "failed to acquire reset: %d\n", err);
		return err;
	}

	err = clk_prepare_enable(host->clk);
	if (err) {
		dev_err(dev, "failed to enable clock: %d\n", err);
		goto release_reset;
	}

	err = reset_control_bulk_deassert(host->nresets, host->resets);
	if (err < 0) {
		dev_err(dev, "failed to deassert reset: %d\n", err);
		goto disable_clk;
	}

	host1x_setup_virtualization_tables(host);
	host1x_syncpt_restore(host);
	host1x_intr_start(host);

	return 0;

disable_clk:
	clk_disable_unprepare(host->clk);
release_reset:
	reset_control_bulk_release(host->nresets, host->resets);

	return err;
}

static const struct dev_pm_ops host1x_pm_ops = {
	SET_RUNTIME_PM_OPS(host1x_runtime_suspend, host1x_runtime_resume,
			   NULL)
	/* TODO: add system suspend/resume once the driver is ready for it */
};

static struct platform_driver tegra_host1x_driver = {
	.driver = {
		.name = "tegra-host1x",
		.of_match_table = host1x_of_match,
		.pm = &host1x_pm_ops,
	},
	.probe = host1x_probe,
	.remove = host1x_remove,
};

static struct platform_driver * const drivers[] = {
	&tegra_host1x_driver,
	&tegra_mipi_driver,
};

static int __init tegra_host1x_init(void)
{
	int err;

	err = bus_register(&host1x_bus_type);
	if (err < 0)
		return err;

	err = platform_register_drivers(drivers, ARRAY_SIZE(drivers));
	if (err < 0)
		bus_unregister(&host1x_bus_type);

	return err;
}
module_init(tegra_host1x_init);

static void __exit tegra_host1x_exit(void)
{
	platform_unregister_drivers(drivers, ARRAY_SIZE(drivers));
	bus_unregister(&host1x_bus_type);
}
module_exit(tegra_host1x_exit);

/**
 * host1x_get_dma_mask() - query the supported DMA mask for host1x
 * @host1x: host1x instance
 *
 * Note that this returns the supported DMA mask for host1x, which can be
 * different from the applicable DMA mask under certain circumstances.
 */
u64 host1x_get_dma_mask(struct host1x *host1x)
{
	return host1x->info->dma_mask;
}
EXPORT_SYMBOL(host1x_get_dma_mask);

MODULE_AUTHOR("Thierry Reding <[email protected]>");
MODULE_AUTHOR("Terje Bergstrom <[email protected]>");
MODULE_DESCRIPTION("Host1x driver for Tegra products");
MODULE_LICENSE("GPL");