111c3a249SChristian König /*
211c3a249SChristian König * Copyright 2018 Advanced Micro Devices, Inc.
311c3a249SChristian König * All Rights Reserved.
411c3a249SChristian König *
511c3a249SChristian König * Permission is hereby granted, free of charge, to any person obtaining a
611c3a249SChristian König * copy of this software and associated documentation files (the
711c3a249SChristian König * "Software"), to deal in the Software without restriction, including
811c3a249SChristian König * without limitation the rights to use, copy, modify, merge, publish,
911c3a249SChristian König * distribute, sub license, and/or sell copies of the Software, and to
1011c3a249SChristian König * permit persons to whom the Software is furnished to do so, subject to
1111c3a249SChristian König * the following conditions:
1211c3a249SChristian König *
1311c3a249SChristian König * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1411c3a249SChristian König * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1511c3a249SChristian König * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
1611c3a249SChristian König * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
1711c3a249SChristian König * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
1811c3a249SChristian König * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
1911c3a249SChristian König * USE OR OTHER DEALINGS IN THE SOFTWARE.
2011c3a249SChristian König *
2111c3a249SChristian König * The above copyright notice and this permission notice (including the
2211c3a249SChristian König * next paragraph) shall be included in all copies or substantial portions
2311c3a249SChristian König * of the Software.
2411c3a249SChristian König *
2511c3a249SChristian König */
2611c3a249SChristian König
27c366be54SSam Ravnborg #include <linux/io-64-nonatomic-lo-hi.h>
28d9e50239SYongqiang Sun #ifdef CONFIG_X86
2949aa98caSYongqiang Sun #include <asm/hypervisor.h>
30d9e50239SYongqiang Sun #endif
31c366be54SSam Ravnborg
3211c3a249SChristian König #include "amdgpu.h"
33dd285c5dSAlex Deucher #include "amdgpu_gmc.h"
34a85eff14SHawking Zhang #include "amdgpu_ras.h"
35e7b90e99SChristian König #include "amdgpu_reset.h"
36be5b39d8STao Zhou #include "amdgpu_xgmi.h"
3711c3a249SChristian König
38f89f8c6bSAndrey Grodzovsky #include <drm/drm_drv.h>
39a3185f91SChristian König #include <drm/ttm/ttm_tt.h>
40f89f8c6bSAndrey Grodzovsky
4111c3a249SChristian König /**
42a2902c09SOak Zeng * amdgpu_gmc_pdb0_alloc - allocate vram for pdb0
43a2902c09SOak Zeng *
44a2902c09SOak Zeng * @adev: amdgpu_device pointer
45a2902c09SOak Zeng *
46a2902c09SOak Zeng * Allocate video memory for pdb0 and map it for CPU access
47a2902c09SOak Zeng * Returns 0 for success, error for failure.
48a2902c09SOak Zeng */
49a2902c09SOak Zeng int amdgpu_gmc_pdb0_alloc(struct amdgpu_device *adev)
50a2902c09SOak Zeng {
51a2902c09SOak Zeng int r;
52a2902c09SOak Zeng struct amdgpu_bo_param bp;
53a2902c09SOak Zeng u64 vram_size = adev->gmc.xgmi.node_segment_size * adev->gmc.xgmi.num_physical_nodes;
54a2902c09SOak Zeng uint32_t pde0_page_shift = adev->gmc.vmid0_page_table_block_size + 21;
55a2902c09SOak Zeng uint32_t npdes = (vram_size + (1ULL << pde0_page_shift) - 1) >> pde0_page_shift;
56a2902c09SOak Zeng
57a2902c09SOak Zeng memset(&bp, 0, sizeof(bp));
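/* one 8 byte entry per PDE0 covering VRAM plus one entry pointing to the system memory PTB */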
58a2902c09SOak Zeng bp.size = PAGE_ALIGN((npdes + 1) * 8);
59a2902c09SOak Zeng bp.byte_align = PAGE_SIZE;
60a2902c09SOak Zeng bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
61a2902c09SOak Zeng bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
62a2902c09SOak Zeng AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
63a2902c09SOak Zeng bp.type = ttm_bo_type_kernel;
64a2902c09SOak Zeng bp.resv = NULL;
659fd5543eSNirmoy Das bp.bo_ptr_size = sizeof(struct amdgpu_bo);
669fd5543eSNirmoy Das
67a2902c09SOak Zeng r = amdgpu_bo_create(adev, &bp, &adev->gmc.pdb0_bo);
68a2902c09SOak Zeng if (r)
69a2902c09SOak Zeng return r;
70a2902c09SOak Zeng
71a2902c09SOak Zeng r = amdgpu_bo_reserve(adev->gmc.pdb0_bo, false);
72a2902c09SOak Zeng if (unlikely(r != 0))
73a2902c09SOak Zeng goto bo_reserve_failure;
74a2902c09SOak Zeng
75a2902c09SOak Zeng r = amdgpu_bo_pin(adev->gmc.pdb0_bo, AMDGPU_GEM_DOMAIN_VRAM);
76a2902c09SOak Zeng if (r)
77a2902c09SOak Zeng goto bo_pin_failure;
78a2902c09SOak Zeng r = amdgpu_bo_kmap(adev->gmc.pdb0_bo, &adev->gmc.ptr_pdb0);
79a2902c09SOak Zeng if (r)
80a2902c09SOak Zeng goto bo_kmap_failure;
81a2902c09SOak Zeng
82a2902c09SOak Zeng amdgpu_bo_unreserve(adev->gmc.pdb0_bo);
83a2902c09SOak Zeng return 0;
84a2902c09SOak Zeng
85a2902c09SOak Zeng bo_kmap_failure:
86a2902c09SOak Zeng amdgpu_bo_unpin(adev->gmc.pdb0_bo);
87a2902c09SOak Zeng bo_pin_failure:
88a2902c09SOak Zeng amdgpu_bo_unreserve(adev->gmc.pdb0_bo);
89a2902c09SOak Zeng bo_reserve_failure:
90a2902c09SOak Zeng amdgpu_bo_unref(&adev->gmc.pdb0_bo);
91a2902c09SOak Zeng return r;
92a2902c09SOak Zeng }
93a2902c09SOak Zeng
94a2902c09SOak Zeng /**
9524a8d289SChristian König * amdgpu_gmc_get_pde_for_bo - get the PDE for a BO
9624a8d289SChristian König *
9724a8d289SChristian König * @bo: the BO to get the PDE for
9824a8d289SChristian König * @level: the level in the PD hierarchy
9924a8d289SChristian König * @addr: resulting addr
10024a8d289SChristian König * @flags: resulting flags
10124a8d289SChristian König *
10224a8d289SChristian König * Get the address and flags to be used for a PDE (Page Directory Entry).
10324a8d289SChristian König */
10424a8d289SChristian König void amdgpu_gmc_get_pde_for_bo(struct amdgpu_bo *bo, int level,
10524a8d289SChristian König uint64_t *addr, uint64_t *flags)
10624a8d289SChristian König {
10724a8d289SChristian König struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
10824a8d289SChristian König
109d3116756SChristian König switch (bo->tbo.resource->mem_type) {
11024a8d289SChristian König case TTM_PL_TT:
111e34b8feeSChristian König *addr = bo->tbo.ttm->dma_address[0];
11224a8d289SChristian König break;
11324a8d289SChristian König case TTM_PL_VRAM:
11424a8d289SChristian König *addr = amdgpu_bo_gpu_offset(bo);
11524a8d289SChristian König break;
11624a8d289SChristian König default:
11724a8d289SChristian König *addr = 0;
11824a8d289SChristian König break;
11924a8d289SChristian König }
120d3116756SChristian König *flags = amdgpu_ttm_tt_pde_flags(bo->tbo.ttm, bo->tbo.resource);
12124a8d289SChristian König amdgpu_gmc_get_vm_pde(adev, level, addr, flags);
12224a8d289SChristian König }
12324a8d289SChristian König
124ff08711cSLee Jones /*
12511c3a249SChristian König * amdgpu_gmc_pd_addr - return the address of the root directory
12611c3a249SChristian König */
12711c3a249SChristian König uint64_t amdgpu_gmc_pd_addr(struct amdgpu_bo *bo)
12811c3a249SChristian König {
12911c3a249SChristian König struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
13011c3a249SChristian König uint64_t pd_addr;
13111c3a249SChristian König
13211c3a249SChristian König /* TODO: move that into ASIC specific code */
13311c3a249SChristian König if (adev->asic_type >= CHIP_VEGA10) {
13411c3a249SChristian König uint64_t flags = AMDGPU_PTE_VALID;
13511c3a249SChristian König
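/* from Vega10 on, the root PD address also carries the PDE flags */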
13624a8d289SChristian König amdgpu_gmc_get_pde_for_bo(bo, -1, &pd_addr, &flags);
13711c3a249SChristian König pd_addr |= flags;
13824a8d289SChristian König } else {
13924a8d289SChristian König pd_addr = amdgpu_bo_gpu_offset(bo);
14011c3a249SChristian König }
14111c3a249SChristian König return pd_addr;
14211c3a249SChristian König }
143961c75cfSChristian König
144961c75cfSChristian König /**
1456490bd76SYong Zhao * amdgpu_gmc_set_pte_pde - update the page tables using CPU
1466490bd76SYong Zhao *
1476490bd76SYong Zhao * @adev: amdgpu_device pointer
1486490bd76SYong Zhao * @cpu_pt_addr: cpu address of the page table
1496490bd76SYong Zhao * @gpu_page_idx: entry in the page table to update
1506490bd76SYong Zhao * @addr: dst addr to write into pte/pde
1516490bd76SYong Zhao * @flags: access flags
1526490bd76SYong Zhao *
1536490bd76SYong Zhao * Update the page tables using CPU.
1546490bd76SYong Zhao */
1556490bd76SYong Zhao int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
1566490bd76SYong Zhao uint32_t gpu_page_idx, uint64_t addr,
1576490bd76SYong Zhao uint64_t flags)
1586490bd76SYong Zhao {
1596490bd76SYong Zhao void __iomem *ptr = (void *)cpu_pt_addr;
1606490bd76SYong Zhao uint64_t value;
1616490bd76SYong Zhao
1626490bd76SYong Zhao /*
1636490bd76SYong Zhao * The following is for PTE only. GART does not have PDEs.
1646490bd76SYong Zhao */
1656490bd76SYong Zhao value = addr & 0x0000FFFFFFFFF000ULL;
1666490bd76SYong Zhao value |= flags;
1676490bd76SYong Zhao writeq(value, ptr + (gpu_page_idx * 8));
168f89f8c6bSAndrey Grodzovsky
1696490bd76SYong Zhao return 0;
1706490bd76SYong Zhao }
1716490bd76SYong Zhao
1726490bd76SYong Zhao /**
173485fc361SChristian König * amdgpu_gmc_agp_addr - return the address in the AGP address space
174485fc361SChristian König *
175ff08711cSLee Jones * @bo: TTM BO which needs the address, must be in GTT domain
176485fc361SChristian König *
177485fc361SChristian König * Tries to figure out how to access the BO through the AGP aperture. Returns
178485fc361SChristian König * AMDGPU_BO_INVALID_OFFSET if that is not possible.
179485fc361SChristian König */
180485fc361SChristian König uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo)
181485fc361SChristian König {
182485fc361SChristian König struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
183485fc361SChristian König
184ca0b0069SAlex Deucher if (!bo->ttm)
185ca0b0069SAlex Deucher return AMDGPU_BO_INVALID_OFFSET;
186ca0b0069SAlex Deucher
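/* only single page BOs which are not CPU cached can go through the AGP aperture */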
187e11bfb99SChristian König if (bo->ttm->num_pages != 1 || bo->ttm->caching == ttm_cached)
188485fc361SChristian König return AMDGPU_BO_INVALID_OFFSET;
189485fc361SChristian König
190e34b8feeSChristian König if (bo->ttm->dma_address[0] + PAGE_SIZE >= adev->gmc.agp_size)
191485fc361SChristian König return AMDGPU_BO_INVALID_OFFSET;
192485fc361SChristian König
193e34b8feeSChristian König return adev->gmc.agp_start + bo->ttm->dma_address[0];
194485fc361SChristian König }
195485fc361SChristian König
196485fc361SChristian König /**
197961c75cfSChristian König * amdgpu_gmc_vram_location - try to find VRAM location
198961c75cfSChristian König *
19953c9c89aSRajneesh Bhardwaj * @adev: amdgpu device structure holding all necessary information
20053c9c89aSRajneesh Bhardwaj * @mc: memory controller structure holding memory information
201961c75cfSChristian König * @base: base address at which to put VRAM
202961c75cfSChristian König *
203961c75cfSChristian König * Function will try to place VRAM at the base address provided
204961c75cfSChristian König * as parameter.
205961c75cfSChristian König */
206961c75cfSChristian König void amdgpu_gmc_vram_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,
207961c75cfSChristian König u64 base)
208961c75cfSChristian König {
209da2f9920SChristian König uint64_t vis_limit = (uint64_t)amdgpu_vis_vram_limit << 20;
210961c75cfSChristian König uint64_t limit = (uint64_t)amdgpu_vram_limit << 20;
211961c75cfSChristian König
212961c75cfSChristian König mc->vram_start = base;
213961c75cfSChristian König mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
2140b04ea39SChristian König if (limit < mc->real_vram_size)
215961c75cfSChristian König mc->real_vram_size = limit;
2166fdd68b1SAlex Deucher
217da2f9920SChristian König if (vis_limit && vis_limit < mc->visible_vram_size)
218da2f9920SChristian König mc->visible_vram_size = vis_limit;
219da2f9920SChristian König
220da2f9920SChristian König if (mc->real_vram_size < mc->visible_vram_size)
221da2f9920SChristian König mc->visible_vram_size = mc->real_vram_size;
222da2f9920SChristian König
2236fdd68b1SAlex Deucher if (mc->xgmi.num_physical_nodes == 0) {
2246fdd68b1SAlex Deucher mc->fb_start = mc->vram_start;
2256fdd68b1SAlex Deucher mc->fb_end = mc->vram_end;
2266fdd68b1SAlex Deucher }
227961c75cfSChristian König dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
228961c75cfSChristian König mc->mc_vram_size >> 20, mc->vram_start,
229961c75cfSChristian König mc->vram_end, mc->real_vram_size >> 20);
230961c75cfSChristian König }
231961c75cfSChristian König
232f527f310SOak Zeng /** amdgpu_gmc_sysvm_location - place vram and gart in sysvm aperture
233f527f310SOak Zeng *
234f527f310SOak Zeng * @adev: amdgpu device structure holding all necessary information
235f527f310SOak Zeng * @mc: memory controller structure holding memory information
236f527f310SOak Zeng *
237f527f310SOak Zeng * This function is only used when GART is used for FB translation. In such
238f527f310SOak Zeng * case, we use sysvm aperture (vmid0 page tables) for both vram
239f527f310SOak Zeng * and gart (aka system memory) access.
240f527f310SOak Zeng *
241f527f310SOak Zeng * GPUVM (and our organization of vmid0 page tables) requires the sysvm
242f527f310SOak Zeng * aperture to be placed at a location aligned to 8 times the native
243f527f310SOak Zeng * page size. For example, if vm_context0_cntl.page_table_block_size
244f527f310SOak Zeng * is 12, then the native page size is 8G (2M*2^12), so sysvm should start
245f527f310SOak Zeng * at a 64G aligned address. For simplicity, we just put sysvm at
246f527f310SOak Zeng * address 0. So VRAM starts at address 0 and GART is placed right after it.
247f527f310SOak Zeng */
248f527f310SOak Zeng void amdgpu_gmc_sysvm_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
249f527f310SOak Zeng {
250f527f310SOak Zeng u64 hive_vram_start = 0;
251f527f310SOak Zeng u64 hive_vram_end = mc->xgmi.node_segment_size * mc->xgmi.num_physical_nodes - 1;
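/* this node's VRAM segment within the hive-wide FB aperture */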
252f527f310SOak Zeng mc->vram_start = mc->xgmi.node_segment_size * mc->xgmi.physical_node_id;
253f527f310SOak Zeng mc->vram_end = mc->vram_start + mc->xgmi.node_segment_size - 1;
254f527f310SOak Zeng mc->gart_start = hive_vram_end + 1;
255f527f310SOak Zeng mc->gart_end = mc->gart_start + mc->gart_size - 1;
256f527f310SOak Zeng mc->fb_start = hive_vram_start;
257f527f310SOak Zeng mc->fb_end = hive_vram_end;
258f527f310SOak Zeng dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
259f527f310SOak Zeng mc->mc_vram_size >> 20, mc->vram_start,
260f527f310SOak Zeng mc->vram_end, mc->real_vram_size >> 20);
261f527f310SOak Zeng dev_info(adev->dev, "GART: %lluM 0x%016llX - 0x%016llX\n",
262f527f310SOak Zeng mc->gart_size >> 20, mc->gart_start, mc->gart_end);
263f527f310SOak Zeng }
264f527f310SOak Zeng
265961c75cfSChristian König /**
266961c75cfSChristian König * amdgpu_gmc_gart_location - try to find GART location
267961c75cfSChristian König *
26853c9c89aSRajneesh Bhardwaj * @adev: amdgpu device structure holding all necessary information
26953c9c89aSRajneesh Bhardwaj * @mc: memory controller structure holding memory information
27054967d56SSrinivasan Shanmugam * @gart_placement: GART placement policy with respect to VRAM
271961c75cfSChristian König *
272ce43abd7SAlexandre Demers * Function will try to place GART before or after VRAM.
273961c75cfSChristian König * If the GART size is bigger than the space left, we adjust the GART size.
274961c75cfSChristian König * Thus this function never fails.
275961c75cfSChristian König */
276917f91d8SAlex Deucher void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,
277917f91d8SAlex Deucher enum amdgpu_gart_placement gart_placement)
278961c75cfSChristian König {
279ec210e32SChristian König const uint64_t four_gb = 0x100000000ULL;
280961c75cfSChristian König u64 size_af, size_bf;
281f2d9bbc9SEmily Deng /* To avoid the hole, limit the max mc address to AMDGPU_GMC_HOLE_START */
282f2d9bbc9SEmily Deng u64 max_mc_address = min(adev->gmc.mc_mask, AMDGPU_GMC_HOLE_START - 1);
283961c75cfSChristian König
284961c75cfSChristian König /* VCE doesn't like it when BOs cross a 4GB segment, so align
285961c75cfSChristian König * the GART base on a 4GB boundary as well.
286961c75cfSChristian König */
2876fdd68b1SAlex Deucher size_bf = mc->fb_start;
288f2d9bbc9SEmily Deng size_af = max_mc_address + 1 - ALIGN(mc->fb_end + 1, four_gb);
2890be655d1SChristian König
2900be655d1SChristian König if (mc->gart_size > max(size_bf, size_af)) {
2910be655d1SChristian König dev_warn(adev->dev, "limiting GART\n");
2920be655d1SChristian König mc->gart_size = max(size_bf, size_af);
293961c75cfSChristian König }
2940be655d1SChristian König
295917f91d8SAlex Deucher switch (gart_placement) {
296917f91d8SAlex Deucher case AMDGPU_GART_PLACEMENT_HIGH:
297917f91d8SAlex Deucher mc->gart_start = max_mc_address - mc->gart_size + 1;
298917f91d8SAlex Deucher break;
299917f91d8SAlex Deucher case AMDGPU_GART_PLACEMENT_LOW:
300917f91d8SAlex Deucher mc->gart_start = 0;
301917f91d8SAlex Deucher break;
302917f91d8SAlex Deucher case AMDGPU_GART_PLACEMENT_BEST_FIT:
303917f91d8SAlex Deucher default:
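/* pick the smaller hole that still fits the GART; fall back to the other side otherwise */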
3045f232bd7SChristian König if ((size_bf >= mc->gart_size && size_bf < size_af) ||
3055f232bd7SChristian König (size_af < mc->gart_size))
3060be655d1SChristian König mc->gart_start = 0;
3070be655d1SChristian König else
308f2d9bbc9SEmily Deng mc->gart_start = max_mc_address - mc->gart_size + 1;
309917f91d8SAlex Deucher break;
310917f91d8SAlex Deucher }
311ec210e32SChristian König
312feabaad8SChristian König mc->gart_start &= ~(four_gb - 1);
313961c75cfSChristian König mc->gart_end = mc->gart_start + mc->gart_size - 1;
314961c75cfSChristian König dev_info(adev->dev, "GART: %lluM 0x%016llX - 0x%016llX\n",
315961c75cfSChristian König mc->gart_size >> 20, mc->gart_start, mc->gart_end);
316961c75cfSChristian König }
317d76364fcSChristian König
318d76364fcSChristian König /**
319d76364fcSChristian König * amdgpu_gmc_agp_location - try to find AGP location
32053c9c89aSRajneesh Bhardwaj * @adev: amdgpu device structure holding all necessary information
32153c9c89aSRajneesh Bhardwaj * @mc: memory controller structure holding memory information
322d76364fcSChristian König *
323d76364fcSChristian König * Function will try to find a place for the AGP BAR in the MC address
324d76364fcSChristian König * space.
325d76364fcSChristian König *
326d76364fcSChristian König * AGP BAR will be assigned the largest available hole in the address space.
327d76364fcSChristian König * Should be called after VRAM and GART locations are setup.
328d76364fcSChristian König */
329d76364fcSChristian König void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
330d76364fcSChristian König {
331d76364fcSChristian König const uint64_t sixteen_gb = 1ULL << 34;
332d76364fcSChristian König const uint64_t sixteen_gb_mask = ~(sixteen_gb - 1);
333d76364fcSChristian König u64 size_af, size_bf;
334d76364fcSChristian König
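/* measure the 16GB aligned holes below and above the FB/GART regions */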
3356fdd68b1SAlex Deucher if (mc->fb_start > mc->gart_start) {
3366fdd68b1SAlex Deucher size_bf = (mc->fb_start & sixteen_gb_mask) -
337d76364fcSChristian König ALIGN(mc->gart_end + 1, sixteen_gb);
3386fdd68b1SAlex Deucher size_af = mc->mc_mask + 1 - ALIGN(mc->fb_end + 1, sixteen_gb);
339d76364fcSChristian König } else {
3406fdd68b1SAlex Deucher size_bf = mc->fb_start & sixteen_gb_mask;
341d76364fcSChristian König size_af = (mc->gart_start & sixteen_gb_mask) -
3426fdd68b1SAlex Deucher ALIGN(mc->fb_end + 1, sixteen_gb);
343d76364fcSChristian König }
344d76364fcSChristian König
345d76364fcSChristian König if (size_bf > size_af) {
3462ccecaf6SChristian König mc->agp_start = (mc->fb_start - size_bf) & sixteen_gb_mask;
347d76364fcSChristian König mc->agp_size = size_bf;
348d76364fcSChristian König } else {
3492ccecaf6SChristian König mc->agp_start = ALIGN(mc->fb_end + 1, sixteen_gb);
350d76364fcSChristian König mc->agp_size = size_af;
351d76364fcSChristian König }
352d76364fcSChristian König
353d76364fcSChristian König mc->agp_end = mc->agp_start + mc->agp_size - 1;
354d76364fcSChristian König dev_info(adev->dev, "AGP: %lluM 0x%016llX - 0x%016llX\n",
355d76364fcSChristian König mc->agp_size >> 20, mc->agp_start, mc->agp_end);
356d76364fcSChristian König }
357c1a8abd9SChristian König
358c1a8abd9SChristian König /**
359de59b699SAlex Deucher * amdgpu_gmc_set_agp_default - Set the default AGP aperture value.
360de59b699SAlex Deucher * @adev: amdgpu device structure holding all necessary information
361de59b699SAlex Deucher * @mc: memory controller structure holding memory information
362de59b699SAlex Deucher *
363de59b699SAlex Deucher * To disable the AGP aperture, you need to set the start to a larger
364de59b699SAlex Deucher * value than the end. This function sets the default value which
365de59b699SAlex Deucher * can then be overridden using amdgpu_gmc_agp_location() if you want
366de59b699SAlex Deucher * to enable the AGP aperture on a specific chip.
367de59b699SAlex Deucher *
368de59b699SAlex Deucher */
369de59b699SAlex Deucher void amdgpu_gmc_set_agp_default(struct amdgpu_device *adev,
370de59b699SAlex Deucher struct amdgpu_gmc *mc)
371de59b699SAlex Deucher {
372de59b699SAlex Deucher mc->agp_start = 0xffffffffffff;
373de59b699SAlex Deucher mc->agp_end = 0;
374de59b699SAlex Deucher mc->agp_size = 0;
375de59b699SAlex Deucher }
376de59b699SAlex Deucher
377de59b699SAlex Deucher /**
37836255b5fSPhilip Yang * amdgpu_gmc_fault_key - get hash key from vm fault address and pasid
37936255b5fSPhilip Yang *
38036255b5fSPhilip Yang * @addr: 48 bit physical address, page aligned (36 significant bits)
38136255b5fSPhilip Yang * @pasid: 16 bit process address space identifier
38236255b5fSPhilip Yang */
38336255b5fSPhilip Yang static inline uint64_t amdgpu_gmc_fault_key(uint64_t addr, uint16_t pasid)
38436255b5fSPhilip Yang {
38536255b5fSPhilip Yang return addr << 4 | pasid;
38636255b5fSPhilip Yang }
38736255b5fSPhilip Yang
38836255b5fSPhilip Yang /**
389c1a8abd9SChristian König * amdgpu_gmc_filter_faults - filter VM faults
390c1a8abd9SChristian König *
391c1a8abd9SChristian König * @adev: amdgpu device structure
3923c2d6ea2SPhilip Yang * @ih: interrupt ring that the fault was received from
393c1a8abd9SChristian König * @addr: address of the VM fault
394c1a8abd9SChristian König * @pasid: PASID of the process causing the fault
395c1a8abd9SChristian König * @timestamp: timestamp of the fault
396c1a8abd9SChristian König *
397c1a8abd9SChristian König * Returns:
398c1a8abd9SChristian König * True if the fault was filtered and should not be processed further.
399c1a8abd9SChristian König * False if the fault is a new one and needs to be handled.
400c1a8abd9SChristian König */
4013c2d6ea2SPhilip Yang bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev,
4023c2d6ea2SPhilip Yang struct amdgpu_ih_ring *ih, uint64_t addr,
403c1a8abd9SChristian König uint16_t pasid, uint64_t timestamp)
404c1a8abd9SChristian König {
405c1a8abd9SChristian König struct amdgpu_gmc *gmc = &adev->gmc;
40636255b5fSPhilip Yang uint64_t stamp, key = amdgpu_gmc_fault_key(addr, pasid);
407c1a8abd9SChristian König struct amdgpu_gmc_fault *fault;
408c1a8abd9SChristian König uint32_t hash;
409c1a8abd9SChristian König
4103c2d6ea2SPhilip Yang /* Stale retry fault if timestamp goes backward */
4113c2d6ea2SPhilip Yang if (amdgpu_ih_ts_after(timestamp, ih->processed_timestamp))
4123c2d6ea2SPhilip Yang return true;
4133c2d6ea2SPhilip Yang
414c1a8abd9SChristian König /* If we don't have space left in the ring buffer return immediately */
415c1a8abd9SChristian König stamp = max(timestamp, AMDGPU_GMC_FAULT_TIMEOUT + 1) -
416c1a8abd9SChristian König AMDGPU_GMC_FAULT_TIMEOUT;
417c1a8abd9SChristian König if (gmc->fault_ring[gmc->last_fault].timestamp >= stamp)
418c1a8abd9SChristian König return true;
419c1a8abd9SChristian König
420c1a8abd9SChristian König /* Try to find the fault in the hash */
421c1a8abd9SChristian König hash = hash_64(key, AMDGPU_GMC_FAULT_HASH_ORDER);
422c1a8abd9SChristian König fault = &gmc->fault_ring[gmc->fault_hash[hash].idx];
423c1a8abd9SChristian König while (fault->timestamp >= stamp) {
424c1a8abd9SChristian König uint64_t tmp;
425c1a8abd9SChristian König
426dd299441SMukul Joshi if (atomic64_read(&fault->key) == key) {
427dd299441SMukul Joshi /*
428dd299441SMukul Joshi * if we get a fault which is already present in
429dd299441SMukul Joshi * the fault_ring and the timestamp of
430dd299441SMukul Joshi * the fault is after the expired timestamp,
431dd299441SMukul Joshi * then this is a new fault that needs to be added
432dd299441SMukul Joshi * into the fault ring.
433dd299441SMukul Joshi */
434dd299441SMukul Joshi if (fault->timestamp_expiry != 0 &&
435dd299441SMukul Joshi amdgpu_ih_ts_after(fault->timestamp_expiry,
436dd299441SMukul Joshi timestamp))
437dd299441SMukul Joshi break;
438dd299441SMukul Joshi else
439c1a8abd9SChristian König return true;
440dd299441SMukul Joshi }
441c1a8abd9SChristian König
442c1a8abd9SChristian König tmp = fault->timestamp;
443c1a8abd9SChristian König fault = &gmc->fault_ring[fault->next];
444c1a8abd9SChristian König
445c1a8abd9SChristian König /* Check if the entry was reused */
446c1a8abd9SChristian König if (fault->timestamp >= tmp)
447c1a8abd9SChristian König break;
448c1a8abd9SChristian König }
449c1a8abd9SChristian König
450c1a8abd9SChristian König /* Add the fault to the ring */
451c1a8abd9SChristian König fault = &gmc->fault_ring[gmc->last_fault];
45236255b5fSPhilip Yang atomic64_set(&fault->key, key);
453c1a8abd9SChristian König fault->timestamp = timestamp;
454c1a8abd9SChristian König
455c1a8abd9SChristian König /* And update the hash */
456c1a8abd9SChristian König fault->next = gmc->fault_hash[hash].idx;
457c1a8abd9SChristian König gmc->fault_hash[hash].idx = gmc->last_fault++;
458c1a8abd9SChristian König return false;
459c1a8abd9SChristian König }
4602adf1344STao Zhou
46136255b5fSPhilip Yang /**
46236255b5fSPhilip Yang * amdgpu_gmc_filter_faults_remove - remove address from VM faults filter
46336255b5fSPhilip Yang *
46436255b5fSPhilip Yang * @adev: amdgpu device structure
46536255b5fSPhilip Yang * @addr: address of the VM fault
46636255b5fSPhilip Yang * @pasid: PASID of the process causing the fault
46736255b5fSPhilip Yang *
46836255b5fSPhilip Yang * Remove the address from the fault filter, so that a future vm fault on this
46936255b5fSPhilip Yang * address will be passed to the retry fault handler to recover.
47036255b5fSPhilip Yang */
47136255b5fSPhilip Yang void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr,
47236255b5fSPhilip Yang uint16_t pasid)
47336255b5fSPhilip Yang {
47436255b5fSPhilip Yang struct amdgpu_gmc *gmc = &adev->gmc;
47536255b5fSPhilip Yang uint64_t key = amdgpu_gmc_fault_key(addr, pasid);
476dd299441SMukul Joshi struct amdgpu_ih_ring *ih;
47736255b5fSPhilip Yang struct amdgpu_gmc_fault *fault;
478dd299441SMukul Joshi uint32_t last_wptr;
479dd299441SMukul Joshi uint64_t last_ts;
48036255b5fSPhilip Yang uint32_t hash;
48136255b5fSPhilip Yang uint64_t tmp;
48236255b5fSPhilip Yang
483e61801f1SPhilip Yang if (adev->irq.retry_cam_enabled)
484e61801f1SPhilip Yang return;
485e61801f1SPhilip Yang
486e61801f1SPhilip Yang ih = &adev->irq.ih1;
487dd299441SMukul Joshi /* Get the WPTR of the last entry in IH ring */
488dd299441SMukul Joshi last_wptr = amdgpu_ih_get_wptr(adev, ih);
489dd299441SMukul Joshi /* Order wptr with ring data. */
490dd299441SMukul Joshi rmb();
491dd299441SMukul Joshi /* Get the timestamp of the last entry in IH ring */
492dd299441SMukul Joshi last_ts = amdgpu_ih_decode_iv_ts(adev, ih, last_wptr, -1);
493dd299441SMukul Joshi
49436255b5fSPhilip Yang hash = hash_64(key, AMDGPU_GMC_FAULT_HASH_ORDER);
49536255b5fSPhilip Yang fault = &gmc->fault_ring[gmc->fault_hash[hash].idx];
49636255b5fSPhilip Yang do {
497dd299441SMukul Joshi if (atomic64_read(&fault->key) == key) {
498dd299441SMukul Joshi /*
499dd299441SMukul Joshi * Update the timestamp when this fault
500dd299441SMukul Joshi * expired.
501dd299441SMukul Joshi */
502dd299441SMukul Joshi fault->timestamp_expiry = last_ts;
50336255b5fSPhilip Yang break;
504dd299441SMukul Joshi }
50536255b5fSPhilip Yang
50636255b5fSPhilip Yang tmp = fault->timestamp;
50736255b5fSPhilip Yang fault = &gmc->fault_ring[fault->next];
50836255b5fSPhilip Yang } while (fault->timestamp < tmp);
50936255b5fSPhilip Yang }
51036255b5fSPhilip Yang
511a6dcf9a7SHawking Zhang int amdgpu_gmc_ras_sw_init(struct amdgpu_device *adev)
5121f33bd18Syipechai {
513a6dcf9a7SHawking Zhang int r;
514a6dcf9a7SHawking Zhang
515a6dcf9a7SHawking Zhang /* umc ras block */
516a6dcf9a7SHawking Zhang r = amdgpu_umc_ras_sw_init(adev);
517a6dcf9a7SHawking Zhang if (r)
518a6dcf9a7SHawking Zhang return r;
519a6dcf9a7SHawking Zhang
520fec70a86SHawking Zhang /* mmhub ras block */
521fec70a86SHawking Zhang r = amdgpu_mmhub_ras_sw_init(adev);
522fec70a86SHawking Zhang if (r)
523fec70a86SHawking Zhang return r;
524fec70a86SHawking Zhang
525474e2d49SHawking Zhang /* hdp ras block */
526474e2d49SHawking Zhang r = amdgpu_hdp_ras_sw_init(adev);
527474e2d49SHawking Zhang if (r)
528474e2d49SHawking Zhang return r;
529474e2d49SHawking Zhang
5307f544c54SHawking Zhang /* mca.x ras block */
5317f544c54SHawking Zhang r = amdgpu_mca_mp0_ras_sw_init(adev);
5327f544c54SHawking Zhang if (r)
5337f544c54SHawking Zhang return r;
5347f544c54SHawking Zhang
5357f544c54SHawking Zhang r = amdgpu_mca_mp1_ras_sw_init(adev);
5367f544c54SHawking Zhang if (r)
5377f544c54SHawking Zhang return r;
5387f544c54SHawking Zhang
5397f544c54SHawking Zhang r = amdgpu_mca_mpio_ras_sw_init(adev);
5407f544c54SHawking Zhang if (r)
5417f544c54SHawking Zhang return r;
5427f544c54SHawking Zhang
543da9d669eSHawking Zhang /* xgmi ras block */
544da9d669eSHawking Zhang r = amdgpu_xgmi_ras_sw_init(adev);
545da9d669eSHawking Zhang if (r)
546da9d669eSHawking Zhang return r;
5471f33bd18Syipechai
5481f33bd18Syipechai return 0;
5491f33bd18Syipechai }
5501f33bd18Syipechai
551ba083492STao Zhou int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
552ba083492STao Zhou {
55352137ca8SHawking Zhang return 0;
554ba083492STao Zhou }
555ba083492STao Zhou
5562adf1344STao Zhou void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
5572adf1344STao Zhou {
55849070c4eSHawking Zhang
5592adf1344STao Zhou }
560bdbe90f0SAlex Deucher
561bdbe90f0SAlex Deucher /*
5625677c520SAlex Deucher * The latest engine allocation on gfx9/10 is:
563bdbe90f0SAlex Deucher * Engine 2, 3: firmware
564bdbe90f0SAlex Deucher * Engine 0, 1, 4~16: amdgpu ring,
565bdbe90f0SAlex Deucher * subject to change when ring number changes
566bdbe90f0SAlex Deucher * Engine 17: Gart flushes
567bdbe90f0SAlex Deucher */
56862e79087SShiwu Zhang #define AMDGPU_VMHUB_INV_ENG_BITMAP 0x1FFF3
569bdbe90f0SAlex Deucher
570bdbe90f0SAlex Deucher int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
571bdbe90f0SAlex Deucher {
572bdbe90f0SAlex Deucher struct amdgpu_ring *ring;
57362e79087SShiwu Zhang unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] = {0};
574bdbe90f0SAlex Deucher unsigned i;
575bdbe90f0SAlex Deucher unsigned vmhub, inv_eng;
576b09cdeb4S[email protected] struct amdgpu_ring *shared_ring;
577bdbe90f0SAlex Deucher
57862e79087SShiwu Zhang /* init the vm inv eng for all vmhubs */
57962e79087SShiwu Zhang for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) {
58062e79087SShiwu Zhang vm_inv_engs[i] = AMDGPU_VMHUB_INV_ENG_BITMAP;
581a2b1df92SJack Xiao /* reserve engine 5 for firmware */
58262e79087SShiwu Zhang if (adev->enable_mes)
58362e79087SShiwu Zhang vm_inv_engs[i] &= ~(1 << 5);
58440748f9aSLang Yu /* reserve mmhub engine 3 for firmware */
58540748f9aSLang Yu if (adev->enable_umsch_mm)
58640748f9aSLang Yu vm_inv_engs[i] &= ~(1 << 3);
587a2b1df92SJack Xiao }
588a2b1df92SJack Xiao
589bdbe90f0SAlex Deucher for (i = 0; i < adev->num_rings; ++i) {
590bdbe90f0SAlex Deucher ring = adev->rings[i];
5910530553bSLe Ma vmhub = ring->vm_hub;
592bdbe90f0SAlex Deucher
593c7d43556SJack Xiao if (ring == &adev->mes.ring[0] ||
594c7d43556SJack Xiao ring == &adev->mes.ring[1] ||
5954d614ce8STao Zhou ring == &adev->umsch_mm.ring ||
5964d614ce8STao Zhou ring == &adev->cper.ring_buf)
597b770f04bSLe Ma continue;
598b770f04bSLe Ma
599b09cdeb4S[email protected] /* Skip if the ring is a shared ring */
600b09cdeb4S[email protected] if (amdgpu_sdma_is_shared_inv_eng(adev, ring))
601b09cdeb4S[email protected] continue;
602b09cdeb4S[email protected]
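/* grab the lowest invalidation engine still available on this hub */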
603bdbe90f0SAlex Deucher inv_eng = ffs(vm_inv_engs[vmhub]);
604bdbe90f0SAlex Deucher if (!inv_eng) {
605bdbe90f0SAlex Deucher dev_err(adev->dev, "no VM inv eng for ring %s\n",
606bdbe90f0SAlex Deucher ring->name);
607bdbe90f0SAlex Deucher return -EINVAL;
608bdbe90f0SAlex Deucher }
609bdbe90f0SAlex Deucher
610bdbe90f0SAlex Deucher ring->vm_inv_eng = inv_eng - 1;
611bdbe90f0SAlex Deucher vm_inv_engs[vmhub] &= ~(1 << ring->vm_inv_eng);
612bdbe90f0SAlex Deucher
613bdbe90f0SAlex Deucher dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n",
6140530553bSLe Ma ring->name, ring->vm_inv_eng, ring->vm_hub);
615b09cdeb4S[email protected] /* SDMA has a special packet which allows it to use the same
616b09cdeb4S[email protected] * invalidation engine for all the rings in one instance.
617b09cdeb4S[email protected] * Therefore, we do not allocate a separate VM invalidation engine
618b09cdeb4S[email protected] * for SDMA page rings. Instead, they share the VM invalidation
619b09cdeb4S[email protected] * engine with the SDMA gfx ring. This change ensures efficient
620b09cdeb4S[email protected] * resource management and avoids the issue of insufficient VM
621b09cdeb4S[email protected] * invalidation engines.
622b09cdeb4S[email protected] */
623b09cdeb4S[email protected] shared_ring = amdgpu_sdma_get_shared_ring(adev, ring);
624b09cdeb4S[email protected] if (shared_ring) {
625b09cdeb4S[email protected] shared_ring->vm_inv_eng = ring->vm_inv_eng;
626b09cdeb4S[email protected] dev_info(adev->dev, "ring %s shares VM invalidation engine %u with ring %s on hub %u\n",
627b09cdeb4S[email protected] ring->name, ring->vm_inv_eng, shared_ring->name, ring->vm_hub);
628b09cdeb4S[email protected] continue;
629b09cdeb4S[email protected] }
630bdbe90f0SAlex Deucher }
631bdbe90f0SAlex Deucher
632bdbe90f0SAlex Deucher return 0;
633bdbe90f0SAlex Deucher }
634c6252390SLuben Tuikov
635a70cb217SChristian König void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
636a70cb217SChristian König uint32_t vmhub, uint32_t flush_type)
637a70cb217SChristian König {
638a70cb217SChristian König struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
639a70cb217SChristian König struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
640a70cb217SChristian König struct dma_fence *fence;
641a70cb217SChristian König struct amdgpu_job *job;
642a70cb217SChristian König int r;
643a70cb217SChristian König
644a70cb217SChristian König if (!hub->sdma_invalidation_workaround || vmid ||
645ba531117SYunxiang Li !adev->mman.buffer_funcs_enabled || !adev->ib_pool_ready ||
646a70cb217SChristian König !ring->sched.ready) {
647e2e37888SChristian König /*
648e2e37888SChristian König * A GPU reset should flush all TLBs anyway, so no need to do
649e2e37888SChristian König * this while one is ongoing.
650e2e37888SChristian König */
651e2e37888SChristian König if (!down_read_trylock(&adev->reset_domain->sem))
652e2e37888SChristian König return;
653e2e37888SChristian König
65408abccc9SChristian König if (adev->gmc.flush_tlb_needs_extra_type_2)
65508abccc9SChristian König adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid,
65608abccc9SChristian König vmhub, 2);
65708abccc9SChristian König
65808abccc9SChristian König if (adev->gmc.flush_tlb_needs_extra_type_0 && flush_type == 2)
65908abccc9SChristian König adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid,
66008abccc9SChristian König vmhub, 0);
66108abccc9SChristian König
662a70cb217SChristian König adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid, vmhub,
663a70cb217SChristian König flush_type);
664e2e37888SChristian König up_read(&adev->reset_domain->sem);
665a70cb217SChristian König return;
666a70cb217SChristian König }
667a70cb217SChristian König
668a70cb217SChristian König /* The SDMA on Navi 1x has a bug which can theoretically result in memory
669a70cb217SChristian König * corruption if an invalidation happens at the same time as a VA
670a70cb217SChristian König * translation. Avoid this by doing the invalidation from the SDMA
671a70cb217SChristian König * itself at least for GART.
672a70cb217SChristian König */
673a70cb217SChristian König mutex_lock(&adev->mman.gtt_window_lock);
674a70cb217SChristian König r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.high_pr,
675a70cb217SChristian König AMDGPU_FENCE_OWNER_UNDEFINED,
676a70cb217SChristian König 16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
677a70cb217SChristian König &job);
678a70cb217SChristian König if (r)
679a70cb217SChristian König goto error_alloc;
680a70cb217SChristian König
681a70cb217SChristian König job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
682a70cb217SChristian König job->vm_needs_flush = true;
683a70cb217SChristian König job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
684a70cb217SChristian König amdgpu_ring_pad_ib(ring, &job->ibs[0]);
685a70cb217SChristian König fence = amdgpu_job_submit(job);
686a70cb217SChristian König mutex_unlock(&adev->mman.gtt_window_lock);
687a70cb217SChristian König
688a70cb217SChristian König dma_fence_wait(fence, false);
689a70cb217SChristian König dma_fence_put(fence);
690a70cb217SChristian König
691a70cb217SChristian König return;
692a70cb217SChristian König
693a70cb217SChristian König error_alloc:
694a70cb217SChristian König mutex_unlock(&adev->mman.gtt_window_lock);
695a70cb217SChristian König dev_err(adev->dev, "Error flushing GPU TLB using the SDMA (%d)!\n", r);
696a70cb217SChristian König }
697a70cb217SChristian König
698e7b90e99SChristian König int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid,
699e7b90e99SChristian König uint32_t flush_type, bool all_hub,
700e7b90e99SChristian König uint32_t inst)
701e7b90e99SChristian König {
702e7b90e99SChristian König struct amdgpu_ring *ring = &adev->gfx.kiq[inst].ring;
703e7b90e99SChristian König struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst];
704e7b90e99SChristian König unsigned int ndw;
705*3666ed82SJay Cornwall int r, cnt = 0;
706e7b90e99SChristian König uint32_t seq;
707e7b90e99SChristian König
7089c33e5fdSYunxiang Li /*
7099c33e5fdSYunxiang Li * A GPU reset should flush all TLBs anyway, so no need to do
7109c33e5fdSYunxiang Li * this while one is ongoing.
7119c33e5fdSYunxiang Li */
7129c33e5fdSYunxiang Li if (!down_read_trylock(&adev->reset_domain->sem))
7139c33e5fdSYunxiang Li return 0;
71408abccc9SChristian König
7159c33e5fdSYunxiang Li if (!adev->gmc.flush_pasid_uses_kiq || !ring->sched.ready) {
71608abccc9SChristian König if (adev->gmc.flush_tlb_needs_extra_type_2)
71708abccc9SChristian König adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid,
71808abccc9SChristian König 2, all_hub,
71908abccc9SChristian König inst);
72008abccc9SChristian König
72108abccc9SChristian König if (adev->gmc.flush_tlb_needs_extra_type_0 && flush_type == 2)
72208abccc9SChristian König adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid,
72308abccc9SChristian König 0, all_hub,
72408abccc9SChristian König inst);
72508abccc9SChristian König
7263983c9fdSChristian König adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid,
7273983c9fdSChristian König flush_type, all_hub,
7283983c9fdSChristian König inst);
7299c33e5fdSYunxiang Li r = 0;
7309c33e5fdSYunxiang Li } else {
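/* emit the invalidation through the KIQ ring and poll the fence for completion */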
731e7b90e99SChristian König /* 2 dwords flush + 8 dwords fence */
732e7b90e99SChristian König ndw = kiq->pmf->invalidate_tlbs_size + 8;
733e7b90e99SChristian König
734e7b90e99SChristian König if (adev->gmc.flush_tlb_needs_extra_type_2)
735e7b90e99SChristian König ndw += kiq->pmf->invalidate_tlbs_size;
736e7b90e99SChristian König
737e7b90e99SChristian König if (adev->gmc.flush_tlb_needs_extra_type_0)
738e7b90e99SChristian König ndw += kiq->pmf->invalidate_tlbs_size;
739e7b90e99SChristian König
740e7b90e99SChristian König spin_lock(&adev->gfx.kiq[inst].ring_lock);
7419ff2e14cSBob Zhou r = amdgpu_ring_alloc(ring, ndw);
7429ff2e14cSBob Zhou if (r) {
7439ff2e14cSBob Zhou spin_unlock(&adev->gfx.kiq[inst].ring_lock);
7449ff2e14cSBob Zhou goto error_unlock_reset;
7459ff2e14cSBob Zhou }
746e7b90e99SChristian König if (adev->gmc.flush_tlb_needs_extra_type_2)
747e7b90e99SChristian König kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 2, all_hub);
748e7b90e99SChristian König
749e7b90e99SChristian König if (flush_type == 2 && adev->gmc.flush_tlb_needs_extra_type_0)
750e7b90e99SChristian König kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 0, all_hub);
751e7b90e99SChristian König
752e7b90e99SChristian König kiq->pmf->kiq_invalidate_tlbs(ring, pasid, flush_type, all_hub);
753e7b90e99SChristian König r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
754e7b90e99SChristian König if (r) {
755e7b90e99SChristian König amdgpu_ring_undo(ring);
756e7b90e99SChristian König spin_unlock(&adev->gfx.kiq[inst].ring_lock);
757e7b90e99SChristian König goto error_unlock_reset;
758e7b90e99SChristian König }
759e7b90e99SChristian König
760e7b90e99SChristian König amdgpu_ring_commit(ring);
761e7b90e99SChristian König spin_unlock(&adev->gfx.kiq[inst].ring_lock);
762*3666ed82SJay Cornwall
763*3666ed82SJay Cornwall r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
764*3666ed82SJay Cornwall
765*3666ed82SJay Cornwall might_sleep();
766*3666ed82SJay Cornwall while (r < 1 && cnt++ < MAX_KIQ_REG_TRY &&
767*3666ed82SJay Cornwall !amdgpu_reset_pending(adev->reset_domain)) {
768*3666ed82SJay Cornwall msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
769*3666ed82SJay Cornwall r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
770*3666ed82SJay Cornwall }
771*3666ed82SJay Cornwall
772*3666ed82SJay Cornwall if (cnt > MAX_KIQ_REG_TRY) {
7739c33e5fdSYunxiang Li dev_err(adev->dev, "timeout waiting for kiq fence\n");
774e7b90e99SChristian König r = -ETIME;
775*3666ed82SJay Cornwall } else
776*3666ed82SJay Cornwall r = 0;
7779c33e5fdSYunxiang Li }
778e7b90e99SChristian König
779e7b90e99SChristian König error_unlock_reset:
780e7b90e99SChristian König up_read(&adev->reset_domain->sem);
781e7b90e99SChristian König return r;
782e7b90e99SChristian König }
783e7b90e99SChristian König
78426405ff4SAlex Deucher void amdgpu_gmc_fw_reg_write_reg_wait(struct amdgpu_device *adev,
78526405ff4SAlex Deucher uint32_t reg0, uint32_t reg1,
78626405ff4SAlex Deucher uint32_t ref, uint32_t mask,
78726405ff4SAlex Deucher uint32_t xcc_inst)
78826405ff4SAlex Deucher {
78926405ff4SAlex Deucher struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_inst];
79026405ff4SAlex Deucher struct amdgpu_ring *ring = &kiq->ring;
79126405ff4SAlex Deucher signed long r, cnt = 0;
79226405ff4SAlex Deucher unsigned long flags;
79326405ff4SAlex Deucher uint32_t seq;
79426405ff4SAlex Deucher
795c7d43556SJack Xiao if (adev->mes.ring[0].sched.ready) {
79626405ff4SAlex Deucher amdgpu_mes_reg_write_reg_wait(adev, reg0, reg1,
79726405ff4SAlex Deucher ref, mask);
79826405ff4SAlex Deucher return;
79926405ff4SAlex Deucher }
80026405ff4SAlex Deucher
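/* no MES ring available, fall back to emitting the request through the KIQ */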
80126405ff4SAlex Deucher spin_lock_irqsave(&kiq->ring_lock, flags);
80226405ff4SAlex Deucher amdgpu_ring_alloc(ring, 32);
80326405ff4SAlex Deucher amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,
80426405ff4SAlex Deucher ref, mask);
80526405ff4SAlex Deucher r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
80626405ff4SAlex Deucher if (r)
80726405ff4SAlex Deucher goto failed_undo;
80826405ff4SAlex Deucher
80926405ff4SAlex Deucher amdgpu_ring_commit(ring);
81026405ff4SAlex Deucher spin_unlock_irqrestore(&kiq->ring_lock, flags);
81126405ff4SAlex Deucher
81226405ff4SAlex Deucher r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
81326405ff4SAlex Deucher
81426405ff4SAlex Deucher /* don't wait anymore for IRQ context */
81526405ff4SAlex Deucher if (r < 1 && in_interrupt())
81626405ff4SAlex Deucher goto failed_kiq;
81726405ff4SAlex Deucher
81826405ff4SAlex Deucher might_sleep();
81919cff165SVictor Skvortsov while (r < 1 && cnt++ < MAX_KIQ_REG_TRY &&
82019cff165SVictor Skvortsov !amdgpu_reset_pending(adev->reset_domain)) {
82126405ff4SAlex Deucher
82226405ff4SAlex Deucher msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
82326405ff4SAlex Deucher r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
82426405ff4SAlex Deucher }
82526405ff4SAlex Deucher
82626405ff4SAlex Deucher if (cnt > MAX_KIQ_REG_TRY)
82726405ff4SAlex Deucher goto failed_kiq;
82826405ff4SAlex Deucher
82926405ff4SAlex Deucher return;
83026405ff4SAlex Deucher
83126405ff4SAlex Deucher failed_undo:
83226405ff4SAlex Deucher amdgpu_ring_undo(ring);
83326405ff4SAlex Deucher spin_unlock_irqrestore(&kiq->ring_lock, flags);
83426405ff4SAlex Deucher failed_kiq:
83526405ff4SAlex Deucher dev_err(adev->dev, "failed to write reg %x wait reg %x\n", reg0, reg1);
83626405ff4SAlex Deucher }
83726405ff4SAlex Deucher
838c6252390SLuben Tuikov /**
839590a74c6SLee Jones * amdgpu_gmc_tmz_set -- check and set if a device supports TMZ
840c6252390SLuben Tuikov * @adev: amdgpu_device pointer
841c6252390SLuben Tuikov *
842c6252390SLuben Tuikov * Check and set if the device @adev supports Trusted Memory
843c6252390SLuben Tuikov * Zones (TMZ).
844c6252390SLuben Tuikov */
845c6252390SLuben Tuikov void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)
846c6252390SLuben Tuikov {
8474e8303cfSLijo Lazar switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
8480ef3dc7eSSunil Khatri /* RAVEN */
8490ef3dc7eSSunil Khatri case IP_VERSION(9, 2, 2):
8500ef3dc7eSSunil Khatri case IP_VERSION(9, 1, 0):
8510ef3dc7eSSunil Khatri /* RENOIR looks like RAVEN */
8520ef3dc7eSSunil Khatri case IP_VERSION(9, 3, 0):
85337101730SSunil Khatri /* GC 10.3.7 */
85437101730SSunil Khatri case IP_VERSION(10, 3, 7):
8553a25071aSIkshwaku Chauhan /* GC 11.0.1 */
8563a25071aSIkshwaku Chauhan case IP_VERSION(11, 0, 1):
85758aa7790SAlex Deucher if (amdgpu_tmz == 0) {
85858aa7790SAlex Deucher adev->gmc.tmz_enabled = false;
85958aa7790SAlex Deucher dev_info(adev->dev,
86058aa7790SAlex Deucher "Trusted Memory Zone (TMZ) feature disabled (cmd line)\n");
86158aa7790SAlex Deucher } else {
86258aa7790SAlex Deucher adev->gmc.tmz_enabled = true;
86358aa7790SAlex Deucher dev_info(adev->dev,
86458aa7790SAlex Deucher "Trusted Memory Zone (TMZ) feature enabled\n");
86558aa7790SAlex Deucher }
86658aa7790SAlex Deucher break;
8670ef3dc7eSSunil Khatri case IP_VERSION(10, 1, 10):
8680ef3dc7eSSunil Khatri case IP_VERSION(10, 1, 1):
8690ef3dc7eSSunil Khatri case IP_VERSION(10, 1, 2):
8700ef3dc7eSSunil Khatri case IP_VERSION(10, 1, 3):
8710ef3dc7eSSunil Khatri case IP_VERSION(10, 3, 0):
8720ef3dc7eSSunil Khatri case IP_VERSION(10, 3, 2):
8730ef3dc7eSSunil Khatri case IP_VERSION(10, 3, 4):
8740ef3dc7eSSunil Khatri case IP_VERSION(10, 3, 5):
87508c6ab7fSJesse Zhang case IP_VERSION(10, 3, 6):
8760ef3dc7eSSunil Khatri /* VANGOGH */
8770ef3dc7eSSunil Khatri case IP_VERSION(10, 3, 1):
8780ef3dc7eSSunil Khatri /* YELLOW_CARP*/
8790ef3dc7eSSunil Khatri case IP_VERSION(10, 3, 3):
8802aecbe49STim Huang case IP_VERSION(11, 0, 4):
881037fb9c6SJiadong Zhu case IP_VERSION(11, 5, 0):
8822612c831SYifan Zhang case IP_VERSION(11, 5, 1):
88398392782STim Huang case IP_VERSION(11, 5, 2):
884b784faebSTim Huang case IP_VERSION(11, 5, 3):
885b71a564eSLuben Tuikov /* Don't enable it by default yet.
886b71a564eSLuben Tuikov */
887b71a564eSLuben Tuikov if (amdgpu_tmz < 1) {
888b71a564eSLuben Tuikov adev->gmc.tmz_enabled = false;
889b71a564eSLuben Tuikov dev_info(adev->dev,
890b71a564eSLuben Tuikov "Trusted Memory Zone (TMZ) feature disabled as experimental (default)\n");
891b71a564eSLuben Tuikov } else {
892b71a564eSLuben Tuikov adev->gmc.tmz_enabled = true;
893b71a564eSLuben Tuikov dev_info(adev->dev,
894b71a564eSLuben Tuikov "Trusted Memory Zone (TMZ) feature enabled as experimental (cmd line)\n");
895b71a564eSLuben Tuikov }
896b71a564eSLuben Tuikov break;
897b71a564eSLuben Tuikov default:
898c6252390SLuben Tuikov adev->gmc.tmz_enabled = false;
89931ea4344SPaul Menzel dev_info(adev->dev,
900c6252390SLuben Tuikov "Trusted Memory Zone (TMZ) feature not supported\n");
901b71a564eSLuben Tuikov break;
902c6252390SLuben Tuikov }
903c6252390SLuben Tuikov }
904f2c1b5c1SHuang Rui
9059b498efaSAlex Deucher /**
906590a74c6SLee Jones * amdgpu_gmc_noretry_set -- set per asic noretry defaults
9079b498efaSAlex Deucher * @adev: amdgpu_device pointer
9089b498efaSAlex Deucher *
9099b498efaSAlex Deucher * Set a per asic default for the no-retry parameter.
9109b498efaSAlex Deucher *
9119b498efaSAlex Deucher */
9129b498efaSAlex Deucher void amdgpu_gmc_noretry_set(struct amdgpu_device *adev)
9139b498efaSAlex Deucher {
9149b498efaSAlex Deucher struct amdgpu_gmc *gmc = &adev->gmc;
9154e8303cfSLijo Lazar uint32_t gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0);
916e6713557SGraham Sider bool noretry_default = (gc_ver == IP_VERSION(9, 0, 1) ||
917e6713557SGraham Sider gc_ver == IP_VERSION(9, 4, 0) ||
918e6713557SGraham Sider gc_ver == IP_VERSION(9, 4, 1) ||
919e6713557SGraham Sider gc_ver == IP_VERSION(9, 4, 2) ||
920cebbfdd5SAmber Lin gc_ver == IP_VERSION(9, 4, 3) ||
9215f571c61SHawking Zhang gc_ver == IP_VERSION(9, 4, 4) ||
92246d0436aSAmber Lin gc_ver == IP_VERSION(9, 5, 0) ||
923e6713557SGraham Sider gc_ver >= IP_VERSION(10, 3, 0));
9249b498efaSAlex Deucher
9259256e8d4SSurbhi Kakarya if (!amdgpu_sriov_xnack_support(adev))
9269256e8d4SSurbhi Kakarya gmc->noretry = 1;
9279256e8d4SSurbhi Kakarya else
928e6713557SGraham Sider gmc->noretry = (amdgpu_noretry == -1) ? noretry_default : amdgpu_noretry;
9299b498efaSAlex Deucher }
9309b498efaSAlex Deucher
931f2c1b5c1SHuang Rui void amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type,
932f2c1b5c1SHuang Rui bool enable)
933f2c1b5c1SHuang Rui {
934f2c1b5c1SHuang Rui struct amdgpu_vmhub *hub;
935f2c1b5c1SHuang Rui u32 tmp, reg, i;
936f2c1b5c1SHuang Rui
937f2c1b5c1SHuang Rui hub = &adev->vmhub[hub_type];
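/* toggle the VM fault enable bits for all 16 VM contexts on this hub */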
938f2c1b5c1SHuang Rui for (i = 0; i < 16; i++) {
939f2c1b5c1SHuang Rui reg = hub->vm_context0_cntl + hub->ctx_distance * i;
940f2c1b5c1SHuang Rui
941f4caf584SHawking Zhang tmp = (hub_type == AMDGPU_GFXHUB(0)) ?
9426ba3f59eSPeng Ju Zhou RREG32_SOC15_IP(GC, reg) :
9436ba3f59eSPeng Ju Zhou RREG32_SOC15_IP(MMHUB, reg);
9446ba3f59eSPeng Ju Zhou
945f2c1b5c1SHuang Rui if (enable)
946f2c1b5c1SHuang Rui tmp |= hub->vm_cntx_cntl_vm_fault;
947f2c1b5c1SHuang Rui else
948f2c1b5c1SHuang Rui tmp &= ~hub->vm_cntx_cntl_vm_fault;
949f2c1b5c1SHuang Rui
950f4caf584SHawking Zhang (hub_type == AMDGPU_GFXHUB(0)) ?
9516ba3f59eSPeng Ju Zhou WREG32_SOC15_IP(GC, reg, tmp) :
9526ba3f59eSPeng Ju Zhou WREG32_SOC15_IP(MMHUB, reg, tmp);
953f2c1b5c1SHuang Rui }
954f2c1b5c1SHuang Rui }
955dd285c5dSAlex Deucher
956dd285c5dSAlex Deucher void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev)
957dd285c5dSAlex Deucher {
958dd285c5dSAlex Deucher unsigned size;
959dd285c5dSAlex Deucher
960dd285c5dSAlex Deucher /*
9613f543552SYongqiang Sun * Some ASICs need to reserve a region of video memory to avoid access
9623f543552SYongqiang Sun * from the driver
9633f543552SYongqiang Sun */
9643f543552SYongqiang Sun adev->mman.stolen_reserved_offset = 0;
9653f543552SYongqiang Sun adev->mman.stolen_reserved_size = 0;
9663f543552SYongqiang Sun
9673f543552SYongqiang Sun /*
968dd285c5dSAlex Deucher * TODO:
969dd285c5dSAlex Deucher * Currently there is a bug where some memory client outside
970dd285c5dSAlex Deucher * of the driver writes to first 8M of VRAM on S3 resume,
971dd285c5dSAlex Deucher * this overrides GART which by default gets placed in first 8M and
972dd285c5dSAlex Deucher * causes VM_FAULTS once GTT is accessed.
973dd285c5dSAlex Deucher * Keep the stolen memory reservation while this is not solved.
974dd285c5dSAlex Deucher */
975dd285c5dSAlex Deucher switch (adev->asic_type) {
976dd285c5dSAlex Deucher case CHIP_VEGA10:
977faad5ccaSYongqiang Sun adev->mman.keep_stolen_vga_memory = true;
978faad5ccaSYongqiang Sun /*
97949aa98caSYongqiang Sun * VEGA10 SRIOV VF with MS_HYPERV host needs some firmware reserved area.
980faad5ccaSYongqiang Sun */
981d9e50239SYongqiang Sun #ifdef CONFIG_X86
98249aa98caSYongqiang Sun if (amdgpu_sriov_vf(adev) && hypervisor_is_type(X86_HYPER_MS_HYPERV)) {
98349aa98caSYongqiang Sun adev->mman.stolen_reserved_offset = 0x500000;
98449aa98caSYongqiang Sun adev->mman.stolen_reserved_size = 0x200000;
985faad5ccaSYongqiang Sun }
986d9e50239SYongqiang Sun #endif
987faad5ccaSYongqiang Sun break;
988dd285c5dSAlex Deucher case CHIP_RAVEN:
989dd285c5dSAlex Deucher case CHIP_RENOIR:
990cacbbe7cSAlex Deucher adev->mman.keep_stolen_vga_memory = true;
991dd285c5dSAlex Deucher break;
992dd285c5dSAlex Deucher default:
993cacbbe7cSAlex Deucher adev->mman.keep_stolen_vga_memory = false;
994dd285c5dSAlex Deucher break;
995dd285c5dSAlex Deucher }
996dd285c5dSAlex Deucher
997088fb29bSAlex Deucher if (amdgpu_sriov_vf(adev) ||
998220c8cc8SAlex Deucher !amdgpu_device_has_display_hardware(adev)) {
999dd285c5dSAlex Deucher size = 0;
10007eded018SAlex Deucher } else {
1001dd285c5dSAlex Deucher size = amdgpu_gmc_get_vbios_fb_size(adev);
1002dd285c5dSAlex Deucher
100330018679SAlex Deucher if (adev->mman.keep_stolen_vga_memory)
100430018679SAlex Deucher size = max(size, (unsigned)AMDGPU_VBIOS_VGA_ALLOCATION);
10057eded018SAlex Deucher }
100630018679SAlex Deucher
1007dd285c5dSAlex Deucher /* set to 0 if the pre-OS buffer uses up most of vram */
1008dd285c5dSAlex Deucher if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024))
1009dd285c5dSAlex Deucher size = 0;
1010dd285c5dSAlex Deucher
1011dd285c5dSAlex Deucher if (size > AMDGPU_VBIOS_VGA_ALLOCATION) {
1012cacbbe7cSAlex Deucher adev->mman.stolen_vga_size = AMDGPU_VBIOS_VGA_ALLOCATION;
1013cacbbe7cSAlex Deucher adev->mman.stolen_extended_size = size - adev->mman.stolen_vga_size;
1014dd285c5dSAlex Deucher } else {
1015cacbbe7cSAlex Deucher adev->mman.stolen_vga_size = size;
1016cacbbe7cSAlex Deucher adev->mman.stolen_extended_size = 0;
1017dd285c5dSAlex Deucher }
1018dd285c5dSAlex Deucher }
1019a2902c09SOak Zeng
1020a2902c09SOak Zeng /**
1021a2902c09SOak Zeng * amdgpu_gmc_init_pdb0 - initialize PDB0
1022a2902c09SOak Zeng *
1023a2902c09SOak Zeng * @adev: amdgpu_device pointer
1024a2902c09SOak Zeng *
1025a2902c09SOak Zeng * This function is only used when GART page table is used
1026a2902c09SOak Zeng * for FB address translation. In such a case, we construct
1027a2902c09SOak Zeng * a 2-level system VM page table: PDB0->PTB, to cover both
1028a2902c09SOak Zeng * VRAM of the hive and system memory.
1029a2902c09SOak Zeng *
1030a2902c09SOak Zeng * PDB0 is static, initialized once on driver initialization.
1031a2902c09SOak Zeng * The first n entries of PDB0 are used as PTE by setting
1032a2902c09SOak Zeng * P bit to 1, pointing to VRAM. The n+1'th entry points
1033a2902c09SOak Zeng * to a big PTB covering system memory.
1034a2902c09SOak Zeng *
1035a2902c09SOak Zeng */
1036a2902c09SOak Zeng void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
1037a2902c09SOak Zeng {
1038a2902c09SOak Zeng int i;
1039a2902c09SOak Zeng uint64_t flags = adev->gart.gart_pte_flags; //TODO it is UC. explore NC/RW?
1040a2902c09SOak Zeng /* Each PDE0 (used as PTE) covers (2^vmid0_page_table_block_size)*2M
1041a2902c09SOak Zeng */
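/* e.g. a block size of 9 means each PDE0 entry covers 2^9 * 2M = 1G of VRAM */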
1042a2902c09SOak Zeng u64 vram_size = adev->gmc.xgmi.node_segment_size * adev->gmc.xgmi.num_physical_nodes;
1043a2902c09SOak Zeng u64 pde0_page_size = (1ULL<<adev->gmc.vmid0_page_table_block_size)<<21;
1044a2902c09SOak Zeng u64 vram_addr = adev->vm_manager.vram_base_offset -
1045a2902c09SOak Zeng adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
1046a2902c09SOak Zeng u64 vram_end = vram_addr + vram_size;
10470ca565abSOak Zeng u64 gart_ptb_gpu_pa = amdgpu_gmc_vram_pa(adev, adev->gart.bo);
1048b2fe31cfSxinhui pan int idx;
1049b2fe31cfSxinhui pan
1050c58a863bSGuchun Chen if (!drm_dev_enter(adev_to_drm(adev), &idx))
1051b2fe31cfSxinhui pan return;
1052a2902c09SOak Zeng
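	/* PTE flags for the PDB0 entries that map VRAM directly */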
1053a2902c09SOak Zeng flags |= AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE;
1054a2902c09SOak Zeng flags |= AMDGPU_PTE_WRITEABLE;
1055a2902c09SOak Zeng flags |= AMDGPU_PTE_SNOOPED;
1056a2902c09SOak Zeng flags |= AMDGPU_PTE_FRAG((adev->gmc.vmid0_page_table_block_size + 9*1));
1057980a0a94SHawking Zhang flags |= AMDGPU_PDE_PTE_FLAG(adev);
1058a2902c09SOak Zeng
1059a2902c09SOak Zeng /* The first n PDE0 entries are used as PTE,
1060a2902c09SOak Zeng * pointing to vram
1061a2902c09SOak Zeng */
1062a2902c09SOak Zeng for (i = 0; vram_addr < vram_end; i++, vram_addr += pde0_page_size)
1063a2902c09SOak Zeng amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i, vram_addr, flags);
1064a2902c09SOak Zeng
1065a2902c09SOak Zeng 	/* The (n+1)th PDE0 entry points to a huge
1066a2902c09SOak Zeng 	 * PTB which has more than 512 entries, each
1067a2902c09SOak Zeng 	 * pointing to a 4K system page
1068a2902c09SOak Zeng */
106979194dacSOak Zeng flags = AMDGPU_PTE_VALID;
1070980a0a94SHawking Zhang flags |= AMDGPU_PTE_SNOOPED | AMDGPU_PDE_BFS_FLAG(adev, 0);
1071a2902c09SOak Zeng /* Requires gart_ptb_gpu_pa to be 4K aligned */
1072a2902c09SOak Zeng amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i, gart_ptb_gpu_pa, flags);
1073b2fe31cfSxinhui pan drm_dev_exit(idx);
1074a2902c09SOak Zeng }
1075dead5e42SOak Zeng
1076dead5e42SOak Zeng /**
1077dead5e42SOak Zeng * amdgpu_gmc_vram_mc2pa - calculate vram buffer's physical address from MC
1078dead5e42SOak Zeng * address
1079dead5e42SOak Zeng *
1080dead5e42SOak Zeng * @adev: amdgpu_device pointer
1081dead5e42SOak Zeng * @mc_addr: MC address of buffer
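 *
 * Returns:
 * The buffer's physical address corresponding to @mc_addr.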
1082dead5e42SOak Zeng */
1083dead5e42SOak Zeng uint64_t amdgpu_gmc_vram_mc2pa(struct amdgpu_device *adev, uint64_t mc_addr)
1084dead5e42SOak Zeng {
1085dead5e42SOak Zeng return mc_addr - adev->gmc.vram_start + adev->vm_manager.vram_base_offset;
1086dead5e42SOak Zeng }
1087dead5e42SOak Zeng
1088dead5e42SOak Zeng /**
1089dead5e42SOak Zeng * amdgpu_gmc_vram_pa - calculate vram buffer object's physical address from
1090dead5e42SOak Zeng * GPU's view
1091dead5e42SOak Zeng *
1092dead5e42SOak Zeng * @adev: amdgpu_device pointer
1093dead5e42SOak Zeng * @bo: amdgpu buffer object
1094dead5e42SOak Zeng */
1095dead5e42SOak Zeng uint64_t amdgpu_gmc_vram_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo)
1096dead5e42SOak Zeng {
1097dead5e42SOak Zeng return amdgpu_gmc_vram_mc2pa(adev, amdgpu_bo_gpu_offset(bo));
1098dead5e42SOak Zeng }
1099dead5e42SOak Zeng
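/**
 * amdgpu_gmc_vram_checking - sanity check CPU access to VRAM
 *
 * @adev: amdgpu_device pointer
 *
 * Allocate a 1MB kernel BO in VRAM, fill it with a known pattern and
 * verify the pattern at the start, the middle and the end of the buffer.
 * Returns 0 when the check passes, a negative error code otherwise.
 */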
1100479e3b02SXiaojian Du int amdgpu_gmc_vram_checking(struct amdgpu_device *adev)
1101479e3b02SXiaojian Du {
1102a357dca9SXiaojian Du struct amdgpu_bo *vram_bo = NULL;
1103a357dca9SXiaojian Du uint64_t vram_gpu = 0;
1104a357dca9SXiaojian Du void *vram_ptr = NULL;
1105479e3b02SXiaojian Du
1106479e3b02SXiaojian Du int ret, size = 0x100000;
1107479e3b02SXiaojian Du uint8_t cptr[10];
1108479e3b02SXiaojian Du
1109479e3b02SXiaojian Du ret = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
1110479e3b02SXiaojian Du AMDGPU_GEM_DOMAIN_VRAM,
1111479e3b02SXiaojian Du &vram_bo,
1112479e3b02SXiaojian Du &vram_gpu,
1113479e3b02SXiaojian Du &vram_ptr);
1114479e3b02SXiaojian Du if (ret)
1115479e3b02SXiaojian Du return ret;
1116479e3b02SXiaojian Du
1117479e3b02SXiaojian Du memset(vram_ptr, 0x86, size);
1118479e3b02SXiaojian Du memset(cptr, 0x86, 10);
1119479e3b02SXiaojian Du
1120479e3b02SXiaojian Du 	/*
1121479e3b02SXiaojian Du 	 * Check the start, the middle and the end of the buffer to see whether
1122479e3b02SXiaojian Du 	 * each byte still holds the pattern 0x86. If so, we assume the VRAM BO
1123479e3b02SXiaojian Du 	 * is usable.
1124479e3b02SXiaojian Du 	 *
1125479e3b02SXiaojian Du 	 * Note: checking every byte of the whole 1M BO would take too long,
1126479e3b02SXiaojian Du 	 * so only these three locations are sampled.
1127479e3b02SXiaojian Du 	 */
1128479e3b02SXiaojian Du ret = memcmp(vram_ptr, cptr, 10);
1129fac4ebd7SSrinivasan Shanmugam if (ret) {
1130fac4ebd7SSrinivasan Shanmugam ret = -EIO;
1131fac4ebd7SSrinivasan Shanmugam goto release_buffer;
1132fac4ebd7SSrinivasan Shanmugam }
1133479e3b02SXiaojian Du
1134479e3b02SXiaojian Du ret = memcmp(vram_ptr + (size / 2), cptr, 10);
1135fac4ebd7SSrinivasan Shanmugam if (ret) {
1136fac4ebd7SSrinivasan Shanmugam ret = -EIO;
1137fac4ebd7SSrinivasan Shanmugam goto release_buffer;
1138fac4ebd7SSrinivasan Shanmugam }
1139479e3b02SXiaojian Du
1140479e3b02SXiaojian Du ret = memcmp(vram_ptr + size - 10, cptr, 10);
1141fac4ebd7SSrinivasan Shanmugam if (ret) {
1142fac4ebd7SSrinivasan Shanmugam ret = -EIO;
1143fac4ebd7SSrinivasan Shanmugam goto release_buffer;
1144fac4ebd7SSrinivasan Shanmugam }
1145479e3b02SXiaojian Du
1146fac4ebd7SSrinivasan Shanmugam release_buffer:
1147479e3b02SXiaojian Du amdgpu_bo_free_kernel(&vram_bo, &vram_gpu,
1148479e3b02SXiaojian Du &vram_ptr);
1149479e3b02SXiaojian Du
1150fac4ebd7SSrinivasan Shanmugam return ret;
1151479e3b02SXiaojian Du }
1152b6f90baaSLijo Lazar
1153012be6f2SLijo Lazar static const char *nps_desc[] = {
1154012be6f2SLijo Lazar [AMDGPU_NPS1_PARTITION_MODE] = "NPS1",
1155012be6f2SLijo Lazar [AMDGPU_NPS2_PARTITION_MODE] = "NPS2",
1156012be6f2SLijo Lazar [AMDGPU_NPS3_PARTITION_MODE] = "NPS3",
1157012be6f2SLijo Lazar [AMDGPU_NPS4_PARTITION_MODE] = "NPS4",
1158012be6f2SLijo Lazar [AMDGPU_NPS6_PARTITION_MODE] = "NPS6",
1159012be6f2SLijo Lazar [AMDGPU_NPS8_PARTITION_MODE] = "NPS8",
1160012be6f2SLijo Lazar };
1161012be6f2SLijo Lazar
1162012be6f2SLijo Lazar static ssize_t available_memory_partition_show(struct device *dev,
1163012be6f2SLijo Lazar struct device_attribute *addr,
1164012be6f2SLijo Lazar char *buf)
1165012be6f2SLijo Lazar {
1166012be6f2SLijo Lazar struct drm_device *ddev = dev_get_drvdata(dev);
1167012be6f2SLijo Lazar struct amdgpu_device *adev = drm_to_adev(ddev);
1168012be6f2SLijo Lazar int size = 0, mode;
1169012be6f2SLijo Lazar char *sep = "";
1170012be6f2SLijo Lazar
1171012be6f2SLijo Lazar for_each_inst(mode, adev->gmc.supported_nps_modes) {
1172012be6f2SLijo Lazar size += sysfs_emit_at(buf, size, "%s%s", sep, nps_desc[mode]);
1173012be6f2SLijo Lazar sep = ", ";
1174012be6f2SLijo Lazar }
1175012be6f2SLijo Lazar size += sysfs_emit_at(buf, size, "\n");
1176012be6f2SLijo Lazar
1177012be6f2SLijo Lazar return size;
1178012be6f2SLijo Lazar }
1179012be6f2SLijo Lazar
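/* Record the NPS mode requested through sysfs; the actual switch only takes
 * effect after the driver is unloaded and reloaded.
 */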
1180012be6f2SLijo Lazar static ssize_t current_memory_partition_store(struct device *dev,
1181012be6f2SLijo Lazar struct device_attribute *attr,
1182012be6f2SLijo Lazar const char *buf, size_t count)
1183012be6f2SLijo Lazar {
1184012be6f2SLijo Lazar struct drm_device *ddev = dev_get_drvdata(dev);
1185012be6f2SLijo Lazar struct amdgpu_device *adev = drm_to_adev(ddev);
1186012be6f2SLijo Lazar enum amdgpu_memory_partition mode;
1187012be6f2SLijo Lazar struct amdgpu_hive_info *hive;
1188012be6f2SLijo Lazar int i;
1189012be6f2SLijo Lazar
1190012be6f2SLijo Lazar mode = UNKNOWN_MEMORY_PARTITION_MODE;
1191012be6f2SLijo Lazar for_each_inst(i, adev->gmc.supported_nps_modes) {
1192012be6f2SLijo Lazar if (!strncasecmp(nps_desc[i], buf, strlen(nps_desc[i]))) {
1193012be6f2SLijo Lazar mode = i;
1194012be6f2SLijo Lazar break;
1195012be6f2SLijo Lazar }
1196012be6f2SLijo Lazar }
1197012be6f2SLijo Lazar
1198012be6f2SLijo Lazar if (mode == UNKNOWN_MEMORY_PARTITION_MODE)
1199012be6f2SLijo Lazar return -EINVAL;
1200012be6f2SLijo Lazar
1201012be6f2SLijo Lazar if (mode == adev->gmc.gmc_funcs->query_mem_partition_mode(adev)) {
1202012be6f2SLijo Lazar dev_info(
1203012be6f2SLijo Lazar adev->dev,
1204012be6f2SLijo Lazar "requested NPS mode is same as current NPS mode, skipping\n");
1205012be6f2SLijo Lazar return count;
1206012be6f2SLijo Lazar }
1207012be6f2SLijo Lazar
1208012be6f2SLijo Lazar /* If device is part of hive, all devices in the hive should request the
1209012be6f2SLijo Lazar * same mode. Hence store the requested mode in hive.
1210012be6f2SLijo Lazar */
1211012be6f2SLijo Lazar hive = amdgpu_get_xgmi_hive(adev);
1212012be6f2SLijo Lazar if (hive) {
1213012be6f2SLijo Lazar atomic_set(&hive->requested_nps_mode, mode);
1214012be6f2SLijo Lazar amdgpu_put_xgmi_hive(hive);
1215012be6f2SLijo Lazar } else {
1216012be6f2SLijo Lazar adev->gmc.requested_nps_mode = mode;
1217012be6f2SLijo Lazar }
1218012be6f2SLijo Lazar
1219012be6f2SLijo Lazar dev_info(
1220012be6f2SLijo Lazar adev->dev,
1221012be6f2SLijo Lazar "NPS mode change requested, please remove and reload the driver\n");
1222012be6f2SLijo Lazar
1223012be6f2SLijo Lazar return count;
1224012be6f2SLijo Lazar }
1225012be6f2SLijo Lazar
1226b6f90baaSLijo Lazar static ssize_t current_memory_partition_show(
1227b6f90baaSLijo Lazar struct device *dev, struct device_attribute *addr, char *buf)
1228b6f90baaSLijo Lazar {
1229b6f90baaSLijo Lazar struct drm_device *ddev = dev_get_drvdata(dev);
1230b6f90baaSLijo Lazar struct amdgpu_device *adev = drm_to_adev(ddev);
1231b6f90baaSLijo Lazar enum amdgpu_memory_partition mode;
1232b6f90baaSLijo Lazar
1233b6f90baaSLijo Lazar mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
12349f7e94afSDan Carpenter if ((mode >= ARRAY_SIZE(nps_desc)) ||
1235012be6f2SLijo Lazar (BIT(mode) & AMDGPU_ALL_NPS_MASK) != BIT(mode))
1236b6f90baaSLijo Lazar return sysfs_emit(buf, "UNKNOWN\n");
1237012be6f2SLijo Lazar
1238012be6f2SLijo Lazar return sysfs_emit(buf, "%s\n", nps_desc[mode]);
1239b6f90baaSLijo Lazar }
1240b6f90baaSLijo Lazar
1241012be6f2SLijo Lazar static DEVICE_ATTR_RW(current_memory_partition);
1242012be6f2SLijo Lazar static DEVICE_ATTR_RO(available_memory_partition);
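/* With these attributes in place, a mode change can be requested from
 * userspace, e.g. (illustrative sysfs path, assuming the standard PCI
 * device directory):
 *   echo NPS4 > /sys/bus/pci/devices/<bdf>/current_memory_partition
 * and takes effect after the driver is reloaded.
 */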
1243b6f90baaSLijo Lazar
1244b6f90baaSLijo Lazar int amdgpu_gmc_sysfs_init(struct amdgpu_device *adev)
1245b6f90baaSLijo Lazar {
1246012be6f2SLijo Lazar bool nps_switch_support;
1247012be6f2SLijo Lazar int r = 0;
1248012be6f2SLijo Lazar
1249b6f90baaSLijo Lazar if (!adev->gmc.gmc_funcs->query_mem_partition_mode)
1250b6f90baaSLijo Lazar return 0;
1251b6f90baaSLijo Lazar
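	/* Without at least two supported NPS modes there is nothing to switch
	 * to: make current_memory_partition read-only and skip creating the
	 * available_memory_partition file.
	 */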
1252012be6f2SLijo Lazar nps_switch_support = (hweight32(adev->gmc.supported_nps_modes &
1253012be6f2SLijo Lazar AMDGPU_ALL_NPS_MASK) > 1);
1254012be6f2SLijo Lazar if (!nps_switch_support)
1255012be6f2SLijo Lazar dev_attr_current_memory_partition.attr.mode &=
1256012be6f2SLijo Lazar ~(S_IWUSR | S_IWGRP | S_IWOTH);
1257012be6f2SLijo Lazar else
1258012be6f2SLijo Lazar r = device_create_file(adev->dev,
1259012be6f2SLijo Lazar &dev_attr_available_memory_partition);
1260012be6f2SLijo Lazar
1261012be6f2SLijo Lazar if (r)
1262012be6f2SLijo Lazar return r;
1263012be6f2SLijo Lazar
1264b6f90baaSLijo Lazar return device_create_file(adev->dev,
1265b6f90baaSLijo Lazar &dev_attr_current_memory_partition);
1266b6f90baaSLijo Lazar }
1267b6f90baaSLijo Lazar
1268b6f90baaSLijo Lazar void amdgpu_gmc_sysfs_fini(struct amdgpu_device *adev)
1269b6f90baaSLijo Lazar {
1270012be6f2SLijo Lazar if (!adev->gmc.gmc_funcs->query_mem_partition_mode)
1271012be6f2SLijo Lazar return;
1272012be6f2SLijo Lazar
1273b6f90baaSLijo Lazar device_remove_file(adev->dev, &dev_attr_current_memory_partition);
1274012be6f2SLijo Lazar device_remove_file(adev->dev, &dev_attr_available_memory_partition);
1275b6f90baaSLijo Lazar }
1276b194d21bSLijo Lazar
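/**
 * amdgpu_gmc_get_nps_memranges - fetch NPS memory ranges from IP discovery
 *
 * @adev: amdgpu_device pointer
 * @mem_ranges: array to fill with per-partition page frame ranges
 * @exp_ranges: expected number of ranges; if zero, it is updated to the
 *              number of ranges reported by discovery
 *
 * Translate the NPS ranges reported by IP discovery into GPU page frame
 * ranges and validate that they are well formed and non-overlapping.
 * Returns 0 on success, a negative error code otherwise.
 */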
1277b194d21bSLijo Lazar int amdgpu_gmc_get_nps_memranges(struct amdgpu_device *adev,
1278b194d21bSLijo Lazar struct amdgpu_mem_partition_info *mem_ranges,
1279b3c68716SLijo Lazar uint8_t *exp_ranges)
1280b194d21bSLijo Lazar {
1281b194d21bSLijo Lazar struct amdgpu_gmc_memrange *ranges;
1282b194d21bSLijo Lazar int range_cnt, ret, i, j;
1283b194d21bSLijo Lazar uint32_t nps_type;
1284ed3dac4bSLijo Lazar bool refresh;
1285b194d21bSLijo Lazar
1286b3c68716SLijo Lazar if (!mem_ranges || !exp_ranges)
1287b194d21bSLijo Lazar return -EINVAL;
1288b194d21bSLijo Lazar
1289ed3dac4bSLijo Lazar refresh = (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) &&
1290ed3dac4bSLijo Lazar (adev->gmc.reset_flags & AMDGPU_GMC_INIT_RESET_NPS);
1291b194d21bSLijo Lazar ret = amdgpu_discovery_get_nps_info(adev, &nps_type, &ranges,
1292ed3dac4bSLijo Lazar &range_cnt, refresh);
1293b194d21bSLijo Lazar
1294b194d21bSLijo Lazar if (ret)
1295b194d21bSLijo Lazar return ret;
1296b194d21bSLijo Lazar
1297b194d21bSLijo Lazar /* TODO: For now, expect ranges and partition count to be the same.
1298b194d21bSLijo Lazar * Adjust if there are holes expected in any NPS domain.
1299b194d21bSLijo Lazar */
1300b3c68716SLijo Lazar if (*exp_ranges && (range_cnt != *exp_ranges)) {
1301b194d21bSLijo Lazar dev_warn(
1302b194d21bSLijo Lazar adev->dev,
1303b194d21bSLijo Lazar "NPS config mismatch - expected ranges: %d discovery - nps mode: %d, nps ranges: %d",
1304b3c68716SLijo Lazar *exp_ranges, nps_type, range_cnt);
1305b194d21bSLijo Lazar ret = -EINVAL;
1306b194d21bSLijo Lazar goto err;
1307b194d21bSLijo Lazar }
1308b194d21bSLijo Lazar
1309b3c68716SLijo Lazar for (i = 0; i < range_cnt; ++i) {
1310b194d21bSLijo Lazar if (ranges[i].base_address >= ranges[i].limit_address) {
1311b194d21bSLijo Lazar dev_warn(
1312b194d21bSLijo Lazar adev->dev,
1313b194d21bSLijo Lazar "Invalid NPS range - nps mode: %d, range[%d]: base: %llx limit: %llx",
1314b194d21bSLijo Lazar nps_type, i, ranges[i].base_address,
1315b194d21bSLijo Lazar ranges[i].limit_address);
1316b194d21bSLijo Lazar ret = -EINVAL;
1317b194d21bSLijo Lazar goto err;
1318b194d21bSLijo Lazar }
1319b194d21bSLijo Lazar
1320b194d21bSLijo Lazar /* Check for overlaps, not expecting any now */
1321b194d21bSLijo Lazar for (j = i - 1; j >= 0; j--) {
1322b194d21bSLijo Lazar if (max(ranges[j].base_address,
1323b194d21bSLijo Lazar ranges[i].base_address) <=
1324b194d21bSLijo Lazar min(ranges[j].limit_address,
1325b194d21bSLijo Lazar ranges[i].limit_address)) {
1326b194d21bSLijo Lazar dev_warn(
1327b194d21bSLijo Lazar adev->dev,
1328b194d21bSLijo Lazar "overlapping ranges detected [ %llx - %llx ] | [%llx - %llx]",
1329b194d21bSLijo Lazar ranges[j].base_address,
1330b194d21bSLijo Lazar ranges[j].limit_address,
1331b194d21bSLijo Lazar ranges[i].base_address,
1332b194d21bSLijo Lazar ranges[i].limit_address);
1333b194d21bSLijo Lazar ret = -EINVAL;
1334b194d21bSLijo Lazar goto err;
1335b194d21bSLijo Lazar }
1336b194d21bSLijo Lazar }
1337b194d21bSLijo Lazar
1338b194d21bSLijo Lazar mem_ranges[i].range.fpfn =
1339b194d21bSLijo Lazar (ranges[i].base_address -
1340b194d21bSLijo Lazar adev->vm_manager.vram_base_offset) >>
1341b194d21bSLijo Lazar AMDGPU_GPU_PAGE_SHIFT;
1342b194d21bSLijo Lazar mem_ranges[i].range.lpfn =
1343b194d21bSLijo Lazar (ranges[i].limit_address -
1344b194d21bSLijo Lazar adev->vm_manager.vram_base_offset) >>
1345b194d21bSLijo Lazar AMDGPU_GPU_PAGE_SHIFT;
1346b194d21bSLijo Lazar mem_ranges[i].size =
1347b194d21bSLijo Lazar ranges[i].limit_address - ranges[i].base_address + 1;
1348b194d21bSLijo Lazar }
1349b194d21bSLijo Lazar
1350b3c68716SLijo Lazar if (!*exp_ranges)
1351b3c68716SLijo Lazar *exp_ranges = range_cnt;
1352b194d21bSLijo Lazar err:
1353b194d21bSLijo Lazar kfree(ranges);
1354b194d21bSLijo Lazar
1355b194d21bSLijo Lazar return ret;
1356b194d21bSLijo Lazar }
1357bbc16008SLijo Lazar
1358bbc16008SLijo Lazar int amdgpu_gmc_request_memory_partition(struct amdgpu_device *adev,
1359bbc16008SLijo Lazar int nps_mode)
1360bbc16008SLijo Lazar {
1361bbc16008SLijo Lazar /* Not supported on VF devices and APUs */
1362bbc16008SLijo Lazar if (amdgpu_sriov_vf(adev) || (adev->flags & AMD_IS_APU))
1363bbc16008SLijo Lazar return -EOPNOTSUPP;
1364bbc16008SLijo Lazar
1365bbc16008SLijo Lazar if (!adev->psp.funcs) {
1366bbc16008SLijo Lazar dev_err(adev->dev,
1367bbc16008SLijo Lazar "PSP interface not available for nps mode change request");
1368bbc16008SLijo Lazar return -EINVAL;
1369bbc16008SLijo Lazar }
1370bbc16008SLijo Lazar
1371bbc16008SLijo Lazar return psp_memory_partition(&adev->psp, nps_mode);
1372bbc16008SLijo Lazar }
1373ee52489dSLijo Lazar
1374ee52489dSLijo Lazar static inline bool amdgpu_gmc_need_nps_switch_req(struct amdgpu_device *adev,
1375ee52489dSLijo Lazar int req_nps_mode,
1376ee52489dSLijo Lazar int cur_nps_mode)
1377ee52489dSLijo Lazar {
1378ee52489dSLijo Lazar return (((BIT(req_nps_mode) & adev->gmc.supported_nps_modes) ==
1379ee52489dSLijo Lazar BIT(req_nps_mode)) &&
1380ee52489dSLijo Lazar req_nps_mode != cur_nps_mode);
1381ee52489dSLijo Lazar }
1382ee52489dSLijo Lazar
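/* If a different, supported NPS mode has been requested, forward the request
 * to the firmware (through the XGMI hive when the device is part of one) so
 * the new mode takes effect on the next driver load.
 */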
1383ee52489dSLijo Lazar void amdgpu_gmc_prepare_nps_mode_change(struct amdgpu_device *adev)
1384ee52489dSLijo Lazar {
1385ee52489dSLijo Lazar int req_nps_mode, cur_nps_mode, r;
1386ee52489dSLijo Lazar struct amdgpu_hive_info *hive;
1387ee52489dSLijo Lazar
1388ee52489dSLijo Lazar if (amdgpu_sriov_vf(adev) || !adev->gmc.supported_nps_modes ||
1389ee52489dSLijo Lazar !adev->gmc.gmc_funcs->request_mem_partition_mode)
1390ee52489dSLijo Lazar return;
1391ee52489dSLijo Lazar
1392ee52489dSLijo Lazar cur_nps_mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
1393ee52489dSLijo Lazar hive = amdgpu_get_xgmi_hive(adev);
1394ee52489dSLijo Lazar if (hive) {
1395ee52489dSLijo Lazar req_nps_mode = atomic_read(&hive->requested_nps_mode);
1396ee52489dSLijo Lazar if (!amdgpu_gmc_need_nps_switch_req(adev, req_nps_mode,
1397ee52489dSLijo Lazar cur_nps_mode)) {
1398ee52489dSLijo Lazar amdgpu_put_xgmi_hive(hive);
1399ee52489dSLijo Lazar return;
1400ee52489dSLijo Lazar }
1401ee52489dSLijo Lazar r = amdgpu_xgmi_request_nps_change(adev, hive, req_nps_mode);
1402ee52489dSLijo Lazar amdgpu_put_xgmi_hive(hive);
1403ee52489dSLijo Lazar goto out;
1404ee52489dSLijo Lazar }
1405ee52489dSLijo Lazar
1406ee52489dSLijo Lazar req_nps_mode = adev->gmc.requested_nps_mode;
1407ee52489dSLijo Lazar if (!amdgpu_gmc_need_nps_switch_req(adev, req_nps_mode, cur_nps_mode))
1408ee52489dSLijo Lazar return;
1409ee52489dSLijo Lazar
1410ee52489dSLijo Lazar /* even if this fails, we should let driver unload w/o blocking */
1411ee52489dSLijo Lazar r = adev->gmc.gmc_funcs->request_mem_partition_mode(adev, req_nps_mode);
1412ee52489dSLijo Lazar out:
1413ee52489dSLijo Lazar if (r)
1414ee52489dSLijo Lazar dev_err(adev->dev, "NPS mode change request failed\n");
1415ee52489dSLijo Lazar else
1416ee52489dSLijo Lazar dev_info(
1417ee52489dSLijo Lazar adev->dev,
1418ee52489dSLijo Lazar "NPS mode change request done, reload driver to complete the change\n");
1419ee52489dSLijo Lazar }
1420ed3dac4bSLijo Lazar
1421ed3dac4bSLijo Lazar bool amdgpu_gmc_need_reset_on_init(struct amdgpu_device *adev)
1422ed3dac4bSLijo Lazar {
1423ed3dac4bSLijo Lazar if (adev->gmc.gmc_funcs->need_reset_on_init)
1424ed3dac4bSLijo Lazar return adev->gmc.gmc_funcs->need_reset_on_init(adev);
1425ed3dac4bSLijo Lazar
1426ed3dac4bSLijo Lazar return false;
1427ed3dac4bSLijo Lazar }