/*
 * Copyright 2018 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */

#include <linux/io-64-nonatomic-lo-hi.h>
#ifdef CONFIG_X86
#include <asm/hypervisor.h>
#endif

#include "amdgpu.h"
#include "amdgpu_gmc.h"
#include "amdgpu_ras.h"
#include "amdgpu_reset.h"
#include "amdgpu_xgmi.h"

#include <drm/drm_drv.h>
#include <drm/ttm/ttm_tt.h>

/**
 * amdgpu_gmc_pdb0_alloc - allocate vram for pdb0
 *
 * @adev: amdgpu_device pointer
 *
 * Allocate video memory for pdb0 and map it for CPU access.
 * Returns 0 for success, error for failure.
 */
int amdgpu_gmc_pdb0_alloc(struct amdgpu_device *adev)
{
	int r;
	struct amdgpu_bo_param bp;
	u64 vram_size = adev->gmc.xgmi.node_segment_size * adev->gmc.xgmi.num_physical_nodes;
	uint32_t pde0_page_shift = adev->gmc.vmid0_page_table_block_size + 21;
	uint32_t npdes = (vram_size + (1ULL << pde0_page_shift) - 1) >> pde0_page_shift;
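	/*
	 * Sizing sketch (illustrative numbers, not from any specific ASIC):
	 * with vmid0_page_table_block_size = 9 each PDE0 covers
	 * 2M << 9 = 1G, so a hypothetical 4-node hive with 32G per node
	 * (128G total) needs npdes = 128 entries plus one for the GART PTB,
	 * i.e. PAGE_ALIGN((128 + 1) * 8) = a single 4K page for PDB0.
	 */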

	memset(&bp, 0, sizeof(bp));
	bp.size = PAGE_ALIGN((npdes + 1) * 8);
	bp.byte_align = PAGE_SIZE;
	bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
	bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
		AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
	bp.type = ttm_bo_type_kernel;
	bp.resv = NULL;
	bp.bo_ptr_size = sizeof(struct amdgpu_bo);

	r = amdgpu_bo_create(adev, &bp, &adev->gmc.pdb0_bo);
	if (r)
		return r;

	r = amdgpu_bo_reserve(adev->gmc.pdb0_bo, false);
	if (unlikely(r != 0))
		goto bo_reserve_failure;

	r = amdgpu_bo_pin(adev->gmc.pdb0_bo, AMDGPU_GEM_DOMAIN_VRAM);
	if (r)
		goto bo_pin_failure;
	r = amdgpu_bo_kmap(adev->gmc.pdb0_bo, &adev->gmc.ptr_pdb0);
	if (r)
		goto bo_kmap_failure;

	amdgpu_bo_unreserve(adev->gmc.pdb0_bo);
	return 0;

bo_kmap_failure:
	amdgpu_bo_unpin(adev->gmc.pdb0_bo);
bo_pin_failure:
	amdgpu_bo_unreserve(adev->gmc.pdb0_bo);
bo_reserve_failure:
	amdgpu_bo_unref(&adev->gmc.pdb0_bo);
	return r;
}

/**
 * amdgpu_gmc_get_pde_for_bo - get the PDE for a BO
 *
 * @bo: the BO to get the PDE for
 * @level: the level in the PD hierarchy
 * @addr: resulting addr
 * @flags: resulting flags
 *
 * Get the address and flags to be used for a PDE (Page Directory Entry).
 */
void amdgpu_gmc_get_pde_for_bo(struct amdgpu_bo *bo, int level,
			       uint64_t *addr, uint64_t *flags)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);

	switch (bo->tbo.resource->mem_type) {
	case TTM_PL_TT:
		*addr = bo->tbo.ttm->dma_address[0];
		break;
	case TTM_PL_VRAM:
		*addr = amdgpu_bo_gpu_offset(bo);
		break;
	default:
		*addr = 0;
		break;
	}
	*flags = amdgpu_ttm_tt_pde_flags(bo->tbo.ttm, bo->tbo.resource);
	amdgpu_gmc_get_vm_pde(adev, level, addr, flags);
}

/*
 * amdgpu_gmc_pd_addr - return the address of the root directory
 */
uint64_t amdgpu_gmc_pd_addr(struct amdgpu_bo *bo)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	uint64_t pd_addr;

	/* TODO: move that into ASIC specific code */
	if (adev->asic_type >= CHIP_VEGA10) {
		uint64_t flags = AMDGPU_PTE_VALID;

		amdgpu_gmc_get_pde_for_bo(bo, -1, &pd_addr, &flags);
		pd_addr |= flags;
	} else {
		pd_addr = amdgpu_bo_gpu_offset(bo);
	}
	return pd_addr;
}

/**
 * amdgpu_gmc_set_pte_pde - update the page tables using CPU
 *
 * @adev: amdgpu_device pointer
 * @cpu_pt_addr: cpu address of the page table
 * @gpu_page_idx: entry in the page table to update
 * @addr: dst addr to write into pte/pde
 * @flags: access flags
 *
 * Update the page tables using CPU.
 */
int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
				uint32_t gpu_page_idx, uint64_t addr,
				uint64_t flags)
{
	void __iomem *ptr = (void *)cpu_pt_addr;
	uint64_t value;

	/*
	 * The following is for PTE only. GART does not have PDEs.
	 */
	value = addr & 0x0000FFFFFFFFF000ULL;
	value |= flags;
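	/*
	 * Layout note (not a formal spec of the PTE format): @addr is
	 * expected to be 4K aligned, so the mask above keeps only the
	 * physical page bits, and the low-order access bits plus any
	 * high-order attribute bits coming in through @flags are OR'ed
	 * in without clashing with the address.
	 */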
	writeq(value, ptr + (gpu_page_idx * 8));

	return 0;
}

/**
 * amdgpu_gmc_agp_addr - return the address in the AGP address space
 *
 * @bo: TTM BO which needs the address, must be in GTT domain
 *
 * Tries to figure out how to access the BO through the AGP aperture. Returns
 * AMDGPU_BO_INVALID_OFFSET if that is not possible.
 */
uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);

	if (!bo->ttm)
		return AMDGPU_BO_INVALID_OFFSET;

	if (bo->ttm->num_pages != 1 || bo->ttm->caching == ttm_cached)
		return AMDGPU_BO_INVALID_OFFSET;

	if (bo->ttm->dma_address[0] + PAGE_SIZE >= adev->gmc.agp_size)
		return AMDGPU_BO_INVALID_OFFSET;

	return adev->gmc.agp_start + bo->ttm->dma_address[0];
}

/**
 * amdgpu_gmc_vram_location - try to find VRAM location
 *
 * @adev: amdgpu device structure holding all necessary information
 * @mc: memory controller structure holding memory information
 * @base: base address at which to put VRAM
 *
 * Function will try to place VRAM at the base address provided
 * as parameter.
 */
void amdgpu_gmc_vram_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,
			      u64 base)
{
	uint64_t vis_limit = (uint64_t)amdgpu_vis_vram_limit << 20;
	uint64_t limit = (uint64_t)amdgpu_vram_limit << 20;

	mc->vram_start = base;
	mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
	if (limit < mc->real_vram_size)
		mc->real_vram_size = limit;

	if (vis_limit && vis_limit < mc->visible_vram_size)
		mc->visible_vram_size = vis_limit;

	if (mc->real_vram_size < mc->visible_vram_size)
		mc->visible_vram_size = mc->real_vram_size;

	if (mc->xgmi.num_physical_nodes == 0) {
		mc->fb_start = mc->vram_start;
		mc->fb_end = mc->vram_end;
	}
	dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
			mc->mc_vram_size >> 20, mc->vram_start,
			mc->vram_end, mc->real_vram_size >> 20);
}

/**
 * amdgpu_gmc_sysvm_location - place vram and gart in sysvm aperture
 *
 * @adev: amdgpu device structure holding all necessary information
 * @mc: memory controller structure holding memory information
 *
 * This function is only used when GART is used for FB translation. In such
 * a case, we use the sysvm aperture (vmid0 page tables) for both vram
 * and gart (aka system memory) access.
 *
 * GPUVM (and our organization of vmid0 page tables) requires the sysvm
 * aperture to be placed at a location aligned to 8 times the native
 * page size. For example, if vm_context0_cntl.page_table_block_size
 * is 12, then the native page size is 8G (2M*2^12) and sysvm should start
 * at a 64G aligned address. For simplicity, we just put sysvm at
 * address 0. So vram starts at address 0 and gart is right after vram.
 */
void amdgpu_gmc_sysvm_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
{
	u64 hive_vram_start = 0;
	u64 hive_vram_end = mc->xgmi.node_segment_size * mc->xgmi.num_physical_nodes - 1;

	mc->vram_start = mc->xgmi.node_segment_size * mc->xgmi.physical_node_id;
	mc->vram_end = mc->vram_start + mc->xgmi.node_segment_size - 1;
	mc->gart_start = hive_vram_end + 1;
	mc->gart_end = mc->gart_start + mc->gart_size - 1;
	mc->fb_start = hive_vram_start;
	mc->fb_end = hive_vram_end;
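	/*
	 * Example layout (hypothetical hive, not a real configuration):
	 * 4 nodes with node_segment_size = 32G and physical_node_id = 1
	 * give vram = [32G, 64G), fb = [0, 128G) for the whole hive and
	 * gart starting right at 128G.
	 */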
	dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
			mc->mc_vram_size >> 20, mc->vram_start,
			mc->vram_end, mc->real_vram_size >> 20);
	dev_info(adev->dev, "GART: %lluM 0x%016llX - 0x%016llX\n",
			mc->gart_size >> 20, mc->gart_start, mc->gart_end);
}

/**
 * amdgpu_gmc_gart_location - try to find GART location
 *
 * @adev: amdgpu device structure holding all necessary information
 * @mc: memory controller structure holding memory information
 * @gart_placement: GART placement policy with respect to VRAM
 *
 * Function will try to place GART before or after VRAM.
 * If the GART size is bigger than the space left, the GART size is adjusted.
 * Thus this function will never fail.
 */
void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc,
			      enum amdgpu_gart_placement gart_placement)
{
	const uint64_t four_gb = 0x100000000ULL;
	u64 size_af, size_bf;
	/* To avoid the hole, limit the max mc address to AMDGPU_GMC_HOLE_START */
	u64 max_mc_address = min(adev->gmc.mc_mask, AMDGPU_GMC_HOLE_START - 1);

	/* VCE doesn't like it when BOs cross a 4GB segment, so align
	 * the GART base on a 4GB boundary as well.
	 */
	size_bf = mc->fb_start;
	size_af = max_mc_address + 1 - ALIGN(mc->fb_end + 1, four_gb);
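	/*
	 * Illustrative example: with fb = [0, 8G) and a 512M GART request,
	 * size_bf is 0 and size_af is roughly max_mc_address + 1 - 8G, so
	 * the BEST_FIT policy below places the GART high in the address
	 * space, aligned down to a 4GB boundary.
	 */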

	if (mc->gart_size > max(size_bf, size_af)) {
		dev_warn(adev->dev, "limiting GART\n");
		mc->gart_size = max(size_bf, size_af);
	}

	switch (gart_placement) {
	case AMDGPU_GART_PLACEMENT_HIGH:
		mc->gart_start = max_mc_address - mc->gart_size + 1;
		break;
	case AMDGPU_GART_PLACEMENT_LOW:
		mc->gart_start = 0;
		break;
	case AMDGPU_GART_PLACEMENT_BEST_FIT:
	default:
		if ((size_bf >= mc->gart_size && size_bf < size_af) ||
		    (size_af < mc->gart_size))
			mc->gart_start = 0;
		else
			mc->gart_start = max_mc_address - mc->gart_size + 1;
		break;
	}

	mc->gart_start &= ~(four_gb - 1);
	mc->gart_end = mc->gart_start + mc->gart_size - 1;
	dev_info(adev->dev, "GART: %lluM 0x%016llX - 0x%016llX\n",
			mc->gart_size >> 20, mc->gart_start, mc->gart_end);
}

/**
 * amdgpu_gmc_agp_location - try to find AGP location
 * @adev: amdgpu device structure holding all necessary information
 * @mc: memory controller structure holding memory information
 *
 * Function will try to find a place for the AGP BAR in the MC address
 * space.
 *
 * The AGP BAR will be assigned the largest available hole in the address space.
 * Should be called after VRAM and GART locations are set up.
 */
void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
{
	const uint64_t sixteen_gb = 1ULL << 34;
	const uint64_t sixteen_gb_mask = ~(sixteen_gb - 1);
	u64 size_af, size_bf;

	if (mc->fb_start > mc->gart_start) {
		size_bf = (mc->fb_start & sixteen_gb_mask) -
			ALIGN(mc->gart_end + 1, sixteen_gb);
		size_af = mc->mc_mask + 1 - ALIGN(mc->fb_end + 1, sixteen_gb);
	} else {
		size_bf = mc->fb_start & sixteen_gb_mask;
		size_af = (mc->gart_start & sixteen_gb_mask) -
			ALIGN(mc->fb_end + 1, sixteen_gb);
	}
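	/*
	 * Worked example with made-up numbers: fb = [0, 8G) and a GART
	 * placed high at 1008G take the else branch above, giving
	 * size_bf = 0 and size_af = 1008G - 16G = 992G, so the AGP
	 * aperture below becomes [16G, 1008G), i.e. the large hole
	 * between FB and GART.
	 */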

	if (size_bf > size_af) {
		mc->agp_start = (mc->fb_start - size_bf) & sixteen_gb_mask;
		mc->agp_size = size_bf;
	} else {
		mc->agp_start = ALIGN(mc->fb_end + 1, sixteen_gb);
		mc->agp_size = size_af;
	}

	mc->agp_end = mc->agp_start + mc->agp_size - 1;
	dev_info(adev->dev, "AGP: %lluM 0x%016llX - 0x%016llX\n",
			mc->agp_size >> 20, mc->agp_start, mc->agp_end);
}

/**
 * amdgpu_gmc_set_agp_default - Set the default AGP aperture value.
 * @adev: amdgpu device structure holding all necessary information
 * @mc: memory controller structure holding memory information
 *
 * To disable the AGP aperture, you need to set the start to a larger
 * value than the end.  This function sets the default value which
 * can then be overridden using amdgpu_gmc_agp_location() if you want
 * to enable the AGP aperture on a specific chip.
 */
void amdgpu_gmc_set_agp_default(struct amdgpu_device *adev,
				struct amdgpu_gmc *mc)
{
	mc->agp_start = 0xffffffffffff;
	mc->agp_end = 0;
	mc->agp_size = 0;
}

/**
 * amdgpu_gmc_fault_key - get hash key from vm fault address and pasid
 *
 * @addr: 48 bit physical address, page aligned (36 significant bits)
 * @pasid: 16 bit process address space identifier
 */
static inline uint64_t amdgpu_gmc_fault_key(uint64_t addr, uint16_t pasid)
{
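	/*
	 * addr is page aligned, so its low 12 bits are zero; after the
	 * shift by 4 the low 16 bits of the key are free and the 16 bit
	 * pasid fits there without overlapping the address bits.
	 */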
	return addr << 4 | pasid;
}

/**
 * amdgpu_gmc_filter_faults - filter VM faults
 *
 * @adev: amdgpu device structure
 * @ih: interrupt ring that the fault was received from
 * @addr: address of the VM fault
 * @pasid: PASID of the process causing the fault
 * @timestamp: timestamp of the fault
 *
 * Returns:
 * True if the fault was filtered and should not be processed further.
 * False if the fault is a new one and needs to be handled.
 */
bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev,
			      struct amdgpu_ih_ring *ih, uint64_t addr,
			      uint16_t pasid, uint64_t timestamp)
{
	struct amdgpu_gmc *gmc = &adev->gmc;
	uint64_t stamp, key = amdgpu_gmc_fault_key(addr, pasid);
	struct amdgpu_gmc_fault *fault;
	uint32_t hash;

	/* Stale retry fault if timestamp goes backward */
	if (amdgpu_ih_ts_after(timestamp, ih->processed_timestamp))
		return true;

	/* If we don't have space left in the ring buffer return immediately */
	stamp = max(timestamp, AMDGPU_GMC_FAULT_TIMEOUT + 1) -
		AMDGPU_GMC_FAULT_TIMEOUT;
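	/*
	 * "stamp" is the oldest timestamp that still counts as live. If even
	 * the entry we are about to overwrite is newer than that, the ring is
	 * full of unexpired faults and this fault is dropped (filtered)
	 * instead of recycling a live entry.
	 */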
	if (gmc->fault_ring[gmc->last_fault].timestamp >= stamp)
		return true;

	/* Try to find the fault in the hash */
	hash = hash_64(key, AMDGPU_GMC_FAULT_HASH_ORDER);
	fault = &gmc->fault_ring[gmc->fault_hash[hash].idx];
	while (fault->timestamp >= stamp) {
		uint64_t tmp;

		if (atomic64_read(&fault->key) == key) {
			/*
			 * If we get a fault which is already present in
			 * the fault_ring and the timestamp of
			 * the fault is after the expired timestamp,
			 * then this is a new fault that needs to be added
			 * into the fault ring.
			 */
			if (fault->timestamp_expiry != 0 &&
			    amdgpu_ih_ts_after(fault->timestamp_expiry,
					       timestamp))
				break;
			else
				return true;
		}

		tmp = fault->timestamp;
		fault = &gmc->fault_ring[fault->next];

		/* Check if the entry was reused */
		if (fault->timestamp >= tmp)
			break;
	}

	/* Add the fault to the ring */
	fault = &gmc->fault_ring[gmc->last_fault];
	atomic64_set(&fault->key, key);
	fault->timestamp = timestamp;

	/* And update the hash */
	fault->next = gmc->fault_hash[hash].idx;
	gmc->fault_hash[hash].idx = gmc->last_fault++;
	return false;
}

/**
 * amdgpu_gmc_filter_faults_remove - remove address from VM faults filter
 *
 * @adev: amdgpu device structure
 * @addr: address of the VM fault
 * @pasid: PASID of the process causing the fault
 *
 * Remove the address from the fault filter, so that future vm faults on this
 * address will pass to the retry fault handler to recover.
 */
void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr,
				     uint16_t pasid)
{
	struct amdgpu_gmc *gmc = &adev->gmc;
	uint64_t key = amdgpu_gmc_fault_key(addr, pasid);
	struct amdgpu_ih_ring *ih;
	struct amdgpu_gmc_fault *fault;
	uint32_t last_wptr;
	uint64_t last_ts;
	uint32_t hash;
	uint64_t tmp;

	if (adev->irq.retry_cam_enabled)
		return;

	ih = &adev->irq.ih1;
	/* Get the WPTR of the last entry in IH ring */
	last_wptr = amdgpu_ih_get_wptr(adev, ih);
	/* Order wptr with ring data. */
	rmb();
	/* Get the timestamp of the last entry in IH ring */
	last_ts = amdgpu_ih_decode_iv_ts(adev, ih, last_wptr, -1);

	hash = hash_64(key, AMDGPU_GMC_FAULT_HASH_ORDER);
	fault = &gmc->fault_ring[gmc->fault_hash[hash].idx];
	do {
		if (atomic64_read(&fault->key) == key) {
			/*
			 * Update the timestamp when this fault
			 * expired.
			 */
			fault->timestamp_expiry = last_ts;
			break;
		}

		tmp = fault->timestamp;
		fault = &gmc->fault_ring[fault->next];
	} while (fault->timestamp < tmp);
}

int amdgpu_gmc_ras_sw_init(struct amdgpu_device *adev)
{
	int r;

	/* umc ras block */
	r = amdgpu_umc_ras_sw_init(adev);
	if (r)
		return r;

	/* mmhub ras block */
	r = amdgpu_mmhub_ras_sw_init(adev);
	if (r)
		return r;

	/* hdp ras block */
	r = amdgpu_hdp_ras_sw_init(adev);
	if (r)
		return r;

	/* mca.x ras block */
	r = amdgpu_mca_mp0_ras_sw_init(adev);
	if (r)
		return r;

	r = amdgpu_mca_mp1_ras_sw_init(adev);
	if (r)
		return r;

	r = amdgpu_mca_mpio_ras_sw_init(adev);
	if (r)
		return r;

	/* xgmi ras block */
	r = amdgpu_xgmi_ras_sw_init(adev);
	if (r)
		return r;

	return 0;
}

int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
{
	return 0;
}

void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
{

}

/*
 * The latest engine allocation on gfx9/10 is:
 * Engine 2, 3: firmware
 * Engine 0, 1, 4~16: amdgpu ring,
 *                    subject to change when ring number changes
 * Engine 17: Gart flushes
 */
#define AMDGPU_VMHUB_INV_ENG_BITMAP		0x1FFF3
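/*
 * 0x1FFF3 sets bits 0, 1 and 4..16, matching the "engine 0, 1, 4~16 for
 * amdgpu rings" allocation described above; engines 2, 3 and 17 stay
 * reserved for firmware and GART flushes.
 */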

int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] = {0};
	unsigned i;
	unsigned vmhub, inv_eng;
	struct amdgpu_ring *shared_ring;

	/* init the vm inv eng for all vmhubs */
	for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) {
		vm_inv_engs[i] = AMDGPU_VMHUB_INV_ENG_BITMAP;
		/* reserve engine 5 for firmware */
		if (adev->enable_mes)
			vm_inv_engs[i] &= ~(1 << 5);
		/* reserve mmhub engine 3 for firmware */
		if (adev->enable_umsch_mm)
			vm_inv_engs[i] &= ~(1 << 3);
	}

	for (i = 0; i < adev->num_rings; ++i) {
		ring = adev->rings[i];
		vmhub = ring->vm_hub;

		if (ring == &adev->mes.ring[0] ||
		    ring == &adev->mes.ring[1] ||
		    ring == &adev->umsch_mm.ring ||
		    ring == &adev->cper.ring_buf)
			continue;

		/* Skip if the ring is a shared ring */
		if (amdgpu_sdma_is_shared_inv_eng(adev, ring))
			continue;

		inv_eng = ffs(vm_inv_engs[vmhub]);
		if (!inv_eng) {
			dev_err(adev->dev, "no VM inv eng for ring %s\n",
				ring->name);
			return -EINVAL;
		}

		ring->vm_inv_eng = inv_eng - 1;
		vm_inv_engs[vmhub] &= ~(1 << ring->vm_inv_eng);

		dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n",
			 ring->name, ring->vm_inv_eng, ring->vm_hub);
		/* SDMA has a special packet which allows it to use the same
		 * invalidation engine for all the rings in one instance.
		 * Therefore, we do not allocate a separate VM invalidation engine
		 * for SDMA page rings. Instead, they share the VM invalidation
		 * engine with the SDMA gfx ring. This change ensures efficient
		 * resource management and avoids the issue of insufficient VM
		 * invalidation engines.
		 */
		shared_ring = amdgpu_sdma_get_shared_ring(adev, ring);
		if (shared_ring) {
			shared_ring->vm_inv_eng = ring->vm_inv_eng;
			dev_info(adev->dev, "ring %s shares VM invalidation engine %u with ring %s on hub %u\n",
					ring->name, ring->vm_inv_eng, shared_ring->name, ring->vm_hub);
			continue;
		}
	}

	return 0;
}

void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
			      uint32_t vmhub, uint32_t flush_type)
{
	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
	struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
	struct dma_fence *fence;
	struct amdgpu_job *job;
	int r;

	if (!hub->sdma_invalidation_workaround || vmid ||
	    !adev->mman.buffer_funcs_enabled || !adev->ib_pool_ready ||
	    !ring->sched.ready) {
		/*
		 * A GPU reset should flush all TLBs anyway, so no need to do
		 * this while one is ongoing.
		 */
		if (!down_read_trylock(&adev->reset_domain->sem))
			return;

		if (adev->gmc.flush_tlb_needs_extra_type_2)
			adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid,
							   vmhub, 2);

		if (adev->gmc.flush_tlb_needs_extra_type_0 && flush_type == 2)
			adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid,
							   vmhub, 0);

		adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid, vmhub,
						   flush_type);
		up_read(&adev->reset_domain->sem);
		return;
	}

	/* The SDMA on Navi 1x has a bug which can theoretically result in memory
	 * corruption if an invalidation happens at the same time as a VA
	 * translation. Avoid this by doing the invalidation from the SDMA
	 * itself at least for GART.
	 */
	mutex_lock(&adev->mman.gtt_window_lock);
	r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.high_pr,
				     AMDGPU_FENCE_OWNER_UNDEFINED,
				     16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
				     &job);
	if (r)
		goto error_alloc;

	job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
	job->vm_needs_flush = true;
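	/*
	 * The IB itself is just a single NOP; what matters is that the job is
	 * submitted with vm_needs_flush set, so the GART invalidation is
	 * emitted from the SDMA's own command stream ahead of the IB.
	 */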
	job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
	fence = amdgpu_job_submit(job);
	mutex_unlock(&adev->mman.gtt_window_lock);

	dma_fence_wait(fence, false);
	dma_fence_put(fence);

	return;

error_alloc:
	mutex_unlock(&adev->mman.gtt_window_lock);
	dev_err(adev->dev, "Error flushing GPU TLB using the SDMA (%d)!\n", r);
}

int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid,
				   uint32_t flush_type, bool all_hub,
				   uint32_t inst)
{
	struct amdgpu_ring *ring = &adev->gfx.kiq[inst].ring;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst];
	unsigned int ndw;
	int r, cnt = 0;
	uint32_t seq;

	/*
	 * A GPU reset should flush all TLBs anyway, so no need to do
	 * this while one is ongoing.
	 */
	if (!down_read_trylock(&adev->reset_domain->sem))
		return 0;

	if (!adev->gmc.flush_pasid_uses_kiq || !ring->sched.ready) {
		if (adev->gmc.flush_tlb_needs_extra_type_2)
			adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid,
								 2, all_hub,
								 inst);

		if (adev->gmc.flush_tlb_needs_extra_type_0 && flush_type == 2)
			adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid,
								 0, all_hub,
								 inst);

		adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid,
							 flush_type, all_hub,
							 inst);
		r = 0;
	} else {
		/* 2 dwords flush + 8 dwords fence */
		ndw = kiq->pmf->invalidate_tlbs_size + 8;

		if (adev->gmc.flush_tlb_needs_extra_type_2)
			ndw += kiq->pmf->invalidate_tlbs_size;

		if (adev->gmc.flush_tlb_needs_extra_type_0)
			ndw += kiq->pmf->invalidate_tlbs_size;

		spin_lock(&adev->gfx.kiq[inst].ring_lock);
		r = amdgpu_ring_alloc(ring, ndw);
		if (r) {
			spin_unlock(&adev->gfx.kiq[inst].ring_lock);
			goto error_unlock_reset;
		}
		if (adev->gmc.flush_tlb_needs_extra_type_2)
			kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 2, all_hub);

		if (flush_type == 2 && adev->gmc.flush_tlb_needs_extra_type_0)
			kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 0, all_hub);

		kiq->pmf->kiq_invalidate_tlbs(ring, pasid, flush_type, all_hub);
		r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
		if (r) {
			amdgpu_ring_undo(ring);
			spin_unlock(&adev->gfx.kiq[inst].ring_lock);
			goto error_unlock_reset;
		}

		amdgpu_ring_commit(ring);
		spin_unlock(&adev->gfx.kiq[inst].ring_lock);

		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);

		might_sleep();
		while (r < 1 && cnt++ < MAX_KIQ_REG_TRY &&
		       !amdgpu_reset_pending(adev->reset_domain)) {
			msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
			r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
		}

		if (cnt > MAX_KIQ_REG_TRY) {
			dev_err(adev->dev, "timeout waiting for kiq fence\n");
			r = -ETIME;
		} else
			r = 0;
	}

error_unlock_reset:
	up_read(&adev->reset_domain->sem);
	return r;
}

void amdgpu_gmc_fw_reg_write_reg_wait(struct amdgpu_device *adev,
				      uint32_t reg0, uint32_t reg1,
				      uint32_t ref, uint32_t mask,
				      uint32_t xcc_inst)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_inst];
	struct amdgpu_ring *ring = &kiq->ring;
	signed long r, cnt = 0;
	unsigned long flags;
	uint32_t seq;

	if (adev->mes.ring[0].sched.ready) {
		amdgpu_mes_reg_write_reg_wait(adev, reg0, reg1,
					      ref, mask);
		return;
	}

	spin_lock_irqsave(&kiq->ring_lock, flags);
	amdgpu_ring_alloc(ring, 32);
	amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,
					    ref, mask);
	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
	if (r)
		goto failed_undo;

	amdgpu_ring_commit(ring);
	spin_unlock_irqrestore(&kiq->ring_lock, flags);

	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);

	/* don't wait anymore for IRQ context */
	if (r < 1 && in_interrupt())
		goto failed_kiq;

	might_sleep();
	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY &&
	       !amdgpu_reset_pending(adev->reset_domain)) {
		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
	}

	if (cnt > MAX_KIQ_REG_TRY)
		goto failed_kiq;

	return;

failed_undo:
	amdgpu_ring_undo(ring);
	spin_unlock_irqrestore(&kiq->ring_lock, flags);
failed_kiq:
	dev_err(adev->dev, "failed to write reg %x wait reg %x\n", reg0, reg1);
}

/**
 * amdgpu_gmc_tmz_set -- check and set if a device supports TMZ
 * @adev: amdgpu_device pointer
 *
 * Check and set if the device @adev supports Trusted Memory
 * Zones (TMZ).
 */
void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)
{
	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
	/* RAVEN */
	case IP_VERSION(9, 2, 2):
	case IP_VERSION(9, 1, 0):
	/* RENOIR looks like RAVEN */
	case IP_VERSION(9, 3, 0):
	/* GC 10.3.7 */
	case IP_VERSION(10, 3, 7):
	/* GC 11.0.1 */
	case IP_VERSION(11, 0, 1):
		if (amdgpu_tmz == 0) {
			adev->gmc.tmz_enabled = false;
			dev_info(adev->dev,
				 "Trusted Memory Zone (TMZ) feature disabled (cmd line)\n");
		} else {
			adev->gmc.tmz_enabled = true;
			dev_info(adev->dev,
				 "Trusted Memory Zone (TMZ) feature enabled\n");
		}
		break;
	case IP_VERSION(10, 1, 10):
	case IP_VERSION(10, 1, 1):
	case IP_VERSION(10, 1, 2):
	case IP_VERSION(10, 1, 3):
	case IP_VERSION(10, 3, 0):
	case IP_VERSION(10, 3, 2):
	case IP_VERSION(10, 3, 4):
	case IP_VERSION(10, 3, 5):
	case IP_VERSION(10, 3, 6):
	/* VANGOGH */
	case IP_VERSION(10, 3, 1):
	/* YELLOW_CARP */
	case IP_VERSION(10, 3, 3):
	case IP_VERSION(11, 0, 4):
	case IP_VERSION(11, 5, 0):
	case IP_VERSION(11, 5, 1):
	case IP_VERSION(11, 5, 2):
	case IP_VERSION(11, 5, 3):
		/* Don't enable it by default yet. */
		if (amdgpu_tmz < 1) {
			adev->gmc.tmz_enabled = false;
			dev_info(adev->dev,
				 "Trusted Memory Zone (TMZ) feature disabled as experimental (default)\n");
		} else {
			adev->gmc.tmz_enabled = true;
			dev_info(adev->dev,
				 "Trusted Memory Zone (TMZ) feature enabled as experimental (cmd line)\n");
		}
		break;
	default:
		adev->gmc.tmz_enabled = false;
		dev_info(adev->dev,
			 "Trusted Memory Zone (TMZ) feature not supported\n");
		break;
	}
}

/**
 * amdgpu_gmc_noretry_set -- set per asic noretry defaults
 * @adev: amdgpu_device pointer
 *
 * Set a per asic default for the no-retry parameter.
 */
void amdgpu_gmc_noretry_set(struct amdgpu_device *adev)
{
	struct amdgpu_gmc *gmc = &adev->gmc;
	uint32_t gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0);
	bool noretry_default = (gc_ver == IP_VERSION(9, 0, 1) ||
				gc_ver == IP_VERSION(9, 4, 0) ||
				gc_ver == IP_VERSION(9, 4, 1) ||
				gc_ver == IP_VERSION(9, 4, 2) ||
				gc_ver == IP_VERSION(9, 4, 3) ||
				gc_ver == IP_VERSION(9, 4, 4) ||
				gc_ver == IP_VERSION(9, 5, 0) ||
				gc_ver >= IP_VERSION(10, 3, 0));

	if (!amdgpu_sriov_xnack_support(adev))
		gmc->noretry = 1;
	else
		gmc->noretry = (amdgpu_noretry == -1) ? noretry_default : amdgpu_noretry;
}

void amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type,
				   bool enable)
{
	struct amdgpu_vmhub *hub;
	u32 tmp, reg, i;

	hub = &adev->vmhub[hub_type];
	for (i = 0; i < 16; i++) {
		reg = hub->vm_context0_cntl + hub->ctx_distance * i;

		tmp = (hub_type == AMDGPU_GFXHUB(0)) ?
			RREG32_SOC15_IP(GC, reg) :
			RREG32_SOC15_IP(MMHUB, reg);

		if (enable)
			tmp |= hub->vm_cntx_cntl_vm_fault;
		else
			tmp &= ~hub->vm_cntx_cntl_vm_fault;

		(hub_type == AMDGPU_GFXHUB(0)) ?
			WREG32_SOC15_IP(GC, reg, tmp) :
			WREG32_SOC15_IP(MMHUB, reg, tmp);
	}
}
955dd285c5dSAlex Deucher 
amdgpu_gmc_get_vbios_allocations(struct amdgpu_device * adev)956dd285c5dSAlex Deucher void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev)
957dd285c5dSAlex Deucher {
958dd285c5dSAlex Deucher 	unsigned size;
959dd285c5dSAlex Deucher 
960dd285c5dSAlex Deucher 	/*
9613f543552SYongqiang Sun 	 * Some ASICs need to reserve a region of video memory to avoid access
9623f543552SYongqiang Sun 	 * from driver
9633f543552SYongqiang Sun 	 */
9643f543552SYongqiang Sun 	adev->mman.stolen_reserved_offset = 0;
9653f543552SYongqiang Sun 	adev->mman.stolen_reserved_size = 0;
9663f543552SYongqiang Sun 
9673f543552SYongqiang Sun 	/*
968dd285c5dSAlex Deucher 	 * TODO:
969dd285c5dSAlex Deucher 	 * Currently there is a bug where some memory client outside
970dd285c5dSAlex Deucher 	 * of the driver writes to first 8M of VRAM on S3 resume,
971dd285c5dSAlex Deucher 	 * this overrides GART which by default gets placed in first 8M and
972dd285c5dSAlex Deucher 	 * causes VM_FAULTS once GTT is accessed.
973dd285c5dSAlex Deucher 	 * Keep the stolen memory reservation until the while this is not solved.
974dd285c5dSAlex Deucher 	 */
975dd285c5dSAlex Deucher 	switch (adev->asic_type) {
976dd285c5dSAlex Deucher 	case CHIP_VEGA10:
977faad5ccaSYongqiang Sun 		adev->mman.keep_stolen_vga_memory = true;
978faad5ccaSYongqiang Sun 		/*
97949aa98caSYongqiang Sun 		 * VEGA10 SRIOV VF with MS_HYPERV host needs some firmware reserved area.
980faad5ccaSYongqiang Sun 		 */
981d9e50239SYongqiang Sun #ifdef CONFIG_X86
98249aa98caSYongqiang Sun 		if (amdgpu_sriov_vf(adev) && hypervisor_is_type(X86_HYPER_MS_HYPERV)) {
98349aa98caSYongqiang Sun 			adev->mman.stolen_reserved_offset = 0x500000;
98449aa98caSYongqiang Sun 			adev->mman.stolen_reserved_size = 0x200000;
985faad5ccaSYongqiang Sun 		}
986d9e50239SYongqiang Sun #endif
987faad5ccaSYongqiang Sun 		break;
988dd285c5dSAlex Deucher 	case CHIP_RAVEN:
989dd285c5dSAlex Deucher 	case CHIP_RENOIR:
990cacbbe7cSAlex Deucher 		adev->mman.keep_stolen_vga_memory = true;
991dd285c5dSAlex Deucher 		break;
992dd285c5dSAlex Deucher 	default:
993cacbbe7cSAlex Deucher 		adev->mman.keep_stolen_vga_memory = false;
994dd285c5dSAlex Deucher 		break;
995dd285c5dSAlex Deucher 	}
996dd285c5dSAlex Deucher 
997088fb29bSAlex Deucher 	if (amdgpu_sriov_vf(adev) ||
998220c8cc8SAlex Deucher 	    !amdgpu_device_has_display_hardware(adev)) {
999dd285c5dSAlex Deucher 		size = 0;
10007eded018SAlex Deucher 	} else {
1001dd285c5dSAlex Deucher 		size = amdgpu_gmc_get_vbios_fb_size(adev);
1002dd285c5dSAlex Deucher 
100330018679SAlex Deucher 		if (adev->mman.keep_stolen_vga_memory)
100430018679SAlex Deucher 			size = max(size, (unsigned)AMDGPU_VBIOS_VGA_ALLOCATION);
10057eded018SAlex Deucher 	}
100630018679SAlex Deucher 
1007dd285c5dSAlex Deucher 	/* set to 0 if the pre-OS buffer uses up most of vram */
1008dd285c5dSAlex Deucher 	if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024))
1009dd285c5dSAlex Deucher 		size = 0;
1010dd285c5dSAlex Deucher 
1011dd285c5dSAlex Deucher 	if (size > AMDGPU_VBIOS_VGA_ALLOCATION) {
1012cacbbe7cSAlex Deucher 		adev->mman.stolen_vga_size = AMDGPU_VBIOS_VGA_ALLOCATION;
1013cacbbe7cSAlex Deucher 		adev->mman.stolen_extended_size = size - adev->mman.stolen_vga_size;
1014dd285c5dSAlex Deucher 	} else {
1015cacbbe7cSAlex Deucher 		adev->mman.stolen_vga_size = size;
1016cacbbe7cSAlex Deucher 		adev->mman.stolen_extended_size = 0;
1017dd285c5dSAlex Deucher 	}
1018dd285c5dSAlex Deucher }
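
/*
 * Illustrative numbers only (not taken from a specific board): if the VBIOS
 * reports a 16 MiB pre-OS framebuffer and AMDGPU_VBIOS_VGA_ALLOCATION is,
 * say, 9 MiB, the split above yields stolen_vga_size = 9 MiB and
 * stolen_extended_size = 7 MiB, while an SRIOV VF or a board without
 * display hardware ends up with both set to 0.
 */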
1019a2902c09SOak Zeng 
1020a2902c09SOak Zeng /**
1021a2902c09SOak Zeng  * amdgpu_gmc_init_pdb0 - initialize PDB0
1022a2902c09SOak Zeng  *
1023a2902c09SOak Zeng  * @adev: amdgpu_device pointer
1024a2902c09SOak Zeng  *
1025a2902c09SOak Zeng  * This function is only used when GART page table is used
1026a2902c09SOak Zeng  * for FB address translation. In such a case, we construct
1027a2902c09SOak Zeng  * a 2-level system VM page table: PDB0->PTB, to cover both
1028a2902c09SOak Zeng  * VRAM of the hive and system memory.
1029a2902c09SOak Zeng  *
1030a2902c09SOak Zeng  * PDB0 is static, initialized once on driver initialization.
1031a2902c09SOak Zeng  * The first n entries of PDB0 are used as PTEs by setting
1032a2902c09SOak Zeng  * the P bit to 1, pointing to VRAM. The (n+1)th entry points
1033a2902c09SOak Zeng  * to a big PTB covering system memory.
1034a2902c09SOak Zeng  *
1035a2902c09SOak Zeng  */
1036a2902c09SOak Zeng void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
1037a2902c09SOak Zeng {
1038a2902c09SOak Zeng 	int i;
1039a2902c09SOak Zeng 	uint64_t flags = adev->gart.gart_pte_flags; //TODO it is UC. explore NC/RW?
1040a2902c09SOak Zeng 	/* Each PDE0 (used as PTE) covers (2^vmid0_page_table_block_size)*2M
1041a2902c09SOak Zeng 	 */
1042a2902c09SOak Zeng 	u64 vram_size = adev->gmc.xgmi.node_segment_size * adev->gmc.xgmi.num_physical_nodes;
1043a2902c09SOak Zeng 	u64 pde0_page_size = (1ULL<<adev->gmc.vmid0_page_table_block_size)<<21;
1044a2902c09SOak Zeng 	u64 vram_addr = adev->vm_manager.vram_base_offset -
1045a2902c09SOak Zeng 		adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
1046a2902c09SOak Zeng 	u64 vram_end = vram_addr + vram_size;
10470ca565abSOak Zeng 	u64 gart_ptb_gpu_pa = amdgpu_gmc_vram_pa(adev, adev->gart.bo);
1048b2fe31cfSxinhui pan 	int idx;
1049b2fe31cfSxinhui pan 
1050c58a863bSGuchun Chen 	if (!drm_dev_enter(adev_to_drm(adev), &idx))
1051b2fe31cfSxinhui pan 		return;
1052a2902c09SOak Zeng 
1053a2902c09SOak Zeng 	flags |= AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE;
1054a2902c09SOak Zeng 	flags |= AMDGPU_PTE_WRITEABLE;
1055a2902c09SOak Zeng 	flags |= AMDGPU_PTE_SNOOPED;
1056a2902c09SOak Zeng 	flags |= AMDGPU_PTE_FRAG((adev->gmc.vmid0_page_table_block_size + 9*1));
1057980a0a94SHawking Zhang 	flags |= AMDGPU_PDE_PTE_FLAG(adev);
1058a2902c09SOak Zeng 
1059a2902c09SOak Zeng 	/* The first n PDE0 entries are used as PTE,
1060a2902c09SOak Zeng 	 * pointing to vram
1061a2902c09SOak Zeng 	 */
1062a2902c09SOak Zeng 	for (i = 0; vram_addr < vram_end; i++, vram_addr += pde0_page_size)
1063a2902c09SOak Zeng 		amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i, vram_addr, flags);
1064a2902c09SOak Zeng 
1065a2902c09SOak Zeng 	/* The (n+1)th PDE0 entry points to a huge
1066a2902c09SOak Zeng 	 * PTB which has more than 512 entries, each
1067a2902c09SOak Zeng 	 * pointing to a 4K system page
1068a2902c09SOak Zeng 	 */
106979194dacSOak Zeng 	flags = AMDGPU_PTE_VALID;
1070980a0a94SHawking Zhang 	flags |= AMDGPU_PTE_SNOOPED | AMDGPU_PDE_BFS_FLAG(adev, 0);
1071a2902c09SOak Zeng 	/* Requires gart_ptb_gpu_pa to be 4K aligned */
1072a2902c09SOak Zeng 	amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i, gart_ptb_gpu_pa, flags);
1073b2fe31cfSxinhui pan 	drm_dev_exit(idx);
1074a2902c09SOak Zeng }
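
/*
 * Worked example with assumed numbers: if vmid0_page_table_block_size is 9,
 * each PDE0 entry used as a PTE covers (1 << 9) * 2M = 1G of VRAM, so a
 * hive exposing 32G of VRAM fills the first 32 entries, and the following
 * entry points at the GART PTB that maps system memory.
 */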
1075dead5e42SOak Zeng 
1076dead5e42SOak Zeng /**
1077dead5e42SOak Zeng  * amdgpu_gmc_vram_mc2pa - calculate vram buffer's physical address from MC
1078dead5e42SOak Zeng  * address
1079dead5e42SOak Zeng  *
1080dead5e42SOak Zeng  * @adev: amdgpu_device pointer
1081dead5e42SOak Zeng  * @mc_addr: MC address of buffer
1082dead5e42SOak Zeng  */
1083dead5e42SOak Zeng uint64_t amdgpu_gmc_vram_mc2pa(struct amdgpu_device *adev, uint64_t mc_addr)
1084dead5e42SOak Zeng {
1085dead5e42SOak Zeng 	return mc_addr - adev->gmc.vram_start + adev->vm_manager.vram_base_offset;
1086dead5e42SOak Zeng }
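
/*
 * Example with assumed values: if gmc.vram_start is 0x0000008000000000 and
 * vram_base_offset is 0, an MC address of 0x0000008000100000 translates to
 * the physical VRAM address 0x100000.
 */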
1087dead5e42SOak Zeng 
1088dead5e42SOak Zeng /**
1089dead5e42SOak Zeng  * amdgpu_gmc_vram_pa - calculate vram buffer object's physical address from
1090dead5e42SOak Zeng  * GPU's view
1091dead5e42SOak Zeng  *
1092dead5e42SOak Zeng  * @adev: amdgpu_device pointer
1093dead5e42SOak Zeng  * @bo: amdgpu buffer object
1094dead5e42SOak Zeng  */
1095dead5e42SOak Zeng uint64_t amdgpu_gmc_vram_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo)
1096dead5e42SOak Zeng {
1097dead5e42SOak Zeng 	return amdgpu_gmc_vram_mc2pa(adev, amdgpu_bo_gpu_offset(bo));
1098dead5e42SOak Zeng }
1099dead5e42SOak Zeng 
1100479e3b02SXiaojian Du int amdgpu_gmc_vram_checking(struct amdgpu_device *adev)
1101479e3b02SXiaojian Du {
1102a357dca9SXiaojian Du 	struct amdgpu_bo *vram_bo = NULL;
1103a357dca9SXiaojian Du 	uint64_t vram_gpu = 0;
1104a357dca9SXiaojian Du 	void *vram_ptr = NULL;
1105479e3b02SXiaojian Du 
1106479e3b02SXiaojian Du 	int ret, size = 0x100000;
1107479e3b02SXiaojian Du 	uint8_t cptr[10];
1108479e3b02SXiaojian Du 
1109479e3b02SXiaojian Du 	ret = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
1110479e3b02SXiaojian Du 				AMDGPU_GEM_DOMAIN_VRAM,
1111479e3b02SXiaojian Du 				&vram_bo,
1112479e3b02SXiaojian Du 				&vram_gpu,
1113479e3b02SXiaojian Du 				&vram_ptr);
1114479e3b02SXiaojian Du 	if (ret)
1115479e3b02SXiaojian Du 		return ret;
1116479e3b02SXiaojian Du 
1117479e3b02SXiaojian Du 	memset(vram_ptr, 0x86, size);
1118479e3b02SXiaojian Du 	memset(cptr, 0x86, 10);
1119479e3b02SXiaojian Du 
1120479e3b02SXiaojian Du 	/**
1121479e3b02SXiaojian Du 	 * Check the start, the middle, and the end of the memory and verify that
1122479e3b02SXiaojian Du 	 * the content of each byte matches the pattern "0x86". If so, we assume
1123479e3b02SXiaojian Du 	 * the VRAM BO is usable.
1124479e3b02SXiaojian Du 	 *
1125479e3b02SXiaojian Du 	 * Note: Checking every byte of the whole 1M BO would take too many
1126479e3b02SXiaojian Du 	 * seconds, so we just sample these three parts.
1127479e3b02SXiaojian Du 	 */
1128479e3b02SXiaojian Du 	ret = memcmp(vram_ptr, cptr, 10);
1129fac4ebd7SSrinivasan Shanmugam 	if (ret) {
1130fac4ebd7SSrinivasan Shanmugam 		ret = -EIO;
1131fac4ebd7SSrinivasan Shanmugam 		goto release_buffer;
1132fac4ebd7SSrinivasan Shanmugam 	}
1133479e3b02SXiaojian Du 
1134479e3b02SXiaojian Du 	ret = memcmp(vram_ptr + (size / 2), cptr, 10);
1135fac4ebd7SSrinivasan Shanmugam 	if (ret) {
1136fac4ebd7SSrinivasan Shanmugam 		ret = -EIO;
1137fac4ebd7SSrinivasan Shanmugam 		goto release_buffer;
1138fac4ebd7SSrinivasan Shanmugam 	}
1139479e3b02SXiaojian Du 
1140479e3b02SXiaojian Du 	ret = memcmp(vram_ptr + size - 10, cptr, 10);
1141fac4ebd7SSrinivasan Shanmugam 	if (ret) {
1142fac4ebd7SSrinivasan Shanmugam 		ret = -EIO;
1143fac4ebd7SSrinivasan Shanmugam 		goto release_buffer;
1144fac4ebd7SSrinivasan Shanmugam 	}
1145479e3b02SXiaojian Du 
1146fac4ebd7SSrinivasan Shanmugam release_buffer:
1147479e3b02SXiaojian Du 	amdgpu_bo_free_kernel(&vram_bo, &vram_gpu,
1148479e3b02SXiaojian Du 			&vram_ptr);
1149479e3b02SXiaojian Du 
1150fac4ebd7SSrinivasan Shanmugam 	return ret;
1151479e3b02SXiaojian Du }
1152b6f90baaSLijo Lazar 
1153012be6f2SLijo Lazar static const char *nps_desc[] = {
1154012be6f2SLijo Lazar 	[AMDGPU_NPS1_PARTITION_MODE] = "NPS1",
1155012be6f2SLijo Lazar 	[AMDGPU_NPS2_PARTITION_MODE] = "NPS2",
1156012be6f2SLijo Lazar 	[AMDGPU_NPS3_PARTITION_MODE] = "NPS3",
1157012be6f2SLijo Lazar 	[AMDGPU_NPS4_PARTITION_MODE] = "NPS4",
1158012be6f2SLijo Lazar 	[AMDGPU_NPS6_PARTITION_MODE] = "NPS6",
1159012be6f2SLijo Lazar 	[AMDGPU_NPS8_PARTITION_MODE] = "NPS8",
1160012be6f2SLijo Lazar };
1161012be6f2SLijo Lazar 
1162012be6f2SLijo Lazar static ssize_t available_memory_partition_show(struct device *dev,
1163012be6f2SLijo Lazar 					       struct device_attribute *addr,
1164012be6f2SLijo Lazar 					       char *buf)
1165012be6f2SLijo Lazar {
1166012be6f2SLijo Lazar 	struct drm_device *ddev = dev_get_drvdata(dev);
1167012be6f2SLijo Lazar 	struct amdgpu_device *adev = drm_to_adev(ddev);
1168012be6f2SLijo Lazar 	int size = 0, mode;
1169012be6f2SLijo Lazar 	char *sep = "";
1170012be6f2SLijo Lazar 
1171012be6f2SLijo Lazar 	for_each_inst(mode, adev->gmc.supported_nps_modes) {
1172012be6f2SLijo Lazar 		size += sysfs_emit_at(buf, size, "%s%s", sep, nps_desc[mode]);
1173012be6f2SLijo Lazar 		sep = ", ";
1174012be6f2SLijo Lazar 	}
1175012be6f2SLijo Lazar 	size += sysfs_emit_at(buf, size, "\n");
1176012be6f2SLijo Lazar 
1177012be6f2SLijo Lazar 	return size;
1178012be6f2SLijo Lazar }
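
/*
 * For example, reading this attribute on a device whose supported_nps_modes
 * mask contains NPS1 and NPS4 produces "NPS1, NPS4".
 */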
1179012be6f2SLijo Lazar 
1180012be6f2SLijo Lazar static ssize_t current_memory_partition_store(struct device *dev,
1181012be6f2SLijo Lazar 					      struct device_attribute *attr,
1182012be6f2SLijo Lazar 					      const char *buf, size_t count)
1183012be6f2SLijo Lazar {
1184012be6f2SLijo Lazar 	struct drm_device *ddev = dev_get_drvdata(dev);
1185012be6f2SLijo Lazar 	struct amdgpu_device *adev = drm_to_adev(ddev);
1186012be6f2SLijo Lazar 	enum amdgpu_memory_partition mode;
1187012be6f2SLijo Lazar 	struct amdgpu_hive_info *hive;
1188012be6f2SLijo Lazar 	int i;
1189012be6f2SLijo Lazar 
1190012be6f2SLijo Lazar 	mode = UNKNOWN_MEMORY_PARTITION_MODE;
1191012be6f2SLijo Lazar 	for_each_inst(i, adev->gmc.supported_nps_modes) {
1192012be6f2SLijo Lazar 		if (!strncasecmp(nps_desc[i], buf, strlen(nps_desc[i]))) {
1193012be6f2SLijo Lazar 			mode = i;
1194012be6f2SLijo Lazar 			break;
1195012be6f2SLijo Lazar 		}
1196012be6f2SLijo Lazar 	}
1197012be6f2SLijo Lazar 
1198012be6f2SLijo Lazar 	if (mode == UNKNOWN_MEMORY_PARTITION_MODE)
1199012be6f2SLijo Lazar 		return -EINVAL;
1200012be6f2SLijo Lazar 
1201012be6f2SLijo Lazar 	if (mode == adev->gmc.gmc_funcs->query_mem_partition_mode(adev)) {
1202012be6f2SLijo Lazar 		dev_info(
1203012be6f2SLijo Lazar 			adev->dev,
1204012be6f2SLijo Lazar 			"requested NPS mode is same as current NPS mode, skipping\n");
1205012be6f2SLijo Lazar 		return count;
1206012be6f2SLijo Lazar 	}
1207012be6f2SLijo Lazar 
1208012be6f2SLijo Lazar 	/* If device is part of hive, all devices in the hive should request the
1209012be6f2SLijo Lazar 	 * same mode. Hence store the requested mode in hive.
1210012be6f2SLijo Lazar 	 */
1211012be6f2SLijo Lazar 	hive = amdgpu_get_xgmi_hive(adev);
1212012be6f2SLijo Lazar 	if (hive) {
1213012be6f2SLijo Lazar 		atomic_set(&hive->requested_nps_mode, mode);
1214012be6f2SLijo Lazar 		amdgpu_put_xgmi_hive(hive);
1215012be6f2SLijo Lazar 	} else {
1216012be6f2SLijo Lazar 		adev->gmc.requested_nps_mode = mode;
1217012be6f2SLijo Lazar 	}
1218012be6f2SLijo Lazar 
1219012be6f2SLijo Lazar 	dev_info(
1220012be6f2SLijo Lazar 		adev->dev,
1221012be6f2SLijo Lazar 		"NPS mode change requested, please remove and reload the driver\n");
1222012be6f2SLijo Lazar 
1223012be6f2SLijo Lazar 	return count;
1224012be6f2SLijo Lazar }
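
/*
 * Typical use from user space (the sysfs path is an example; device
 * attributes live under the PCI device directory):
 *   echo NPS4 > /sys/bus/pci/devices/0000:03:00.0/current_memory_partition
 * The new mode only takes effect after the driver is unloaded and reloaded.
 */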
1225012be6f2SLijo Lazar 
1226b6f90baaSLijo Lazar static ssize_t current_memory_partition_show(
1227b6f90baaSLijo Lazar 	struct device *dev, struct device_attribute *addr, char *buf)
1228b6f90baaSLijo Lazar {
1229b6f90baaSLijo Lazar 	struct drm_device *ddev = dev_get_drvdata(dev);
1230b6f90baaSLijo Lazar 	struct amdgpu_device *adev = drm_to_adev(ddev);
1231b6f90baaSLijo Lazar 	enum amdgpu_memory_partition mode;
1232b6f90baaSLijo Lazar 
1233b6f90baaSLijo Lazar 	mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
12349f7e94afSDan Carpenter 	if ((mode >= ARRAY_SIZE(nps_desc)) ||
1235012be6f2SLijo Lazar 	    (BIT(mode) & AMDGPU_ALL_NPS_MASK) != BIT(mode))
1236b6f90baaSLijo Lazar 		return sysfs_emit(buf, "UNKNOWN\n");
1237012be6f2SLijo Lazar 
1238012be6f2SLijo Lazar 	return sysfs_emit(buf, "%s\n", nps_desc[mode]);
1239b6f90baaSLijo Lazar }
1240b6f90baaSLijo Lazar 
1241012be6f2SLijo Lazar static DEVICE_ATTR_RW(current_memory_partition);
1242012be6f2SLijo Lazar static DEVICE_ATTR_RO(available_memory_partition);
1243b6f90baaSLijo Lazar 
1244b6f90baaSLijo Lazar int amdgpu_gmc_sysfs_init(struct amdgpu_device *adev)
1245b6f90baaSLijo Lazar {
1246012be6f2SLijo Lazar 	bool nps_switch_support;
1247012be6f2SLijo Lazar 	int r = 0;
1248012be6f2SLijo Lazar 
1249b6f90baaSLijo Lazar 	if (!adev->gmc.gmc_funcs->query_mem_partition_mode)
1250b6f90baaSLijo Lazar 		return 0;
1251b6f90baaSLijo Lazar 
1252012be6f2SLijo Lazar 	nps_switch_support = (hweight32(adev->gmc.supported_nps_modes &
1253012be6f2SLijo Lazar 					AMDGPU_ALL_NPS_MASK) > 1);
1254012be6f2SLijo Lazar 	if (!nps_switch_support)
1255012be6f2SLijo Lazar 		dev_attr_current_memory_partition.attr.mode &=
1256012be6f2SLijo Lazar 			~(S_IWUSR | S_IWGRP | S_IWOTH);
1257012be6f2SLijo Lazar 	else
1258012be6f2SLijo Lazar 		r = device_create_file(adev->dev,
1259012be6f2SLijo Lazar 				       &dev_attr_available_memory_partition);
1260012be6f2SLijo Lazar 
1261012be6f2SLijo Lazar 	if (r)
1262012be6f2SLijo Lazar 		return r;
1263012be6f2SLijo Lazar 
1264b6f90baaSLijo Lazar 	return device_create_file(adev->dev,
1265b6f90baaSLijo Lazar 				  &dev_attr_current_memory_partition);
1266b6f90baaSLijo Lazar }
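
/*
 * Note on the setup above: when only a single NPS mode is supported, the
 * write bits are masked off so current_memory_partition is exposed
 * read-only and available_memory_partition is not created at all.
 */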
1267b6f90baaSLijo Lazar 
1268b6f90baaSLijo Lazar void amdgpu_gmc_sysfs_fini(struct amdgpu_device *adev)
1269b6f90baaSLijo Lazar {
1270012be6f2SLijo Lazar 	if (!adev->gmc.gmc_funcs->query_mem_partition_mode)
1271012be6f2SLijo Lazar 		return;
1272012be6f2SLijo Lazar 
1273b6f90baaSLijo Lazar 	device_remove_file(adev->dev, &dev_attr_current_memory_partition);
1274012be6f2SLijo Lazar 	device_remove_file(adev->dev, &dev_attr_available_memory_partition);
1275b6f90baaSLijo Lazar }
1276b194d21bSLijo Lazar 
1277b194d21bSLijo Lazar int amdgpu_gmc_get_nps_memranges(struct amdgpu_device *adev,
1278b194d21bSLijo Lazar 				 struct amdgpu_mem_partition_info *mem_ranges,
1279b3c68716SLijo Lazar 				 uint8_t *exp_ranges)
1280b194d21bSLijo Lazar {
1281b194d21bSLijo Lazar 	struct amdgpu_gmc_memrange *ranges;
1282b194d21bSLijo Lazar 	int range_cnt, ret, i, j;
1283b194d21bSLijo Lazar 	uint32_t nps_type;
1284ed3dac4bSLijo Lazar 	bool refresh;
1285b194d21bSLijo Lazar 
1286b3c68716SLijo Lazar 	if (!mem_ranges || !exp_ranges)
1287b194d21bSLijo Lazar 		return -EINVAL;
1288b194d21bSLijo Lazar 
1289ed3dac4bSLijo Lazar 	refresh = (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) &&
1290ed3dac4bSLijo Lazar 		  (adev->gmc.reset_flags & AMDGPU_GMC_INIT_RESET_NPS);
1291b194d21bSLijo Lazar 	ret = amdgpu_discovery_get_nps_info(adev, &nps_type, &ranges,
1292ed3dac4bSLijo Lazar 					    &range_cnt, refresh);
1293b194d21bSLijo Lazar 
1294b194d21bSLijo Lazar 	if (ret)
1295b194d21bSLijo Lazar 		return ret;
1296b194d21bSLijo Lazar 
1297b194d21bSLijo Lazar 	/* TODO: For now, expect ranges and partition count to be the same.
1298b194d21bSLijo Lazar 	 * Adjust if there are holes expected in any NPS domain.
1299b194d21bSLijo Lazar 	 */
1300b3c68716SLijo Lazar 	if (*exp_ranges && (range_cnt != *exp_ranges)) {
1301b194d21bSLijo Lazar 		dev_warn(
1302b194d21bSLijo Lazar 			adev->dev,
1303b194d21bSLijo Lazar 			"NPS config mismatch - expected ranges: %d discovery - nps mode: %d, nps ranges: %d",
1304b3c68716SLijo Lazar 			*exp_ranges, nps_type, range_cnt);
1305b194d21bSLijo Lazar 		ret = -EINVAL;
1306b194d21bSLijo Lazar 		goto err;
1307b194d21bSLijo Lazar 	}
1308b194d21bSLijo Lazar 
1309b3c68716SLijo Lazar 	for (i = 0; i < range_cnt; ++i) {
1310b194d21bSLijo Lazar 		if (ranges[i].base_address >= ranges[i].limit_address) {
1311b194d21bSLijo Lazar 			dev_warn(
1312b194d21bSLijo Lazar 				adev->dev,
1313b194d21bSLijo Lazar 				"Invalid NPS range - nps mode: %d, range[%d]: base: %llx limit: %llx",
1314b194d21bSLijo Lazar 				nps_type, i, ranges[i].base_address,
1315b194d21bSLijo Lazar 				ranges[i].limit_address);
1316b194d21bSLijo Lazar 			ret = -EINVAL;
1317b194d21bSLijo Lazar 			goto err;
1318b194d21bSLijo Lazar 		}
1319b194d21bSLijo Lazar 
1320b194d21bSLijo Lazar 		/* Check for overlaps, not expecting any now */
1321b194d21bSLijo Lazar 		for (j = i - 1; j >= 0; j--) {
1322b194d21bSLijo Lazar 			if (max(ranges[j].base_address,
1323b194d21bSLijo Lazar 				ranges[i].base_address) <=
1324b194d21bSLijo Lazar 			    min(ranges[j].limit_address,
1325b194d21bSLijo Lazar 				ranges[i].limit_address)) {
1326b194d21bSLijo Lazar 				dev_warn(
1327b194d21bSLijo Lazar 					adev->dev,
1328b194d21bSLijo Lazar 					"overlapping ranges detected [ %llx - %llx ] | [%llx - %llx]",
1329b194d21bSLijo Lazar 					ranges[j].base_address,
1330b194d21bSLijo Lazar 					ranges[j].limit_address,
1331b194d21bSLijo Lazar 					ranges[i].base_address,
1332b194d21bSLijo Lazar 					ranges[i].limit_address);
1333b194d21bSLijo Lazar 				ret = -EINVAL;
1334b194d21bSLijo Lazar 				goto err;
1335b194d21bSLijo Lazar 			}
1336b194d21bSLijo Lazar 		}
1337b194d21bSLijo Lazar 
1338b194d21bSLijo Lazar 		mem_ranges[i].range.fpfn =
1339b194d21bSLijo Lazar 			(ranges[i].base_address -
1340b194d21bSLijo Lazar 			 adev->vm_manager.vram_base_offset) >>
1341b194d21bSLijo Lazar 			AMDGPU_GPU_PAGE_SHIFT;
1342b194d21bSLijo Lazar 		mem_ranges[i].range.lpfn =
1343b194d21bSLijo Lazar 			(ranges[i].limit_address -
1344b194d21bSLijo Lazar 			 adev->vm_manager.vram_base_offset) >>
1345b194d21bSLijo Lazar 			AMDGPU_GPU_PAGE_SHIFT;
1346b194d21bSLijo Lazar 		mem_ranges[i].size =
1347b194d21bSLijo Lazar 			ranges[i].limit_address - ranges[i].base_address + 1;
1348b194d21bSLijo Lazar 	}
1349b194d21bSLijo Lazar 
1350b3c68716SLijo Lazar 	if (!*exp_ranges)
1351b3c68716SLijo Lazar 		*exp_ranges = range_cnt;
1352b194d21bSLijo Lazar err:
1353b194d21bSLijo Lazar 	kfree(ranges);
1354b194d21bSLijo Lazar 
1355b194d21bSLijo Lazar 	return ret;
1356b194d21bSLijo Lazar }
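
/*
 * Range conversion example with assumed values: a range with base_address 0
 * and limit_address 0xFFFFFFFF, with vram_base_offset 0 and 4K GPU pages,
 * becomes fpfn 0, lpfn 0xFFFFF and size 4G.
 */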
1357bbc16008SLijo Lazar 
1358bbc16008SLijo Lazar int amdgpu_gmc_request_memory_partition(struct amdgpu_device *adev,
1359bbc16008SLijo Lazar 					int nps_mode)
1360bbc16008SLijo Lazar {
1361bbc16008SLijo Lazar 	/* Not supported on VF devices and APUs */
1362bbc16008SLijo Lazar 	if (amdgpu_sriov_vf(adev) || (adev->flags & AMD_IS_APU))
1363bbc16008SLijo Lazar 		return -EOPNOTSUPP;
1364bbc16008SLijo Lazar 
1365bbc16008SLijo Lazar 	if (!adev->psp.funcs) {
1366bbc16008SLijo Lazar 		dev_err(adev->dev,
1367bbc16008SLijo Lazar 			"PSP interface not available for nps mode change request");
1368bbc16008SLijo Lazar 		return -EINVAL;
1369bbc16008SLijo Lazar 	}
1370bbc16008SLijo Lazar 
1371bbc16008SLijo Lazar 	return psp_memory_partition(&adev->psp, nps_mode);
1372bbc16008SLijo Lazar }
1373ee52489dSLijo Lazar 
1374ee52489dSLijo Lazar static inline bool amdgpu_gmc_need_nps_switch_req(struct amdgpu_device *adev,
1375ee52489dSLijo Lazar 						  int req_nps_mode,
1376ee52489dSLijo Lazar 						  int cur_nps_mode)
1377ee52489dSLijo Lazar {
1378ee52489dSLijo Lazar 	return (((BIT(req_nps_mode) & adev->gmc.supported_nps_modes) ==
1379ee52489dSLijo Lazar 			BIT(req_nps_mode)) &&
1380ee52489dSLijo Lazar 		req_nps_mode != cur_nps_mode);
1381ee52489dSLijo Lazar }
1382ee52489dSLijo Lazar 
1383ee52489dSLijo Lazar void amdgpu_gmc_prepare_nps_mode_change(struct amdgpu_device *adev)
1384ee52489dSLijo Lazar {
1385ee52489dSLijo Lazar 	int req_nps_mode, cur_nps_mode, r;
1386ee52489dSLijo Lazar 	struct amdgpu_hive_info *hive;
1387ee52489dSLijo Lazar 
1388ee52489dSLijo Lazar 	if (amdgpu_sriov_vf(adev) || !adev->gmc.supported_nps_modes ||
1389ee52489dSLijo Lazar 	    !adev->gmc.gmc_funcs->request_mem_partition_mode)
1390ee52489dSLijo Lazar 		return;
1391ee52489dSLijo Lazar 
1392ee52489dSLijo Lazar 	cur_nps_mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
1393ee52489dSLijo Lazar 	hive = amdgpu_get_xgmi_hive(adev);
1394ee52489dSLijo Lazar 	if (hive) {
1395ee52489dSLijo Lazar 		req_nps_mode = atomic_read(&hive->requested_nps_mode);
1396ee52489dSLijo Lazar 		if (!amdgpu_gmc_need_nps_switch_req(adev, req_nps_mode,
1397ee52489dSLijo Lazar 						    cur_nps_mode)) {
1398ee52489dSLijo Lazar 			amdgpu_put_xgmi_hive(hive);
1399ee52489dSLijo Lazar 			return;
1400ee52489dSLijo Lazar 		}
1401ee52489dSLijo Lazar 		r = amdgpu_xgmi_request_nps_change(adev, hive, req_nps_mode);
1402ee52489dSLijo Lazar 		amdgpu_put_xgmi_hive(hive);
1403ee52489dSLijo Lazar 		goto out;
1404ee52489dSLijo Lazar 	}
1405ee52489dSLijo Lazar 
1406ee52489dSLijo Lazar 	req_nps_mode = adev->gmc.requested_nps_mode;
1407ee52489dSLijo Lazar 	if (!amdgpu_gmc_need_nps_switch_req(adev, req_nps_mode, cur_nps_mode))
1408ee52489dSLijo Lazar 		return;
1409ee52489dSLijo Lazar 
1410ee52489dSLijo Lazar 	/* even if this fails, we should let driver unload w/o blocking */
1411ee52489dSLijo Lazar 	r = adev->gmc.gmc_funcs->request_mem_partition_mode(adev, req_nps_mode);
1412ee52489dSLijo Lazar out:
1413ee52489dSLijo Lazar 	if (r)
1414ee52489dSLijo Lazar 		dev_err(adev->dev, "NPS mode change request failed\n");
1415ee52489dSLijo Lazar 	else
1416ee52489dSLijo Lazar 		dev_info(
1417ee52489dSLijo Lazar 			adev->dev,
1418ee52489dSLijo Lazar 			"NPS mode change request done, reload driver to complete the change\n");
1419ee52489dSLijo Lazar }
1420ed3dac4bSLijo Lazar 
1421ed3dac4bSLijo Lazar bool amdgpu_gmc_need_reset_on_init(struct amdgpu_device *adev)
1422ed3dac4bSLijo Lazar {
1423ed3dac4bSLijo Lazar 	if (adev->gmc.gmc_funcs->need_reset_on_init)
1424ed3dac4bSLijo Lazar 		return adev->gmc.gmc_funcs->need_reset_on_init(adev);
1425ed3dac4bSLijo Lazar 
1426ed3dac4bSLijo Lazar 	return false;
1427ed3dac4bSLijo Lazar }
1428