1d38ceaf9SAlex Deucher /*
2d38ceaf9SAlex Deucher  * Copyright 2014 Advanced Micro Devices, Inc.
3d38ceaf9SAlex Deucher  * Copyright 2008 Red Hat Inc.
4d38ceaf9SAlex Deucher  * Copyright 2009 Jerome Glisse.
5d38ceaf9SAlex Deucher  *
6d38ceaf9SAlex Deucher  * Permission is hereby granted, free of charge, to any person obtaining a
7d38ceaf9SAlex Deucher  * copy of this software and associated documentation files (the "Software"),
8d38ceaf9SAlex Deucher  * to deal in the Software without restriction, including without limitation
9d38ceaf9SAlex Deucher  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10d38ceaf9SAlex Deucher  * and/or sell copies of the Software, and to permit persons to whom the
11d38ceaf9SAlex Deucher  * Software is furnished to do so, subject to the following conditions:
12d38ceaf9SAlex Deucher  *
13d38ceaf9SAlex Deucher  * The above copyright notice and this permission notice shall be included in
14d38ceaf9SAlex Deucher  * all copies or substantial portions of the Software.
15d38ceaf9SAlex Deucher  *
16d38ceaf9SAlex Deucher  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17d38ceaf9SAlex Deucher  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18d38ceaf9SAlex Deucher  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19d38ceaf9SAlex Deucher  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20d38ceaf9SAlex Deucher  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21d38ceaf9SAlex Deucher  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22d38ceaf9SAlex Deucher  * OTHER DEALINGS IN THE SOFTWARE.
23d38ceaf9SAlex Deucher  *
24d38ceaf9SAlex Deucher  */
25fdf2f6c5SSam Ravnborg 
26ec71b250SLikun Gao #include <linux/firmware.h>
27d361ad5dSSrinivasan Shanmugam #include <linux/pm_runtime.h>
28d361ad5dSSrinivasan Shanmugam 
29d38ceaf9SAlex Deucher #include "amdgpu.h"
30356aee30SBaoyou Xie #include "amdgpu_gfx.h"
3188dfc9a3SLikun Gao #include "amdgpu_rlc.h"
326caeee7aSHawking Zhang #include "amdgpu_ras.h"
33d361ad5dSSrinivasan Shanmugam #include "amdgpu_reset.h"
348e7fd193SLijo Lazar #include "amdgpu_xcp.h"
35b1338a8eSStanley.Yang #include "amdgpu_xgmi.h"
36d38ceaf9SAlex Deucher 
37bf9b1d9dSRex Zhu /* delay 0.1 second to enable gfx off feature */
38bf9b1d9dSRex Zhu #define GFX_OFF_DELAY_ENABLE         msecs_to_jiffies(100)
391e317b99SRex Zhu 
401d617c02SLijo Lazar #define GFX_OFF_NO_DELAY 0
411d617c02SLijo Lazar 
42d38ceaf9SAlex Deucher /*
43448fe192SHuang Rui  * GPU GFX IP block helper functions.
44d38ceaf9SAlex Deucher  */
45448fe192SHuang Rui 
467470bfcfSHawking Zhang int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec,
47448fe192SHuang Rui 				int pipe, int queue)
48448fe192SHuang Rui {
49448fe192SHuang Rui 	int bit = 0;
50448fe192SHuang Rui 
51448fe192SHuang Rui 	bit += mec * adev->gfx.mec.num_pipe_per_mec
52448fe192SHuang Rui 		* adev->gfx.mec.num_queue_per_pipe;
53448fe192SHuang Rui 	bit += pipe * adev->gfx.mec.num_queue_per_pipe;
54448fe192SHuang Rui 	bit += queue;
55448fe192SHuang Rui 
56448fe192SHuang Rui 	return bit;
57448fe192SHuang Rui }
58448fe192SHuang Rui 
595c180eb9SYong Zhao void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
60448fe192SHuang Rui 				 int *mec, int *pipe, int *queue)
61448fe192SHuang Rui {
62448fe192SHuang Rui 	*queue = bit % adev->gfx.mec.num_queue_per_pipe;
63448fe192SHuang Rui 	*pipe = (bit / adev->gfx.mec.num_queue_per_pipe)
64448fe192SHuang Rui 		% adev->gfx.mec.num_pipe_per_mec;
65448fe192SHuang Rui 	*mec = (bit / adev->gfx.mec.num_queue_per_pipe)
66448fe192SHuang Rui 	       / adev->gfx.mec.num_pipe_per_mec;
67448fe192SHuang Rui 
68448fe192SHuang Rui }
69448fe192SHuang Rui 
70448fe192SHuang Rui bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev,
71be697aa3SLe Ma 				     int xcc_id, int mec, int pipe, int queue)
72448fe192SHuang Rui {
737470bfcfSHawking Zhang 	return test_bit(amdgpu_gfx_mec_queue_to_bit(adev, mec, pipe, queue),
74be697aa3SLe Ma 			adev->gfx.mec_bitmap[xcc_id].queue_bitmap);
75448fe192SHuang Rui }
76448fe192SHuang Rui 
777470bfcfSHawking Zhang int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev,
787470bfcfSHawking Zhang 			       int me, int pipe, int queue)
797470bfcfSHawking Zhang {
807470bfcfSHawking Zhang 	int bit = 0;
817470bfcfSHawking Zhang 
827470bfcfSHawking Zhang 	bit += me * adev->gfx.me.num_pipe_per_me
837470bfcfSHawking Zhang 		* adev->gfx.me.num_queue_per_pipe;
847470bfcfSHawking Zhang 	bit += pipe * adev->gfx.me.num_queue_per_pipe;
857470bfcfSHawking Zhang 	bit += queue;
867470bfcfSHawking Zhang 
877470bfcfSHawking Zhang 	return bit;
887470bfcfSHawking Zhang }
897470bfcfSHawking Zhang 
907470bfcfSHawking Zhang bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev,
917470bfcfSHawking Zhang 				    int me, int pipe, int queue)
927470bfcfSHawking Zhang {
937470bfcfSHawking Zhang 	return test_bit(amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue),
947470bfcfSHawking Zhang 			adev->gfx.me.queue_bitmap);
957470bfcfSHawking Zhang }
967470bfcfSHawking Zhang 
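/*
 * Worked example of the bit layout used by the helpers above (for
 * illustration only; the counts are example values, not read from any
 * specific ASIC): with num_pipe_per_mec = 4 and num_queue_per_pipe = 8,
 * MEC 1 / pipe 2 / queue 5 linearizes to bit 1 * (4 * 8) + 2 * 8 + 5 = 53,
 * and amdgpu_queue_mask_bit_to_mec_queue() recovers queue = 53 % 8 = 5,
 * pipe = (53 / 8) % 4 = 2 and mec = (53 / 8) / 4 = 1 from that bit.
 * amdgpu_gfx_me_queue_to_bit() applies the same scheme to the ME counts.
 */
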
97d38ceaf9SAlex Deucher /**
986f8941a2SNicolai Hähnle  * amdgpu_gfx_parse_disable_cu - Parse the disable_cu module parameter
996f8941a2SNicolai Hähnle  *
1006f8941a2SNicolai Hähnle  * @mask: array in which the per-shader array disable masks will be stored
1016f8941a2SNicolai Hähnle  * @max_se: number of SEs
1026f8941a2SNicolai Hähnle  * @max_sh: number of SHs
1036f8941a2SNicolai Hähnle  *
1046f8941a2SNicolai Hähnle  * The bitmask of CUs to be disabled in the shader array determined by se and
1056f8941a2SNicolai Hähnle  * sh is stored in mask[se * max_sh + sh].
1066f8941a2SNicolai Hähnle  */
10750fbe0ccSSrinivasan Shanmugam void amdgpu_gfx_parse_disable_cu(unsigned int *mask, unsigned int max_se, unsigned int max_sh)
1086f8941a2SNicolai Hähnle {
10950fbe0ccSSrinivasan Shanmugam 	unsigned int se, sh, cu;
1106f8941a2SNicolai Hähnle 	const char *p;
1116f8941a2SNicolai Hähnle 
1126f8941a2SNicolai Hähnle 	memset(mask, 0, sizeof(*mask) * max_se * max_sh);
1136f8941a2SNicolai Hähnle 
1146f8941a2SNicolai Hähnle 	if (!amdgpu_disable_cu || !*amdgpu_disable_cu)
1156f8941a2SNicolai Hähnle 		return;
1166f8941a2SNicolai Hähnle 
1176f8941a2SNicolai Hähnle 	p = amdgpu_disable_cu;
1186f8941a2SNicolai Hähnle 	for (;;) {
1196f8941a2SNicolai Hähnle 		char *next;
1206f8941a2SNicolai Hähnle 		int ret = sscanf(p, "%u.%u.%u", &se, &sh, &cu);
12150fbe0ccSSrinivasan Shanmugam 
1226f8941a2SNicolai Hähnle 		if (ret < 3) {
1236f8941a2SNicolai Hähnle 			DRM_ERROR("amdgpu: could not parse disable_cu\n");
1246f8941a2SNicolai Hähnle 			return;
1256f8941a2SNicolai Hähnle 		}
1266f8941a2SNicolai Hähnle 
1276f8941a2SNicolai Hähnle 		if (se < max_se && sh < max_sh && cu < 16) {
1286f8941a2SNicolai Hähnle 			DRM_INFO("amdgpu: disabling CU %u.%u.%u\n", se, sh, cu);
1296f8941a2SNicolai Hähnle 			mask[se * max_sh + sh] |= 1u << cu;
1306f8941a2SNicolai Hähnle 		} else {
1316f8941a2SNicolai Hähnle 			DRM_ERROR("amdgpu: disable_cu %u.%u.%u is out of range\n",
1326f8941a2SNicolai Hähnle 				  se, sh, cu);
1336f8941a2SNicolai Hähnle 		}
1346f8941a2SNicolai Hähnle 
1356f8941a2SNicolai Hähnle 		next = strchr(p, ',');
1366f8941a2SNicolai Hähnle 		if (!next)
1376f8941a2SNicolai Hähnle 			break;
1386f8941a2SNicolai Hähnle 		p = next + 1;
1396f8941a2SNicolai Hähnle 	}
1406f8941a2SNicolai Hähnle }
14141f6a99aSAlex Deucher 
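/*
 * Example of the parameter format accepted above (illustrative values):
 * passing disable_cu=0.0.3,1.0.7 as the amdgpu module parameter is parsed
 * as comma-separated "se.sh.cu" triplets and results in
 * mask[0 * max_sh + 0] |= 1u << 3 and mask[1 * max_sh + 0] |= 1u << 7,
 * i.e. CU 3 of SE0/SH0 and CU 7 of SE1/SH0 are disabled.
 */
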
142b07d1d73SArunpravin Paneer Selvam static bool amdgpu_gfx_is_graphics_multipipe_capable(struct amdgpu_device *adev)
143b07d1d73SArunpravin Paneer Selvam {
144b07d1d73SArunpravin Paneer Selvam 	return amdgpu_async_gfx_ring && adev->gfx.me.num_pipe_per_me > 1;
145b07d1d73SArunpravin Paneer Selvam }
146b07d1d73SArunpravin Paneer Selvam 
147b07d1d73SArunpravin Paneer Selvam static bool amdgpu_gfx_is_compute_multipipe_capable(struct amdgpu_device *adev)
1480f7607d4SAndres Rodriguez {
1494a75aefeSAndres Rodriguez 	if (amdgpu_compute_multipipe != -1) {
1504a75aefeSAndres Rodriguez 		DRM_INFO("amdgpu: forcing compute pipe policy %d\n",
1514a75aefeSAndres Rodriguez 			 amdgpu_compute_multipipe);
1524a75aefeSAndres Rodriguez 		return amdgpu_compute_multipipe == 1;
1534a75aefeSAndres Rodriguez 	}
1544a75aefeSAndres Rodriguez 
1554e8303cfSLijo Lazar 	if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
15625959dd6SLang Yu 		return true;
15725959dd6SLang Yu 
1580f7607d4SAndres Rodriguez 	/* FIXME: spreading the queues across pipes causes perf regressions
1590f7607d4SAndres Rodriguez 	 * on POLARIS11 compute workloads */
1600f7607d4SAndres Rodriguez 	if (adev->asic_type == CHIP_POLARIS11)
1610f7607d4SAndres Rodriguez 		return false;
1620f7607d4SAndres Rodriguez 
1630f7607d4SAndres Rodriguez 	return adev->gfx.mec.num_mec > 1;
1640f7607d4SAndres Rodriguez }
1650f7607d4SAndres Rodriguez 
166b07d1d73SArunpravin Paneer Selvam bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev,
167b07d1d73SArunpravin Paneer Selvam 						struct amdgpu_ring *ring)
168b07d1d73SArunpravin Paneer Selvam {
169b07d1d73SArunpravin Paneer Selvam 	int queue = ring->queue;
170b07d1d73SArunpravin Paneer Selvam 	int pipe = ring->pipe;
171b07d1d73SArunpravin Paneer Selvam 
172b07d1d73SArunpravin Paneer Selvam 	/* Policy: use pipe1 queue0 as high priority graphics queue if we
173b07d1d73SArunpravin Paneer Selvam 	 * have more than one gfx pipe.
174b07d1d73SArunpravin Paneer Selvam 	 */
175b07d1d73SArunpravin Paneer Selvam 	if (amdgpu_gfx_is_graphics_multipipe_capable(adev) &&
176b07d1d73SArunpravin Paneer Selvam 	    adev->gfx.num_gfx_rings > 1 && pipe == 1 && queue == 0) {
177b07d1d73SArunpravin Paneer Selvam 		int me = ring->me;
178b07d1d73SArunpravin Paneer Selvam 		int bit;
179b07d1d73SArunpravin Paneer Selvam 
180b07d1d73SArunpravin Paneer Selvam 		bit = amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue);
181b07d1d73SArunpravin Paneer Selvam 		if (ring == &adev->gfx.gfx_ring[bit])
182b07d1d73SArunpravin Paneer Selvam 			return true;
183b07d1d73SArunpravin Paneer Selvam 	}
184b07d1d73SArunpravin Paneer Selvam 
185b07d1d73SArunpravin Paneer Selvam 	return false;
186b07d1d73SArunpravin Paneer Selvam }
187b07d1d73SArunpravin Paneer Selvam 
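/*
 * For example (assuming one ME with two pipes, one queue per pipe and
 * num_gfx_rings == 2): pipe 1 / queue 0 linearizes to bit
 * 0 * (2 * 1) + 1 * 1 + 0 = 1, so gfx_ring[1] is reported as the high
 * priority graphics ring while gfx_ring[0] keeps the default priority.
 */
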
18833abcb1fSNirmoy Das bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
1898c0225d7SNirmoy Das 					       struct amdgpu_ring *ring)
19033abcb1fSNirmoy Das {
1918c0225d7SNirmoy Das 	/* Policy: use 1st queue as high priority compute queue if we
1928c0225d7SNirmoy Das 	 * have more than one compute queue.
1938c0225d7SNirmoy Das 	 */
1948c0225d7SNirmoy Das 	if (adev->gfx.num_compute_rings > 1 &&
1958c0225d7SNirmoy Das 	    ring == &adev->gfx.compute_ring[0])
1968c0225d7SNirmoy Das 		return true;
1973f66bf40SNirmoy Das 
1988c0225d7SNirmoy Das 	return false;
19933abcb1fSNirmoy Das }
20033abcb1fSNirmoy Das 
20141f6a99aSAlex Deucher void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
20241f6a99aSAlex Deucher {
203be697aa3SLe Ma 	int i, j, queue, pipe;
204b07d1d73SArunpravin Paneer Selvam 	bool multipipe_policy = amdgpu_gfx_is_compute_multipipe_capable(adev);
205a300de40SMonk Liu 	int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
206a300de40SMonk Liu 				     adev->gfx.mec.num_queue_per_pipe,
207a300de40SMonk Liu 				     adev->gfx.num_compute_rings);
2088078f1c6SLijo Lazar 	int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
20941f6a99aSAlex Deucher 
2100f7607d4SAndres Rodriguez 	if (multipipe_policy) {
211be697aa3SLe Ma 		/* policy: spread queues evenly across all pipes on MEC1 only;
212be697aa3SLe Ma 		 * for multiple XCCs, just reuse the original policy for simplicity */
2138078f1c6SLijo Lazar 		for (j = 0; j < num_xcc; j++) {
214a300de40SMonk Liu 			for (i = 0; i < max_queues_per_mec; i++) {
215a300de40SMonk Liu 				pipe = i % adev->gfx.mec.num_pipe_per_mec;
216a300de40SMonk Liu 				queue = (i / adev->gfx.mec.num_pipe_per_mec) %
217a300de40SMonk Liu 					 adev->gfx.mec.num_queue_per_pipe;
218a300de40SMonk Liu 
219a300de40SMonk Liu 				set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
220be697aa3SLe Ma 					adev->gfx.mec_bitmap[j].queue_bitmap);
221be697aa3SLe Ma 			}
222a300de40SMonk Liu 		}
22341f6a99aSAlex Deucher 	} else {
224a300de40SMonk Liu 		/* policy: amdgpu owns all queues in the given pipe */
2258078f1c6SLijo Lazar 		for (j = 0; j < num_xcc; j++) {
226a300de40SMonk Liu 			for (i = 0; i < max_queues_per_mec; ++i)
227be697aa3SLe Ma 				set_bit(i, adev->gfx.mec_bitmap[j].queue_bitmap);
228be697aa3SLe Ma 		}
22941f6a99aSAlex Deucher 	}
23041f6a99aSAlex Deucher 
2318078f1c6SLijo Lazar 	for (j = 0; j < num_xcc; j++) {
232be697aa3SLe Ma 		dev_dbg(adev->dev, "mec queue bitmap weight=%d\n",
233be697aa3SLe Ma 			bitmap_weight(adev->gfx.mec_bitmap[j].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
234be697aa3SLe Ma 	}
23541f6a99aSAlex Deucher }
23671c37505SAlex Deucher 
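/*
 * Illustration of the two policies above (example counts only): with
 * 4 pipes per MEC, 8 queues per pipe and num_compute_rings == 8, the
 * multipipe policy claims (pipe, queue) = (0,0) (1,0) (2,0) (3,0)
 * (0,1) (1,1) (2,1) (3,1), i.e. two queues on every MEC1 pipe, while
 * the single-pipe policy simply sets bits 0..7, i.e. all eight queues
 * of MEC1 pipe 0.
 */
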
237e537c994SHawking Zhang void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
238e537c994SHawking Zhang {
239b07d1d73SArunpravin Paneer Selvam 	int i, queue, pipe;
240b07d1d73SArunpravin Paneer Selvam 	bool multipipe_policy = amdgpu_gfx_is_graphics_multipipe_capable(adev);
241b07d1d73SArunpravin Paneer Selvam 	int max_queues_per_me = adev->gfx.me.num_pipe_per_me *
242b07d1d73SArunpravin Paneer Selvam 					adev->gfx.me.num_queue_per_pipe;
243e537c994SHawking Zhang 
244b07d1d73SArunpravin Paneer Selvam 	if (multipipe_policy) {
245e537c994SHawking Zhang 		/* policy: amdgpu owns the first queue per pipe at this stage;
246e537c994SHawking Zhang 		 * this will be extended to multiple queues per pipe later */
247b07d1d73SArunpravin Paneer Selvam 		for (i = 0; i < max_queues_per_me; i++) {
248b07d1d73SArunpravin Paneer Selvam 			pipe = i % adev->gfx.me.num_pipe_per_me;
249b07d1d73SArunpravin Paneer Selvam 			queue = (i / adev->gfx.me.num_pipe_per_me) %
250b07d1d73SArunpravin Paneer Selvam 				adev->gfx.me.num_queue_per_pipe;
251b07d1d73SArunpravin Paneer Selvam 
252b07d1d73SArunpravin Paneer Selvam 			set_bit(pipe * adev->gfx.me.num_queue_per_pipe + queue,
253b07d1d73SArunpravin Paneer Selvam 				adev->gfx.me.queue_bitmap);
254b07d1d73SArunpravin Paneer Selvam 		}
255b07d1d73SArunpravin Paneer Selvam 	} else {
256b07d1d73SArunpravin Paneer Selvam 		for (i = 0; i < max_queues_per_me; ++i)
257e537c994SHawking Zhang 			set_bit(i, adev->gfx.me.queue_bitmap);
258e537c994SHawking Zhang 	}
259e537c994SHawking Zhang 
260e537c994SHawking Zhang 	/* update the number of active graphics rings */
261e537c994SHawking Zhang 	adev->gfx.num_gfx_rings =
262e537c994SHawking Zhang 		bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
263e537c994SHawking Zhang }
264e537c994SHawking Zhang 
26571c37505SAlex Deucher static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
266def799c6SLe Ma 				  struct amdgpu_ring *ring, int xcc_id)
26771c37505SAlex Deucher {
26871c37505SAlex Deucher 	int queue_bit;
26971c37505SAlex Deucher 	int mec, pipe, queue;
27071c37505SAlex Deucher 
27171c37505SAlex Deucher 	queue_bit = adev->gfx.mec.num_mec
27271c37505SAlex Deucher 		    * adev->gfx.mec.num_pipe_per_mec
27371c37505SAlex Deucher 		    * adev->gfx.mec.num_queue_per_pipe;
27471c37505SAlex Deucher 
2751647b54eSDan Carpenter 	while (--queue_bit >= 0) {
276def799c6SLe Ma 		if (test_bit(queue_bit, adev->gfx.mec_bitmap[xcc_id].queue_bitmap))
27771c37505SAlex Deucher 			continue;
27871c37505SAlex Deucher 
2795c180eb9SYong Zhao 		amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);
28071c37505SAlex Deucher 
28159fd27cdSHuang Rui 		/*
28259fd27cdSHuang Rui 		 * 1. Using pipes 2/3 from MEC 2 seems to cause problems.
28359fd27cdSHuang Rui 		 * 2. It must use queue id 0, because CGPG_IDLE/SAVE/LOAD/RUN
28459fd27cdSHuang Rui 		 * can only be issued on queue 0.
28559fd27cdSHuang Rui 		 */
28659fd27cdSHuang Rui 		if ((mec == 1 && pipe > 1) || queue != 0)
28771c37505SAlex Deucher 			continue;
28871c37505SAlex Deucher 
28971c37505SAlex Deucher 		ring->me = mec + 1;
29071c37505SAlex Deucher 		ring->pipe = pipe;
29171c37505SAlex Deucher 		ring->queue = queue;
29271c37505SAlex Deucher 
29371c37505SAlex Deucher 		return 0;
29471c37505SAlex Deucher 	}
29571c37505SAlex Deucher 
29671c37505SAlex Deucher 	dev_err(adev->dev, "Failed to find a queue for KIQ\n");
29771c37505SAlex Deucher 	return -EINVAL;
29871c37505SAlex Deucher }
29971c37505SAlex Deucher 
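/*
 * Example (2 MECs, 4 pipes per MEC, 8 queues per pipe, and the candidate
 * bit not already claimed in the compute bitmap): the search above walks
 * from the highest bit down, rejects every slot with queue != 0 and every
 * MEC2 pipe 2/3 slot, and settles on MEC2 pipe 1 queue 0, i.e.
 * ring->me = 2, ring->pipe = 1, ring->queue = 0.
 */
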
3004acd31e6SMa Jun int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, int xcc_id)
30171c37505SAlex Deucher {
302def799c6SLe Ma 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
3034acd31e6SMa Jun 	struct amdgpu_irq_src *irq = &kiq->irq;
3044acd31e6SMa Jun 	struct amdgpu_ring *ring = &kiq->ring;
30571c37505SAlex Deucher 	int r = 0;
30671c37505SAlex Deucher 
30743ca8efaSpding 	spin_lock_init(&kiq->ring_lock);
30871c37505SAlex Deucher 
30971c37505SAlex Deucher 	ring->adev = NULL;
31071c37505SAlex Deucher 	ring->ring_obj = NULL;
31171c37505SAlex Deucher 	ring->use_doorbell = true;
312def799c6SLe Ma 	ring->xcc_id = xcc_id;
3133566938bSLe Ma 	ring->vm_hub = AMDGPU_GFXHUB(xcc_id);
314233bb373SLijo Lazar 	ring->doorbell_index =
315233bb373SLijo Lazar 		(adev->doorbell_index.kiq +
316233bb373SLijo Lazar 		 xcc_id * adev->doorbell_index.xcc_doorbell_range)
317233bb373SLijo Lazar 		<< 1;
31871c37505SAlex Deucher 
319def799c6SLe Ma 	r = amdgpu_gfx_kiq_acquire(adev, ring, xcc_id);
32071c37505SAlex Deucher 	if (r)
32171c37505SAlex Deucher 		return r;
32271c37505SAlex Deucher 
32371c37505SAlex Deucher 	ring->eop_gpu_addr = kiq->eop_gpu_addr;
324a783910dSAlex Deucher 	ring->no_scheduler = true;
3250ea55445SSrinivasan Shanmugam 	snprintf(ring->name, sizeof(ring->name), "kiq_%hhu.%hhu.%hhu.%hhu",
326745f7170SSrinivasan Shanmugam 		 (unsigned char)xcc_id, (unsigned char)ring->me,
327745f7170SSrinivasan Shanmugam 		 (unsigned char)ring->pipe, (unsigned char)ring->queue);
328c107171bSChristian König 	r = amdgpu_ring_init(adev, ring, 1024, irq, AMDGPU_CP_KIQ_IRQ_DRIVER0,
329c107171bSChristian König 			     AMDGPU_RING_PRIO_DEFAULT, NULL);
33071c37505SAlex Deucher 	if (r)
33171c37505SAlex Deucher 		dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
33271c37505SAlex Deucher 
33371c37505SAlex Deucher 	return r;
33471c37505SAlex Deucher }
33571c37505SAlex Deucher 
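/*
 * Doorbell layout example (values are illustrative): with
 * adev->doorbell_index.kiq == K and xcc_doorbell_range == R, the KIQ of
 * XCC n is assigned doorbell index (K + n * R) << 1 above, so each XCC's
 * KIQ lands in its own, non-overlapping doorbell range.
 */
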
3369f0256daSNirmoy Das void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)
33771c37505SAlex Deucher {
33871c37505SAlex Deucher 	amdgpu_ring_fini(ring);
33971c37505SAlex Deucher }
34071c37505SAlex Deucher 
341def799c6SLe Ma void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev, int xcc_id)
34271c37505SAlex Deucher {
343def799c6SLe Ma 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
34471c37505SAlex Deucher 
34571c37505SAlex Deucher 	amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
34671c37505SAlex Deucher }
34771c37505SAlex Deucher 
34871c37505SAlex Deucher int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
34950fbe0ccSSrinivasan Shanmugam 			unsigned int hpd_size, int xcc_id)
35071c37505SAlex Deucher {
35171c37505SAlex Deucher 	int r;
35271c37505SAlex Deucher 	u32 *hpd;
353def799c6SLe Ma 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
35471c37505SAlex Deucher 
35571c37505SAlex Deucher 	r = amdgpu_bo_create_kernel(adev, hpd_size, PAGE_SIZE,
35671c37505SAlex Deucher 				    AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
35771c37505SAlex Deucher 				    &kiq->eop_gpu_addr, (void **)&hpd);
35871c37505SAlex Deucher 	if (r) {
35971c37505SAlex Deucher 		dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
36071c37505SAlex Deucher 		return r;
36171c37505SAlex Deucher 	}
36271c37505SAlex Deucher 
36371c37505SAlex Deucher 	memset(hpd, 0, hpd_size);
36471c37505SAlex Deucher 
36571c37505SAlex Deucher 	r = amdgpu_bo_reserve(kiq->eop_obj, true);
36671c37505SAlex Deucher 	if (unlikely(r != 0))
36771c37505SAlex Deucher 		dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
36871c37505SAlex Deucher 	amdgpu_bo_kunmap(kiq->eop_obj);
36971c37505SAlex Deucher 	amdgpu_bo_unreserve(kiq->eop_obj);
37071c37505SAlex Deucher 
37171c37505SAlex Deucher 	return 0;
37271c37505SAlex Deucher }
373b9683c21SAlex Deucher 
3744fc6a88fSHawking Zhang /* create MQD for each compute/gfx queue */
3754fc6a88fSHawking Zhang int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
37650fbe0ccSSrinivasan Shanmugam 			   unsigned int mqd_size, int xcc_id)
377b9683c21SAlex Deucher {
37834305ac3SGuchun Chen 	int r, i, j;
379def799c6SLe Ma 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
380def799c6SLe Ma 	struct amdgpu_ring *ring = &kiq->ring;
3811cfb4d61SAlex Deucher 	u32 domain = AMDGPU_GEM_DOMAIN_GTT;
3821cfb4d61SAlex Deucher 
383ba0fb4b4SAlex Deucher #if !defined(CONFIG_ARM) && !defined(CONFIG_ARM64)
3841cfb4d61SAlex Deucher 	/* Only enable on gfx10 and 11 for now to avoid changing behavior on older chips */
3854e8303cfSLijo Lazar 	if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 0, 0))
3861cfb4d61SAlex Deucher 		domain |= AMDGPU_GEM_DOMAIN_VRAM;
387ba0fb4b4SAlex Deucher #endif
388b9683c21SAlex Deucher 
389b9683c21SAlex Deucher 	/* create MQD for KIQ */
39018ee4ce6SJack Xiao 	if (!adev->enable_mes_kiq && !ring->mqd_obj) {
391beb84102SMonk Liu 		/* originally the KIQ MQD was put in the GTT domain, but for SRIOV the VRAM domain is a must,
392beb84102SMonk Liu 		 * otherwise the hypervisor triggers a SAVE_VF failure after the driver is unloaded, because
393beb84102SMonk Liu 		 * the MQD has already been deallocated and gart_unbind called. To avoid that divergence, use
394beb84102SMonk Liu 		 * the VRAM domain for the KIQ MQD on both SRIOV and bare-metal
395beb84102SMonk Liu 		 */
396b9683c21SAlex Deucher 		r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
39758ab2c08SChristian König 					    AMDGPU_GEM_DOMAIN_VRAM |
39858ab2c08SChristian König 					    AMDGPU_GEM_DOMAIN_GTT,
39958ab2c08SChristian König 					    &ring->mqd_obj,
40058ab2c08SChristian König 					    &ring->mqd_gpu_addr,
40158ab2c08SChristian König 					    &ring->mqd_ptr);
402b9683c21SAlex Deucher 		if (r) {
403b9683c21SAlex Deucher 			dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
404b9683c21SAlex Deucher 			return r;
405b9683c21SAlex Deucher 		}
406b9683c21SAlex Deucher 
407b9683c21SAlex Deucher 		/* prepare MQD backup */
4088e3a3e84SLijo Lazar 		kiq->mqd_backup = kzalloc(mqd_size, GFP_KERNEL);
40937c3fc66SSrinivasan Shanmugam 		if (!kiq->mqd_backup) {
41037c3fc66SSrinivasan Shanmugam 			dev_warn(adev->dev,
41137c3fc66SSrinivasan Shanmugam 				 "no memory to create MQD backup for ring %s\n", ring->name);
41237c3fc66SSrinivasan Shanmugam 			return -ENOMEM;
41337c3fc66SSrinivasan Shanmugam 		}
414b9683c21SAlex Deucher 	}
415b9683c21SAlex Deucher 
4165e0f378dSXiaojie Yuan 	if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
41754fc4472SHawking Zhang 		/* create MQD for each KGQ */
41854fc4472SHawking Zhang 		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
41954fc4472SHawking Zhang 			ring = &adev->gfx.gfx_ring[i];
42054fc4472SHawking Zhang 			if (!ring->mqd_obj) {
42154fc4472SHawking Zhang 				r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
4221cfb4d61SAlex Deucher 							    domain, &ring->mqd_obj,
42354fc4472SHawking Zhang 							    &ring->mqd_gpu_addr, &ring->mqd_ptr);
42454fc4472SHawking Zhang 				if (r) {
42554fc4472SHawking Zhang 					dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
42654fc4472SHawking Zhang 					return r;
42754fc4472SHawking Zhang 				}
42854fc4472SHawking Zhang 
429b185c318SAlex Deucher 				ring->mqd_size = mqd_size;
43054fc4472SHawking Zhang 				/* prepare MQD backup */
4318e3a3e84SLijo Lazar 				adev->gfx.me.mqd_backup[i] = kzalloc(mqd_size, GFP_KERNEL);
43237c3fc66SSrinivasan Shanmugam 				if (!adev->gfx.me.mqd_backup[i]) {
43354fc4472SHawking Zhang 					dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
43437c3fc66SSrinivasan Shanmugam 					return -ENOMEM;
43537c3fc66SSrinivasan Shanmugam 				}
43654fc4472SHawking Zhang 			}
43754fc4472SHawking Zhang 		}
43854fc4472SHawking Zhang 	}
43954fc4472SHawking Zhang 
440b9683c21SAlex Deucher 	/* create MQD for each KCQ */
441b9683c21SAlex Deucher 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
44234305ac3SGuchun Chen 		j = i + xcc_id * adev->gfx.num_compute_rings;
44334305ac3SGuchun Chen 		ring = &adev->gfx.compute_ring[j];
444b9683c21SAlex Deucher 		if (!ring->mqd_obj) {
445b9683c21SAlex Deucher 			r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
4461cfb4d61SAlex Deucher 						    domain, &ring->mqd_obj,
447b9683c21SAlex Deucher 						    &ring->mqd_gpu_addr, &ring->mqd_ptr);
448b9683c21SAlex Deucher 			if (r) {
44954fc4472SHawking Zhang 				dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
450b9683c21SAlex Deucher 				return r;
451b9683c21SAlex Deucher 			}
452b9683c21SAlex Deucher 
453b185c318SAlex Deucher 			ring->mqd_size = mqd_size;
454b9683c21SAlex Deucher 			/* prepare MQD backup */
4558e3a3e84SLijo Lazar 			adev->gfx.mec.mqd_backup[j] = kzalloc(mqd_size, GFP_KERNEL);
45650fbe0ccSSrinivasan Shanmugam 			if (!adev->gfx.mec.mqd_backup[j]) {
457b9683c21SAlex Deucher 				dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
45850fbe0ccSSrinivasan Shanmugam 				return -ENOMEM;
45950fbe0ccSSrinivasan Shanmugam 			}
460b9683c21SAlex Deucher 		}
461b9683c21SAlex Deucher 	}
462b9683c21SAlex Deucher 
463b9683c21SAlex Deucher 	return 0;
464b9683c21SAlex Deucher }
465b9683c21SAlex Deucher 
466def799c6SLe Ma void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev, int xcc_id)
467b9683c21SAlex Deucher {
468b9683c21SAlex Deucher 	struct amdgpu_ring *ring = NULL;
469def799c6SLe Ma 	int i, j;
470def799c6SLe Ma 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
471b9683c21SAlex Deucher 
4725e0f378dSXiaojie Yuan 	if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
47354fc4472SHawking Zhang 		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
47454fc4472SHawking Zhang 			ring = &adev->gfx.gfx_ring[i];
47554fc4472SHawking Zhang 			kfree(adev->gfx.me.mqd_backup[i]);
47654fc4472SHawking Zhang 			amdgpu_bo_free_kernel(&ring->mqd_obj,
47754fc4472SHawking Zhang 					      &ring->mqd_gpu_addr,
47854fc4472SHawking Zhang 					      &ring->mqd_ptr);
47954fc4472SHawking Zhang 		}
48054fc4472SHawking Zhang 	}
48154fc4472SHawking Zhang 
482b9683c21SAlex Deucher 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
483def799c6SLe Ma 		j = i + xcc_id * adev->gfx.num_compute_rings;
4847a4685cdSGuchun Chen 		ring = &adev->gfx.compute_ring[j];
4857a4685cdSGuchun Chen 		kfree(adev->gfx.mec.mqd_backup[j]);
486b9683c21SAlex Deucher 		amdgpu_bo_free_kernel(&ring->mqd_obj,
487b9683c21SAlex Deucher 				      &ring->mqd_gpu_addr,
488b9683c21SAlex Deucher 				      &ring->mqd_ptr);
489b9683c21SAlex Deucher 	}
490b9683c21SAlex Deucher 
491def799c6SLe Ma 	ring = &kiq->ring;
492def799c6SLe Ma 	kfree(kiq->mqd_backup);
493b9683c21SAlex Deucher 	amdgpu_bo_free_kernel(&ring->mqd_obj,
494b9683c21SAlex Deucher 			      &ring->mqd_gpu_addr,
495b9683c21SAlex Deucher 			      &ring->mqd_ptr);
496b9683c21SAlex Deucher }
497d23ee13fSRex Zhu 
498def799c6SLe Ma int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id)
499ba0c13b7SRex Zhu {
500def799c6SLe Ma 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
501ba0c13b7SRex Zhu 	struct amdgpu_ring *kiq_ring = &kiq->ring;
50218ee4ce6SJack Xiao 	int i, r = 0;
503def799c6SLe Ma 	int j;
504ba0c13b7SRex Zhu 
505f7fb9d67SJack Xiao 	if (adev->enable_mes) {
506f7fb9d67SJack Xiao 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
507f7fb9d67SJack Xiao 			j = i + xcc_id * adev->gfx.num_compute_rings;
508f7fb9d67SJack Xiao 			amdgpu_mes_unmap_legacy_queue(adev,
509f7fb9d67SJack Xiao 						   &adev->gfx.compute_ring[j],
510f7fb9d67SJack Xiao 						   RESET_QUEUES, 0, 0);
511f7fb9d67SJack Xiao 		}
512f7fb9d67SJack Xiao 		return 0;
513f7fb9d67SJack Xiao 	}
514f7fb9d67SJack Xiao 
515ba0c13b7SRex Zhu 	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
516ba0c13b7SRex Zhu 		return -EINVAL;
517ba0c13b7SRex Zhu 
51811815bb0SChristian König 	if (!kiq_ring->sched.ready || amdgpu_in_reset(adev))
519fa317985SLijo Lazar 		return 0;
520fa317985SLijo Lazar 
521def799c6SLe Ma 	spin_lock(&kiq->ring_lock);
522ba0c13b7SRex Zhu 	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
5235a8cd98eSNirmoy Das 					adev->gfx.num_compute_rings)) {
52466daccdeSLe Ma 		spin_unlock(&kiq->ring_lock);
525ba0c13b7SRex Zhu 		return -ENOMEM;
5265a8cd98eSNirmoy Das 	}
527ba0c13b7SRex Zhu 
528def799c6SLe Ma 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
529def799c6SLe Ma 		j = i + xcc_id * adev->gfx.num_compute_rings;
53066daccdeSLe Ma 		kiq->pmf->kiq_unmap_queues(kiq_ring,
5318cce1682SSrinivasan Shanmugam 					   &adev->gfx.compute_ring[j],
53219191961SJack Xiao 					   RESET_QUEUES, 0, 0);
533def799c6SLe Ma 	}
534fa317985SLijo Lazar 	/* Submit unmap queue packet */
535fa317985SLijo Lazar 	amdgpu_ring_commit(kiq_ring);
536fa317985SLijo Lazar 	/*
537fa317985SLijo Lazar 	 * Ring test will do a basic scratch register change check. Just run
538fa317985SLijo Lazar 	 * it to ensure that the unmap queues packet submitted above was
539fa317985SLijo Lazar 	 * processed successfully before returning.
540b1338a8eSStanley.Yang 	 */
5415a8cd98eSNirmoy Das 	r = amdgpu_ring_test_helper(kiq_ring);
542fa317985SLijo Lazar 
543def799c6SLe Ma 	spin_unlock(&kiq->ring_lock);
544ba0c13b7SRex Zhu 
5455a8cd98eSNirmoy Das 	return r;
546ba0c13b7SRex Zhu }
547ba0c13b7SRex Zhu 
5481156e1a6SAlex Deucher int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id)
5491156e1a6SAlex Deucher {
5501156e1a6SAlex Deucher 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
5511156e1a6SAlex Deucher 	struct amdgpu_ring *kiq_ring = &kiq->ring;
5521156e1a6SAlex Deucher 	int i, r = 0;
5531156e1a6SAlex Deucher 	int j;
5541156e1a6SAlex Deucher 
555f7fb9d67SJack Xiao 	if (adev->enable_mes) {
556f7fb9d67SJack Xiao 		if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
557f7fb9d67SJack Xiao 			for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
558f7fb9d67SJack Xiao 				j = i + xcc_id * adev->gfx.num_gfx_rings;
559f7fb9d67SJack Xiao 				amdgpu_mes_unmap_legacy_queue(adev,
560f7fb9d67SJack Xiao 						      &adev->gfx.gfx_ring[j],
561f7fb9d67SJack Xiao 						      PREEMPT_QUEUES, 0, 0);
562f7fb9d67SJack Xiao 			}
563f7fb9d67SJack Xiao 		}
564f7fb9d67SJack Xiao 		return 0;
565f7fb9d67SJack Xiao 	}
566f7fb9d67SJack Xiao 
5671156e1a6SAlex Deucher 	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
5681156e1a6SAlex Deucher 		return -EINVAL;
5691156e1a6SAlex Deucher 
57011815bb0SChristian König 	if (!adev->gfx.kiq[0].ring.sched.ready || amdgpu_in_reset(adev))
571fa317985SLijo Lazar 		return 0;
572fa317985SLijo Lazar 
5731156e1a6SAlex Deucher 	if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
574fa317985SLijo Lazar 		spin_lock(&kiq->ring_lock);
5751156e1a6SAlex Deucher 		if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
5761156e1a6SAlex Deucher 						adev->gfx.num_gfx_rings)) {
5771156e1a6SAlex Deucher 			spin_unlock(&kiq->ring_lock);
5781156e1a6SAlex Deucher 			return -ENOMEM;
5791156e1a6SAlex Deucher 		}
5801156e1a6SAlex Deucher 
5811156e1a6SAlex Deucher 		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
5821156e1a6SAlex Deucher 			j = i + xcc_id * adev->gfx.num_gfx_rings;
5831156e1a6SAlex Deucher 			kiq->pmf->kiq_unmap_queues(kiq_ring,
5848cce1682SSrinivasan Shanmugam 						   &adev->gfx.gfx_ring[j],
5851156e1a6SAlex Deucher 						   PREEMPT_QUEUES, 0, 0);
5861156e1a6SAlex Deucher 		}
587fa317985SLijo Lazar 		/* Submit unmap queue packet */
588fa317985SLijo Lazar 		amdgpu_ring_commit(kiq_ring);
5891156e1a6SAlex Deucher 
590fa317985SLijo Lazar 		/*
591fa317985SLijo Lazar 		 * Ring test will do a basic scratch register change check.
592fa317985SLijo Lazar 		 * Just run it to ensure that the unmap queues packet submitted
593fa317985SLijo Lazar 		 * above was processed successfully before returning.
594fa317985SLijo Lazar 		 */
5951156e1a6SAlex Deucher 		r = amdgpu_ring_test_helper(kiq_ring);
5961156e1a6SAlex Deucher 		spin_unlock(&kiq->ring_lock);
597fa317985SLijo Lazar 	}
5981156e1a6SAlex Deucher 
5991156e1a6SAlex Deucher 	return r;
6001156e1a6SAlex Deucher }
6011156e1a6SAlex Deucher 
6025c180eb9SYong Zhao int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
6033ab6fe4bSLikun Gao 					int queue_bit)
6043ab6fe4bSLikun Gao {
6053ab6fe4bSLikun Gao 	int mec, pipe, queue;
6065c180eb9SYong Zhao 	int set_resource_bit = 0;
6073ab6fe4bSLikun Gao 
6085c180eb9SYong Zhao 	amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);
6093ab6fe4bSLikun Gao 
6105c180eb9SYong Zhao 	set_resource_bit = mec * 4 * 8 + pipe * 8 + queue;
6113ab6fe4bSLikun Gao 
6125c180eb9SYong Zhao 	return set_resource_bit;
6133ab6fe4bSLikun Gao }
6143ab6fe4bSLikun Gao 
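/*
 * Example (hypothetical driver config of 2 pipes per MEC and 4 queues per
 * pipe): driver queue bit 5 decodes to mec 0 / pipe 1 / queue 1, which the
 * fixed 4-pipe x 8-queue SET_RESOURCES layout above places at bit
 * 0 * 32 + 1 * 8 + 1 = 9.
 */
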
615745e0a90SJack Xiao static int amdgpu_gfx_mes_enable_kcq(struct amdgpu_device *adev, int xcc_id)
616745e0a90SJack Xiao {
617745e0a90SJack Xiao 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
618745e0a90SJack Xiao 	struct amdgpu_ring *kiq_ring = &kiq->ring;
619745e0a90SJack Xiao 	uint64_t queue_mask = ~0ULL;
620745e0a90SJack Xiao 	int r, i, j;
621745e0a90SJack Xiao 
622745e0a90SJack Xiao 	amdgpu_device_flush_hdp(adev, NULL);
623745e0a90SJack Xiao 
624745e0a90SJack Xiao 	if (!adev->enable_uni_mes) {
625745e0a90SJack Xiao 		spin_lock(&kiq->ring_lock);
626745e0a90SJack Xiao 		r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->set_resources_size);
627745e0a90SJack Xiao 		if (r) {
628745e0a90SJack Xiao 			dev_err(adev->dev, "Failed to lock KIQ (%d).\n", r);
629745e0a90SJack Xiao 			spin_unlock(&kiq->ring_lock);
630745e0a90SJack Xiao 			return r;
631745e0a90SJack Xiao 		}
632745e0a90SJack Xiao 
633745e0a90SJack Xiao 		kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
634745e0a90SJack Xiao 		r = amdgpu_ring_test_helper(kiq_ring);
635745e0a90SJack Xiao 		spin_unlock(&kiq->ring_lock);
636745e0a90SJack Xiao 		if (r)
637745e0a90SJack Xiao 			dev_err(adev->dev, "KIQ failed to set resources\n");
638745e0a90SJack Xiao 	}
639745e0a90SJack Xiao 
640745e0a90SJack Xiao 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
641745e0a90SJack Xiao 		j = i + xcc_id * adev->gfx.num_compute_rings;
642745e0a90SJack Xiao 		r = amdgpu_mes_map_legacy_queue(adev,
643745e0a90SJack Xiao 						&adev->gfx.compute_ring[j]);
644745e0a90SJack Xiao 		if (r) {
645745e0a90SJack Xiao 			dev_err(adev->dev, "failed to map compute queue\n");
646745e0a90SJack Xiao 			return r;
647745e0a90SJack Xiao 		}
648745e0a90SJack Xiao 	}
649745e0a90SJack Xiao 
650745e0a90SJack Xiao 	return 0;
651745e0a90SJack Xiao }
652745e0a90SJack Xiao 
653def799c6SLe Ma int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id)
654849aca9fSHawking Zhang {
655def799c6SLe Ma 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
656def799c6SLe Ma 	struct amdgpu_ring *kiq_ring = &kiq->ring;
657849aca9fSHawking Zhang 	uint64_t queue_mask = 0;
658def799c6SLe Ma 	int r, i, j;
659849aca9fSHawking Zhang 
66052491d97SJack Xiao 	if (adev->mes.enable_legacy_queue_map)
661745e0a90SJack Xiao 		return amdgpu_gfx_mes_enable_kcq(adev, xcc_id);
662745e0a90SJack Xiao 
663849aca9fSHawking Zhang 	if (!kiq->pmf || !kiq->pmf->kiq_map_queues || !kiq->pmf->kiq_set_resources)
664849aca9fSHawking Zhang 		return -EINVAL;
665849aca9fSHawking Zhang 
666849aca9fSHawking Zhang 	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
667def799c6SLe Ma 		if (!test_bit(i, adev->gfx.mec_bitmap[xcc_id].queue_bitmap))
668849aca9fSHawking Zhang 			continue;
669849aca9fSHawking Zhang 
670849aca9fSHawking Zhang 		/* This situation may be hit in the future if a new HW
671849aca9fSHawking Zhang 		 * generation exposes more than 64 queues. If so, the
672849aca9fSHawking Zhang 		 * definition of queue_mask needs updating */
673849aca9fSHawking Zhang 		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
674849aca9fSHawking Zhang 			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
675849aca9fSHawking Zhang 			break;
676849aca9fSHawking Zhang 		}
677849aca9fSHawking Zhang 
6785c180eb9SYong Zhao 		queue_mask |= (1ull << amdgpu_queue_mask_bit_to_set_resource_bit(adev, i));
679849aca9fSHawking Zhang 	}
680849aca9fSHawking Zhang 
681f9d8c5c7SJack Xiao 	amdgpu_device_flush_hdp(adev, NULL);
682f9d8c5c7SJack Xiao 
683849aca9fSHawking Zhang 	DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe,
684849aca9fSHawking Zhang 		 kiq_ring->queue);
685e602157eSJack Xiao 
686def799c6SLe Ma 	spin_lock(&kiq->ring_lock);
687849aca9fSHawking Zhang 	r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
688849aca9fSHawking Zhang 					adev->gfx.num_compute_rings +
689849aca9fSHawking Zhang 					kiq->pmf->set_resources_size);
690849aca9fSHawking Zhang 	if (r) {
691849aca9fSHawking Zhang 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
6927a6a2e59SDan Carpenter 		spin_unlock(&kiq->ring_lock);
693849aca9fSHawking Zhang 		return r;
694849aca9fSHawking Zhang 	}
695849aca9fSHawking Zhang 
696849aca9fSHawking Zhang 	kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
697def799c6SLe Ma 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
698def799c6SLe Ma 		j = i + xcc_id * adev->gfx.num_compute_rings;
69966daccdeSLe Ma 		kiq->pmf->kiq_map_queues(kiq_ring,
700147862d0SShiwu Zhang 					 &adev->gfx.compute_ring[j]);
701def799c6SLe Ma 	}
702fa317985SLijo Lazar 	/* Submit map queue packet */
703fa317985SLijo Lazar 	amdgpu_ring_commit(kiq_ring);
704fa317985SLijo Lazar 	/*
705fa317985SLijo Lazar 	 * Ring test will do a basic scratch register change check. Just run
706fa317985SLijo Lazar 	 * it to ensure that the map queues packet submitted above was
707fa317985SLijo Lazar 	 * processed successfully before returning.
708fa317985SLijo Lazar 	 */
709849aca9fSHawking Zhang 	r = amdgpu_ring_test_helper(kiq_ring);
710def799c6SLe Ma 	spin_unlock(&kiq->ring_lock);
711849aca9fSHawking Zhang 	if (r)
712849aca9fSHawking Zhang 		DRM_ERROR("KCQ enable failed\n");
713849aca9fSHawking Zhang 
714849aca9fSHawking Zhang 	return r;
715849aca9fSHawking Zhang }
716849aca9fSHawking Zhang 
7171156e1a6SAlex Deucher int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id)
7181156e1a6SAlex Deucher {
7191156e1a6SAlex Deucher 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
7201156e1a6SAlex Deucher 	struct amdgpu_ring *kiq_ring = &kiq->ring;
7211156e1a6SAlex Deucher 	int r, i, j;
7221156e1a6SAlex Deucher 
7231156e1a6SAlex Deucher 	if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
7241156e1a6SAlex Deucher 		return -EINVAL;
7251156e1a6SAlex Deucher 
726e602157eSJack Xiao 	amdgpu_device_flush_hdp(adev, NULL);
727e602157eSJack Xiao 
72852491d97SJack Xiao 	if (adev->mes.enable_legacy_queue_map) {
729f9d8c5c7SJack Xiao 		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
730f9d8c5c7SJack Xiao 			j = i + xcc_id * adev->gfx.num_gfx_rings;
731f9d8c5c7SJack Xiao 			r = amdgpu_mes_map_legacy_queue(adev,
732f9d8c5c7SJack Xiao 							&adev->gfx.gfx_ring[j]);
733f9d8c5c7SJack Xiao 			if (r) {
734f9d8c5c7SJack Xiao 				DRM_ERROR("failed to map gfx queue\n");
735f9d8c5c7SJack Xiao 				return r;
736f9d8c5c7SJack Xiao 			}
737f9d8c5c7SJack Xiao 		}
738f9d8c5c7SJack Xiao 
739f9d8c5c7SJack Xiao 		return 0;
740f9d8c5c7SJack Xiao 	}
741f9d8c5c7SJack Xiao 
7421156e1a6SAlex Deucher 	spin_lock(&kiq->ring_lock);
7431156e1a6SAlex Deucher 	/* No need to map kgq on the slave */
7441156e1a6SAlex Deucher 	if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
7451156e1a6SAlex Deucher 		r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
7461156e1a6SAlex Deucher 						adev->gfx.num_gfx_rings);
7471156e1a6SAlex Deucher 		if (r) {
7481156e1a6SAlex Deucher 			DRM_ERROR("Failed to lock KIQ (%d).\n", r);
7493fb9dd5fSDan Carpenter 			spin_unlock(&kiq->ring_lock);
7501156e1a6SAlex Deucher 			return r;
7511156e1a6SAlex Deucher 		}
7521156e1a6SAlex Deucher 
7531156e1a6SAlex Deucher 		for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
7541156e1a6SAlex Deucher 			j = i + xcc_id * adev->gfx.num_gfx_rings;
7551156e1a6SAlex Deucher 			kiq->pmf->kiq_map_queues(kiq_ring,
7568cce1682SSrinivasan Shanmugam 						 &adev->gfx.gfx_ring[j]);
7571156e1a6SAlex Deucher 		}
7581156e1a6SAlex Deucher 	}
759fa317985SLijo Lazar 	/* Submit map queue packet */
760fa317985SLijo Lazar 	amdgpu_ring_commit(kiq_ring);
761fa317985SLijo Lazar 	/*
762fa317985SLijo Lazar 	 * Ring test will do a basic scratch register change check. Just run
763fa317985SLijo Lazar 	 * it to ensure that the map queues packet submitted above was
764fa317985SLijo Lazar 	 * processed successfully before returning.
765fa317985SLijo Lazar 	 */
7661156e1a6SAlex Deucher 	r = amdgpu_ring_test_helper(kiq_ring);
7671156e1a6SAlex Deucher 	spin_unlock(&kiq->ring_lock);
7681156e1a6SAlex Deucher 	if (r)
76943bda3e7SPrike Liang 		DRM_ERROR("KGQ enable failed\n");
7701156e1a6SAlex Deucher 
7711156e1a6SAlex Deucher 	return r;
7721156e1a6SAlex Deucher }
7731156e1a6SAlex Deucher 
774250d9769SAlex Deucher static void amdgpu_gfx_do_off_ctrl(struct amdgpu_device *adev, bool enable,
775250d9769SAlex Deucher 				   bool no_delay)
776d23ee13fSRex Zhu {
7771d617c02SLijo Lazar 	unsigned long delay = GFX_OFF_DELAY_ENABLE;
7781d617c02SLijo Lazar 
7793b94fb10SLikun Gao 	if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
780d23ee13fSRex Zhu 		return;
781d23ee13fSRex Zhu 
782d23ee13fSRex Zhu 	mutex_lock(&adev->gfx.gfx_off_mutex);
783d23ee13fSRex Zhu 
78490a92662SMichel Dänzer 	if (enable) {
78590a92662SMichel Dänzer 		/* If the count is already 0, it means there's an imbalance bug somewhere.
78690a92662SMichel Dänzer 		 * Note that the bug may be in a different caller than the one which triggers the
78790a92662SMichel Dänzer 		 * WARN_ON_ONCE.
78890a92662SMichel Dänzer 		 */
78990a92662SMichel Dänzer 		if (WARN_ON_ONCE(adev->gfx.gfx_off_req_count == 0))
79090a92662SMichel Dänzer 			goto unlock;
79190a92662SMichel Dänzer 
792d23ee13fSRex Zhu 		adev->gfx.gfx_off_req_count--;
793d23ee13fSRex Zhu 
7941d617c02SLijo Lazar 		if (adev->gfx.gfx_off_req_count == 0 &&
7951d617c02SLijo Lazar 		    !adev->gfx.gfx_off_state) {
796ce311df9SMario Limonciello 			/* If going to s2idle, no need to wait */
797250d9769SAlex Deucher 			if (no_delay) {
798ce311df9SMario Limonciello 				if (!amdgpu_dpm_set_powergating_by_smu(adev,
799ff69bba0SBoyuan Zhang 						AMD_IP_BLOCK_TYPE_GFX, true, 0))
800ce311df9SMario Limonciello 					adev->gfx.gfx_off_state = true;
801ce311df9SMario Limonciello 			} else {
8021d617c02SLijo Lazar 				schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
8031d617c02SLijo Lazar 					      delay);
8041d617c02SLijo Lazar 			}
805ce311df9SMario Limonciello 		}
80690a92662SMichel Dänzer 	} else {
80790a92662SMichel Dänzer 		if (adev->gfx.gfx_off_req_count == 0) {
80890a92662SMichel Dänzer 			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
80990a92662SMichel Dänzer 
81090a92662SMichel Dänzer 			if (adev->gfx.gfx_off_state &&
811ff69bba0SBoyuan Zhang 			    !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false, 0)) {
812d23ee13fSRex Zhu 				adev->gfx.gfx_off_state = false;
813425a78f4STianci.Yin 
814425a78f4STianci.Yin 				if (adev->gfx.funcs->init_spm_golden) {
81590a92662SMichel Dänzer 					dev_dbg(adev->dev,
81690a92662SMichel Dänzer 						"GFXOFF is disabled, re-init SPM golden settings\n");
817425a78f4STianci.Yin 					amdgpu_gfx_init_spm_golden(adev);
818425a78f4STianci.Yin 				}
819425a78f4STianci.Yin 			}
820d23ee13fSRex Zhu 		}
8211e317b99SRex Zhu 
82290a92662SMichel Dänzer 		adev->gfx.gfx_off_req_count++;
82390a92662SMichel Dänzer 	}
82490a92662SMichel Dänzer 
82590a92662SMichel Dänzer unlock:
826d23ee13fSRex Zhu 	mutex_unlock(&adev->gfx.gfx_off_mutex);
827d23ee13fSRex Zhu }
8286caeee7aSHawking Zhang 
829250d9769SAlex Deucher /* amdgpu_gfx_off_ctrl - Handle gfx off feature enable/disable
830250d9769SAlex Deucher  *
831250d9769SAlex Deucher  * @adev: amdgpu_device pointer
832250d9769SAlex Deucher  * @enable: true: enable gfx off feature, false: disable gfx off feature
833250d9769SAlex Deucher  *
834250d9769SAlex Deucher  * 1. gfx off will be enabled by the gfx IP after gfx CG/PG is enabled.
835250d9769SAlex Deucher  * 2. Other clients can send requests to disable gfx off; such requests should be honored.
836250d9769SAlex Deucher  * 3. Other clients can cancel their requests to disable gfx off.
837250d9769SAlex Deucher  * 4. Other clients should not request to enable gfx off before they have requested to disable it (requests must be paired).
838250d9769SAlex Deucher  *
839250d9769SAlex Deucher  * Allowing gfx off is delayed by GFX_OFF_DELAY_ENABLE (100 ms).
840250d9769SAlex Deucher  */
841250d9769SAlex Deucher void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
842250d9769SAlex Deucher {
843250d9769SAlex Deucher 	/* If going to s2idle, no need to wait */
844250d9769SAlex Deucher 	bool no_delay = adev->in_s0ix;
845250d9769SAlex Deucher 
846250d9769SAlex Deucher 	amdgpu_gfx_do_off_ctrl(adev, enable, no_delay);
847250d9769SAlex Deucher }
848250d9769SAlex Deucher 
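/*
 * Typical usage sketch (illustrative only; the register name below is a
 * placeholder, not a real register): a client disables GFXOFF before
 * touching GFX registers and re-enables it afterwards, keeping the calls
 * strictly paired:
 *
 *	amdgpu_gfx_off_ctrl(adev, false);	// bump gfx_off_req_count, keep GFX powered
 *	WREG32_SOC15(GC, 0, mmSOME_GFX_REG, val);
 *	amdgpu_gfx_off_ctrl(adev, true);	// drop the request, re-allow GFXOFF after the delay
 */
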
849250d9769SAlex Deucher /* amdgpu_gfx_off_ctrl_immediate - Handle gfx off feature enable/disable
850250d9769SAlex Deucher  *
851250d9769SAlex Deucher  * @adev: amdgpu_device pointer
852250d9769SAlex Deucher  * @enable: true: enable gfx off feature, false: disable gfx off feature
853250d9769SAlex Deucher  *
854250d9769SAlex Deucher  * 1. gfx off will be enabled by the gfx IP after gfx CG/PG is enabled.
855250d9769SAlex Deucher  * 2. Other clients can send requests to disable gfx off; such requests should be honored.
856250d9769SAlex Deucher  * 3. Other clients can cancel their requests to disable gfx off.
857250d9769SAlex Deucher  * 4. Other clients should not request to enable gfx off before they have requested to disable it (requests must be paired).
858250d9769SAlex Deucher  *
859250d9769SAlex Deucher  * Allowing gfx off is issued immediately, without delay.
860250d9769SAlex Deucher  */
861250d9769SAlex Deucher void amdgpu_gfx_off_ctrl_immediate(struct amdgpu_device *adev, bool enable)
862250d9769SAlex Deucher {
863250d9769SAlex Deucher 	amdgpu_gfx_do_off_ctrl(adev, enable, true);
864250d9769SAlex Deucher }
865250d9769SAlex Deucher 
8660ad7347aSAndré Almeida int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool value)
8670ad7347aSAndré Almeida {
8680ad7347aSAndré Almeida 	int r = 0;
8690ad7347aSAndré Almeida 
8700ad7347aSAndré Almeida 	mutex_lock(&adev->gfx.gfx_off_mutex);
8710ad7347aSAndré Almeida 
8720ad7347aSAndré Almeida 	r = amdgpu_dpm_set_residency_gfxoff(adev, value);
8730ad7347aSAndré Almeida 
8740ad7347aSAndré Almeida 	mutex_unlock(&adev->gfx.gfx_off_mutex);
8750ad7347aSAndré Almeida 
8760ad7347aSAndré Almeida 	return r;
8770ad7347aSAndré Almeida }
8780ad7347aSAndré Almeida 
8790ad7347aSAndré Almeida int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32 *value)
8800ad7347aSAndré Almeida {
8810ad7347aSAndré Almeida 	int r = 0;
8820ad7347aSAndré Almeida 
8830ad7347aSAndré Almeida 	mutex_lock(&adev->gfx.gfx_off_mutex);
8840ad7347aSAndré Almeida 
8850ad7347aSAndré Almeida 	r = amdgpu_dpm_get_residency_gfxoff(adev, value);
8860ad7347aSAndré Almeida 
8870ad7347aSAndré Almeida 	mutex_unlock(&adev->gfx.gfx_off_mutex);
8880ad7347aSAndré Almeida 
8890ad7347aSAndré Almeida 	return r;
8900ad7347aSAndré Almeida }
8910ad7347aSAndré Almeida 
8920ad7347aSAndré Almeida int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u64 *value)
8930ad7347aSAndré Almeida {
8940ad7347aSAndré Almeida 	int r = 0;
8950ad7347aSAndré Almeida 
8960ad7347aSAndré Almeida 	mutex_lock(&adev->gfx.gfx_off_mutex);
8970ad7347aSAndré Almeida 
8980ad7347aSAndré Almeida 	r = amdgpu_dpm_get_entrycount_gfxoff(adev, value);
8990ad7347aSAndré Almeida 
9000ad7347aSAndré Almeida 	mutex_unlock(&adev->gfx.gfx_off_mutex);
9010ad7347aSAndré Almeida 
9020ad7347aSAndré Almeida 	return r;
9030ad7347aSAndré Almeida }
9040ad7347aSAndré Almeida 
905443c7f3cSJinzhou.Su int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value)
906443c7f3cSJinzhou.Su {
907443c7f3cSJinzhou.Su 
908443c7f3cSJinzhou.Su 	int r = 0;
909443c7f3cSJinzhou.Su 
910443c7f3cSJinzhou.Su 	mutex_lock(&adev->gfx.gfx_off_mutex);
911443c7f3cSJinzhou.Su 
912bc143d8bSEvan Quan 	r = amdgpu_dpm_get_status_gfxoff(adev, value);
913443c7f3cSJinzhou.Su 
914443c7f3cSJinzhou.Su 	mutex_unlock(&adev->gfx.gfx_off_mutex);
915443c7f3cSJinzhou.Su 
916443c7f3cSJinzhou.Su 	return r;
917443c7f3cSJinzhou.Su }
918443c7f3cSJinzhou.Su 
9194e9b1fa5Syipechai int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
9206caeee7aSHawking Zhang {
9216caeee7aSHawking Zhang 	int r;
9226caeee7aSHawking Zhang 
923caae42f0Syipechai 	if (amdgpu_ras_is_supported(adev, ras_block->block)) {
924c0277b9dSTim Huang 		if (!amdgpu_persistent_edc_harvesting_supported(adev)) {
925c0277b9dSTim Huang 			r = amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX);
926c0277b9dSTim Huang 			if (r)
927c0277b9dSTim Huang 				return r;
928c0277b9dSTim Huang 		}
929761d86d3SDennis Li 
9302a460963SCandice Li 		r = amdgpu_ras_block_late_init(adev, ras_block);
9312a460963SCandice Li 		if (r)
9322a460963SCandice Li 			return r;
9332a460963SCandice Li 
93484a2947eSVictor Skvortsov 		if (amdgpu_sriov_vf(adev))
93584a2947eSVictor Skvortsov 			return r;
93684a2947eSVictor Skvortsov 
9372f48965bSHoratio Zhang 		if (adev->gfx.cp_ecc_error_irq.funcs) {
9386caeee7aSHawking Zhang 			r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
9396caeee7aSHawking Zhang 			if (r)
9406caeee7aSHawking Zhang 				goto late_fini;
9412f48965bSHoratio Zhang 		}
9422a460963SCandice Li 	} else {
9432a460963SCandice Li 		amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0);
9446caeee7aSHawking Zhang 	}
9456caeee7aSHawking Zhang 
9466caeee7aSHawking Zhang 	return 0;
9476caeee7aSHawking Zhang late_fini:
948caae42f0Syipechai 	amdgpu_ras_block_late_fini(adev, ras_block);
9496caeee7aSHawking Zhang 	return r;
9506caeee7aSHawking Zhang }
951725253abSTao Zhou 
95289e4c448SYiPeng Chai int amdgpu_gfx_ras_sw_init(struct amdgpu_device *adev)
95389e4c448SYiPeng Chai {
95489e4c448SYiPeng Chai 	int err = 0;
95589e4c448SYiPeng Chai 	struct amdgpu_gfx_ras *ras = NULL;
95689e4c448SYiPeng Chai 
95789e4c448SYiPeng Chai 	/* adev->gfx.ras is NULL, which means gfx does not
95889e4c448SYiPeng Chai 	 * support ras function, then do nothing here.
95989e4c448SYiPeng Chai 	 */
96089e4c448SYiPeng Chai 	if (!adev->gfx.ras)
96189e4c448SYiPeng Chai 		return 0;
96289e4c448SYiPeng Chai 
96389e4c448SYiPeng Chai 	ras = adev->gfx.ras;
96489e4c448SYiPeng Chai 
96589e4c448SYiPeng Chai 	err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
96689e4c448SYiPeng Chai 	if (err) {
96789e4c448SYiPeng Chai 		dev_err(adev->dev, "Failed to register gfx ras block!\n");
96889e4c448SYiPeng Chai 		return err;
96989e4c448SYiPeng Chai 	}
97089e4c448SYiPeng Chai 
97189e4c448SYiPeng Chai 	strcpy(ras->ras_block.ras_comm.name, "gfx");
97289e4c448SYiPeng Chai 	ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__GFX;
97389e4c448SYiPeng Chai 	ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
97489e4c448SYiPeng Chai 	adev->gfx.ras_if = &ras->ras_block.ras_comm;
97589e4c448SYiPeng Chai 
97689e4c448SYiPeng Chai 	/* If not define special ras_late_init function, use gfx default ras_late_init */
97789e4c448SYiPeng Chai 	if (!ras->ras_block.ras_late_init)
978af8312a3SHawking Zhang 		ras->ras_block.ras_late_init = amdgpu_gfx_ras_late_init;
97989e4c448SYiPeng Chai 
98089e4c448SYiPeng Chai 	/* If not defined special ras_cb function, use default ras_cb */
98189e4c448SYiPeng Chai 	if (!ras->ras_block.ras_cb)
98289e4c448SYiPeng Chai 		ras->ras_block.ras_cb = amdgpu_gfx_process_ras_data_cb;
98389e4c448SYiPeng Chai 
98489e4c448SYiPeng Chai 	return 0;
98589e4c448SYiPeng Chai }
98689e4c448SYiPeng Chai 
987ac7b25d9SYiPeng Chai int amdgpu_gfx_poison_consumption_handler(struct amdgpu_device *adev,
988ac7b25d9SYiPeng Chai 						struct amdgpu_iv_entry *entry)
989ac7b25d9SYiPeng Chai {
990ac7b25d9SYiPeng Chai 	if (adev->gfx.ras && adev->gfx.ras->poison_consumption_handler)
991ac7b25d9SYiPeng Chai 		return adev->gfx.ras->poison_consumption_handler(adev, entry);
992ac7b25d9SYiPeng Chai 
993ac7b25d9SYiPeng Chai 	return 0;
994ac7b25d9SYiPeng Chai }
995ac7b25d9SYiPeng Chai 
996725253abSTao Zhou int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
997725253abSTao Zhou 		void *err_data,
998725253abSTao Zhou 		struct amdgpu_iv_entry *entry)
999725253abSTao Zhou {
10003d8361b1STao Zhou 	/* TODO: a UE will trigger an interrupt.
10013d8361b1STao Zhou 	 *
10023d8361b1STao Zhou 	 * When “Full RAS” is enabled, the per-IP interrupt sources should
10033d8361b1STao Zhou 	 * be disabled and the driver should only look for the aggregated
10043d8361b1STao Zhou 	 * interrupt via sync flood
10053d8361b1STao Zhou 	 */
1006725253abSTao Zhou 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
1007725253abSTao Zhou 		kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
10088b0fb0e9Syipechai 		if (adev->gfx.ras && adev->gfx.ras->ras_block.hw_ops &&
10098b0fb0e9Syipechai 		    adev->gfx.ras->ras_block.hw_ops->query_ras_error_count)
10108b0fb0e9Syipechai 			adev->gfx.ras->ras_block.hw_ops->query_ras_error_count(adev, err_data);
101161934624SGuchun Chen 		amdgpu_ras_reset_gpu(adev);
1012725253abSTao Zhou 	}
1013725253abSTao Zhou 	return AMDGPU_RAS_SUCCESS;
1014725253abSTao Zhou }
1015725253abSTao Zhou 
1016725253abSTao Zhou int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
1017725253abSTao Zhou 				  struct amdgpu_irq_src *source,
1018725253abSTao Zhou 				  struct amdgpu_iv_entry *entry)
1019725253abSTao Zhou {
1020725253abSTao Zhou 	struct ras_common_if *ras_if = adev->gfx.ras_if;
1021725253abSTao Zhou 	struct ras_dispatch_if ih_data = {
1022725253abSTao Zhou 		.entry = entry,
1023725253abSTao Zhou 	};
1024725253abSTao Zhou 
1025725253abSTao Zhou 	if (!ras_if)
1026725253abSTao Zhou 		return 0;
1027725253abSTao Zhou 
1028725253abSTao Zhou 	ih_data.head = *ras_if;
1029725253abSTao Zhou 
1030725253abSTao Zhou 	DRM_ERROR("CP ECC ERROR IRQ\n");
1031725253abSTao Zhou 	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
1032725253abSTao Zhou 	return 0;
1033725253abSTao Zhou }
1034d33a99c4Schen gong 
1035d78c7132STao Zhou void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev,
1036d78c7132STao Zhou 		void *ras_error_status,
1037d78c7132STao Zhou 		void (*func)(struct amdgpu_device *adev, void *ras_error_status,
1038d78c7132STao Zhou 				int xcc_id))
1039d78c7132STao Zhou {
1040d78c7132STao Zhou 	int i;
1041d78c7132STao Zhou 	int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
1042d78c7132STao Zhou 	uint32_t xcc_mask = GENMASK(num_xcc - 1, 0);
1043d78c7132STao Zhou 	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
1044d78c7132STao Zhou 
1045d78c7132STao Zhou 	if (err_data) {
1046d78c7132STao Zhou 		err_data->ue_count = 0;
1047d78c7132STao Zhou 		err_data->ce_count = 0;
1048d78c7132STao Zhou 	}
1049d78c7132STao Zhou 
1050d78c7132STao Zhou 	for_each_inst(i, xcc_mask)
1051d78c7132STao Zhou 		func(adev, ras_error_status, i);
1052d78c7132STao Zhou }
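
/*
 * Example (illustrative only; the gfx_vN_* names below are hypothetical):
 * an IP version typically implements its RAS hw_ops query callback by
 * handing its per-XCC routine to amdgpu_gfx_ras_error_func(), which zeroes
 * the CE/UE counters and then walks every instance in gfx.xcc_mask:
 *
 *	static void gfx_vN_query_ras_error_count_xcc(struct amdgpu_device *adev,
 *						     void *ras_error_status,
 *						     int xcc_id)
 *	{
 *		// accumulate this XCC's EDC/ECC counters into ras_error_status
 *	}
 *
 *	static void gfx_vN_query_ras_error_count(struct amdgpu_device *adev,
 *						 void *ras_error_status)
 *	{
 *		amdgpu_gfx_ras_error_func(adev, ras_error_status,
 *					  gfx_vN_query_ras_error_count_xcc);
 *	}
 */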
1053d78c7132STao Zhou 
105485150626SVictor Lu uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_id)
1055d33a99c4Schen gong {
1056d33a99c4Schen gong 	signed long r, cnt = 0;
1057d33a99c4Schen gong 	unsigned long flags;
105854208194SYintian Tao 	uint32_t seq, reg_val_offs = 0, value = 0;
105985150626SVictor Lu 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
1060d33a99c4Schen gong 	struct amdgpu_ring *ring = &kiq->ring;
1061d33a99c4Schen gong 
106256b53c0bSDennis Li 	if (amdgpu_device_skip_hw_access(adev))
1063bf36b52eSAndrey Grodzovsky 		return 0;
1064bf36b52eSAndrey Grodzovsky 
1065c7d43556SJack Xiao 	if (adev->mes.ring[0].sched.ready)
1066cf606729SJack Xiao 		return amdgpu_mes_rreg(adev, reg);
1067cf606729SJack Xiao 
1068d33a99c4Schen gong 	BUG_ON(!ring->funcs->emit_rreg);
1069d33a99c4Schen gong 
1070d33a99c4Schen gong 	spin_lock_irqsave(&kiq->ring_lock, flags);
107154208194SYintian Tao 	if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
107254208194SYintian Tao 		pr_err("critical bug! too many kiq readers\n");
107304e4e2e9SYintian Tao 		goto failed_unlock;
107454208194SYintian Tao 	}
1075c0277b9dSTim Huang 	r = amdgpu_ring_alloc(ring, 32);
1076c0277b9dSTim Huang 	if (r)
1077c0277b9dSTim Huang 		goto failed_unlock;
1078c0277b9dSTim Huang 
107954208194SYintian Tao 	amdgpu_ring_emit_rreg(ring, reg, reg_val_offs);
108004e4e2e9SYintian Tao 	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
108104e4e2e9SYintian Tao 	if (r)
108204e4e2e9SYintian Tao 		goto failed_undo;
108304e4e2e9SYintian Tao 
1084d33a99c4Schen gong 	amdgpu_ring_commit(ring);
1085d33a99c4Schen gong 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
1086d33a99c4Schen gong 
1087d33a99c4Schen gong 	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
1088d33a99c4Schen gong 
1089d33a99c4Schen gong 	/* don't wait any longer in the GPU reset case because doing so may
1090d33a99c4Schen gong 	 * block the gpu_recover() routine forever, e.g. this virt_kiq_rreg
1091d33a99c4Schen gong 	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
1092d33a99c4Schen gong 	 * never return if we keep waiting in virt_kiq_rreg, which causes
1093d33a99c4Schen gong 	 * gpu_recover() to hang there.
1094d33a99c4Schen gong 	 *
1095d33a99c4Schen gong 	 * also don't wait any longer when called from IRQ context
1096d33a99c4Schen gong 	 */
109753b3f8f4SDennis Li 	if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
1098d33a99c4Schen gong 		goto failed_kiq_read;
1099d33a99c4Schen gong 
1100d33a99c4Schen gong 	might_sleep();
1101d33a99c4Schen gong 	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
1102d33a99c4Schen gong 		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
1103d33a99c4Schen gong 		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
1104d33a99c4Schen gong 	}
1105d33a99c4Schen gong 
1106d33a99c4Schen gong 	if (cnt > MAX_KIQ_REG_TRY)
1107d33a99c4Schen gong 		goto failed_kiq_read;
1108d33a99c4Schen gong 
110954208194SYintian Tao 	mb();
111054208194SYintian Tao 	value = adev->wb.wb[reg_val_offs];
111154208194SYintian Tao 	amdgpu_device_wb_free(adev, reg_val_offs);
111254208194SYintian Tao 	return value;
1113d33a99c4Schen gong 
111404e4e2e9SYintian Tao failed_undo:
111504e4e2e9SYintian Tao 	amdgpu_ring_undo(ring);
111604e4e2e9SYintian Tao failed_unlock:
111704e4e2e9SYintian Tao 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
1118d33a99c4Schen gong failed_kiq_read:
111904e4e2e9SYintian Tao 	if (reg_val_offs)
112004e4e2e9SYintian Tao 		amdgpu_device_wb_free(adev, reg_val_offs);
1121aac89168SDennis Li 	dev_err(adev->dev, "failed to read reg:%x\n", reg);
1122d33a99c4Schen gong 	return ~0;
1123d33a99c4Schen gong }
1124d33a99c4Schen gong 
112585150626SVictor Lu void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id)
1126d33a99c4Schen gong {
1127d33a99c4Schen gong 	signed long r, cnt = 0;
1128d33a99c4Schen gong 	unsigned long flags;
1129d33a99c4Schen gong 	uint32_t seq;
113085150626SVictor Lu 	struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
1131d33a99c4Schen gong 	struct amdgpu_ring *ring = &kiq->ring;
1132d33a99c4Schen gong 
1133d33a99c4Schen gong 	BUG_ON(!ring->funcs->emit_wreg);
1134d33a99c4Schen gong 
113556b53c0bSDennis Li 	if (amdgpu_device_skip_hw_access(adev))
1136bf36b52eSAndrey Grodzovsky 		return;
1137bf36b52eSAndrey Grodzovsky 
1138c7d43556SJack Xiao 	if (adev->mes.ring[0].sched.ready) {
1139cf606729SJack Xiao 		amdgpu_mes_wreg(adev, reg, v);
1140cf606729SJack Xiao 		return;
1141cf606729SJack Xiao 	}
1142cf606729SJack Xiao 
1143d33a99c4Schen gong 	spin_lock_irqsave(&kiq->ring_lock, flags);
1144c0277b9dSTim Huang 	r = amdgpu_ring_alloc(ring, 32);
1145c0277b9dSTim Huang 	if (r)
1146c0277b9dSTim Huang 		goto failed_unlock;
1147c0277b9dSTim Huang 
1148d33a99c4Schen gong 	amdgpu_ring_emit_wreg(ring, reg, v);
114904e4e2e9SYintian Tao 	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
115004e4e2e9SYintian Tao 	if (r)
115104e4e2e9SYintian Tao 		goto failed_undo;
115204e4e2e9SYintian Tao 
1153d33a99c4Schen gong 	amdgpu_ring_commit(ring);
1154d33a99c4Schen gong 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
1155d33a99c4Schen gong 
1156d33a99c4Schen gong 	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
1157d33a99c4Schen gong 
1158d33a99c4Schen gong 	/* don't wait any longer in the GPU reset case because doing so may
1159d33a99c4Schen gong 	 * block the gpu_recover() routine forever, e.g. this virt_kiq_rreg
1160d33a99c4Schen gong 	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
1161d33a99c4Schen gong 	 * never return if we keep waiting in virt_kiq_rreg, which causes
1162d33a99c4Schen gong 	 * gpu_recover() to hang there.
1163d33a99c4Schen gong 	 *
1164d33a99c4Schen gong 	 * also don't wait any longer when called from IRQ context
1165d33a99c4Schen gong 	 */
116653b3f8f4SDennis Li 	if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
1167d33a99c4Schen gong 		goto failed_kiq_write;
1168d33a99c4Schen gong 
1169d33a99c4Schen gong 	might_sleep();
1170d33a99c4Schen gong 	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
1171d33a99c4Schen gong 
1172d33a99c4Schen gong 		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
1173d33a99c4Schen gong 		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
1174d33a99c4Schen gong 	}
1175d33a99c4Schen gong 
1176d33a99c4Schen gong 	if (cnt > MAX_KIQ_REG_TRY)
1177d33a99c4Schen gong 		goto failed_kiq_write;
1178d33a99c4Schen gong 
1179d33a99c4Schen gong 	return;
1180d33a99c4Schen gong 
118104e4e2e9SYintian Tao failed_undo:
118204e4e2e9SYintian Tao 	amdgpu_ring_undo(ring);
1183c0277b9dSTim Huang failed_unlock:
118404e4e2e9SYintian Tao 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
1185d33a99c4Schen gong failed_kiq_write:
1186aac89168SDennis Li 	dev_err(adev->dev, "failed to write reg:%x\n", reg);
1187d33a99c4Schen gong }
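
/*
 * Usage sketch (illustrative; reg_offset and enable_bit are placeholders):
 * these helpers route privileged register access through the KIQ ring (or
 * through MES when its ring is ready), e.g. from SR-IOV paths where direct
 * MMIO access is not permitted:
 *
 *	u32 val;
 *
 *	if (amdgpu_sriov_runtime(adev)) {
 *		val = amdgpu_kiq_rreg(adev, reg_offset, 0);
 *		amdgpu_kiq_wreg(adev, reg_offset, val | enable_bit, 0);
 *	}
 *
 * The last argument selects the XCC whose KIQ services the request; 0 is
 * used on single-XCC parts.
 */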
1188a3bab325SAlex Deucher 
1189a3bab325SAlex Deucher int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev)
1190a3bab325SAlex Deucher {
1191a3bab325SAlex Deucher 	if (amdgpu_num_kcq == -1) {
1192a3bab325SAlex Deucher 		return 8;
1193a3bab325SAlex Deucher 	} else if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) {
1194a3bab325SAlex Deucher 		dev_warn(adev->dev, "set kernel compute queue number to 8 due to invalid parameter provided by user\n");
1195a3bab325SAlex Deucher 		return 8;
1196a3bab325SAlex Deucher 	}
1197a3bab325SAlex Deucher 	return amdgpu_num_kcq;
1198a3bab325SAlex Deucher }
1199ec71b250SLikun Gao 
1200ec71b250SLikun Gao void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev,
12012d89e2ddSLikun Gao 				  uint32_t ucode_id)
1202ec71b250SLikun Gao {
1203ec71b250SLikun Gao 	const struct gfx_firmware_header_v1_0 *cp_hdr;
1204ec71b250SLikun Gao 	const struct gfx_firmware_header_v2_0 *cp_hdr_v2_0;
1205ec71b250SLikun Gao 	struct amdgpu_firmware_info *info = NULL;
1206ec71b250SLikun Gao 	const struct firmware *ucode_fw;
1207ec71b250SLikun Gao 	unsigned int fw_size;
1208ec71b250SLikun Gao 
1209ec71b250SLikun Gao 	switch (ucode_id) {
1210ec71b250SLikun Gao 	case AMDGPU_UCODE_ID_CP_PFP:
1211ec71b250SLikun Gao 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1212ec71b250SLikun Gao 			adev->gfx.pfp_fw->data;
1213ec71b250SLikun Gao 		adev->gfx.pfp_fw_version =
1214ec71b250SLikun Gao 			le32_to_cpu(cp_hdr->header.ucode_version);
1215ec71b250SLikun Gao 		adev->gfx.pfp_feature_version =
1216ec71b250SLikun Gao 			le32_to_cpu(cp_hdr->ucode_feature_version);
1217ec71b250SLikun Gao 		ucode_fw = adev->gfx.pfp_fw;
1218ec71b250SLikun Gao 		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
1219ec71b250SLikun Gao 		break;
1220ec71b250SLikun Gao 	case AMDGPU_UCODE_ID_CP_RS64_PFP:
1221ec71b250SLikun Gao 		cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
1222ec71b250SLikun Gao 			adev->gfx.pfp_fw->data;
1223ec71b250SLikun Gao 		adev->gfx.pfp_fw_version =
1224ec71b250SLikun Gao 			le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
1225ec71b250SLikun Gao 		adev->gfx.pfp_feature_version =
1226ec71b250SLikun Gao 			le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
1227ec71b250SLikun Gao 		ucode_fw = adev->gfx.pfp_fw;
1228ec71b250SLikun Gao 		fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes);
1229ec71b250SLikun Gao 		break;
1230ec71b250SLikun Gao 	case AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK:
1231ec71b250SLikun Gao 	case AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK:
1232ec71b250SLikun Gao 		cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
1233ec71b250SLikun Gao 			adev->gfx.pfp_fw->data;
1234ec71b250SLikun Gao 		ucode_fw = adev->gfx.pfp_fw;
1235ec71b250SLikun Gao 		fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes);
1236ec71b250SLikun Gao 		break;
1237ec71b250SLikun Gao 	case AMDGPU_UCODE_ID_CP_ME:
1238ec71b250SLikun Gao 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1239ec71b250SLikun Gao 			adev->gfx.me_fw->data;
1240ec71b250SLikun Gao 		adev->gfx.me_fw_version =
1241ec71b250SLikun Gao 			le32_to_cpu(cp_hdr->header.ucode_version);
1242ec71b250SLikun Gao 		adev->gfx.me_feature_version =
1243ec71b250SLikun Gao 			le32_to_cpu(cp_hdr->ucode_feature_version);
1244ec71b250SLikun Gao 		ucode_fw = adev->gfx.me_fw;
1245ec71b250SLikun Gao 		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
1246ec71b250SLikun Gao 		break;
1247ec71b250SLikun Gao 	case AMDGPU_UCODE_ID_CP_RS64_ME:
1248ec71b250SLikun Gao 		cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
1249ec71b250SLikun Gao 			adev->gfx.me_fw->data;
1250ec71b250SLikun Gao 		adev->gfx.me_fw_version =
1251ec71b250SLikun Gao 			le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
1252ec71b250SLikun Gao 		adev->gfx.me_feature_version =
1253ec71b250SLikun Gao 			le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
1254ec71b250SLikun Gao 		ucode_fw = adev->gfx.me_fw;
1255ec71b250SLikun Gao 		fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes);
1256ec71b250SLikun Gao 		break;
1257ec71b250SLikun Gao 	case AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK:
1258ec71b250SLikun Gao 	case AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK:
1259ec71b250SLikun Gao 		cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
1260ec71b250SLikun Gao 			adev->gfx.me_fw->data;
1261ec71b250SLikun Gao 		ucode_fw = adev->gfx.me_fw;
1262ec71b250SLikun Gao 		fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes);
1263ec71b250SLikun Gao 		break;
1264ec71b250SLikun Gao 	case AMDGPU_UCODE_ID_CP_CE:
1265ec71b250SLikun Gao 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1266ec71b250SLikun Gao 			adev->gfx.ce_fw->data;
1267ec71b250SLikun Gao 		adev->gfx.ce_fw_version =
1268ec71b250SLikun Gao 			le32_to_cpu(cp_hdr->header.ucode_version);
1269ec71b250SLikun Gao 		adev->gfx.ce_feature_version =
1270ec71b250SLikun Gao 			le32_to_cpu(cp_hdr->ucode_feature_version);
1271ec71b250SLikun Gao 		ucode_fw = adev->gfx.ce_fw;
1272ec71b250SLikun Gao 		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
1273ec71b250SLikun Gao 		break;
1274ec71b250SLikun Gao 	case AMDGPU_UCODE_ID_CP_MEC1:
1275ec71b250SLikun Gao 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1276ec71b250SLikun Gao 			adev->gfx.mec_fw->data;
1277ec71b250SLikun Gao 		adev->gfx.mec_fw_version =
1278ec71b250SLikun Gao 			le32_to_cpu(cp_hdr->header.ucode_version);
1279ec71b250SLikun Gao 		adev->gfx.mec_feature_version =
1280ec71b250SLikun Gao 			le32_to_cpu(cp_hdr->ucode_feature_version);
1281ec71b250SLikun Gao 		ucode_fw = adev->gfx.mec_fw;
1282ec71b250SLikun Gao 		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
1283ec71b250SLikun Gao 			  le32_to_cpu(cp_hdr->jt_size) * 4;
1284ec71b250SLikun Gao 		break;
1285ec71b250SLikun Gao 	case AMDGPU_UCODE_ID_CP_MEC1_JT:
1286ec71b250SLikun Gao 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1287ec71b250SLikun Gao 			adev->gfx.mec_fw->data;
1288ec71b250SLikun Gao 		ucode_fw = adev->gfx.mec_fw;
1289ec71b250SLikun Gao 		fw_size = le32_to_cpu(cp_hdr->jt_size) * 4;
1290ec71b250SLikun Gao 		break;
1291ec71b250SLikun Gao 	case AMDGPU_UCODE_ID_CP_MEC2:
1292ec71b250SLikun Gao 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1293ec71b250SLikun Gao 			adev->gfx.mec2_fw->data;
1294ec71b250SLikun Gao 		adev->gfx.mec2_fw_version =
1295ec71b250SLikun Gao 			le32_to_cpu(cp_hdr->header.ucode_version);
1296ec71b250SLikun Gao 		adev->gfx.mec2_feature_version =
1297ec71b250SLikun Gao 			le32_to_cpu(cp_hdr->ucode_feature_version);
1298ec71b250SLikun Gao 		ucode_fw = adev->gfx.mec2_fw;
1299ec71b250SLikun Gao 		fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
1300ec71b250SLikun Gao 			  le32_to_cpu(cp_hdr->jt_size) * 4;
1301ec71b250SLikun Gao 		break;
1302ec71b250SLikun Gao 	case AMDGPU_UCODE_ID_CP_MEC2_JT:
1303ec71b250SLikun Gao 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1304ec71b250SLikun Gao 			adev->gfx.mec2_fw->data;
1305ec71b250SLikun Gao 		ucode_fw = adev->gfx.mec2_fw;
1306ec71b250SLikun Gao 		fw_size = le32_to_cpu(cp_hdr->jt_size) * 4;
1307ec71b250SLikun Gao 		break;
1308ec71b250SLikun Gao 	case AMDGPU_UCODE_ID_CP_RS64_MEC:
1309ec71b250SLikun Gao 		cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
1310ec71b250SLikun Gao 			adev->gfx.mec_fw->data;
1311ec71b250SLikun Gao 		adev->gfx.mec_fw_version =
1312ec71b250SLikun Gao 			le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
1313ec71b250SLikun Gao 		adev->gfx.mec_feature_version =
1314ec71b250SLikun Gao 			le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
1315ec71b250SLikun Gao 		ucode_fw = adev->gfx.mec_fw;
1316ec71b250SLikun Gao 		fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes);
1317ec71b250SLikun Gao 		break;
1318ec71b250SLikun Gao 	case AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK:
1319ec71b250SLikun Gao 	case AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK:
1320ec71b250SLikun Gao 	case AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK:
1321ec71b250SLikun Gao 	case AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK:
1322ec71b250SLikun Gao 		cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
1323ec71b250SLikun Gao 			adev->gfx.mec_fw->data;
1324ec71b250SLikun Gao 		ucode_fw = adev->gfx.mec_fw;
1325ec71b250SLikun Gao 		fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes);
1326ec71b250SLikun Gao 		break;
1327ec71b250SLikun Gao 	default:
13289a5f15d2STim Huang 		dev_err(adev->dev, "Invalid ucode id %u\n", ucode_id);
13299a5f15d2STim Huang 		return;
1330ec71b250SLikun Gao 	}
1331ec71b250SLikun Gao 
1332ec71b250SLikun Gao 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1333ec71b250SLikun Gao 		info = &adev->firmware.ucode[ucode_id];
1334ec71b250SLikun Gao 		info->ucode_id = ucode_id;
1335ec71b250SLikun Gao 		info->fw = ucode_fw;
1336ec71b250SLikun Gao 		adev->firmware.fw_size += ALIGN(fw_size, PAGE_SIZE);
1337ec71b250SLikun Gao 	}
1338ec71b250SLikun Gao }
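
/*
 * Usage sketch (illustrative): after an IP version has requested its CP
 * firmware images (e.g. into adev->gfx.pfp_fw / me_fw / mec_fw), it calls
 * this helper once per ucode id so the version and feature fields are
 * filled in and, for PSP front-door loading, the image is added to the
 * firmware list:
 *
 *	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
 *	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
 *	amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
 */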
133966daccdeSLe Ma 
134066daccdeSLe Ma bool amdgpu_gfx_is_master_xcc(struct amdgpu_device *adev, int xcc_id)
134166daccdeSLe Ma {
134266daccdeSLe Ma 	return !(xcc_id % (adev->gfx.num_xcc_per_xcp ?
134366daccdeSLe Ma 			adev->gfx.num_xcc_per_xcp : 1));
134466daccdeSLe Ma }
134598a54e88SLe Ma 
134698a54e88SLe Ma static ssize_t amdgpu_gfx_get_current_compute_partition(struct device *dev,
134798a54e88SLe Ma 						struct device_attribute *addr,
134898a54e88SLe Ma 						char *buf)
134998a54e88SLe Ma {
135098a54e88SLe Ma 	struct drm_device *ddev = dev_get_drvdata(dev);
135198a54e88SLe Ma 	struct amdgpu_device *adev = drm_to_adev(ddev);
13528e7fd193SLijo Lazar 	int mode;
135398a54e88SLe Ma 
1354ded7d99eSLijo Lazar 	mode = amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
1355ded7d99eSLijo Lazar 					       AMDGPU_XCP_FL_NONE);
135698a54e88SLe Ma 
1357f9632096SLijo Lazar 	return sysfs_emit(buf, "%s\n", amdgpu_gfx_compute_mode_desc(mode));
135898a54e88SLe Ma }
135998a54e88SLe Ma 
136098a54e88SLe Ma static ssize_t amdgpu_gfx_set_compute_partition(struct device *dev,
136198a54e88SLe Ma 						struct device_attribute *addr,
136298a54e88SLe Ma 						const char *buf, size_t count)
136398a54e88SLe Ma {
136498a54e88SLe Ma 	struct drm_device *ddev = dev_get_drvdata(dev);
136598a54e88SLe Ma 	struct amdgpu_device *adev = drm_to_adev(ddev);
136698a54e88SLe Ma 	enum amdgpu_gfx_partition mode;
13678078f1c6SLijo Lazar 	int ret = 0, num_xcc;
136898a54e88SLe Ma 
13698078f1c6SLijo Lazar 	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
13708078f1c6SLijo Lazar 	if (num_xcc % 2 != 0)
137198a54e88SLe Ma 		return -EINVAL;
137298a54e88SLe Ma 
137398a54e88SLe Ma 	if (!strncasecmp("SPX", buf, strlen("SPX"))) {
137498a54e88SLe Ma 		mode = AMDGPU_SPX_PARTITION_MODE;
137598a54e88SLe Ma 	} else if (!strncasecmp("DPX", buf, strlen("DPX"))) {
1376cb30544eSMukul Joshi 		/*
1377cb30544eSMukul Joshi 		 * DPX mode needs the number of AIDs to be a multiple of 2.
1378cb30544eSMukul Joshi 		 * Each AID connects 2 XCCs, so num_xcc must be a multiple of 4.
1379cb30544eSMukul Joshi 		 */
1380cb30544eSMukul Joshi 		if (num_xcc % 4)
138198a54e88SLe Ma 			return -EINVAL;
138298a54e88SLe Ma 		mode = AMDGPU_DPX_PARTITION_MODE;
138398a54e88SLe Ma 	} else if (!strncasecmp("TPX", buf, strlen("TPX"))) {
13848078f1c6SLijo Lazar 		if (num_xcc != 6)
138598a54e88SLe Ma 			return -EINVAL;
138698a54e88SLe Ma 		mode = AMDGPU_TPX_PARTITION_MODE;
138798a54e88SLe Ma 	} else if (!strncasecmp("QPX", buf, strlen("QPX"))) {
13888078f1c6SLijo Lazar 		if (num_xcc != 8)
138998a54e88SLe Ma 			return -EINVAL;
139098a54e88SLe Ma 		mode = AMDGPU_QPX_PARTITION_MODE;
139198a54e88SLe Ma 	} else if (!strncasecmp("CPX", buf, strlen("CPX"))) {
139298a54e88SLe Ma 		mode = AMDGPU_CPX_PARTITION_MODE;
139398a54e88SLe Ma 	} else {
139498a54e88SLe Ma 		return -EINVAL;
139598a54e88SLe Ma 	}
139698a54e88SLe Ma 
13978e7fd193SLijo Lazar 	ret = amdgpu_xcp_switch_partition_mode(adev->xcp_mgr, mode);
139898a54e88SLe Ma 
139998a54e88SLe Ma 	if (ret)
140098a54e88SLe Ma 		return ret;
140198a54e88SLe Ma 
140298a54e88SLe Ma 	return count;
140398a54e88SLe Ma }
140498a54e88SLe Ma 
14051bc0b339SLijo Lazar static const char *xcp_desc[] = {
14061bc0b339SLijo Lazar 	[AMDGPU_SPX_PARTITION_MODE] = "SPX",
14071bc0b339SLijo Lazar 	[AMDGPU_DPX_PARTITION_MODE] = "DPX",
14081bc0b339SLijo Lazar 	[AMDGPU_TPX_PARTITION_MODE] = "TPX",
14091bc0b339SLijo Lazar 	[AMDGPU_QPX_PARTITION_MODE] = "QPX",
14101bc0b339SLijo Lazar 	[AMDGPU_CPX_PARTITION_MODE] = "CPX",
14111bc0b339SLijo Lazar };
14121bc0b339SLijo Lazar 
141398a54e88SLe Ma static ssize_t amdgpu_gfx_get_available_compute_partition(struct device *dev,
141498a54e88SLe Ma 						struct device_attribute *addr,
141598a54e88SLe Ma 						char *buf)
141698a54e88SLe Ma {
141798a54e88SLe Ma 	struct drm_device *ddev = dev_get_drvdata(dev);
141898a54e88SLe Ma 	struct amdgpu_device *adev = drm_to_adev(ddev);
14191bc0b339SLijo Lazar 	struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr;
14201bc0b339SLijo Lazar 	int size = 0, mode;
14211bc0b339SLijo Lazar 	char *sep = "";
142298a54e88SLe Ma 
14231bc0b339SLijo Lazar 	if (!xcp_mgr || !xcp_mgr->avail_xcp_modes)
14241bc0b339SLijo Lazar 		return sysfs_emit(buf, "Not supported\n");
14251bc0b339SLijo Lazar 
14261bc0b339SLijo Lazar 	for_each_inst(mode, xcp_mgr->avail_xcp_modes) {
14271bc0b339SLijo Lazar 		size += sysfs_emit_at(buf, size, "%s%s", sep, xcp_desc[mode]);
14281bc0b339SLijo Lazar 		sep = ", ";
142998a54e88SLe Ma 	}
143098a54e88SLe Ma 
14311bc0b339SLijo Lazar 	size += sysfs_emit_at(buf, size, "\n");
14321bc0b339SLijo Lazar 
14331bc0b339SLijo Lazar 	return size;
143498a54e88SLe Ma }
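
/*
 * Sysfs usage sketch (illustrative; the exact sysfs path depends on the
 * device): the compute partition attributes backed by the handlers above
 * are read with a plain read, and the partition mode is changed by writing
 * one of the names listed in available_compute_partition, e.g.:
 *
 *	cat /sys/bus/pci/devices/<bdf>/available_compute_partition
 *	echo DPX > /sys/bus/pci/devices/<bdf>/current_compute_partition
 *
 * The write fails with -EINVAL when the requested mode is not possible with
 * the number of XCCs present on the device.
 */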
143598a54e88SLe Ma 
1436d361ad5dSSrinivasan Shanmugam static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring)
1437d361ad5dSSrinivasan Shanmugam {
1438d361ad5dSSrinivasan Shanmugam 	struct amdgpu_device *adev = ring->adev;
1439559a2858SSrinivasan Shanmugam 	struct drm_gpu_scheduler *sched = &ring->sched;
1440559a2858SSrinivasan Shanmugam 	struct drm_sched_entity entity;
1441*447fab30SChristian König 	static atomic_t counter;
1442559a2858SSrinivasan Shanmugam 	struct dma_fence *f;
1443d361ad5dSSrinivasan Shanmugam 	struct amdgpu_job *job;
1444d361ad5dSSrinivasan Shanmugam 	struct amdgpu_ib *ib;
1445*447fab30SChristian König 	void *owner;
1446d361ad5dSSrinivasan Shanmugam 	int i, r;
1447d361ad5dSSrinivasan Shanmugam 
1448559a2858SSrinivasan Shanmugam 	/* Initialize the scheduler entity */
1449559a2858SSrinivasan Shanmugam 	r = drm_sched_entity_init(&entity, DRM_SCHED_PRIORITY_NORMAL,
1450559a2858SSrinivasan Shanmugam 				  &sched, 1, NULL);
1451559a2858SSrinivasan Shanmugam 	if (r) {
1452559a2858SSrinivasan Shanmugam 		dev_err(adev->dev, "Failed setting up GFX kernel entity.\n");
1453559a2858SSrinivasan Shanmugam 		goto err;
1454559a2858SSrinivasan Shanmugam 	}
1455559a2858SSrinivasan Shanmugam 
1456*447fab30SChristian König 	/*
1457*447fab30SChristian König 	 * Use some unique dummy value as the owner to make sure we execute
1458*447fab30SChristian König 	 * the cleaner shader on each submission. The value just needs to change
1459*447fab30SChristian König 	 * for each submission and is otherwise meaningless.
1460*447fab30SChristian König 	 */
1461*447fab30SChristian König 	owner = (void *)(unsigned long)atomic_inc_return(&counter);
1462*447fab30SChristian König 
1463*447fab30SChristian König 	r = amdgpu_job_alloc_with_ib(ring->adev, &entity, owner,
1464*447fab30SChristian König 				     64, 0, &job);
1465d361ad5dSSrinivasan Shanmugam 	if (r)
1466d361ad5dSSrinivasan Shanmugam 		goto err;
1467d361ad5dSSrinivasan Shanmugam 
1468d361ad5dSSrinivasan Shanmugam 	job->enforce_isolation = true;
1469d361ad5dSSrinivasan Shanmugam 
1470d361ad5dSSrinivasan Shanmugam 	ib = &job->ibs[0];
1471d361ad5dSSrinivasan Shanmugam 	for (i = 0; i <= ring->funcs->align_mask; ++i)
1472d361ad5dSSrinivasan Shanmugam 		ib->ptr[i] = ring->funcs->nop;
1473d361ad5dSSrinivasan Shanmugam 	ib->length_dw = ring->funcs->align_mask + 1;
1474d361ad5dSSrinivasan Shanmugam 
1475559a2858SSrinivasan Shanmugam 	f = amdgpu_job_submit(job);
1476559a2858SSrinivasan Shanmugam 
1477559a2858SSrinivasan Shanmugam 	r = dma_fence_wait(f, false);
1478d361ad5dSSrinivasan Shanmugam 	if (r)
1479559a2858SSrinivasan Shanmugam 		goto err;
1480d361ad5dSSrinivasan Shanmugam 
1481d361ad5dSSrinivasan Shanmugam 	dma_fence_put(f);
1482d361ad5dSSrinivasan Shanmugam 
1483559a2858SSrinivasan Shanmugam 	/* Clean up the scheduler entity */
1484559a2858SSrinivasan Shanmugam 	drm_sched_entity_destroy(&entity);
1485d361ad5dSSrinivasan Shanmugam 	return 0;
1486d361ad5dSSrinivasan Shanmugam 
1487d361ad5dSSrinivasan Shanmugam err:
1488d361ad5dSSrinivasan Shanmugam 	return r;
1489d361ad5dSSrinivasan Shanmugam }
1490d361ad5dSSrinivasan Shanmugam 
1491d361ad5dSSrinivasan Shanmugam static int amdgpu_gfx_run_cleaner_shader(struct amdgpu_device *adev, int xcp_id)
1492d361ad5dSSrinivasan Shanmugam {
1493d361ad5dSSrinivasan Shanmugam 	int num_xcc = NUM_XCC(adev->gfx.xcc_mask);
1494d361ad5dSSrinivasan Shanmugam 	struct amdgpu_ring *ring;
1495d361ad5dSSrinivasan Shanmugam 	int num_xcc_to_clear;
1496d361ad5dSSrinivasan Shanmugam 	int i, r, xcc_id;
1497d361ad5dSSrinivasan Shanmugam 
1498d361ad5dSSrinivasan Shanmugam 	if (adev->gfx.num_xcc_per_xcp)
1499d361ad5dSSrinivasan Shanmugam 		num_xcc_to_clear = adev->gfx.num_xcc_per_xcp;
1500d361ad5dSSrinivasan Shanmugam 	else
1501d361ad5dSSrinivasan Shanmugam 		num_xcc_to_clear = 1;
1502d361ad5dSSrinivasan Shanmugam 
1503d361ad5dSSrinivasan Shanmugam 	for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
1504d361ad5dSSrinivasan Shanmugam 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
1505d361ad5dSSrinivasan Shanmugam 			ring = &adev->gfx.compute_ring[i + xcc_id * adev->gfx.num_compute_rings];
1506d361ad5dSSrinivasan Shanmugam 			if ((ring->xcp_id == xcp_id) && ring->sched.ready) {
1507d361ad5dSSrinivasan Shanmugam 				r = amdgpu_gfx_run_cleaner_shader_job(ring);
1508d361ad5dSSrinivasan Shanmugam 				if (r)
1509d361ad5dSSrinivasan Shanmugam 					return r;
1510d361ad5dSSrinivasan Shanmugam 				num_xcc_to_clear--;
1511d361ad5dSSrinivasan Shanmugam 				break;
1512d361ad5dSSrinivasan Shanmugam 			}
1513d361ad5dSSrinivasan Shanmugam 		}
1514d361ad5dSSrinivasan Shanmugam 	}
1515d361ad5dSSrinivasan Shanmugam 
1516d361ad5dSSrinivasan Shanmugam 	if (num_xcc_to_clear)
1517d361ad5dSSrinivasan Shanmugam 		return -ENOENT;
1518d361ad5dSSrinivasan Shanmugam 
1519d361ad5dSSrinivasan Shanmugam 	return 0;
1520d361ad5dSSrinivasan Shanmugam }
1521d361ad5dSSrinivasan Shanmugam 
1522a69f4cc2SSrinivasan Shanmugam /**
1523a69f4cc2SSrinivasan Shanmugam  * amdgpu_gfx_set_run_cleaner_shader - Execute the AMDGPU GFX Cleaner Shader
1524a69f4cc2SSrinivasan Shanmugam  * @dev: The device structure
1525a69f4cc2SSrinivasan Shanmugam  * @attr: The device attribute structure
1526a69f4cc2SSrinivasan Shanmugam  * @buf: The buffer containing the input data
1527a69f4cc2SSrinivasan Shanmugam  * @count: The size of the input data
1528a69f4cc2SSrinivasan Shanmugam  *
1529a69f4cc2SSrinivasan Shanmugam  * Provides the sysfs interface to manually run a cleaner shader, which is
1530a69f4cc2SSrinivasan Shanmugam  * used to clear the GPU state between different tasks. Writing a value to the
1531a69f4cc2SSrinivasan Shanmugam  * 'run_cleaner_shader' sysfs file triggers the cleaner shader execution.
1532a69f4cc2SSrinivasan Shanmugam  * The value written corresponds to the partition index on multi-partition
1533a69f4cc2SSrinivasan Shanmugam  * devices. On single-partition devices, the value should be '0'.
1534a69f4cc2SSrinivasan Shanmugam  *
1535a69f4cc2SSrinivasan Shanmugam  * The cleaner shader clears the Local Data Store (LDS) and General Purpose
1536a69f4cc2SSrinivasan Shanmugam  * Registers (GPRs) to ensure data isolation between GPU workloads.
1537a69f4cc2SSrinivasan Shanmugam  *
1538a69f4cc2SSrinivasan Shanmugam  * Return: @count on success, or a negative error code on failure.
1539a69f4cc2SSrinivasan Shanmugam  */
1540d361ad5dSSrinivasan Shanmugam static ssize_t amdgpu_gfx_set_run_cleaner_shader(struct device *dev,
1541d361ad5dSSrinivasan Shanmugam 						 struct device_attribute *attr,
1542d361ad5dSSrinivasan Shanmugam 						 const char *buf,
1543d361ad5dSSrinivasan Shanmugam 						 size_t count)
1544d361ad5dSSrinivasan Shanmugam {
1545d361ad5dSSrinivasan Shanmugam 	struct drm_device *ddev = dev_get_drvdata(dev);
1546d361ad5dSSrinivasan Shanmugam 	struct amdgpu_device *adev = drm_to_adev(ddev);
1547d361ad5dSSrinivasan Shanmugam 	int ret;
1548d361ad5dSSrinivasan Shanmugam 	long value;
1549d361ad5dSSrinivasan Shanmugam 
1550d361ad5dSSrinivasan Shanmugam 	if (amdgpu_in_reset(adev))
1551d361ad5dSSrinivasan Shanmugam 		return -EPERM;
1552d361ad5dSSrinivasan Shanmugam 	if (adev->in_suspend && !adev->in_runpm)
1553d361ad5dSSrinivasan Shanmugam 		return -EPERM;
1554d361ad5dSSrinivasan Shanmugam 
1555d361ad5dSSrinivasan Shanmugam 	ret = kstrtol(buf, 0, &value);
1556d361ad5dSSrinivasan Shanmugam 
1557d361ad5dSSrinivasan Shanmugam 	if (ret)
1558d361ad5dSSrinivasan Shanmugam 		return -EINVAL;
1559d361ad5dSSrinivasan Shanmugam 
1560d361ad5dSSrinivasan Shanmugam 	if (value < 0)
1561d361ad5dSSrinivasan Shanmugam 		return -EINVAL;
1562d361ad5dSSrinivasan Shanmugam 
1563d361ad5dSSrinivasan Shanmugam 	if (adev->xcp_mgr) {
1564d361ad5dSSrinivasan Shanmugam 		if (value >= adev->xcp_mgr->num_xcps)
1565d361ad5dSSrinivasan Shanmugam 			return -EINVAL;
1566d361ad5dSSrinivasan Shanmugam 	} else {
1567d361ad5dSSrinivasan Shanmugam 		if (value > 1)
1568d361ad5dSSrinivasan Shanmugam 			return -EINVAL;
1569d361ad5dSSrinivasan Shanmugam 	}
1570d361ad5dSSrinivasan Shanmugam 
1571d361ad5dSSrinivasan Shanmugam 	ret = pm_runtime_get_sync(ddev->dev);
1572d361ad5dSSrinivasan Shanmugam 	if (ret < 0) {
1573d361ad5dSSrinivasan Shanmugam 		pm_runtime_put_autosuspend(ddev->dev);
1574d361ad5dSSrinivasan Shanmugam 		return ret;
1575d361ad5dSSrinivasan Shanmugam 	}
1576d361ad5dSSrinivasan Shanmugam 
1577d361ad5dSSrinivasan Shanmugam 	ret = amdgpu_gfx_run_cleaner_shader(adev, value);
1578d361ad5dSSrinivasan Shanmugam 
1579d361ad5dSSrinivasan Shanmugam 	pm_runtime_mark_last_busy(ddev->dev);
1580d361ad5dSSrinivasan Shanmugam 	pm_runtime_put_autosuspend(ddev->dev);
1581d361ad5dSSrinivasan Shanmugam 
1582d361ad5dSSrinivasan Shanmugam 	if (ret)
1583d361ad5dSSrinivasan Shanmugam 		return ret;
1584d361ad5dSSrinivasan Shanmugam 
1585d361ad5dSSrinivasan Shanmugam 	return count;
1586d361ad5dSSrinivasan Shanmugam }
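
/*
 * Usage sketch (illustrative; the sysfs path depends on the device): the
 * partition index is written on a partitioned device, "0" on a
 * single-partition device, e.g.:
 *
 *	echo 0 > /sys/bus/pci/devices/<bdf>/run_cleaner_shader
 *
 * The write resumes the device if it is runtime suspended and then submits
 * a cleaner shader job on a ready compute ring of each XCC in the selected
 * partition.
 */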
1587d361ad5dSSrinivasan Shanmugam 
1588a69f4cc2SSrinivasan Shanmugam /**
1589a69f4cc2SSrinivasan Shanmugam  * amdgpu_gfx_get_enforce_isolation - Query AMDGPU GFX Enforce Isolation Settings
1590a69f4cc2SSrinivasan Shanmugam  * @dev: The device structure
1591a69f4cc2SSrinivasan Shanmugam  * @attr: The device attribute structure
1592a69f4cc2SSrinivasan Shanmugam  * @buf: The buffer to store the output data
1593a69f4cc2SSrinivasan Shanmugam  *
1594a69f4cc2SSrinivasan Shanmugam  * Provides the sysfs read interface to get the current settings of the 'enforce_isolation'
1595a69f4cc2SSrinivasan Shanmugam  * feature for each GPU partition. Reading from the 'enforce_isolation'
1596a69f4cc2SSrinivasan Shanmugam  * sysfs file returns the isolation settings for all partitions, where '0'
1597a69f4cc2SSrinivasan Shanmugam  * indicates disabled and '1' indicates enabled.
1598a69f4cc2SSrinivasan Shanmugam  *
1599a69f4cc2SSrinivasan Shanmugam  * Return: The number of bytes written to @buf.
1600a69f4cc2SSrinivasan Shanmugam  */
1601e189be9bSSrinivasan Shanmugam static ssize_t amdgpu_gfx_get_enforce_isolation(struct device *dev,
1602e189be9bSSrinivasan Shanmugam 						struct device_attribute *attr,
1603e189be9bSSrinivasan Shanmugam 						char *buf)
1604e189be9bSSrinivasan Shanmugam {
1605e189be9bSSrinivasan Shanmugam 	struct drm_device *ddev = dev_get_drvdata(dev);
1606e189be9bSSrinivasan Shanmugam 	struct amdgpu_device *adev = drm_to_adev(ddev);
1607e189be9bSSrinivasan Shanmugam 	int i;
1608e189be9bSSrinivasan Shanmugam 	ssize_t size = 0;
1609e189be9bSSrinivasan Shanmugam 
1610e189be9bSSrinivasan Shanmugam 	if (adev->xcp_mgr) {
1611e189be9bSSrinivasan Shanmugam 		for (i = 0; i < adev->xcp_mgr->num_xcps; i++) {
1612e189be9bSSrinivasan Shanmugam 			size += sysfs_emit_at(buf, size, "%u", adev->enforce_isolation[i]);
1613e189be9bSSrinivasan Shanmugam 			if (i < (adev->xcp_mgr->num_xcps - 1))
1614e189be9bSSrinivasan Shanmugam 				size += sysfs_emit_at(buf, size, " ");
1615e189be9bSSrinivasan Shanmugam 		}
1616e189be9bSSrinivasan Shanmugam 		buf[size++] = '\n';
1617e189be9bSSrinivasan Shanmugam 	} else {
1618e189be9bSSrinivasan Shanmugam 		size = sysfs_emit_at(buf, 0, "%u\n", adev->enforce_isolation[0]);
1619e189be9bSSrinivasan Shanmugam 	}
1620e189be9bSSrinivasan Shanmugam 
1621e189be9bSSrinivasan Shanmugam 	return size;
1622e189be9bSSrinivasan Shanmugam }
1623e189be9bSSrinivasan Shanmugam 
1624a69f4cc2SSrinivasan Shanmugam /**
1625a69f4cc2SSrinivasan Shanmugam  * amdgpu_gfx_set_enforce_isolation - Control AMDGPU GFX Enforce Isolation
1626a69f4cc2SSrinivasan Shanmugam  * @dev: The device structure
1627a69f4cc2SSrinivasan Shanmugam  * @attr: The device attribute structure
1628a69f4cc2SSrinivasan Shanmugam  * @buf: The buffer containing the input data
1629a69f4cc2SSrinivasan Shanmugam  * @count: The size of the input data
1630a69f4cc2SSrinivasan Shanmugam  *
1631a69f4cc2SSrinivasan Shanmugam  * This function allows control over the 'enforce_isolation' feature, which
1632a69f4cc2SSrinivasan Shanmugam  * serializes access to the graphics engine. Writing '1' or '0' to the
1633a69f4cc2SSrinivasan Shanmugam  * 'enforce_isolation' sysfs file enables or disables process isolation for
1634a69f4cc2SSrinivasan Shanmugam  * each partition. The input should specify the setting for all partitions.
1635a69f4cc2SSrinivasan Shanmugam  *
1636a69f4cc2SSrinivasan Shanmugam  * Return: @count on success, or a negative error code on failure.
1637a69f4cc2SSrinivasan Shanmugam  */
1638e189be9bSSrinivasan Shanmugam static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev,
1639e189be9bSSrinivasan Shanmugam 						struct device_attribute *attr,
1640e189be9bSSrinivasan Shanmugam 						const char *buf, size_t count)
1641e189be9bSSrinivasan Shanmugam {
1642e189be9bSSrinivasan Shanmugam 	struct drm_device *ddev = dev_get_drvdata(dev);
1643e189be9bSSrinivasan Shanmugam 	struct amdgpu_device *adev = drm_to_adev(ddev);
1644e189be9bSSrinivasan Shanmugam 	long partition_values[MAX_XCP] = {0};
1645e189be9bSSrinivasan Shanmugam 	int ret, i, num_partitions;
1646e189be9bSSrinivasan Shanmugam 	const char *input_buf = buf;
1647e189be9bSSrinivasan Shanmugam 
1648e189be9bSSrinivasan Shanmugam 	for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) {
1649e189be9bSSrinivasan Shanmugam 		ret = sscanf(input_buf, "%ld", &partition_values[i]);
1650e189be9bSSrinivasan Shanmugam 		if (ret <= 0)
1651e189be9bSSrinivasan Shanmugam 			break;
1652e189be9bSSrinivasan Shanmugam 
1653e189be9bSSrinivasan Shanmugam 		/* Move the pointer to the next value in the string */
1654e189be9bSSrinivasan Shanmugam 		input_buf = strchr(input_buf, ' ');
1655e189be9bSSrinivasan Shanmugam 		if (input_buf) {
1656e189be9bSSrinivasan Shanmugam 			input_buf++;
1657e189be9bSSrinivasan Shanmugam 		} else {
1658e189be9bSSrinivasan Shanmugam 			i++;
1659e189be9bSSrinivasan Shanmugam 			break;
1660e189be9bSSrinivasan Shanmugam 		}
1661e189be9bSSrinivasan Shanmugam 	}
1662e189be9bSSrinivasan Shanmugam 	num_partitions = i;
1663e189be9bSSrinivasan Shanmugam 
1664e189be9bSSrinivasan Shanmugam 	if (adev->xcp_mgr && num_partitions != adev->xcp_mgr->num_xcps)
1665e189be9bSSrinivasan Shanmugam 		return -EINVAL;
1666e189be9bSSrinivasan Shanmugam 
1667e189be9bSSrinivasan Shanmugam 	if (!adev->xcp_mgr && num_partitions != 1)
1668e189be9bSSrinivasan Shanmugam 		return -EINVAL;
1669e189be9bSSrinivasan Shanmugam 
1670e189be9bSSrinivasan Shanmugam 	for (i = 0; i < num_partitions; i++) {
1671e189be9bSSrinivasan Shanmugam 		if (partition_values[i] != 0 && partition_values[i] != 1)
1672e189be9bSSrinivasan Shanmugam 			return -EINVAL;
1673e189be9bSSrinivasan Shanmugam 	}
1674e189be9bSSrinivasan Shanmugam 
1675e189be9bSSrinivasan Shanmugam 	mutex_lock(&adev->enforce_isolation_mutex);
1676db1e58ecSChristian König 	for (i = 0; i < num_partitions; i++)
1677e189be9bSSrinivasan Shanmugam 		adev->enforce_isolation[i] = partition_values[i];
1678e189be9bSSrinivasan Shanmugam 	mutex_unlock(&adev->enforce_isolation_mutex);
1679e189be9bSSrinivasan Shanmugam 
168027b79151SAlex Deucher 	amdgpu_mes_update_enforce_isolation(adev);
168127b79151SAlex Deucher 
1682e189be9bSSrinivasan Shanmugam 	return count;
1683e189be9bSSrinivasan Shanmugam }
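
/*
 * Usage sketch (illustrative; the sysfs path depends on the device): the
 * enforce_isolation attribute takes one value per partition, space
 * separated, and reads back in the same format, e.g. on a four-partition
 * device:
 *
 *	echo 1 0 0 1 > /sys/bus/pci/devices/<bdf>/enforce_isolation
 *	cat /sys/bus/pci/devices/<bdf>/enforce_isolation
 *	1 0 0 1
 *
 * A single value of 0 or 1 is written on devices without partitions.
 */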
1684e189be9bSSrinivasan Shanmugam 
16856c8d1f4bS[email protected] static ssize_t amdgpu_gfx_get_gfx_reset_mask(struct device *dev,
16866c8d1f4bS[email protected] 						struct device_attribute *attr,
16876c8d1f4bS[email protected] 						char *buf)
16886c8d1f4bS[email protected] {
16896c8d1f4bS[email protected] 	struct drm_device *ddev = dev_get_drvdata(dev);
16906c8d1f4bS[email protected] 	struct amdgpu_device *adev = drm_to_adev(ddev);
16916c8d1f4bS[email protected] 
16926c8d1f4bS[email protected] 	if (!adev)
16936c8d1f4bS[email protected] 		return -ENODEV;
16946c8d1f4bS[email protected] 
16956c8d1f4bS[email protected] 	return amdgpu_show_reset_mask(buf, adev->gfx.gfx_supported_reset);
16966c8d1f4bS[email protected] }
16976c8d1f4bS[email protected] 
16986c8d1f4bS[email protected] static ssize_t amdgpu_gfx_get_compute_reset_mask(struct device *dev,
16996c8d1f4bS[email protected] 						struct device_attribute *attr,
17006c8d1f4bS[email protected] 						char *buf)
17016c8d1f4bS[email protected] {
17026c8d1f4bS[email protected] 	struct drm_device *ddev = dev_get_drvdata(dev);
17036c8d1f4bS[email protected] 	struct amdgpu_device *adev = drm_to_adev(ddev);
17046c8d1f4bS[email protected] 
17056c8d1f4bS[email protected] 	if (!adev)
17066c8d1f4bS[email protected] 		return -ENODEV;
17076c8d1f4bS[email protected] 
17086c8d1f4bS[email protected] 	return amdgpu_show_reset_mask(buf, adev->gfx.compute_supported_reset);
17096c8d1f4bS[email protected] }
17106c8d1f4bS[email protected] 
1711d361ad5dSSrinivasan Shanmugam static DEVICE_ATTR(run_cleaner_shader, 0200,
1712d361ad5dSSrinivasan Shanmugam 		   NULL, amdgpu_gfx_set_run_cleaner_shader);
1713d361ad5dSSrinivasan Shanmugam 
1714e189be9bSSrinivasan Shanmugam static DEVICE_ATTR(enforce_isolation, 0644,
1715e189be9bSSrinivasan Shanmugam 		   amdgpu_gfx_get_enforce_isolation,
1716e189be9bSSrinivasan Shanmugam 		   amdgpu_gfx_set_enforce_isolation);
1717e189be9bSSrinivasan Shanmugam 
171850fbe0ccSSrinivasan Shanmugam static DEVICE_ATTR(current_compute_partition, 0644,
171998a54e88SLe Ma 		   amdgpu_gfx_get_current_compute_partition,
172098a54e88SLe Ma 		   amdgpu_gfx_set_compute_partition);
172198a54e88SLe Ma 
172250fbe0ccSSrinivasan Shanmugam static DEVICE_ATTR(available_compute_partition, 0444,
172398a54e88SLe Ma 		   amdgpu_gfx_get_available_compute_partition, NULL);
17246c8d1f4bS[email protected] static DEVICE_ATTR(gfx_reset_mask, 0444,
17256c8d1f4bS[email protected] 		   amdgpu_gfx_get_gfx_reset_mask, NULL);
17266c8d1f4bS[email protected] 
17276c8d1f4bS[email protected] static DEVICE_ATTR(compute_reset_mask, 0444,
17286c8d1f4bS[email protected] 		   amdgpu_gfx_get_compute_reset_mask, NULL);
172998a54e88SLe Ma 
1730047767ddSLijo Lazar static int amdgpu_gfx_sysfs_xcp_init(struct amdgpu_device *adev)
173198a54e88SLe Ma {
1732f8588f05SLijo Lazar 	struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr;
1733f8588f05SLijo Lazar 	bool xcp_switch_supported;
173498a54e88SLe Ma 	int r;
173598a54e88SLe Ma 
1736f8588f05SLijo Lazar 	if (!xcp_mgr)
1737f8588f05SLijo Lazar 		return 0;
1738f8588f05SLijo Lazar 
1739f8588f05SLijo Lazar 	xcp_switch_supported =
1740f8588f05SLijo Lazar 		(xcp_mgr->funcs && xcp_mgr->funcs->switch_partition_mode);
1741f8588f05SLijo Lazar 
1742f8588f05SLijo Lazar 	if (!xcp_switch_supported)
1743f8588f05SLijo Lazar 		dev_attr_current_compute_partition.attr.mode &=
1744f8588f05SLijo Lazar 			~(S_IWUSR | S_IWGRP | S_IWOTH);
1745f8588f05SLijo Lazar 
174698a54e88SLe Ma 	r = device_create_file(adev->dev, &dev_attr_current_compute_partition);
174798a54e88SLe Ma 	if (r)
174898a54e88SLe Ma 		return r;
174998a54e88SLe Ma 
1750f8588f05SLijo Lazar 	if (xcp_switch_supported)
1751f8588f05SLijo Lazar 		r = device_create_file(adev->dev,
1752f8588f05SLijo Lazar 				       &dev_attr_available_compute_partition);
175398a54e88SLe Ma 
1754ea2d2f8eSRajneesh Bhardwaj 	return r;
175598a54e88SLe Ma }
1756993d218fSShiwu Zhang 
1757047767ddSLijo Lazar static void amdgpu_gfx_sysfs_xcp_fini(struct amdgpu_device *adev)
1758993d218fSShiwu Zhang {
1759f8588f05SLijo Lazar 	struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr;
1760f8588f05SLijo Lazar 	bool xcp_switch_supported;
1761f8588f05SLijo Lazar 
1762f8588f05SLijo Lazar 	if (!xcp_mgr)
1763f8588f05SLijo Lazar 		return;
1764f8588f05SLijo Lazar 
1765f8588f05SLijo Lazar 	xcp_switch_supported =
1766f8588f05SLijo Lazar 		(xcp_mgr->funcs && xcp_mgr->funcs->switch_partition_mode);
1767993d218fSShiwu Zhang 	device_remove_file(adev->dev, &dev_attr_current_compute_partition);
1768f8588f05SLijo Lazar 
1769f8588f05SLijo Lazar 	if (xcp_switch_supported)
1770f8588f05SLijo Lazar 		device_remove_file(adev->dev,
1771f8588f05SLijo Lazar 				   &dev_attr_available_compute_partition);
1772993d218fSShiwu Zhang }
1773aec773a1SSrinivasan Shanmugam 
1774047767ddSLijo Lazar static int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev)
1775e189be9bSSrinivasan Shanmugam {
1776e189be9bSSrinivasan Shanmugam 	int r;
1777e189be9bSSrinivasan Shanmugam 
1778e189be9bSSrinivasan Shanmugam 	r = device_create_file(adev->dev, &dev_attr_enforce_isolation);
1779e189be9bSSrinivasan Shanmugam 	if (r)
1780e189be9bSSrinivasan Shanmugam 		return r;
1781047767ddSLijo Lazar 	if (adev->gfx.enable_cleaner_shader)
1782d361ad5dSSrinivasan Shanmugam 		r = device_create_file(adev->dev, &dev_attr_run_cleaner_shader);
1783d361ad5dSSrinivasan Shanmugam 
1784047767ddSLijo Lazar 	return r;
1785e189be9bSSrinivasan Shanmugam }
1786e189be9bSSrinivasan Shanmugam 
1787047767ddSLijo Lazar static void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev)
1788e189be9bSSrinivasan Shanmugam {
1789e189be9bSSrinivasan Shanmugam 	device_remove_file(adev->dev, &dev_attr_enforce_isolation);
1790047767ddSLijo Lazar 	if (adev->gfx.enable_cleaner_shader)
1791d361ad5dSSrinivasan Shanmugam 		device_remove_file(adev->dev, &dev_attr_run_cleaner_shader);
1792e189be9bSSrinivasan Shanmugam }
1793e189be9bSSrinivasan Shanmugam 
17946c8d1f4bS[email protected] static int amdgpu_gfx_sysfs_reset_mask_init(struct amdgpu_device *adev)
17956c8d1f4bS[email protected] {
17966c8d1f4bS[email protected] 	int r = 0;
17976c8d1f4bS[email protected] 
17986c8d1f4bS[email protected] 	if (!amdgpu_gpu_recovery)
17996c8d1f4bS[email protected] 		return r;
18006c8d1f4bS[email protected] 
18016c8d1f4bS[email protected] 	if (adev->gfx.num_gfx_rings) {
18026c8d1f4bS[email protected] 		r = device_create_file(adev->dev, &dev_attr_gfx_reset_mask);
18036c8d1f4bS[email protected] 		if (r)
18046c8d1f4bS[email protected] 			return r;
18056c8d1f4bS[email protected] 	}
18066c8d1f4bS[email protected] 
18076c8d1f4bS[email protected] 	if (adev->gfx.num_compute_rings) {
18086c8d1f4bS[email protected] 		r = device_create_file(adev->dev, &dev_attr_compute_reset_mask);
18096c8d1f4bS[email protected] 		if (r)
18106c8d1f4bS[email protected] 			return r;
18116c8d1f4bS[email protected] 	}
18126c8d1f4bS[email protected] 
18136c8d1f4bS[email protected] 	return r;
18146c8d1f4bS[email protected] }
18156c8d1f4bS[email protected] 
18166c8d1f4bS[email protected] static void amdgpu_gfx_sysfs_reset_mask_fini(struct amdgpu_device *adev)
18176c8d1f4bS[email protected] {
18186c8d1f4bS[email protected] 	if (!amdgpu_gpu_recovery)
18196c8d1f4bS[email protected] 		return;
18206c8d1f4bS[email protected] 
18216c8d1f4bS[email protected] 	if (adev->gfx.num_gfx_rings)
18226c8d1f4bS[email protected] 		device_remove_file(adev->dev, &dev_attr_gfx_reset_mask);
18236c8d1f4bS[email protected] 
18246c8d1f4bS[email protected] 	if (adev->gfx.num_compute_rings)
18256c8d1f4bS[email protected] 		device_remove_file(adev->dev, &dev_attr_compute_reset_mask);
18266c8d1f4bS[email protected] }
18276c8d1f4bS[email protected] 
1828047767ddSLijo Lazar int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev)
1829047767ddSLijo Lazar {
1830047767ddSLijo Lazar 	int r;
1831047767ddSLijo Lazar 
1832047767ddSLijo Lazar 	r = amdgpu_gfx_sysfs_xcp_init(adev);
1833047767ddSLijo Lazar 	if (r) {
1834047767ddSLijo Lazar 		dev_err(adev->dev, "failed to create xcp sysfs files\n");
1835047767ddSLijo Lazar 		return r;
1836047767ddSLijo Lazar 	}
1837047767ddSLijo Lazar 
1838047767ddSLijo Lazar 	r = amdgpu_gfx_sysfs_isolation_shader_init(adev);
1839047767ddSLijo Lazar 	if (r)
1840047767ddSLijo Lazar 		dev_err(adev->dev, "failed to create isolation sysfs files\n");
1841047767ddSLijo Lazar 
18426c8d1f4bS[email protected] 	r = amdgpu_gfx_sysfs_reset_mask_init(adev);
18436c8d1f4bS[email protected] 	if (r)
18446c8d1f4bS[email protected] 		dev_err(adev->dev, "failed to create reset mask sysfs files\n");
18456c8d1f4bS[email protected] 
1846047767ddSLijo Lazar 	return r;
1847047767ddSLijo Lazar }
1848047767ddSLijo Lazar 
1849047767ddSLijo Lazar void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev)
1850047767ddSLijo Lazar {
18512f1b1352S[email protected] 	if (adev->dev->kobj.sd) {
1852047767ddSLijo Lazar 		amdgpu_gfx_sysfs_xcp_fini(adev);
1853047767ddSLijo Lazar 		amdgpu_gfx_sysfs_isolation_shader_fini(adev);
18546c8d1f4bS[email protected] 		amdgpu_gfx_sysfs_reset_mask_fini(adev);
1855047767ddSLijo Lazar 	}
18562f1b1352S[email protected] }
1857047767ddSLijo Lazar 
1858aec773a1SSrinivasan Shanmugam int amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device *adev,
1859aec773a1SSrinivasan Shanmugam 				      unsigned int cleaner_shader_size)
1860aec773a1SSrinivasan Shanmugam {
1861aec773a1SSrinivasan Shanmugam 	if (!adev->gfx.enable_cleaner_shader)
1862aec773a1SSrinivasan Shanmugam 		return -EOPNOTSUPP;
1863aec773a1SSrinivasan Shanmugam 
1864aec773a1SSrinivasan Shanmugam 	return amdgpu_bo_create_kernel(adev, cleaner_shader_size, PAGE_SIZE,
1865aec773a1SSrinivasan Shanmugam 				       AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT,
1866aec773a1SSrinivasan Shanmugam 				       &adev->gfx.cleaner_shader_obj,
1867aec773a1SSrinivasan Shanmugam 				       &adev->gfx.cleaner_shader_gpu_addr,
1868aec773a1SSrinivasan Shanmugam 				       (void **)&adev->gfx.cleaner_shader_cpu_ptr);
1869aec773a1SSrinivasan Shanmugam }
1870aec773a1SSrinivasan Shanmugam 
1871aec773a1SSrinivasan Shanmugam void amdgpu_gfx_cleaner_shader_sw_fini(struct amdgpu_device *adev)
1872aec773a1SSrinivasan Shanmugam {
1873aec773a1SSrinivasan Shanmugam 	if (!adev->gfx.enable_cleaner_shader)
1874aec773a1SSrinivasan Shanmugam 		return;
1875aec773a1SSrinivasan Shanmugam 
1876aec773a1SSrinivasan Shanmugam 	amdgpu_bo_free_kernel(&adev->gfx.cleaner_shader_obj,
1877aec773a1SSrinivasan Shanmugam 			      &adev->gfx.cleaner_shader_gpu_addr,
1878aec773a1SSrinivasan Shanmugam 			      (void **)&adev->gfx.cleaner_shader_cpu_ptr);
1879aec773a1SSrinivasan Shanmugam }
1880aec773a1SSrinivasan Shanmugam 
1881aec773a1SSrinivasan Shanmugam void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev,
1882aec773a1SSrinivasan Shanmugam 				    unsigned int cleaner_shader_size,
1883aec773a1SSrinivasan Shanmugam 				    const void *cleaner_shader_ptr)
1884aec773a1SSrinivasan Shanmugam {
1885aec773a1SSrinivasan Shanmugam 	if (!adev->gfx.enable_cleaner_shader)
1886aec773a1SSrinivasan Shanmugam 		return;
1887aec773a1SSrinivasan Shanmugam 
1888aec773a1SSrinivasan Shanmugam 	if (adev->gfx.cleaner_shader_cpu_ptr && cleaner_shader_ptr)
1889aec773a1SSrinivasan Shanmugam 		memcpy_toio(adev->gfx.cleaner_shader_cpu_ptr, cleaner_shader_ptr,
1890aec773a1SSrinivasan Shanmugam 			    cleaner_shader_size);
1891aec773a1SSrinivasan Shanmugam }
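
/*
 * Init-order sketch (illustrative; it assumes the IP version keeps its
 * shader binary and size in adev->gfx.cleaner_shader_ptr and
 * adev->gfx.cleaner_shader_size): such an IP version sets
 * adev->gfx.enable_cleaner_shader, allocates the backing BO during sw_init
 * and copies the shader binary in during hw_init:
 *
 *	r = amdgpu_gfx_cleaner_shader_sw_init(adev,
 *					      adev->gfx.cleaner_shader_size);
 *	...
 *	amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size,
 *				       adev->gfx.cleaner_shader_ptr);
 *
 * The matching amdgpu_gfx_cleaner_shader_sw_fini() call in sw_fini releases
 * the BO again.
 */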
1892afefd6f2SSrinivasan Shanmugam 
1893afefd6f2SSrinivasan Shanmugam /**
1894afefd6f2SSrinivasan Shanmugam  * amdgpu_gfx_kfd_sch_ctrl - Control the KFD scheduler from the KGD (Graphics Driver)
1895afefd6f2SSrinivasan Shanmugam  * @adev: amdgpu_device pointer
1896afefd6f2SSrinivasan Shanmugam  * @idx: Index of the scheduler to control
1897afefd6f2SSrinivasan Shanmugam  * @enable: Whether to enable or disable the KFD scheduler
1898afefd6f2SSrinivasan Shanmugam  *
1899afefd6f2SSrinivasan Shanmugam  * This function is used to control the KFD (Kernel Fusion Driver) scheduler
1900afefd6f2SSrinivasan Shanmugam  * from the KGD. It is part of the cleaner shader feature. This function plays
1901afefd6f2SSrinivasan Shanmugam  * a key role in enforcing process isolation on the GPU.
1902afefd6f2SSrinivasan Shanmugam  *
1903afefd6f2SSrinivasan Shanmugam  * The function uses a reference count mechanism (kfd_sch_req_count) to keep
1904afefd6f2SSrinivasan Shanmugam  * track of the number of requests to enable the KFD scheduler. When a request
1905afefd6f2SSrinivasan Shanmugam  * to enable the KFD scheduler is made, the reference count is decremented.
1906afefd6f2SSrinivasan Shanmugam  * When the reference count reaches zero, a delayed work is scheduled to
1907afefd6f2SSrinivasan Shanmugam  * enforce isolation once the partition's remaining time slice has elapsed.
1908afefd6f2SSrinivasan Shanmugam  *
1909afefd6f2SSrinivasan Shanmugam  * When a request to disable the KFD scheduler is made, the function first
1910afefd6f2SSrinivasan Shanmugam  * checks if the reference count is zero. If it is, it cancels the delayed work
1911afefd6f2SSrinivasan Shanmugam  * for enforcing isolation and checks if the KFD scheduler is active. If the
1912afefd6f2SSrinivasan Shanmugam  * KFD scheduler is active, it sends a request to stop the KFD scheduler and
1913afefd6f2SSrinivasan Shanmugam  * sets the KFD scheduler state to inactive. Then, it increments the reference
1914afefd6f2SSrinivasan Shanmugam  * count.
1915afefd6f2SSrinivasan Shanmugam  *
1916afefd6f2SSrinivasan Shanmugam  * The function is synchronized using the kfd_sch_mutex to ensure that the KFD
1917afefd6f2SSrinivasan Shanmugam  * scheduler state and reference count are updated atomically.
1918afefd6f2SSrinivasan Shanmugam  *
1919afefd6f2SSrinivasan Shanmugam  * Note: If the reference count is already zero when a request to enable the
1920afefd6f2SSrinivasan Shanmugam  * KFD scheduler is made, it means there's an imbalance bug somewhere. The
1921afefd6f2SSrinivasan Shanmugam  * function triggers a warning in this case.
1922afefd6f2SSrinivasan Shanmugam  */
1923afefd6f2SSrinivasan Shanmugam static void amdgpu_gfx_kfd_sch_ctrl(struct amdgpu_device *adev, u32 idx,
1924afefd6f2SSrinivasan Shanmugam 				    bool enable)
1925afefd6f2SSrinivasan Shanmugam {
1926afefd6f2SSrinivasan Shanmugam 	mutex_lock(&adev->gfx.kfd_sch_mutex);
1927afefd6f2SSrinivasan Shanmugam 
1928afefd6f2SSrinivasan Shanmugam 	if (enable) {
1929afefd6f2SSrinivasan Shanmugam 		/* If the count is already 0, it means there's an imbalance bug somewhere.
1930afefd6f2SSrinivasan Shanmugam 		 * Note that the bug may be in a different caller than the one which triggers the
1931afefd6f2SSrinivasan Shanmugam 		 * WARN_ON_ONCE.
1932afefd6f2SSrinivasan Shanmugam 		 */
1933afefd6f2SSrinivasan Shanmugam 		if (WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx] == 0)) {
1934afefd6f2SSrinivasan Shanmugam 			dev_err(adev->dev, "Attempted to enable KFD scheduler when reference count is already zero\n");
1935afefd6f2SSrinivasan Shanmugam 			goto unlock;
1936afefd6f2SSrinivasan Shanmugam 		}
1937afefd6f2SSrinivasan Shanmugam 
1938afefd6f2SSrinivasan Shanmugam 		adev->gfx.kfd_sch_req_count[idx]--;
1939afefd6f2SSrinivasan Shanmugam 
1940afefd6f2SSrinivasan Shanmugam 		if (adev->gfx.kfd_sch_req_count[idx] == 0 &&
1941afefd6f2SSrinivasan Shanmugam 		    adev->gfx.kfd_sch_inactive[idx]) {
1942afefd6f2SSrinivasan Shanmugam 			schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work,
1943efe6a877SAlex Deucher 					      msecs_to_jiffies(adev->gfx.enforce_isolation_time[idx]));
1944afefd6f2SSrinivasan Shanmugam 		}
1945afefd6f2SSrinivasan Shanmugam 	} else {
1946afefd6f2SSrinivasan Shanmugam 		if (adev->gfx.kfd_sch_req_count[idx] == 0) {
1947afefd6f2SSrinivasan Shanmugam 			cancel_delayed_work_sync(&adev->gfx.enforce_isolation[idx].work);
1948afefd6f2SSrinivasan Shanmugam 			if (!adev->gfx.kfd_sch_inactive[idx]) {
1949afefd6f2SSrinivasan Shanmugam 				amdgpu_amdkfd_stop_sched(adev, idx);
1950afefd6f2SSrinivasan Shanmugam 				adev->gfx.kfd_sch_inactive[idx] = true;
1951afefd6f2SSrinivasan Shanmugam 			}
1952afefd6f2SSrinivasan Shanmugam 		}
1953afefd6f2SSrinivasan Shanmugam 
1954afefd6f2SSrinivasan Shanmugam 		adev->gfx.kfd_sch_req_count[idx]++;
1955afefd6f2SSrinivasan Shanmugam 	}
1956afefd6f2SSrinivasan Shanmugam 
1957afefd6f2SSrinivasan Shanmugam unlock:
1958afefd6f2SSrinivasan Shanmugam 	mutex_unlock(&adev->gfx.kfd_sch_mutex);
1959afefd6f2SSrinivasan Shanmugam }
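
/*
 * Pairing sketch (illustrative): every disable request must eventually be
 * balanced by an enable request, typically bracketing a window in which
 * kernel queues own the partition:
 *
 *	amdgpu_gfx_kfd_sch_ctrl(adev, idx, false);	// request KFD stopped
 *	... run kernel submissions on partition idx ...
 *	amdgpu_gfx_kfd_sch_ctrl(adev, idx, true);	// drop the request
 *
 * KFD is only resumed, after the isolation delay, once the last outstanding
 * request has been dropped.
 */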
1960afefd6f2SSrinivasan Shanmugam 
1961afefd6f2SSrinivasan Shanmugam /**
1962afefd6f2SSrinivasan Shanmugam  * amdgpu_gfx_enforce_isolation_handler - work handler for enforcing shader isolation
1963afefd6f2SSrinivasan Shanmugam  *
1964afefd6f2SSrinivasan Shanmugam  * @work: work_struct.
1965afefd6f2SSrinivasan Shanmugam  *
1966afefd6f2SSrinivasan Shanmugam  * This function is the work handler for enforcing shader isolation on AMD GPUs.
1967afefd6f2SSrinivasan Shanmugam  * It counts the number of emitted fences for each GFX and compute ring. If there
1968afefd6f2SSrinivasan Shanmugam  * are still fences outstanding, it reschedules itself shortly since the kernel
1969afefd6f2SSrinivasan Shanmugam  * queues have already had their time slice. If there are no fences, it signals the Kernel Fusion
1970afefd6f2SSrinivasan Shanmugam  * Driver (KFD) to resume the runqueue. The function is synchronized using the
1971afefd6f2SSrinivasan Shanmugam  * `enforce_isolation_mutex`.
1972afefd6f2SSrinivasan Shanmugam  */
1973afefd6f2SSrinivasan Shanmugam void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work)
1974afefd6f2SSrinivasan Shanmugam {
1975afefd6f2SSrinivasan Shanmugam 	struct amdgpu_isolation_work *isolation_work =
1976afefd6f2SSrinivasan Shanmugam 		container_of(work, struct amdgpu_isolation_work, work.work);
1977afefd6f2SSrinivasan Shanmugam 	struct amdgpu_device *adev = isolation_work->adev;
1978afefd6f2SSrinivasan Shanmugam 	u32 i, idx, fences = 0;
1979afefd6f2SSrinivasan Shanmugam 
1980afefd6f2SSrinivasan Shanmugam 	if (isolation_work->xcp_id == AMDGPU_XCP_NO_PARTITION)
1981afefd6f2SSrinivasan Shanmugam 		idx = 0;
1982afefd6f2SSrinivasan Shanmugam 	else
1983afefd6f2SSrinivasan Shanmugam 		idx = isolation_work->xcp_id;
1984afefd6f2SSrinivasan Shanmugam 
1985afefd6f2SSrinivasan Shanmugam 	if (idx >= MAX_XCP)
1986afefd6f2SSrinivasan Shanmugam 		return;
1987afefd6f2SSrinivasan Shanmugam 
1988afefd6f2SSrinivasan Shanmugam 	mutex_lock(&adev->enforce_isolation_mutex);
1989afefd6f2SSrinivasan Shanmugam 	for (i = 0; i < AMDGPU_MAX_GFX_RINGS; ++i) {
1990afefd6f2SSrinivasan Shanmugam 		if (isolation_work->xcp_id == adev->gfx.gfx_ring[i].xcp_id)
1991afefd6f2SSrinivasan Shanmugam 			fences += amdgpu_fence_count_emitted(&adev->gfx.gfx_ring[i]);
1992afefd6f2SSrinivasan Shanmugam 	}
1993afefd6f2SSrinivasan Shanmugam 	for (i = 0; i < (AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES); ++i) {
1994afefd6f2SSrinivasan Shanmugam 		if (isolation_work->xcp_id == adev->gfx.compute_ring[i].xcp_id)
1995afefd6f2SSrinivasan Shanmugam 			fences += amdgpu_fence_count_emitted(&adev->gfx.compute_ring[i]);
1996afefd6f2SSrinivasan Shanmugam 	}
1997afefd6f2SSrinivasan Shanmugam 	if (fences) {
1998efe6a877SAlex Deucher 		/* we've already had our timeslice, so let's wrap this up */
1999afefd6f2SSrinivasan Shanmugam 		schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work,
2000efe6a877SAlex Deucher 				      msecs_to_jiffies(1));
2001afefd6f2SSrinivasan Shanmugam 	} else {
2002afefd6f2SSrinivasan Shanmugam 		/* Tell KFD to resume the runqueue */
2003afefd6f2SSrinivasan Shanmugam 		if (adev->kfd.init_complete) {
2004afefd6f2SSrinivasan Shanmugam 			WARN_ON_ONCE(!adev->gfx.kfd_sch_inactive[idx]);
2005afefd6f2SSrinivasan Shanmugam 			WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx]);
2006afefd6f2SSrinivasan Shanmugam 			amdgpu_amdkfd_start_sched(adev, idx);
2007afefd6f2SSrinivasan Shanmugam 			adev->gfx.kfd_sch_inactive[idx] = false;
2008afefd6f2SSrinivasan Shanmugam 		}
2009afefd6f2SSrinivasan Shanmugam 	}
2010afefd6f2SSrinivasan Shanmugam 	mutex_unlock(&adev->enforce_isolation_mutex);
2011afefd6f2SSrinivasan Shanmugam }
2012afefd6f2SSrinivasan Shanmugam 
201355f4139bSSrinivasan Shanmugam /**
201455f4139bSSrinivasan Shanmugam  * amdgpu_gfx_enforce_isolation_wait_for_kfd - Manage KFD wait period for process isolation
201555f4139bSSrinivasan Shanmugam  * @adev: amdgpu_device pointer
201655f4139bSSrinivasan Shanmugam  * @idx: Index of the GPU partition
201755f4139bSSrinivasan Shanmugam  *
201855f4139bSSrinivasan Shanmugam  * When kernel submissions come in, the jobs are given a time slice and once
201955f4139bSSrinivasan Shanmugam  * that time slice is up, if there are KFD user queues active, kernel
202055f4139bSSrinivasan Shanmugam  * submissions are blocked until KFD has had its time slice. Once the KFD time
202155f4139bSSrinivasan Shanmugam  * slice is up, KFD user queues are preempted and kernel submissions are
202255f4139bSSrinivasan Shanmugam  * unblocked and allowed to run again.
202355f4139bSSrinivasan Shanmugam  */
2024efe6a877SAlex Deucher static void
2025efe6a877SAlex Deucher amdgpu_gfx_enforce_isolation_wait_for_kfd(struct amdgpu_device *adev,
2026efe6a877SAlex Deucher 					  u32 idx)
2027efe6a877SAlex Deucher {
2028efe6a877SAlex Deucher 	unsigned long cjiffies;
2029efe6a877SAlex Deucher 	bool wait = false;
2030efe6a877SAlex Deucher 
2031efe6a877SAlex Deucher 	mutex_lock(&adev->enforce_isolation_mutex);
2032efe6a877SAlex Deucher 	if (adev->enforce_isolation[idx]) {
2033efe6a877SAlex Deucher 		/* set the initial values if nothing is set */
2034efe6a877SAlex Deucher 		if (!adev->gfx.enforce_isolation_jiffies[idx]) {
2035efe6a877SAlex Deucher 			adev->gfx.enforce_isolation_jiffies[idx] = jiffies;
2036efe6a877SAlex Deucher 			adev->gfx.enforce_isolation_time[idx] = GFX_SLICE_PERIOD_MS;
2037efe6a877SAlex Deucher 		}
2038efe6a877SAlex Deucher 		/* Make sure KFD gets a chance to run */
2039efe6a877SAlex Deucher 		if (amdgpu_amdkfd_compute_active(adev, idx)) {
2040efe6a877SAlex Deucher 			cjiffies = jiffies;
2041efe6a877SAlex Deucher 			if (time_after(cjiffies, adev->gfx.enforce_isolation_jiffies[idx])) {
2042efe6a877SAlex Deucher 				cjiffies -= adev->gfx.enforce_isolation_jiffies[idx];
2043efe6a877SAlex Deucher 				if (jiffies_to_msecs(cjiffies) >= GFX_SLICE_PERIOD_MS) {
2044efe6a877SAlex Deucher 					/* if our time is up, let KGD work drain before scheduling more */
2045efe6a877SAlex Deucher 					wait = true;
2046efe6a877SAlex Deucher 					/* reset the timer period */
2047efe6a877SAlex Deucher 					adev->gfx.enforce_isolation_time[idx] = GFX_SLICE_PERIOD_MS;
2048efe6a877SAlex Deucher 				} else {
2049efe6a877SAlex Deucher 					/* set the timer period to what's left in our time slice */
2050efe6a877SAlex Deucher 					adev->gfx.enforce_isolation_time[idx] =
2051efe6a877SAlex Deucher 						GFX_SLICE_PERIOD_MS - jiffies_to_msecs(cjiffies);
2052efe6a877SAlex Deucher 				}
2053efe6a877SAlex Deucher 			} else {
2054efe6a877SAlex Deucher 				/* if jiffies wrap around we will just wait a little longer */
2055efe6a877SAlex Deucher 				adev->gfx.enforce_isolation_jiffies[idx] = jiffies;
2056efe6a877SAlex Deucher 			}
2057efe6a877SAlex Deucher 		} else {
2058efe6a877SAlex Deucher 			/* if there is no KFD work, then set the full slice period */
2059efe6a877SAlex Deucher 			adev->gfx.enforce_isolation_jiffies[idx] = jiffies;
2060efe6a877SAlex Deucher 			adev->gfx.enforce_isolation_time[idx] = GFX_SLICE_PERIOD_MS;
2061efe6a877SAlex Deucher 		}
2062efe6a877SAlex Deucher 	}
2063efe6a877SAlex Deucher 	mutex_unlock(&adev->enforce_isolation_mutex);
2064efe6a877SAlex Deucher 
2065efe6a877SAlex Deucher 	if (wait)
2066efe6a877SAlex Deucher 		msleep(GFX_SLICE_PERIOD_MS);
2067efe6a877SAlex Deucher }
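
/*
 * Illustrative timing only (the numbers below are examples, not the real
 * GFX_SLICE_PERIOD_MS value): with a 20 ms slice, if kernel work has been
 * running for 8 ms when a new submission arrives, enforce_isolation_time is
 * set to the remaining 12 ms and later used as the delay for the isolation
 * work.  Once the full slice has elapsed and KFD queues are still active,
 * the submitter sleeps for one slice so KFD can run before more kernel work
 * is queued.
 */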
2068efe6a877SAlex Deucher 
206955f4139bSSrinivasan Shanmugam /**
207055f4139bSSrinivasan Shanmugam  * amdgpu_gfx_enforce_isolation_ring_begin_use - Begin use of a ring with enforced isolation
207155f4139bSSrinivasan Shanmugam  * @ring: Pointer to the amdgpu_ring structure
207255f4139bSSrinivasan Shanmugam  *
207355f4139bSSrinivasan Shanmugam  * Ring begin_use helper implementation for gfx which serializes access to the
207455f4139bSSrinivasan Shanmugam  * gfx IP between kernel submission IOCTLs and KFD user queues when isolation
207555f4139bSSrinivasan Shanmugam  * enforcement is enabled. The kernel submission IOCTLs and KFD user queues
207655f4139bSSrinivasan Shanmugam  * each get a time slice when both are active.
207755f4139bSSrinivasan Shanmugam  */
2078afefd6f2SSrinivasan Shanmugam void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring)
2079afefd6f2SSrinivasan Shanmugam {
2080afefd6f2SSrinivasan Shanmugam 	struct amdgpu_device *adev = ring->adev;
2081afefd6f2SSrinivasan Shanmugam 	u32 idx;
20821e8c193fSSrinivasan Shanmugam 	bool sched_work = false;
2083afefd6f2SSrinivasan Shanmugam 
2084afefd6f2SSrinivasan Shanmugam 	if (!adev->gfx.enable_cleaner_shader)
2085afefd6f2SSrinivasan Shanmugam 		return;
2086afefd6f2SSrinivasan Shanmugam 
2087afefd6f2SSrinivasan Shanmugam 	if (ring->xcp_id == AMDGPU_XCP_NO_PARTITION)
2088afefd6f2SSrinivasan Shanmugam 		idx = 0;
2089afefd6f2SSrinivasan Shanmugam 	else
2090afefd6f2SSrinivasan Shanmugam 		idx = ring->xcp_id;
2091afefd6f2SSrinivasan Shanmugam 
2092afefd6f2SSrinivasan Shanmugam 	if (idx >= MAX_XCP)
2093afefd6f2SSrinivasan Shanmugam 		return;
2094afefd6f2SSrinivasan Shanmugam 
2095efe6a877SAlex Deucher 	/* Don't submit more work until KFD has had some time */
2096efe6a877SAlex Deucher 	amdgpu_gfx_enforce_isolation_wait_for_kfd(adev, idx);
2097efe6a877SAlex Deucher 
2098afefd6f2SSrinivasan Shanmugam 	mutex_lock(&adev->enforce_isolation_mutex);
2099afefd6f2SSrinivasan Shanmugam 	if (adev->enforce_isolation[idx]) {
2100afefd6f2SSrinivasan Shanmugam 		if (adev->kfd.init_complete)
21011e8c193fSSrinivasan Shanmugam 			sched_work = true;
2102afefd6f2SSrinivasan Shanmugam 	}
2103afefd6f2SSrinivasan Shanmugam 	mutex_unlock(&adev->enforce_isolation_mutex);
21041e8c193fSSrinivasan Shanmugam 
21051e8c193fSSrinivasan Shanmugam 	if (sched_work)
21061e8c193fSSrinivasan Shanmugam 		amdgpu_gfx_kfd_sch_ctrl(adev, idx, false);
2107afefd6f2SSrinivasan Shanmugam }
2108afefd6f2SSrinivasan Shanmugam 
210955f4139bSSrinivasan Shanmugam /**
211055f4139bSSrinivasan Shanmugam  * amdgpu_gfx_enforce_isolation_ring_end_use - End use of a ring with enforced isolation
211155f4139bSSrinivasan Shanmugam  * @ring: Pointer to the amdgpu_ring structure
211255f4139bSSrinivasan Shanmugam  *
211355f4139bSSrinivasan Shanmugam  * Ring end_use helper implementation for gfx which serializes access to the
211455f4139bSSrinivasan Shanmugam  * gfx IP between kernel submission IOCTLs and KFD user queues when isolation
211555f4139bSSrinivasan Shanmugam  * enforcement is enabled. The kernel submission IOCTLs and KFD user queues
211655f4139bSSrinivasan Shanmugam  * each get a time slice when both are active.
211755f4139bSSrinivasan Shanmugam  */
2118afefd6f2SSrinivasan Shanmugam void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring)
2119afefd6f2SSrinivasan Shanmugam {
2120afefd6f2SSrinivasan Shanmugam 	struct amdgpu_device *adev = ring->adev;
2121afefd6f2SSrinivasan Shanmugam 	u32 idx;
21221e8c193fSSrinivasan Shanmugam 	bool sched_work = false;
2123afefd6f2SSrinivasan Shanmugam 
2124afefd6f2SSrinivasan Shanmugam 	if (!adev->gfx.enable_cleaner_shader)
2125afefd6f2SSrinivasan Shanmugam 		return;
2126afefd6f2SSrinivasan Shanmugam 
2127afefd6f2SSrinivasan Shanmugam 	if (ring->xcp_id == AMDGPU_XCP_NO_PARTITION)
2128afefd6f2SSrinivasan Shanmugam 		idx = 0;
2129afefd6f2SSrinivasan Shanmugam 	else
2130afefd6f2SSrinivasan Shanmugam 		idx = ring->xcp_id;
2131afefd6f2SSrinivasan Shanmugam 
2132afefd6f2SSrinivasan Shanmugam 	if (idx >= MAX_XCP)
2133afefd6f2SSrinivasan Shanmugam 		return;
2134afefd6f2SSrinivasan Shanmugam 
2135afefd6f2SSrinivasan Shanmugam 	mutex_lock(&adev->enforce_isolation_mutex);
2136afefd6f2SSrinivasan Shanmugam 	if (adev->enforce_isolation[idx]) {
2137afefd6f2SSrinivasan Shanmugam 		if (adev->kfd.init_complete)
21381e8c193fSSrinivasan Shanmugam 			sched_work = true;
2139afefd6f2SSrinivasan Shanmugam 	}
2140afefd6f2SSrinivasan Shanmugam 	mutex_unlock(&adev->enforce_isolation_mutex);
21411e8c193fSSrinivasan Shanmugam 
21421e8c193fSSrinivasan Shanmugam 	if (sched_work)
21431e8c193fSSrinivasan Shanmugam 		amdgpu_gfx_kfd_sch_ctrl(adev, idx, true);
2144afefd6f2SSrinivasan Shanmugam }
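
/*
 * Illustrative sketch only: IP-specific GFX code is expected to route its
 * ring begin_use/end_use callbacks through the two helpers above so that
 * every kernel submission takes part in the isolation time slicing.  The
 * version suffix below is a placeholder, not a symbol defined in this file:
 *
 *   static const struct amdgpu_ring_funcs gfx_vN_0_ring_funcs_gfx = {
 *           ...
 *           .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use,
 *           .end_use = amdgpu_gfx_enforce_isolation_ring_end_use,
 *   };
 */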
2145c5c63d9cSJesse Zhang 
21468fdb3958SAlex Deucher void amdgpu_gfx_profile_idle_work_handler(struct work_struct *work)
21478fdb3958SAlex Deucher {
21488fdb3958SAlex Deucher 	struct amdgpu_device *adev =
21498fdb3958SAlex Deucher 		container_of(work, struct amdgpu_device, gfx.idle_work.work);
21508fdb3958SAlex Deucher 	enum PP_SMC_POWER_PROFILE profile;
21518fdb3958SAlex Deucher 	u32 i, fences = 0;
21528fdb3958SAlex Deucher 	int r;
21538fdb3958SAlex Deucher 
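	/* 3D-capable parts prefer the fullscreen 3D profile; compute-only
	 * parts (no gfx rings) use the compute profile instead.
	 */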
21548fdb3958SAlex Deucher 	if (adev->gfx.num_gfx_rings)
21558fdb3958SAlex Deucher 		profile = PP_SMC_POWER_PROFILE_FULLSCREEN3D;
21568fdb3958SAlex Deucher 	else
21578fdb3958SAlex Deucher 		profile = PP_SMC_POWER_PROFILE_COMPUTE;
21588fdb3958SAlex Deucher 
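	/* Only drop the workload profile once every gfx/compute ring is idle
	 * and no submission is in flight; otherwise re-arm the idle work and
	 * check again later.
	 */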
21598fdb3958SAlex Deucher 	for (i = 0; i < AMDGPU_MAX_GFX_RINGS; ++i)
21608fdb3958SAlex Deucher 		fences += amdgpu_fence_count_emitted(&adev->gfx.gfx_ring[i]);
21618fdb3958SAlex Deucher 	for (i = 0; i < (AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES); ++i)
21628fdb3958SAlex Deucher 		fences += amdgpu_fence_count_emitted(&adev->gfx.compute_ring[i]);
21638fdb3958SAlex Deucher 	if (!fences && !atomic_read(&adev->gfx.total_submission_cnt)) {
2164553673a3SAlex Deucher 		mutex_lock(&adev->gfx.workload_profile_mutex);
2165553673a3SAlex Deucher 		if (adev->gfx.workload_profile_active) {
21668fdb3958SAlex Deucher 			r = amdgpu_dpm_switch_power_profile(adev, profile, false);
21678fdb3958SAlex Deucher 			if (r)
21688fdb3958SAlex Deucher 				dev_warn(adev->dev, "(%d) failed to disable %s power profile mode\n", r,
21698fdb3958SAlex Deucher 					 profile == PP_SMC_POWER_PROFILE_FULLSCREEN3D ?
21708fdb3958SAlex Deucher 					 "fullscreen 3D" : "compute");
2171553673a3SAlex Deucher 			adev->gfx.workload_profile_active = false;
2172553673a3SAlex Deucher 		}
2173553673a3SAlex Deucher 		mutex_unlock(&adev->gfx.workload_profile_mutex);
21748fdb3958SAlex Deucher 	} else {
21758fdb3958SAlex Deucher 		schedule_delayed_work(&adev->gfx.idle_work, GFX_PROFILE_IDLE_TIMEOUT);
21768fdb3958SAlex Deucher 	}
21778fdb3958SAlex Deucher }
21788fdb3958SAlex Deucher 
21798fdb3958SAlex Deucher void amdgpu_gfx_profile_ring_begin_use(struct amdgpu_ring *ring)
21808fdb3958SAlex Deucher {
21818fdb3958SAlex Deucher 	struct amdgpu_device *adev = ring->adev;
21828fdb3958SAlex Deucher 	enum PP_SMC_POWER_PROFILE profile;
21838fdb3958SAlex Deucher 	int r;
21848fdb3958SAlex Deucher 
21858fdb3958SAlex Deucher 	if (adev->gfx.num_gfx_rings)
21868fdb3958SAlex Deucher 		profile = PP_SMC_POWER_PROFILE_FULLSCREEN3D;
21878fdb3958SAlex Deucher 	else
21888fdb3958SAlex Deucher 		profile = PP_SMC_POWER_PROFILE_COMPUTE;
21898fdb3958SAlex Deucher 
21908fdb3958SAlex Deucher 	atomic_inc(&adev->gfx.total_submission_cnt);
21918fdb3958SAlex Deucher 
21929e34d8d1SAlex Deucher 	cancel_delayed_work_sync(&adev->gfx.idle_work);
21939e34d8d1SAlex Deucher 
21949e34d8d1SAlex Deucher 	/* We can safely return early here because we've cancelled the
21959e34d8d1SAlex Deucher 	 * delayed work so there is no one else to set it to false
21969e34d8d1SAlex Deucher 	 * and we don't care if someone else sets it to true.
21979e34d8d1SAlex Deucher 	 */
21989e34d8d1SAlex Deucher 	if (adev->gfx.workload_profile_active)
21999e34d8d1SAlex Deucher 		return;
22009e34d8d1SAlex Deucher 
2201553673a3SAlex Deucher 	mutex_lock(&adev->gfx.workload_profile_mutex);
2202553673a3SAlex Deucher 	if (!adev->gfx.workload_profile_active) {
22038fdb3958SAlex Deucher 		r = amdgpu_dpm_switch_power_profile(adev, profile, true);
22048fdb3958SAlex Deucher 		if (r)
22058fdb3958SAlex Deucher 			dev_warn(adev->dev, "(%d) failed to enable %s power profile mode\n", r,
22068fdb3958SAlex Deucher 				 profile == PP_SMC_POWER_PROFILE_FULLSCREEN3D ?
22078fdb3958SAlex Deucher 				 "fullscreen 3D" : "compute");
2208553673a3SAlex Deucher 		adev->gfx.workload_profile_active = true;
2209553673a3SAlex Deucher 	}
2210553673a3SAlex Deucher 	mutex_unlock(&adev->gfx.workload_profile_mutex);
22118fdb3958SAlex Deucher }
22128fdb3958SAlex Deucher 
22138fdb3958SAlex Deucher void amdgpu_gfx_profile_ring_end_use(struct amdgpu_ring *ring)
22148fdb3958SAlex Deucher {
22158fdb3958SAlex Deucher 	atomic_dec(&ring->adev->gfx.total_submission_cnt);
22168fdb3958SAlex Deucher 
22178fdb3958SAlex Deucher 	schedule_delayed_work(&ring->adev->gfx.idle_work, GFX_PROFILE_IDLE_TIMEOUT);
22188fdb3958SAlex Deucher }
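
/*
 * Illustrative sketch only (placeholder names): GFX IPs typically call the
 * profile helpers from their ring callbacks, usually alongside the
 * enforce-isolation helpers, so the workload profile is raised while work
 * is queued and dropped again after the idle timeout:
 *
 *   static void gfx_vN_0_ring_begin_use(struct amdgpu_ring *ring)
 *   {
 *           amdgpu_gfx_profile_ring_begin_use(ring);
 *           amdgpu_gfx_enforce_isolation_ring_begin_use(ring);
 *   }
 */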
22198fdb3958SAlex Deucher 
2220c5c63d9cSJesse Zhang /*
2221c5c63d9cSJesse Zhang  * debugfs to enable/disable gfx job submission to specific rings.
2222c5c63d9cSJesse Zhang  */
2223c5c63d9cSJesse Zhang #if defined(CONFIG_DEBUG_FS)
2224c5c63d9cSJesse Zhang static int amdgpu_debugfs_gfx_sched_mask_set(void *data, u64 val)
2225c5c63d9cSJesse Zhang {
2226c5c63d9cSJesse Zhang 	struct amdgpu_device *adev = (struct amdgpu_device *)data;
2227c5c63d9cSJesse Zhang 	u32 i;
2228c5c63d9cSJesse Zhang 	u64 mask = 0;
2229c5c63d9cSJesse Zhang 	struct amdgpu_ring *ring;
2230c5c63d9cSJesse Zhang 
2231c5c63d9cSJesse Zhang 	if (!adev)
2232c5c63d9cSJesse Zhang 		return -ENODEV;
2233c5c63d9cSJesse Zhang 
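	/* Build the mask of valid ring bits and reject writes that would leave
	 * no gfx ring schedulable; bits beyond num_gfx_rings are ignored.
	 */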
223434c4eb7dSKarol Przybylski 	mask = (1ULL << adev->gfx.num_gfx_rings) - 1;
2235c5c63d9cSJesse Zhang 	if ((val & mask) == 0)
2236c5c63d9cSJesse Zhang 		return -EINVAL;
2237c5c63d9cSJesse Zhang 
2238c5c63d9cSJesse Zhang 	for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
2239c5c63d9cSJesse Zhang 		ring = &adev->gfx.gfx_ring[i];
2240c5c63d9cSJesse Zhang 		if (val & (1ULL << i))
2241c5c63d9cSJesse Zhang 			ring->sched.ready = true;
2242c5c63d9cSJesse Zhang 		else
2243c5c63d9cSJesse Zhang 			ring->sched.ready = false;
2244c5c63d9cSJesse Zhang 	}
2245c5c63d9cSJesse Zhang 	/* publish the sched.ready flag updates so they are visible across SMP immediately */
2246c5c63d9cSJesse Zhang 	smp_rmb();
2247c5c63d9cSJesse Zhang 	return 0;
2248c5c63d9cSJesse Zhang }
2249c5c63d9cSJesse Zhang 
2250c5c63d9cSJesse Zhang static int amdgpu_debugfs_gfx_sched_mask_get(void *data, u64 *val)
2251c5c63d9cSJesse Zhang {
2252c5c63d9cSJesse Zhang 	struct amdgpu_device *adev = (struct amdgpu_device *)data;
2253c5c63d9cSJesse Zhang 	u32 i;
2254c5c63d9cSJesse Zhang 	u64 mask = 0;
2255c5c63d9cSJesse Zhang 	struct amdgpu_ring *ring;
2256c5c63d9cSJesse Zhang 
2257c5c63d9cSJesse Zhang 	if (!adev)
2258c5c63d9cSJesse Zhang 		return -ENODEV;
2259c5c63d9cSJesse Zhang 	for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
2260c5c63d9cSJesse Zhang 		ring = &adev->gfx.gfx_ring[i];
2261c5c63d9cSJesse Zhang 		if (ring->sched.ready)
226234c4eb7dSKarol Przybylski 			mask |= 1ULL << i;
2263c5c63d9cSJesse Zhang 	}
2264c5c63d9cSJesse Zhang 
2265c5c63d9cSJesse Zhang 	*val = mask;
2266c5c63d9cSJesse Zhang 	return 0;
2267c5c63d9cSJesse Zhang }
2268c5c63d9cSJesse Zhang 
2269c5c63d9cSJesse Zhang DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_gfx_sched_mask_fops,
2270c5c63d9cSJesse Zhang 			 amdgpu_debugfs_gfx_sched_mask_get,
2271c5c63d9cSJesse Zhang 			 amdgpu_debugfs_gfx_sched_mask_set, "%llx\n");
2272c5c63d9cSJesse Zhang 
2273c5c63d9cSJesse Zhang #endif
2274c5c63d9cSJesse Zhang 
2275c5c63d9cSJesse Zhang void amdgpu_debugfs_gfx_sched_mask_init(struct amdgpu_device *adev)
2276c5c63d9cSJesse Zhang {
2277c5c63d9cSJesse Zhang #if defined(CONFIG_DEBUG_FS)
2278c5c63d9cSJesse Zhang 	struct drm_minor *minor = adev_to_drm(adev)->primary;
2279c5c63d9cSJesse Zhang 	struct dentry *root = minor->debugfs_root;
2280c5c63d9cSJesse Zhang 	char name[32];
2281c5c63d9cSJesse Zhang 
2282c5c63d9cSJesse Zhang 	if (!(adev->gfx.num_gfx_rings > 1))
2283c5c63d9cSJesse Zhang 		return;
2284c5c63d9cSJesse Zhang 	sprintf(name, "amdgpu_gfx_sched_mask");
2285c5c63d9cSJesse Zhang 	debugfs_create_file(name, 0600, root, adev,
2286c5c63d9cSJesse Zhang 			    &amdgpu_debugfs_gfx_sched_mask_fops);
2287c5c63d9cSJesse Zhang #endif
2288c5c63d9cSJesse Zhang }
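
/*
 * Usage sketch (the debugfs path depends on the DRM minor number):
 *
 *   # show which gfx rings currently accept job submission
 *   cat /sys/kernel/debug/dri/0/amdgpu_gfx_sched_mask
 *   # restrict submission to ring 0 only
 *   echo 0x1 > /sys/kernel/debug/dri/0/amdgpu_gfx_sched_mask
 *
 * The amdgpu_compute_sched_mask file below behaves the same way for the
 * compute rings.
 */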
2289c5c63d9cSJesse Zhang 
2290c5c63d9cSJesse Zhang /*
2291c5c63d9cSJesse Zhang  * debugfs to enable/disable compute job submission to specific rings.
2292c5c63d9cSJesse Zhang  */
2293c5c63d9cSJesse Zhang #if defined(CONFIG_DEBUG_FS)
2294c5c63d9cSJesse Zhang static int amdgpu_debugfs_compute_sched_mask_set(void *data, u64 val)
2295c5c63d9cSJesse Zhang {
2296c5c63d9cSJesse Zhang 	struct amdgpu_device *adev = (struct amdgpu_device *)data;
2297c5c63d9cSJesse Zhang 	u32 i;
2298c5c63d9cSJesse Zhang 	u64 mask = 0;
2299c5c63d9cSJesse Zhang 	struct amdgpu_ring *ring;
2300c5c63d9cSJesse Zhang 
2301c5c63d9cSJesse Zhang 	if (!adev)
2302c5c63d9cSJesse Zhang 		return -ENODEV;
2303c5c63d9cSJesse Zhang 
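	/* Same validation as the gfx variant above: at least one valid
	 * compute ring bit must remain set.
	 */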
230434c4eb7dSKarol Przybylski 	mask = (1ULL << adev->gfx.num_compute_rings) - 1;
2305c5c63d9cSJesse Zhang 	if ((val & mask) == 0)
2306c5c63d9cSJesse Zhang 		return -EINVAL;
2307c5c63d9cSJesse Zhang 
2308c5c63d9cSJesse Zhang 	for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
2309c5c63d9cSJesse Zhang 		ring = &adev->gfx.compute_ring[i];
2310c5c63d9cSJesse Zhang 		if (val & (1ULL << i))
2311c5c63d9cSJesse Zhang 			ring->sched.ready = true;
2312c5c63d9cSJesse Zhang 		else
2313c5c63d9cSJesse Zhang 			ring->sched.ready = false;
2314c5c63d9cSJesse Zhang 	}
2315c5c63d9cSJesse Zhang 
2316c5c63d9cSJesse Zhang 	/* publish the sched.ready flag updates so they are visible across SMP immediately */
2317c5c63d9cSJesse Zhang 	smp_rmb();
2318c5c63d9cSJesse Zhang 	return 0;
2319c5c63d9cSJesse Zhang }
2320c5c63d9cSJesse Zhang 
2321c5c63d9cSJesse Zhang static int amdgpu_debugfs_compute_sched_mask_get(void *data, u64 *val)
2322c5c63d9cSJesse Zhang {
2323c5c63d9cSJesse Zhang 	struct amdgpu_device *adev = (struct amdgpu_device *)data;
2324c5c63d9cSJesse Zhang 	u32 i;
2325c5c63d9cSJesse Zhang 	u64 mask = 0;
2326c5c63d9cSJesse Zhang 	struct amdgpu_ring *ring;
2327c5c63d9cSJesse Zhang 
2328c5c63d9cSJesse Zhang 	if (!adev)
2329c5c63d9cSJesse Zhang 		return -ENODEV;
2330c5c63d9cSJesse Zhang 	for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
2331c5c63d9cSJesse Zhang 		ring = &adev->gfx.compute_ring[i];
2332c5c63d9cSJesse Zhang 		if (ring->sched.ready)
233334c4eb7dSKarol Przybylski 			mask |= 1ULL << i;
2334c5c63d9cSJesse Zhang 	}
2335c5c63d9cSJesse Zhang 
2336c5c63d9cSJesse Zhang 	*val = mask;
2337c5c63d9cSJesse Zhang 	return 0;
2338c5c63d9cSJesse Zhang }
2339c5c63d9cSJesse Zhang 
2340c5c63d9cSJesse Zhang DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_compute_sched_mask_fops,
2341c5c63d9cSJesse Zhang 			 amdgpu_debugfs_compute_sched_mask_get,
2342c5c63d9cSJesse Zhang 			 amdgpu_debugfs_compute_sched_mask_set, "%llx\n");
2343c5c63d9cSJesse Zhang 
2344c5c63d9cSJesse Zhang #endif
2345c5c63d9cSJesse Zhang 
2346c5c63d9cSJesse Zhang void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev)
2347c5c63d9cSJesse Zhang {
2348c5c63d9cSJesse Zhang #if defined(CONFIG_DEBUG_FS)
2349c5c63d9cSJesse Zhang 	struct drm_minor *minor = adev_to_drm(adev)->primary;
2350c5c63d9cSJesse Zhang 	struct dentry *root = minor->debugfs_root;
2351c5c63d9cSJesse Zhang 	char name[32];
2352c5c63d9cSJesse Zhang 
2353c5c63d9cSJesse Zhang 	if (!(adev->gfx.num_compute_rings > 1))
2354c5c63d9cSJesse Zhang 		return;
2355c5c63d9cSJesse Zhang 	sprintf(name, "amdgpu_compute_sched_mask");
2356c5c63d9cSJesse Zhang 	debugfs_create_file(name, 0600, root, adev,
2357c5c63d9cSJesse Zhang 			    &amdgpu_debugfs_compute_sched_mask_fops);
2358c5c63d9cSJesse Zhang #endif
2359c5c63d9cSJesse Zhang }
2360