/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/firmware.h>
#include <linux/pm_runtime.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_rlc.h"
#include "amdgpu_ras.h"
#include "amdgpu_reset.h"
#include "amdgpu_xcp.h"
#include "amdgpu_xgmi.h"

/* delay 0.1 second to enable gfx off feature */
#define GFX_OFF_DELAY_ENABLE    msecs_to_jiffies(100)

#define GFX_OFF_NO_DELAY 0

/*
 * GPU GFX IP block helper functions.
 */

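/**
 * amdgpu_gfx_mec_queue_to_bit - convert a MEC queue to a flat bit index
 * @adev: amdgpu device pointer
 * @mec: MEC index
 * @pipe: pipe index within the MEC
 * @queue: queue index within the pipe
 *
 * Returns the position of the given compute queue in the per-XCC
 * mec queue bitmap.
 */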
int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec,
                                int pipe, int queue)
{
        int bit = 0;

        bit += mec * adev->gfx.mec.num_pipe_per_mec
                * adev->gfx.mec.num_queue_per_pipe;
        bit += pipe * adev->gfx.mec.num_queue_per_pipe;
        bit += queue;

        return bit;
}

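/**
 * amdgpu_queue_mask_bit_to_mec_queue - decode a flat bit index into mec/pipe/queue
 * @adev: amdgpu device pointer
 * @bit: flat queue bit index
 * @mec: output MEC index
 * @pipe: output pipe index
 * @queue: output queue index
 *
 * Inverse of amdgpu_gfx_mec_queue_to_bit().
 */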
void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
                                        int *mec, int *pipe, int *queue)
{
        *queue = bit % adev->gfx.mec.num_queue_per_pipe;
        *pipe = (bit / adev->gfx.mec.num_queue_per_pipe)
                % adev->gfx.mec.num_pipe_per_mec;
        *mec = (bit / adev->gfx.mec.num_queue_per_pipe)
                / adev->gfx.mec.num_pipe_per_mec;
}

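/**
 * amdgpu_gfx_is_mec_queue_enabled - check whether a compute queue is acquired
 * @adev: amdgpu device pointer
 * @xcc_id: XCC instance
 * @mec: MEC index
 * @pipe: pipe index within the MEC
 * @queue: queue index within the pipe
 *
 * Returns true if the queue is set in the XCC's mec queue bitmap.
 */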
bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev,
                                     int xcc_id, int mec, int pipe, int queue)
{
        return test_bit(amdgpu_gfx_mec_queue_to_bit(adev, mec, pipe, queue),
                        adev->gfx.mec_bitmap[xcc_id].queue_bitmap);
}

int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev,
                               int me, int pipe, int queue)
{
        int bit = 0;

        bit += me * adev->gfx.me.num_pipe_per_me
                * adev->gfx.me.num_queue_per_pipe;
        bit += pipe * adev->gfx.me.num_queue_per_pipe;
        bit += queue;

        return bit;
}

bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev,
                                    int me, int pipe, int queue)
{
        return test_bit(amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue),
                        adev->gfx.me.queue_bitmap);
}

/**
 * amdgpu_gfx_parse_disable_cu - Parse the disable_cu module parameter
 *
 * @mask: array in which the per-shader array disable masks will be stored
 * @max_se: number of SEs
 * @max_sh: number of SHs
 *
 * The bitmask of CUs to be disabled in the shader array determined by se and
 * sh is stored in mask[se * max_sh + sh].
 */
void amdgpu_gfx_parse_disable_cu(unsigned int *mask, unsigned int max_se, unsigned int max_sh)
{
        unsigned int se, sh, cu;
        const char *p;

        memset(mask, 0, sizeof(*mask) * max_se * max_sh);

        if (!amdgpu_disable_cu || !*amdgpu_disable_cu)
                return;

        p = amdgpu_disable_cu;
        for (;;) {
                char *next;
                int ret = sscanf(p, "%u.%u.%u", &se, &sh, &cu);

                if (ret < 3) {
                        DRM_ERROR("amdgpu: could not parse disable_cu\n");
                        return;
                }

                if (se < max_se && sh < max_sh && cu < 16) {
                        DRM_INFO("amdgpu: disabling CU %u.%u.%u\n", se, sh, cu);
                        mask[se * max_sh + sh] |= 1u << cu;
                } else {
                        DRM_ERROR("amdgpu: disable_cu %u.%u.%u is out of range\n",
                                  se, sh, cu);
                }

                next = strchr(p, ',');
                if (!next)
                        break;
                p = next + 1;
        }
}

static bool amdgpu_gfx_is_graphics_multipipe_capable(struct amdgpu_device *adev)
{
        return amdgpu_async_gfx_ring && adev->gfx.me.num_pipe_per_me > 1;
}

static bool amdgpu_gfx_is_compute_multipipe_capable(struct amdgpu_device *adev)
{
        if (amdgpu_compute_multipipe != -1) {
                DRM_INFO("amdgpu: forcing compute pipe policy %d\n",
                         amdgpu_compute_multipipe);
                return amdgpu_compute_multipipe == 1;
        }

        if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0))
                return true;

        /* FIXME: spreading the queues across pipes causes perf regressions
         * on POLARIS11 compute workloads */
        if (adev->asic_type == CHIP_POLARIS11)
                return false;

        return adev->gfx.mec.num_mec > 1;
}

bool amdgpu_gfx_is_high_priority_graphics_queue(struct amdgpu_device *adev,
                                                struct amdgpu_ring *ring)
{
        int queue = ring->queue;
        int pipe = ring->pipe;

        /* Policy: use pipe1 queue0 as high priority graphics queue if we
         * have more than one gfx pipe.
         */
        if (amdgpu_gfx_is_graphics_multipipe_capable(adev) &&
            adev->gfx.num_gfx_rings > 1 && pipe == 1 && queue == 0) {
                int me = ring->me;
                int bit;

                bit = amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue);
                if (ring == &adev->gfx.gfx_ring[bit])
                        return true;
        }

        return false;
}

bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
                                               struct amdgpu_ring *ring)
{
        /* Policy: use 1st queue as high priority compute queue if we
         * have more than one compute queue.
         */
        if (adev->gfx.num_compute_rings > 1 &&
            ring == &adev->gfx.compute_ring[0])
                return true;

        return false;
}

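/**
 * amdgpu_gfx_compute_queue_acquire - select the compute queues owned by amdgpu
 * @adev: amdgpu device pointer
 *
 * Fills the per-XCC mec queue bitmaps according to the compute multipipe
 * policy, either spreading the queues evenly across the pipes of MEC1 or
 * taking all queues of the given pipe.
 */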
void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
{
        int i, j, queue, pipe;
        bool multipipe_policy = amdgpu_gfx_is_compute_multipipe_capable(adev);
        int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
                                     adev->gfx.mec.num_queue_per_pipe,
                                     adev->gfx.num_compute_rings);
        int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;

        if (multipipe_policy) {
                /* policy: make queues evenly cross all pipes on MEC1 only;
                 * for multiple XCCs, just use the original policy for simplicity */
                for (j = 0; j < num_xcc; j++) {
                        for (i = 0; i < max_queues_per_mec; i++) {
                                pipe = i % adev->gfx.mec.num_pipe_per_mec;
                                queue = (i / adev->gfx.mec.num_pipe_per_mec) %
                                        adev->gfx.mec.num_queue_per_pipe;

                                set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
                                        adev->gfx.mec_bitmap[j].queue_bitmap);
                        }
                }
        } else {
                /* policy: amdgpu owns all queues in the given pipe */
                for (j = 0; j < num_xcc; j++) {
                        for (i = 0; i < max_queues_per_mec; ++i)
                                set_bit(i, adev->gfx.mec_bitmap[j].queue_bitmap);
                }
        }

        for (j = 0; j < num_xcc; j++) {
                dev_dbg(adev->dev, "mec queue bitmap weight=%d\n",
                        bitmap_weight(adev->gfx.mec_bitmap[j].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
        }
}

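/**
 * amdgpu_gfx_graphics_queue_acquire - select the gfx queues owned by amdgpu
 * @adev: amdgpu device pointer
 *
 * Fills the ME queue bitmap according to the graphics multipipe policy and
 * updates the number of active graphics rings to match.
 */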
void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
{
        int i, queue, pipe;
        bool multipipe_policy = amdgpu_gfx_is_graphics_multipipe_capable(adev);
        int max_queues_per_me = adev->gfx.me.num_pipe_per_me *
                                adev->gfx.me.num_queue_per_pipe;

        if (multipipe_policy) {
                /* policy: amdgpu owns the first queue per pipe at this stage;
                 * will extend to multiple queues per pipe later */
                for (i = 0; i < max_queues_per_me; i++) {
                        pipe = i % adev->gfx.me.num_pipe_per_me;
                        queue = (i / adev->gfx.me.num_pipe_per_me) %
                                adev->gfx.me.num_queue_per_pipe;

                        set_bit(pipe * adev->gfx.me.num_queue_per_pipe + queue,
                                adev->gfx.me.queue_bitmap);
                }
        } else {
                for (i = 0; i < max_queues_per_me; ++i)
                        set_bit(i, adev->gfx.me.queue_bitmap);
        }

        /* update the number of active graphics rings */
        adev->gfx.num_gfx_rings =
                bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
}

static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
                                  struct amdgpu_ring *ring, int xcc_id)
{
        int queue_bit;
        int mec, pipe, queue;

        queue_bit = adev->gfx.mec.num_mec
                * adev->gfx.mec.num_pipe_per_mec
                * adev->gfx.mec.num_queue_per_pipe;

        while (--queue_bit >= 0) {
                if (test_bit(queue_bit, adev->gfx.mec_bitmap[xcc_id].queue_bitmap))
                        continue;

                amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);

                /*
                 * 1. Using pipes 2/3 from MEC 2 seems to cause problems.
                 * 2. It must use queue id 0, because CGPG_IDLE/SAVE/LOAD/RUN
                 *    can only be issued on queue 0.
                 */
                if ((mec == 1 && pipe > 1) || queue != 0)
                        continue;

                ring->me = mec + 1;
                ring->pipe = pipe;
                ring->queue = queue;

                return 0;
        }

        dev_err(adev->dev, "Failed to find a queue for KIQ\n");
        return -EINVAL;
}

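/**
 * amdgpu_gfx_kiq_init_ring - set up the KIQ ring for an XCC
 * @adev: amdgpu device pointer
 * @xcc_id: XCC instance the KIQ belongs to
 *
 * Picks an unused compute queue for the KIQ, programs the doorbell index and
 * initializes the ring itself.
 */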
int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, int xcc_id)
{
        struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
        struct amdgpu_irq_src *irq = &kiq->irq;
        struct amdgpu_ring *ring = &kiq->ring;
        int r = 0;

        spin_lock_init(&kiq->ring_lock);

        ring->adev = NULL;
        ring->ring_obj = NULL;
        ring->use_doorbell = true;
        ring->xcc_id = xcc_id;
        ring->vm_hub = AMDGPU_GFXHUB(xcc_id);
        ring->doorbell_index =
                (adev->doorbell_index.kiq +
                 xcc_id * adev->doorbell_index.xcc_doorbell_range)
                << 1;

        r = amdgpu_gfx_kiq_acquire(adev, ring, xcc_id);
        if (r)
                return r;

        ring->eop_gpu_addr = kiq->eop_gpu_addr;
        ring->no_scheduler = true;
        snprintf(ring->name, sizeof(ring->name), "kiq_%hhu.%hhu.%hhu.%hhu",
                 (unsigned char)xcc_id, (unsigned char)ring->me,
                 (unsigned char)ring->pipe, (unsigned char)ring->queue);
        r = amdgpu_ring_init(adev, ring, 1024, irq, AMDGPU_CP_KIQ_IRQ_DRIVER0,
                             AMDGPU_RING_PRIO_DEFAULT, NULL);
        if (r)
                dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);

        return r;
}

void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)
{
        amdgpu_ring_fini(ring);
}

void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev, int xcc_id)
{
        struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];

        amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
}

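/**
 * amdgpu_gfx_kiq_init - allocate the KIQ EOP buffer
 * @adev: amdgpu device pointer
 * @hpd_size: size of the EOP (HPD) buffer in bytes
 * @xcc_id: XCC instance the KIQ belongs to
 *
 * Creates and zeroes the kernel BO used as the KIQ EOP buffer.
 */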
int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
                        unsigned int hpd_size, int xcc_id)
{
        int r;
        u32 *hpd;
        struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];

        r = amdgpu_bo_create_kernel(adev, hpd_size, PAGE_SIZE,
                                    AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
                                    &kiq->eop_gpu_addr, (void **)&hpd);
        if (r) {
                dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
                return r;
        }

        memset(hpd, 0, hpd_size);

        r = amdgpu_bo_reserve(kiq->eop_obj, true);
        if (unlikely(r != 0))
                dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
        amdgpu_bo_kunmap(kiq->eop_obj);
        amdgpu_bo_unreserve(kiq->eop_obj);

        return 0;
}

/* create MQD for each compute/gfx queue */
int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
                           unsigned int mqd_size, int xcc_id)
{
        int r, i, j;
        struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
        struct amdgpu_ring *ring = &kiq->ring;
        u32 domain = AMDGPU_GEM_DOMAIN_GTT;

#if !defined(CONFIG_ARM) && !defined(CONFIG_ARM64)
        /* Only enable on gfx10 and 11 for now to avoid changing behavior on older chips */
        if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 0, 0))
                domain |= AMDGPU_GEM_DOMAIN_VRAM;
#endif

        /* create MQD for KIQ */
        if (!adev->enable_mes_kiq && !ring->mqd_obj) {
                /* Originally the KIQ MQD was put in the GTT domain, but for SRIOV
                 * the VRAM domain is a must; otherwise the hypervisor triggers a
                 * SAVE_VF failure after the driver is unloaded, since the MQD has
                 * been deallocated and gart_unbind has run. To avoid that divergence,
                 * use the VRAM domain for the KIQ MQD on both SRIOV and bare-metal.
                 */
                r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
                                            AMDGPU_GEM_DOMAIN_VRAM |
                                            AMDGPU_GEM_DOMAIN_GTT,
                                            &ring->mqd_obj,
                                            &ring->mqd_gpu_addr,
                                            &ring->mqd_ptr);
                if (r) {
                        dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
                        return r;
                }

                /* prepare MQD backup */
                kiq->mqd_backup = kzalloc(mqd_size, GFP_KERNEL);
                if (!kiq->mqd_backup) {
                        dev_warn(adev->dev,
                                 "no memory to create MQD backup for ring %s\n", ring->name);
                        return -ENOMEM;
                }
        }

        if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
                /* create MQD for each KGQ */
                for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
                        ring = &adev->gfx.gfx_ring[i];
                        if (!ring->mqd_obj) {
                                r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
                                                            domain, &ring->mqd_obj,
                                                            &ring->mqd_gpu_addr, &ring->mqd_ptr);
                                if (r) {
                                        dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
                                        return r;
                                }

                                ring->mqd_size = mqd_size;
                                /* prepare MQD backup */
                                adev->gfx.me.mqd_backup[i] = kzalloc(mqd_size, GFP_KERNEL);
                                if (!adev->gfx.me.mqd_backup[i]) {
                                        dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
                                        return -ENOMEM;
                                }
                        }
                }
        }

        /* create MQD for each KCQ */
        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                j = i + xcc_id * adev->gfx.num_compute_rings;
                ring = &adev->gfx.compute_ring[j];
                if (!ring->mqd_obj) {
                        r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
                                                    domain, &ring->mqd_obj,
                                                    &ring->mqd_gpu_addr, &ring->mqd_ptr);
                        if (r) {
                                dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
                                return r;
                        }

                        ring->mqd_size = mqd_size;
                        /* prepare MQD backup */
                        adev->gfx.mec.mqd_backup[j] = kzalloc(mqd_size, GFP_KERNEL);
                        if (!adev->gfx.mec.mqd_backup[j]) {
                                dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
                                return -ENOMEM;
                        }
                }
        }

        return 0;
}

void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev, int xcc_id)
{
        struct amdgpu_ring *ring = NULL;
        int i, j;
        struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];

        if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) {
                for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
                        ring = &adev->gfx.gfx_ring[i];
                        kfree(adev->gfx.me.mqd_backup[i]);
                        amdgpu_bo_free_kernel(&ring->mqd_obj,
                                              &ring->mqd_gpu_addr,
                                              &ring->mqd_ptr);
                }
        }

        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                j = i + xcc_id * adev->gfx.num_compute_rings;
                ring = &adev->gfx.compute_ring[j];
                kfree(adev->gfx.mec.mqd_backup[j]);
                amdgpu_bo_free_kernel(&ring->mqd_obj,
                                      &ring->mqd_gpu_addr,
                                      &ring->mqd_ptr);
        }

        ring = &kiq->ring;
        kfree(kiq->mqd_backup);
        amdgpu_bo_free_kernel(&ring->mqd_obj,
                              &ring->mqd_gpu_addr,
                              &ring->mqd_ptr);
}

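/**
 * amdgpu_gfx_disable_kcq - unmap the kernel compute queues
 * @adev: amdgpu device pointer
 * @xcc_id: XCC instance whose compute rings are unmapped
 *
 * Uses the MES when enabled, otherwise submits unmap_queues packets
 * through the KIQ.
 */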
int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id)
{
        struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
        struct amdgpu_ring *kiq_ring = &kiq->ring;
        int i, r = 0;
        int j;

        if (adev->enable_mes) {
                for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                        j = i + xcc_id * adev->gfx.num_compute_rings;
                        amdgpu_mes_unmap_legacy_queue(adev,
                                                      &adev->gfx.compute_ring[j],
                                                      RESET_QUEUES, 0, 0);
                }
                return 0;
        }

        if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
                return -EINVAL;

        if (!kiq_ring->sched.ready || amdgpu_in_reset(adev))
                return 0;

        spin_lock(&kiq->ring_lock);
        if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
                                        adev->gfx.num_compute_rings)) {
                spin_unlock(&kiq->ring_lock);
                return -ENOMEM;
        }

        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                j = i + xcc_id * adev->gfx.num_compute_rings;
                kiq->pmf->kiq_unmap_queues(kiq_ring,
                                           &adev->gfx.compute_ring[j],
                                           RESET_QUEUES, 0, 0);
        }
        /* Submit unmap queue packet */
        amdgpu_ring_commit(kiq_ring);
        /*
         * Ring test will do a basic scratch register change check. Just run
         * this to ensure that the unmap queues packets submitted above have
         * been processed before returning.
         */
        r = amdgpu_ring_test_helper(kiq_ring);

        spin_unlock(&kiq->ring_lock);

        return r;
}

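/**
 * amdgpu_gfx_disable_kgq - unmap the kernel graphics queues
 * @adev: amdgpu device pointer
 * @xcc_id: XCC instance whose gfx rings are unmapped
 *
 * Uses the MES when enabled; otherwise the master XCC submits unmap_queues
 * packets through its KIQ.
 */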
int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id)
{
        struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
        struct amdgpu_ring *kiq_ring = &kiq->ring;
        int i, r = 0;
        int j;

        if (adev->enable_mes) {
                if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
                        for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
                                j = i + xcc_id * adev->gfx.num_gfx_rings;
                                amdgpu_mes_unmap_legacy_queue(adev,
                                                              &adev->gfx.gfx_ring[j],
                                                              PREEMPT_QUEUES, 0, 0);
                        }
                }
                return 0;
        }

        if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
                return -EINVAL;

        if (!adev->gfx.kiq[0].ring.sched.ready || amdgpu_in_reset(adev))
                return 0;

        if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
                spin_lock(&kiq->ring_lock);
                if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
                                                adev->gfx.num_gfx_rings)) {
                        spin_unlock(&kiq->ring_lock);
                        return -ENOMEM;
                }

                for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
                        j = i + xcc_id * adev->gfx.num_gfx_rings;
                        kiq->pmf->kiq_unmap_queues(kiq_ring,
                                                   &adev->gfx.gfx_ring[j],
                                                   PREEMPT_QUEUES, 0, 0);
                }
                /* Submit unmap queue packet */
                amdgpu_ring_commit(kiq_ring);

                /*
                 * Ring test will do a basic scratch register change check.
                 * Just run this to ensure that the unmap queues packets
                 * submitted above have been processed before returning.
                 */
                r = amdgpu_ring_test_helper(kiq_ring);
                spin_unlock(&kiq->ring_lock);
        }

        return r;
}

int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
                                              int queue_bit)
{
        int mec, pipe, queue;
        int set_resource_bit = 0;

        amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);

        set_resource_bit = mec * 4 * 8 + pipe * 8 + queue;

        return set_resource_bit;
}

static int amdgpu_gfx_mes_enable_kcq(struct amdgpu_device *adev, int xcc_id)
{
        struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
        struct amdgpu_ring *kiq_ring = &kiq->ring;
        uint64_t queue_mask = ~0ULL;
        int r, i, j;

        amdgpu_device_flush_hdp(adev, NULL);

        if (!adev->enable_uni_mes) {
                spin_lock(&kiq->ring_lock);
                r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->set_resources_size);
                if (r) {
                        dev_err(adev->dev, "Failed to lock KIQ (%d).\n", r);
                        spin_unlock(&kiq->ring_lock);
                        return r;
                }

                kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
                r = amdgpu_ring_test_helper(kiq_ring);
                spin_unlock(&kiq->ring_lock);
                if (r)
                        dev_err(adev->dev, "KIQ failed to set resources\n");
        }

        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                j = i + xcc_id * adev->gfx.num_compute_rings;
                r = amdgpu_mes_map_legacy_queue(adev,
                                                &adev->gfx.compute_ring[j]);
                if (r) {
                        dev_err(adev->dev, "failed to map compute queue\n");
                        return r;
                }
        }

        return 0;
}

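/**
 * amdgpu_gfx_enable_kcq - map the kernel compute queues
 * @adev: amdgpu device pointer
 * @xcc_id: XCC instance whose compute rings are mapped
 *
 * Builds the queue mask from the acquired compute queues, issues a
 * set_resources packet and maps each KCQ through the KIQ, or defers to
 * the MES when legacy queue mapping is handled there.
 */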
int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id)
{
        struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
        struct amdgpu_ring *kiq_ring = &kiq->ring;
        uint64_t queue_mask = 0;
        int r, i, j;

        if (adev->mes.enable_legacy_queue_map)
                return amdgpu_gfx_mes_enable_kcq(adev, xcc_id);

        if (!kiq->pmf || !kiq->pmf->kiq_map_queues || !kiq->pmf->kiq_set_resources)
                return -EINVAL;

        for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
                if (!test_bit(i, adev->gfx.mec_bitmap[xcc_id].queue_bitmap))
                        continue;

                /* This situation may be hit in the future if a new HW
                 * generation exposes more than 64 queues. If so, the
                 * definition of queue_mask needs updating */
                if (WARN_ON(i > (sizeof(queue_mask)*8))) {
                        DRM_ERROR("Invalid KCQ enabled: %d\n", i);
                        break;
                }

                queue_mask |= (1ull << amdgpu_queue_mask_bit_to_set_resource_bit(adev, i));
        }

        amdgpu_device_flush_hdp(adev, NULL);

        DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe,
                 kiq_ring->queue);

        spin_lock(&kiq->ring_lock);
        r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
                                        adev->gfx.num_compute_rings +
                                        kiq->pmf->set_resources_size);
        if (r) {
                DRM_ERROR("Failed to lock KIQ (%d).\n", r);
                spin_unlock(&kiq->ring_lock);
                return r;
        }

        kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                j = i + xcc_id * adev->gfx.num_compute_rings;
                kiq->pmf->kiq_map_queues(kiq_ring,
                                         &adev->gfx.compute_ring[j]);
        }
        /* Submit map queue packet */
        amdgpu_ring_commit(kiq_ring);
        /*
         * Ring test will do a basic scratch register change check. Just run
         * this to ensure that the map queues packets submitted above have
         * been processed before returning.
         */
        r = amdgpu_ring_test_helper(kiq_ring);
        spin_unlock(&kiq->ring_lock);
        if (r)
                DRM_ERROR("KCQ enable failed\n");

        return r;
}

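/**
 * amdgpu_gfx_enable_kgq - map the kernel graphics queues
 * @adev: amdgpu device pointer
 * @xcc_id: XCC instance whose gfx rings are mapped
 *
 * Maps each KGQ through the MES when legacy queue mapping is handled there;
 * otherwise the master XCC submits map_queues packets through its KIQ.
 */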
int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id)
{
        struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
        struct amdgpu_ring *kiq_ring = &kiq->ring;
        int r, i, j;

        if (!kiq->pmf || !kiq->pmf->kiq_map_queues)
                return -EINVAL;

        amdgpu_device_flush_hdp(adev, NULL);

        if (adev->mes.enable_legacy_queue_map) {
                for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
                        j = i + xcc_id * adev->gfx.num_gfx_rings;
                        r = amdgpu_mes_map_legacy_queue(adev,
                                                        &adev->gfx.gfx_ring[j]);
                        if (r) {
                                DRM_ERROR("failed to map gfx queue\n");
                                return r;
                        }
                }

                return 0;
        }

        spin_lock(&kiq->ring_lock);
        /* No need to map kcq on the slave */
        if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {
                r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
                                                adev->gfx.num_gfx_rings);
                if (r) {
                        DRM_ERROR("Failed to lock KIQ (%d).\n", r);
                        spin_unlock(&kiq->ring_lock);
                        return r;
                }

                for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
                        j = i + xcc_id * adev->gfx.num_gfx_rings;
                        kiq->pmf->kiq_map_queues(kiq_ring,
                                                 &adev->gfx.gfx_ring[j]);
                }
        }
        /* Submit map queue packet */
        amdgpu_ring_commit(kiq_ring);
        /*
         * Ring test will do a basic scratch register change check. Just run
         * this to ensure that the map queues packets submitted above have
         * been processed before returning.
         */
        r = amdgpu_ring_test_helper(kiq_ring);
        spin_unlock(&kiq->ring_lock);
        if (r)
                DRM_ERROR("KGQ enable failed\n");

        return r;
}

static void amdgpu_gfx_do_off_ctrl(struct amdgpu_device *adev, bool enable,
                                   bool no_delay)
{
        unsigned long delay = GFX_OFF_DELAY_ENABLE;

        if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
                return;

        mutex_lock(&adev->gfx.gfx_off_mutex);

        if (enable) {
                /* If the count is already 0, it means there's an imbalance bug somewhere.
                 * Note that the bug may be in a different caller than the one which triggers the
                 * WARN_ON_ONCE.
                 */
                if (WARN_ON_ONCE(adev->gfx.gfx_off_req_count == 0))
                        goto unlock;

                adev->gfx.gfx_off_req_count--;

                if (adev->gfx.gfx_off_req_count == 0 &&
                    !adev->gfx.gfx_off_state) {
                        /* If going to s2idle, no need to wait */
                        if (no_delay) {
                                if (!amdgpu_dpm_set_powergating_by_smu(adev,
                                                AMD_IP_BLOCK_TYPE_GFX, true, 0))
                                        adev->gfx.gfx_off_state = true;
                        } else {
                                schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
                                                      delay);
                        }
                }
        } else {
                if (adev->gfx.gfx_off_req_count == 0) {
                        cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);

                        if (adev->gfx.gfx_off_state &&
                            !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false, 0)) {
                                adev->gfx.gfx_off_state = false;

                                if (adev->gfx.funcs->init_spm_golden) {
                                        dev_dbg(adev->dev,
                                                "GFXOFF is disabled, re-init SPM golden settings\n");
                                        amdgpu_gfx_init_spm_golden(adev);
                                }
                        }
                }

                adev->gfx.gfx_off_req_count++;
        }

unlock:
        mutex_unlock(&adev->gfx.gfx_off_mutex);
}

/* amdgpu_gfx_off_ctrl - Handle gfx off feature enable/disable
 *
 * @adev: amdgpu_device pointer
 * @enable: true: enable the gfx off feature, false: disable it
 *
 * 1. The gfx off feature will be enabled by the gfx IP after gfx cg/pg are enabled.
 * 2. Other clients can send a request to disable the gfx off feature; the request should be honored.
 * 3. Other clients can cancel their request to disable the gfx off feature.
 * 4. Other clients should not request to enable the gfx off feature before disabling it.
 *
 * The gfx off allow will be delayed by GFX_OFF_DELAY_ENABLE ms.
 */
void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
{
        /* If going to s2idle, no need to wait */
        bool no_delay = adev->in_s0ix ? true : false;

        amdgpu_gfx_do_off_ctrl(adev, enable, no_delay);
}

/* amdgpu_gfx_off_ctrl_immediate - Handle gfx off feature enable/disable
 *
 * @adev: amdgpu_device pointer
 * @enable: true: enable the gfx off feature, false: disable it
 *
 * 1. The gfx off feature will be enabled by the gfx IP after gfx cg/pg are enabled.
 * 2. Other clients can send a request to disable the gfx off feature; the request should be honored.
 * 3. Other clients can cancel their request to disable the gfx off feature.
 * 4. Other clients should not request to enable the gfx off feature before disabling it.
 *
 * The gfx off allow will be issued immediately.
 */
void amdgpu_gfx_off_ctrl_immediate(struct amdgpu_device *adev, bool enable)
{
        amdgpu_gfx_do_off_ctrl(adev, enable, true);
}

int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool value)
{
        int r = 0;

        mutex_lock(&adev->gfx.gfx_off_mutex);

        r = amdgpu_dpm_set_residency_gfxoff(adev, value);

        mutex_unlock(&adev->gfx.gfx_off_mutex);

        return r;
}

int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32 *value)
{
        int r = 0;

        mutex_lock(&adev->gfx.gfx_off_mutex);

        r = amdgpu_dpm_get_residency_gfxoff(adev, value);

        mutex_unlock(&adev->gfx.gfx_off_mutex);

        return r;
}

int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u64 *value)
{
        int r = 0;

        mutex_lock(&adev->gfx.gfx_off_mutex);

        r = amdgpu_dpm_get_entrycount_gfxoff(adev, value);

        mutex_unlock(&adev->gfx.gfx_off_mutex);

        return r;
}

int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value)
{
        int r = 0;

        mutex_lock(&adev->gfx.gfx_off_mutex);

        r = amdgpu_dpm_get_status_gfxoff(adev, value);

        mutex_unlock(&adev->gfx.gfx_off_mutex);

        return r;
}

int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
{
        int r;

        if (amdgpu_ras_is_supported(adev, ras_block->block)) {
                if (!amdgpu_persistent_edc_harvesting_supported(adev)) {
                        r = amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX);
                        if (r)
                                return r;
                }

                r = amdgpu_ras_block_late_init(adev, ras_block);
                if (r)
                        return r;

                if (amdgpu_sriov_vf(adev))
                        return r;

                if (adev->gfx.cp_ecc_error_irq.funcs) {
                        r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
                        if (r)
                                goto late_fini;
                }
        } else {
                amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0);
        }

        return 0;
late_fini:
        amdgpu_ras_block_late_fini(adev, ras_block);
        return r;
}

int amdgpu_gfx_ras_sw_init(struct amdgpu_device *adev)
{
        int err = 0;
        struct amdgpu_gfx_ras *ras = NULL;

        /* If adev->gfx.ras is NULL, gfx does not support the RAS
         * function, so do nothing here.
         */
        if (!adev->gfx.ras)
                return 0;

        ras = adev->gfx.ras;

        err = amdgpu_ras_register_ras_block(adev, &ras->ras_block);
        if (err) {
                dev_err(adev->dev, "Failed to register gfx ras block!\n");
                return err;
        }

        strcpy(ras->ras_block.ras_comm.name, "gfx");
        ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__GFX;
        ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
        adev->gfx.ras_if = &ras->ras_block.ras_comm;

        /* If no special ras_late_init function is defined, use the gfx default ras_late_init */
        if (!ras->ras_block.ras_late_init)
                ras->ras_block.ras_late_init = amdgpu_gfx_ras_late_init;

        /* If no special ras_cb function is defined, use the default ras_cb */
        if (!ras->ras_block.ras_cb)
                ras->ras_block.ras_cb = amdgpu_gfx_process_ras_data_cb;

        return 0;
}

int amdgpu_gfx_poison_consumption_handler(struct amdgpu_device *adev,
                                          struct amdgpu_iv_entry *entry)
{
        if (adev->gfx.ras && adev->gfx.ras->poison_consumption_handler)
                return adev->gfx.ras->poison_consumption_handler(adev, entry);

        return 0;
}

int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
                                   void *err_data,
                                   struct amdgpu_iv_entry *entry)
{
        /* TODO: a UE will trigger an interrupt.
         *
         * When "Full RAS" is enabled, the per-IP interrupt sources should
         * be disabled and the driver should only look for the aggregated
         * interrupt via sync flood.
         */
        if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
                kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
                if (adev->gfx.ras && adev->gfx.ras->ras_block.hw_ops &&
                    adev->gfx.ras->ras_block.hw_ops->query_ras_error_count)
                        adev->gfx.ras->ras_block.hw_ops->query_ras_error_count(adev, err_data);
                amdgpu_ras_reset_gpu(adev);
        }
        return AMDGPU_RAS_SUCCESS;
}

int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
                                struct amdgpu_irq_src *source,
                                struct amdgpu_iv_entry *entry)
{
        struct ras_common_if *ras_if = adev->gfx.ras_if;
        struct ras_dispatch_if ih_data = {
                .entry = entry,
        };

        if (!ras_if)
                return 0;

        ih_data.head = *ras_if;

        DRM_ERROR("CP ECC ERROR IRQ\n");
        amdgpu_ras_interrupt_dispatch(adev, &ih_data);
        return 0;
}

void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev,
                               void *ras_error_status,
                               void (*func)(struct amdgpu_device *adev, void *ras_error_status,
                                            int xcc_id))
{
        int i;
        int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
        uint32_t xcc_mask = GENMASK(num_xcc - 1, 0);
        struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;

        if (err_data) {
                err_data->ue_count = 0;
                err_data->ce_count = 0;
        }

        for_each_inst(i, xcc_mask)
                func(adev, ras_error_status, i);
}

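/**
 * amdgpu_kiq_rreg - read a register through the KIQ
 * @adev: amdgpu device pointer
 * @reg: dword register offset
 * @xcc_id: XCC instance whose KIQ is used
 *
 * Emits a read-register packet on the KIQ ring (or goes through the MES
 * when its ring is ready) and polls the fence for the result.
 */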
amdgpu_kiq_rreg(struct amdgpu_device * adev,uint32_t reg,uint32_t xcc_id)105485150626SVictor Lu uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_id)
1055d33a99c4Schen gong {
1056d33a99c4Schen gong signed long r, cnt = 0;
1057d33a99c4Schen gong unsigned long flags;
105854208194SYintian Tao uint32_t seq, reg_val_offs = 0, value = 0;
105985150626SVictor Lu struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
1060d33a99c4Schen gong struct amdgpu_ring *ring = &kiq->ring;
1061d33a99c4Schen gong
106256b53c0bSDennis Li if (amdgpu_device_skip_hw_access(adev))
1063bf36b52eSAndrey Grodzovsky return 0;
1064bf36b52eSAndrey Grodzovsky
1065c7d43556SJack Xiao if (adev->mes.ring[0].sched.ready)
1066cf606729SJack Xiao return amdgpu_mes_rreg(adev, reg);
1067cf606729SJack Xiao
1068d33a99c4Schen gong BUG_ON(!ring->funcs->emit_rreg);
1069d33a99c4Schen gong
1070d33a99c4Schen gong spin_lock_irqsave(&kiq->ring_lock, flags);
107154208194SYintian Tao if (amdgpu_device_wb_get(adev, ®_val_offs)) {
107254208194SYintian Tao pr_err("critical bug! too many kiq readers\n");
107304e4e2e9SYintian Tao goto failed_unlock;
107454208194SYintian Tao }
1075c0277b9dSTim Huang r = amdgpu_ring_alloc(ring, 32);
1076c0277b9dSTim Huang if (r)
1077c0277b9dSTim Huang goto failed_unlock;
1078c0277b9dSTim Huang
107954208194SYintian Tao amdgpu_ring_emit_rreg(ring, reg, reg_val_offs);
108004e4e2e9SYintian Tao r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
108104e4e2e9SYintian Tao if (r)
108204e4e2e9SYintian Tao goto failed_undo;
108304e4e2e9SYintian Tao
1084d33a99c4Schen gong amdgpu_ring_commit(ring);
1085d33a99c4Schen gong spin_unlock_irqrestore(&kiq->ring_lock, flags);
1086d33a99c4Schen gong
1087d33a99c4Schen gong r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
1088d33a99c4Schen gong
1089d33a99c4Schen gong	/* Don't wait any longer in the GPU reset case because doing so may
1090d33a99c4Schen gong	 * block the gpu_recover() routine forever, e.g. this virt_kiq_rreg
1091d33a99c4Schen gong	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
1092d33a99c4Schen gong	 * never return if we keep waiting in virt_kiq_rreg, which causes
1093d33a99c4Schen gong	 * gpu_recover() to hang there.
1094d33a99c4Schen gong	 *
1095d33a99c4Schen gong	 * Also don't wait any longer when called from IRQ context.
1096d33a99c4Schen gong	 */
109753b3f8f4SDennis Li if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
1098d33a99c4Schen gong goto failed_kiq_read;
1099d33a99c4Schen gong
1100d33a99c4Schen gong might_sleep();
1101d33a99c4Schen gong while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
1102d33a99c4Schen gong msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
1103d33a99c4Schen gong r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
1104d33a99c4Schen gong }
1105d33a99c4Schen gong
1106d33a99c4Schen gong if (cnt > MAX_KIQ_REG_TRY)
1107d33a99c4Schen gong goto failed_kiq_read;
1108d33a99c4Schen gong
110954208194SYintian Tao mb();
111054208194SYintian Tao value = adev->wb.wb[reg_val_offs];
111154208194SYintian Tao amdgpu_device_wb_free(adev, reg_val_offs);
111254208194SYintian Tao return value;
1113d33a99c4Schen gong
111404e4e2e9SYintian Tao failed_undo:
111504e4e2e9SYintian Tao amdgpu_ring_undo(ring);
111604e4e2e9SYintian Tao failed_unlock:
111704e4e2e9SYintian Tao spin_unlock_irqrestore(&kiq->ring_lock, flags);
1118d33a99c4Schen gong failed_kiq_read:
111904e4e2e9SYintian Tao if (reg_val_offs)
112004e4e2e9SYintian Tao amdgpu_device_wb_free(adev, reg_val_offs);
1121aac89168SDennis Li dev_err(adev->dev, "failed to read reg:%x\n", reg);
1122d33a99c4Schen gong return ~0;
1123d33a99c4Schen gong }
1124d33a99c4Schen gong
amdgpu_kiq_wreg(struct amdgpu_device * adev,uint32_t reg,uint32_t v,uint32_t xcc_id)112585150626SVictor Lu void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id)
1126d33a99c4Schen gong {
1127d33a99c4Schen gong signed long r, cnt = 0;
1128d33a99c4Schen gong unsigned long flags;
1129d33a99c4Schen gong uint32_t seq;
113085150626SVictor Lu struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
1131d33a99c4Schen gong struct amdgpu_ring *ring = &kiq->ring;
1132d33a99c4Schen gong
1133d33a99c4Schen gong BUG_ON(!ring->funcs->emit_wreg);
1134d33a99c4Schen gong
113556b53c0bSDennis Li if (amdgpu_device_skip_hw_access(adev))
1136bf36b52eSAndrey Grodzovsky return;
1137bf36b52eSAndrey Grodzovsky
1138c7d43556SJack Xiao if (adev->mes.ring[0].sched.ready) {
1139cf606729SJack Xiao amdgpu_mes_wreg(adev, reg, v);
1140cf606729SJack Xiao return;
1141cf606729SJack Xiao }
1142cf606729SJack Xiao
1143d33a99c4Schen gong spin_lock_irqsave(&kiq->ring_lock, flags);
1144c0277b9dSTim Huang r = amdgpu_ring_alloc(ring, 32);
1145c0277b9dSTim Huang if (r)
1146c0277b9dSTim Huang goto failed_unlock;
1147c0277b9dSTim Huang
1148d33a99c4Schen gong amdgpu_ring_emit_wreg(ring, reg, v);
114904e4e2e9SYintian Tao r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
115004e4e2e9SYintian Tao if (r)
115104e4e2e9SYintian Tao goto failed_undo;
115204e4e2e9SYintian Tao
1153d33a99c4Schen gong amdgpu_ring_commit(ring);
1154d33a99c4Schen gong spin_unlock_irqrestore(&kiq->ring_lock, flags);
1155d33a99c4Schen gong
1156d33a99c4Schen gong r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
1157d33a99c4Schen gong
1158d33a99c4Schen gong	/* Don't wait any longer in the GPU reset case because doing so may
1159d33a99c4Schen gong	 * block the gpu_recover() routine forever, e.g. this virt_kiq_rreg
1160d33a99c4Schen gong	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
1161d33a99c4Schen gong	 * never return if we keep waiting in virt_kiq_rreg, which causes
1162d33a99c4Schen gong	 * gpu_recover() to hang there.
1163d33a99c4Schen gong	 *
1164d33a99c4Schen gong	 * Also don't wait any longer when called from IRQ context.
1165d33a99c4Schen gong	 */
116653b3f8f4SDennis Li if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
1167d33a99c4Schen gong goto failed_kiq_write;
1168d33a99c4Schen gong
1169d33a99c4Schen gong might_sleep();
1170d33a99c4Schen gong while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
1172d33a99c4Schen gong msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
1173d33a99c4Schen gong r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
1174d33a99c4Schen gong }
1175d33a99c4Schen gong
1176d33a99c4Schen gong if (cnt > MAX_KIQ_REG_TRY)
1177d33a99c4Schen gong goto failed_kiq_write;
1178d33a99c4Schen gong
1179d33a99c4Schen gong return;
1180d33a99c4Schen gong
118104e4e2e9SYintian Tao failed_undo:
118204e4e2e9SYintian Tao amdgpu_ring_undo(ring);
1183c0277b9dSTim Huang failed_unlock:
118404e4e2e9SYintian Tao spin_unlock_irqrestore(&kiq->ring_lock, flags);
1185d33a99c4Schen gong failed_kiq_write:
1186aac89168SDennis Li dev_err(adev->dev, "failed to write reg:%x\n", reg);
1187d33a99c4Schen gong }
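/*
 * Sketch only: these KIQ helpers are typically reached through the generic
 * register access paths rather than called directly, but a direct call would
 * look like this (the register offset and xcc_id 0 are illustrative):
 *
 *	val = amdgpu_kiq_rreg(adev, reg_offset, 0);
 *	amdgpu_kiq_wreg(adev, reg_offset, val | some_bit, 0);
 */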
1188a3bab325SAlex Deucher
amdgpu_gfx_get_num_kcq(struct amdgpu_device * adev)1189a3bab325SAlex Deucher int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev)
1190a3bab325SAlex Deucher {
1191a3bab325SAlex Deucher if (amdgpu_num_kcq == -1) {
1192a3bab325SAlex Deucher return 8;
1193a3bab325SAlex Deucher } else if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) {
1194a3bab325SAlex Deucher dev_warn(adev->dev, "set kernel compute queue number to 8 due to invalid parameter provided by user\n");
1195a3bab325SAlex Deucher return 8;
1196a3bab325SAlex Deucher }
1197a3bab325SAlex Deucher return amdgpu_num_kcq;
1198a3bab325SAlex Deucher }
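/*
 * Example (assuming the standard amdgpu "num_kcq" module parameter that
 * backs amdgpu_num_kcq):
 *
 *	modprobe amdgpu num_kcq=4	# use 4 kernel compute queues
 *	modprobe amdgpu num_kcq=-1	# default, the helper above returns 8
 */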
1199ec71b250SLikun Gao
amdgpu_gfx_cp_init_microcode(struct amdgpu_device * adev,uint32_t ucode_id)1200ec71b250SLikun Gao void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev,
12012d89e2ddSLikun Gao uint32_t ucode_id)
1202ec71b250SLikun Gao {
1203ec71b250SLikun Gao const struct gfx_firmware_header_v1_0 *cp_hdr;
1204ec71b250SLikun Gao const struct gfx_firmware_header_v2_0 *cp_hdr_v2_0;
1205ec71b250SLikun Gao struct amdgpu_firmware_info *info = NULL;
1206ec71b250SLikun Gao const struct firmware *ucode_fw;
1207ec71b250SLikun Gao unsigned int fw_size;
1208ec71b250SLikun Gao
1209ec71b250SLikun Gao switch (ucode_id) {
1210ec71b250SLikun Gao case AMDGPU_UCODE_ID_CP_PFP:
1211ec71b250SLikun Gao cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1212ec71b250SLikun Gao adev->gfx.pfp_fw->data;
1213ec71b250SLikun Gao adev->gfx.pfp_fw_version =
1214ec71b250SLikun Gao le32_to_cpu(cp_hdr->header.ucode_version);
1215ec71b250SLikun Gao adev->gfx.pfp_feature_version =
1216ec71b250SLikun Gao le32_to_cpu(cp_hdr->ucode_feature_version);
1217ec71b250SLikun Gao ucode_fw = adev->gfx.pfp_fw;
1218ec71b250SLikun Gao fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
1219ec71b250SLikun Gao break;
1220ec71b250SLikun Gao case AMDGPU_UCODE_ID_CP_RS64_PFP:
1221ec71b250SLikun Gao cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
1222ec71b250SLikun Gao adev->gfx.pfp_fw->data;
1223ec71b250SLikun Gao adev->gfx.pfp_fw_version =
1224ec71b250SLikun Gao le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
1225ec71b250SLikun Gao adev->gfx.pfp_feature_version =
1226ec71b250SLikun Gao le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
1227ec71b250SLikun Gao ucode_fw = adev->gfx.pfp_fw;
1228ec71b250SLikun Gao fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes);
1229ec71b250SLikun Gao break;
1230ec71b250SLikun Gao case AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK:
1231ec71b250SLikun Gao case AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK:
1232ec71b250SLikun Gao cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
1233ec71b250SLikun Gao adev->gfx.pfp_fw->data;
1234ec71b250SLikun Gao ucode_fw = adev->gfx.pfp_fw;
1235ec71b250SLikun Gao fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes);
1236ec71b250SLikun Gao break;
1237ec71b250SLikun Gao case AMDGPU_UCODE_ID_CP_ME:
1238ec71b250SLikun Gao cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1239ec71b250SLikun Gao adev->gfx.me_fw->data;
1240ec71b250SLikun Gao adev->gfx.me_fw_version =
1241ec71b250SLikun Gao le32_to_cpu(cp_hdr->header.ucode_version);
1242ec71b250SLikun Gao adev->gfx.me_feature_version =
1243ec71b250SLikun Gao le32_to_cpu(cp_hdr->ucode_feature_version);
1244ec71b250SLikun Gao ucode_fw = adev->gfx.me_fw;
1245ec71b250SLikun Gao fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
1246ec71b250SLikun Gao break;
1247ec71b250SLikun Gao case AMDGPU_UCODE_ID_CP_RS64_ME:
1248ec71b250SLikun Gao cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
1249ec71b250SLikun Gao adev->gfx.me_fw->data;
1250ec71b250SLikun Gao adev->gfx.me_fw_version =
1251ec71b250SLikun Gao le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
1252ec71b250SLikun Gao adev->gfx.me_feature_version =
1253ec71b250SLikun Gao le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
1254ec71b250SLikun Gao ucode_fw = adev->gfx.me_fw;
1255ec71b250SLikun Gao fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes);
1256ec71b250SLikun Gao break;
1257ec71b250SLikun Gao case AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK:
1258ec71b250SLikun Gao case AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK:
1259ec71b250SLikun Gao cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
1260ec71b250SLikun Gao adev->gfx.me_fw->data;
1261ec71b250SLikun Gao ucode_fw = adev->gfx.me_fw;
1262ec71b250SLikun Gao fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes);
1263ec71b250SLikun Gao break;
1264ec71b250SLikun Gao case AMDGPU_UCODE_ID_CP_CE:
1265ec71b250SLikun Gao cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1266ec71b250SLikun Gao adev->gfx.ce_fw->data;
1267ec71b250SLikun Gao adev->gfx.ce_fw_version =
1268ec71b250SLikun Gao le32_to_cpu(cp_hdr->header.ucode_version);
1269ec71b250SLikun Gao adev->gfx.ce_feature_version =
1270ec71b250SLikun Gao le32_to_cpu(cp_hdr->ucode_feature_version);
1271ec71b250SLikun Gao ucode_fw = adev->gfx.ce_fw;
1272ec71b250SLikun Gao fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes);
1273ec71b250SLikun Gao break;
1274ec71b250SLikun Gao case AMDGPU_UCODE_ID_CP_MEC1:
1275ec71b250SLikun Gao cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1276ec71b250SLikun Gao adev->gfx.mec_fw->data;
1277ec71b250SLikun Gao adev->gfx.mec_fw_version =
1278ec71b250SLikun Gao le32_to_cpu(cp_hdr->header.ucode_version);
1279ec71b250SLikun Gao adev->gfx.mec_feature_version =
1280ec71b250SLikun Gao le32_to_cpu(cp_hdr->ucode_feature_version);
1281ec71b250SLikun Gao ucode_fw = adev->gfx.mec_fw;
1282ec71b250SLikun Gao fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
1283ec71b250SLikun Gao le32_to_cpu(cp_hdr->jt_size) * 4;
1284ec71b250SLikun Gao break;
1285ec71b250SLikun Gao case AMDGPU_UCODE_ID_CP_MEC1_JT:
1286ec71b250SLikun Gao cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1287ec71b250SLikun Gao adev->gfx.mec_fw->data;
1288ec71b250SLikun Gao ucode_fw = adev->gfx.mec_fw;
1289ec71b250SLikun Gao fw_size = le32_to_cpu(cp_hdr->jt_size) * 4;
1290ec71b250SLikun Gao break;
1291ec71b250SLikun Gao case AMDGPU_UCODE_ID_CP_MEC2:
1292ec71b250SLikun Gao cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1293ec71b250SLikun Gao adev->gfx.mec2_fw->data;
1294ec71b250SLikun Gao adev->gfx.mec2_fw_version =
1295ec71b250SLikun Gao le32_to_cpu(cp_hdr->header.ucode_version);
1296ec71b250SLikun Gao adev->gfx.mec2_feature_version =
1297ec71b250SLikun Gao le32_to_cpu(cp_hdr->ucode_feature_version);
1298ec71b250SLikun Gao ucode_fw = adev->gfx.mec2_fw;
1299ec71b250SLikun Gao fw_size = le32_to_cpu(cp_hdr->header.ucode_size_bytes) -
1300ec71b250SLikun Gao le32_to_cpu(cp_hdr->jt_size) * 4;
1301ec71b250SLikun Gao break;
1302ec71b250SLikun Gao case AMDGPU_UCODE_ID_CP_MEC2_JT:
1303ec71b250SLikun Gao cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1304ec71b250SLikun Gao adev->gfx.mec2_fw->data;
1305ec71b250SLikun Gao ucode_fw = adev->gfx.mec2_fw;
1306ec71b250SLikun Gao fw_size = le32_to_cpu(cp_hdr->jt_size) * 4;
1307ec71b250SLikun Gao break;
1308ec71b250SLikun Gao case AMDGPU_UCODE_ID_CP_RS64_MEC:
1309ec71b250SLikun Gao cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
1310ec71b250SLikun Gao adev->gfx.mec_fw->data;
1311ec71b250SLikun Gao adev->gfx.mec_fw_version =
1312ec71b250SLikun Gao le32_to_cpu(cp_hdr_v2_0->header.ucode_version);
1313ec71b250SLikun Gao adev->gfx.mec_feature_version =
1314ec71b250SLikun Gao le32_to_cpu(cp_hdr_v2_0->ucode_feature_version);
1315ec71b250SLikun Gao ucode_fw = adev->gfx.mec_fw;
1316ec71b250SLikun Gao fw_size = le32_to_cpu(cp_hdr_v2_0->ucode_size_bytes);
1317ec71b250SLikun Gao break;
1318ec71b250SLikun Gao case AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK:
1319ec71b250SLikun Gao case AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK:
1320ec71b250SLikun Gao case AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK:
1321ec71b250SLikun Gao case AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK:
1322ec71b250SLikun Gao cp_hdr_v2_0 = (const struct gfx_firmware_header_v2_0 *)
1323ec71b250SLikun Gao adev->gfx.mec_fw->data;
1324ec71b250SLikun Gao ucode_fw = adev->gfx.mec_fw;
1325ec71b250SLikun Gao fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes);
1326ec71b250SLikun Gao break;
1327ec71b250SLikun Gao default:
13289a5f15d2STim Huang dev_err(adev->dev, "Invalid ucode id %u\n", ucode_id);
13299a5f15d2STim Huang return;
1330ec71b250SLikun Gao }
1331ec71b250SLikun Gao
1332ec71b250SLikun Gao if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1333ec71b250SLikun Gao info = &adev->firmware.ucode[ucode_id];
1334ec71b250SLikun Gao info->ucode_id = ucode_id;
1335ec71b250SLikun Gao info->fw = ucode_fw;
1336ec71b250SLikun Gao adev->firmware.fw_size += ALIGN(fw_size, PAGE_SIZE);
1337ec71b250SLikun Gao }
1338ec71b250SLikun Gao }
133966daccdeSLe Ma
amdgpu_gfx_is_master_xcc(struct amdgpu_device * adev,int xcc_id)134066daccdeSLe Ma bool amdgpu_gfx_is_master_xcc(struct amdgpu_device *adev, int xcc_id)
134166daccdeSLe Ma {
134266daccdeSLe Ma return !(xcc_id % (adev->gfx.num_xcc_per_xcp ?
134366daccdeSLe Ma adev->gfx.num_xcc_per_xcp : 1));
134466daccdeSLe Ma }
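/*
 * Worked example: with num_xcc_per_xcp == 2, XCCs 0, 2, 4, ... are the
 * "master" XCC of their partition and the helper above returns true for
 * them, while XCCs 1, 3, 5, ... return false.
 */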
134598a54e88SLe Ma
amdgpu_gfx_get_current_compute_partition(struct device * dev,struct device_attribute * addr,char * buf)134698a54e88SLe Ma static ssize_t amdgpu_gfx_get_current_compute_partition(struct device *dev,
134798a54e88SLe Ma struct device_attribute *addr,
134898a54e88SLe Ma char *buf)
134998a54e88SLe Ma {
135098a54e88SLe Ma struct drm_device *ddev = dev_get_drvdata(dev);
135198a54e88SLe Ma struct amdgpu_device *adev = drm_to_adev(ddev);
13528e7fd193SLijo Lazar int mode;
135398a54e88SLe Ma
1354ded7d99eSLijo Lazar mode = amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
1355ded7d99eSLijo Lazar AMDGPU_XCP_FL_NONE);
135698a54e88SLe Ma
1357f9632096SLijo Lazar return sysfs_emit(buf, "%s\n", amdgpu_gfx_compute_mode_desc(mode));
135898a54e88SLe Ma }
135998a54e88SLe Ma
amdgpu_gfx_set_compute_partition(struct device * dev,struct device_attribute * addr,const char * buf,size_t count)136098a54e88SLe Ma static ssize_t amdgpu_gfx_set_compute_partition(struct device *dev,
136198a54e88SLe Ma struct device_attribute *addr,
136298a54e88SLe Ma const char *buf, size_t count)
136398a54e88SLe Ma {
136498a54e88SLe Ma struct drm_device *ddev = dev_get_drvdata(dev);
136598a54e88SLe Ma struct amdgpu_device *adev = drm_to_adev(ddev);
136698a54e88SLe Ma enum amdgpu_gfx_partition mode;
13678078f1c6SLijo Lazar int ret = 0, num_xcc;
136898a54e88SLe Ma
13698078f1c6SLijo Lazar num_xcc = NUM_XCC(adev->gfx.xcc_mask);
13708078f1c6SLijo Lazar if (num_xcc % 2 != 0)
137198a54e88SLe Ma return -EINVAL;
137298a54e88SLe Ma
137398a54e88SLe Ma if (!strncasecmp("SPX", buf, strlen("SPX"))) {
137498a54e88SLe Ma mode = AMDGPU_SPX_PARTITION_MODE;
137598a54e88SLe Ma } else if (!strncasecmp("DPX", buf, strlen("DPX"))) {
1376cb30544eSMukul Joshi		/*
1377cb30544eSMukul Joshi		 * DPX mode needs the number of AIDs to be a multiple of 2;
1378cb30544eSMukul Joshi		 * each AID connects 2 XCCs, so num_xcc must be a multiple of 4.
1379cb30544eSMukul Joshi		 */
1380cb30544eSMukul Joshi		if (num_xcc % 4)
138198a54e88SLe Ma return -EINVAL;
138298a54e88SLe Ma mode = AMDGPU_DPX_PARTITION_MODE;
138398a54e88SLe Ma } else if (!strncasecmp("TPX", buf, strlen("TPX"))) {
13848078f1c6SLijo Lazar if (num_xcc != 6)
138598a54e88SLe Ma return -EINVAL;
138698a54e88SLe Ma mode = AMDGPU_TPX_PARTITION_MODE;
138798a54e88SLe Ma } else if (!strncasecmp("QPX", buf, strlen("QPX"))) {
13888078f1c6SLijo Lazar if (num_xcc != 8)
138998a54e88SLe Ma return -EINVAL;
139098a54e88SLe Ma mode = AMDGPU_QPX_PARTITION_MODE;
139198a54e88SLe Ma } else if (!strncasecmp("CPX", buf, strlen("CPX"))) {
139298a54e88SLe Ma mode = AMDGPU_CPX_PARTITION_MODE;
139398a54e88SLe Ma } else {
139498a54e88SLe Ma return -EINVAL;
139598a54e88SLe Ma }
139698a54e88SLe Ma
13978e7fd193SLijo Lazar ret = amdgpu_xcp_switch_partition_mode(adev->xcp_mgr, mode);
139898a54e88SLe Ma
139998a54e88SLe Ma if (ret)
140098a54e88SLe Ma return ret;
140198a54e88SLe Ma
140298a54e88SLe Ma return count;
140398a54e88SLe Ma }
140498a54e88SLe Ma
14051bc0b339SLijo Lazar static const char *xcp_desc[] = {
14061bc0b339SLijo Lazar [AMDGPU_SPX_PARTITION_MODE] = "SPX",
14071bc0b339SLijo Lazar [AMDGPU_DPX_PARTITION_MODE] = "DPX",
14081bc0b339SLijo Lazar [AMDGPU_TPX_PARTITION_MODE] = "TPX",
14091bc0b339SLijo Lazar [AMDGPU_QPX_PARTITION_MODE] = "QPX",
14101bc0b339SLijo Lazar [AMDGPU_CPX_PARTITION_MODE] = "CPX",
14111bc0b339SLijo Lazar };
14121bc0b339SLijo Lazar
amdgpu_gfx_get_available_compute_partition(struct device * dev,struct device_attribute * addr,char * buf)141398a54e88SLe Ma static ssize_t amdgpu_gfx_get_available_compute_partition(struct device *dev,
141498a54e88SLe Ma struct device_attribute *addr,
141598a54e88SLe Ma char *buf)
141698a54e88SLe Ma {
141798a54e88SLe Ma struct drm_device *ddev = dev_get_drvdata(dev);
141898a54e88SLe Ma struct amdgpu_device *adev = drm_to_adev(ddev);
14191bc0b339SLijo Lazar struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr;
14201bc0b339SLijo Lazar int size = 0, mode;
14211bc0b339SLijo Lazar char *sep = "";
142298a54e88SLe Ma
14231bc0b339SLijo Lazar if (!xcp_mgr || !xcp_mgr->avail_xcp_modes)
14241bc0b339SLijo Lazar return sysfs_emit(buf, "Not supported\n");
14251bc0b339SLijo Lazar
14261bc0b339SLijo Lazar for_each_inst(mode, xcp_mgr->avail_xcp_modes) {
14271bc0b339SLijo Lazar size += sysfs_emit_at(buf, size, "%s%s", sep, xcp_desc[mode]);
14281bc0b339SLijo Lazar sep = ", ";
142998a54e88SLe Ma }
143098a54e88SLe Ma
14311bc0b339SLijo Lazar size += sysfs_emit_at(buf, size, "\n");
14321bc0b339SLijo Lazar
14331bc0b339SLijo Lazar return size;
143498a54e88SLe Ma }
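/*
 * Illustrative sysfs usage (the exact device path varies per card):
 *
 *	cat /sys/class/drm/card0/device/available_compute_partition
 *	echo DPX > /sys/class/drm/card0/device/current_compute_partition
 *
 * Writes requesting a mode the device's XCC count cannot support are
 * rejected with -EINVAL.
 */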
143598a54e88SLe Ma
amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring * ring)1436d361ad5dSSrinivasan Shanmugam static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring)
1437d361ad5dSSrinivasan Shanmugam {
1438d361ad5dSSrinivasan Shanmugam struct amdgpu_device *adev = ring->adev;
1439559a2858SSrinivasan Shanmugam struct drm_gpu_scheduler *sched = &ring->sched;
1440559a2858SSrinivasan Shanmugam struct drm_sched_entity entity;
1441*447fab30SChristian König static atomic_t counter;
1442559a2858SSrinivasan Shanmugam struct dma_fence *f;
1443d361ad5dSSrinivasan Shanmugam struct amdgpu_job *job;
1444d361ad5dSSrinivasan Shanmugam struct amdgpu_ib *ib;
1445*447fab30SChristian König void *owner;
1446d361ad5dSSrinivasan Shanmugam int i, r;
1447d361ad5dSSrinivasan Shanmugam
1448559a2858SSrinivasan Shanmugam /* Initialize the scheduler entity */
1449559a2858SSrinivasan Shanmugam r = drm_sched_entity_init(&entity, DRM_SCHED_PRIORITY_NORMAL,
1450559a2858SSrinivasan Shanmugam &sched, 1, NULL);
1451559a2858SSrinivasan Shanmugam if (r) {
1452559a2858SSrinivasan Shanmugam dev_err(adev->dev, "Failed setting up GFX kernel entity.\n");
1453559a2858SSrinivasan Shanmugam goto err;
1454559a2858SSrinivasan Shanmugam }
1455559a2858SSrinivasan Shanmugam
1456*447fab30SChristian König /*
1457*447fab30SChristian König * Use some unique dummy value as the owner to make sure we execute
1458*447fab30SChristian König	 * the cleaner shader on each submission. The value just needs to change
1459*447fab30SChristian König * for each submission and is otherwise meaningless.
1460*447fab30SChristian König */
1461*447fab30SChristian König owner = (void *)(unsigned long)atomic_inc_return(&counter);
1462*447fab30SChristian König
1463*447fab30SChristian König r = amdgpu_job_alloc_with_ib(ring->adev, &entity, owner,
1464*447fab30SChristian König 64, 0, &job);
1465d361ad5dSSrinivasan Shanmugam if (r)
1466d361ad5dSSrinivasan Shanmugam goto err;
1467d361ad5dSSrinivasan Shanmugam
1468d361ad5dSSrinivasan Shanmugam job->enforce_isolation = true;
1469d361ad5dSSrinivasan Shanmugam
1470d361ad5dSSrinivasan Shanmugam ib = &job->ibs[0];
1471d361ad5dSSrinivasan Shanmugam for (i = 0; i <= ring->funcs->align_mask; ++i)
1472d361ad5dSSrinivasan Shanmugam ib->ptr[i] = ring->funcs->nop;
1473d361ad5dSSrinivasan Shanmugam ib->length_dw = ring->funcs->align_mask + 1;
1474d361ad5dSSrinivasan Shanmugam
1475559a2858SSrinivasan Shanmugam f = amdgpu_job_submit(job);
1476559a2858SSrinivasan Shanmugam
1477559a2858SSrinivasan Shanmugam r = dma_fence_wait(f, false);
1478d361ad5dSSrinivasan Shanmugam if (r)
1479559a2858SSrinivasan Shanmugam goto err;
1480d361ad5dSSrinivasan Shanmugam
1481d361ad5dSSrinivasan Shanmugam dma_fence_put(f);
1482d361ad5dSSrinivasan Shanmugam
1483559a2858SSrinivasan Shanmugam /* Clean up the scheduler entity */
1484559a2858SSrinivasan Shanmugam drm_sched_entity_destroy(&entity);
1485d361ad5dSSrinivasan Shanmugam return 0;
1486d361ad5dSSrinivasan Shanmugam
1487d361ad5dSSrinivasan Shanmugam err:
1488d361ad5dSSrinivasan Shanmugam return r;
1489d361ad5dSSrinivasan Shanmugam }
1490d361ad5dSSrinivasan Shanmugam
amdgpu_gfx_run_cleaner_shader(struct amdgpu_device * adev,int xcp_id)1491d361ad5dSSrinivasan Shanmugam static int amdgpu_gfx_run_cleaner_shader(struct amdgpu_device *adev, int xcp_id)
1492d361ad5dSSrinivasan Shanmugam {
1493d361ad5dSSrinivasan Shanmugam int num_xcc = NUM_XCC(adev->gfx.xcc_mask);
1494d361ad5dSSrinivasan Shanmugam struct amdgpu_ring *ring;
1495d361ad5dSSrinivasan Shanmugam int num_xcc_to_clear;
1496d361ad5dSSrinivasan Shanmugam int i, r, xcc_id;
1497d361ad5dSSrinivasan Shanmugam
1498d361ad5dSSrinivasan Shanmugam if (adev->gfx.num_xcc_per_xcp)
1499d361ad5dSSrinivasan Shanmugam num_xcc_to_clear = adev->gfx.num_xcc_per_xcp;
1500d361ad5dSSrinivasan Shanmugam else
1501d361ad5dSSrinivasan Shanmugam num_xcc_to_clear = 1;
1502d361ad5dSSrinivasan Shanmugam
1503d361ad5dSSrinivasan Shanmugam for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
1504d361ad5dSSrinivasan Shanmugam for (i = 0; i < adev->gfx.num_compute_rings; i++) {
1505d361ad5dSSrinivasan Shanmugam ring = &adev->gfx.compute_ring[i + xcc_id * adev->gfx.num_compute_rings];
1506d361ad5dSSrinivasan Shanmugam if ((ring->xcp_id == xcp_id) && ring->sched.ready) {
1507d361ad5dSSrinivasan Shanmugam r = amdgpu_gfx_run_cleaner_shader_job(ring);
1508d361ad5dSSrinivasan Shanmugam if (r)
1509d361ad5dSSrinivasan Shanmugam return r;
1510d361ad5dSSrinivasan Shanmugam num_xcc_to_clear--;
1511d361ad5dSSrinivasan Shanmugam break;
1512d361ad5dSSrinivasan Shanmugam }
1513d361ad5dSSrinivasan Shanmugam }
1514d361ad5dSSrinivasan Shanmugam }
1515d361ad5dSSrinivasan Shanmugam
1516d361ad5dSSrinivasan Shanmugam if (num_xcc_to_clear)
1517d361ad5dSSrinivasan Shanmugam return -ENOENT;
1518d361ad5dSSrinivasan Shanmugam
1519d361ad5dSSrinivasan Shanmugam return 0;
1520d361ad5dSSrinivasan Shanmugam }
1521d361ad5dSSrinivasan Shanmugam
1522a69f4cc2SSrinivasan Shanmugam /**
1523a69f4cc2SSrinivasan Shanmugam * amdgpu_gfx_set_run_cleaner_shader - Execute the AMDGPU GFX Cleaner Shader
1524a69f4cc2SSrinivasan Shanmugam * @dev: The device structure
1525a69f4cc2SSrinivasan Shanmugam * @attr: The device attribute structure
1526a69f4cc2SSrinivasan Shanmugam * @buf: The buffer containing the input data
1527a69f4cc2SSrinivasan Shanmugam * @count: The size of the input data
1528a69f4cc2SSrinivasan Shanmugam *
1529a69f4cc2SSrinivasan Shanmugam * Provides the sysfs interface to manually run a cleaner shader, which is
1530a69f4cc2SSrinivasan Shanmugam * used to clear the GPU state between different tasks. Writing a value to the
1531a69f4cc2SSrinivasan Shanmugam * 'run_cleaner_shader' sysfs file triggers the cleaner shader execution.
1532a69f4cc2SSrinivasan Shanmugam * The value written corresponds to the partition index on multi-partition
1533a69f4cc2SSrinivasan Shanmugam * devices. On single-partition devices, the value should be '0'.
1534a69f4cc2SSrinivasan Shanmugam *
1535a69f4cc2SSrinivasan Shanmugam * The cleaner shader clears the Local Data Store (LDS) and General Purpose
1536a69f4cc2SSrinivasan Shanmugam * Registers (GPRs) to ensure data isolation between GPU workloads.
1537a69f4cc2SSrinivasan Shanmugam *
1538a69f4cc2SSrinivasan Shanmugam * Return: Number of bytes written on success, or a negative error code on failure.
1539a69f4cc2SSrinivasan Shanmugam */
amdgpu_gfx_set_run_cleaner_shader(struct device * dev,struct device_attribute * attr,const char * buf,size_t count)1540d361ad5dSSrinivasan Shanmugam static ssize_t amdgpu_gfx_set_run_cleaner_shader(struct device *dev,
1541d361ad5dSSrinivasan Shanmugam struct device_attribute *attr,
1542d361ad5dSSrinivasan Shanmugam const char *buf,
1543d361ad5dSSrinivasan Shanmugam size_t count)
1544d361ad5dSSrinivasan Shanmugam {
1545d361ad5dSSrinivasan Shanmugam struct drm_device *ddev = dev_get_drvdata(dev);
1546d361ad5dSSrinivasan Shanmugam struct amdgpu_device *adev = drm_to_adev(ddev);
1547d361ad5dSSrinivasan Shanmugam int ret;
1548d361ad5dSSrinivasan Shanmugam long value;
1549d361ad5dSSrinivasan Shanmugam
1550d361ad5dSSrinivasan Shanmugam if (amdgpu_in_reset(adev))
1551d361ad5dSSrinivasan Shanmugam return -EPERM;
1552d361ad5dSSrinivasan Shanmugam if (adev->in_suspend && !adev->in_runpm)
1553d361ad5dSSrinivasan Shanmugam return -EPERM;
1554d361ad5dSSrinivasan Shanmugam
1555d361ad5dSSrinivasan Shanmugam ret = kstrtol(buf, 0, &value);
1556d361ad5dSSrinivasan Shanmugam
1557d361ad5dSSrinivasan Shanmugam if (ret)
1558d361ad5dSSrinivasan Shanmugam return -EINVAL;
1559d361ad5dSSrinivasan Shanmugam
1560d361ad5dSSrinivasan Shanmugam if (value < 0)
1561d361ad5dSSrinivasan Shanmugam return -EINVAL;
1562d361ad5dSSrinivasan Shanmugam
1563d361ad5dSSrinivasan Shanmugam if (adev->xcp_mgr) {
1564d361ad5dSSrinivasan Shanmugam if (value >= adev->xcp_mgr->num_xcps)
1565d361ad5dSSrinivasan Shanmugam return -EINVAL;
1566d361ad5dSSrinivasan Shanmugam } else {
1567d361ad5dSSrinivasan Shanmugam if (value > 1)
1568d361ad5dSSrinivasan Shanmugam return -EINVAL;
1569d361ad5dSSrinivasan Shanmugam }
1570d361ad5dSSrinivasan Shanmugam
1571d361ad5dSSrinivasan Shanmugam ret = pm_runtime_get_sync(ddev->dev);
1572d361ad5dSSrinivasan Shanmugam if (ret < 0) {
1573d361ad5dSSrinivasan Shanmugam pm_runtime_put_autosuspend(ddev->dev);
1574d361ad5dSSrinivasan Shanmugam return ret;
1575d361ad5dSSrinivasan Shanmugam }
1576d361ad5dSSrinivasan Shanmugam
1577d361ad5dSSrinivasan Shanmugam ret = amdgpu_gfx_run_cleaner_shader(adev, value);
1578d361ad5dSSrinivasan Shanmugam
1579d361ad5dSSrinivasan Shanmugam pm_runtime_mark_last_busy(ddev->dev);
1580d361ad5dSSrinivasan Shanmugam pm_runtime_put_autosuspend(ddev->dev);
1581d361ad5dSSrinivasan Shanmugam
1582d361ad5dSSrinivasan Shanmugam if (ret)
1583d361ad5dSSrinivasan Shanmugam return ret;
1584d361ad5dSSrinivasan Shanmugam
1585d361ad5dSSrinivasan Shanmugam return count;
1586d361ad5dSSrinivasan Shanmugam }
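/*
 * Illustrative sysfs usage (the exact device path varies per card):
 *
 *	echo 0 > /sys/class/drm/card0/device/run_cleaner_shader
 *
 * runs the cleaner shader on partition 0, the only valid value on a
 * single-partition device.
 */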
1587d361ad5dSSrinivasan Shanmugam
1588a69f4cc2SSrinivasan Shanmugam /**
1589a69f4cc2SSrinivasan Shanmugam * amdgpu_gfx_get_enforce_isolation - Query AMDGPU GFX Enforce Isolation Settings
1590a69f4cc2SSrinivasan Shanmugam * @dev: The device structure
1591a69f4cc2SSrinivasan Shanmugam * @attr: The device attribute structure
1592a69f4cc2SSrinivasan Shanmugam * @buf: The buffer to store the output data
1593a69f4cc2SSrinivasan Shanmugam *
1594a69f4cc2SSrinivasan Shanmugam * Provides the sysfs read interface to get the current settings of the 'enforce_isolation'
1595a69f4cc2SSrinivasan Shanmugam * feature for each GPU partition. Reading from the 'enforce_isolation'
1596a69f4cc2SSrinivasan Shanmugam * sysfs file returns the isolation settings for all partitions, where '0'
1597a69f4cc2SSrinivasan Shanmugam * indicates disabled and '1' indicates enabled.
1598a69f4cc2SSrinivasan Shanmugam *
1599a69f4cc2SSrinivasan Shanmugam * Return: The number of bytes read from the sysfs file.
1600a69f4cc2SSrinivasan Shanmugam */
amdgpu_gfx_get_enforce_isolation(struct device * dev,struct device_attribute * attr,char * buf)1601e189be9bSSrinivasan Shanmugam static ssize_t amdgpu_gfx_get_enforce_isolation(struct device *dev,
1602e189be9bSSrinivasan Shanmugam struct device_attribute *attr,
1603e189be9bSSrinivasan Shanmugam char *buf)
1604e189be9bSSrinivasan Shanmugam {
1605e189be9bSSrinivasan Shanmugam struct drm_device *ddev = dev_get_drvdata(dev);
1606e189be9bSSrinivasan Shanmugam struct amdgpu_device *adev = drm_to_adev(ddev);
1607e189be9bSSrinivasan Shanmugam int i;
1608e189be9bSSrinivasan Shanmugam ssize_t size = 0;
1609e189be9bSSrinivasan Shanmugam
1610e189be9bSSrinivasan Shanmugam if (adev->xcp_mgr) {
1611e189be9bSSrinivasan Shanmugam for (i = 0; i < adev->xcp_mgr->num_xcps; i++) {
1612e189be9bSSrinivasan Shanmugam size += sysfs_emit_at(buf, size, "%u", adev->enforce_isolation[i]);
1613e189be9bSSrinivasan Shanmugam if (i < (adev->xcp_mgr->num_xcps - 1))
1614e189be9bSSrinivasan Shanmugam size += sysfs_emit_at(buf, size, " ");
1615e189be9bSSrinivasan Shanmugam }
1616e189be9bSSrinivasan Shanmugam buf[size++] = '\n';
1617e189be9bSSrinivasan Shanmugam } else {
1618e189be9bSSrinivasan Shanmugam size = sysfs_emit_at(buf, 0, "%u\n", adev->enforce_isolation[0]);
1619e189be9bSSrinivasan Shanmugam }
1620e189be9bSSrinivasan Shanmugam
1621e189be9bSSrinivasan Shanmugam return size;
1622e189be9bSSrinivasan Shanmugam }
1623e189be9bSSrinivasan Shanmugam
1624a69f4cc2SSrinivasan Shanmugam /**
1625a69f4cc2SSrinivasan Shanmugam * amdgpu_gfx_set_enforce_isolation - Control AMDGPU GFX Enforce Isolation
1626a69f4cc2SSrinivasan Shanmugam * @dev: The device structure
1627a69f4cc2SSrinivasan Shanmugam * @attr: The device attribute structure
1628a69f4cc2SSrinivasan Shanmugam * @buf: The buffer containing the input data
1629a69f4cc2SSrinivasan Shanmugam * @count: The size of the input data
1630a69f4cc2SSrinivasan Shanmugam *
1631a69f4cc2SSrinivasan Shanmugam * This function allows control over the 'enforce_isolation' feature, which
1632a69f4cc2SSrinivasan Shanmugam * serializes access to the graphics engine. Writing '1' or '0' to the
1633a69f4cc2SSrinivasan Shanmugam * 'enforce_isolation' sysfs file enables or disables process isolation for
1634a69f4cc2SSrinivasan Shanmugam * each partition. The input should specify the setting for all partitions.
1635a69f4cc2SSrinivasan Shanmugam *
1636a69f4cc2SSrinivasan Shanmugam * Return: Number of bytes written on success, or a negative error code on failure.
1637a69f4cc2SSrinivasan Shanmugam */
amdgpu_gfx_set_enforce_isolation(struct device * dev,struct device_attribute * attr,const char * buf,size_t count)1638e189be9bSSrinivasan Shanmugam static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev,
1639e189be9bSSrinivasan Shanmugam struct device_attribute *attr,
1640e189be9bSSrinivasan Shanmugam const char *buf, size_t count)
1641e189be9bSSrinivasan Shanmugam {
1642e189be9bSSrinivasan Shanmugam struct drm_device *ddev = dev_get_drvdata(dev);
1643e189be9bSSrinivasan Shanmugam struct amdgpu_device *adev = drm_to_adev(ddev);
1644e189be9bSSrinivasan Shanmugam long partition_values[MAX_XCP] = {0};
1645e189be9bSSrinivasan Shanmugam int ret, i, num_partitions;
1646e189be9bSSrinivasan Shanmugam const char *input_buf = buf;
1647e189be9bSSrinivasan Shanmugam
1648e189be9bSSrinivasan Shanmugam for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) {
1649e189be9bSSrinivasan Shanmugam ret = sscanf(input_buf, "%ld", &partition_values[i]);
1650e189be9bSSrinivasan Shanmugam if (ret <= 0)
1651e189be9bSSrinivasan Shanmugam break;
1652e189be9bSSrinivasan Shanmugam
1653e189be9bSSrinivasan Shanmugam /* Move the pointer to the next value in the string */
1654e189be9bSSrinivasan Shanmugam input_buf = strchr(input_buf, ' ');
1655e189be9bSSrinivasan Shanmugam if (input_buf) {
1656e189be9bSSrinivasan Shanmugam input_buf++;
1657e189be9bSSrinivasan Shanmugam } else {
1658e189be9bSSrinivasan Shanmugam i++;
1659e189be9bSSrinivasan Shanmugam break;
1660e189be9bSSrinivasan Shanmugam }
1661e189be9bSSrinivasan Shanmugam }
1662e189be9bSSrinivasan Shanmugam num_partitions = i;
1663e189be9bSSrinivasan Shanmugam
1664e189be9bSSrinivasan Shanmugam if (adev->xcp_mgr && num_partitions != adev->xcp_mgr->num_xcps)
1665e189be9bSSrinivasan Shanmugam return -EINVAL;
1666e189be9bSSrinivasan Shanmugam
1667e189be9bSSrinivasan Shanmugam if (!adev->xcp_mgr && num_partitions != 1)
1668e189be9bSSrinivasan Shanmugam return -EINVAL;
1669e189be9bSSrinivasan Shanmugam
1670e189be9bSSrinivasan Shanmugam for (i = 0; i < num_partitions; i++) {
1671e189be9bSSrinivasan Shanmugam if (partition_values[i] != 0 && partition_values[i] != 1)
1672e189be9bSSrinivasan Shanmugam return -EINVAL;
1673e189be9bSSrinivasan Shanmugam }
1674e189be9bSSrinivasan Shanmugam
1675e189be9bSSrinivasan Shanmugam mutex_lock(&adev->enforce_isolation_mutex);
1676db1e58ecSChristian König for (i = 0; i < num_partitions; i++)
1677e189be9bSSrinivasan Shanmugam adev->enforce_isolation[i] = partition_values[i];
1678e189be9bSSrinivasan Shanmugam mutex_unlock(&adev->enforce_isolation_mutex);
1679e189be9bSSrinivasan Shanmugam
168027b79151SAlex Deucher amdgpu_mes_update_enforce_isolation(adev);
168127b79151SAlex Deucher
1682e189be9bSSrinivasan Shanmugam return count;
1683e189be9bSSrinivasan Shanmugam }
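/*
 * Illustrative sysfs usage (the exact device path varies per card):
 *
 *	cat /sys/class/drm/card0/device/enforce_isolation
 *	echo 1 > /sys/class/drm/card0/device/enforce_isolation		# single partition
 *	echo "1 0 1 0" > /sys/class/drm/card0/device/enforce_isolation	# four partitions
 *
 * The write must supply exactly one value per partition.
 */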
1684e189be9bSSrinivasan Shanmugam
amdgpu_gfx_get_gfx_reset_mask(struct device * dev,struct device_attribute * attr,char * buf)16856c8d1f4bS[email protected] static ssize_t amdgpu_gfx_get_gfx_reset_mask(struct device *dev,
16866c8d1f4bS[email protected] struct device_attribute *attr,
16876c8d1f4bS[email protected] char *buf)
16886c8d1f4bS[email protected] {
16896c8d1f4bS[email protected] struct drm_device *ddev = dev_get_drvdata(dev);
16906c8d1f4bS[email protected] struct amdgpu_device *adev = drm_to_adev(ddev);
16916c8d1f4bS[email protected]
16926c8d1f4bS[email protected] if (!adev)
16936c8d1f4bS[email protected] return -ENODEV;
16946c8d1f4bS[email protected]
16956c8d1f4bS[email protected] return amdgpu_show_reset_mask(buf, adev->gfx.gfx_supported_reset);
16966c8d1f4bS[email protected] }
16976c8d1f4bS[email protected]
amdgpu_gfx_get_compute_reset_mask(struct device * dev,struct device_attribute * attr,char * buf)16986c8d1f4bS[email protected] static ssize_t amdgpu_gfx_get_compute_reset_mask(struct device *dev,
16996c8d1f4bS[email protected] struct device_attribute *attr,
17006c8d1f4bS[email protected] char *buf)
17016c8d1f4bS[email protected] {
17026c8d1f4bS[email protected] struct drm_device *ddev = dev_get_drvdata(dev);
17036c8d1f4bS[email protected] struct amdgpu_device *adev = drm_to_adev(ddev);
17046c8d1f4bS[email protected]
17056c8d1f4bS[email protected] if (!adev)
17066c8d1f4bS[email protected] return -ENODEV;
17076c8d1f4bS[email protected]
17086c8d1f4bS[email protected] return amdgpu_show_reset_mask(buf, adev->gfx.compute_supported_reset);
17096c8d1f4bS[email protected] }
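/*
 * Illustrative sysfs usage (the exact device path varies per card; these
 * files are only created when GPU recovery is enabled):
 *
 *	cat /sys/class/drm/card0/device/gfx_reset_mask
 *	cat /sys/class/drm/card0/device/compute_reset_mask
 */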
17106c8d1f4bS[email protected]
1711d361ad5dSSrinivasan Shanmugam static DEVICE_ATTR(run_cleaner_shader, 0200,
1712d361ad5dSSrinivasan Shanmugam NULL, amdgpu_gfx_set_run_cleaner_shader);
1713d361ad5dSSrinivasan Shanmugam
1714e189be9bSSrinivasan Shanmugam static DEVICE_ATTR(enforce_isolation, 0644,
1715e189be9bSSrinivasan Shanmugam amdgpu_gfx_get_enforce_isolation,
1716e189be9bSSrinivasan Shanmugam amdgpu_gfx_set_enforce_isolation);
1717e189be9bSSrinivasan Shanmugam
171850fbe0ccSSrinivasan Shanmugam static DEVICE_ATTR(current_compute_partition, 0644,
171998a54e88SLe Ma amdgpu_gfx_get_current_compute_partition,
172098a54e88SLe Ma amdgpu_gfx_set_compute_partition);
172198a54e88SLe Ma
172250fbe0ccSSrinivasan Shanmugam static DEVICE_ATTR(available_compute_partition, 0444,
172398a54e88SLe Ma amdgpu_gfx_get_available_compute_partition, NULL);
17246c8d1f4bS[email protected] static DEVICE_ATTR(gfx_reset_mask, 0444,
17256c8d1f4bS[email protected] amdgpu_gfx_get_gfx_reset_mask, NULL);
17266c8d1f4bS[email protected]
17276c8d1f4bS[email protected] static DEVICE_ATTR(compute_reset_mask, 0444,
17286c8d1f4bS[email protected] amdgpu_gfx_get_compute_reset_mask, NULL);
172998a54e88SLe Ma
amdgpu_gfx_sysfs_xcp_init(struct amdgpu_device * adev)1730047767ddSLijo Lazar static int amdgpu_gfx_sysfs_xcp_init(struct amdgpu_device *adev)
173198a54e88SLe Ma {
1732f8588f05SLijo Lazar struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr;
1733f8588f05SLijo Lazar bool xcp_switch_supported;
173498a54e88SLe Ma int r;
173598a54e88SLe Ma
1736f8588f05SLijo Lazar if (!xcp_mgr)
1737f8588f05SLijo Lazar return 0;
1738f8588f05SLijo Lazar
1739f8588f05SLijo Lazar xcp_switch_supported =
1740f8588f05SLijo Lazar (xcp_mgr->funcs && xcp_mgr->funcs->switch_partition_mode);
1741f8588f05SLijo Lazar
1742f8588f05SLijo Lazar if (!xcp_switch_supported)
1743f8588f05SLijo Lazar dev_attr_current_compute_partition.attr.mode &=
1744f8588f05SLijo Lazar ~(S_IWUSR | S_IWGRP | S_IWOTH);
1745f8588f05SLijo Lazar
174698a54e88SLe Ma r = device_create_file(adev->dev, &dev_attr_current_compute_partition);
174798a54e88SLe Ma if (r)
174898a54e88SLe Ma return r;
174998a54e88SLe Ma
1750f8588f05SLijo Lazar if (xcp_switch_supported)
1751f8588f05SLijo Lazar r = device_create_file(adev->dev,
1752f8588f05SLijo Lazar &dev_attr_available_compute_partition);
175398a54e88SLe Ma
1754ea2d2f8eSRajneesh Bhardwaj return r;
175598a54e88SLe Ma }
1756993d218fSShiwu Zhang
amdgpu_gfx_sysfs_xcp_fini(struct amdgpu_device * adev)1757047767ddSLijo Lazar static void amdgpu_gfx_sysfs_xcp_fini(struct amdgpu_device *adev)
1758993d218fSShiwu Zhang {
1759f8588f05SLijo Lazar struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr;
1760f8588f05SLijo Lazar bool xcp_switch_supported;
1761f8588f05SLijo Lazar
1762f8588f05SLijo Lazar if (!xcp_mgr)
1763f8588f05SLijo Lazar return;
1764f8588f05SLijo Lazar
1765f8588f05SLijo Lazar xcp_switch_supported =
1766f8588f05SLijo Lazar (xcp_mgr->funcs && xcp_mgr->funcs->switch_partition_mode);
1767993d218fSShiwu Zhang device_remove_file(adev->dev, &dev_attr_current_compute_partition);
1768f8588f05SLijo Lazar
1769f8588f05SLijo Lazar if (xcp_switch_supported)
1770f8588f05SLijo Lazar device_remove_file(adev->dev,
1771f8588f05SLijo Lazar &dev_attr_available_compute_partition);
1772993d218fSShiwu Zhang }
1773aec773a1SSrinivasan Shanmugam
amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device * adev)1774047767ddSLijo Lazar static int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev)
1775e189be9bSSrinivasan Shanmugam {
1776e189be9bSSrinivasan Shanmugam int r;
1777e189be9bSSrinivasan Shanmugam
1778e189be9bSSrinivasan Shanmugam r = device_create_file(adev->dev, &dev_attr_enforce_isolation);
1779e189be9bSSrinivasan Shanmugam if (r)
1780e189be9bSSrinivasan Shanmugam return r;
1781047767ddSLijo Lazar if (adev->gfx.enable_cleaner_shader)
1782d361ad5dSSrinivasan Shanmugam r = device_create_file(adev->dev, &dev_attr_run_cleaner_shader);
1783d361ad5dSSrinivasan Shanmugam
1784047767ddSLijo Lazar return r;
1785e189be9bSSrinivasan Shanmugam }
1786e189be9bSSrinivasan Shanmugam
amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device * adev)1787047767ddSLijo Lazar static void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev)
1788e189be9bSSrinivasan Shanmugam {
1789e189be9bSSrinivasan Shanmugam device_remove_file(adev->dev, &dev_attr_enforce_isolation);
1790047767ddSLijo Lazar if (adev->gfx.enable_cleaner_shader)
1791d361ad5dSSrinivasan Shanmugam device_remove_file(adev->dev, &dev_attr_run_cleaner_shader);
1792e189be9bSSrinivasan Shanmugam }
1793e189be9bSSrinivasan Shanmugam
amdgpu_gfx_sysfs_reset_mask_init(struct amdgpu_device * adev)17946c8d1f4bS[email protected] static int amdgpu_gfx_sysfs_reset_mask_init(struct amdgpu_device *adev)
17956c8d1f4bS[email protected] {
17966c8d1f4bS[email protected] int r = 0;
17976c8d1f4bS[email protected]
17986c8d1f4bS[email protected] if (!amdgpu_gpu_recovery)
17996c8d1f4bS[email protected] return r;
18006c8d1f4bS[email protected]
18016c8d1f4bS[email protected] if (adev->gfx.num_gfx_rings) {
18026c8d1f4bS[email protected] r = device_create_file(adev->dev, &dev_attr_gfx_reset_mask);
18036c8d1f4bS[email protected] if (r)
18046c8d1f4bS[email protected] return r;
18056c8d1f4bS[email protected] }
18066c8d1f4bS[email protected]
18076c8d1f4bS[email protected] if (adev->gfx.num_compute_rings) {
18086c8d1f4bS[email protected] r = device_create_file(adev->dev, &dev_attr_compute_reset_mask);
18096c8d1f4bS[email protected] if (r)
18106c8d1f4bS[email protected] return r;
18116c8d1f4bS[email protected] }
18126c8d1f4bS[email protected]
18136c8d1f4bS[email protected] return r;
18146c8d1f4bS[email protected] }
18156c8d1f4bS[email protected]
amdgpu_gfx_sysfs_reset_mask_fini(struct amdgpu_device * adev)18166c8d1f4bS[email protected] static void amdgpu_gfx_sysfs_reset_mask_fini(struct amdgpu_device *adev)
18176c8d1f4bS[email protected] {
18186c8d1f4bS[email protected] if (!amdgpu_gpu_recovery)
18196c8d1f4bS[email protected] return;
18206c8d1f4bS[email protected]
18216c8d1f4bS[email protected] if (adev->gfx.num_gfx_rings)
18226c8d1f4bS[email protected] device_remove_file(adev->dev, &dev_attr_gfx_reset_mask);
18236c8d1f4bS[email protected]
18246c8d1f4bS[email protected] if (adev->gfx.num_compute_rings)
18256c8d1f4bS[email protected] device_remove_file(adev->dev, &dev_attr_compute_reset_mask);
18266c8d1f4bS[email protected] }
18276c8d1f4bS[email protected]
amdgpu_gfx_sysfs_init(struct amdgpu_device * adev)1828047767ddSLijo Lazar int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev)
1829047767ddSLijo Lazar {
1830047767ddSLijo Lazar int r;
1831047767ddSLijo Lazar
1832047767ddSLijo Lazar r = amdgpu_gfx_sysfs_xcp_init(adev);
1833047767ddSLijo Lazar if (r) {
1834047767ddSLijo Lazar		dev_err(adev->dev, "failed to create xcp sysfs files\n");
1835047767ddSLijo Lazar return r;
1836047767ddSLijo Lazar }
1837047767ddSLijo Lazar
1838047767ddSLijo Lazar r = amdgpu_gfx_sysfs_isolation_shader_init(adev);
1839047767ddSLijo Lazar if (r)
1840047767ddSLijo Lazar		dev_err(adev->dev, "failed to create isolation sysfs files\n");
1841047767ddSLijo Lazar
18426c8d1f4bS[email protected] r = amdgpu_gfx_sysfs_reset_mask_init(adev);
18436c8d1f4bS[email protected] if (r)
18446c8d1f4bS[email protected]		dev_err(adev->dev, "failed to create reset mask sysfs files\n");
18456c8d1f4bS[email protected]
1846047767ddSLijo Lazar return r;
1847047767ddSLijo Lazar }
1848047767ddSLijo Lazar
amdgpu_gfx_sysfs_fini(struct amdgpu_device * adev)1849047767ddSLijo Lazar void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev)
1850047767ddSLijo Lazar {
18512f1b1352S[email protected] if (adev->dev->kobj.sd) {
1852047767ddSLijo Lazar amdgpu_gfx_sysfs_xcp_fini(adev);
1853047767ddSLijo Lazar amdgpu_gfx_sysfs_isolation_shader_fini(adev);
18546c8d1f4bS[email protected] amdgpu_gfx_sysfs_reset_mask_fini(adev);
1855047767ddSLijo Lazar }
18562f1b1352S[email protected] }
1857047767ddSLijo Lazar
amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device * adev,unsigned int cleaner_shader_size)1858aec773a1SSrinivasan Shanmugam int amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device *adev,
1859aec773a1SSrinivasan Shanmugam unsigned int cleaner_shader_size)
1860aec773a1SSrinivasan Shanmugam {
1861aec773a1SSrinivasan Shanmugam if (!adev->gfx.enable_cleaner_shader)
1862aec773a1SSrinivasan Shanmugam return -EOPNOTSUPP;
1863aec773a1SSrinivasan Shanmugam
1864aec773a1SSrinivasan Shanmugam return amdgpu_bo_create_kernel(adev, cleaner_shader_size, PAGE_SIZE,
1865aec773a1SSrinivasan Shanmugam AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT,
1866aec773a1SSrinivasan Shanmugam &adev->gfx.cleaner_shader_obj,
1867aec773a1SSrinivasan Shanmugam &adev->gfx.cleaner_shader_gpu_addr,
1868aec773a1SSrinivasan Shanmugam (void **)&adev->gfx.cleaner_shader_cpu_ptr);
1869aec773a1SSrinivasan Shanmugam }
1870aec773a1SSrinivasan Shanmugam
amdgpu_gfx_cleaner_shader_sw_fini(struct amdgpu_device * adev)1871aec773a1SSrinivasan Shanmugam void amdgpu_gfx_cleaner_shader_sw_fini(struct amdgpu_device *adev)
1872aec773a1SSrinivasan Shanmugam {
1873aec773a1SSrinivasan Shanmugam if (!adev->gfx.enable_cleaner_shader)
1874aec773a1SSrinivasan Shanmugam return;
1875aec773a1SSrinivasan Shanmugam
1876aec773a1SSrinivasan Shanmugam amdgpu_bo_free_kernel(&adev->gfx.cleaner_shader_obj,
1877aec773a1SSrinivasan Shanmugam &adev->gfx.cleaner_shader_gpu_addr,
1878aec773a1SSrinivasan Shanmugam (void **)&adev->gfx.cleaner_shader_cpu_ptr);
1879aec773a1SSrinivasan Shanmugam }
1880aec773a1SSrinivasan Shanmugam
amdgpu_gfx_cleaner_shader_init(struct amdgpu_device * adev,unsigned int cleaner_shader_size,const void * cleaner_shader_ptr)1881aec773a1SSrinivasan Shanmugam void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev,
1882aec773a1SSrinivasan Shanmugam unsigned int cleaner_shader_size,
1883aec773a1SSrinivasan Shanmugam const void *cleaner_shader_ptr)
1884aec773a1SSrinivasan Shanmugam {
1885aec773a1SSrinivasan Shanmugam if (!adev->gfx.enable_cleaner_shader)
1886aec773a1SSrinivasan Shanmugam return;
1887aec773a1SSrinivasan Shanmugam
1888aec773a1SSrinivasan Shanmugam if (adev->gfx.cleaner_shader_cpu_ptr && cleaner_shader_ptr)
1889aec773a1SSrinivasan Shanmugam memcpy_toio(adev->gfx.cleaner_shader_cpu_ptr, cleaner_shader_ptr,
1890aec773a1SSrinivasan Shanmugam cleaner_shader_size);
1891aec773a1SSrinivasan Shanmugam }
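/*
 * Minimal usage sketch for an IP-specific gfx init path; the shader array
 * name below is a placeholder, not a real symbol:
 *
 *	amdgpu_gfx_cleaner_shader_sw_init(adev,
 *					  sizeof(gfx_vX_Y_cleaner_shader_hex));
 *	amdgpu_gfx_cleaner_shader_init(adev,
 *				       sizeof(gfx_vX_Y_cleaner_shader_hex),
 *				       gfx_vX_Y_cleaner_shader_hex);
 */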
1892afefd6f2SSrinivasan Shanmugam
1893afefd6f2SSrinivasan Shanmugam /**
1894afefd6f2SSrinivasan Shanmugam * amdgpu_gfx_kfd_sch_ctrl - Control the KFD scheduler from the KGD (Graphics Driver)
1895afefd6f2SSrinivasan Shanmugam * @adev: amdgpu_device pointer
1896afefd6f2SSrinivasan Shanmugam * @idx: Index of the scheduler to control
1897afefd6f2SSrinivasan Shanmugam * @enable: Whether to enable or disable the KFD scheduler
1898afefd6f2SSrinivasan Shanmugam *
1899afefd6f2SSrinivasan Shanmugam * This function is used to control the KFD (Kernel Fusion Driver) scheduler
1900afefd6f2SSrinivasan Shanmugam * from the KGD. It is part of the cleaner shader feature. This function plays
1901afefd6f2SSrinivasan Shanmugam * a key role in enforcing process isolation on the GPU.
1902afefd6f2SSrinivasan Shanmugam *
1903afefd6f2SSrinivasan Shanmugam * The function uses a reference count mechanism (kfd_sch_req_count) to keep
1904afefd6f2SSrinivasan Shanmugam * track of the number of requests to enable the KFD scheduler. When a request
1905afefd6f2SSrinivasan Shanmugam * to enable the KFD scheduler is made, the reference count is decremented.
1906afefd6f2SSrinivasan Shanmugam * When the reference count reaches zero, a delayed work is scheduled to
1907afefd6f2SSrinivasan Shanmugam * enforce isolation once the remaining time slice for that partition elapses.
1908afefd6f2SSrinivasan Shanmugam *
1909afefd6f2SSrinivasan Shanmugam * When a request to disable the KFD scheduler is made, the function first
1910afefd6f2SSrinivasan Shanmugam * checks if the reference count is zero. If it is, it cancels the delayed work
1911afefd6f2SSrinivasan Shanmugam * for enforcing isolation and checks if the KFD scheduler is active. If the
1912afefd6f2SSrinivasan Shanmugam * KFD scheduler is active, it sends a request to stop the KFD scheduler and
1913afefd6f2SSrinivasan Shanmugam * sets the KFD scheduler state to inactive. Then, it increments the reference
1914afefd6f2SSrinivasan Shanmugam * count.
1915afefd6f2SSrinivasan Shanmugam *
1916afefd6f2SSrinivasan Shanmugam * The function is synchronized using the kfd_sch_mutex to ensure that the KFD
1917afefd6f2SSrinivasan Shanmugam * scheduler state and reference count are updated atomically.
1918afefd6f2SSrinivasan Shanmugam *
1919afefd6f2SSrinivasan Shanmugam * Note: If the reference count is already zero when a request to enable the
1920afefd6f2SSrinivasan Shanmugam * KFD scheduler is made, it means there's an imbalance bug somewhere. The
1921afefd6f2SSrinivasan Shanmugam * function triggers a warning in this case.
1922afefd6f2SSrinivasan Shanmugam */
amdgpu_gfx_kfd_sch_ctrl(struct amdgpu_device * adev,u32 idx,bool enable)1923afefd6f2SSrinivasan Shanmugam static void amdgpu_gfx_kfd_sch_ctrl(struct amdgpu_device *adev, u32 idx,
1924afefd6f2SSrinivasan Shanmugam bool enable)
1925afefd6f2SSrinivasan Shanmugam {
1926afefd6f2SSrinivasan Shanmugam mutex_lock(&adev->gfx.kfd_sch_mutex);
1927afefd6f2SSrinivasan Shanmugam
1928afefd6f2SSrinivasan Shanmugam if (enable) {
1929afefd6f2SSrinivasan Shanmugam /* If the count is already 0, it means there's an imbalance bug somewhere.
1930afefd6f2SSrinivasan Shanmugam * Note that the bug may be in a different caller than the one which triggers the
1931afefd6f2SSrinivasan Shanmugam * WARN_ON_ONCE.
1932afefd6f2SSrinivasan Shanmugam */
1933afefd6f2SSrinivasan Shanmugam if (WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx] == 0)) {
1934afefd6f2SSrinivasan Shanmugam dev_err(adev->dev, "Attempted to enable KFD scheduler when reference count is already zero\n");
1935afefd6f2SSrinivasan Shanmugam goto unlock;
1936afefd6f2SSrinivasan Shanmugam }
1937afefd6f2SSrinivasan Shanmugam
1938afefd6f2SSrinivasan Shanmugam adev->gfx.kfd_sch_req_count[idx]--;
1939afefd6f2SSrinivasan Shanmugam
1940afefd6f2SSrinivasan Shanmugam if (adev->gfx.kfd_sch_req_count[idx] == 0 &&
1941afefd6f2SSrinivasan Shanmugam adev->gfx.kfd_sch_inactive[idx]) {
1942afefd6f2SSrinivasan Shanmugam schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work,
1943efe6a877SAlex Deucher msecs_to_jiffies(adev->gfx.enforce_isolation_time[idx]));
1944afefd6f2SSrinivasan Shanmugam }
1945afefd6f2SSrinivasan Shanmugam } else {
1946afefd6f2SSrinivasan Shanmugam if (adev->gfx.kfd_sch_req_count[idx] == 0) {
1947afefd6f2SSrinivasan Shanmugam cancel_delayed_work_sync(&adev->gfx.enforce_isolation[idx].work);
1948afefd6f2SSrinivasan Shanmugam if (!adev->gfx.kfd_sch_inactive[idx]) {
1949afefd6f2SSrinivasan Shanmugam amdgpu_amdkfd_stop_sched(adev, idx);
1950afefd6f2SSrinivasan Shanmugam adev->gfx.kfd_sch_inactive[idx] = true;
1951afefd6f2SSrinivasan Shanmugam }
1952afefd6f2SSrinivasan Shanmugam }
1953afefd6f2SSrinivasan Shanmugam
1954afefd6f2SSrinivasan Shanmugam adev->gfx.kfd_sch_req_count[idx]++;
1955afefd6f2SSrinivasan Shanmugam }
1956afefd6f2SSrinivasan Shanmugam
1957afefd6f2SSrinivasan Shanmugam unlock:
1958afefd6f2SSrinivasan Shanmugam mutex_unlock(&adev->gfx.kfd_sch_mutex);
1959afefd6f2SSrinivasan Shanmugam }
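/*
 * Simplified sketch of the expected usage (not the exact call sites): the
 * ring begin_use/end_use helpers issue a balanced pair of calls per ring use.
 *
 *	amdgpu_gfx_kfd_sch_ctrl(adev, idx, false);	// begin_use: pause KFD
 *	... submit kernel work ...
 *	amdgpu_gfx_kfd_sch_ctrl(adev, idx, true);	// end_use: allow KFD again
 */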
1960afefd6f2SSrinivasan Shanmugam
1961afefd6f2SSrinivasan Shanmugam /**
1962afefd6f2SSrinivasan Shanmugam * amdgpu_gfx_enforce_isolation_handler - work handler for enforcing shader isolation
1963afefd6f2SSrinivasan Shanmugam *
1964afefd6f2SSrinivasan Shanmugam * @work: work_struct.
1965afefd6f2SSrinivasan Shanmugam *
1966afefd6f2SSrinivasan Shanmugam * This function is the work handler for enforcing shader isolation on AMD GPUs.
1967afefd6f2SSrinivasan Shanmugam * It counts the number of emitted fences for each GFX and compute ring. If any
1968afefd6f2SSrinivasan Shanmugam * fences are still outstanding, it re-arms the `enforce_isolation` work after a
1969afefd6f2SSrinivasan Shanmugam * minimal delay so the remaining work can drain. If there are no fences, it signals the Kernel Fusion
1970afefd6f2SSrinivasan Shanmugam * Driver (KFD) to resume the runqueue. The function is synchronized using the
1971afefd6f2SSrinivasan Shanmugam * `enforce_isolation_mutex`.
1972afefd6f2SSrinivasan Shanmugam */
amdgpu_gfx_enforce_isolation_handler(struct work_struct * work)1973afefd6f2SSrinivasan Shanmugam void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work)
1974afefd6f2SSrinivasan Shanmugam {
1975afefd6f2SSrinivasan Shanmugam struct amdgpu_isolation_work *isolation_work =
1976afefd6f2SSrinivasan Shanmugam container_of(work, struct amdgpu_isolation_work, work.work);
1977afefd6f2SSrinivasan Shanmugam struct amdgpu_device *adev = isolation_work->adev;
1978afefd6f2SSrinivasan Shanmugam u32 i, idx, fences = 0;
1979afefd6f2SSrinivasan Shanmugam
1980afefd6f2SSrinivasan Shanmugam if (isolation_work->xcp_id == AMDGPU_XCP_NO_PARTITION)
1981afefd6f2SSrinivasan Shanmugam idx = 0;
1982afefd6f2SSrinivasan Shanmugam else
1983afefd6f2SSrinivasan Shanmugam idx = isolation_work->xcp_id;
1984afefd6f2SSrinivasan Shanmugam
1985afefd6f2SSrinivasan Shanmugam if (idx >= MAX_XCP)
1986afefd6f2SSrinivasan Shanmugam return;
1987afefd6f2SSrinivasan Shanmugam
1988afefd6f2SSrinivasan Shanmugam mutex_lock(&adev->enforce_isolation_mutex);
1989afefd6f2SSrinivasan Shanmugam for (i = 0; i < AMDGPU_MAX_GFX_RINGS; ++i) {
1990afefd6f2SSrinivasan Shanmugam if (isolation_work->xcp_id == adev->gfx.gfx_ring[i].xcp_id)
1991afefd6f2SSrinivasan Shanmugam fences += amdgpu_fence_count_emitted(&adev->gfx.gfx_ring[i]);
1992afefd6f2SSrinivasan Shanmugam }
1993afefd6f2SSrinivasan Shanmugam for (i = 0; i < (AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES); ++i) {
1994afefd6f2SSrinivasan Shanmugam if (isolation_work->xcp_id == adev->gfx.compute_ring[i].xcp_id)
1995afefd6f2SSrinivasan Shanmugam fences += amdgpu_fence_count_emitted(&adev->gfx.compute_ring[i]);
1996afefd6f2SSrinivasan Shanmugam }
1997afefd6f2SSrinivasan Shanmugam if (fences) {
1998efe6a877SAlex Deucher /* we've already had our timeslice, so let's wrap this up */
1999afefd6f2SSrinivasan Shanmugam schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work,
2000efe6a877SAlex Deucher msecs_to_jiffies(1));
2001afefd6f2SSrinivasan Shanmugam } else {
2002afefd6f2SSrinivasan Shanmugam /* Tell KFD to resume the runqueue */
2003afefd6f2SSrinivasan Shanmugam if (adev->kfd.init_complete) {
2004afefd6f2SSrinivasan Shanmugam WARN_ON_ONCE(!adev->gfx.kfd_sch_inactive[idx]);
2005afefd6f2SSrinivasan Shanmugam WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx]);
2006afefd6f2SSrinivasan Shanmugam amdgpu_amdkfd_start_sched(adev, idx);
2007afefd6f2SSrinivasan Shanmugam adev->gfx.kfd_sch_inactive[idx] = false;
2008afefd6f2SSrinivasan Shanmugam }
2009afefd6f2SSrinivasan Shanmugam }
2010afefd6f2SSrinivasan Shanmugam mutex_unlock(&adev->enforce_isolation_mutex);
2011afefd6f2SSrinivasan Shanmugam }
2012afefd6f2SSrinivasan Shanmugam
201355f4139bSSrinivasan Shanmugam /**
201455f4139bSSrinivasan Shanmugam * amdgpu_gfx_enforce_isolation_wait_for_kfd - Manage KFD wait period for process isolation
201555f4139bSSrinivasan Shanmugam * @adev: amdgpu_device pointer
201655f4139bSSrinivasan Shanmugam * @idx: Index of the GPU partition
201755f4139bSSrinivasan Shanmugam *
201855f4139bSSrinivasan Shanmugam * When kernel submissions come in, the jobs are given a time slice and once
201955f4139bSSrinivasan Shanmugam * that time slice is up, if there are KFD user queues active, kernel
202055f4139bSSrinivasan Shanmugam * submissions are blocked until KFD has had its time slice. Once the KFD time
202155f4139bSSrinivasan Shanmugam * slice is up, KFD user queues are preempted and kernel submissions are
202255f4139bSSrinivasan Shanmugam * unblocked and allowed to run again.
202355f4139bSSrinivasan Shanmugam */
2024efe6a877SAlex Deucher static void
amdgpu_gfx_enforce_isolation_wait_for_kfd(struct amdgpu_device * adev,u32 idx)2025efe6a877SAlex Deucher amdgpu_gfx_enforce_isolation_wait_for_kfd(struct amdgpu_device *adev,
2026efe6a877SAlex Deucher u32 idx)
2027efe6a877SAlex Deucher {
2028efe6a877SAlex Deucher unsigned long cjiffies;
2029efe6a877SAlex Deucher bool wait = false;
2030efe6a877SAlex Deucher
2031efe6a877SAlex Deucher mutex_lock(&adev->enforce_isolation_mutex);
2032efe6a877SAlex Deucher if (adev->enforce_isolation[idx]) {
2033efe6a877SAlex Deucher /* set the initial values if nothing is set */
2034efe6a877SAlex Deucher if (!adev->gfx.enforce_isolation_jiffies[idx]) {
2035efe6a877SAlex Deucher adev->gfx.enforce_isolation_jiffies[idx] = jiffies;
2036efe6a877SAlex Deucher adev->gfx.enforce_isolation_time[idx] = GFX_SLICE_PERIOD_MS;
2037efe6a877SAlex Deucher }
2038efe6a877SAlex Deucher /* Make sure KFD gets a chance to run */
2039efe6a877SAlex Deucher if (amdgpu_amdkfd_compute_active(adev, idx)) {
2040efe6a877SAlex Deucher cjiffies = jiffies;
2041efe6a877SAlex Deucher if (time_after(cjiffies, adev->gfx.enforce_isolation_jiffies[idx])) {
2042efe6a877SAlex Deucher cjiffies -= adev->gfx.enforce_isolation_jiffies[idx];
2043efe6a877SAlex Deucher 				if (jiffies_to_msecs(cjiffies) >= GFX_SLICE_PERIOD_MS) {
2044efe6a877SAlex Deucher /* if our time is up, let KGD work drain before scheduling more */
2045efe6a877SAlex Deucher wait = true;
2046efe6a877SAlex Deucher /* reset the timer period */
2047efe6a877SAlex Deucher adev->gfx.enforce_isolation_time[idx] = GFX_SLICE_PERIOD_MS;
2048efe6a877SAlex Deucher } else {
2049efe6a877SAlex Deucher /* set the timer period to what's left in our time slice */
2050efe6a877SAlex Deucher adev->gfx.enforce_isolation_time[idx] =
2051efe6a877SAlex Deucher GFX_SLICE_PERIOD_MS - jiffies_to_msecs(cjiffies);
2052efe6a877SAlex Deucher }
2053efe6a877SAlex Deucher } else {
2054efe6a877SAlex Deucher /* if jiffies wrap around we will just wait a little longer */
2055efe6a877SAlex Deucher adev->gfx.enforce_isolation_jiffies[idx] = jiffies;
2056efe6a877SAlex Deucher }
2057efe6a877SAlex Deucher } else {
2058efe6a877SAlex Deucher /* if there is no KFD work, then set the full slice period */
2059efe6a877SAlex Deucher adev->gfx.enforce_isolation_jiffies[idx] = jiffies;
2060efe6a877SAlex Deucher adev->gfx.enforce_isolation_time[idx] = GFX_SLICE_PERIOD_MS;
2061efe6a877SAlex Deucher }
2062efe6a877SAlex Deucher }
2063efe6a877SAlex Deucher mutex_unlock(&adev->enforce_isolation_mutex);
2064efe6a877SAlex Deucher
2065efe6a877SAlex Deucher if (wait)
2066efe6a877SAlex Deucher msleep(GFX_SLICE_PERIOD_MS);
2067efe6a877SAlex Deucher }
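/*
 * Worked example of the bookkeeping above (a 250 ms slice is assumed purely
 * for illustration; the real length is GFX_SLICE_PERIOD_MS): if KFD queues
 * are active and 100 ms of the kernel slice have elapsed, the remaining
 * period is trimmed to 150 ms; once the full slice has elapsed, the function
 * sleeps for one slice period so KFD user queues get their turn before more
 * kernel work is scheduled.
 */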
2068efe6a877SAlex Deucher
206955f4139bSSrinivasan Shanmugam /**
207055f4139bSSrinivasan Shanmugam * amdgpu_gfx_enforce_isolation_ring_begin_use - Begin use of a ring with enforced isolation
207155f4139bSSrinivasan Shanmugam * @ring: Pointer to the amdgpu_ring structure
207255f4139bSSrinivasan Shanmugam *
207355f4139bSSrinivasan Shanmugam * Ring begin_use helper implementation for gfx which serializes access to the
207455f4139bSSrinivasan Shanmugam * gfx IP between kernel submission IOCTLs and KFD user queues when isolation
207555f4139bSSrinivasan Shanmugam * enforcement is enabled. The kernel submission IOCTLs and KFD user queues
207655f4139bSSrinivasan Shanmugam * each get a time slice when both are active.
207755f4139bSSrinivasan Shanmugam */
2078afefd6f2SSrinivasan Shanmugam void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring)
2079afefd6f2SSrinivasan Shanmugam {
2080afefd6f2SSrinivasan Shanmugam struct amdgpu_device *adev = ring->adev;
2081afefd6f2SSrinivasan Shanmugam u32 idx;
20821e8c193fSSrinivasan Shanmugam bool sched_work = false;
2083afefd6f2SSrinivasan Shanmugam
2084afefd6f2SSrinivasan Shanmugam if (!adev->gfx.enable_cleaner_shader)
2085afefd6f2SSrinivasan Shanmugam return;
2086afefd6f2SSrinivasan Shanmugam
2087afefd6f2SSrinivasan Shanmugam if (ring->xcp_id == AMDGPU_XCP_NO_PARTITION)
2088afefd6f2SSrinivasan Shanmugam idx = 0;
2089afefd6f2SSrinivasan Shanmugam else
2090afefd6f2SSrinivasan Shanmugam idx = ring->xcp_id;
2091afefd6f2SSrinivasan Shanmugam
2092afefd6f2SSrinivasan Shanmugam if (idx >= MAX_XCP)
2093afefd6f2SSrinivasan Shanmugam return;
2094afefd6f2SSrinivasan Shanmugam
2095efe6a877SAlex Deucher /* Don't submit more work until KFD has had some time */
2096efe6a877SAlex Deucher amdgpu_gfx_enforce_isolation_wait_for_kfd(adev, idx);
2097efe6a877SAlex Deucher
2098afefd6f2SSrinivasan Shanmugam mutex_lock(&adev->enforce_isolation_mutex);
2099afefd6f2SSrinivasan Shanmugam if (adev->enforce_isolation[idx]) {
2100afefd6f2SSrinivasan Shanmugam if (adev->kfd.init_complete)
21011e8c193fSSrinivasan Shanmugam sched_work = true;
2102afefd6f2SSrinivasan Shanmugam }
2103afefd6f2SSrinivasan Shanmugam mutex_unlock(&adev->enforce_isolation_mutex);
21041e8c193fSSrinivasan Shanmugam
21051e8c193fSSrinivasan Shanmugam if (sched_work)
21061e8c193fSSrinivasan Shanmugam amdgpu_gfx_kfd_sch_ctrl(adev, idx, false);
2107afefd6f2SSrinivasan Shanmugam }
2108afefd6f2SSrinivasan Shanmugam
210955f4139bSSrinivasan Shanmugam /**
211055f4139bSSrinivasan Shanmugam * amdgpu_gfx_enforce_isolation_ring_end_use - End use of a ring with enforced isolation
211155f4139bSSrinivasan Shanmugam * @ring: Pointer to the amdgpu_ring structure
211255f4139bSSrinivasan Shanmugam *
211355f4139bSSrinivasan Shanmugam * Ring end_use helper implementation for gfx which serializes access to the
211455f4139bSSrinivasan Shanmugam * gfx IP between kernel submission IOCTLs and KFD user queues when isolation
211555f4139bSSrinivasan Shanmugam * enforcement is enabled. The kernel submission IOCTLs and KFD user queues
211655f4139bSSrinivasan Shanmugam * each get a time slice when both are active.
211755f4139bSSrinivasan Shanmugam */
2118afefd6f2SSrinivasan Shanmugam void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring)
2119afefd6f2SSrinivasan Shanmugam {
2120afefd6f2SSrinivasan Shanmugam struct amdgpu_device *adev = ring->adev;
2121afefd6f2SSrinivasan Shanmugam u32 idx;
21221e8c193fSSrinivasan Shanmugam bool sched_work = false;
2123afefd6f2SSrinivasan Shanmugam
2124afefd6f2SSrinivasan Shanmugam if (!adev->gfx.enable_cleaner_shader)
2125afefd6f2SSrinivasan Shanmugam return;
2126afefd6f2SSrinivasan Shanmugam
2127afefd6f2SSrinivasan Shanmugam if (ring->xcp_id == AMDGPU_XCP_NO_PARTITION)
2128afefd6f2SSrinivasan Shanmugam idx = 0;
2129afefd6f2SSrinivasan Shanmugam else
2130afefd6f2SSrinivasan Shanmugam idx = ring->xcp_id;
2131afefd6f2SSrinivasan Shanmugam
2132afefd6f2SSrinivasan Shanmugam if (idx >= MAX_XCP)
2133afefd6f2SSrinivasan Shanmugam return;
2134afefd6f2SSrinivasan Shanmugam
2135afefd6f2SSrinivasan Shanmugam mutex_lock(&adev->enforce_isolation_mutex);
2136afefd6f2SSrinivasan Shanmugam if (adev->enforce_isolation[idx]) {
2137afefd6f2SSrinivasan Shanmugam if (adev->kfd.init_complete)
21381e8c193fSSrinivasan Shanmugam sched_work = true;
2139afefd6f2SSrinivasan Shanmugam }
2140afefd6f2SSrinivasan Shanmugam mutex_unlock(&adev->enforce_isolation_mutex);
21411e8c193fSSrinivasan Shanmugam
21421e8c193fSSrinivasan Shanmugam if (sched_work)
21431e8c193fSSrinivasan Shanmugam amdgpu_gfx_kfd_sch_ctrl(adev, idx, true);
2144afefd6f2SSrinivasan Shanmugam }
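/*
 * Minimal sketch (illustrative, not taken from this file) of how a gfx IP
 * block is expected to wire the two helpers above into its ring callbacks;
 * the structure name below is hypothetical:
 *
 *   static const struct amdgpu_ring_funcs gfx_vX_0_ring_funcs_gfx = {
 *           ...
 *           .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use,
 *           .end_use = amdgpu_gfx_enforce_isolation_ring_end_use,
 *   };
 */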
2145c5c63d9cSJesse Zhang
21468fdb3958SAlex Deucher void amdgpu_gfx_profile_idle_work_handler(struct work_struct *work)
21478fdb3958SAlex Deucher {
21488fdb3958SAlex Deucher struct amdgpu_device *adev =
21498fdb3958SAlex Deucher container_of(work, struct amdgpu_device, gfx.idle_work.work);
21508fdb3958SAlex Deucher enum PP_SMC_POWER_PROFILE profile;
21518fdb3958SAlex Deucher u32 i, fences = 0;
21528fdb3958SAlex Deucher int r;
21538fdb3958SAlex Deucher
21548fdb3958SAlex Deucher if (adev->gfx.num_gfx_rings)
21558fdb3958SAlex Deucher profile = PP_SMC_POWER_PROFILE_FULLSCREEN3D;
21568fdb3958SAlex Deucher else
21578fdb3958SAlex Deucher profile = PP_SMC_POWER_PROFILE_COMPUTE;
21588fdb3958SAlex Deucher
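	/* Count fences that have been emitted but not yet signaled on all gfx
	 * and compute rings; the workload profile is only dropped once no
	 * fences remain and no submissions are in flight.
	 */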
21598fdb3958SAlex Deucher for (i = 0; i < AMDGPU_MAX_GFX_RINGS; ++i)
21608fdb3958SAlex Deucher fences += amdgpu_fence_count_emitted(&adev->gfx.gfx_ring[i]);
21618fdb3958SAlex Deucher for (i = 0; i < (AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES); ++i)
21628fdb3958SAlex Deucher fences += amdgpu_fence_count_emitted(&adev->gfx.compute_ring[i]);
21638fdb3958SAlex Deucher if (!fences && !atomic_read(&adev->gfx.total_submission_cnt)) {
2164553673a3SAlex Deucher mutex_lock(&adev->gfx.workload_profile_mutex);
2165553673a3SAlex Deucher if (adev->gfx.workload_profile_active) {
21668fdb3958SAlex Deucher r = amdgpu_dpm_switch_power_profile(adev, profile, false);
21678fdb3958SAlex Deucher if (r)
21688fdb3958SAlex Deucher dev_warn(adev->dev, "(%d) failed to disable %s power profile mode\n", r,
21698fdb3958SAlex Deucher profile == PP_SMC_POWER_PROFILE_FULLSCREEN3D ?
21708fdb3958SAlex Deucher "fullscreen 3D" : "compute");
2171553673a3SAlex Deucher adev->gfx.workload_profile_active = false;
2172553673a3SAlex Deucher }
2173553673a3SAlex Deucher mutex_unlock(&adev->gfx.workload_profile_mutex);
21748fdb3958SAlex Deucher } else {
21758fdb3958SAlex Deucher schedule_delayed_work(&adev->gfx.idle_work, GFX_PROFILE_IDLE_TIMEOUT);
21768fdb3958SAlex Deucher }
21778fdb3958SAlex Deucher }
21788fdb3958SAlex Deucher
21798fdb3958SAlex Deucher void amdgpu_gfx_profile_ring_begin_use(struct amdgpu_ring *ring)
21808fdb3958SAlex Deucher {
21818fdb3958SAlex Deucher struct amdgpu_device *adev = ring->adev;
21828fdb3958SAlex Deucher enum PP_SMC_POWER_PROFILE profile;
21838fdb3958SAlex Deucher int r;
21848fdb3958SAlex Deucher
21858fdb3958SAlex Deucher if (adev->gfx.num_gfx_rings)
21868fdb3958SAlex Deucher profile = PP_SMC_POWER_PROFILE_FULLSCREEN3D;
21878fdb3958SAlex Deucher else
21888fdb3958SAlex Deucher profile = PP_SMC_POWER_PROFILE_COMPUTE;
21898fdb3958SAlex Deucher
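	/* Take the submission count first so a concurrently running idle
	 * worker sees the device as busy and does not drop the profile.
	 */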
21908fdb3958SAlex Deucher atomic_inc(&adev->gfx.total_submission_cnt);
21918fdb3958SAlex Deucher
21929e34d8d1SAlex Deucher cancel_delayed_work_sync(&adev->gfx.idle_work);
21939e34d8d1SAlex Deucher
21949e34d8d1SAlex Deucher /* We can safely return early here because we've cancelled the
21959e34d8d1SAlex Deucher 	 * delayed work so there is no one else to set it to false
21969e34d8d1SAlex Deucher * and we don't care if someone else sets it to true.
21979e34d8d1SAlex Deucher */
21989e34d8d1SAlex Deucher if (adev->gfx.workload_profile_active)
21999e34d8d1SAlex Deucher return;
22009e34d8d1SAlex Deucher
2201553673a3SAlex Deucher mutex_lock(&adev->gfx.workload_profile_mutex);
2202553673a3SAlex Deucher if (!adev->gfx.workload_profile_active) {
22038fdb3958SAlex Deucher r = amdgpu_dpm_switch_power_profile(adev, profile, true);
22048fdb3958SAlex Deucher if (r)
22058fdb3958SAlex Deucher 			dev_warn(adev->dev, "(%d) failed to enable %s power profile mode\n", r,
22068fdb3958SAlex Deucher profile == PP_SMC_POWER_PROFILE_FULLSCREEN3D ?
22078fdb3958SAlex Deucher "fullscreen 3D" : "compute");
2208553673a3SAlex Deucher adev->gfx.workload_profile_active = true;
2209553673a3SAlex Deucher }
2210553673a3SAlex Deucher mutex_unlock(&adev->gfx.workload_profile_mutex);
22118fdb3958SAlex Deucher }
22128fdb3958SAlex Deucher
22138fdb3958SAlex Deucher void amdgpu_gfx_profile_ring_end_use(struct amdgpu_ring *ring)
22148fdb3958SAlex Deucher {
22158fdb3958SAlex Deucher atomic_dec(&ring->adev->gfx.total_submission_cnt);
22168fdb3958SAlex Deucher
22178fdb3958SAlex Deucher schedule_delayed_work(&ring->adev->gfx.idle_work, GFX_PROFILE_IDLE_TIMEOUT);
22188fdb3958SAlex Deucher }
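/*
 * Like the isolation helpers, the two profile helpers above are intended to
 * be called from a ring's begin_use/end_use path: begin_use switches the SMU
 * to the fullscreen-3D or compute workload profile, and end_use re-arms the
 * idle worker that drops the profile again once the GPU has been idle for
 * GFX_PROFILE_IDLE_TIMEOUT.
 */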
22198fdb3958SAlex Deucher
2220c5c63d9cSJesse Zhang /*
2221c5c63d9cSJesse Zhang  * debugfs interface to enable/disable gfx job submission to specific rings.
2222c5c63d9cSJesse Zhang */
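/*
 * Example usage from userspace (the path assumes the standard DRM debugfs
 * layout and card minor 0; adjust the minor as needed):
 *
 *   cat /sys/kernel/debug/dri/0/amdgpu_gfx_sched_mask
 *   echo 0x1 > /sys/kernel/debug/dri/0/amdgpu_gfx_sched_mask
 *
 * Each bit enables (1) or disables (0) job submission to the corresponding
 * gfx ring; a value with no valid ring bits set is rejected with -EINVAL.
 */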
2223c5c63d9cSJesse Zhang #if defined(CONFIG_DEBUG_FS)
2224c5c63d9cSJesse Zhang static int amdgpu_debugfs_gfx_sched_mask_set(void *data, u64 val)
2225c5c63d9cSJesse Zhang {
2226c5c63d9cSJesse Zhang struct amdgpu_device *adev = (struct amdgpu_device *)data;
2227c5c63d9cSJesse Zhang u32 i;
2228c5c63d9cSJesse Zhang u64 mask = 0;
2229c5c63d9cSJesse Zhang struct amdgpu_ring *ring;
2230c5c63d9cSJesse Zhang
2231c5c63d9cSJesse Zhang if (!adev)
2232c5c63d9cSJesse Zhang return -ENODEV;
2233c5c63d9cSJesse Zhang
223434c4eb7dSKarol Przybylski mask = (1ULL << adev->gfx.num_gfx_rings) - 1;
2235c5c63d9cSJesse Zhang if ((val & mask) == 0)
2236c5c63d9cSJesse Zhang return -EINVAL;
2237c5c63d9cSJesse Zhang
2238c5c63d9cSJesse Zhang for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
2239c5c63d9cSJesse Zhang ring = &adev->gfx.gfx_ring[i];
2240c5c63d9cSJesse Zhang 		if (val & (1ULL << i))
2241c5c63d9cSJesse Zhang ring->sched.ready = true;
2242c5c63d9cSJesse Zhang else
2243c5c63d9cSJesse Zhang ring->sched.ready = false;
2244c5c63d9cSJesse Zhang }
2245c5c63d9cSJesse Zhang 	/* publish the updated sched.ready flags so they are visible across SMP */
2246c5c63d9cSJesse Zhang smp_rmb();
2247c5c63d9cSJesse Zhang return 0;
2248c5c63d9cSJesse Zhang }
2249c5c63d9cSJesse Zhang
2250c5c63d9cSJesse Zhang static int amdgpu_debugfs_gfx_sched_mask_get(void *data, u64 *val)
2251c5c63d9cSJesse Zhang {
2252c5c63d9cSJesse Zhang struct amdgpu_device *adev = (struct amdgpu_device *)data;
2253c5c63d9cSJesse Zhang u32 i;
2254c5c63d9cSJesse Zhang u64 mask = 0;
2255c5c63d9cSJesse Zhang struct amdgpu_ring *ring;
2256c5c63d9cSJesse Zhang
2257c5c63d9cSJesse Zhang if (!adev)
2258c5c63d9cSJesse Zhang return -ENODEV;
2259c5c63d9cSJesse Zhang for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
2260c5c63d9cSJesse Zhang ring = &adev->gfx.gfx_ring[i];
2261c5c63d9cSJesse Zhang if (ring->sched.ready)
226234c4eb7dSKarol Przybylski mask |= 1ULL << i;
2263c5c63d9cSJesse Zhang }
2264c5c63d9cSJesse Zhang
2265c5c63d9cSJesse Zhang *val = mask;
2266c5c63d9cSJesse Zhang return 0;
2267c5c63d9cSJesse Zhang }
2268c5c63d9cSJesse Zhang
2269c5c63d9cSJesse Zhang DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_gfx_sched_mask_fops,
2270c5c63d9cSJesse Zhang amdgpu_debugfs_gfx_sched_mask_get,
2271c5c63d9cSJesse Zhang amdgpu_debugfs_gfx_sched_mask_set, "%llx\n");
2272c5c63d9cSJesse Zhang
2273c5c63d9cSJesse Zhang #endif
2274c5c63d9cSJesse Zhang
2275c5c63d9cSJesse Zhang void amdgpu_debugfs_gfx_sched_mask_init(struct amdgpu_device *adev)
2276c5c63d9cSJesse Zhang {
2277c5c63d9cSJesse Zhang #if defined(CONFIG_DEBUG_FS)
2278c5c63d9cSJesse Zhang struct drm_minor *minor = adev_to_drm(adev)->primary;
2279c5c63d9cSJesse Zhang struct dentry *root = minor->debugfs_root;
2280c5c63d9cSJesse Zhang char name[32];
2281c5c63d9cSJesse Zhang
2282c5c63d9cSJesse Zhang if (!(adev->gfx.num_gfx_rings > 1))
2283c5c63d9cSJesse Zhang return;
2284c5c63d9cSJesse Zhang sprintf(name, "amdgpu_gfx_sched_mask");
2285c5c63d9cSJesse Zhang debugfs_create_file(name, 0600, root, adev,
2286c5c63d9cSJesse Zhang &amdgpu_debugfs_gfx_sched_mask_fops);
2287c5c63d9cSJesse Zhang #endif
2288c5c63d9cSJesse Zhang }
2289c5c63d9cSJesse Zhang
2290c5c63d9cSJesse Zhang /*
2291c5c63d9cSJesse Zhang  * debugfs interface to enable/disable compute job submission to specific rings.
2292c5c63d9cSJesse Zhang */
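/*
 * Usage mirrors amdgpu_gfx_sched_mask above, e.g. (debugfs path assumed):
 *
 *   echo 0x3 > /sys/kernel/debug/dri/0/amdgpu_compute_sched_mask
 *
 * keeps only compute rings 0 and 1 accepting job submissions.
 */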
2293c5c63d9cSJesse Zhang #if defined(CONFIG_DEBUG_FS)
2294c5c63d9cSJesse Zhang static int amdgpu_debugfs_compute_sched_mask_set(void *data, u64 val)
2295c5c63d9cSJesse Zhang {
2296c5c63d9cSJesse Zhang struct amdgpu_device *adev = (struct amdgpu_device *)data;
2297c5c63d9cSJesse Zhang u32 i;
2298c5c63d9cSJesse Zhang u64 mask = 0;
2299c5c63d9cSJesse Zhang struct amdgpu_ring *ring;
2300c5c63d9cSJesse Zhang
2301c5c63d9cSJesse Zhang if (!adev)
2302c5c63d9cSJesse Zhang return -ENODEV;
2303c5c63d9cSJesse Zhang
230434c4eb7dSKarol Przybylski mask = (1ULL << adev->gfx.num_compute_rings) - 1;
2305c5c63d9cSJesse Zhang if ((val & mask) == 0)
2306c5c63d9cSJesse Zhang return -EINVAL;
2307c5c63d9cSJesse Zhang
2308c5c63d9cSJesse Zhang for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
2309c5c63d9cSJesse Zhang ring = &adev->gfx.compute_ring[i];
2310c5c63d9cSJesse Zhang 		if (val & (1ULL << i))
2311c5c63d9cSJesse Zhang ring->sched.ready = true;
2312c5c63d9cSJesse Zhang else
2313c5c63d9cSJesse Zhang ring->sched.ready = false;
2314c5c63d9cSJesse Zhang }
2315c5c63d9cSJesse Zhang
2316c5c63d9cSJesse Zhang 	/* publish the updated sched.ready flags so they are visible across SMP */
2317c5c63d9cSJesse Zhang smp_rmb();
2318c5c63d9cSJesse Zhang return 0;
2319c5c63d9cSJesse Zhang }
2320c5c63d9cSJesse Zhang
2321c5c63d9cSJesse Zhang static int amdgpu_debugfs_compute_sched_mask_get(void *data, u64 *val)
2322c5c63d9cSJesse Zhang {
2323c5c63d9cSJesse Zhang struct amdgpu_device *adev = (struct amdgpu_device *)data;
2324c5c63d9cSJesse Zhang u32 i;
2325c5c63d9cSJesse Zhang u64 mask = 0;
2326c5c63d9cSJesse Zhang struct amdgpu_ring *ring;
2327c5c63d9cSJesse Zhang
2328c5c63d9cSJesse Zhang if (!adev)
2329c5c63d9cSJesse Zhang return -ENODEV;
2330c5c63d9cSJesse Zhang for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
2331c5c63d9cSJesse Zhang ring = &adev->gfx.compute_ring[i];
2332c5c63d9cSJesse Zhang if (ring->sched.ready)
233334c4eb7dSKarol Przybylski mask |= 1ULL << i;
2334c5c63d9cSJesse Zhang }
2335c5c63d9cSJesse Zhang
2336c5c63d9cSJesse Zhang *val = mask;
2337c5c63d9cSJesse Zhang return 0;
2338c5c63d9cSJesse Zhang }
2339c5c63d9cSJesse Zhang
2340c5c63d9cSJesse Zhang DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_compute_sched_mask_fops,
2341c5c63d9cSJesse Zhang amdgpu_debugfs_compute_sched_mask_get,
2342c5c63d9cSJesse Zhang amdgpu_debugfs_compute_sched_mask_set, "%llx\n");
2343c5c63d9cSJesse Zhang
2344c5c63d9cSJesse Zhang #endif
2345c5c63d9cSJesse Zhang
2346c5c63d9cSJesse Zhang void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev)
2347c5c63d9cSJesse Zhang {
2348c5c63d9cSJesse Zhang #if defined(CONFIG_DEBUG_FS)
2349c5c63d9cSJesse Zhang struct drm_minor *minor = adev_to_drm(adev)->primary;
2350c5c63d9cSJesse Zhang struct dentry *root = minor->debugfs_root;
2351c5c63d9cSJesse Zhang char name[32];
2352c5c63d9cSJesse Zhang
2353c5c63d9cSJesse Zhang if (!(adev->gfx.num_compute_rings > 1))
2354c5c63d9cSJesse Zhang return;
2355c5c63d9cSJesse Zhang sprintf(name, "amdgpu_compute_sched_mask");
2356c5c63d9cSJesse Zhang debugfs_create_file(name, 0600, root, adev,
2357c5c63d9cSJesse Zhang &amdgpu_debugfs_compute_sched_mask_fops);
2358c5c63d9cSJesse Zhang #endif
2359c5c63d9cSJesse Zhang }
2360