/*
 * Copyright 2019 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#ifndef __AMDGPU_MES_H__
#define __AMDGPU_MES_H__

#include "amdgpu_irq.h"
#include "kgd_kfd_interface.h"
#include "amdgpu_gfx.h"
#include "amdgpu_doorbell.h"
#include <linux/sched/mm.h>

#define AMDGPU_MES_MAX_COMPUTE_PIPES	8
#define AMDGPU_MES_MAX_GFX_PIPES	2
#define AMDGPU_MES_MAX_SDMA_PIPES	2

#define AMDGPU_MES_API_VERSION_SHIFT	12
#define AMDGPU_MES_FEAT_VERSION_SHIFT	24

#define AMDGPU_MES_VERSION_MASK		0x00000fff
#define AMDGPU_MES_API_VERSION_MASK	0x00fff000
#define AMDGPU_MES_FEAT_VERSION_MASK	0xff000000
#define AMDGPU_MES_MSCRATCH_SIZE	0x40000

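/*
 * Illustrative helpers only (not part of the upstream interface, names are
 * hypothetical): the mask/shift pairs above split one packed MES version
 * dword, such as the sched_version or kiq_version values reported by the
 * firmware below, into its feature, API and scheduler revision fields.
 */
static inline uint32_t amdgpu_mes_example_feat_version(uint32_t ver)
{
	return (ver & AMDGPU_MES_FEAT_VERSION_MASK) >> AMDGPU_MES_FEAT_VERSION_SHIFT;
}

static inline uint32_t amdgpu_mes_example_api_version(uint32_t ver)
{
	return (ver & AMDGPU_MES_API_VERSION_MASK) >> AMDGPU_MES_API_VERSION_SHIFT;
}
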
enum amdgpu_mes_priority_level {
	AMDGPU_MES_PRIORITY_LEVEL_LOW = 0,
	AMDGPU_MES_PRIORITY_LEVEL_NORMAL = 1,
	AMDGPU_MES_PRIORITY_LEVEL_MEDIUM = 2,
	AMDGPU_MES_PRIORITY_LEVEL_HIGH = 3,
	AMDGPU_MES_PRIORITY_LEVEL_REALTIME = 4,
	AMDGPU_MES_PRIORITY_NUM_LEVELS
};

#define AMDGPU_MES_PROC_CTX_SIZE 0x1000 /* one page area */
#define AMDGPU_MES_GANG_CTX_SIZE 0x1000 /* one page area */

struct amdgpu_mes_funcs;

enum amdgpu_mes_pipe {
	AMDGPU_MES_SCHED_PIPE = 0,
	AMDGPU_MES_KIQ_PIPE,
	AMDGPU_MAX_MES_PIPES = 2,
};

struct amdgpu_mes {
	struct amdgpu_device *adev;

	struct mutex mutex_hidden;

	struct idr pasid_idr;
	struct idr gang_id_idr;
	struct idr queue_id_idr;
	struct ida doorbell_ida;

	spinlock_t queue_id_lock;

	uint32_t sched_version;
	uint32_t kiq_version;
	uint32_t fw_version[AMDGPU_MAX_MES_PIPES];
	bool enable_legacy_queue_map;

	uint32_t total_max_queue;
	uint32_t max_doorbell_slices;

	uint64_t default_process_quantum;
	uint64_t default_gang_quantum;

	struct amdgpu_ring ring[AMDGPU_MAX_MES_PIPES];
	spinlock_t ring_lock[AMDGPU_MAX_MES_PIPES];

	const struct firmware *fw[AMDGPU_MAX_MES_PIPES];

	/* mes ucode */
	struct amdgpu_bo *ucode_fw_obj[AMDGPU_MAX_MES_PIPES];
	uint64_t ucode_fw_gpu_addr[AMDGPU_MAX_MES_PIPES];
	uint32_t *ucode_fw_ptr[AMDGPU_MAX_MES_PIPES];
	uint64_t uc_start_addr[AMDGPU_MAX_MES_PIPES];

	/* mes ucode data */
	struct amdgpu_bo *data_fw_obj[AMDGPU_MAX_MES_PIPES];
	uint64_t data_fw_gpu_addr[AMDGPU_MAX_MES_PIPES];
	uint32_t *data_fw_ptr[AMDGPU_MAX_MES_PIPES];
	uint64_t data_start_addr[AMDGPU_MAX_MES_PIPES];

	/* eop gpu obj */
	struct amdgpu_bo *eop_gpu_obj[AMDGPU_MAX_MES_PIPES];
	uint64_t eop_gpu_addr[AMDGPU_MAX_MES_PIPES];

	void *mqd_backup[AMDGPU_MAX_MES_PIPES];
	struct amdgpu_irq_src irq[AMDGPU_MAX_MES_PIPES];

	uint32_t vmid_mask_gfxhub;
	uint32_t vmid_mask_mmhub;
	uint32_t compute_hqd_mask[AMDGPU_MES_MAX_COMPUTE_PIPES];
	uint32_t gfx_hqd_mask[AMDGPU_MES_MAX_GFX_PIPES];
	uint32_t sdma_hqd_mask[AMDGPU_MES_MAX_SDMA_PIPES];
	uint32_t aggregated_doorbells[AMDGPU_MES_PRIORITY_NUM_LEVELS];
	uint32_t sch_ctx_offs[AMDGPU_MAX_MES_PIPES];
	uint64_t sch_ctx_gpu_addr[AMDGPU_MAX_MES_PIPES];
	uint64_t *sch_ctx_ptr[AMDGPU_MAX_MES_PIPES];
	uint32_t query_status_fence_offs[AMDGPU_MAX_MES_PIPES];
	uint64_t query_status_fence_gpu_addr[AMDGPU_MAX_MES_PIPES];
	uint64_t *query_status_fence_ptr[AMDGPU_MAX_MES_PIPES];

	uint32_t saved_flags;

	/* initialize kiq pipe */
	int (*kiq_hw_init)(struct amdgpu_device *adev);
	int (*kiq_hw_fini)(struct amdgpu_device *adev);

	/* MES doorbells */
	uint32_t db_start_dw_offset;
	uint32_t num_mes_dbs;
	unsigned long *doorbell_bitmap;

	/* MES event log buffer */
	uint32_t event_log_size;
	struct amdgpu_bo *event_log_gpu_obj;
	uint64_t event_log_gpu_addr;
	void *event_log_cpu_addr;

	/* ip specific functions */
	const struct amdgpu_mes_funcs *funcs;

	/* mes resource_1 bo */
	struct amdgpu_bo *resource_1[AMDGPU_MAX_MES_PIPES];
	uint64_t resource_1_gpu_addr[AMDGPU_MAX_MES_PIPES];
	void *resource_1_addr[AMDGPU_MAX_MES_PIPES];
};

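/* Per-process (per-PASID) bookkeeping used by the MES scheduler. */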
struct amdgpu_mes_process {
	int pasid;
	struct amdgpu_vm *vm;
	uint64_t pd_gpu_addr;
	struct amdgpu_bo *proc_ctx_bo;
	uint64_t proc_ctx_gpu_addr;
	void *proc_ctx_cpu_ptr;
	uint64_t process_quantum;
	struct list_head gang_list;
	uint32_t doorbell_index;
	struct mutex doorbell_lock;
};

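/* A gang groups queues belonging to one process that are scheduled together. */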
struct amdgpu_mes_gang {
	int gang_id;
	int priority;
	int inprocess_gang_priority;
	int global_priority_level;
	struct list_head list;
	struct amdgpu_mes_process *process;
	struct amdgpu_bo *gang_ctx_bo;
	uint64_t gang_ctx_gpu_addr;
	void *gang_ctx_cpu_ptr;
	uint64_t gang_quantum;
	struct list_head queue_list;
};

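/* One hardware queue owned by a gang; tracks its MQD, doorbell and backing ring. */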
struct amdgpu_mes_queue {
	struct list_head list;
	struct amdgpu_mes_gang *gang;
	int queue_id;
	uint64_t doorbell_off;
	struct amdgpu_bo *mqd_obj;
	void *mqd_cpu_ptr;
	uint64_t mqd_gpu_addr;
	uint64_t wptr_gpu_addr;
	int queue_type;
	int paging;
	struct amdgpu_ring *ring;
};

struct amdgpu_mes_queue_properties {
	int queue_type;
	uint64_t hqd_base_gpu_addr;
	uint64_t rptr_gpu_addr;
	uint64_t wptr_gpu_addr;
	uint64_t wptr_mc_addr;
	uint32_t queue_size;
	uint64_t eop_gpu_addr;
	uint32_t hqd_pipe_priority;
	uint32_t hqd_queue_priority;
	bool paging;
	struct amdgpu_ring *ring;
	/* out */
	uint64_t doorbell_off;
};

struct amdgpu_mes_gang_properties {
	uint32_t priority;
	uint32_t gang_quantum;
	uint32_t inprocess_gang_priority;
	uint32_t priority_level;
	int global_priority_level;
};

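/* Parameters handed to the IP-specific ->add_hw_queue() hook declared below. */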
struct mes_add_queue_input {
	uint32_t process_id;
	uint64_t page_table_base_addr;
	uint64_t process_va_start;
	uint64_t process_va_end;
	uint64_t process_quantum;
	uint64_t process_context_addr;
	uint64_t gang_quantum;
	uint64_t gang_context_addr;
	uint32_t inprocess_gang_priority;
	uint32_t gang_global_priority_level;
	uint32_t doorbell_offset;
	uint64_t mqd_addr;
	uint64_t wptr_addr;
	uint64_t wptr_mc_addr;
	uint32_t queue_type;
	uint32_t paging;
	uint32_t gws_base;
	uint32_t gws_size;
	uint64_t tba_addr;
	uint64_t tma_addr;
	uint32_t trap_en;
	uint32_t skip_process_ctx_clear;
	uint32_t is_kfd_process;
	uint32_t is_aql_queue;
	uint32_t queue_size;
	uint32_t exclusively_scheduled;
};

struct mes_remove_queue_input {
	uint32_t doorbell_offset;
	uint64_t gang_context_addr;
};

struct mes_reset_queue_input {
	uint32_t doorbell_offset;
	uint64_t gang_context_addr;
	bool use_mmio;
	uint32_t queue_type;
	uint32_t me_id;
	uint32_t pipe_id;
	uint32_t queue_id;
	uint32_t xcc_id;
	uint32_t vmid;
};

struct mes_map_legacy_queue_input {
	uint32_t queue_type;
	uint32_t doorbell_offset;
	uint32_t pipe_id;
	uint32_t queue_id;
	uint64_t mqd_addr;
	uint64_t wptr_addr;
};

struct mes_unmap_legacy_queue_input {
	enum amdgpu_unmap_queues_action action;
	uint32_t queue_type;
	uint32_t doorbell_offset;
	uint32_t pipe_id;
	uint32_t queue_id;
	uint64_t trail_fence_addr;
	uint64_t trail_fence_data;
};

struct mes_suspend_gang_input {
	bool suspend_all_gangs;
	uint64_t gang_context_addr;
	uint64_t suspend_fence_addr;
	uint32_t suspend_fence_value;
};

struct mes_resume_gang_input {
	bool resume_all_gangs;
	uint64_t gang_context_addr;
};

struct mes_reset_legacy_queue_input {
	uint32_t queue_type;
	uint32_t doorbell_offset;
	bool use_mmio;
	uint32_t me_id;
	uint32_t pipe_id;
	uint32_t queue_id;
	uint64_t mqd_addr;
	uint64_t wptr_addr;
	uint32_t vmid;
};

enum mes_misc_opcode {
	MES_MISC_OP_WRITE_REG,
	MES_MISC_OP_READ_REG,
	MES_MISC_OP_WRM_REG_WAIT,
	MES_MISC_OP_WRM_REG_WR_WAIT,
	MES_MISC_OP_SET_SHADER_DEBUGGER,
	MES_MISC_OP_CHANGE_CONFIG,
};

struct mes_misc_op_input {
	enum mes_misc_opcode op;

	union {
		struct {
			uint32_t reg_offset;
			uint64_t buffer_addr;
		} read_reg;

		struct {
			uint32_t reg_offset;
			uint32_t reg_value;
		} write_reg;

		struct {
			uint32_t ref;
			uint32_t mask;
			uint32_t reg0;
			uint32_t reg1;
		} wrm_reg;

		struct {
			uint64_t process_context_addr;
			union {
				struct {
					uint32_t single_memop : 1;
					uint32_t single_alu_op : 1;
					uint32_t reserved : 29;
					uint32_t process_ctx_flush : 1;
				};
				uint32_t u32all;
			} flags;
			uint32_t spi_gdbg_per_vmid_cntl;
			uint32_t tcp_watch_cntl[4];
			uint32_t trap_en;
		} set_shader_debugger;

		struct {
			union {
				struct {
					uint32_t limit_single_process : 1;
					uint32_t enable_hws_logging_buffer : 1;
					uint32_t reserved : 30;
				};
				uint32_t all;
			} option;
			struct {
				uint32_t tdr_level;
				uint32_t tdr_delay;
			} tdr_config;
		} change_config;
	};
};

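/*
 * Illustrative sketch only (the helper name below is hypothetical, not an
 * upstream function): how a caller might package a "write reg0, then wait
 * for (reg1 & mask) == ref" request before handing it to the IP-specific
 * ->misc_op() hook in struct amdgpu_mes_funcs below. The wrapper
 * amdgpu_mes_reg_write_reg_wait(), declared later in this header, exposes
 * this kind of request at the driver level.
 */
static inline void amdgpu_mes_example_fill_wrm_op(struct mes_misc_op_input *op,
						  uint32_t reg0, uint32_t reg1,
						  uint32_t ref, uint32_t mask)
{
	op->op = MES_MISC_OP_WRM_REG_WR_WAIT;
	op->wrm_reg.reg0 = reg0;
	op->wrm_reg.reg1 = reg1;
	op->wrm_reg.ref = ref;
	op->wrm_reg.mask = mask;
}
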
struct amdgpu_mes_funcs {
	int (*add_hw_queue)(struct amdgpu_mes *mes,
			    struct mes_add_queue_input *input);

	int (*remove_hw_queue)(struct amdgpu_mes *mes,
			       struct mes_remove_queue_input *input);

	int (*map_legacy_queue)(struct amdgpu_mes *mes,
				struct mes_map_legacy_queue_input *input);

	int (*unmap_legacy_queue)(struct amdgpu_mes *mes,
				  struct mes_unmap_legacy_queue_input *input);

	int (*suspend_gang)(struct amdgpu_mes *mes,
			    struct mes_suspend_gang_input *input);

	int (*resume_gang)(struct amdgpu_mes *mes,
			   struct mes_resume_gang_input *input);

	int (*misc_op)(struct amdgpu_mes *mes,
		       struct mes_misc_op_input *input);

	int (*reset_legacy_queue)(struct amdgpu_mes *mes,
				  struct mes_reset_legacy_queue_input *input);

	int (*reset_hw_queue)(struct amdgpu_mes *mes,
			      struct mes_reset_queue_input *input);
};

#define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev))
#define amdgpu_mes_kiq_hw_fini(adev) (adev)->mes.kiq_hw_fini((adev))

int amdgpu_mes_ctx_get_offs(struct amdgpu_ring *ring, unsigned int id_offs);

int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe);
int amdgpu_mes_init(struct amdgpu_device *adev);
void amdgpu_mes_fini(struct amdgpu_device *adev);

int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid,
			      struct amdgpu_vm *vm);
void amdgpu_mes_destroy_process(struct amdgpu_device *adev, int pasid);

int amdgpu_mes_add_gang(struct amdgpu_device *adev, int pasid,
			struct amdgpu_mes_gang_properties *gprops,
			int *gang_id);
int amdgpu_mes_remove_gang(struct amdgpu_device *adev, int gang_id);

int amdgpu_mes_suspend(struct amdgpu_device *adev);
int amdgpu_mes_resume(struct amdgpu_device *adev);

int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,
			    struct amdgpu_mes_queue_properties *qprops,
			    int *queue_id);
int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id);
int amdgpu_mes_reset_hw_queue(struct amdgpu_device *adev, int queue_id);
int amdgpu_mes_reset_hw_queue_mmio(struct amdgpu_device *adev, int queue_type,
				   int me_id, int pipe_id, int queue_id, int vmid);

int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev,
				struct amdgpu_ring *ring);
int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
				  struct amdgpu_ring *ring,
				  enum amdgpu_unmap_queues_action action,
				  u64 gpu_addr, u64 seq);
int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev,
				  struct amdgpu_ring *ring,
				  unsigned int vmid,
				  bool use_mmio);

uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg);
int amdgpu_mes_wreg(struct amdgpu_device *adev,
		    uint32_t reg, uint32_t val);
int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg,
			uint32_t val, uint32_t mask);
int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev,
				  uint32_t reg0, uint32_t reg1,
				  uint32_t ref, uint32_t mask);
int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
				   uint64_t process_context_addr,
				   uint32_t spi_gdbg_per_vmid_cntl,
				   const uint32_t *tcp_watch_cntl,
				   uint32_t flags,
				   bool trap_en);
int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev,
				     uint64_t process_context_addr);
int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
			int queue_type, int idx,
			struct amdgpu_mes_ctx_data *ctx_data,
			struct amdgpu_ring **out);
void amdgpu_mes_remove_ring(struct amdgpu_device *adev,
			    struct amdgpu_ring *ring);

uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev,
						  enum amdgpu_mes_priority_level prio);

int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev,
				   struct amdgpu_mes_ctx_data *ctx_data);
void amdgpu_mes_ctx_free_meta_data(struct amdgpu_mes_ctx_data *ctx_data);
int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev,
				 struct amdgpu_vm *vm,
				 struct amdgpu_mes_ctx_data *ctx_data);
int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev,
				   struct amdgpu_mes_ctx_data *ctx_data);

int amdgpu_mes_self_test(struct amdgpu_device *adev);

int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev);

/*
 * MES lock can be taken in MMU notifiers.
 *
 * A bit more detail about why to set no-FS reclaim with MES lock:
 *
 * The purpose of the MMU notifier is to stop GPU access to memory so
 * that the Linux VM subsystem can move pages around safely. This is
 * done by preempting user mode queues for the affected process. When
 * MES is used, MES lock needs to be taken to preempt the queues.
 *
 * The MMU notifier callback entry point in the driver is
 * amdgpu_mn_invalidate_range_start_hsa. The relevant call chain from
 * there is:
 * amdgpu_amdkfd_evict_userptr -> kgd2kfd_quiesce_mm ->
 * kfd_process_evict_queues -> pdd->dev->dqm->ops.evict_process_queues
 *
 * The last part of the chain is a function pointer where we take the
 * MES lock.
 *
 * The problem with taking locks in the MMU notifier is that MMU
 * notifiers can be called in reclaim-FS context. That's where the
 * kernel frees up pages to make room for new page allocations under
 * memory pressure. While we are running in reclaim-FS context, we must
 * not trigger another memory reclaim operation because that would
 * recursively reenter the reclaim code and cause a deadlock. The
 * memalloc_nofs_save/restore calls guarantee that.
 *
 * In addition we also need to avoid lock dependencies on other locks taken
 * under the MES lock, for example reservation locks. Here is a possible
 * scenario of a deadlock:
 * Thread A: takes and holds reservation lock | triggers reclaim-FS |
 * MMU notifier | blocks trying to take MES lock
 * Thread B: takes and holds MES lock | blocks trying to take reservation lock
 *
 * In this scenario Thread B gets involved in a deadlock even without
 * triggering a reclaim-FS operation itself.
 * To fix this and break the lock dependency chain you'd need to either:
 * 1. protect reservation locks with memalloc_nofs_save/restore, or
 * 2. avoid taking reservation locks under the MES lock.
 *
 * Reservation locks are taken all over the kernel in different subsystems;
 * we have no control over them or their lock dependencies. So the only
 * workable solution is to avoid taking other locks under the MES lock.
 * As a result, make sure no reclaim-FS happens while holding this lock
 * anywhere to prevent deadlocks when an MMU notifier runs in reclaim-FS
 * context.
 */
static inline void amdgpu_mes_lock(struct amdgpu_mes *mes)
{
	mutex_lock(&mes->mutex_hidden);
	mes->saved_flags = memalloc_noreclaim_save();
}

static inline void amdgpu_mes_unlock(struct amdgpu_mes *mes)
{
	memalloc_noreclaim_restore(mes->saved_flags);
	mutex_unlock(&mes->mutex_hidden);
}
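
/*
 * Usage sketch (illustrative only; 'r' and 'input' are placeholder names):
 * callers that drive the MES, for example through the struct
 * amdgpu_mes_funcs hooks, bracket the operation with the helpers above, e.g.
 *
 *	amdgpu_mes_lock(&adev->mes);
 *	r = adev->mes.funcs->suspend_gang(&adev->mes, &input);
 *	amdgpu_mes_unlock(&adev->mes);
 *
 * which also marks the section as no-reclaim so that an MMU notifier running
 * in reclaim-FS context cannot deadlock against it (see the comment above).
 */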

bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev);

int amdgpu_mes_update_enforce_isolation(struct amdgpu_device *adev);

#endif /* __AMDGPU_MES_H__ */