1a538bbe7SJack Xiao /*
2a538bbe7SJack Xiao  * Copyright 2019 Advanced Micro Devices, Inc.
3a538bbe7SJack Xiao  *
4a538bbe7SJack Xiao  * Permission is hereby granted, free of charge, to any person obtaining a
5a538bbe7SJack Xiao  * copy of this software and associated documentation files (the "Software"),
6a538bbe7SJack Xiao  * to deal in the Software without restriction, including without limitation
7a538bbe7SJack Xiao  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8a538bbe7SJack Xiao  * and/or sell copies of the Software, and to permit persons to whom the
9a538bbe7SJack Xiao  * Software is furnished to do so, subject to the following conditions:
10a538bbe7SJack Xiao  *
11a538bbe7SJack Xiao  * The above copyright notice and this permission notice shall be included in
12a538bbe7SJack Xiao  * all copies or substantial portions of the Software.
13a538bbe7SJack Xiao  *
14a538bbe7SJack Xiao  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15a538bbe7SJack Xiao  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16a538bbe7SJack Xiao  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17a538bbe7SJack Xiao  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18a538bbe7SJack Xiao  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19a538bbe7SJack Xiao  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20a538bbe7SJack Xiao  * OTHER DEALINGS IN THE SOFTWARE.
21a538bbe7SJack Xiao  *
22a538bbe7SJack Xiao  */
23a538bbe7SJack Xiao 
24a538bbe7SJack Xiao #ifndef __AMDGPU_MES_H__
25a538bbe7SJack Xiao #define __AMDGPU_MES_H__
26a538bbe7SJack Xiao 
273a42c7f3SJack Xiao #include "amdgpu_irq.h"
283a42c7f3SJack Xiao #include "kgd_kfd_interface.h"
293a42c7f3SJack Xiao #include "amdgpu_gfx.h"
30e3cbb1f4SShashank Sharma #include "amdgpu_doorbell.h"
313a42c7f3SJack Xiao #include <linux/sched/mm.h>
323a42c7f3SJack Xiao 
/*
 * Array bounds for the per-pipe HQD masks kept in struct amdgpu_mes
 * (compute_hqd_mask, gfx_hqd_mask and sdma_hqd_mask respectively).
 */
#define AMDGPU_MES_MAX_COMPUTE_PIPES	8
#define AMDGPU_MES_MAX_GFX_PIPES	2
#define AMDGPU_MES_MAX_SDMA_PIPES	2

/*
 * Field layout of a packed 32-bit MES version word, as fixed by the
 * masks and shifts below:
 *
 *   bits  0..11  version          (no shift needed)
 *   bits 12..23  API version      (AMDGPU_MES_API_VERSION_SHIFT)
 *   bits 24..31  feature version  (AMDGPU_MES_FEAT_VERSION_SHIFT)
 */
#define AMDGPU_MES_VERSION_MASK		0x00000fff
#define AMDGPU_MES_API_VERSION_MASK	0x00fff000
#define AMDGPU_MES_FEAT_VERSION_MASK	0xff000000

#define AMDGPU_MES_API_VERSION_SHIFT	12
#define AMDGPU_MES_FEAT_VERSION_SHIFT	24

/* Size of the MES scratch area; NOTE(review): presumably in bytes - confirm. */
#define AMDGPU_MES_MSCRATCH_SIZE	0x40000

44e77a541fSGraham Sider 
/*
 * Priority levels, numbered from 0 (LOW) to 4 (REALTIME).
 * AMDGPU_MES_PRIORITY_NUM_LEVELS is the count of levels and is used as an
 * array bound (see aggregated_doorbells in struct amdgpu_mes).
 */
enum amdgpu_mes_priority_level {
	AMDGPU_MES_PRIORITY_LEVEL_LOW      = 0,
	AMDGPU_MES_PRIORITY_LEVEL_NORMAL   = 1,
	AMDGPU_MES_PRIORITY_LEVEL_MEDIUM   = 2,
	AMDGPU_MES_PRIORITY_LEVEL_HIGH     = 3,
	AMDGPU_MES_PRIORITY_LEVEL_REALTIME = 4,
	AMDGPU_MES_PRIORITY_NUM_LEVELS		/* == 5, must stay last */
};
533bbd31e0SJack Xiao 
/* Process and gang context areas are each one page (0x1000) in size. */
#define AMDGPU_MES_PROC_CTX_SIZE	0x1000
#define AMDGPU_MES_GANG_CTX_SIZE	0x1000
563a42c7f3SJack Xiao 
577bbc3676SJack Xiao struct amdgpu_mes_funcs;
58a538bbe7SJack Xiao 
/*
 * MES pipe identifiers. AMDGPU_MAX_MES_PIPES is the number of pipes and
 * sizes the per-pipe arrays in struct amdgpu_mes.
 */
enum amdgpu_mes_pipe {
	AMDGPU_MES_SCHED_PIPE	= 0,
	AMDGPU_MES_KIQ_PIPE,		/* == 1 */
	AMDGPU_MAX_MES_PIPES	= 2,
};
64207e8bbeSJack Xiao 
657bbc3676SJack Xiao struct amdgpu_mes {
663bbd31e0SJack Xiao 	struct amdgpu_device            *adev;
673bbd31e0SJack Xiao 
6818ee4ce6SJack Xiao 	struct mutex                    mutex_hidden;
693a42c7f3SJack Xiao 
703a42c7f3SJack Xiao 	struct idr                      pasid_idr;
713a42c7f3SJack Xiao 	struct idr                      gang_id_idr;
723a42c7f3SJack Xiao 	struct idr                      queue_id_idr;
733a42c7f3SJack Xiao 	struct ida                      doorbell_ida;
743a42c7f3SJack Xiao 
753a42c7f3SJack Xiao 	spinlock_t                      queue_id_lock;
763a42c7f3SJack Xiao 
77ff83e6e7SGraham Sider 	uint32_t			sched_version;
78ff83e6e7SGraham Sider 	uint32_t			kiq_version;
79e89bd361SAlex Deucher 	uint32_t			fw_version[AMDGPU_MAX_MES_PIPES];
8052491d97SJack Xiao 	bool                            enable_legacy_queue_map;
81ff83e6e7SGraham Sider 
823bbd31e0SJack Xiao 	uint32_t                        total_max_queue;
833bbd31e0SJack Xiao 	uint32_t                        max_doorbell_slices;
843bbd31e0SJack Xiao 
853bbd31e0SJack Xiao 	uint64_t                        default_process_quantum;
863bbd31e0SJack Xiao 	uint64_t                        default_gang_quantum;
873bbd31e0SJack Xiao 
88c7d43556SJack Xiao 	struct amdgpu_ring              ring[AMDGPU_MAX_MES_PIPES];
89c7d43556SJack Xiao 	spinlock_t                      ring_lock[AMDGPU_MAX_MES_PIPES];
907bbc3676SJack Xiao 
91207e8bbeSJack Xiao 	const struct firmware           *fw[AMDGPU_MAX_MES_PIPES];
925aa91248SJack Xiao 
935aa91248SJack Xiao 	/* mes ucode */
94207e8bbeSJack Xiao 	struct amdgpu_bo		*ucode_fw_obj[AMDGPU_MAX_MES_PIPES];
95207e8bbeSJack Xiao 	uint64_t			ucode_fw_gpu_addr[AMDGPU_MAX_MES_PIPES];
96207e8bbeSJack Xiao 	uint32_t			*ucode_fw_ptr[AMDGPU_MAX_MES_PIPES];
97207e8bbeSJack Xiao 	uint64_t                        uc_start_addr[AMDGPU_MAX_MES_PIPES];
985aa91248SJack Xiao 
995aa91248SJack Xiao 	/* mes ucode data */
100207e8bbeSJack Xiao 	struct amdgpu_bo		*data_fw_obj[AMDGPU_MAX_MES_PIPES];
101207e8bbeSJack Xiao 	uint64_t			data_fw_gpu_addr[AMDGPU_MAX_MES_PIPES];
102207e8bbeSJack Xiao 	uint32_t			*data_fw_ptr[AMDGPU_MAX_MES_PIPES];
103207e8bbeSJack Xiao 	uint64_t                        data_start_addr[AMDGPU_MAX_MES_PIPES];
1045aa91248SJack Xiao 
1053bbd31e0SJack Xiao 	/* eop gpu obj */
106207e8bbeSJack Xiao 	struct amdgpu_bo		*eop_gpu_obj[AMDGPU_MAX_MES_PIPES];
107207e8bbeSJack Xiao 	uint64_t                        eop_gpu_addr[AMDGPU_MAX_MES_PIPES];
1083bbd31e0SJack Xiao 
109207e8bbeSJack Xiao 	void                            *mqd_backup[AMDGPU_MAX_MES_PIPES];
110207e8bbeSJack Xiao 	struct amdgpu_irq_src	        irq[AMDGPU_MAX_MES_PIPES];
1113bbd31e0SJack Xiao 
1123bbd31e0SJack Xiao 	uint32_t                        vmid_mask_gfxhub;
1133bbd31e0SJack Xiao 	uint32_t                        vmid_mask_mmhub;
1143bbd31e0SJack Xiao 	uint32_t                        compute_hqd_mask[AMDGPU_MES_MAX_COMPUTE_PIPES];
1153bbd31e0SJack Xiao 	uint32_t                        gfx_hqd_mask[AMDGPU_MES_MAX_GFX_PIPES];
1163bbd31e0SJack Xiao 	uint32_t                        sdma_hqd_mask[AMDGPU_MES_MAX_SDMA_PIPES];
1170fe69062SLe Ma 	uint32_t                        aggregated_doorbells[AMDGPU_MES_PRIORITY_NUM_LEVELS];
118ea5d6db1SJack Xiao 	uint32_t                        sch_ctx_offs[AMDGPU_MAX_MES_PIPES];
119ea5d6db1SJack Xiao 	uint64_t			sch_ctx_gpu_addr[AMDGPU_MAX_MES_PIPES];
120ea5d6db1SJack Xiao 	uint64_t			*sch_ctx_ptr[AMDGPU_MAX_MES_PIPES];
121ea5d6db1SJack Xiao 	uint32_t			query_status_fence_offs[AMDGPU_MAX_MES_PIPES];
122ea5d6db1SJack Xiao 	uint64_t			query_status_fence_gpu_addr[AMDGPU_MAX_MES_PIPES];
123ea5d6db1SJack Xiao 	uint64_t			*query_status_fence_ptr[AMDGPU_MAX_MES_PIPES];
1246a4a1f60SJack Xiao 
12518ee4ce6SJack Xiao 	uint32_t			saved_flags;
1263bbd31e0SJack Xiao 
127cf064b45SJack Xiao 	/* initialize kiq pipe */
128cf064b45SJack Xiao 	int                             (*kiq_hw_init)(struct amdgpu_device *adev);
12918ee4ce6SJack Xiao 	int                             (*kiq_hw_fini)(struct amdgpu_device *adev);
130cf064b45SJack Xiao 
131e3cbb1f4SShashank Sharma 	/* MES doorbells */
132e3cbb1f4SShashank Sharma 	uint32_t			db_start_dw_offset;
133e3cbb1f4SShashank Sharma 	uint32_t			num_mes_dbs;
134e3cbb1f4SShashank Sharma 	unsigned long			*doorbell_bitmap;
135e3cbb1f4SShashank Sharma 
136b2662d4cSshaoyunl 	/* MES event log buffer */
137739d0f3eSMichael Chen 	uint32_t			event_log_size;
138b2662d4cSshaoyunl 	struct amdgpu_bo	*event_log_gpu_obj;
139b2662d4cSshaoyunl 	uint64_t			event_log_gpu_addr;
140b2662d4cSshaoyunl 	void				*event_log_cpu_addr;
141b2662d4cSshaoyunl 
1427bbc3676SJack Xiao 	/* ip specific functions */
1433bbd31e0SJack Xiao 	const struct amdgpu_mes_funcs   *funcs;
144f6ac0842Schongli2 
145f6ac0842Schongli2 	/* mes resource_1 bo*/
146*f81cd793SShaoyun Liu 	struct amdgpu_bo    *resource_1[AMDGPU_MAX_MES_PIPES];
147*f81cd793SShaoyun Liu 	uint64_t            resource_1_gpu_addr[AMDGPU_MAX_MES_PIPES];
148*f81cd793SShaoyun Liu 	void                *resource_1_addr[AMDGPU_MAX_MES_PIPES];
149f6ac0842Schongli2 
1507bbc3676SJack Xiao };
1517bbc3676SJack Xiao 
1523a42c7f3SJack Xiao struct amdgpu_mes_process {
1533a42c7f3SJack Xiao 	int			pasid;
1543a42c7f3SJack Xiao 	struct			amdgpu_vm *vm;
1553a42c7f3SJack Xiao 	uint64_t		pd_gpu_addr;
1563a42c7f3SJack Xiao 	struct amdgpu_bo 	*proc_ctx_bo;
1573a42c7f3SJack Xiao 	uint64_t 		proc_ctx_gpu_addr;
1583a42c7f3SJack Xiao 	void 			*proc_ctx_cpu_ptr;
1593a42c7f3SJack Xiao 	uint64_t 		process_quantum;
1603a42c7f3SJack Xiao 	struct 			list_head gang_list;
1613a42c7f3SJack Xiao 	uint32_t 		doorbell_index;
1623a42c7f3SJack Xiao 	struct mutex		doorbell_lock;
1633a42c7f3SJack Xiao };
1643a42c7f3SJack Xiao 
1653a42c7f3SJack Xiao struct amdgpu_mes_gang {
1663a42c7f3SJack Xiao 	int 				gang_id;
1673a42c7f3SJack Xiao 	int 				priority;
1683a42c7f3SJack Xiao 	int 				inprocess_gang_priority;
1693a42c7f3SJack Xiao 	int 				global_priority_level;
1703a42c7f3SJack Xiao 	struct list_head 		list;
1713a42c7f3SJack Xiao 	struct amdgpu_mes_process 	*process;
1723a42c7f3SJack Xiao 	struct amdgpu_bo 		*gang_ctx_bo;
1733a42c7f3SJack Xiao 	uint64_t 			gang_ctx_gpu_addr;
1743a42c7f3SJack Xiao 	void 				*gang_ctx_cpu_ptr;
1753a42c7f3SJack Xiao 	uint64_t 			gang_quantum;
1763a42c7f3SJack Xiao 	struct list_head 		queue_list;
1773a42c7f3SJack Xiao };
1783a42c7f3SJack Xiao 
1793a42c7f3SJack Xiao struct amdgpu_mes_queue {
1803a42c7f3SJack Xiao 	struct list_head 		list;
1813a42c7f3SJack Xiao 	struct amdgpu_mes_gang 		*gang;
1823a42c7f3SJack Xiao 	int 				queue_id;
1833a42c7f3SJack Xiao 	uint64_t 			doorbell_off;
1843a42c7f3SJack Xiao 	struct amdgpu_bo		*mqd_obj;
1853a42c7f3SJack Xiao 	void				*mqd_cpu_ptr;
1863a42c7f3SJack Xiao 	uint64_t 			mqd_gpu_addr;
1873a42c7f3SJack Xiao 	uint64_t 			wptr_gpu_addr;
1883a42c7f3SJack Xiao 	int 				queue_type;
1893a42c7f3SJack Xiao 	int 				paging;
1903a42c7f3SJack Xiao 	struct amdgpu_ring 		*ring;
1913a42c7f3SJack Xiao };
1923a42c7f3SJack Xiao 
/*
 * Properties passed to amdgpu_mes_add_hw_queue(). All members are inputs
 * except @doorbell_off, which the original header marks as an output.
 */
struct amdgpu_mes_queue_properties {
	int			queue_type;
	uint64_t		hqd_base_gpu_addr;
	uint64_t		rptr_gpu_addr;
	uint64_t		wptr_gpu_addr;
	uint64_t		wptr_mc_addr;
	uint32_t		queue_size;
	uint64_t		eop_gpu_addr;
	uint32_t		hqd_pipe_priority;
	uint32_t		hqd_queue_priority;
	bool			paging;
	struct amdgpu_ring	*ring;

	/* out */
	uint64_t		doorbell_off;
};
208be5609deSJack Xiao 
/* Properties passed to amdgpu_mes_add_gang(). */
struct amdgpu_mes_gang_properties {
	uint32_t	priority;
	uint32_t	gang_quantum;
	uint32_t	inprocess_gang_priority;
	uint32_t	priority_level;
	int		global_priority_level;
};
2165d0f619fSJack Xiao 
/* Argument block for the add_hw_queue callback in struct amdgpu_mes_funcs. */
struct mes_add_queue_input {
	/* process */
	uint32_t	process_id;
	uint64_t	page_table_base_addr;
	uint64_t	process_va_start;
	uint64_t	process_va_end;
	uint64_t	process_quantum;
	uint64_t	process_context_addr;

	/* gang */
	uint64_t	gang_quantum;
	uint64_t	gang_context_addr;
	uint32_t	inprocess_gang_priority;
	uint32_t	gang_global_priority_level;

	/* queue */
	uint32_t	doorbell_offset;
	uint64_t	mqd_addr;
	uint64_t	wptr_addr;
	uint64_t	wptr_mc_addr;
	uint32_t	queue_type;
	uint32_t	paging;

	uint32_t	gws_base;
	uint32_t	gws_size;
	uint64_t	tba_addr;
	uint64_t	tma_addr;
	uint32_t	trap_en;
	uint32_t	skip_process_ctx_clear;
	uint32_t	is_kfd_process;
	uint32_t	is_aql_queue;
	uint32_t	queue_size;
	uint32_t	exclusively_scheduled;
};
2457bbc3676SJack Xiao 
/* Argument block for the remove_hw_queue callback in struct amdgpu_mes_funcs. */
struct mes_remove_queue_input {
	uint32_t	doorbell_offset;
	uint64_t	gang_context_addr;
};
2507bbc3676SJack Xiao 
/* Argument block for the reset_hw_queue callback in struct amdgpu_mes_funcs. */
struct mes_reset_queue_input {
	uint32_t	doorbell_offset;
	uint64_t	gang_context_addr;
	bool		use_mmio;

	/* queue location */
	uint32_t	queue_type;
	uint32_t	me_id;
	uint32_t	pipe_id;
	uint32_t	queue_id;
	uint32_t	xcc_id;
	uint32_t	vmid;
};
2625b7a59deSAlex Deucher 
/* Argument block for the map_legacy_queue callback in struct amdgpu_mes_funcs. */
struct mes_map_legacy_queue_input {
	uint32_t	queue_type;
	uint32_t	doorbell_offset;
	uint32_t	pipe_id;
	uint32_t	queue_id;
	uint64_t	mqd_addr;
	uint64_t	wptr_addr;
};
271029c2b03SJack Xiao 
27218ee4ce6SJack Xiao struct mes_unmap_legacy_queue_input {
27318ee4ce6SJack Xiao 	enum amdgpu_unmap_queues_action    action;
27418ee4ce6SJack Xiao 	uint32_t                           queue_type;
27518ee4ce6SJack Xiao 	uint32_t                           doorbell_offset;
27618ee4ce6SJack Xiao 	uint32_t                           pipe_id;
27718ee4ce6SJack Xiao 	uint32_t                           queue_id;
27818ee4ce6SJack Xiao 	uint64_t                           trail_fence_addr;
27918ee4ce6SJack Xiao 	uint64_t                           trail_fence_data;
28018ee4ce6SJack Xiao };
28118ee4ce6SJack Xiao 
/* Argument block for the suspend_gang callback in struct amdgpu_mes_funcs. */
struct mes_suspend_gang_input {
	bool		suspend_all_gangs;
	uint64_t	gang_context_addr;
	uint64_t	suspend_fence_addr;
	uint32_t	suspend_fence_value;
};
2887bbc3676SJack Xiao 
/* Argument block for the resume_gang callback in struct amdgpu_mes_funcs. */
struct mes_resume_gang_input {
	bool		resume_all_gangs;
	uint64_t	gang_context_addr;
};
2937bbc3676SJack Xiao 
/* Argument block for the reset_legacy_queue callback in struct amdgpu_mes_funcs. */
struct mes_reset_legacy_queue_input {
	uint32_t	queue_type;
	uint32_t	doorbell_offset;
	bool		use_mmio;
	uint32_t	me_id;
	uint32_t	pipe_id;
	uint32_t	queue_id;
	uint64_t	mqd_addr;
	uint64_t	wptr_addr;
	uint32_t	vmid;
};
305c30fb344SAlex Deucher 
/*
 * Opcodes carried in struct mes_misc_op_input. Values are assigned
 * implicitly starting at 0, so the order of the enumerators matters.
 */
enum mes_misc_opcode {
	MES_MISC_OP_WRITE_REG,			/* 0 */
	MES_MISC_OP_READ_REG,			/* 1 */
	MES_MISC_OP_WRM_REG_WAIT,		/* 2 */
	MES_MISC_OP_WRM_REG_WR_WAIT,		/* 3 */
	MES_MISC_OP_SET_SHADER_DEBUGGER,	/* 4 */
	MES_MISC_OP_CHANGE_CONFIG,		/* 5 */
};
3146a4a1f60SJack Xiao 
3156a4a1f60SJack Xiao struct mes_misc_op_input {
3166a4a1f60SJack Xiao 	enum mes_misc_opcode op;
3176a4a1f60SJack Xiao 
3186a4a1f60SJack Xiao 	union {
3196a4a1f60SJack Xiao 		struct {
3206a4a1f60SJack Xiao 			uint32_t                  reg_offset;
3216a4a1f60SJack Xiao 			uint64_t                  buffer_addr;
3226a4a1f60SJack Xiao 		} read_reg;
3236a4a1f60SJack Xiao 
3246a4a1f60SJack Xiao 		struct {
3256a4a1f60SJack Xiao 			uint32_t                  reg_offset;
3266a4a1f60SJack Xiao 			uint32_t                  reg_value;
3276a4a1f60SJack Xiao 		} write_reg;
3286a4a1f60SJack Xiao 
3296a4a1f60SJack Xiao 		struct {
3306a4a1f60SJack Xiao 			uint32_t                   ref;
3316a4a1f60SJack Xiao 			uint32_t                   mask;
3326a4a1f60SJack Xiao 			uint32_t                   reg0;
3336a4a1f60SJack Xiao 			uint32_t                   reg1;
3346a4a1f60SJack Xiao 		} wrm_reg;
335a9818854SJonathan Kim 
336a9818854SJonathan Kim 		struct {
337a9818854SJonathan Kim 			uint64_t process_context_addr;
338a9818854SJonathan Kim 			union {
339a9818854SJonathan Kim 				struct {
340bd33bb14SJonathan Kim 					uint32_t single_memop : 1;
341bd33bb14SJonathan Kim 					uint32_t single_alu_op : 1;
342bd33bb14SJonathan Kim 					uint32_t reserved: 29;
343bd33bb14SJonathan Kim 					uint32_t process_ctx_flush: 1;
344a9818854SJonathan Kim 				};
345a9818854SJonathan Kim 				uint32_t u32all;
346a9818854SJonathan Kim 			} flags;
347a9818854SJonathan Kim 			uint32_t spi_gdbg_per_vmid_cntl;
348a9818854SJonathan Kim 			uint32_t tcp_watch_cntl[4];
34909d49e14SJonathan Kim 			uint32_t trap_en;
350a9818854SJonathan Kim 		} set_shader_debugger;
3518521e3c5SShaoyun Liu 
3528521e3c5SShaoyun Liu 		struct {
3538521e3c5SShaoyun Liu 			union {
3548521e3c5SShaoyun Liu 				struct {
3558521e3c5SShaoyun Liu 					uint32_t limit_single_process : 1;
3568521e3c5SShaoyun Liu 					uint32_t enable_hws_logging_buffer : 1;
3578521e3c5SShaoyun Liu 					uint32_t reserved : 30;
3588521e3c5SShaoyun Liu 				};
3598521e3c5SShaoyun Liu 				uint32_t all;
3608521e3c5SShaoyun Liu 			} option;
3618521e3c5SShaoyun Liu 			struct {
3628521e3c5SShaoyun Liu 				uint32_t tdr_level;
3638521e3c5SShaoyun Liu 				uint32_t tdr_delay;
3648521e3c5SShaoyun Liu 			} tdr_config;
3658521e3c5SShaoyun Liu 		} change_config;
3666a4a1f60SJack Xiao 	};
3676a4a1f60SJack Xiao };
3686a4a1f60SJack Xiao 
/*
 * IP-specific MES callbacks (see the funcs member of struct amdgpu_mes).
 * Every callback takes the MES instance plus an operation-specific input
 * block and returns an int.
 */
struct amdgpu_mes_funcs {
	/* user hardware queues */
	int (*add_hw_queue)(struct amdgpu_mes *mes,
			    struct mes_add_queue_input *input);
	int (*remove_hw_queue)(struct amdgpu_mes *mes,
			       struct mes_remove_queue_input *input);

	/* legacy queues */
	int (*map_legacy_queue)(struct amdgpu_mes *mes,
				struct mes_map_legacy_queue_input *input);
	int (*unmap_legacy_queue)(struct amdgpu_mes *mes,
				  struct mes_unmap_legacy_queue_input *input);

	/* gang suspend / resume */
	int (*suspend_gang)(struct amdgpu_mes *mes,
			    struct mes_suspend_gang_input *input);
	int (*resume_gang)(struct amdgpu_mes *mes,
			   struct mes_resume_gang_input *input);

	/* miscellaneous operations, see enum mes_misc_opcode */
	int (*misc_op)(struct amdgpu_mes *mes,
		       struct mes_misc_op_input *input);

	/* queue reset */
	int (*reset_legacy_queue)(struct amdgpu_mes *mes,
				  struct mes_reset_legacy_queue_input *input);
	int (*reset_hw_queue)(struct amdgpu_mes *mes,
			      struct mes_reset_queue_input *input);
};
397a538bbe7SJack Xiao 
/*
 * Call the KIQ init/fini hooks stored in adev->mes, passing adev itself.
 * Note that the adev argument is expanded twice.
 */
#define amdgpu_mes_kiq_hw_init(adev)	((adev)->mes.kiq_hw_init((adev)))
#define amdgpu_mes_kiq_hw_fini(adev)	((adev)->mes.kiq_hw_fini((adev)))
400cf064b45SJack Xiao 
40111ec5b36SJack Xiao int amdgpu_mes_ctx_get_offs(struct amdgpu_ring *ring, unsigned int id_offs);
40211ec5b36SJack Xiao 
403cc42e76eSMario Limonciello int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe);
404b04c1d64SJack Xiao int amdgpu_mes_init(struct amdgpu_device *adev);
405b04c1d64SJack Xiao void amdgpu_mes_fini(struct amdgpu_device *adev);
406b04c1d64SJack Xiao 
40748dcd2b7SJack Xiao int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid,
40848dcd2b7SJack Xiao 			      struct amdgpu_vm *vm);
409063a38d6SJack Xiao void amdgpu_mes_destroy_process(struct amdgpu_device *adev, int pasid);
41048dcd2b7SJack Xiao 
4115d0f619fSJack Xiao int amdgpu_mes_add_gang(struct amdgpu_device *adev, int pasid,
4125d0f619fSJack Xiao 			struct amdgpu_mes_gang_properties *gprops,
4135d0f619fSJack Xiao 			int *gang_id);
414b0306e58SJack Xiao int amdgpu_mes_remove_gang(struct amdgpu_device *adev, int gang_id);
4155d0f619fSJack Xiao 
416c8bb1057SJack Xiao int amdgpu_mes_suspend(struct amdgpu_device *adev);
417ea756bd5SJack Xiao int amdgpu_mes_resume(struct amdgpu_device *adev);
418c8bb1057SJack Xiao 
419be5609deSJack Xiao int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,
420be5609deSJack Xiao 			    struct amdgpu_mes_queue_properties *qprops,
421be5609deSJack Xiao 			    int *queue_id);
422bcc4e1e1SJack Xiao int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id);
4235b7a59deSAlex Deucher int amdgpu_mes_reset_hw_queue(struct amdgpu_device *adev, int queue_id);
4248b2429a1SJiadong Zhu int amdgpu_mes_reset_hw_queue_mmio(struct amdgpu_device *adev, int queue_type,
4258b2429a1SJiadong Zhu 				   int me_id, int pipe_id, int queue_id, int vmid);
426be5609deSJack Xiao 
427029c2b03SJack Xiao int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev,
428029c2b03SJack Xiao 				struct amdgpu_ring *ring);
42918ee4ce6SJack Xiao int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
43018ee4ce6SJack Xiao 				  struct amdgpu_ring *ring,
43118ee4ce6SJack Xiao 				  enum amdgpu_unmap_queues_action action,
43218ee4ce6SJack Xiao 				  u64 gpu_addr, u64 seq);
433c30fb344SAlex Deucher int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev,
434c30fb344SAlex Deucher 				  struct amdgpu_ring *ring,
4358b2429a1SJiadong Zhu 				  unsigned int vmid,
4368b2429a1SJiadong Zhu 				  bool use_mmio);
43718ee4ce6SJack Xiao 
4386a4a1f60SJack Xiao uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg);
4396a4a1f60SJack Xiao int amdgpu_mes_wreg(struct amdgpu_device *adev,
4406a4a1f60SJack Xiao 		    uint32_t reg, uint32_t val);
4416a4a1f60SJack Xiao int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg,
4426a4a1f60SJack Xiao 			uint32_t val, uint32_t mask);
4436a4a1f60SJack Xiao int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev,
4446a4a1f60SJack Xiao 				  uint32_t reg0, uint32_t reg1,
4456a4a1f60SJack Xiao 				  uint32_t ref, uint32_t mask);
446a9818854SJonathan Kim int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
447a9818854SJonathan Kim 				uint64_t process_context_addr,
448a9818854SJonathan Kim 				uint32_t spi_gdbg_per_vmid_cntl,
449a9818854SJonathan Kim 				const uint32_t *tcp_watch_cntl,
45009d49e14SJonathan Kim 				uint32_t flags,
45109d49e14SJonathan Kim 				bool trap_en);
452bd33bb14SJonathan Kim int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev,
453bd33bb14SJonathan Kim 				uint64_t process_context_addr);
454d0c423b6SJack Xiao int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
455d0c423b6SJack Xiao 			int queue_type, int idx,
456d0c423b6SJack Xiao 			struct amdgpu_mes_ctx_data *ctx_data,
457d0c423b6SJack Xiao 			struct amdgpu_ring **out);
4589cc654c8SJack Xiao void amdgpu_mes_remove_ring(struct amdgpu_device *adev,
4599cc654c8SJack Xiao 			    struct amdgpu_ring *ring);
460d0c423b6SJack Xiao 
4612d7a1f71SLe Ma uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev,
4622d7a1f71SLe Ma 						   enum amdgpu_mes_priority_level prio);
4632d7a1f71SLe Ma 
464e3652b09SJack Xiao int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev,
465e3652b09SJack Xiao 				   struct amdgpu_mes_ctx_data *ctx_data);
466e3652b09SJack Xiao void amdgpu_mes_ctx_free_meta_data(struct amdgpu_mes_ctx_data *ctx_data);
4677c18b40eSJack Xiao int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev,
4687c18b40eSJack Xiao 				 struct amdgpu_vm *vm,
4697c18b40eSJack Xiao 				 struct amdgpu_mes_ctx_data *ctx_data);
470737dad0bSJack Xiao int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev,
471737dad0bSJack Xiao 				   struct amdgpu_mes_ctx_data *ctx_data);
472e3652b09SJack Xiao 
4736624d161SJack Xiao int amdgpu_mes_self_test(struct amdgpu_device *adev);
4746624d161SJack Xiao 
475464913c0SMukul Joshi int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev);
47618ee4ce6SJack Xiao 
/*
 * MES lock can be taken in MMU notifiers.
 *
 * A bit more detail about why to set no-FS reclaim with MES lock:
 *
 * The purpose of the MMU notifier is to stop GPU access to memory so
 * that the Linux VM subsystem can move pages around safely. This is
 * done by preempting user mode queues for the affected process. When
 * MES is used, MES lock needs to be taken to preempt the queues.
 *
 * The MMU notifier callback entry point in the driver is
 * amdgpu_mn_invalidate_range_start_hsa. The relevant call chain from
 * there is:
 * amdgpu_amdkfd_evict_userptr -> kgd2kfd_quiesce_mm ->
 * kfd_process_evict_queues -> pdd->dev->dqm->ops.evict_process_queues
 *
 * The last part of the chain is a function pointer where we take the
 * MES lock.
 *
 * The problem with taking locks in the MMU notifier is that MMU
 * notifiers can be called in reclaim-FS context. That's where the
 * kernel frees up pages to make room for new page allocations under
 * memory pressure. While we are running in reclaim-FS context, we must
 * not trigger another memory reclaim operation because that would
 * recursively reenter the reclaim code and cause a deadlock. The
 * memalloc_noreclaim_save/restore calls in amdgpu_mes_lock/unlock
 * guarantee that.
 *
 * In addition we also need to avoid lock dependencies on other locks taken
 * under the MES lock, for example reservation locks. Here is a possible
 * scenario of a deadlock:
 * Thread A: takes and holds reservation lock | triggers reclaim-FS |
 * MMU notifier | blocks trying to take MES lock
 * Thread B: takes and holds MES lock | blocks trying to take reservation lock
 *
 * In this scenario Thread B gets involved in a deadlock even without
 * triggering a reclaim-FS operation itself.
 * To fix this and break the lock dependency chain you'd need to either:
 * 1. protect reservation locks with memalloc_nofs_save/restore, or
 * 2. avoid taking reservation locks under the MES lock.
 *
 * Reservation locks are taken all over the kernel in different subsystems, we
 * have no control over them and their lock dependencies. So the only workable
 * solution is to avoid taking other locks under the MES lock.
 * As a result, make sure no reclaim-FS happens while holding this lock anywhere
 * to prevent deadlocks when an MMU notifier runs in reclaim-FS context.
 */
amdgpu_mes_lock(struct amdgpu_mes * mes)52318ee4ce6SJack Xiao static inline void amdgpu_mes_lock(struct amdgpu_mes *mes)
52418ee4ce6SJack Xiao {
52518ee4ce6SJack Xiao 	mutex_lock(&mes->mutex_hidden);
52618ee4ce6SJack Xiao 	mes->saved_flags = memalloc_noreclaim_save();
52718ee4ce6SJack Xiao }
52818ee4ce6SJack Xiao 
amdgpu_mes_unlock(struct amdgpu_mes * mes)52918ee4ce6SJack Xiao static inline void amdgpu_mes_unlock(struct amdgpu_mes *mes)
53018ee4ce6SJack Xiao {
53118ee4ce6SJack Xiao 	memalloc_noreclaim_restore(mes->saved_flags);
53218ee4ce6SJack Xiao 	mutex_unlock(&mes->mutex_hidden);
53318ee4ce6SJack Xiao }
534ccf8ef6bSMukul Joshi 
/* Declarations only; implemented outside this header. */
bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev);
int amdgpu_mes_update_enforce_isolation(struct amdgpu_device *adev);
5388521e3c5SShaoyun Liu 
539a538bbe7SJack Xiao #endif /* __AMDGPU_MES_H__ */
540