1d87f36a0SRajneesh Bhardwaj // SPDX-License-Identifier: GPL-2.0 OR MIT
2ed6e6a34SBen Goz /*
3d87f36a0SRajneesh Bhardwaj  * Copyright 2014-2022 Advanced Micro Devices, Inc.
4ed6e6a34SBen Goz  *
5ed6e6a34SBen Goz  * Permission is hereby granted, free of charge, to any person obtaining a
6ed6e6a34SBen Goz  * copy of this software and associated documentation files (the "Software"),
7ed6e6a34SBen Goz  * to deal in the Software without restriction, including without limitation
8ed6e6a34SBen Goz  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9ed6e6a34SBen Goz  * and/or sell copies of the Software, and to permit persons to whom the
10ed6e6a34SBen Goz  * Software is furnished to do so, subject to the following conditions:
11ed6e6a34SBen Goz  *
12ed6e6a34SBen Goz  * The above copyright notice and this permission notice shall be included in
13ed6e6a34SBen Goz  * all copies or substantial portions of the Software.
14ed6e6a34SBen Goz  *
15ed6e6a34SBen Goz  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16ed6e6a34SBen Goz  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17ed6e6a34SBen Goz  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18ed6e6a34SBen Goz  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19ed6e6a34SBen Goz  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20ed6e6a34SBen Goz  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21ed6e6a34SBen Goz  * OTHER DEALINGS IN THE SOFTWARE.
22ed6e6a34SBen Goz  *
23ed6e6a34SBen Goz  */
24ed6e6a34SBen Goz 
25ed6e6a34SBen Goz #include <linux/types.h>
26ed6e6a34SBen Goz #include <linux/mutex.h>
27ed6e6a34SBen Goz #include <linux/slab.h>
28ed6e6a34SBen Goz #include <linux/printk.h>
299a5634a7SOded Gabbay #include <linux/sched.h>
30ed6e6a34SBen Goz #include "kfd_kernel_queue.h"
31ed6e6a34SBen Goz #include "kfd_priv.h"
32ed6e6a34SBen Goz #include "kfd_device_queue_manager.h"
33ed6e6a34SBen Goz #include "kfd_pm4_headers.h"
34ed6e6a34SBen Goz #include "kfd_pm4_opcodes.h"
351802b042SYunxiang Li #include "amdgpu_reset.h"
36ed6e6a34SBen Goz 
37ed6e6a34SBen Goz #define PM4_COUNT_ZERO (((1 << 15) - 1) << 16)
38ed6e6a34SBen Goz 
39a5a4d68cSYong Zhao /* Initialize a kernel queue, including allocations of GART memory
40a5a4d68cSYong Zhao  * needed for the queue.
41a5a4d68cSYong Zhao  */
kq_initialize(struct kernel_queue * kq,struct kfd_node * dev,enum kfd_queue_type type,unsigned int queue_size)428dc1db31SMukul Joshi static bool kq_initialize(struct kernel_queue *kq, struct kfd_node *dev,
43ed6e6a34SBen Goz 		enum kfd_queue_type type, unsigned int queue_size)
44ed6e6a34SBen Goz {
45ed6e6a34SBen Goz 	struct queue_properties prop;
46ed6e6a34SBen Goz 	int retval;
47ed6e6a34SBen Goz 	union PM4_MES_TYPE_3_HEADER nop;
48ed6e6a34SBen Goz 
4932fa8219SFelix Kuehling 	if (WARN_ON(type != KFD_QUEUE_TYPE_DIQ && type != KFD_QUEUE_TYPE_HIQ))
5032fa8219SFelix Kuehling 		return false;
51ed6e6a34SBen Goz 
5279775b62SKent Russell 	pr_debug("Initializing queue type %d size %d\n", KFD_QUEUE_TYPE_HIQ,
5379775b62SKent Russell 			queue_size);
54ed6e6a34SBen Goz 
5578b13f79SEdward O'Callaghan 	memset(&prop, 0, sizeof(prop));
5678b13f79SEdward O'Callaghan 	memset(&nop, 0, sizeof(nop));
5778b13f79SEdward O'Callaghan 
58ed6e6a34SBen Goz 	nop.opcode = IT_NOP;
59ed6e6a34SBen Goz 	nop.type = PM4_TYPE_3;
60ed6e6a34SBen Goz 	nop.u32all |= PM4_COUNT_ZERO;
61ed6e6a34SBen Goz 
62ed6e6a34SBen Goz 	kq->dev = dev;
63ed6e6a34SBen Goz 	kq->nop_packet = nop.u32all;
64ed6e6a34SBen Goz 	switch (type) {
65ed6e6a34SBen Goz 	case KFD_QUEUE_TYPE_DIQ:
66fdfa090bSOak Zeng 		kq->mqd_mgr = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_DIQ];
6759f650a0SOak Zeng 		break;
68ed6e6a34SBen Goz 	case KFD_QUEUE_TYPE_HIQ:
69fdfa090bSOak Zeng 		kq->mqd_mgr = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];
70ed6e6a34SBen Goz 		break;
71ed6e6a34SBen Goz 	default:
7262ec7d38SLijo Lazar 		dev_err(dev->adev->dev, "Invalid queue type %d\n", type);
7332fa8219SFelix Kuehling 		return false;
74ed6e6a34SBen Goz 	}
75ed6e6a34SBen Goz 
768d5f3552SYong Zhao 	if (!kq->mqd_mgr)
77ed6e6a34SBen Goz 		return false;
78ed6e6a34SBen Goz 
798dc1db31SMukul Joshi 	prop.doorbell_ptr = kfd_get_kernel_doorbell(dev->kfd, &prop.doorbell_off);
80ed6e6a34SBen Goz 
814eacc26bSKent Russell 	if (!prop.doorbell_ptr) {
8262ec7d38SLijo Lazar 		dev_err(dev->adev->dev, "Failed to initialize doorbell");
83ed6e6a34SBen Goz 		goto err_get_kernel_doorbell;
84aaad2d8cSBen Goz 	}
85ed6e6a34SBen Goz 
86a86aa3caSOded Gabbay 	retval = kfd_gtt_sa_allocate(dev, queue_size, &kq->pq);
87aaad2d8cSBen Goz 	if (retval != 0) {
8862ec7d38SLijo Lazar 		dev_err(dev->adev->dev, "Failed to init pq queues size %d\n",
8962ec7d38SLijo Lazar 			queue_size);
90ed6e6a34SBen Goz 		goto err_pq_allocate_vidmem;
91aaad2d8cSBen Goz 	}
92ed6e6a34SBen Goz 
93ed6e6a34SBen Goz 	kq->pq_kernel_addr = kq->pq->cpu_ptr;
94ed6e6a34SBen Goz 	kq->pq_gpu_addr = kq->pq->gpu_addr;
95ed6e6a34SBen Goz 
96ccdef35dSYong Zhao 	/* For CIK family asics, kq->eop_mem is not needed */
977eb0502aSGraham Sider 	if (dev->adev->asic_type > CHIP_MULLINS) {
98ccdef35dSYong Zhao 		retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem);
99ccdef35dSYong Zhao 		if (retval != 0)
1006898f0a5SBen Goz 			goto err_eop_allocate_vidmem;
1016898f0a5SBen Goz 
102ccdef35dSYong Zhao 		kq->eop_gpu_addr = kq->eop_mem->gpu_addr;
103ccdef35dSYong Zhao 		kq->eop_kernel_addr = kq->eop_mem->cpu_ptr;
104ccdef35dSYong Zhao 
105ccdef35dSYong Zhao 		memset(kq->eop_kernel_addr, 0, PAGE_SIZE);
106ccdef35dSYong Zhao 	}
107ccdef35dSYong Zhao 
108a86aa3caSOded Gabbay 	retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->rptr_kernel),
109a86aa3caSOded Gabbay 					&kq->rptr_mem);
110ed6e6a34SBen Goz 
111ed6e6a34SBen Goz 	if (retval != 0)
112ed6e6a34SBen Goz 		goto err_rptr_allocate_vidmem;
113ed6e6a34SBen Goz 
114ed6e6a34SBen Goz 	kq->rptr_kernel = kq->rptr_mem->cpu_ptr;
115ed6e6a34SBen Goz 	kq->rptr_gpu_addr = kq->rptr_mem->gpu_addr;
116ed6e6a34SBen Goz 
1178dc1db31SMukul Joshi 	retval = kfd_gtt_sa_allocate(dev, dev->kfd->device_info.doorbell_size,
118a86aa3caSOded Gabbay 					&kq->wptr_mem);
119ed6e6a34SBen Goz 
120ed6e6a34SBen Goz 	if (retval != 0)
121ed6e6a34SBen Goz 		goto err_wptr_allocate_vidmem;
122ed6e6a34SBen Goz 
123ed6e6a34SBen Goz 	kq->wptr_kernel = kq->wptr_mem->cpu_ptr;
124ed6e6a34SBen Goz 	kq->wptr_gpu_addr = kq->wptr_mem->gpu_addr;
125ed6e6a34SBen Goz 
126ed6e6a34SBen Goz 	memset(kq->pq_kernel_addr, 0, queue_size);
127ed6e6a34SBen Goz 	memset(kq->rptr_kernel, 0, sizeof(*kq->rptr_kernel));
128*cdc6705fSLijo Lazar 	memset(kq->wptr_kernel, 0, dev->kfd->device_info.doorbell_size);
129ed6e6a34SBen Goz 
130ed6e6a34SBen Goz 	prop.queue_size = queue_size;
131ed6e6a34SBen Goz 	prop.is_interop = false;
132b8020b03SJoseph Greathouse 	prop.is_gws = false;
133ed6e6a34SBen Goz 	prop.priority = 1;
134ed6e6a34SBen Goz 	prop.queue_percent = 100;
135ed6e6a34SBen Goz 	prop.type = type;
136ed6e6a34SBen Goz 	prop.vmid = 0;
137ed6e6a34SBen Goz 	prop.queue_address = kq->pq_gpu_addr;
138ed6e6a34SBen Goz 	prop.read_ptr = (uint32_t *) kq->rptr_gpu_addr;
139ed6e6a34SBen Goz 	prop.write_ptr = (uint32_t *) kq->wptr_gpu_addr;
1406898f0a5SBen Goz 	prop.eop_ring_buffer_address = kq->eop_gpu_addr;
1416898f0a5SBen Goz 	prop.eop_ring_buffer_size = PAGE_SIZE;
142ed6e6a34SBen Goz 
143e88a614cSEdward O'Callaghan 	if (init_queue(&kq->queue, &prop) != 0)
144ed6e6a34SBen Goz 		goto err_init_queue;
145ed6e6a34SBen Goz 
146ed6e6a34SBen Goz 	kq->queue->device = dev;
147ed6e6a34SBen Goz 	kq->queue->process = kfd_get_process(current);
148ed6e6a34SBen Goz 
1498636e53cSOak Zeng 	kq->queue->mqd_mem_obj = kq->mqd_mgr->allocate_mqd(kq->mqd_mgr->dev,
1508636e53cSOak Zeng 					&kq->queue->properties);
1518636e53cSOak Zeng 	if (!kq->queue->mqd_mem_obj)
1528636e53cSOak Zeng 		goto err_allocate_mqd;
1538636e53cSOak Zeng 	kq->mqd_mgr->init_mqd(kq->mqd_mgr, &kq->queue->mqd,
1548636e53cSOak Zeng 					kq->queue->mqd_mem_obj,
155ed6e6a34SBen Goz 					&kq->queue->gart_mqd_addr,
156ed6e6a34SBen Goz 					&kq->queue->properties);
157ed6e6a34SBen Goz 	/* assign HIQ to HQD */
158ed6e6a34SBen Goz 	if (type == KFD_QUEUE_TYPE_HIQ) {
15979775b62SKent Russell 		pr_debug("Assigning hiq to hqd\n");
160ed6e6a34SBen Goz 		kq->queue->pipe = KFD_CIK_HIQ_PIPE;
161ed6e6a34SBen Goz 		kq->queue->queue = KFD_CIK_HIQ_QUEUE;
1628d5f3552SYong Zhao 		kq->mqd_mgr->load_mqd(kq->mqd_mgr, kq->queue->mqd,
1638d5f3552SYong Zhao 				kq->queue->pipe, kq->queue->queue,
1648d5f3552SYong Zhao 				&kq->queue->properties, NULL);
165ed6e6a34SBen Goz 	} else {
166ed6e6a34SBen Goz 		/* allocate fence for DIQ */
167ed6e6a34SBen Goz 
168a86aa3caSOded Gabbay 		retval = kfd_gtt_sa_allocate(dev, sizeof(uint32_t),
169a86aa3caSOded Gabbay 						&kq->fence_mem_obj);
170ed6e6a34SBen Goz 
171ed6e6a34SBen Goz 		if (retval != 0)
172ed6e6a34SBen Goz 			goto err_alloc_fence;
173ed6e6a34SBen Goz 
174ed6e6a34SBen Goz 		kq->fence_kernel_address = kq->fence_mem_obj->cpu_ptr;
175ed6e6a34SBen Goz 		kq->fence_gpu_addr = kq->fence_mem_obj->gpu_addr;
176ed6e6a34SBen Goz 	}
177ed6e6a34SBen Goz 
178ed6e6a34SBen Goz 	print_queue(kq->queue);
179ed6e6a34SBen Goz 
180ed6e6a34SBen Goz 	return true;
181ed6e6a34SBen Goz err_alloc_fence:
1828636e53cSOak Zeng 	kq->mqd_mgr->free_mqd(kq->mqd_mgr, kq->queue->mqd, kq->queue->mqd_mem_obj);
1838636e53cSOak Zeng err_allocate_mqd:
184ed6e6a34SBen Goz 	uninit_queue(kq->queue);
185ed6e6a34SBen Goz err_init_queue:
186a86aa3caSOded Gabbay 	kfd_gtt_sa_free(dev, kq->wptr_mem);
187ed6e6a34SBen Goz err_wptr_allocate_vidmem:
188a86aa3caSOded Gabbay 	kfd_gtt_sa_free(dev, kq->rptr_mem);
189ed6e6a34SBen Goz err_rptr_allocate_vidmem:
1906898f0a5SBen Goz 	kfd_gtt_sa_free(dev, kq->eop_mem);
1916898f0a5SBen Goz err_eop_allocate_vidmem:
192a86aa3caSOded Gabbay 	kfd_gtt_sa_free(dev, kq->pq);
193ed6e6a34SBen Goz err_pq_allocate_vidmem:
1948dc1db31SMukul Joshi 	kfd_release_kernel_doorbell(dev->kfd, prop.doorbell_ptr);
195ed6e6a34SBen Goz err_get_kernel_doorbell:
196ed6e6a34SBen Goz 	return false;
197ed6e6a34SBen Goz 
198ed6e6a34SBen Goz }
199ed6e6a34SBen Goz 
200a5a4d68cSYong Zhao /* Uninitialize a kernel queue and free all its memory usages. */
kq_uninitialize(struct kernel_queue * kq)2011802b042SYunxiang Li static void kq_uninitialize(struct kernel_queue *kq)
202ed6e6a34SBen Goz {
2031802b042SYunxiang Li 	if (kq->queue->properties.type == KFD_QUEUE_TYPE_HIQ && down_read_trylock(&kq->dev->adev->reset_domain->sem)) {
2048d5f3552SYong Zhao 		kq->mqd_mgr->destroy_mqd(kq->mqd_mgr,
2051fabbf78SOded Gabbay 					kq->queue->mqd,
206b22666feSFelix Kuehling 					KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
207b90e3fbeSFelix Kuehling 					KFD_UNMAP_LATENCY_MS,
208ed6e6a34SBen Goz 					kq->queue->pipe,
209ed6e6a34SBen Goz 					kq->queue->queue);
2101802b042SYunxiang Li 		up_read(&kq->dev->adev->reset_domain->sem);
2111802b042SYunxiang Li 	}
212a86aa3caSOded Gabbay 	else if (kq->queue->properties.type == KFD_QUEUE_TYPE_DIQ)
213a86aa3caSOded Gabbay 		kfd_gtt_sa_free(kq->dev, kq->fence_mem_obj);
214ed6e6a34SBen Goz 
2158636e53cSOak Zeng 	kq->mqd_mgr->free_mqd(kq->mqd_mgr, kq->queue->mqd,
2168d5f3552SYong Zhao 				kq->queue->mqd_mem_obj);
217aaad2d8cSBen Goz 
218a86aa3caSOded Gabbay 	kfd_gtt_sa_free(kq->dev, kq->rptr_mem);
219a86aa3caSOded Gabbay 	kfd_gtt_sa_free(kq->dev, kq->wptr_mem);
220ccdef35dSYong Zhao 
221ccdef35dSYong Zhao 	/* For CIK family asics, kq->eop_mem is Null, kfd_gtt_sa_free()
222ccdef35dSYong Zhao 	 * is able to handle NULL properly.
223ccdef35dSYong Zhao 	 */
224ccdef35dSYong Zhao 	kfd_gtt_sa_free(kq->dev, kq->eop_mem);
225ccdef35dSYong Zhao 
226a86aa3caSOded Gabbay 	kfd_gtt_sa_free(kq->dev, kq->pq);
2278dc1db31SMukul Joshi 	kfd_release_kernel_doorbell(kq->dev->kfd,
2285cd78de5SOded Gabbay 					kq->queue->properties.doorbell_ptr);
229ed6e6a34SBen Goz 	uninit_queue(kq->queue);
230ed6e6a34SBen Goz }
231ed6e6a34SBen Goz 
kq_acquire_packet_buffer(struct kernel_queue * kq,size_t packet_size_in_dwords,unsigned int ** buffer_ptr)232a5a4d68cSYong Zhao int kq_acquire_packet_buffer(struct kernel_queue *kq,
233ed6e6a34SBen Goz 		size_t packet_size_in_dwords, unsigned int **buffer_ptr)
234ed6e6a34SBen Goz {
235ed6e6a34SBen Goz 	size_t available_size;
236ed6e6a34SBen Goz 	size_t queue_size_dwords;
237ed6e6a34SBen Goz 	uint32_t wptr, rptr;
2389d7d0248SFelix Kuehling 	uint64_t wptr64;
239ed6e6a34SBen Goz 	unsigned int *queue_address;
240ed6e6a34SBen Goz 
241cb1d9967SYong Zhao 	/* When rptr == wptr, the buffer is empty.
242cb1d9967SYong Zhao 	 * When rptr == wptr + 1, the buffer is full.
243cb1d9967SYong Zhao 	 * It is always rptr that advances to the position of wptr, rather than
244cb1d9967SYong Zhao 	 * the opposite. So we can only use up to queue_size_dwords - 1 dwords.
245cb1d9967SYong Zhao 	 */
246ed6e6a34SBen Goz 	rptr = *kq->rptr_kernel;
2479d7d0248SFelix Kuehling 	wptr = kq->pending_wptr;
2489d7d0248SFelix Kuehling 	wptr64 = kq->pending_wptr64;
249ed6e6a34SBen Goz 	queue_address = (unsigned int *)kq->pq_kernel_addr;
2506d566930SFelix Kuehling 	queue_size_dwords = kq->queue->properties.queue_size / 4;
251ed6e6a34SBen Goz 
2528856d8e0SOded Gabbay 	pr_debug("rptr: %d\n", rptr);
2538856d8e0SOded Gabbay 	pr_debug("wptr: %d\n", wptr);
2548856d8e0SOded Gabbay 	pr_debug("queue_address 0x%p\n", queue_address);
255ed6e6a34SBen Goz 
256cb1d9967SYong Zhao 	available_size = (rptr + queue_size_dwords - 1 - wptr) %
257ed6e6a34SBen Goz 							queue_size_dwords;
258ed6e6a34SBen Goz 
259cb1d9967SYong Zhao 	if (packet_size_in_dwords > available_size) {
260a550bb3dSOded Gabbay 		/*
261a550bb3dSOded Gabbay 		 * make sure calling functions know
262a550bb3dSOded Gabbay 		 * acquire_packet_buffer() failed
263a550bb3dSOded Gabbay 		 */
2642a26fbfeSFelix Kuehling 		goto err_no_space;
265a550bb3dSOded Gabbay 	}
266ed6e6a34SBen Goz 
267ed6e6a34SBen Goz 	if (wptr + packet_size_in_dwords >= queue_size_dwords) {
268cb1d9967SYong Zhao 		/* make sure after rolling back to position 0, there is
269cb1d9967SYong Zhao 		 * still enough space.
270cb1d9967SYong Zhao 		 */
2712a26fbfeSFelix Kuehling 		if (packet_size_in_dwords >= rptr)
2722a26fbfeSFelix Kuehling 			goto err_no_space;
2732a26fbfeSFelix Kuehling 
274cb1d9967SYong Zhao 		/* fill nops, roll back and start at position 0 */
275ed6e6a34SBen Goz 		while (wptr > 0) {
276ed6e6a34SBen Goz 			queue_address[wptr] = kq->nop_packet;
277ed6e6a34SBen Goz 			wptr = (wptr + 1) % queue_size_dwords;
2789d7d0248SFelix Kuehling 			wptr64++;
279ed6e6a34SBen Goz 		}
280ed6e6a34SBen Goz 	}
281ed6e6a34SBen Goz 
282ed6e6a34SBen Goz 	*buffer_ptr = &queue_address[wptr];
283ed6e6a34SBen Goz 	kq->pending_wptr = wptr + packet_size_in_dwords;
2849d7d0248SFelix Kuehling 	kq->pending_wptr64 = wptr64 + packet_size_in_dwords;
285ed6e6a34SBen Goz 
286ed6e6a34SBen Goz 	return 0;
2872a26fbfeSFelix Kuehling 
2882a26fbfeSFelix Kuehling err_no_space:
2892a26fbfeSFelix Kuehling 	*buffer_ptr = NULL;
2902a26fbfeSFelix Kuehling 	return -ENOMEM;
291ed6e6a34SBen Goz }
292ed6e6a34SBen Goz 
kq_submit_packet(struct kernel_queue * kq)293e1f6746fSLijo Lazar int kq_submit_packet(struct kernel_queue *kq)
294ed6e6a34SBen Goz {
295ed6e6a34SBen Goz #ifdef DEBUG
296ed6e6a34SBen Goz 	int i;
297ed6e6a34SBen Goz 
298ed6e6a34SBen Goz 	for (i = *kq->wptr_kernel; i < kq->pending_wptr; i++) {
299ed6e6a34SBen Goz 		pr_debug("0x%2X ", kq->pq_kernel_addr[i]);
300ed6e6a34SBen Goz 		if (i % 15 == 0)
301ed6e6a34SBen Goz 			pr_debug("\n");
302ed6e6a34SBen Goz 	}
303ed6e6a34SBen Goz 	pr_debug("\n");
304ed6e6a34SBen Goz #endif
305e1f6746fSLijo Lazar 	/* Fatal err detected, packet submission won't go through */
306e1f6746fSLijo Lazar 	if (amdgpu_amdkfd_is_fed(kq->dev->adev))
307e1f6746fSLijo Lazar 		return -EIO;
308e1f6746fSLijo Lazar 
309097c69d4SVictor Zhao 	/* Make sure ring buffer is updated before wptr updated */
310097c69d4SVictor Zhao 	mb();
311097c69d4SVictor Zhao 
3128dc1db31SMukul Joshi 	if (kq->dev->kfd->device_info.doorbell_size == 8) {
313ccdef35dSYong Zhao 		*kq->wptr64_kernel = kq->pending_wptr64;
314097c69d4SVictor Zhao 		mb(); /* Make sure wptr updated before ring doorbell */
315ccdef35dSYong Zhao 		write_kernel_doorbell64(kq->queue->properties.doorbell_ptr,
316ccdef35dSYong Zhao 					kq->pending_wptr64);
317ccdef35dSYong Zhao 	} else {
318ccdef35dSYong Zhao 		*kq->wptr_kernel = kq->pending_wptr;
319097c69d4SVictor Zhao 		mb(); /* Make sure wptr updated before ring doorbell */
320ccdef35dSYong Zhao 		write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
321ccdef35dSYong Zhao 					kq->pending_wptr);
322ccdef35dSYong Zhao 	}
323e1f6746fSLijo Lazar 
324e1f6746fSLijo Lazar 	return 0;
325ed6e6a34SBen Goz }
326ed6e6a34SBen Goz 
kq_rollback_packet(struct kernel_queue * kq)327a5a4d68cSYong Zhao void kq_rollback_packet(struct kernel_queue *kq)
328ed6e6a34SBen Goz {
3298dc1db31SMukul Joshi 	if (kq->dev->kfd->device_info.doorbell_size == 8) {
3309d7d0248SFelix Kuehling 		kq->pending_wptr64 = *kq->wptr64_kernel;
3319d7d0248SFelix Kuehling 		kq->pending_wptr = *kq->wptr_kernel %
3329d7d0248SFelix Kuehling 			(kq->queue->properties.queue_size / 4);
3339d7d0248SFelix Kuehling 	} else {
334bebfd2f4SFelix Kuehling 		kq->pending_wptr = *kq->wptr_kernel;
335ed6e6a34SBen Goz 	}
3369d7d0248SFelix Kuehling }
337ed6e6a34SBen Goz 
kernel_queue_init(struct kfd_node * dev,enum kfd_queue_type type)3388dc1db31SMukul Joshi struct kernel_queue *kernel_queue_init(struct kfd_node *dev,
339ed6e6a34SBen Goz 					enum kfd_queue_type type)
340ed6e6a34SBen Goz {
341ed6e6a34SBen Goz 	struct kernel_queue *kq;
342ed6e6a34SBen Goz 
343dbf56ab1SKent Russell 	kq = kzalloc(sizeof(*kq), GFP_KERNEL);
344ed6e6a34SBen Goz 	if (!kq)
345ed6e6a34SBen Goz 		return NULL;
346ed6e6a34SBen Goz 
347a5a4d68cSYong Zhao 	if (kq_initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE))
348e596b903SYong Zhao 		return kq;
349e596b903SYong Zhao 
35062ec7d38SLijo Lazar 	dev_err(dev->adev->dev, "Failed to init kernel queue\n");
351e596b903SYong Zhao 
352ed6e6a34SBen Goz 	kfree(kq);
353ed6e6a34SBen Goz 	return NULL;
354ed6e6a34SBen Goz }
355ed6e6a34SBen Goz 
/*
 * kernel_queue_uninit() - Tear down and free a kernel queue.
 *
 * @kq: kernel queue obtained from kernel_queue_init()
 */
void kernel_queue_uninit(struct kernel_queue *kq)
{
	/* Release the queue's resources first, then the container itself */
	kq_uninitialize(kq);

	kfree(kq);
}
361ed6e6a34SBen Goz 
36232fa8219SFelix Kuehling /* FIXME: Can this test be removed? */
test_kq(struct kfd_node * dev)3638dc1db31SMukul Joshi static __attribute__((unused)) void test_kq(struct kfd_node *dev)
364ed6e6a34SBen Goz {
365ed6e6a34SBen Goz 	struct kernel_queue *kq;
366ed6e6a34SBen Goz 	uint32_t *buffer, i;
367ed6e6a34SBen Goz 	int retval;
368ed6e6a34SBen Goz 
36962ec7d38SLijo Lazar 	dev_err(dev->adev->dev, "Starting kernel queue test\n");
370ed6e6a34SBen Goz 
371ed6e6a34SBen Goz 	kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_HIQ);
37232fa8219SFelix Kuehling 	if (unlikely(!kq)) {
37362ec7d38SLijo Lazar 		dev_err(dev->adev->dev, "  Failed to initialize HIQ\n");
37462ec7d38SLijo Lazar 		dev_err(dev->adev->dev, "Kernel queue test failed\n");
37532fa8219SFelix Kuehling 		return;
37632fa8219SFelix Kuehling 	}
377ed6e6a34SBen Goz 
378a5a4d68cSYong Zhao 	retval = kq_acquire_packet_buffer(kq, 5, &buffer);
37932fa8219SFelix Kuehling 	if (unlikely(retval != 0)) {
38062ec7d38SLijo Lazar 		dev_err(dev->adev->dev, "  Failed to acquire packet buffer\n");
38162ec7d38SLijo Lazar 		dev_err(dev->adev->dev, "Kernel queue test failed\n");
38232fa8219SFelix Kuehling 		return;
38332fa8219SFelix Kuehling 	}
384ed6e6a34SBen Goz 	for (i = 0; i < 5; i++)
385ed6e6a34SBen Goz 		buffer[i] = kq->nop_packet;
386a5a4d68cSYong Zhao 	kq_submit_packet(kq);
387ed6e6a34SBen Goz 
38862ec7d38SLijo Lazar 	dev_err(dev->adev->dev, "Ending kernel queue test\n");
389ed6e6a34SBen Goz }
390ed6e6a34SBen Goz 
391ed6e6a34SBen Goz 
392