1d38ceaf9SAlex Deucher /*
2d38ceaf9SAlex Deucher * Copyright 2008 Jerome Glisse.
3d38ceaf9SAlex Deucher * All Rights Reserved.
4d38ceaf9SAlex Deucher *
5d38ceaf9SAlex Deucher * Permission is hereby granted, free of charge, to any person obtaining a
6d38ceaf9SAlex Deucher * copy of this software and associated documentation files (the "Software"),
7d38ceaf9SAlex Deucher * to deal in the Software without restriction, including without limitation
8d38ceaf9SAlex Deucher * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9d38ceaf9SAlex Deucher * and/or sell copies of the Software, and to permit persons to whom the
10d38ceaf9SAlex Deucher * Software is furnished to do so, subject to the following conditions:
11d38ceaf9SAlex Deucher *
12d38ceaf9SAlex Deucher * The above copyright notice and this permission notice (including the next
13d38ceaf9SAlex Deucher * paragraph) shall be included in all copies or substantial portions of the
14d38ceaf9SAlex Deucher * Software.
15d38ceaf9SAlex Deucher *
16d38ceaf9SAlex Deucher * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17d38ceaf9SAlex Deucher * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18d38ceaf9SAlex Deucher * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19d38ceaf9SAlex Deucher * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20d38ceaf9SAlex Deucher * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21d38ceaf9SAlex Deucher * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22d38ceaf9SAlex Deucher * DEALINGS IN THE SOFTWARE.
23d38ceaf9SAlex Deucher *
24d38ceaf9SAlex Deucher * Authors:
25d38ceaf9SAlex Deucher * Jerome Glisse <[email protected]>
26d38ceaf9SAlex Deucher */
27fdf2f6c5SSam Ravnborg
28fdf2f6c5SSam Ravnborg #include <linux/file.h>
29568d7c76SStephen Rothwell #include <linux/pagemap.h>
307ca24cf2SMarek Olšák #include <linux/sync_file.h>
314993ba02SChristian König #include <linux/dma-buf.h>
32fdf2f6c5SSam Ravnborg
33d38ceaf9SAlex Deucher #include <drm/amdgpu_drm.h>
34660e8558SDave Airlie #include <drm/drm_syncobj.h>
35a3185f91SChristian König #include <drm/ttm/ttm_tt.h>
36a3185f91SChristian König
37a190f8dcSChristian König #include "amdgpu_cs.h"
38d38ceaf9SAlex Deucher #include "amdgpu.h"
39d38ceaf9SAlex Deucher #include "amdgpu_trace.h"
40c8c5e569SAndrey Grodzovsky #include "amdgpu_gmc.h"
412cddc50eSHuang Rui #include "amdgpu_gem.h"
427c6e68c7SAndrey Grodzovsky #include "amdgpu_ras.h"
43d38ceaf9SAlex Deucher
/**
 * amdgpu_cs_parser_init - set up a CS parser for one submission
 * @p: parser state to initialize (fully zeroed here)
 * @adev: amdgpu device the submission targets
 * @filp: DRM file private of the submitting client
 * @cs: the userspace CS ioctl argument
 *
 * Takes a reference on the submission context (dropped later by the
 * parser fini path, or here on the guilty-context error path).
 *
 * Returns: 0 on success, -EINVAL if there are no chunks or the context
 * id is unknown, -ECANCELED if the context caused a GPU reset (guilty).
 */
static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p,
				 struct amdgpu_device *adev,
				 struct drm_file *filp,
				 union drm_amdgpu_cs *cs)
{
	struct amdgpu_fpriv *fpriv = filp->driver_priv;

	/* A submission without chunks carries no work at all. */
	if (cs->in.num_chunks == 0)
		return -EINVAL;

	memset(p, 0, sizeof(*p));
	p->adev = adev;
	p->filp = filp;

	p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id);
	if (!p->ctx)
		return -EINVAL;

	/* Reject new work from a context that caused a GPU reset;
	 * drop the context reference taken above before bailing out.
	 */
	if (atomic_read(&p->ctx->guilty)) {
		amdgpu_ctx_put(p->ctx);
		return -ECANCELED;
	}

	amdgpu_sync_create(&p->sync);
	drm_exec_init(&p->exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
		      DRM_EXEC_IGNORE_DUPLICATES, 0);
	return 0;
}
7288c98d54SChristian König
/**
 * amdgpu_cs_job_idx - map an IB chunk to a job slot in the gang
 * @p: the CS parser
 * @chunk_ib: userspace description of the IB (ip type/instance, ring)
 *
 * Resolves the scheduler entity addressed by @chunk_ib and returns the
 * index of the gang member already using that entity, or grows the gang
 * by one slot for it.
 *
 * Returns: non-negative job index on success, negative errno if the
 * ring is unknown, the HW IP is disabled, or the gang is already full.
 */
static int amdgpu_cs_job_idx(struct amdgpu_cs_parser *p,
			     struct drm_amdgpu_cs_chunk_ib *chunk_ib)
{
	struct drm_sched_entity *entity;
	unsigned int i;
	int r;

	r = amdgpu_ctx_get_entity(p->ctx, chunk_ib->ip_type,
				  chunk_ib->ip_instance,
				  chunk_ib->ring, &entity);
	if (r)
		return r;

	/*
	 * Abort if there is no run queue associated with this entity.
	 * Possibly because of disabled HW IP.
	 */
	if (entity->rq == NULL)
		return -EINVAL;

	/* Check if we can add this IB to some existing job */
	for (i = 0; i < p->gang_size; ++i)
		if (p->entities[i] == entity)
			return i;

	/* If not increase the gang size if possible */
	if (i == AMDGPU_CS_GANG_SIZE)
		return -EINVAL;

	p->entities[i] = entity;
	p->gang_size = i + 1;
	return i;
}
1064624459cSChristian König
/**
 * amdgpu_cs_p1_ib - first-pass accounting for one IB chunk
 * @p: the CS parser
 * @chunk_ib: userspace description of the IB
 * @num_ibs: per-gang-member IB counters, indexed by job slot
 *
 * Counts the IB against its job slot and enforces the per-ring IB
 * limit.  As a side effect the gang leader index follows the job of the
 * last IB chunk seen.
 *
 * Returns: 0 on success or a negative errno.
 */
static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p,
			   struct drm_amdgpu_cs_chunk_ib *chunk_ib,
			   unsigned int *num_ibs)
{
	int r;

	r = amdgpu_cs_job_idx(p, chunk_ib);
	if (r < 0)
		return r;

	/* Cap the number of IBs a single job may carry on this ring type. */
	if (num_ibs[r] >= amdgpu_ring_max_ibs(chunk_ib->ip_type))
		return -EINVAL;

	++(num_ibs[r]);
	/* The job receiving the last IB chunk becomes the gang leader. */
	p->gang_leader_idx = r;
	return 0;
}
12488c98d54SChristian König
/**
 * amdgpu_cs_p1_user_fence - resolve the user fence BO for this CS
 * @p: the CS parser
 * @data: userspace fence chunk (GEM handle + byte offset)
 * @offset: resulting offset of the fence inside the BO
 *
 * Looks up the GEM object, keeps an amdgpu_bo reference in p->uf_bo
 * (released by the parser fini path) and validates that the BO is a
 * single page, the fence offset leaves room for the 8-byte fence value,
 * and the BO is not a userptr BO.
 *
 * Returns: 0 on success or -EINVAL.
 */
static int amdgpu_cs_p1_user_fence(struct amdgpu_cs_parser *p,
				   struct drm_amdgpu_cs_chunk_fence *data,
				   uint32_t *offset)
{
	struct drm_gem_object *gobj;
	unsigned long size;

	gobj = drm_gem_object_lookup(p->filp, data->handle);
	if (gobj == NULL)
		return -EINVAL;

	/* Hold our own BO reference; the GEM lookup reference is dropped
	 * right away.
	 */
	p->uf_bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
	drm_gem_object_put(gobj);

	/* Fence BO must be exactly one page and the 8-byte fence value
	 * must fit entirely inside it.
	 */
	size = amdgpu_bo_size(p->uf_bo);
	if (size != PAGE_SIZE || data->offset > (size - 8))
		return -EINVAL;

	/* Userptr BOs can't be used as fence targets. */
	if (amdgpu_ttm_tt_get_usermm(p->uf_bo->tbo.ttm))
		return -EINVAL;

	*offset = data->offset;
	return 0;
}
14991acbeb6SChristian König
amdgpu_cs_p1_bo_handles(struct amdgpu_cs_parser * p,struct drm_amdgpu_bo_list_in * data)15088c98d54SChristian König static int amdgpu_cs_p1_bo_handles(struct amdgpu_cs_parser *p,
151964d0fbfSAndrey Grodzovsky struct drm_amdgpu_bo_list_in *data)
152964d0fbfSAndrey Grodzovsky {
15388c98d54SChristian König struct drm_amdgpu_bo_list_entry *info;
154964d0fbfSAndrey Grodzovsky int r;
155964d0fbfSAndrey Grodzovsky
156964d0fbfSAndrey Grodzovsky r = amdgpu_bo_create_list_entry_array(data, &info);
157964d0fbfSAndrey Grodzovsky if (r)
158964d0fbfSAndrey Grodzovsky return r;
159964d0fbfSAndrey Grodzovsky
160964d0fbfSAndrey Grodzovsky r = amdgpu_bo_list_create(p->adev, p->filp, info, data->bo_number,
161964d0fbfSAndrey Grodzovsky &p->bo_list);
162964d0fbfSAndrey Grodzovsky if (r)
163964d0fbfSAndrey Grodzovsky goto error_free;
164964d0fbfSAndrey Grodzovsky
165964d0fbfSAndrey Grodzovsky kvfree(info);
166964d0fbfSAndrey Grodzovsky return 0;
167964d0fbfSAndrey Grodzovsky
168964d0fbfSAndrey Grodzovsky error_free:
169964d0fbfSAndrey Grodzovsky kvfree(info);
170964d0fbfSAndrey Grodzovsky
171964d0fbfSAndrey Grodzovsky return r;
172964d0fbfSAndrey Grodzovsky }
173964d0fbfSAndrey Grodzovsky
/* Copy the data from userspace and go over it the first time */
/**
 * amdgpu_cs_pass1 - copy all chunks from userspace and pre-validate them
 * @p: the CS parser
 * @cs: the userspace CS ioctl argument
 *
 * Copies the chunk pointer array and every chunk body into kernel
 * memory, performs first-pass handling (IB accounting, user fence and
 * BO-list resolution), allocates one job per gang member and picks the
 * gang leader.  Chunks only processed in pass 2 are merely size-checked
 * here.
 *
 * On failure all partially copied chunk data is freed via the goto
 * cleanup chain; on success p->chunks stays allocated for pass 2.
 *
 * Returns: 0 on success or a negative errno.
 */
static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
			   union drm_amdgpu_cs *cs)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	/* IBs requested per gang member, filled in by amdgpu_cs_p1_ib() */
	unsigned int num_ibs[AMDGPU_CS_GANG_SIZE] = { };
	struct amdgpu_vm *vm = &fpriv->vm;
	uint64_t *chunk_array_user;
	uint64_t *chunk_array;
	uint32_t uf_offset = 0;
	size_t size;
	int ret;
	int i;

	chunk_array = kvmalloc_array(cs->in.num_chunks, sizeof(uint64_t),
				     GFP_KERNEL);
	if (!chunk_array)
		return -ENOMEM;

	/* get chunks */
	chunk_array_user = u64_to_user_ptr(cs->in.chunks);
	if (copy_from_user(chunk_array, chunk_array_user,
			   sizeof(uint64_t)*cs->in.num_chunks)) {
		ret = -EFAULT;
		goto free_chunk;
	}

	p->nchunks = cs->in.num_chunks;
	p->chunks = kvmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk),
			    GFP_KERNEL);
	if (!p->chunks) {
		ret = -ENOMEM;
		goto free_chunk;
	}

	for (i = 0; i < p->nchunks; i++) {
		struct drm_amdgpu_cs_chunk __user *chunk_ptr = NULL;
		struct drm_amdgpu_cs_chunk user_chunk;
		uint32_t __user *cdata;

		chunk_ptr = u64_to_user_ptr(chunk_array[i]);
		if (copy_from_user(&user_chunk, chunk_ptr,
				       sizeof(struct drm_amdgpu_cs_chunk))) {
			ret = -EFAULT;
			/* kdata[i] was never allocated; start cleanup at i-1 */
			i--;
			goto free_partial_kdata;
		}
		p->chunks[i].chunk_id = user_chunk.chunk_id;
		p->chunks[i].length_dw = user_chunk.length_dw;

		size = p->chunks[i].length_dw;
		cdata = u64_to_user_ptr(user_chunk.chunk_data);

		p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t),
						    GFP_KERNEL);
		if (p->chunks[i].kdata == NULL) {
			ret = -ENOMEM;
			/* allocation failed, nothing to free at index i */
			i--;
			goto free_partial_kdata;
		}
		/* from here on "size" is in bytes, not dwords */
		size *= sizeof(uint32_t);
		if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
			ret = -EFAULT;
			goto free_partial_kdata;
		}

		/* Assume the worst on the following checks */
		ret = -EINVAL;
		switch (p->chunks[i].chunk_id) {
		case AMDGPU_CHUNK_ID_IB:
			if (size < sizeof(struct drm_amdgpu_cs_chunk_ib))
				goto free_partial_kdata;

			ret = amdgpu_cs_p1_ib(p, p->chunks[i].kdata, num_ibs);
			if (ret)
				goto free_partial_kdata;
			break;

		case AMDGPU_CHUNK_ID_FENCE:
			if (size < sizeof(struct drm_amdgpu_cs_chunk_fence))
				goto free_partial_kdata;

			ret = amdgpu_cs_p1_user_fence(p, p->chunks[i].kdata,
						      &uf_offset);
			if (ret)
				goto free_partial_kdata;
			break;

		case AMDGPU_CHUNK_ID_BO_HANDLES:
			if (size < sizeof(struct drm_amdgpu_bo_list_in))
				goto free_partial_kdata;

			/* Only a single BO list is allowed to simplify handling. */
			if (p->bo_list)
				goto free_partial_kdata;

			ret = amdgpu_cs_p1_bo_handles(p, p->chunks[i].kdata);
			if (ret)
				goto free_partial_kdata;
			break;

		/* These chunk types are handled in pass 2; only their size
		 * was validated by the copy above.
		 */
		case AMDGPU_CHUNK_ID_DEPENDENCIES:
		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
		case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
		case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
			break;

		default:
			goto free_partial_kdata;
		}
	}

	/* A submission must contain at least one IB chunk. */
	if (!p->gang_size) {
		ret = -EINVAL;
		goto free_all_kdata;
	}

	for (i = 0; i < p->gang_size; ++i) {
		ret = amdgpu_job_alloc(p->adev, vm, p->entities[i], vm,
				       num_ibs[i], &p->jobs[i]);
		if (ret)
			goto free_all_kdata;
		p->jobs[i]->enforce_isolation = p->adev->enforce_isolation[fpriv->xcp_id];
	}
	p->gang_leader = p->jobs[p->gang_leader_idx];

	/* Bail out if the context was reset since the job was created. */
	if (p->ctx->generation != p->gang_leader->generation) {
		ret = -ECANCELED;
		goto free_all_kdata;
	}

	if (p->uf_bo)
		p->gang_leader->uf_addr = uf_offset;
	kvfree(chunk_array);

	/* Use this opportunity to fill in task info for the vm */
	amdgpu_vm_set_task_info(vm);

	return 0;

free_all_kdata:
	i = p->nchunks - 1;
free_partial_kdata:
	for (; i >= 0; i--)
		kvfree(p->chunks[i].kdata);
	kvfree(p->chunks);
	p->chunks = NULL;
	p->nchunks = 0;
free_chunk:
	kvfree(chunk_array);

	return ret;
}
330d38ceaf9SAlex Deucher
/**
 * amdgpu_cs_p2_ib - second-pass handling of one IB chunk
 * @p: the CS parser
 * @chunk: the IB chunk (kdata already copied in pass 1)
 * @ce_preempt: running count of preemptible CE IBs in this submission
 * @de_preempt: running count of preemptible DE IBs in this submission
 *
 * Attaches the IB to the next free slot of its job, validates user
 * fence and preemption constraints and allocates the IB memory.
 *
 * Returns: 0 on success or a negative errno.
 */
static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p,
			   struct amdgpu_cs_chunk *chunk,
			   unsigned int *ce_preempt,
			   unsigned int *de_preempt)
{
	struct drm_amdgpu_cs_chunk_ib *chunk_ib = chunk->kdata;
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_vm *vm = &fpriv->vm;
	struct amdgpu_ring *ring;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	int r;

	/* Same lookup as pass 1, so r is the job slot for this IB. */
	r = amdgpu_cs_job_idx(p, chunk_ib);
	if (r < 0)
		return r;

	job = p->jobs[r];
	ring = amdgpu_job_ring(job);
	ib = &job->ibs[job->num_ibs++];

	/* MM engine doesn't support user fences */
	if (p->uf_bo && ring->funcs->no_user_fence)
		return -EINVAL;

	if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
	    chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
		if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
			(*ce_preempt)++;
		else
			(*de_preempt)++;

		/* Each GFX command submit allows only 1 IB max
		 * preemptible for CE & DE */
		if (*ce_preempt > 1 || *de_preempt > 1)
			return -EINVAL;
	}

	if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
		job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT;

	/* Only allocate IB space when the kernel has to parse the CS;
	 * otherwise the IB is executed from the user's VA directly.
	 */
	r =  amdgpu_ib_get(p->adev, vm, ring->funcs->parse_cs ?
			   chunk_ib->ib_bytes : 0,
			   AMDGPU_IB_POOL_DELAYED, ib);
	if (r) {
		DRM_ERROR("Failed to get ib !\n");
		return r;
	}

	ib->gpu_addr = chunk_ib->va_start;
	ib->length_dw = chunk_ib->ib_bytes / 4;
	ib->flags = chunk_ib->flags;
	return 0;
}
385d4e8ad90SChristian König
/**
 * amdgpu_cs_p2_dependencies - add fence dependencies from a chunk
 * @p: the CS parser
 * @chunk: DEPENDENCIES or SCHEDULED_DEPENDENCIES chunk
 *
 * Resolves each (ctx, entity, seq) triple to a fence and adds it to the
 * parser's sync object.  For SCHEDULED_DEPENDENCIES only the point
 * where the dependency is scheduled (not finished) is waited on.
 *
 * Returns: 0 on success or a negative errno.
 */
static int amdgpu_cs_p2_dependencies(struct amdgpu_cs_parser *p,
				     struct amdgpu_cs_chunk *chunk)
{
	struct drm_amdgpu_cs_chunk_dep *deps = chunk->kdata;
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	unsigned int num_deps;
	int i, r;

	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_dep);

	for (i = 0; i < num_deps; ++i) {
		struct amdgpu_ctx *ctx;
		struct drm_sched_entity *entity;
		struct dma_fence *fence;

		ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
		if (ctx == NULL)
			return -EINVAL;

		r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type,
					  deps[i].ip_instance,
					  deps[i].ring, &entity);
		if (r) {
			amdgpu_ctx_put(ctx);
			return r;
		}

		/* The ctx reference is only needed for the fence lookup. */
		fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle);
		amdgpu_ctx_put(ctx);

		if (IS_ERR(fence))
			return PTR_ERR(fence);
		else if (!fence)
			/* Dependency already signaled and retired. */
			continue;

		/* Swap the finished fence for its scheduled counterpart. */
		if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
			struct drm_sched_fence *s_fence;
			struct dma_fence *old = fence;

			s_fence = to_drm_sched_fence(fence);
			fence = dma_fence_get(&s_fence->scheduled);
			dma_fence_put(old);
		}

		/* amdgpu_sync_fence() takes its own reference. */
		r = amdgpu_sync_fence(&p->sync, fence, GFP_KERNEL);
		dma_fence_put(fence);
		if (r)
			return r;
	}
	return 0;
}
438d4e8ad90SChristian König
/**
 * amdgpu_syncobj_lookup_and_add - add a syncobj fence as a dependency
 * @p: the CS parser
 * @handle: syncobj handle to look up
 * @point: timeline point (0 for binary syncobjs)
 * @flags: drm_syncobj_find_fence() lookup flags
 *
 * Looks up the fence behind @handle/@point and adds it to the parser's
 * sync object; the lookup reference is dropped afterwards because
 * amdgpu_sync_fence() keeps its own.
 *
 * Returns: 0 on success or a negative errno.
 */
static int amdgpu_syncobj_lookup_and_add(struct amdgpu_cs_parser *p,
					 uint32_t handle, u64 point,
					 u64 flags)
{
	struct dma_fence *fence;
	int r;

	r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence);
	if (r) {
		DRM_ERROR("syncobj %u failed to find fence @ %llu (%d)!\n",
			  handle, point, r);
		return r;
	}

	r = amdgpu_sync_fence(&p->sync, fence, GFP_KERNEL);
	dma_fence_put(fence);
	return r;
}
457d4e8ad90SChristian König
amdgpu_cs_p2_syncobj_in(struct amdgpu_cs_parser * p,struct amdgpu_cs_chunk * chunk)458f4b92fcdSChristian König static int amdgpu_cs_p2_syncobj_in(struct amdgpu_cs_parser *p,
459d4e8ad90SChristian König struct amdgpu_cs_chunk *chunk)
460d4e8ad90SChristian König {
461f4b92fcdSChristian König struct drm_amdgpu_cs_chunk_sem *deps = chunk->kdata;
4629e690184SSrinivasan Shanmugam unsigned int num_deps;
463d4e8ad90SChristian König int i, r;
464d4e8ad90SChristian König
465d4e8ad90SChristian König num_deps = chunk->length_dw * 4 /
466d4e8ad90SChristian König sizeof(struct drm_amdgpu_cs_chunk_sem);
467d4e8ad90SChristian König for (i = 0; i < num_deps; ++i) {
468f4b92fcdSChristian König r = amdgpu_syncobj_lookup_and_add(p, deps[i].handle, 0, 0);
469d4e8ad90SChristian König if (r)
470d4e8ad90SChristian König return r;
471d4e8ad90SChristian König }
472d4e8ad90SChristian König
473d4e8ad90SChristian König return 0;
474d4e8ad90SChristian König }
475d4e8ad90SChristian König
amdgpu_cs_p2_syncobj_timeline_wait(struct amdgpu_cs_parser * p,struct amdgpu_cs_chunk * chunk)476f4b92fcdSChristian König static int amdgpu_cs_p2_syncobj_timeline_wait(struct amdgpu_cs_parser *p,
477d4e8ad90SChristian König struct amdgpu_cs_chunk *chunk)
478d4e8ad90SChristian König {
479f4b92fcdSChristian König struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps = chunk->kdata;
4809e690184SSrinivasan Shanmugam unsigned int num_deps;
481d4e8ad90SChristian König int i, r;
482d4e8ad90SChristian König
483d4e8ad90SChristian König num_deps = chunk->length_dw * 4 /
484d4e8ad90SChristian König sizeof(struct drm_amdgpu_cs_chunk_syncobj);
485d4e8ad90SChristian König for (i = 0; i < num_deps; ++i) {
486f4b92fcdSChristian König r = amdgpu_syncobj_lookup_and_add(p, syncobj_deps[i].handle,
487d4e8ad90SChristian König syncobj_deps[i].point,
488d4e8ad90SChristian König syncobj_deps[i].flags);
489d4e8ad90SChristian König if (r)
490d4e8ad90SChristian König return r;
491d4e8ad90SChristian König }
492d4e8ad90SChristian König
493d4e8ad90SChristian König return 0;
494d4e8ad90SChristian König }
495d4e8ad90SChristian König
/**
 * amdgpu_cs_p2_syncobj_out - collect binary syncobjs to signal
 * @p: the CS parser
 * @chunk: SYNCOBJ_OUT chunk containing an array of syncobj handles
 *
 * Allocates p->post_deps and stores a reference to every listed
 * syncobj; they are signaled with the CS fence after submission.  Only
 * one signal chunk per submission is allowed.
 *
 * On partial failure the entries recorded so far (tracked via
 * p->num_post_deps) are released by the parser fini path.
 *
 * Returns: 0 on success or a negative errno.
 */
static int amdgpu_cs_p2_syncobj_out(struct amdgpu_cs_parser *p,
				    struct amdgpu_cs_chunk *chunk)
{
	struct drm_amdgpu_cs_chunk_sem *deps = chunk->kdata;
	unsigned int num_deps;
	int i;

	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_sem);

	/* Only a single signal chunk is supported. */
	if (p->post_deps)
		return -EINVAL;

	p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
				     GFP_KERNEL);
	p->num_post_deps = 0;

	if (!p->post_deps)
		return -ENOMEM;


	for (i = 0; i < num_deps; ++i) {
		p->post_deps[i].syncobj =
			drm_syncobj_find(p->filp, deps[i].handle);
		if (!p->post_deps[i].syncobj)
			return -EINVAL;
		/* Binary syncobjs need no fence chain or timeline point. */
		p->post_deps[i].chain = NULL;
		p->post_deps[i].point = 0;
		/* Count only fully initialized entries so cleanup is safe. */
		p->num_post_deps++;
	}

	return 0;
}
529d4e8ad90SChristian König
/**
 * amdgpu_cs_p2_syncobj_timeline_signal - collect timeline syncobjs to signal
 * @p: the CS parser
 * @chunk: SYNCOBJ_TIMELINE_SIGNAL chunk (handle + point per entry)
 *
 * Like amdgpu_cs_p2_syncobj_out() but for timeline syncobjs: entries
 * with a non-zero point get a pre-allocated dma_fence_chain node so the
 * signal path cannot fail on allocation.  Only one signal chunk per
 * submission is allowed.
 *
 * Returns: 0 on success or a negative errno.
 */
static int amdgpu_cs_p2_syncobj_timeline_signal(struct amdgpu_cs_parser *p,
						struct amdgpu_cs_chunk *chunk)
{
	struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps = chunk->kdata;
	unsigned int num_deps;
	int i;

	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_syncobj);

	/* Only a single signal chunk is supported. */
	if (p->post_deps)
		return -EINVAL;

	p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
				     GFP_KERNEL);
	p->num_post_deps = 0;

	if (!p->post_deps)
		return -ENOMEM;

	for (i = 0; i < num_deps; ++i) {
		struct amdgpu_cs_post_dep *dep = &p->post_deps[i];

		dep->chain = NULL;
		if (syncobj_deps[i].point) {
			/* Pre-allocate the chain node; signaling must not fail. */
			dep->chain = dma_fence_chain_alloc();
			if (!dep->chain)
				return -ENOMEM;
		}

		dep->syncobj = drm_syncobj_find(p->filp,
						syncobj_deps[i].handle);
		if (!dep->syncobj) {
			/* Entry not counted yet, so free its chain here. */
			dma_fence_chain_free(dep->chain);
			return -EINVAL;
		}
		dep->point = syncobj_deps[i].point;
		/* Count only fully initialized entries so cleanup is safe. */
		p->num_post_deps++;
	}

	return 0;
}
572d4e8ad90SChristian König
amdgpu_cs_p2_shadow(struct amdgpu_cs_parser * p,struct amdgpu_cs_chunk * chunk)573ac928705SChristian König static int amdgpu_cs_p2_shadow(struct amdgpu_cs_parser *p,
574ac928705SChristian König struct amdgpu_cs_chunk *chunk)
575ac928705SChristian König {
576ac928705SChristian König struct drm_amdgpu_cs_chunk_cp_gfx_shadow *shadow = chunk->kdata;
577ac928705SChristian König int i;
578ac928705SChristian König
579ac928705SChristian König if (shadow->flags & ~AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW)
580ac928705SChristian König return -EINVAL;
581ac928705SChristian König
582ac928705SChristian König for (i = 0; i < p->gang_size; ++i) {
583ac928705SChristian König p->jobs[i]->shadow_va = shadow->shadow_va;
584ac928705SChristian König p->jobs[i]->csa_va = shadow->csa_va;
585ac928705SChristian König p->jobs[i]->gds_va = shadow->gds_va;
586ac928705SChristian König p->jobs[i]->init_shadow =
587ac928705SChristian König shadow->flags & AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW;
588ac928705SChristian König }
589ac928705SChristian König
590ac928705SChristian König return 0;
591ac928705SChristian König }
592ac928705SChristian König
/**
 * amdgpu_cs_pass2 - second pass over all chunks copied in pass 1
 * @p: the CS parser
 *
 * Dispatches each chunk to its second-pass handler: attaches IBs to
 * jobs, gathers wait dependencies, collects syncobjs to signal and
 * applies the GFX shadow setup.  Chunk types fully handled in pass 1
 * (FENCE, BO_HANDLES) are skipped by the switch.
 *
 * Returns: 0 on success or the first handler error.
 */
static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
{
	/* Preemptible CE/DE IB counters shared across all IB chunks */
	unsigned int ce_preempt = 0, de_preempt = 0;
	int i, r;

	for (i = 0; i < p->nchunks; ++i) {
		struct amdgpu_cs_chunk *chunk;

		chunk = &p->chunks[i];

		switch (chunk->chunk_id) {
		case AMDGPU_CHUNK_ID_IB:
			r = amdgpu_cs_p2_ib(p, chunk, &ce_preempt, &de_preempt);
			if (r)
				return r;
			break;
		case AMDGPU_CHUNK_ID_DEPENDENCIES:
		case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
			r = amdgpu_cs_p2_dependencies(p, chunk);
			if (r)
				return r;
			break;
		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
			r = amdgpu_cs_p2_syncobj_in(p, chunk);
			if (r)
				return r;
			break;
		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
			r = amdgpu_cs_p2_syncobj_out(p, chunk);
			if (r)
				return r;
			break;
		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
			r = amdgpu_cs_p2_syncobj_timeline_wait(p, chunk);
			if (r)
				return r;
			break;
		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
			r = amdgpu_cs_p2_syncobj_timeline_signal(p, chunk);
			if (r)
				return r;
			break;
		case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
			r = amdgpu_cs_p2_shadow(p, chunk);
			if (r)
				return r;
			break;
		}
	}

	return 0;
}
645d4e8ad90SChristian König
64695844d20SMarek Olšák /* Convert microseconds to bytes. */
us_to_bytes(struct amdgpu_device * adev,s64 us)64795844d20SMarek Olšák static u64 us_to_bytes(struct amdgpu_device *adev, s64 us)
64895844d20SMarek Olšák {
64995844d20SMarek Olšák if (us <= 0 || !adev->mm_stats.log2_max_MBps)
65095844d20SMarek Olšák return 0;
65195844d20SMarek Olšák
65295844d20SMarek Olšák /* Since accum_us is incremented by a million per second, just
65395844d20SMarek Olšák * multiply it by the number of MB/s to get the number of bytes.
65495844d20SMarek Olšák */
65595844d20SMarek Olšák return us << adev->mm_stats.log2_max_MBps;
65695844d20SMarek Olšák }
65795844d20SMarek Olšák
bytes_to_us(struct amdgpu_device * adev,u64 bytes)65895844d20SMarek Olšák static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes)
65995844d20SMarek Olšák {
66095844d20SMarek Olšák if (!adev->mm_stats.log2_max_MBps)
66195844d20SMarek Olšák return 0;
66295844d20SMarek Olšák
66395844d20SMarek Olšák return bytes >> adev->mm_stats.log2_max_MBps;
66495844d20SMarek Olšák }
66595844d20SMarek Olšák
/* Returns how many bytes TTM can move right now. If no bytes can be moved,
 * it returns 0. If it returns non-zero, it's OK to move at least one buffer,
 * which means it can go over the threshold once. If that happens, the driver
 * will be in debt and no other buffer migrations can be done until that debt
 * is repaid.
 *
 * This approach allows moving a buffer of any size (it's important to allow
 * that).
 *
 * The currency is simply time in microseconds and it increases as the clock
 * ticks. The accumulated microseconds (us) are converted to bytes and
 * returned via @max_bytes (all VRAM) and @max_vis_bytes (CPU-visible VRAM).
 */
static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
					      u64 *max_bytes,
					      u64 *max_vis_bytes)
{
	s64 time_us, increment_us;
	u64 free_vram, total_vram, used_vram;
	/* Allow a maximum of 200 accumulated ms. This is basically per-IB
	 * throttling.
	 *
	 * It means that in order to get full max MBps, at least 5 IBs per
	 * second must be submitted and not more than 200ms apart from each
	 * other.
	 */
	const s64 us_upper_bound = 200000;

	/* log2_max_MBps == 0 means the move-throttling mechanism is
	 * disabled entirely, so no (optional) buffer moves are allowed.
	 */
	if (!adev->mm_stats.log2_max_MBps) {
		*max_bytes = 0;
		*max_vis_bytes = 0;
		return;
	}

	/* Pinned VRAM cannot be moved, so exclude it from the total. */
	total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size);
	used_vram = ttm_resource_manager_usage(&adev->mman.vram_mgr.manager);
	free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;

	spin_lock(&adev->mm_stats.lock);

	/* Increase the amount of accumulated us. */
	time_us = ktime_to_us(ktime_get());
	increment_us = time_us - adev->mm_stats.last_update_us;
	adev->mm_stats.last_update_us = time_us;
	adev->mm_stats.accum_us = min(adev->mm_stats.accum_us + increment_us,
				      us_upper_bound);

	/* This prevents the short period of low performance when the VRAM
	 * usage is low and the driver is in debt or doesn't have enough
	 * accumulated us to fill VRAM quickly.
	 *
	 * The situation can occur in these cases:
	 * - a lot of VRAM is freed by userspace
	 * - the presence of a big buffer causes a lot of evictions
	 *   (solution: split buffers into smaller ones)
	 *
	 * If 128 MB or 1/8th of VRAM is free, start filling it now by setting
	 * accum_us to a positive number.
	 */
	if (free_vram >= 128 * 1024 * 1024 || free_vram >= total_vram / 8) {
		s64 min_us;

		/* Be more aggressive on dGPUs. Try to fill a portion of free
		 * VRAM now.
		 */
		if (!(adev->flags & AMD_IS_APU))
			min_us = bytes_to_us(adev, free_vram / 4);
		else
			min_us = 0; /* Reset accum_us on APUs. */

		adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us);
	}

	/* This is set to 0 if the driver is in debt to disallow (optional)
	 * buffer moves.
	 */
	*max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);

	/* Do the same for visible VRAM if half of it is free */
	if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) {
		u64 total_vis_vram = adev->gmc.visible_vram_size;
		u64 used_vis_vram =
			amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr);

		if (used_vis_vram < total_vis_vram) {
			u64 free_vis_vram = total_vis_vram - used_vis_vram;

			adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis +
							  increment_us, us_upper_bound);

			if (free_vis_vram >= total_vis_vram / 2)
				adev->mm_stats.accum_us_vis =
					max(bytes_to_us(adev, free_vis_vram / 2),
					    adev->mm_stats.accum_us_vis);
		}

		*max_vis_bytes = us_to_bytes(adev, adev->mm_stats.accum_us_vis);
	} else {
		/* All VRAM is CPU-visible; no separate visible budget needed. */
		*max_vis_bytes = 0;
	}

	spin_unlock(&adev->mm_stats.lock);
}
76995844d20SMarek Olšák
77095844d20SMarek Olšák /* Report how many bytes have really been moved for the last command
77195844d20SMarek Olšák * submission. This can result in a debt that can stop buffer migrations
77295844d20SMarek Olšák * temporarily.
77395844d20SMarek Olšák */
amdgpu_cs_report_moved_bytes(struct amdgpu_device * adev,u64 num_bytes,u64 num_vis_bytes)77400f06b24SJohn Brooks void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
77500f06b24SJohn Brooks u64 num_vis_bytes)
77695844d20SMarek Olšák {
77795844d20SMarek Olšák spin_lock(&adev->mm_stats.lock);
77895844d20SMarek Olšák adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes);
77900f06b24SJohn Brooks adev->mm_stats.accum_us_vis -= bytes_to_us(adev, num_vis_bytes);
78095844d20SMarek Olšák spin_unlock(&adev->mm_stats.lock);
781d38ceaf9SAlex Deucher }
782d38ceaf9SAlex Deucher
/* Validate a single BO for command submission, choosing its placement
 * domain based on how much move budget the parser has left. Used both
 * directly and as a callback (hence the void *param, which is the
 * struct amdgpu_cs_parser). Updates p->bytes_moved/bytes_moved_vis with
 * what TTM actually moved. Returns 0 on success or a negative errno.
 */
static int amdgpu_cs_bo_validate(void *param, struct amdgpu_bo *bo)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	struct amdgpu_cs_parser *p = param;
	struct ttm_operation_ctx ctx = {
		.interruptible = true,
		.no_wait_gpu = false,
		.resv = bo->tbo.base.resv
	};
	uint32_t domain;
	int r;

	/* Pinned BOs can't be moved anyway, nothing to validate. */
	if (bo->tbo.pin_count)
		return 0;

	/* Don't move this buffer if we have depleted our allowance
	 * to move it. Don't move anything if the threshold is zero.
	 */
	if (p->bytes_moved < p->bytes_moved_threshold &&
	    (!bo->tbo.base.dma_buf ||
	     list_empty(&bo->tbo.base.dma_buf->attachments))) {
		if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
		    (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
			/* And don't move a CPU_ACCESS_REQUIRED BO to limited
			 * visible VRAM if we've depleted our allowance to do
			 * that.
			 */
			if (p->bytes_moved_vis < p->bytes_moved_vis_threshold)
				domain = bo->preferred_domains;
			else
				domain = bo->allowed_domains;
		} else {
			domain = bo->preferred_domains;
		}
	} else {
		domain = bo->allowed_domains;
	}

retry:
	amdgpu_bo_placement_from_domain(bo, domain);
	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);

	/* Account moved bytes even on failure; ctx reports what TTM did. */
	p->bytes_moved += ctx.bytes_moved;
	if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
	    amdgpu_res_cpu_visible(adev, bo->tbo.resource))
		p->bytes_moved_vis += ctx.bytes_moved;

	/* Fall back to the wider allowed domains if the preferred
	 * placement ran out of space.
	 */
	if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
		domain = bo->allowed_domains;
		goto retry;
	}

	return r;
}
83714fd833eSChunming Zhou
/* Gather, lock and validate all buffer objects needed by this command
 * submission: resolve (or create) the BO list, pin down userptr pages,
 * lock everything with drm_exec, validate placements within the move
 * budget, and attach the GDS/GWS/OA resources to every gang job.
 *
 * NOTE(review): on success the bo_list_mutex is intentionally left held
 * (only the error path unlocks it) — presumably released later in the CS
 * flow; confirm against the callers before changing this.
 *
 * Returns 0 on success or a negative errno.
 */
static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
				union drm_amdgpu_cs *cs)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct ttm_operation_ctx ctx = { true, false };
	struct amdgpu_vm *vm = &fpriv->vm;
	struct amdgpu_bo_list_entry *e;
	struct drm_gem_object *obj;
	unsigned long index;
	unsigned int i;
	int r;

	/* p->bo_list could already be assigned if AMDGPU_CHUNK_ID_BO_HANDLES is present */
	if (cs->in.bo_list_handle) {
		/* Both a chunk-provided list and a handle is ambiguous. */
		if (p->bo_list)
			return -EINVAL;

		r = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle,
				       &p->bo_list);
		if (r)
			return r;
	} else if (!p->bo_list) {
		/* Create a empty bo_list when no handle is provided */
		r = amdgpu_bo_list_create(p->adev, p->filp, NULL, 0,
					  &p->bo_list);
		if (r)
			return r;
	}

	mutex_lock(&p->bo_list->bo_list_mutex);

	/* Get userptr backing pages. If pages are updated after registered
	 * in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate() will do
	 * amdgpu_ttm_backend_bind() to flush and invalidate new pages
	 */
	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
		bool userpage_invalidated = false;
		struct amdgpu_bo *bo = e->bo;
		int i;

		e->user_pages = kvcalloc(bo->tbo.ttm->num_pages,
					 sizeof(struct page *),
					 GFP_KERNEL);
		if (!e->user_pages) {
			DRM_ERROR("kvmalloc_array failure\n");
			r = -ENOMEM;
			goto out_free_user_pages;
		}

		r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages, &e->range);
		if (r) {
			kvfree(e->user_pages);
			e->user_pages = NULL;
			goto out_free_user_pages;
		}

		/* Detect whether any backing page changed since registration. */
		for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
			if (bo->tbo.ttm->pages[i] != e->user_pages[i]) {
				userpage_invalidated = true;
				break;
			}
		}
		e->user_invalidated = userpage_invalidated;
	}

	/* Lock the VM page directory, all list BOs and the optional user
	 * fence BO; retried as a whole on eviction contention. One fence
	 * slot for TTM plus one per gang job is reserved on each object.
	 */
	drm_exec_until_all_locked(&p->exec) {
		r = amdgpu_vm_lock_pd(&fpriv->vm, &p->exec, 1 + p->gang_size);
		drm_exec_retry_on_contention(&p->exec);
		if (unlikely(r))
			goto out_free_user_pages;

		amdgpu_bo_list_for_each_entry(e, p->bo_list) {
			/* One fence for TTM and one for each CS job */
			r = drm_exec_prepare_obj(&p->exec, &e->bo->tbo.base,
						 1 + p->gang_size);
			drm_exec_retry_on_contention(&p->exec);
			if (unlikely(r))
				goto out_free_user_pages;

			e->bo_va = amdgpu_vm_bo_find(vm, e->bo);
		}

		if (p->uf_bo) {
			r = drm_exec_prepare_obj(&p->exec, &p->uf_bo->tbo.base,
						 1 + p->gang_size);
			drm_exec_retry_on_contention(&p->exec);
			if (unlikely(r))
				goto out_free_user_pages;
		}
	}

	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
		struct mm_struct *usermm;

		/* Userptr BOs may only be submitted by the owning process. */
		usermm = amdgpu_ttm_tt_get_usermm(e->bo->tbo.ttm);
		if (usermm && usermm != current->mm) {
			r = -EPERM;
			goto out_free_user_pages;
		}

		/* Rebind invalidated userptr BOs with the fresh page array. */
		if (amdgpu_ttm_tt_is_userptr(e->bo->tbo.ttm) &&
		    e->user_invalidated && e->user_pages) {
			amdgpu_bo_placement_from_domain(e->bo,
							AMDGPU_GEM_DOMAIN_CPU);
			r = ttm_bo_validate(&e->bo->tbo, &e->bo->placement,
					    &ctx);
			if (r)
				goto out_free_user_pages;

			amdgpu_ttm_tt_set_user_pages(e->bo->tbo.ttm,
						     e->user_pages);
		}

		kvfree(e->user_pages);
		e->user_pages = NULL;
	}

	/* Establish the per-submission buffer-move budget before validating. */
	amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
					  &p->bytes_moved_vis_threshold);
	p->bytes_moved = 0;
	p->bytes_moved_vis = 0;

	r = amdgpu_vm_validate(p->adev, &fpriv->vm, NULL,
			       amdgpu_cs_bo_validate, p);
	if (r) {
		DRM_ERROR("amdgpu_vm_validate() failed.\n");
		goto out_free_user_pages;
	}

	drm_exec_for_each_locked_object(&p->exec, index, obj) {
		r = amdgpu_cs_bo_validate(p, gem_to_amdgpu_bo(obj));
		if (unlikely(r))
			goto out_free_user_pages;
	}

	if (p->uf_bo) {
		/* The user fence BO needs a GART mapping for its GPU address. */
		r = amdgpu_ttm_alloc_gart(&p->uf_bo->tbo);
		if (unlikely(r))
			goto out_free_user_pages;

		p->gang_leader->uf_addr += amdgpu_bo_gpu_offset(p->uf_bo);
	}

	/* Charge the bytes actually moved against the global budget. */
	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
				     p->bytes_moved_vis);

	for (i = 0; i < p->gang_size; ++i)
		amdgpu_job_set_resources(p->jobs[i], p->bo_list->gds_obj,
					 p->bo_list->gws_obj,
					 p->bo_list->oa_obj);
	return 0;

out_free_user_pages:
	/* Release any pinned userptr pages before unwinding. */
	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
		struct amdgpu_bo *bo = e->bo;

		if (!e->user_pages)
			continue;
		amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, e->range);
		kvfree(e->user_pages);
		e->user_pages = NULL;
		e->range = NULL;
	}
	mutex_unlock(&p->bo_list->bo_list_mutex);
	return r;
}
1004d38ceaf9SAlex Deucher
trace_amdgpu_cs_ibs(struct amdgpu_cs_parser * p)10054624459cSChristian König static void trace_amdgpu_cs_ibs(struct amdgpu_cs_parser *p)
1006d38ceaf9SAlex Deucher {
10074624459cSChristian König int i, j;
1008d38ceaf9SAlex Deucher
1009d4e8ad90SChristian König if (!trace_amdgpu_cs_enabled())
1010d4e8ad90SChristian König return;
1011e83dfe4dSChristian König
10124624459cSChristian König for (i = 0; i < p->gang_size; ++i) {
10134624459cSChristian König struct amdgpu_job *job = p->jobs[i];
10144624459cSChristian König
10154624459cSChristian König for (j = 0; j < job->num_ibs; ++j)
10164624459cSChristian König trace_amdgpu_cs(p, job, &job->ibs[j]);
10174624459cSChristian König }
1018d38ceaf9SAlex Deucher }
1019d38ceaf9SAlex Deucher
/* For rings that emulate VM access (those providing parse_cs or
 * patch_cs_in_place), map each IB's backing BO and either copy the
 * commands out for parsing or patch them in place. Validates that the
 * IB's VA range lies within its mapping. Returns 0 or a negative errno.
 */
static int amdgpu_cs_patch_ibs(struct amdgpu_cs_parser *p,
			       struct amdgpu_job *job)
{
	struct amdgpu_ring *ring = amdgpu_job_ring(job);
	unsigned int i;
	int r;

	/* Only for UVD/VCE VM emulation */
	if (!ring->funcs->parse_cs && !ring->funcs->patch_cs_in_place)
		return 0;

	for (i = 0; i < job->num_ibs; ++i) {
		struct amdgpu_ib *ib = &job->ibs[i];
		struct amdgpu_bo_va_mapping *m;
		struct amdgpu_bo *aobj;
		uint64_t va_start;
		uint8_t *kptr;

		va_start = ib->gpu_addr & AMDGPU_GMC_HOLE_MASK;
		r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
		if (r) {
			DRM_ERROR("IB va_start is invalid\n");
			return r;
		}

		/* The whole IB must fit inside the found mapping. */
		if ((va_start + ib->length_dw * 4) >
		    (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
			DRM_ERROR("IB va_start+ib_bytes is invalid\n");
			return -EINVAL;
		}

		/* the IB should be reserved at this point */
		r = amdgpu_bo_kmap(aobj, (void **)&kptr);
		if (r)
			return r;

		/* Offset into the kernel mapping to where the IB starts. */
		kptr += va_start - (m->start * AMDGPU_GPU_PAGE_SIZE);

		if (ring->funcs->parse_cs) {
			/* Copy the commands out and parse the copy. */
			memcpy(ib->ptr, kptr, ib->length_dw * 4);
			amdgpu_bo_kunmap(aobj);

			r = amdgpu_ring_parse_cs(ring, p, job, ib);
			if (r)
				return r;

			if (ib->sa_bo)
				ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
		} else {
			/* Patch the commands directly in the mapped BO. */
			ib->ptr = (uint32_t *)kptr;
			r = amdgpu_ring_patch_cs_in_place(ring, p, job, ib);
			amdgpu_bo_kunmap(aobj);
			if (r)
				return r;
		}
	}

	return 0;
}
1079f4b92fcdSChristian König
amdgpu_cs_patch_jobs(struct amdgpu_cs_parser * p)10804624459cSChristian König static int amdgpu_cs_patch_jobs(struct amdgpu_cs_parser *p)
10814624459cSChristian König {
10824624459cSChristian König unsigned int i;
10834624459cSChristian König int r;
10844624459cSChristian König
10854624459cSChristian König for (i = 0; i < p->gang_size; ++i) {
10864624459cSChristian König r = amdgpu_cs_patch_ibs(p, p->jobs[i]);
10874624459cSChristian König if (r)
10884624459cSChristian König return r;
10894624459cSChristian König }
10904624459cSChristian König return 0;
10914624459cSChristian König }
10924624459cSChristian König
/* Update the VM state needed by this submission: clear freed mappings,
 * update the PRT, CSA and per-BO virtual addresses, handle moved BOs and
 * update the page directories — collecting the resulting page-table
 * update fences into p->sync so the jobs wait for them. Also fills in
 * each gang job's vm_pd_addr. Returns 0 or a negative errno.
 */
static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_job *job = p->gang_leader;
	struct amdgpu_device *adev = p->adev;
	struct amdgpu_vm *vm = &fpriv->vm;
	struct amdgpu_bo_list_entry *e;
	struct amdgpu_bo_va *bo_va;
	unsigned int i;
	int r;

	/*
	 * We can't use gang submit on with reserved VMIDs when the VM changes
	 * can't be invalidated by more than one engine at the same time.
	 */
	if (p->gang_size > 1 && !adev->vm_manager.concurrent_flush) {
		for (i = 0; i < p->gang_size; ++i) {
			struct drm_sched_entity *entity = p->entities[i];
			struct drm_gpu_scheduler *sched = entity->rq->sched;
			struct amdgpu_ring *ring = to_amdgpu_ring(sched);

			if (amdgpu_vmid_uses_reserved(vm, ring->vm_hub))
				return -EINVAL;
		}
	}

	r = amdgpu_vm_clear_freed(adev, vm, NULL);
	if (r)
		return r;

	/* Update the PRT (partially resident texture) mappings and make the
	 * jobs wait for the resulting page-table update.
	 */
	r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
	if (r)
		return r;

	r = amdgpu_sync_fence(&p->sync, fpriv->prt_va->last_pt_update,
			      GFP_KERNEL);
	if (r)
		return r;

	/* Same for the CSA (context save area) mapping, when one exists. */
	if (fpriv->csa_va) {
		bo_va = fpriv->csa_va;
		BUG_ON(!bo_va);
		r = amdgpu_vm_bo_update(adev, bo_va, false);
		if (r)
			return r;

		r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update,
				      GFP_KERNEL);
		if (r)
			return r;
	}

	/* FIXME: In theory this loop shouldn't be needed any more when
	 * amdgpu_vm_handle_moved handles all moved BOs that are reserved
	 * with p->ticket. But removing it caused test regressions, so I'm
	 * leaving it here for now.
	 */
	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
		bo_va = e->bo_va;
		if (bo_va == NULL)
			continue;

		r = amdgpu_vm_bo_update(adev, bo_va, false);
		if (r)
			return r;

		r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update,
				      GFP_KERNEL);
		if (r)
			return r;
	}

	r = amdgpu_vm_handle_moved(adev, vm, &p->exec.ticket);
	if (r)
		return r;

	r = amdgpu_vm_update_pdes(adev, vm, false);
	if (r)
		return r;

	r = amdgpu_sync_fence(&p->sync, vm->last_update, GFP_KERNEL);
	if (r)
		return r;

	/* Give every VM-using gang job the page-directory address. */
	for (i = 0; i < p->gang_size; ++i) {
		job = p->jobs[i];

		if (!job->vm)
			continue;

		job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.bo);
	}

	if (adev->debug_vm) {
		/* Invalidate all BOs to test for userspace bugs */
		amdgpu_bo_list_for_each_entry(e, p->bo_list) {
			struct amdgpu_bo *bo = e->bo;

			/* ignore duplicates */
			if (!bo)
				continue;

			amdgpu_vm_bo_invalidate(bo, false);
		}
	}

	return 0;
}
1201d38ceaf9SAlex Deucher
/* Make the submission wait for everything it depends on: the previous
 * fence of the gang leader's context, the reservation-object fences of
 * every locked BO (honouring per-BO explicit-sync), and push the
 * collected dependencies into each gang job. Fences from the gang
 * leader's own scheduler are kept as explicit dependencies to force a
 * pipeline sync. Returns 0 or a negative errno.
 */
static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct drm_gpu_scheduler *sched;
	struct drm_gem_object *obj;
	struct dma_fence *fence;
	unsigned long index;
	unsigned int i;
	int r;

	r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_leader_idx]);
	if (r) {
		/* -ERESTARTSYS just means the wait was interrupted; don't spam. */
		if (r != -ERESTARTSYS)
			DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n");
		return r;
	}

	/* Collect the reservation fences of every locked BO into p->sync. */
	drm_exec_for_each_locked_object(&p->exec, index, obj) {
		struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);

		struct dma_resv *resv = bo->tbo.base.resv;
		enum amdgpu_sync_mode sync_mode;

		sync_mode = amdgpu_bo_explicit_sync(bo) ?
			AMDGPU_SYNC_EXPLICIT : AMDGPU_SYNC_NE_OWNER;
		r = amdgpu_sync_resv(p->adev, &p->sync, resv, sync_mode,
				     &fpriv->vm);
		if (r)
			return r;
	}

	for (i = 0; i < p->gang_size; ++i) {
		r = amdgpu_sync_push_to_job(&p->sync, p->jobs[i]);
		if (r)
			return r;
	}

	sched = p->gang_leader->base.entity->rq->sched;
	while ((fence = amdgpu_sync_get_fence(&p->sync))) {
		struct drm_sched_fence *s_fence = to_drm_sched_fence(fence);

		/*
		 * When we have an dependency it might be necessary to insert a
		 * pipeline sync to make sure that all caches etc are flushed and the
		 * next job actually sees the results from the previous one
		 * before we start executing on the same scheduler ring.
		 */
		if (!s_fence || s_fence->sched != sched) {
			dma_fence_put(fence);
			continue;
		}

		r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence,
				      GFP_KERNEL);
		dma_fence_put(fence);
		if (r)
			return r;
	}
	return 0;
}
12622b48d323SChristian König
amdgpu_cs_post_dependencies(struct amdgpu_cs_parser * p)1263660e8558SDave Airlie static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
1264660e8558SDave Airlie {
1265660e8558SDave Airlie int i;
1266660e8558SDave Airlie
12672624dd15SChunming Zhou for (i = 0; i < p->num_post_deps; ++i) {
12682624dd15SChunming Zhou if (p->post_deps[i].chain && p->post_deps[i].point) {
12692624dd15SChunming Zhou drm_syncobj_add_point(p->post_deps[i].syncobj,
12702624dd15SChunming Zhou p->post_deps[i].chain,
12712624dd15SChunming Zhou p->fence, p->post_deps[i].point);
12722624dd15SChunming Zhou p->post_deps[i].chain = NULL;
12732624dd15SChunming Zhou } else {
12742624dd15SChunming Zhou drm_syncobj_replace_fence(p->post_deps[i].syncobj,
12752624dd15SChunming Zhou p->fence);
12762624dd15SChunming Zhou }
12772624dd15SChunming Zhou }
1278660e8558SDave Airlie }
1279660e8558SDave Airlie
/* Final submission step: arm the jobs, publish the fences and push the
 * gang to the scheduler.
 *
 * On success the fence handle is returned through @cs and both the
 * notifier lock and the BO list mutex are dropped. Returns -EAGAIN when
 * userptr pages were invalidated so userspace restarts the ioctl.
 */
static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
			    union drm_amdgpu_cs *cs)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_job *leader = p->gang_leader;
	struct amdgpu_bo_list_entry *e;
	struct drm_gem_object *gobj;
	unsigned long index;
	unsigned int i;
	uint64_t seq;
	int r;

	for (i = 0; i < p->gang_size; ++i)
		drm_sched_job_arm(&p->jobs[i]->base);

	/* The leader depends on every other gang member being scheduled. */
	for (i = 0; i < p->gang_size; ++i) {
		struct dma_fence *fence;

		if (p->jobs[i] == leader)
			continue;

		fence = &p->jobs[i]->base.s_fence->scheduled;
		dma_fence_get(fence);
		r = drm_sched_job_add_dependency(&leader->base, fence);
		if (r) {
			dma_fence_put(fence);
			return r;
		}
	}

	if (p->gang_size > 1) {
		for (i = 0; i < p->gang_size; ++i)
			amdgpu_job_set_gang_leader(p->jobs[i], leader);
	}

	/* No memory allocation is allowed while holding the notifier lock.
	 * The lock is held until amdgpu_cs_submit is finished and fence is
	 * added to BOs.
	 */
	mutex_lock(&p->adev->notifier_lock);

	/* If userptr are invalidated after amdgpu_cs_parser_bos(), return
	 * -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl.
	 */
	r = 0;
	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
		/* accumulate: any stale userptr entry forces a restart */
		r |= !amdgpu_ttm_tt_get_user_pages_done(e->bo->tbo.ttm,
							e->range);
		e->range = NULL;
	}
	if (r) {
		r = -EAGAIN;
		mutex_unlock(&p->adev->notifier_lock);
		return r;
	}

	p->fence = dma_fence_get(&leader->base.s_fence->finished);
	drm_exec_for_each_locked_object(&p->exec, index, gobj) {

		ttm_bo_move_to_lru_tail_unlocked(&gem_to_amdgpu_bo(gobj)->tbo);

		/* Everybody except for the gang leader uses READ */
		for (i = 0; i < p->gang_size; ++i) {
			if (p->jobs[i] == leader)
				continue;

			dma_resv_add_fence(gobj->resv,
					   &p->jobs[i]->base.s_fence->finished,
					   DMA_RESV_USAGE_READ);
		}

		/* The gang leader is remembered as writer */
		dma_resv_add_fence(gobj->resv, p->fence, DMA_RESV_USAGE_WRITE);
	}

	seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_leader_idx],
				   p->fence);
	amdgpu_cs_post_dependencies(p);

	/* Mark the first submission that carries a preamble IB. */
	if ((leader->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
	    !p->ctx->preamble_presented) {
		leader->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
		p->ctx->preamble_presented = true;
	}

	cs->out.handle = seq;
	leader->uf_sequence = seq;

	amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->exec.ticket);
	for (i = 0; i < p->gang_size; ++i) {
		amdgpu_job_free_resources(p->jobs[i]);
		trace_amdgpu_cs_ioctl(p->jobs[i]);
		drm_sched_entity_push_job(&p->jobs[i]->base);
		/* Ownership moved to the scheduler, don't free in parser_fini. */
		p->jobs[i] = NULL;
	}

	amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);

	mutex_unlock(&p->adev->notifier_lock);
	mutex_unlock(&p->bo_list->bo_list_mutex);
	return 0;
}
1382cd75dc68SChristian König
138388c98d54SChristian König /* Cleanup the parser structure */
static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser)
{
	unsigned int i;

	amdgpu_sync_free(&parser->sync);
	/* Drops all BO locks taken for this submission. */
	drm_exec_fini(&parser->exec);

	for (i = 0; i < parser->num_post_deps; i++) {
		drm_syncobj_put(parser->post_deps[i].syncobj);
		/* chain is NULL when it was consumed by drm_syncobj_add_point() */
		kfree(parser->post_deps[i].chain);
	}
	kfree(parser->post_deps);

	dma_fence_put(parser->fence);

	if (parser->ctx)
		amdgpu_ctx_put(parser->ctx);
	if (parser->bo_list)
		amdgpu_bo_list_put(parser->bo_list);

	for (i = 0; i < parser->nchunks; i++)
		kvfree(parser->chunks[i].kdata);
	kvfree(parser->chunks);
	/* Jobs still present here were never pushed to the scheduler. */
	for (i = 0; i < parser->gang_size; ++i) {
		if (parser->jobs[i])
			amdgpu_job_free(parser->jobs[i]);
	}
	amdgpu_bo_unref(&parser->uf_bo);
}
141388c98d54SChristian König
/* Main entry point for the CS ioctl: parse, validate and submit the
 * command stream described by @data.
 *
 * Note on lock state for the error paths: after amdgpu_cs_parser_bos()
 * succeeds the bo_list_mutex is held, so later failures go through
 * error_backoff to drop it; earlier failures use error_fini directly.
 */
int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
	struct amdgpu_device *adev = drm_to_adev(dev);
	struct amdgpu_cs_parser parser;
	int r;

	/* Refuse new work when a fatal RAS interrupt was raised. */
	if (amdgpu_ras_intr_triggered())
		return -EHWPOISON;

	if (!adev->accel_working)
		return -EBUSY;

	r = amdgpu_cs_parser_init(&parser, adev, filp, data);
	if (r) {
		DRM_ERROR_RATELIMITED("Failed to initialize parser %d!\n", r);
		return r;
	}

	r = amdgpu_cs_pass1(&parser, data);
	if (r)
		goto error_fini;

	r = amdgpu_cs_pass2(&parser);
	if (r)
		goto error_fini;

	r = amdgpu_cs_parser_bos(&parser, data);
	if (r) {
		if (r == -ENOMEM)
			DRM_ERROR("Not enough memory for command submission!\n");
		else if (r != -ERESTARTSYS && r != -EAGAIN)
			DRM_DEBUG("Failed to process the buffer list %d!\n", r);
		goto error_fini;
	}

	r = amdgpu_cs_patch_jobs(&parser);
	if (r)
		goto error_backoff;

	r = amdgpu_cs_vm_handling(&parser);
	if (r)
		goto error_backoff;

	r = amdgpu_cs_sync_rings(&parser);
	if (r)
		goto error_backoff;

	trace_amdgpu_cs_ibs(&parser);

	/* On success amdgpu_cs_submit() drops the bo_list_mutex itself. */
	r = amdgpu_cs_submit(&parser, data);
	if (r)
		goto error_backoff;

	amdgpu_cs_parser_fini(&parser);
	return 0;

error_backoff:
	mutex_unlock(&parser.bo_list->bo_list_mutex);

error_fini:
	amdgpu_cs_parser_fini(&parser);
	return r;
}
1477d38ceaf9SAlex Deucher
1478d38ceaf9SAlex Deucher /**
1479d38ceaf9SAlex Deucher * amdgpu_cs_wait_ioctl - wait for a command submission to finish
1480d38ceaf9SAlex Deucher *
1481d38ceaf9SAlex Deucher * @dev: drm device
1482d38ceaf9SAlex Deucher * @data: data from userspace
1483d38ceaf9SAlex Deucher * @filp: file private
1484d38ceaf9SAlex Deucher *
1485d38ceaf9SAlex Deucher * Wait for the command submission identified by handle to finish.
1486d38ceaf9SAlex Deucher */
amdgpu_cs_wait_ioctl(struct drm_device * dev,void * data,struct drm_file * filp)1487d38ceaf9SAlex Deucher int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
1488d38ceaf9SAlex Deucher struct drm_file *filp)
1489d38ceaf9SAlex Deucher {
1490d38ceaf9SAlex Deucher union drm_amdgpu_wait_cs *wait = data;
1491d38ceaf9SAlex Deucher unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
14920d346a14SChristian König struct drm_sched_entity *entity;
149366b3cf2aSJammy Zhou struct amdgpu_ctx *ctx;
1494f54d1867SChris Wilson struct dma_fence *fence;
1495d38ceaf9SAlex Deucher long r;
1496d38ceaf9SAlex Deucher
149766b3cf2aSJammy Zhou ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
149866b3cf2aSJammy Zhou if (ctx == NULL)
149966b3cf2aSJammy Zhou return -EINVAL;
15004b559c90SChunming Zhou
15010d346a14SChristian König r = amdgpu_ctx_get_entity(ctx, wait->in.ip_type, wait->in.ip_instance,
15020d346a14SChristian König wait->in.ring, &entity);
1503effd924dSAndres Rodriguez if (r) {
1504effd924dSAndres Rodriguez amdgpu_ctx_put(ctx);
1505effd924dSAndres Rodriguez return r;
1506effd924dSAndres Rodriguez }
1507effd924dSAndres Rodriguez
15080d346a14SChristian König fence = amdgpu_ctx_get_fence(ctx, entity, wait->in.handle);
150921c16bf6SChristian König if (IS_ERR(fence))
151021c16bf6SChristian König r = PTR_ERR(fence);
151121c16bf6SChristian König else if (fence) {
1512f54d1867SChris Wilson r = dma_fence_wait_timeout(fence, true, timeout);
15137a0a48ddSChristian König if (r > 0 && fence->error)
15147a0a48ddSChristian König r = fence->error;
1515f54d1867SChris Wilson dma_fence_put(fence);
151621c16bf6SChristian König } else
151721c16bf6SChristian König r = 1;
15184b559c90SChunming Zhou
151966b3cf2aSJammy Zhou amdgpu_ctx_put(ctx);
1520d38ceaf9SAlex Deucher if (r < 0)
1521d38ceaf9SAlex Deucher return r;
1522d38ceaf9SAlex Deucher
1523d38ceaf9SAlex Deucher memset(wait, 0, sizeof(*wait));
1524d38ceaf9SAlex Deucher wait->out.status = (r == 0);
1525d38ceaf9SAlex Deucher
1526d38ceaf9SAlex Deucher return 0;
1527d38ceaf9SAlex Deucher }
1528d38ceaf9SAlex Deucher
1529d38ceaf9SAlex Deucher /**
1530eef18a82SJunwei Zhang * amdgpu_cs_get_fence - helper to get fence from drm_amdgpu_fence
1531eef18a82SJunwei Zhang *
1532eef18a82SJunwei Zhang * @adev: amdgpu device
1533eef18a82SJunwei Zhang * @filp: file private
1534eef18a82SJunwei Zhang * @user: drm_amdgpu_fence copied from user space
1535eef18a82SJunwei Zhang */
amdgpu_cs_get_fence(struct amdgpu_device * adev,struct drm_file * filp,struct drm_amdgpu_fence * user)1536eef18a82SJunwei Zhang static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
1537eef18a82SJunwei Zhang struct drm_file *filp,
1538eef18a82SJunwei Zhang struct drm_amdgpu_fence *user)
1539eef18a82SJunwei Zhang {
15400d346a14SChristian König struct drm_sched_entity *entity;
1541eef18a82SJunwei Zhang struct amdgpu_ctx *ctx;
1542eef18a82SJunwei Zhang struct dma_fence *fence;
1543eef18a82SJunwei Zhang int r;
1544eef18a82SJunwei Zhang
1545eef18a82SJunwei Zhang ctx = amdgpu_ctx_get(filp->driver_priv, user->ctx_id);
1546eef18a82SJunwei Zhang if (ctx == NULL)
1547eef18a82SJunwei Zhang return ERR_PTR(-EINVAL);
1548eef18a82SJunwei Zhang
15490d346a14SChristian König r = amdgpu_ctx_get_entity(ctx, user->ip_type, user->ip_instance,
15500d346a14SChristian König user->ring, &entity);
1551effd924dSAndres Rodriguez if (r) {
1552effd924dSAndres Rodriguez amdgpu_ctx_put(ctx);
1553effd924dSAndres Rodriguez return ERR_PTR(r);
1554effd924dSAndres Rodriguez }
1555effd924dSAndres Rodriguez
15560d346a14SChristian König fence = amdgpu_ctx_get_fence(ctx, entity, user->seq_no);
1557eef18a82SJunwei Zhang amdgpu_ctx_put(ctx);
1558eef18a82SJunwei Zhang
1559eef18a82SJunwei Zhang return fence;
1560eef18a82SJunwei Zhang }
1561eef18a82SJunwei Zhang
/* Convert a CS fence into a syncobj handle, syncobj fd or sync_file fd.
 *
 * Each case consumes the fence reference obtained above, so every path
 * below must do exactly one dma_fence_put().
 */
int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
				    struct drm_file *filp)
{
	struct amdgpu_device *adev = drm_to_adev(dev);
	union drm_amdgpu_fence_to_handle *info = data;
	struct dma_fence *fence;
	struct drm_syncobj *syncobj;
	struct sync_file *sync_file;
	int fd, r;

	fence = amdgpu_cs_get_fence(adev, filp, &info->in.fence);
	if (IS_ERR(fence))
		return PTR_ERR(fence);

	/* Already-signaled fences come back as NULL; substitute a stub so
	 * the created handle always carries a valid fence.
	 */
	if (!fence)
		fence = dma_fence_get_stub();

	switch (info->in.what) {
	case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ:
		r = drm_syncobj_create(&syncobj, 0, fence);
		dma_fence_put(fence);
		if (r)
			return r;
		r = drm_syncobj_get_handle(filp, syncobj, &info->out.handle);
		drm_syncobj_put(syncobj);
		return r;

	case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD:
		r = drm_syncobj_create(&syncobj, 0, fence);
		dma_fence_put(fence);
		if (r)
			return r;
		r = drm_syncobj_get_fd(syncobj, (int *)&info->out.handle);
		drm_syncobj_put(syncobj);
		return r;

	case AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD:
		/* Reserve the fd first so failure can't leak the sync_file. */
		fd = get_unused_fd_flags(O_CLOEXEC);
		if (fd < 0) {
			dma_fence_put(fence);
			return fd;
		}

		sync_file = sync_file_create(fence);
		dma_fence_put(fence);
		if (!sync_file) {
			put_unused_fd(fd);
			return -ENOMEM;
		}

		fd_install(fd, sync_file->file);
		info->out.handle = fd;
		return 0;

	default:
		/* Drop the reference taken above before rejecting. */
		dma_fence_put(fence);
		return -EINVAL;
	}
}
16217ca24cf2SMarek Olšák
1622eef18a82SJunwei Zhang /**
16233bffd71dSLee Jones * amdgpu_cs_wait_all_fences - wait on all fences to signal
1624eef18a82SJunwei Zhang *
1625eef18a82SJunwei Zhang * @adev: amdgpu device
1626eef18a82SJunwei Zhang * @filp: file private
1627eef18a82SJunwei Zhang * @wait: wait parameters
1628eef18a82SJunwei Zhang * @fences: array of drm_amdgpu_fence
1629eef18a82SJunwei Zhang */
amdgpu_cs_wait_all_fences(struct amdgpu_device * adev,struct drm_file * filp,union drm_amdgpu_wait_fences * wait,struct drm_amdgpu_fence * fences)1630eef18a82SJunwei Zhang static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev,
1631eef18a82SJunwei Zhang struct drm_file *filp,
1632eef18a82SJunwei Zhang union drm_amdgpu_wait_fences *wait,
1633eef18a82SJunwei Zhang struct drm_amdgpu_fence *fences)
1634eef18a82SJunwei Zhang {
1635eef18a82SJunwei Zhang uint32_t fence_count = wait->in.fence_count;
1636eef18a82SJunwei Zhang unsigned int i;
1637eef18a82SJunwei Zhang long r = 1;
1638eef18a82SJunwei Zhang
1639eef18a82SJunwei Zhang for (i = 0; i < fence_count; i++) {
1640eef18a82SJunwei Zhang struct dma_fence *fence;
1641eef18a82SJunwei Zhang unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);
1642eef18a82SJunwei Zhang
1643eef18a82SJunwei Zhang fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);
1644eef18a82SJunwei Zhang if (IS_ERR(fence))
1645eef18a82SJunwei Zhang return PTR_ERR(fence);
1646eef18a82SJunwei Zhang else if (!fence)
1647eef18a82SJunwei Zhang continue;
1648eef18a82SJunwei Zhang
1649eef18a82SJunwei Zhang r = dma_fence_wait_timeout(fence, true, timeout);
16502e54154bSshanzhulig if (r > 0 && fence->error)
16512e54154bSshanzhulig r = fence->error;
16522e54154bSshanzhulig
165332df87dfSChunming Zhou dma_fence_put(fence);
1654eef18a82SJunwei Zhang if (r < 0)
1655eef18a82SJunwei Zhang return r;
1656eef18a82SJunwei Zhang
1657eef18a82SJunwei Zhang if (r == 0)
1658eef18a82SJunwei Zhang break;
1659eef18a82SJunwei Zhang }
1660eef18a82SJunwei Zhang
1661eef18a82SJunwei Zhang memset(wait, 0, sizeof(*wait));
1662eef18a82SJunwei Zhang wait->out.status = (r > 0);
1663eef18a82SJunwei Zhang
1664eef18a82SJunwei Zhang return 0;
1665eef18a82SJunwei Zhang }
1666eef18a82SJunwei Zhang
1667eef18a82SJunwei Zhang /**
1668eef18a82SJunwei Zhang * amdgpu_cs_wait_any_fence - wait on any fence to signal
1669eef18a82SJunwei Zhang *
1670eef18a82SJunwei Zhang * @adev: amdgpu device
1671eef18a82SJunwei Zhang * @filp: file private
1672eef18a82SJunwei Zhang * @wait: wait parameters
1673eef18a82SJunwei Zhang * @fences: array of drm_amdgpu_fence
1674eef18a82SJunwei Zhang */
/**
 * amdgpu_cs_wait_any_fence - wait on any fence to signal
 *
 * @adev: amdgpu device
 * @filp: file private
 * @wait: wait parameters
 * @fences: array of drm_amdgpu_fence
 *
 * Reports the index of the first signaled fence through
 * wait->out.first_signaled and returns that fence's error (0 when none).
 */
static int amdgpu_cs_wait_any_fence(struct amdgpu_device *adev,
				    struct drm_file *filp,
				    union drm_amdgpu_wait_fences *wait,
				    struct drm_amdgpu_fence *fences)
{
	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout_ns);
	uint32_t fence_count = wait->in.fence_count;
	uint32_t first = ~0;
	struct dma_fence **array;
	unsigned int i;
	long r;

	/* Prepare the fence array */
	array = kcalloc(fence_count, sizeof(struct dma_fence *), GFP_KERNEL);

	if (array == NULL)
		return -ENOMEM;

	for (i = 0; i < fence_count; i++) {
		struct dma_fence *fence;

		fence = amdgpu_cs_get_fence(adev, filp, &fences[i]);
		if (IS_ERR(fence)) {
			r = PTR_ERR(fence);
			goto err_free_fence_array;
		} else if (fence) {
			array[i] = fence;
		} else { /* NULL, the fence has been already signaled */
			r = 1;
			first = i;
			/* array[first] stays NULL, so the error lookup below
			 * falls through to r = 0.
			 */
			goto out;
		}
	}

	r = dma_fence_wait_any_timeout(array, fence_count, true, timeout,
				       &first);
	if (r < 0)
		goto err_free_fence_array;

out:
	memset(wait, 0, sizeof(*wait));
	wait->out.status = (r > 0);
	wait->out.first_signaled = first;

	/* Return the error of the first signaled fence, if any. */
	if (first < fence_count && array[first])
		r = array[first]->error;
	else
		r = 0;

err_free_fence_array:
	for (i = 0; i < fence_count; i++)
		dma_fence_put(array[i]);
	kfree(array);

	return r;
}
1731eef18a82SJunwei Zhang
1732eef18a82SJunwei Zhang /**
1733eef18a82SJunwei Zhang * amdgpu_cs_wait_fences_ioctl - wait for multiple command submissions to finish
1734eef18a82SJunwei Zhang *
1735eef18a82SJunwei Zhang * @dev: drm device
1736eef18a82SJunwei Zhang * @data: data from userspace
1737eef18a82SJunwei Zhang * @filp: file private
1738eef18a82SJunwei Zhang */
amdgpu_cs_wait_fences_ioctl(struct drm_device * dev,void * data,struct drm_file * filp)1739eef18a82SJunwei Zhang int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
1740eef18a82SJunwei Zhang struct drm_file *filp)
1741eef18a82SJunwei Zhang {
17421348969aSLuben Tuikov struct amdgpu_device *adev = drm_to_adev(dev);
1743eef18a82SJunwei Zhang union drm_amdgpu_wait_fences *wait = data;
1744eef18a82SJunwei Zhang uint32_t fence_count = wait->in.fence_count;
1745eef18a82SJunwei Zhang struct drm_amdgpu_fence *fences_user;
1746eef18a82SJunwei Zhang struct drm_amdgpu_fence *fences;
1747eef18a82SJunwei Zhang int r;
1748eef18a82SJunwei Zhang
1749eef18a82SJunwei Zhang /* Get the fences from userspace */
1750eef18a82SJunwei Zhang fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence),
1751eef18a82SJunwei Zhang GFP_KERNEL);
1752eef18a82SJunwei Zhang if (fences == NULL)
1753eef18a82SJunwei Zhang return -ENOMEM;
1754eef18a82SJunwei Zhang
17557ecc245aSChristian König fences_user = u64_to_user_ptr(wait->in.fences);
1756eef18a82SJunwei Zhang if (copy_from_user(fences, fences_user,
1757eef18a82SJunwei Zhang sizeof(struct drm_amdgpu_fence) * fence_count)) {
1758eef18a82SJunwei Zhang r = -EFAULT;
1759eef18a82SJunwei Zhang goto err_free_fences;
1760eef18a82SJunwei Zhang }
1761eef18a82SJunwei Zhang
1762eef18a82SJunwei Zhang if (wait->in.wait_all)
1763eef18a82SJunwei Zhang r = amdgpu_cs_wait_all_fences(adev, filp, wait, fences);
1764eef18a82SJunwei Zhang else
1765eef18a82SJunwei Zhang r = amdgpu_cs_wait_any_fence(adev, filp, wait, fences);
1766eef18a82SJunwei Zhang
1767eef18a82SJunwei Zhang err_free_fences:
1768eef18a82SJunwei Zhang kfree(fences);
1769eef18a82SJunwei Zhang
1770eef18a82SJunwei Zhang return r;
1771eef18a82SJunwei Zhang }
1772eef18a82SJunwei Zhang
1773eef18a82SJunwei Zhang /**
17743bffd71dSLee Jones * amdgpu_cs_find_mapping - find bo_va for VM address
1775d38ceaf9SAlex Deucher *
1776d38ceaf9SAlex Deucher * @parser: command submission parser context
1777d38ceaf9SAlex Deucher * @addr: VM address
1778d38ceaf9SAlex Deucher * @bo: resulting BO of the mapping found
1779fec3124dSLee Jones * @map: Placeholder to return found BO mapping
1780d38ceaf9SAlex Deucher *
1781d38ceaf9SAlex Deucher * Search the buffer objects in the command submission context for a certain
 * virtual memory address. Returns 0 and fills in @bo and @map when the
 * mapping is found, a negative error code otherwise.
1784d38ceaf9SAlex Deucher */
int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
			   uint64_t addr, struct amdgpu_bo **bo,
			   struct amdgpu_bo_va_mapping **map)
{
	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
	struct ttm_operation_ctx ctx = { false, false };
	struct amdgpu_vm *vm = &fpriv->vm;
	struct amdgpu_bo_va_mapping *mapping;
	int i, r;

	/* Mappings are keyed in GPU page units, not bytes. */
	addr /= AMDGPU_GPU_PAGE_SIZE;

	mapping = amdgpu_vm_bo_lookup_mapping(vm, addr);
	if (!mapping || !mapping->bo_va || !mapping->bo_va->base.bo)
		return -EINVAL;

	*bo = mapping->bo_va->base.bo;
	*map = mapping;

	/* Double check that the BO is reserved by this CS */
	if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->exec.ticket)
		return -EINVAL;

	/* Make sure VRAM is allocated contiguously */
	(*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
	if ((*bo)->tbo.resource->mem_type == TTM_PL_VRAM &&
	    !((*bo)->tbo.resource->placement & TTM_PL_FLAG_CONTIGUOUS)) {

		/* Re-validate with the CONTIGUOUS flag forced on every
		 * allowed placement.
		 */
		amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains);
		for (i = 0; i < (*bo)->placement.num_placement; i++)
			(*bo)->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;
		r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx);
		if (r)
			return r;
	}

	/* Finally make sure the BO is GART bound so the CP can access it. */
	return amdgpu_ttm_alloc_gart(&(*bo)->tbo);
}
1823