/*
 * Copyright 2008 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jerome Glisse <[email protected]>
 */
#include <linux/pagemap.h>
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"

/**
 * amdgpu_cs_get_ring - resolve a userspace (ip_type, instance, ring) triple
 * @adev: amdgpu device
 * @ip_type: AMDGPU_HW_IP_* block identifier supplied by userspace
 * @ip_instance: IP instance index; only instance 0 is supported
 * @ring: ring index within the IP block
 * @out_ring: filled with the matching hardware ring on success
 *
 * Validates the untrusted ring addressing from the CS/WAIT_CS ioctls and
 * returns the corresponding ring structure.
 *
 * Returns 0 on success, -EINVAL for an unknown IP type or an out-of-range
 * instance/ring index.
 */
int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
		       u32 ip_instance, u32 ring,
		       struct amdgpu_ring **out_ring)
{
	/* Right now all IPs have only one instance - multiple rings. */
	if (ip_instance != 0) {
		DRM_ERROR("invalid ip instance: %d\n", ip_instance);
		return -EINVAL;
	}

	switch (ip_type) {
	default:
		DRM_ERROR("unknown ip type: %d\n", ip_type);
		return -EINVAL;
	case AMDGPU_HW_IP_GFX:
		if (ring < adev->gfx.num_gfx_rings) {
			*out_ring = &adev->gfx.gfx_ring[ring];
		} else {
			DRM_ERROR("only %d gfx rings are supported now\n",
				  adev->gfx.num_gfx_rings);
			return -EINVAL;
		}
		break;
	case AMDGPU_HW_IP_COMPUTE:
		if (ring < adev->gfx.num_compute_rings) {
			*out_ring = &adev->gfx.compute_ring[ring];
		} else {
			DRM_ERROR("only %d compute rings are supported now\n",
				  adev->gfx.num_compute_rings);
			return -EINVAL;
		}
		break;
	case AMDGPU_HW_IP_DMA:
		if (ring < adev->sdma.num_instances) {
			*out_ring = &adev->sdma.instance[ring].ring;
		} else {
			DRM_ERROR("only %d SDMA rings are supported\n",
				  adev->sdma.num_instances);
			return -EINVAL;
		}
		break;
	case AMDGPU_HW_IP_UVD:
		/* UVD has a single ring; the ring index is ignored here */
		*out_ring = &adev->uvd.ring;
		break;
	case AMDGPU_HW_IP_VCE:
		if (ring < 2) {
			*out_ring = &adev->vce.ring[ring];
		} else {
			DRM_ERROR("only two VCE rings are supported\n");
			return -EINVAL;
		}
		break;
	}
	return 0;
}

/**
 * amdgpu_cs_user_fence_chunk - process an AMDGPU_CHUNK_ID_FENCE chunk
 * @p: CS parser state
 * @uf: user fence descriptor to fill (BO + offset)
 * @fence_data: userspace chunk payload (GEM handle and byte offset)
 *
 * Looks up the GEM object the user fence lives in, takes references for
 * both the fence descriptor and the parser's dedicated BO list entry
 * (p->uf_entry) so the BO can be reserved along with the rest of the
 * submission, and drops the lookup reference again.
 *
 * Userptr BOs are rejected as fence targets.
 *
 * Returns 0 on success or -EINVAL.
 */
static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
				      struct amdgpu_user_fence *uf,
				      struct drm_amdgpu_cs_chunk_fence *fence_data)
{
	struct drm_gem_object *gobj;
	uint32_t handle;

	handle = fence_data->handle;
	gobj = drm_gem_object_lookup(p->adev->ddev, p->filp,
				     fence_data->handle);
	if (gobj == NULL)
		return -EINVAL;

	uf->bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
	uf->offset = fence_data->offset;

	/* fences in userptr BOs are not supported */
	if (amdgpu_ttm_tt_get_usermm(uf->bo->tbo.ttm)) {
		drm_gem_object_unreference_unlocked(gobj);
		return -EINVAL;
	}

	/* second reference: the BO list entry used during reservation */
	p->uf_entry.robj = amdgpu_bo_ref(uf->bo);
	p->uf_entry.priority = 0;
	p->uf_entry.tv.bo = &p->uf_entry.robj->tbo;
	p->uf_entry.tv.shared = true;
	p->uf_entry.user_pages = NULL;

	drm_gem_object_unreference_unlocked(gobj);
	return 0;
}

/**
 * amdgpu_cs_parser_init - copy in and validate the CS ioctl chunk arrays
 * @p: parser to initialize (adev/filp must already be set by the caller)
 * @data: the union drm_amdgpu_cs ioctl argument
 *
 * Copies the userspace chunk pointer array and every chunk payload into
 * kernel memory, counts the IB chunks, processes the optional user fence
 * chunk and allocates the job. On success the parser owns the context
 * reference, the chunk kdata allocations and the job.
 *
 * Returns 0 on success or a negative error code; on failure all partial
 * allocations made here are released again.
 */
int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	union drm_amdgpu_cs *cs = data;
	uint64_t *chunk_array_user;
	uint64_t *chunk_array;
	struct amdgpu_user_fence uf = {};
	unsigned size, num_ibs = 0;
	int i;
	int ret;

	if (cs->in.num_chunks == 0)
		return 0;

	chunk_array = kmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL);
	if (!chunk_array)
		return -ENOMEM;

	p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id);
	if (!p->ctx) {
		ret = -EINVAL;
		goto free_chunk;
	}

	/* get chunks */
	chunk_array_user = (uint64_t __user *)(unsigned long)(cs->in.chunks);
	if (copy_from_user(chunk_array, chunk_array_user,
			   sizeof(uint64_t)*cs->in.num_chunks)) {
		ret = -EFAULT;
		goto put_ctx;
	}

	p->nchunks = cs->in.num_chunks;
	p->chunks = kmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk),
			    GFP_KERNEL);
	if (!p->chunks) {
		ret = -ENOMEM;
		goto put_ctx;
	}

	for (i = 0; i < p->nchunks; i++) {
		struct drm_amdgpu_cs_chunk __user **chunk_ptr = NULL;
		struct drm_amdgpu_cs_chunk user_chunk;
		uint32_t __user *cdata;

		chunk_ptr = (void __user *)(unsigned long)chunk_array[i];
		if (copy_from_user(&user_chunk, chunk_ptr,
				       sizeof(struct drm_amdgpu_cs_chunk))) {
			ret = -EFAULT;
			/* chunk i has no kdata yet; start freeing at i - 1 */
			i--;
			goto free_partial_kdata;
		}
		p->chunks[i].chunk_id = user_chunk.chunk_id;
		p->chunks[i].length_dw = user_chunk.length_dw;

		size = p->chunks[i].length_dw;
		cdata = (void __user *)(unsigned long)user_chunk.chunk_data;

		p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t));
		if (p->chunks[i].kdata == NULL) {
			ret = -ENOMEM;
			/* allocation failed, nothing to free for chunk i */
			i--;
			goto free_partial_kdata;
		}
		size *= sizeof(uint32_t);
		if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
			ret = -EFAULT;
			goto free_partial_kdata;
		}

		switch (p->chunks[i].chunk_id) {
		case AMDGPU_CHUNK_ID_IB:
			++num_ibs;
			break;

		case AMDGPU_CHUNK_ID_FENCE:
			size = sizeof(struct drm_amdgpu_cs_chunk_fence);
			if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
				ret = -EINVAL;
				goto free_partial_kdata;
			}

			/* NOTE(review): if a later chunk fails after this
			 * succeeds, the uf.bo reference taken in
			 * amdgpu_cs_user_fence_chunk() is not dropped on the
			 * error paths below (only uf_entry.robj is released
			 * via amdgpu_cs_parser_fini()) — looks like a BO
			 * reference leak; verify against upstream history.
			 */
			ret = amdgpu_cs_user_fence_chunk(p, &uf, (void *)p->chunks[i].kdata);
			if (ret)
				goto free_partial_kdata;

			break;

		case AMDGPU_CHUNK_ID_DEPENDENCIES:
			/* handled later in amdgpu_cs_dependencies() */
			break;

		default:
			ret = -EINVAL;
			goto free_partial_kdata;
		}
	}

	ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job);
	if (ret)
		goto free_all_kdata;

	/* the job owns the user fence (BO reference) from here on */
	p->job->uf = uf;

	kfree(chunk_array);
	return 0;

free_all_kdata:
	i = p->nchunks - 1;
free_partial_kdata:
	for (; i >= 0; i--)
		drm_free_large(p->chunks[i].kdata);
	kfree(p->chunks);
put_ctx:
	amdgpu_ctx_put(p->ctx);
free_chunk:
	kfree(chunk_array);

	return ret;
}

/* Returns how many bytes TTM can move per IB.
 */
static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
{
	u64 real_vram_size = adev->mc.real_vram_size;
	u64 vram_usage = atomic64_read(&adev->vram_usage);

	/* This function is based on the current VRAM usage.
	 *
	 * - If all of VRAM is free, allow relocating the number of bytes that
	 *   is equal to 1/4 of the size of VRAM for this IB.

	 * - If more than one half of VRAM is occupied, only allow relocating
	 *   1 MB of data for this IB.
	 *
	 * - From 0 to one half of used VRAM, the threshold decreases
	 *   linearly.
	 *     __________________
	 * 1/4 of -|\               |
	 * VRAM    | \              |
	 *         |  \             |
	 *         |   \            |
	 *         |    \           |
	 *         |     \          |
	 *         |      \         |
	 *         |       \________|1 MB
	 *         |----------------|
	 *    VRAM 0 %             100 %
	 *         used            used
	 *
	 * Note: It's a threshold, not a limit. The threshold must be crossed
	 * for buffer relocations to stop, so any buffer of an arbitrary size
	 * can be moved as long as the threshold isn't crossed before
	 * the relocation takes place. We don't want to disable buffer
	 * relocations completely.
	 *
	 * The idea is that buffers should be placed in VRAM at creation time
	 * and TTM should only do a minimum number of relocations during
	 * command submission. In practice, you need to submit at least
	 * a dozen IBs to move all buffers to VRAM if they are in GTT.
	 *
	 * Also, things can get pretty crazy under memory pressure and actual
	 * VRAM usage can change a lot, so playing safe even at 50% does
	 * consistently increase performance.
	 */

	u64 half_vram = real_vram_size >> 1;
	u64 half_free_vram = vram_usage >= half_vram ? 0 : half_vram - vram_usage;
	u64 bytes_moved_threshold = half_free_vram >> 1;
	return max(bytes_moved_threshold, 1024*1024ull);
}

/**
 * amdgpu_cs_list_validate - validate placement for a list of reserved BOs
 * @p: CS parser (supplies the bytes-moved budget accounting)
 * @validated: list of amdgpu_bo_list_entry to validate
 *
 * For each BO: reject userptr BOs belonging to a different process, bind
 * freshly acquired user pages where needed, and call ttm_bo_validate()
 * with the preferred domains while the per-IB move budget lasts, falling
 * back to the allowed domains afterwards (and on placement failure).
 *
 * Returns 0 on success, -EPERM for a foreign userptr, or the
 * ttm_bo_validate() error.
 */
int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
			    struct list_head *validated)
{
	struct amdgpu_bo_list_entry *lobj;
	u64 initial_bytes_moved;
	int r;

	list_for_each_entry(lobj, validated, tv.head) {
		struct amdgpu_bo *bo = lobj->robj;
		bool binding_userptr = false;
		struct mm_struct *usermm;
		uint32_t domain;

		usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm);
		if (usermm && usermm != current->mm)
			return -EPERM;

		/* Check if we have user pages and nobody bound the BO already */
		if (lobj->user_pages && bo->tbo.ttm->state != tt_bound) {
			size_t size = sizeof(struct page *);

			size *= bo->tbo.ttm->num_pages;
			memcpy(bo->tbo.ttm->pages, lobj->user_pages, size);
			binding_userptr = true;
		}

		/* pinned BOs cannot move; nothing to validate */
		if (bo->pin_count)
			continue;

		/* Avoid moving this one if we have moved too many buffers
		 * for this IB already.
		 *
		 * Note that this allows moving at least one buffer of
		 * any size, because it doesn't take the current "bo"
		 * into account. We don't want to disallow buffer moves
		 * completely.
		 */
		if (p->bytes_moved <= p->bytes_moved_threshold)
			domain = bo->prefered_domains;
		else
			domain = bo->allowed_domains;

	retry:
		amdgpu_ttm_placement_from_domain(bo, domain);
		/* charge only the bytes this validate actually moved */
		initial_bytes_moved = atomic64_read(&bo->adev->num_bytes_moved);
		r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
		p->bytes_moved += atomic64_read(&bo->adev->num_bytes_moved) -
			       initial_bytes_moved;

		if (unlikely(r)) {
			/* retry once with the less restrictive domains */
			if (r != -ERESTARTSYS && domain != bo->allowed_domains) {
				domain = bo->allowed_domains;
				goto retry;
			}
			return r;
		}

		/* pages are now owned by the ttm_tt; drop our staging array */
		if (binding_userptr) {
			drm_free_large(lobj->user_pages);
			lobj->user_pages = NULL;
		}
	}
	return 0;
}

/**
 * amdgpu_cs_parser_bos - reserve and validate all BOs of a submission
 * @p: CS parser
 * @cs: the CS ioctl argument (supplies the BO list handle)
 *
 * Builds p->validated from the userspace BO list plus the VM page
 * directory and the optional user fence BO, reserves everything with a
 * ww_mutex ticket and validates placements within the per-IB move budget.
 *
 * Userptr BOs whose page arrays were invalidated are (re-)populated in a
 * retry loop: reservation is backed off, user pages are faulted in with
 * the mmap_sem held, and reservation is retried (up to 10 times, then
 * -EDEADLK).
 *
 * On success the buffers remain reserved (ticket held); on failure the
 * reservation is backed off here. Leftover user page arrays are always
 * released before returning.
 */
static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
				union drm_amdgpu_cs *cs)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_bo_list_entry *e;
	struct list_head duplicates;
	bool need_mmap_lock = false;
	unsigned i, tries = 10;
	int r;

	INIT_LIST_HEAD(&p->validated);

	p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
	if (p->bo_list) {
		/* only need mmap_sem if the list contains userptr BOs */
		need_mmap_lock = p->bo_list->first_userptr !=
			p->bo_list->num_entries;
		amdgpu_bo_list_get_list(p->bo_list, &p->validated);
	}

	INIT_LIST_HEAD(&duplicates);
	amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);

	if (p->job->uf.bo)
		list_add(&p->uf_entry.tv.head, &p->validated);

	if (need_mmap_lock)
		down_read(&current->mm->mmap_sem);

	while (1) {
		struct list_head need_pages;
		unsigned i;

		r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
					   &duplicates);
		if (unlikely(r != 0))
			goto error_free_pages;

		/* Without a BO list we don't have userptr BOs */
		if (!p->bo_list)
			break;

		INIT_LIST_HEAD(&need_pages);
		for (i = p->bo_list->first_userptr;
		     i < p->bo_list->num_entries; ++i) {

			e = &p->bo_list->array[i];

			if (amdgpu_ttm_tt_userptr_invalidated(e->robj->tbo.ttm,
				 &e->user_invalidated) && e->user_pages) {

				/* We acquired a page array, but somebody
				 * invalidated it. Free it an try again
				 */
				release_pages(e->user_pages,
					      e->robj->tbo.ttm->num_pages,
					      false);
				drm_free_large(e->user_pages);
				e->user_pages = NULL;
			}

			if (e->robj->tbo.ttm->state != tt_bound &&
			    !e->user_pages) {
				/* move to need_pages and unreserve so the
				 * page fault below can run unlocked */
				list_del(&e->tv.head);
				list_add(&e->tv.head, &need_pages);

				amdgpu_bo_unreserve(e->robj);
			}
		}

		if (list_empty(&need_pages))
			break;

		/* Unreserve everything again. */
		ttm_eu_backoff_reservation(&p->ticket, &p->validated);

		/* We tried to often, just abort */
		if (!--tries) {
			r = -EDEADLK;
			goto error_free_pages;
		}

		/* Fill the page arrays for all useptrs. */
		list_for_each_entry(e, &need_pages, tv.head) {
			struct ttm_tt *ttm = e->robj->tbo.ttm;

			e->user_pages = drm_calloc_large(ttm->num_pages,
							 sizeof(struct page*));
			if (!e->user_pages) {
				r = -ENOMEM;
				goto error_free_pages;
			}

			r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages);
			if (r) {
				drm_free_large(e->user_pages);
				e->user_pages = NULL;
				goto error_free_pages;
			}
		}

		/* And try again. */
		list_splice(&need_pages, &p->validated);
	}

	amdgpu_vm_get_pt_bos(&fpriv->vm, &duplicates);

	p->bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(p->adev);
	p->bytes_moved = 0;

	r = amdgpu_cs_list_validate(p, &duplicates);
	if (r)
		goto error_validate;

	r = amdgpu_cs_list_validate(p, &p->validated);
	if (r)
		goto error_validate;

	if (p->bo_list) {
		struct amdgpu_vm *vm = &fpriv->vm;
		unsigned i;

		/* cache the bo_va for each entry for the PTE update later */
		for (i = 0; i < p->bo_list->num_entries; i++) {
			struct amdgpu_bo *bo = p->bo_list->array[i].robj;

			p->bo_list->array[i].bo_va = amdgpu_vm_bo_find(vm, bo);
		}
	}

error_validate:
	if (r) {
		amdgpu_vm_move_pt_bos_in_lru(p->adev, &fpriv->vm);
		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
	}

error_free_pages:

	if (need_mmap_lock)
		up_read(&current->mm->mmap_sem);

	if (p->bo_list) {
		/* drop any page arrays that were never bound */
		for (i = p->bo_list->first_userptr;
		     i < p->bo_list->num_entries; ++i) {
			e = &p->bo_list->array[i];

			if (!e->user_pages)
				continue;

			release_pages(e->user_pages,
				      e->robj->tbo.ttm->num_pages,
				      false);
			drm_free_large(e->user_pages);
		}
	}

	return r;
}

/**
 * amdgpu_cs_sync_rings - wait for prior users of all validated BOs
 * @p: CS parser
 *
 * Adds the fences from every validated BO's reservation object to the
 * job's sync object so the submission waits for them.
 *
 * Returns 0 on success or the amdgpu_sync_resv() error.
 */
static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
{
	struct amdgpu_bo_list_entry *e;
	int r;

	list_for_each_entry(e, &p->validated, tv.head) {
		struct reservation_object *resv = e->robj->tbo.resv;
		r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp);

		if (r)
			return r;
	}
	return 0;
}

/**
 * amdgpu_cs_parser_fini() - clean parser states
 * @parser: parser structure holding parsing context.
 * @error: error number
 * @backoff: whether the reservation ticket is still held and must be
 *           backed off
 *
 * If error is set, unreserve the buffers (when @backoff), otherwise
 * attach the submission fence to all validated BOs and release the
 * reservation. In both cases free all memory used by the parsing
 * context.
 **/
static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bool backoff)
{
	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
	unsigned i;

	if (!error) {
		amdgpu_vm_move_pt_bos_in_lru(parser->adev, &fpriv->vm);

		ttm_eu_fence_buffer_objects(&parser->ticket,
					    &parser->validated,
					    parser->fence);
	} else if (backoff) {
		ttm_eu_backoff_reservation(&parser->ticket,
					   &parser->validated);
	}
	fence_put(parser->fence);

	if (parser->ctx)
		amdgpu_ctx_put(parser->ctx);
	if (parser->bo_list)
		amdgpu_bo_list_put(parser->bo_list);

	for (i = 0; i < parser->nchunks; i++)
		drm_free_large(parser->chunks[i].kdata);
	kfree(parser->chunks);
	/* job is only still set here if the submit never happened */
	if (parser->job)
		amdgpu_job_free(parser->job);
	amdgpu_bo_unref(&parser->uf_entry.robj);
}

/**
 * amdgpu_bo_vm_update_pte - update page tables for all BOs of the job
 * @p: CS parser
 * @vm: the VM of the submitting client
 *
 * Updates the page directory, clears freed mappings, then updates the
 * page table entries of every BO in the list and adds the resulting
 * page-table fences to the job's sync object. With amdgpu_vm_debug set,
 * all BOs are invalidated afterwards to exercise userspace bug handling.
 *
 * Returns 0 on success or a negative error code.
 */
static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p,
				   struct amdgpu_vm *vm)
{
	struct amdgpu_device *adev = p->adev;
	struct amdgpu_bo_va *bo_va;
	struct amdgpu_bo *bo;
	int i, r;

	r = amdgpu_vm_update_page_directory(adev, vm);
	if (r)
		return r;

	/* the IBs must not run before the page directory update is done */
	r = amdgpu_sync_fence(adev, &p->job->sync, vm->page_directory_fence);
	if (r)
		return r;

	r = amdgpu_vm_clear_freed(adev, vm);
	if (r)
		return r;

	if (p->bo_list) {
		for (i = 0; i < p->bo_list->num_entries; i++) {
			struct fence *f;

			/* ignore duplicates */
			bo = p->bo_list->array[i].robj;
			if (!bo)
				continue;

			bo_va = p->bo_list->array[i].bo_va;
			if (bo_va == NULL)
				continue;

			r = amdgpu_vm_bo_update(adev, bo_va, &bo->tbo.mem);
			if (r)
				return r;

			f = bo_va->last_pt_update;
			r = amdgpu_sync_fence(adev, &p->job->sync, f);
			if (r)
				return r;
		}

	}

	r = amdgpu_vm_clear_invalids(adev, vm, &p->job->sync);

	if (amdgpu_vm_debug && p->bo_list) {
		/* Invalidate all BOs to test for userspace bugs */
		for (i = 0; i < p->bo_list->num_entries; i++) {
			/* ignore duplicates */
			bo = p->bo_list->array[i].robj;
			if (!bo)
				continue;

			amdgpu_vm_bo_invalidate(adev, bo);
		}
	}

	return r;
}

/**
 * amdgpu_cs_ib_vm_chunk - per-VM second stage of IB processing
 * @adev: amdgpu device
 * @p: CS parser
 *
 * Runs the ring-specific command stream parser (UVD/VCE VM emulation)
 * where one exists, then updates the page tables and syncs the rings.
 *
 * Returns 0 on success or a negative error code.
 */
static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
				 struct amdgpu_cs_parser *p)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_vm *vm = &fpriv->vm;
	struct amdgpu_ring *ring = p->job->ring;
	int i, r;

	/* Only for UVD/VCE VM emulation */
	if (ring->funcs->parse_cs) {
		for (i = 0; i < p->job->num_ibs; i++) {
			r = amdgpu_ring_parse_cs(ring, p, i);
			if (r)
				return r;
		}
	}

	r = amdgpu_bo_vm_update_pte(p, vm);
	if (!r)
		amdgpu_cs_sync_rings(p);

	return r;
}

/* Translate a reservation deadlock into a GPU reset attempt: on -EDEADLK
 * reset the GPU and, if that worked, ask userspace to retry with -EAGAIN.
 */
static int amdgpu_cs_handle_lockup(struct amdgpu_device *adev, int r)
{
	if (r == -EDEADLK) {
		r = amdgpu_gpu_reset(adev);
		if (!r)
			r = -EAGAIN;
	}
	return r;
}

/**
 * amdgpu_cs_ib_fill - create the job's IBs from the parsed IB chunks
 * @adev: amdgpu device
 * @parser: CS parser with chunks already copied in
 *
 * For every AMDGPU_CHUNK_ID_IB chunk: resolves the target ring (all IBs
 * of one job must use the same ring), and either copies the IB contents
 * into a kernel IB (rings with a parse_cs callback, i.e. UVD/VCE VM
 * emulation) or references it by GPU address. Afterwards GDS/GWS/OA
 * resources are attached to the first IB and the user fence to the last.
 *
 * Returns 0 on success or a negative error code.
 */
static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
			     struct amdgpu_cs_parser *parser)
{
	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
	struct amdgpu_vm *vm = &fpriv->vm;
	int i, j;
	int r;

	for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) {
		struct amdgpu_cs_chunk *chunk;
		struct amdgpu_ib *ib;
		struct drm_amdgpu_cs_chunk_ib *chunk_ib;
		struct amdgpu_ring *ring;

		chunk = &parser->chunks[i];
		ib = &parser->job->ibs[j];
		chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;

		if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
			continue;

		r = amdgpu_cs_get_ring(adev, chunk_ib->ip_type,
				       chunk_ib->ip_instance, chunk_ib->ring,
				       &ring);
		if (r)
			return r;

		/* all IBs of one submission must target the same ring */
		if (parser->job->ring && parser->job->ring != ring)
			return -EINVAL;

		parser->job->ring = ring;

		if (ring->funcs->parse_cs) {
			struct amdgpu_bo_va_mapping *m;
			struct amdgpu_bo *aobj = NULL;
			uint64_t offset;
			uint8_t *kptr;

			m = amdgpu_cs_find_mapping(parser, chunk_ib->va_start,
						   &aobj);
			if (!aobj) {
				DRM_ERROR("IB va_start is invalid\n");
				return -EINVAL;
			}

			/* the whole IB must fit inside the found mapping */
			if ((chunk_ib->va_start + chunk_ib->ib_bytes) >
			    (m->it.last + 1) * AMDGPU_GPU_PAGE_SIZE) {
				DRM_ERROR("IB va_start+ib_bytes is invalid\n");
				return -EINVAL;
			}

			/* the IB should be reserved at this point */
			r = amdgpu_bo_kmap(aobj, (void **)&kptr);
			if (r) {
				return r;
			}

			offset = ((uint64_t)m->it.start) * AMDGPU_GPU_PAGE_SIZE;
			kptr += chunk_ib->va_start - offset;

			r = amdgpu_ib_get(adev, NULL, chunk_ib->ib_bytes, ib);
			if (r) {
				DRM_ERROR("Failed to get ib !\n");
				return r;
			}

			memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
			amdgpu_bo_kunmap(aobj);
		} else {
			r = amdgpu_ib_get(adev, vm, 0, ib);
			if (r) {
				DRM_ERROR("Failed to get ib !\n");
				return r;
			}

			ib->gpu_addr = chunk_ib->va_start;
		}

		ib->length_dw = chunk_ib->ib_bytes / 4;
		ib->flags = chunk_ib->flags;
		ib->ctx = parser->ctx;
		j++;
	}

	/* add GDS resources to first IB */
	if (parser->bo_list) {
		struct amdgpu_bo *gds = parser->bo_list->gds_obj;
		struct amdgpu_bo *gws = parser->bo_list->gws_obj;
		struct amdgpu_bo *oa = parser->bo_list->oa_obj;
		struct amdgpu_ib *ib = &parser->job->ibs[0];

		if (gds) {
			ib->gds_base = amdgpu_bo_gpu_offset(gds);
			ib->gds_size = amdgpu_bo_size(gds);
		}
		if (gws) {
			ib->gws_base = amdgpu_bo_gpu_offset(gws);
			ib->gws_size = amdgpu_bo_size(gws);
		}
		if (oa) {
			ib->oa_base = amdgpu_bo_gpu_offset(oa);
			ib->oa_size = amdgpu_bo_size(oa);
		}
	}
	/* wrap the last IB with user fence */
	if (parser->job->uf.bo) {
		struct amdgpu_ib *ib = &parser->job->ibs[parser->job->num_ibs - 1];

		/* UVD & VCE fw doesn't support user fences */
		if (parser->job->ring->type == AMDGPU_RING_TYPE_UVD ||
		    parser->job->ring->type == AMDGPU_RING_TYPE_VCE)
			return -EINVAL;

		ib->user = &parser->job->uf;
	}

	return 0;
}

/**
 * amdgpu_cs_dependencies - process AMDGPU_CHUNK_ID_DEPENDENCIES chunks
 * @adev: amdgpu device
 * @p: CS parser
 *
 * For every dependency entry, looks up the referenced context/ring fence
 * and adds it to the job's sync object so the submission waits for it.
 *
 * Returns 0 on success or a negative error code.
 */
static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
				  struct amdgpu_cs_parser *p)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	int i, j, r;

	for (i = 0; i < p->nchunks; ++i) {
		struct drm_amdgpu_cs_chunk_dep *deps;
		struct amdgpu_cs_chunk *chunk;
		unsigned num_deps;

		chunk = &p->chunks[i];

		if (chunk->chunk_id != AMDGPU_CHUNK_ID_DEPENDENCIES)
			continue;

		deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
		num_deps = chunk->length_dw * 4 /
			sizeof(struct drm_amdgpu_cs_chunk_dep);

		for (j = 0; j < num_deps; ++j) {
			struct amdgpu_ring *ring;
			struct amdgpu_ctx *ctx;
			struct fence *fence;

			r = amdgpu_cs_get_ring(adev, deps[j].ip_type,
					       deps[j].ip_instance,
					       deps[j].ring, &ring);
			if (r)
				return r;

			ctx = amdgpu_ctx_get(fpriv, deps[j].ctx_id);
			if (ctx == NULL)
				return -EINVAL;

			fence = amdgpu_ctx_get_fence(ctx, ring,
						     deps[j].handle);
			if (IS_ERR(fence)) {
				r = PTR_ERR(fence);
				amdgpu_ctx_put(ctx);
				return r;

			} else if (fence) {
				/* a NULL fence means it already signaled */
				r = amdgpu_sync_fence(adev, &p->job->sync,
						      fence);
				fence_put(fence);
				amdgpu_ctx_put(ctx);
				if (r)
					return r;
			}
		}
	}

	return 0;
}

/**
 * amdgpu_cs_submit - hand the fully built job over to the GPU scheduler
 * @p: CS parser; ownership of p->job transfers to the scheduler here
 * @cs: CS ioctl argument; out.handle receives the fence sequence number
 *
 * Initializes the scheduler job, publishes the resulting fence as the
 * submission handle and pushes the job to the scheduler entity. After a
 * successful amd_sched_job_init() the job is owned by the scheduler
 * (p->job is cleared), on init failure it is freed here.
 *
 * Returns 0 on success or the amd_sched_job_init() error.
 */
static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
			    union drm_amdgpu_cs *cs)
{
	struct amdgpu_ring *ring = p->job->ring;
	struct fence *fence;
	struct amdgpu_job *job;
	int r;

	job = p->job;
	p->job = NULL;

	r = amd_sched_job_init(&job->base, &ring->sched,
			       &p->ctx->rings[ring->idx].entity,
			       amdgpu_job_timeout_func,
			       amdgpu_job_free_func,
			       p->filp, &fence);
	if (r) {
		amdgpu_job_free(job);
		return r;
	}

	job->owner = p->filp;
	/* keep a reference for amdgpu_cs_parser_fini() */
	p->fence = fence_get(fence);
	cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, fence);
	job->ibs[job->num_ibs - 1].sequence = cs->out.handle;

	trace_amdgpu_cs_ioctl(job);
	amd_sched_entity_push_job(&job->base);

	return 0;
}

/**
 * amdgpu_cs_ioctl - the DRM_AMDGPU_CS ioctl entry point
 * @dev: drm device
 * @data: union drm_amdgpu_cs from userspace
 * @filp: file private
 *
 * Drives the whole submission pipeline: parser init, BO reservation and
 * validation, IB creation, dependency handling, VM page table updates
 * and finally handing the job to the scheduler. All cleanup funnels
 * through amdgpu_cs_parser_fini(); -EDEADLK is turned into a GPU reset
 * attempt by amdgpu_cs_handle_lockup().
 *
 * Returns 0 on success or a negative error code.
 */
int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
	struct amdgpu_device *adev = dev->dev_private;
	union drm_amdgpu_cs *cs = data;
	struct amdgpu_cs_parser parser = {};
	bool reserved_buffers = false;
	int i, r;

	if (!adev->accel_working)
		return -EBUSY;

	parser.adev = adev;
	parser.filp = filp;

	r = amdgpu_cs_parser_init(&parser, data);
	if (r) {
		DRM_ERROR("Failed to initialize parser !\n");
		amdgpu_cs_parser_fini(&parser, r, false);
		r = amdgpu_cs_handle_lockup(adev, r);
		return r;
	}
	r = amdgpu_cs_parser_bos(&parser, data);
	if (r == -ENOMEM)
		DRM_ERROR("Not enough memory for command submission!\n");
	else if (r && r != -ERESTARTSYS)
		DRM_ERROR("Failed to process the buffer list %d!\n", r);
	else if (!r) {
		reserved_buffers = true;
		r = amdgpu_cs_ib_fill(adev, &parser);
	}

	if (!r) {
		r = amdgpu_cs_dependencies(adev, &parser);
		if (r)
			DRM_ERROR("Failed in the dependencies handling %d!\n", r);
	}

	if (r)
		goto out;

	for (i = 0; i < parser.job->num_ibs; i++)
		trace_amdgpu_cs(&parser, i);

	r = amdgpu_cs_ib_vm_chunk(adev, &parser);
	if (r)
		goto out;

	r = amdgpu_cs_submit(&parser, cs);

out:
	amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
	r = amdgpu_cs_handle_lockup(adev, r);
	return r;
}

/**
 * amdgpu_cs_wait_ioctl - wait for a command submission to finish
 *
 * @dev: drm device
 * @data: data from userspace
 * @filp: file private
 *
 * Wait for the command submission identified by handle to finish.
 */
int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *filp)
{
	union drm_amdgpu_wait_cs *wait = data;
	struct amdgpu_device *adev = dev->dev_private;
	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
	struct amdgpu_ring *ring = NULL;
	struct amdgpu_ctx *ctx;
	struct fence *fence;
	long r;

	r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance,
			       wait->in.ring, &ring);
	if (r)
		return r;

	ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
	if (ctx == NULL)
		return -EINVAL;

	fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle);
	if (IS_ERR(fence))
		r = PTR_ERR(fence);
	else if (fence) {
		r = fence_wait_timeout(fence, true, timeout);
		fence_put(fence);
	} else
		/* no fence means it signaled long ago */
		r = 1;

	amdgpu_ctx_put(ctx);
	if (r < 0)
		return r;

	memset(wait, 0, sizeof(*wait));
	/* status is 1 when the wait timed out, 0 when the fence signaled */
	wait->out.status = (r == 0);

	return 0;
}

/**
 * amdgpu_cs_find_mapping - find bo_va mapping for a VM address
 *
 * @parser: command submission parser context
 * @addr: VM address
 * @bo: resulting BO of the mapping found
 *
 * Search the buffer objects in the command submission context for a certain
 * virtual memory address. Returns the mapping structure when found, NULL
 * otherwise.
 */
struct amdgpu_bo_va_mapping *
amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
		       uint64_t addr, struct amdgpu_bo **bo)
{
	struct amdgpu_bo_va_mapping *mapping;
	unsigned i;

	if (!parser->bo_list)
		return NULL;

	/* mappings are tracked in GPU page units */
	addr /= AMDGPU_GPU_PAGE_SIZE;

	for (i = 0; i < parser->bo_list->num_entries; i++) {
		struct amdgpu_bo_list_entry *lobj;

		lobj = &parser->bo_list->array[i];
		if (!lobj->bo_va)
			continue;

		/* check currently valid mappings first */
		list_for_each_entry(mapping, &lobj->bo_va->valids, list) {
			if (mapping->it.start > addr ||
			    addr > mapping->it.last)
				continue;

			*bo = lobj->bo_va->bo;
			return mapping;
		}

		/* then mappings not yet committed to the page tables */
		list_for_each_entry(mapping, &lobj->bo_va->invalids, list) {
			if (mapping->it.start > addr ||
			    addr > mapping->it.last)
				continue;

			*bo = lobj->bo_va->bo;
			return mapping;
		}
	}

	return NULL;
}