// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_exec_queue.h"

#include <linux/nospec.h>

#include <drm/drm_device.h>
#include <drm/drm_drv.h>
#include <drm/drm_file.h>
#include <uapi/drm/xe_drm.h>

#include "xe_device.h"
#include "xe_gt.h"
#include "xe_hw_engine_class_sysfs.h"
#include "xe_hw_engine_group.h"
#include "xe_hw_fence.h"
#include "xe_irq.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_migrate.h"
#include "xe_pm.h"
#include "xe_ring_ops_types.h"
#include "xe_trace.h"
#include "xe_vm.h"

enum xe_exec_queue_sched_prop {
	XE_EXEC_QUEUE_JOB_TIMEOUT = 0,
	XE_EXEC_QUEUE_TIMESLICE = 1,
	XE_EXEC_QUEUE_PREEMPT_TIMEOUT = 2,
	XE_EXEC_QUEUE_SCHED_PROP_MAX = 3,
};

static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q,
				      u64 extensions, int ext_number);

static void __xe_exec_queue_free(struct xe_exec_queue *q)
{
	if (q->vm)
		xe_vm_put(q->vm);

	if (q->xef)
		xe_file_put(q->xef);

	kfree(q);
}

static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
						   struct xe_vm *vm,
						   u32 logical_mask,
						   u16 width, struct xe_hw_engine *hwe,
						   u32 flags, u64 extensions)
{
	struct xe_exec_queue *q;
	struct xe_gt *gt = hwe->gt;
	int err;

	/* only kernel queues can be permanent */
	XE_WARN_ON((flags & EXEC_QUEUE_FLAG_PERMANENT) && !(flags & EXEC_QUEUE_FLAG_KERNEL));

	q = kzalloc(struct_size(q, lrc, width), GFP_KERNEL);
	if (!q)
		return ERR_PTR(-ENOMEM);

	kref_init(&q->refcount);
	q->flags = flags;
	q->hwe = hwe;
	q->gt = gt;
	q->class = hwe->class;
	q->width = width;
	q->msix_vec = XE_IRQ_DEFAULT_MSIX;
	q->logical_mask = logical_mask;
	q->fence_irq = &gt->fence_irq[hwe->class];
	q->ring_ops = gt->ring_ops[hwe->class];
	q->ops = gt->exec_queue_ops;
	INIT_LIST_HEAD(&q->lr.link);
	INIT_LIST_HEAD(&q->multi_gt_link);
	INIT_LIST_HEAD(&q->hw_engine_group_link);
	INIT_LIST_HEAD(&q->pxp.link);

	q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us;
	q->sched_props.preempt_timeout_us =
				hwe->eclass->sched_props.preempt_timeout_us;
	q->sched_props.job_timeout_ms =
				hwe->eclass->sched_props.job_timeout_ms;
	if (q->flags & EXEC_QUEUE_FLAG_KERNEL &&
	    q->flags & EXEC_QUEUE_FLAG_HIGH_PRIORITY)
		q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_KERNEL;
	else
		q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_NORMAL;

	if (vm)
		q->vm = xe_vm_get(vm);

	if (extensions) {
		/*
		 * may set q->usm, must come before xe_lrc_create(),
		 * may overwrite q->sched_props, must come before q->ops->init()
		 */
		err = exec_queue_user_extensions(xe, q, extensions, 0);
		if (err) {
			__xe_exec_queue_free(q);
			return ERR_PTR(err);
		}
	}

	return q;
}

static int __xe_exec_queue_init(struct xe_exec_queue *q)
{
	struct xe_vm *vm = q->vm;
	int i, err;

	if (vm) {
		err = xe_vm_lock(vm, true);
		if (err)
			return err;
	}

	for (i = 0; i < q->width; ++i) {
		q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K, q->msix_vec);
		if (IS_ERR(q->lrc[i])) {
			err = PTR_ERR(q->lrc[i]);
			goto err_unlock;
		}
	}

	if (vm)
		xe_vm_unlock(vm);

	err = q->ops->init(q);
	if (err)
		goto err_lrc;

	return 0;

err_unlock:
	if (vm)
		xe_vm_unlock(vm);
err_lrc:
	for (i = i - 1; i >= 0; --i)
		xe_lrc_put(q->lrc[i]);
	return err;
}

struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm,
					   u32 logical_mask, u16 width,
					   struct xe_hw_engine *hwe, u32 flags,
					   u64 extensions)
{
	struct xe_exec_queue *q;
	int err;

	/* VMs for GSCCS queues (and only those) must have the XE_VM_FLAG_GSC flag */
	xe_assert(xe, !vm || (!!(vm->flags & XE_VM_FLAG_GSC) == !!(hwe->engine_id == XE_HW_ENGINE_GSCCS0)));

	q = __xe_exec_queue_alloc(xe, vm, logical_mask, width, hwe, flags,
				  extensions);
	if (IS_ERR(q))
		return q;

	err = __xe_exec_queue_init(q);
	if (err)
		goto err_post_alloc;

	return q;

err_post_alloc:
	__xe_exec_queue_free(q);
	return ERR_PTR(err);
}

struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt,
						 struct xe_vm *vm,
						 enum xe_engine_class class,
						 u32 flags, u64 extensions)
{
	struct xe_hw_engine *hwe, *hwe0 = NULL;
	enum xe_hw_engine_id id;
	u32 logical_mask = 0;

	for_each_hw_engine(hwe, gt, id) {
		if (xe_hw_engine_is_reserved(hwe))
			continue;

		if (hwe->class == class) {
			logical_mask |= BIT(hwe->logical_instance);
			if (!hwe0)
				hwe0 = hwe;
		}
	}

	if (!logical_mask)
		return ERR_PTR(-ENODEV);

	return xe_exec_queue_create(xe, vm, logical_mask, 1, hwe0, flags, extensions);
}

/**
 * xe_exec_queue_create_bind() - Create bind exec queue.
 * @xe: Xe device.
 * @tile: tile which bind exec queue belongs to.
 * @flags: exec queue creation flags
 * @extensions: exec queue creation extensions
 *
 * Normalize bind exec queue creation. A bind exec queue is tied to the
 * migration VM for access to the physical memory required for page table
 * programming. On faulting devices the reserved copy engine instance must be
 * used to avoid deadlocking (user binds cannot get stuck behind faults, as
 * kernel binds which resolve faults depend on user binds). On non-faulting
 * devices any copy engine can be used.
 *
 * Returns exec queue on success, ERR_PTR on failure
 */
struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe,
						struct xe_tile *tile,
						u32 flags, u64 extensions)
{
	struct xe_gt *gt = tile->primary_gt;
	struct xe_exec_queue *q;
	struct xe_vm *migrate_vm;

	migrate_vm = xe_migrate_get_vm(tile->migrate);
	if (xe->info.has_usm) {
		struct xe_hw_engine *hwe = xe_gt_hw_engine(gt,
							   XE_ENGINE_CLASS_COPY,
							   gt->usm.reserved_bcs_instance,
							   false);

		if (!hwe) {
			xe_vm_put(migrate_vm);
			return ERR_PTR(-EINVAL);
		}

		q = xe_exec_queue_create(xe, migrate_vm,
					 BIT(hwe->logical_instance), 1, hwe,
					 flags, extensions);
	} else {
		q = xe_exec_queue_create_class(xe, gt, migrate_vm,
					       XE_ENGINE_CLASS_COPY, flags,
					       extensions);
	}
	xe_vm_put(migrate_vm);

	return q;
}
ALLOW_ERROR_INJECTION(xe_exec_queue_create_bind, ERRNO);

void xe_exec_queue_destroy(struct kref *ref)
{
	struct xe_exec_queue *q = container_of(ref, struct xe_exec_queue, refcount);
	struct xe_exec_queue *eq, *next;

	xe_exec_queue_last_fence_put_unlocked(q);
	if (!(q->flags & EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD)) {
		list_for_each_entry_safe(eq, next, &q->multi_gt_list,
					 multi_gt_link)
			xe_exec_queue_put(eq);
	}

	q->ops->fini(q);
}

void xe_exec_queue_fini(struct xe_exec_queue *q)
{
	int i;

	/*
	 * Before releasing our ref to lrc and xef, accumulate our run ticks
	 * and wakeup any waiters.
	 */
	xe_exec_queue_update_run_ticks(q);
	if (q->xef && atomic_dec_and_test(&q->xef->exec_queue.pending_removal))
		wake_up_var(&q->xef->exec_queue.pending_removal);

	for (i = 0; i < q->width; ++i)
		xe_lrc_put(q->lrc[i]);

	__xe_exec_queue_free(q);
}

void xe_exec_queue_assign_name(struct xe_exec_queue *q, u32 instance)
{
	switch (q->class) {
	case XE_ENGINE_CLASS_RENDER:
		snprintf(q->name, sizeof(q->name), "rcs%d", instance);
		break;
	case XE_ENGINE_CLASS_VIDEO_DECODE:
		snprintf(q->name, sizeof(q->name), "vcs%d", instance);
		break;
	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
		snprintf(q->name, sizeof(q->name), "vecs%d", instance);
		break;
	case XE_ENGINE_CLASS_COPY:
		snprintf(q->name, sizeof(q->name), "bcs%d", instance);
		break;
	case XE_ENGINE_CLASS_COMPUTE:
		snprintf(q->name, sizeof(q->name), "ccs%d", instance);
		break;
	case XE_ENGINE_CLASS_OTHER:
		snprintf(q->name, sizeof(q->name), "gsccs%d", instance);
		break;
	default:
		XE_WARN_ON(q->class);
	}
}

struct xe_exec_queue *xe_exec_queue_lookup(struct xe_file *xef, u32 id)
{
	struct xe_exec_queue *q;

	mutex_lock(&xef->exec_queue.lock);
	q = xa_load(&xef->exec_queue.xa, id);
	if (q)
		xe_exec_queue_get(q);
	mutex_unlock(&xef->exec_queue.lock);

	return q;
}

enum xe_exec_queue_priority
xe_exec_queue_device_get_max_priority(struct xe_device *xe)
{
	return capable(CAP_SYS_NICE) ?
	       XE_EXEC_QUEUE_PRIORITY_HIGH :
	       XE_EXEC_QUEUE_PRIORITY_NORMAL;
}

static int exec_queue_set_priority(struct xe_device *xe, struct xe_exec_queue *q,
				   u64 value)
{
	if (XE_IOCTL_DBG(xe, value > XE_EXEC_QUEUE_PRIORITY_HIGH))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, value > xe_exec_queue_device_get_max_priority(xe)))
		return -EPERM;

	q->sched_props.priority = value;
	return 0;
}

static bool xe_exec_queue_enforce_schedule_limit(void)
{
#if IS_ENABLED(CONFIG_DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT)
	return true;
#else
	return !capable(CAP_SYS_NICE);
#endif
}

static void
xe_exec_queue_get_prop_minmax(struct xe_hw_engine_class_intf *eclass,
			      enum xe_exec_queue_sched_prop prop,
			      u32 *min, u32 *max)
{
	switch (prop) {
	case XE_EXEC_QUEUE_JOB_TIMEOUT:
		*min = eclass->sched_props.job_timeout_min;
		*max = eclass->sched_props.job_timeout_max;
		break;
	case XE_EXEC_QUEUE_TIMESLICE:
		*min = eclass->sched_props.timeslice_min;
		*max = eclass->sched_props.timeslice_max;
		break;
	case XE_EXEC_QUEUE_PREEMPT_TIMEOUT:
		*min = eclass->sched_props.preempt_timeout_min;
		*max = eclass->sched_props.preempt_timeout_max;
		break;
	default:
		break;
	}
#if IS_ENABLED(CONFIG_DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT)
	if (capable(CAP_SYS_NICE)) {
		switch (prop) {
		case XE_EXEC_QUEUE_JOB_TIMEOUT:
			*min = XE_HW_ENGINE_JOB_TIMEOUT_MIN;
			*max = XE_HW_ENGINE_JOB_TIMEOUT_MAX;
			break;
		case XE_EXEC_QUEUE_TIMESLICE:
			*min = XE_HW_ENGINE_TIMESLICE_MIN;
			*max = XE_HW_ENGINE_TIMESLICE_MAX;
			break;
		case XE_EXEC_QUEUE_PREEMPT_TIMEOUT:
			*min = XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN;
			*max = XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX;
			break;
		default:
			break;
		}
	}
#endif
}

static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue *q,
				    u64 value)
{
	u32 min = 0, max = 0;

	xe_exec_queue_get_prop_minmax(q->hwe->eclass,
				      XE_EXEC_QUEUE_TIMESLICE, &min, &max);

	if (xe_exec_queue_enforce_schedule_limit() &&
	    !xe_hw_engine_timeout_in_range(value, min, max))
		return -EINVAL;

	q->sched_props.timeslice_us = value;
	return 0;
}

typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe,
					     struct xe_exec_queue *q,
					     u64 value);

static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = {
	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority,
	[DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE] = exec_queue_set_timeslice,
};

static int exec_queue_user_ext_set_property(struct xe_device *xe,
					    struct xe_exec_queue *q,
					    u64 extension)
{
	u64 __user *address = u64_to_user_ptr(extension);
	struct drm_xe_ext_set_property ext;
	int err;
	u32 idx;

	err = __copy_from_user(&ext, address, sizeof(ext));
	if (XE_IOCTL_DBG(xe, err))
		return -EFAULT;

	if (XE_IOCTL_DBG(xe, ext.property >=
			 ARRAY_SIZE(exec_queue_set_property_funcs)) ||
	    XE_IOCTL_DBG(xe, ext.pad) ||
	    XE_IOCTL_DBG(xe, ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY &&
			 ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE))
		return -EINVAL;

	idx = array_index_nospec(ext.property, ARRAY_SIZE(exec_queue_set_property_funcs));
	if (!exec_queue_set_property_funcs[idx])
		return -EINVAL;

	return exec_queue_set_property_funcs[idx](xe, q, ext.value);
}

typedef int
(*xe_exec_queue_user_extension_fn)(struct xe_device *xe,
				   struct xe_exec_queue *q,
				   u64 extension);

static const xe_exec_queue_user_extension_fn exec_queue_user_extension_funcs[] = {
	[DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY] = exec_queue_user_ext_set_property,
};

#define MAX_USER_EXTENSIONS 16
static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q,
				      u64 extensions, int ext_number)
{
	u64 __user *address = u64_to_user_ptr(extensions);
	struct drm_xe_user_extension ext;
	int err;
	u32 idx;

	if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS))
		return -E2BIG;

	err = __copy_from_user(&ext, address, sizeof(ext));
	if (XE_IOCTL_DBG(xe, err))
		return -EFAULT;

	if (XE_IOCTL_DBG(xe, ext.pad) ||
	    XE_IOCTL_DBG(xe, ext.name >=
			 ARRAY_SIZE(exec_queue_user_extension_funcs)))
		return -EINVAL;

	idx = array_index_nospec(ext.name,
				 ARRAY_SIZE(exec_queue_user_extension_funcs));
	err = exec_queue_user_extension_funcs[idx](xe, q, extensions);
	if (XE_IOCTL_DBG(xe, err))
		return err;

	if (ext.next_extension)
		return exec_queue_user_extensions(xe, q, ext.next_extension,
						  ++ext_number);

	return 0;
}

static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt,
				      struct drm_xe_engine_class_instance *eci,
				      u16 width, u16 num_placements)
{
	int len = width * num_placements;
	int i, j, n;
	u16 class;
	u16 gt_id;
	u32 return_mask = 0, prev_mask;

	if (XE_IOCTL_DBG(xe, !xe_device_uc_enabled(xe) &&
			 len > 1))
		return 0;

	for (i = 0; i < width; ++i) {
		u32 current_mask = 0;

		for (j = 0; j < num_placements; ++j) {
			struct xe_hw_engine *hwe;

			n = j * width + i;

			hwe = xe_hw_engine_lookup(xe, eci[n]);
			if (XE_IOCTL_DBG(xe, !hwe))
				return 0;

			if (XE_IOCTL_DBG(xe, xe_hw_engine_is_reserved(hwe)))
				return 0;

			if (XE_IOCTL_DBG(xe, n && eci[n].gt_id != gt_id) ||
			    XE_IOCTL_DBG(xe, n && eci[n].engine_class != class))
				return 0;

			class = eci[n].engine_class;
			gt_id = eci[n].gt_id;

			if (width == 1 || !i)
				return_mask |= BIT(eci[n].engine_instance);
			current_mask |= BIT(eci[n].engine_instance);
		}

		/* Parallel submissions must be logically contiguous */
		if (i && XE_IOCTL_DBG(xe, current_mask != prev_mask << 1))
			return 0;

		prev_mask = current_mask;
	}

	return return_mask;
}

int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_exec_queue_create *args = data;
	struct drm_xe_engine_class_instance eci[XE_HW_ENGINE_MAX_INSTANCE];
	struct drm_xe_engine_class_instance __user *user_eci =
		u64_to_user_ptr(args->instances);
	struct xe_hw_engine *hwe;
	struct xe_vm *vm;
	struct xe_gt *gt;
	struct xe_tile *tile;
	struct xe_exec_queue *q = NULL;
	u32 logical_mask;
	u32 id;
	u32 len;
	int err;

	if (XE_IOCTL_DBG(xe, args->flags) ||
	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return -EINVAL;

	len = args->width * args->num_placements;
	if (XE_IOCTL_DBG(xe, !len || len > XE_HW_ENGINE_MAX_INSTANCE))
		return -EINVAL;

	err = __copy_from_user(eci, user_eci,
			       sizeof(struct drm_xe_engine_class_instance) *
			       len);
	if (XE_IOCTL_DBG(xe, err))
		return -EFAULT;

	if (XE_IOCTL_DBG(xe, eci[0].gt_id >= xe->info.gt_count))
		return -EINVAL;

	if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) {
		if (XE_IOCTL_DBG(xe, args->width != 1) ||
		    XE_IOCTL_DBG(xe, args->num_placements != 1) ||
		    XE_IOCTL_DBG(xe, eci[0].engine_instance != 0))
			return -EINVAL;

		for_each_tile(tile, xe, id) {
			struct xe_exec_queue *new;
			u32 flags = EXEC_QUEUE_FLAG_VM;

			if (id)
				flags |= EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD;

			new = xe_exec_queue_create_bind(xe, tile, flags,
							args->extensions);
			if (IS_ERR(new)) {
				err = PTR_ERR(new);
				if (q)
					goto put_exec_queue;
				return err;
			}
			if (id == 0)
				q = new;
			else
				list_add_tail(&new->multi_gt_list,
					      &q->multi_gt_link);
		}
	} else {
		gt = xe_device_get_gt(xe, eci[0].gt_id);
		logical_mask = calc_validate_logical_mask(xe, gt, eci,
							  args->width,
							  args->num_placements);
		if (XE_IOCTL_DBG(xe, !logical_mask))
			return -EINVAL;

		hwe = xe_hw_engine_lookup(xe, eci[0]);
		if (XE_IOCTL_DBG(xe, !hwe))
			return -EINVAL;

		vm = xe_vm_lookup(xef, args->vm_id);
		if (XE_IOCTL_DBG(xe, !vm))
			return -ENOENT;

		err = down_read_interruptible(&vm->lock);
		if (err) {
			xe_vm_put(vm);
			return err;
		}

		if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
			up_read(&vm->lock);
			xe_vm_put(vm);
			return -ENOENT;
		}

		q = xe_exec_queue_create(xe, vm, logical_mask,
					 args->width, hwe, 0,
					 args->extensions);
		up_read(&vm->lock);
		xe_vm_put(vm);
		if (IS_ERR(q))
			return PTR_ERR(q);

		if (xe_vm_in_preempt_fence_mode(vm)) {
			q->lr.context = dma_fence_context_alloc(1);

			err = xe_vm_add_compute_exec_queue(vm, q);
			if (XE_IOCTL_DBG(xe, err))
				goto put_exec_queue;
		}

		if (q->vm && q->hwe->hw_engine_group) {
			err = xe_hw_engine_group_add_exec_queue(q->hwe->hw_engine_group, q);
			if (err)
				goto put_exec_queue;
		}
	}

	q->xef = xe_file_get(xef);

	/* user id alloc must always be last in ioctl to prevent UAF */
	err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL);
	if (err)
		goto kill_exec_queue;

	args->exec_queue_id = id;

	return 0;

kill_exec_queue:
	xe_exec_queue_kill(q);
put_exec_queue:
	xe_exec_queue_put(q);
	return err;
}

int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data,
				     struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_exec_queue_get_property *args = data;
	struct xe_exec_queue *q;
	int ret;

	if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return -EINVAL;

	q = xe_exec_queue_lookup(xef, args->exec_queue_id);
	if (XE_IOCTL_DBG(xe, !q))
		return -ENOENT;

	switch (args->property) {
	case DRM_XE_EXEC_QUEUE_GET_PROPERTY_BAN:
		args->value = q->ops->reset_status(q);
		ret = 0;
		break;
	default:
		ret = -EINVAL;
	}

	xe_exec_queue_put(q);

	return ret;
}

/**
 * xe_exec_queue_is_lr() - Whether an exec_queue is long-running
 * @q: The exec_queue
 *
 * Return: True if the exec_queue is long-running, false otherwise.
 */
bool xe_exec_queue_is_lr(struct xe_exec_queue *q)
{
	return q->vm && xe_vm_in_lr_mode(q->vm) &&
		!(q->flags & EXEC_QUEUE_FLAG_VM);
}

static s32 xe_exec_queue_num_job_inflight(struct xe_exec_queue *q)
{
	return q->lrc[0]->fence_ctx.next_seqno - xe_lrc_seqno(q->lrc[0]) - 1;
}

/**
 * xe_exec_queue_ring_full() - Whether an exec_queue's ring is full
 * @q: The exec_queue
 *
 * Return: True if the exec_queue's ring is full, false otherwise.
 */
bool xe_exec_queue_ring_full(struct xe_exec_queue *q)
{
	struct xe_lrc *lrc = q->lrc[0];
	s32 max_job = lrc->ring.size / MAX_JOB_SIZE_BYTES;

	return xe_exec_queue_num_job_inflight(q) >= max_job;
}

/**
 * xe_exec_queue_is_idle() - Whether an exec_queue is idle.
 * @q: The exec_queue
 *
 * FIXME: Need to determine what to use as the short-lived
 * timeline lock for the exec_queues, so that the return value
 * of this function becomes more than just an advisory
 * snapshot in time. The timeline lock must protect the
 * seqno from racing submissions on the same exec_queue.
 * Typically vm->resv, but user-created timeline locks use the migrate vm
 * and never grab the migrate vm->resv, so we have a race there.
 *
 * Return: True if the exec_queue is idle, false otherwise.
 */
bool xe_exec_queue_is_idle(struct xe_exec_queue *q)
{
	if (xe_exec_queue_is_parallel(q)) {
		int i;

		for (i = 0; i < q->width; ++i) {
			if (xe_lrc_seqno(q->lrc[i]) !=
			    q->lrc[i]->fence_ctx.next_seqno - 1)
				return false;
		}

		return true;
	}

	return xe_lrc_seqno(q->lrc[0]) ==
		q->lrc[0]->fence_ctx.next_seqno - 1;
}

/**
 * xe_exec_queue_update_run_ticks() - Update run time in ticks for this exec queue
 * from hw
 * @q: The exec queue
 *
 * Update the timestamp saved by HW for this exec queue and save run ticks
 * calculated by using the delta from the last update.
 */
void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
{
	struct xe_device *xe = gt_to_xe(q->gt);
	struct xe_lrc *lrc;
	u32 old_ts, new_ts;
	int idx;

	/*
	 * Jobs that are executed by the kernel don't have a corresponding
	 * xe_file and thus are not accounted.
	 */
	if (!q->xef)
		return;

	/* Synchronize with unbind while holding the xe file open */
	if (!drm_dev_enter(&xe->drm, &idx))
		return;
	/*
	 * Only sample the first LRC. For parallel submission, all of them are
	 * scheduled together and we compensate that below by multiplying by
	 * width - this may introduce errors if that premise is not true and
	 * they don't exit 100% aligned. On the other hand, looping through
	 * the LRCs and reading them at different times could also introduce
	 * errors.
	 */
	lrc = q->lrc[0];
	new_ts = xe_lrc_update_timestamp(lrc, &old_ts);
	q->xef->run_ticks[q->class] += (new_ts - old_ts) * q->width;

	drm_dev_exit(idx);
}

/**
 * xe_exec_queue_kill - permanently stop all execution from an exec queue
 * @q: The exec queue
 *
 * This function permanently stops all activity on an exec queue. If the queue
 * is actively executing on the HW, it will be kicked off the engine; any
 * pending jobs are discarded and all future submissions are rejected.
 * This function is safe to call multiple times.
 */
void xe_exec_queue_kill(struct xe_exec_queue *q)
{
	struct xe_exec_queue *eq = q, *next;

	list_for_each_entry_safe(eq, next, &eq->multi_gt_list,
				 multi_gt_link) {
		q->ops->kill(eq);
		xe_vm_remove_compute_exec_queue(q->vm, eq);
	}

	q->ops->kill(q);
	xe_vm_remove_compute_exec_queue(q->vm, q);
}

int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data,
				struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = to_xe_file(file);
	struct drm_xe_exec_queue_destroy *args = data;
	struct xe_exec_queue *q;

	if (XE_IOCTL_DBG(xe, args->pad) ||
	    XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
		return -EINVAL;

	mutex_lock(&xef->exec_queue.lock);
	q = xa_erase(&xef->exec_queue.xa, args->exec_queue_id);
	if (q)
		atomic_inc(&xef->exec_queue.pending_removal);
	mutex_unlock(&xef->exec_queue.lock);

	if (XE_IOCTL_DBG(xe, !q))
		return -ENOENT;

	if (q->vm && q->hwe->hw_engine_group)
		xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q);

	xe_exec_queue_kill(q);

	trace_xe_exec_queue_close(q);
	xe_exec_queue_put(q);

	return 0;
}

static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q,
						    struct xe_vm *vm)
{
	if (q->flags & EXEC_QUEUE_FLAG_VM) {
		lockdep_assert_held(&vm->lock);
	} else {
		xe_vm_assert_held(vm);
		lockdep_assert_held(&q->hwe->hw_engine_group->mode_sem);
	}
}

/**
 * xe_exec_queue_last_fence_put() - Drop ref to last fence
 * @q: The exec queue
 * @vm: The VM the engine does a bind or exec for
 */
void xe_exec_queue_last_fence_put(struct xe_exec_queue *q, struct xe_vm *vm)
{
	xe_exec_queue_last_fence_lockdep_assert(q, vm);

	xe_exec_queue_last_fence_put_unlocked(q);
}

/**
 * xe_exec_queue_last_fence_put_unlocked() - Drop ref to last fence unlocked
 * @q: The exec queue
 *
 * Only safe to be called from xe_exec_queue_destroy().
 */
void xe_exec_queue_last_fence_put_unlocked(struct xe_exec_queue *q)
{
	if (q->last_fence) {
		dma_fence_put(q->last_fence);
		q->last_fence = NULL;
	}
}

/**
 * xe_exec_queue_last_fence_get() - Get last fence
 * @q: The exec queue
 * @vm: The VM the engine does a bind or exec for
 *
 * Get last fence, takes a ref
 *
 * Returns: last fence if not signaled, dma fence stub if signaled
 */
struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *q,
					       struct xe_vm *vm)
{
	struct dma_fence *fence;

	xe_exec_queue_last_fence_lockdep_assert(q, vm);

	if (q->last_fence &&
	    test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &q->last_fence->flags))
		xe_exec_queue_last_fence_put(q, vm);

	fence = q->last_fence ? q->last_fence : dma_fence_get_stub();
	dma_fence_get(fence);
	return fence;
}

/**
 * xe_exec_queue_last_fence_get_for_resume() - Get last fence
 * @q: The exec queue
 * @vm: The VM the engine does a bind or exec for
 *
 * Get last fence, takes a ref. Only safe to be called in the context of
 * resuming the hw engine group's long-running exec queue, when the group
 * semaphore is held.
 *
 * Returns: last fence if not signaled, dma fence stub if signaled
 */
struct dma_fence *xe_exec_queue_last_fence_get_for_resume(struct xe_exec_queue *q,
							   struct xe_vm *vm)
{
	struct dma_fence *fence;

	lockdep_assert_held_write(&q->hwe->hw_engine_group->mode_sem);

	if (q->last_fence &&
	    test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &q->last_fence->flags))
		xe_exec_queue_last_fence_put_unlocked(q);

	fence = q->last_fence ? q->last_fence : dma_fence_get_stub();
	dma_fence_get(fence);
	return fence;
}

/**
 * xe_exec_queue_last_fence_set() - Set last fence
 * @q: The exec queue
 * @vm: The VM the engine does a bind or exec for
 * @fence: The fence
 *
 * Set the last fence for the engine. Increases the reference count of the
 * fence; when closing the engine, xe_exec_queue_last_fence_put() should be
 * called.
 */
void xe_exec_queue_last_fence_set(struct xe_exec_queue *q, struct xe_vm *vm,
				  struct dma_fence *fence)
{
	xe_exec_queue_last_fence_lockdep_assert(q, vm);

	xe_exec_queue_last_fence_put(q, vm);
	q->last_fence = dma_fence_get(fence);
}

/**
 * xe_exec_queue_last_fence_test_dep - Test last fence dependency of queue
 * @q: The exec queue
 * @vm: The VM the engine does a bind or exec for
 *
 * Returns:
 * -ETIME if there exists an unsignalled last fence dependency, zero otherwise.
 */
int xe_exec_queue_last_fence_test_dep(struct xe_exec_queue *q, struct xe_vm *vm)
{
	struct dma_fence *fence;
	int err = 0;

	fence = xe_exec_queue_last_fence_get(q, vm);
	if (fence) {
		err = test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) ?
			0 : -ETIME;
		dma_fence_put(fence);
	}

	return err;
}