1 // SPDX-License-Identifier: GPL-2.0 OR MIT 2 /* 3 * Copyright 2014-2022 Advanced Micro Devices, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21 * OTHER DEALINGS IN THE SOFTWARE. 
 *
 */

#include <linux/ratelimit.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/sched.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_mqd_manager.h"
#include "cik_regs.h"
#include "kfd_kernel_queue.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_reset.h"
#include "mes_v11_api_def.h"
#include "kfd_debug.h"

/* Size of the per-pipe EOP queue */
#define CIK_HPD_EOP_BYTES_LOG2 11
#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)

/* Forward declarations for helpers defined later in this file */
static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
                                  u32 pasid, unsigned int vmid);

static int execute_queues_cpsch(struct device_queue_manager *dqm,
                                enum kfd_unmap_queues_filter filter,
                                uint32_t filter_param,
                                uint32_t grace_period);
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
                              enum kfd_unmap_queues_filter filter,
                              uint32_t filter_param,
                              uint32_t grace_period,
                              bool reset);

static int map_queues_cpsch(struct device_queue_manager *dqm);

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
                                  struct queue *q);

static inline void deallocate_hqd(struct device_queue_manager *dqm,
                                  struct queue *q);
static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q);
static int allocate_sdma_queue(struct device_queue_manager *dqm,
                               struct queue *q, const uint32_t *restore_sdma_id);
static void kfd_process_hw_exception(struct work_struct *work);

/* Map a KFD queue type onto the MQD manager type that services it. */
static inline
enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
{
        /* Both plain and XGMI SDMA queues share the SDMA MQD manager */
        if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI)
                return KFD_MQD_TYPE_SDMA;
        return KFD_MQD_TYPE_CP;
}

/*
 * Return true if at least one queue slot on the given pipe of the given
 * MEC is reserved for KFD use in the shared CP queue bitmap.
 */
static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
{
        int i;
        int pipe_offset = (mec * dqm->dev->kfd->shared_resources.num_pipe_per_mec
                           + pipe) * dqm->dev->kfd->shared_resources.num_queue_per_pipe;

        /* queue is available for KFD usage if bit is 1 */
        for (i = 0; i < dqm->dev->kfd->shared_resources.num_queue_per_pipe; ++i)
                if (test_bit(pipe_offset + i,
                             dqm->dev->kfd->shared_resources.cp_queue_bitmap))
                        return true;
        return false;
}

/* Number of CP queues reserved for KFD across all MECs and pipes. */
unsigned int get_cp_queues_num(struct device_queue_manager *dqm)
{
        return bitmap_weight(dqm->dev->kfd->shared_resources.cp_queue_bitmap,
                             AMDGPU_MAX_QUEUES);
}

unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
{
        return dqm->dev->kfd->shared_resources.num_queue_per_pipe;
}

unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
{
        return dqm->dev->kfd->shared_resources.num_pipe_per_mec;
}

/* Total SDMA engines on this node: regular plus XGMI-dedicated ones. */
static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm)
{
        return kfd_get_num_sdma_engines(dqm->dev) +
                kfd_get_num_xgmi_sdma_engines(dqm->dev);
}

unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
{
        return kfd_get_num_sdma_engines(dqm->dev) *
                dqm->dev->kfd->device_info.num_sdma_queues_per_engine;
}

unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm)
{
        return kfd_get_num_xgmi_sdma_engines(dqm->dev) *
                dqm->dev->kfd->device_info.num_sdma_queues_per_engine;
}

/*
 * Initialize the SDMA and XGMI-SDMA allocation bitmaps: one set bit per
 * available queue, minus the queues the device reserves for itself.
 */
static void init_sdma_bitmaps(struct device_queue_manager *dqm)
{
        bitmap_zero(dqm->sdma_bitmap, KFD_MAX_SDMA_QUEUES);
        bitmap_set(dqm->sdma_bitmap, 0, get_num_sdma_queues(dqm));

        bitmap_zero(dqm->xgmi_sdma_bitmap, KFD_MAX_SDMA_QUEUES);
        bitmap_set(dqm->xgmi_sdma_bitmap, 0, get_num_xgmi_sdma_queues(dqm));

        /* Mask out the reserved queues */
        bitmap_andnot(dqm->sdma_bitmap, dqm->sdma_bitmap,
                      dqm->dev->kfd->device_info.reserved_sdma_queues_bitmap,
                      KFD_MAX_SDMA_QUEUES);
}

/* Program per-VMID shared-memory aperture settings on every XCC instance. */
void program_sh_mem_settings(struct
device_queue_manager *dqm,
                             struct qcm_process_device *qpd)
{
        uint32_t xcc_mask = dqm->dev->xcc_mask;
        int xcc_id;

        /* Apply the aperture settings to every XCC instance in the mask */
        for_each_inst(xcc_id, xcc_mask)
                dqm->dev->kfd2kgd->program_sh_mem_settings(
                        dqm->dev->adev, qpd->vmid, qpd->sh_mem_config,
                        qpd->sh_mem_ape1_base, qpd->sh_mem_ape1_limit,
                        qpd->sh_mem_bases, xcc_id);
}

/* Schedule deferred handling for an unresponsive HW scheduler. */
static void kfd_hws_hang(struct device_queue_manager *dqm)
{
        /*
         * Issue a GPU reset if HWS is unresponsive
         */
        schedule_work(&dqm->hw_exception_work);
}

/* Translate a KFD queue type to the MES firmware queue type, or -EINVAL. */
static int convert_to_mes_queue_type(int queue_type)
{
        int mes_queue_type;

        switch (queue_type) {
        case KFD_QUEUE_TYPE_COMPUTE:
                mes_queue_type = MES_QUEUE_TYPE_COMPUTE;
                break;
        case KFD_QUEUE_TYPE_SDMA:
                mes_queue_type = MES_QUEUE_TYPE_SDMA;
                break;
        default:
                WARN(1, "Invalid queue type %d", queue_type);
                mes_queue_type = -EINVAL;
                break;
        }

        return mes_queue_type;
}

/*
 * Hand a queue to the MES firmware scheduler.
 *
 * Fails with -EIO if a GPU reset is in progress (reset_domain sem is held
 * for write during reset). On MES failure, schedules a GPU reset via
 * kfd_hws_hang() since MES state is presumed unrecoverable.
 */
static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
                         struct qcm_process_device *qpd)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
        struct kfd_process_device *pdd = qpd_to_pdd(qpd);
        struct mes_add_queue_input queue_input;
        int r, queue_type;
        uint64_t wptr_addr_off;

        if (!down_read_trylock(&adev->reset_domain->sem))
                return -EIO;

        memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input));
        queue_input.process_id = qpd->pqm->process->pasid;
        queue_input.page_table_base_addr = qpd->page_table_base;
        queue_input.process_va_start = 0;
        queue_input.process_va_end = adev->vm_manager.max_pfn - 1;
        /* MES unit for quantum is 100ns */
        queue_input.process_quantum = KFD_MES_PROCESS_QUANTUM; /* Equivalent to 10ms. */
        queue_input.process_context_addr = pdd->proc_ctx_gpu_addr;
        queue_input.gang_quantum = KFD_MES_GANG_QUANTUM; /* Equivalent to 1ms */
        queue_input.gang_context_addr = q->gang_ctx_gpu_addr;
        queue_input.inprocess_gang_priority = q->properties.priority;
        queue_input.gang_global_priority_level =
                        AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
        queue_input.doorbell_offset = q->properties.doorbell_off;
        queue_input.mqd_addr = q->gart_mqd_addr;
        queue_input.wptr_addr = (uint64_t)q->properties.write_ptr;

        /* Page offset of the write pointer within its BO */
        wptr_addr_off = (uint64_t)q->properties.write_ptr & (PAGE_SIZE - 1);
        queue_input.wptr_mc_addr = amdgpu_bo_gpu_offset(q->properties.wptr_bo) + wptr_addr_off;

        queue_input.is_kfd_process = 1;
        queue_input.is_aql_queue = (q->properties.format == KFD_QUEUE_FORMAT_AQL);
        queue_input.queue_size = q->properties.queue_size >> 2;

        queue_input.paging = false;
        queue_input.tba_addr = qpd->tba_addr;
        queue_input.tma_addr = qpd->tma_addr;
        queue_input.trap_en = !kfd_dbg_has_cwsr_workaround(q->device);
        queue_input.skip_process_ctx_clear =
                qpd->pqm->process->runtime_info.runtime_state == DEBUG_RUNTIME_STATE_ENABLED &&
                                (qpd->pqm->process->debug_trap_enabled ||
                                 kfd_dbg_has_ttmps_always_setup(q->device));

        queue_type = convert_to_mes_queue_type(q->properties.type);
        if (queue_type < 0) {
                dev_err(adev->dev, "Queue type not supported with MES, queue:%d\n",
                        q->properties.type);
                up_read(&adev->reset_domain->sem);
                return -EINVAL;
        }
        queue_input.queue_type = (uint32_t)queue_type;

        queue_input.exclusively_scheduled = q->properties.is_gws;

        amdgpu_mes_lock(&adev->mes);
        r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
        amdgpu_mes_unlock(&adev->mes);
        up_read(&adev->reset_domain->sem);
        if (r) {
                dev_err(adev->dev, "failed to add hardware queue to MES, doorbell=0x%x\n",
                        q->properties.doorbell_off);
                dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n");
                kfd_hws_hang(dqm);
        }

        return r;
}

/*
 * Remove a queue from the MES firmware scheduler. Mirrors add_queue_mes():
 * -EIO during reset, GPU reset scheduled on MES failure.
 */
static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q,
                            struct qcm_process_device *qpd)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
        int r;
        struct mes_remove_queue_input queue_input;

        if (!down_read_trylock(&adev->reset_domain->sem))
                return -EIO;

        memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input));
        queue_input.doorbell_offset = q->properties.doorbell_off;
        queue_input.gang_context_addr = q->gang_ctx_gpu_addr;

        amdgpu_mes_lock(&adev->mes);
        r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input);
        amdgpu_mes_unlock(&adev->mes);
        up_read(&adev->reset_domain->sem);

        if (r) {
                dev_err(adev->dev, "failed to remove hardware queue from MES, doorbell=0x%x\n",
                        q->properties.doorbell_off);
                dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n");
                kfd_hws_hang(dqm);
        }

        return r;
}

/*
 * Remove every active queue of every process on this DQM from MES.
 * Stops at, and returns, the first failure.
 */
static int remove_all_queues_mes(struct device_queue_manager *dqm)
{
        struct device_process_node *cur;
        struct device *dev = dqm->dev->adev->dev;
        struct qcm_process_device *qpd;
        struct queue *q;
        int retval = 0;

        list_for_each_entry(cur, &dqm->queues, list) {
                qpd = cur->qpd;
                list_for_each_entry(q, &qpd->queues_list, list) {
                        if (q->properties.is_active) {
                                retval = remove_queue_mes(dqm, q, qpd);
                                if (retval) {
                                        dev_err(dev, "%s: Failed to remove queue %d for dev %d",
                                                __func__,
                                                q->properties.queue_id,
                                                dqm->dev->id);
                                        return retval;
                                }
                        }
                }
        }

        return retval;
}

/* Bump the DQM/QPD accounting counters when a queue becomes active. */
static void increment_queue_count(struct device_queue_manager *dqm,
                                  struct qcm_process_device *qpd,
                                  struct queue *q)
{
        dqm->active_queue_count++;
        if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
            q->properties.type ==
KFD_QUEUE_TYPE_DIQ)
                dqm->active_cp_queue_count++;

        if (q->properties.is_gws) {
                dqm->gws_queue_count++;
                qpd->mapped_gws_queue = true;
        }
}

/* Undo the accounting done by increment_queue_count(). */
static void decrement_queue_count(struct device_queue_manager *dqm,
                                  struct qcm_process_device *qpd,
                                  struct queue *q)
{
        dqm->active_queue_count--;
        if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
            q->properties.type == KFD_QUEUE_TYPE_DIQ)
                dqm->active_cp_queue_count--;

        if (q->properties.is_gws) {
                dqm->gws_queue_count--;
                qpd->mapped_gws_queue = false;
        }
}

/*
 * Allocate a doorbell ID to this queue.
 * If doorbell_id is passed in, make sure requested ID is valid then allocate it.
 */
static int allocate_doorbell(struct qcm_process_device *qpd,
                             struct queue *q,
                             uint32_t const *restore_id)
{
        struct kfd_node *dev = qpd->dqm->dev;

        if (!KFD_IS_SOC15(dev)) {
                /* On pre-SOC15 chips we need to use the queue ID to
                 * preserve the user mode ABI.
                 */

                if (restore_id && *restore_id != q->properties.queue_id)
                        return -EINVAL;

                q->doorbell_id = q->properties.queue_id;
        } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
                   q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
                /* For SDMA queues on SOC15 with 8-byte doorbell, use static
                 * doorbell assignments based on the engine and queue id.
                 * The doorbell index distance between RLC (2*i) and (2*i+1)
                 * for a SDMA engine is 512.
                 */

                uint32_t *idx_offset = dev->kfd->shared_resources.sdma_doorbell_idx;

                /*
                 * q->properties.sdma_engine_id corresponds to the virtual
                 * sdma engine number. However, for doorbell allocation,
                 * we need the physical sdma engine id in order to get the
                 * correct doorbell offset.
                 */
                uint32_t valid_id = idx_offset[qpd->dqm->dev->node_id *
                                               get_num_all_sdma_engines(qpd->dqm) +
                                               q->properties.sdma_engine_id]
                                                + (q->properties.sdma_queue_id & 1)
                                                * KFD_QUEUE_DOORBELL_MIRROR_OFFSET
                                                + (q->properties.sdma_queue_id >> 1);

                if (restore_id && *restore_id != valid_id)
                        return -EINVAL;
                q->doorbell_id = valid_id;
        } else {
                /* For CP queues on SOC15 */
                if (restore_id) {
                        /* make sure that ID is free */
                        if (__test_and_set_bit(*restore_id, qpd->doorbell_bitmap))
                                return -EINVAL;

                        q->doorbell_id = *restore_id;
                } else {
                        /* or reserve a free doorbell ID */
                        unsigned int found;

                        found = find_first_zero_bit(qpd->doorbell_bitmap,
                                                    KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
                        if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
                                pr_debug("No doorbells available");
                                return -EBUSY;
                        }
                        set_bit(found, qpd->doorbell_bitmap);
                        q->doorbell_id = found;
                }
        }

        /* Translate the per-process doorbell ID into a BAR offset */
        q->properties.doorbell_off = amdgpu_doorbell_index_on_bar(dev->adev,
                                                                  qpd->proc_doorbells,
                                                                  q->doorbell_id,
                                                                  dev->kfd->device_info.doorbell_size);
        return 0;
}

/*
 * Return a CP doorbell ID to the process bitmap. SDMA and pre-SOC15
 * doorbell IDs are statically assigned, so nothing to free there.
 */
static void deallocate_doorbell(struct qcm_process_device *qpd,
                                struct queue *q)
{
        unsigned int old;
        struct kfd_node *dev = qpd->dqm->dev;

        if (!KFD_IS_SOC15(dev) ||
            q->properties.type == KFD_QUEUE_TYPE_SDMA ||
            q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
                return;

        old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap);
        WARN_ON(!old);
}

/* Program trap handler (TBA/TMA) addresses for a VMID on all XCC instances. */
static void program_trap_handler_settings(struct device_queue_manager *dqm,
                                struct qcm_process_device *qpd)
{
        uint32_t xcc_mask = dqm->dev->xcc_mask;
        int xcc_id;

        if (dqm->dev->kfd2kgd->program_trap_handler_settings)
                for_each_inst(xcc_id, xcc_mask)
                        dqm->dev->kfd2kgd->program_trap_handler_settings(
                                dqm->dev->adev, qpd->vmid, qpd->tba_addr,
                                qpd->tma_addr, xcc_id);
}

/* Allocate a VMID for a process when its first queue is created (non-HWS). */
static int
allocate_vmid(struct device_queue_manager *dqm,
              struct qcm_process_device *qpd,
              struct queue *q)
{
        struct device *dev = dqm->dev->adev->dev;
        int allocated_vmid = -1, i;

        /* Find the first free KFD VMID (vmid_pasid[] == 0 means unused) */
        for (i = dqm->dev->vm_info.first_vmid_kfd;
             i <= dqm->dev->vm_info.last_vmid_kfd; i++) {
                if (!dqm->vmid_pasid[i]) {
                        allocated_vmid = i;
                        break;
                }
        }

        if (allocated_vmid < 0) {
                dev_err(dev, "no more vmid to allocate\n");
                return -ENOSPC;
        }

        pr_debug("vmid allocated: %d\n", allocated_vmid);

        dqm->vmid_pasid[allocated_vmid] = q->process->pasid;

        set_pasid_vmid_mapping(dqm, q->process->pasid, allocated_vmid);

        qpd->vmid = allocated_vmid;
        q->properties.vmid = allocated_vmid;

        program_sh_mem_settings(dqm, qpd);

        if (KFD_IS_SOC15(dqm->dev) && dqm->dev->kfd->cwsr_enabled)
                program_trap_handler_settings(dqm, qpd);

        /* qpd->page_table_base is set earlier when register_process()
         * is called, i.e. when the first queue is created.
         */
        dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->adev,
                        qpd->vmid,
                        qpd->page_table_base);
        /* invalidate the VM context after pasid and vmid mapping is set up */
        kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);

        if (dqm->dev->kfd2kgd->set_scratch_backing_va)
                dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->adev,
                                qpd->sh_hidden_private_base, qpd->vmid);

        return 0;
}

/* Flush texture cache by submitting a release_mem packet on MEC1 (non-HWS). */
static int flush_texture_cache_nocpsch(struct kfd_node *kdev,
                                struct qcm_process_device *qpd)
{
        const struct packet_manager_funcs *pmf = qpd->dqm->packet_mgr.pmf;
        int ret;

        if (!qpd->ib_kaddr)
                return -ENOMEM;

        /* Build the release_mem packet into the process IB buffer */
        ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
        if (ret)
                return ret;

        return amdgpu_amdkfd_submit_ib(kdev->adev, KGD_ENGINE_MEC1, qpd->vmid,
                                qpd->ib_base, (uint32_t *)qpd->ib_kaddr,
                                pmf->release_mem_size / sizeof(uint32_t));
}

/* Release a process VMID when its last queue is destroyed (non-HWS). */
static void deallocate_vmid(struct device_queue_manager *dqm,
                                struct qcm_process_device *qpd,
                                struct queue *q)
{
        struct device *dev = dqm->dev->adev->dev;

        /* On GFX v7, CP doesn't flush TC at dequeue */
        if (q->device->adev->asic_type == CHIP_HAWAII)
                if (flush_texture_cache_nocpsch(q->device, qpd))
                        dev_err(dev, "Failed to flush TC\n");

        kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);

        /* Release the vmid mapping */
        set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
        dqm->vmid_pasid[qpd->vmid] = 0;

        qpd->vmid = 0;
        q->properties.vmid = 0;
}

/*
 * Create a user-mode queue without the HW scheduler (no CP scheduling).
 * Allocates VMID (first queue of the process), HQD or SDMA slot, doorbell
 * and MQD, then loads the MQD to HW if the queue is active. On failure,
 * unwinds allocations in reverse order via the goto ladder.
 */
static int create_queue_nocpsch(struct device_queue_manager *dqm,
                                struct queue *q,
                                struct qcm_process_device *qpd,
                                const struct kfd_criu_queue_priv_data *qd,
                                const void *restore_mqd, const void *restore_ctl_stack)
{
        struct mqd_manager *mqd_mgr;
        int retval;

        dqm_lock(dqm);

        if (dqm->total_queue_count >= max_num_of_queues_per_device) {
                pr_warn("Can't create new usermode queue because %d queues were already created\n",
                                dqm->total_queue_count);
                retval = -EPERM;
                goto out_unlock;
        }

        /* First queue of the process: grab a VMID for it */
        if (list_empty(&qpd->queues_list)) {
                retval = allocate_vmid(dqm, qpd, q);
                if (retval)
                        goto out_unlock;
        }
        q->properties.vmid = qpd->vmid;
        /*
         * Eviction state logic: mark all queues as evicted, even ones
         * not currently active. Restoring inactive queues later only
         * updates the is_evicted flag but is a no-op otherwise.
         */
        q->properties.is_evicted = !!qpd->evicted;

        q->properties.tba_addr = qpd->tba_addr;
        q->properties.tma_addr = qpd->tma_addr;

        mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
                        q->properties.type)];
        if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
                retval = allocate_hqd(dqm, q);
                if (retval)
                        goto deallocate_vmid;
                pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
                        q->pipe, q->queue);
        } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
                q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
                retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL);
                if (retval)
                        goto deallocate_vmid;
                dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
        }

        retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL);
        if (retval)
                goto out_deallocate_hqd;

        /* Temporarily release dqm lock to avoid a circular lock dependency */
        dqm_unlock(dqm);
        q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties);
        dqm_lock(dqm);

        if (!q->mqd_mem_obj) {
                retval = -ENOMEM;
                goto out_deallocate_doorbell;
        }

        /* CRIU restore path provides a saved MQD and control stack */
        if (qd)
                mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr,
                                     &q->properties, restore_mqd, restore_ctl_stack,
                                     qd->ctl_stack_size);
        else
                mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
                                        &q->gart_mqd_addr, &q->properties);

        if (q->properties.is_active) {
                if (!dqm->sched_running) {
                        WARN_ONCE(1, "Load non-HWS mqd while stopped\n");
                        goto add_queue_to_list;
                }

                if (WARN(q->process->mm != current->mm,
                                        "should only run in user thread"))
                        retval = -EFAULT;
                else
                        retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
                                        q->queue, &q->properties, current->mm);
                if (retval)
                        goto out_free_mqd;
        }

add_queue_to_list:
        list_add(&q->list, &qpd->queues_list);
        qpd->queue_count++;
        if (q->properties.is_active)
                increment_queue_count(dqm, qpd, q);

        /*
         * Unconditionally increment this counter, regardless of the queue's
         * type or whether the queue is active.
         */
        dqm->total_queue_count++;
        pr_debug("Total of %d queues are accountable so far\n",
                        dqm->total_queue_count);
        goto out_unlock;

out_free_mqd:
        mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
out_deallocate_doorbell:
        deallocate_doorbell(qpd, q);
out_deallocate_hqd:
        if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
                deallocate_hqd(dqm, q);
        else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
                        q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
                deallocate_sdma_queue(dqm, q);
deallocate_vmid:
        if (list_empty(&qpd->queues_list))
                deallocate_vmid(dqm, qpd, q);
out_unlock:
        dqm_unlock(dqm);
        return retval;
}

/*
 * Pick a free HQD slot, round-robin across pipes starting at
 * next_pipe_to_allocate so queues spread horizontally over pipes.
 */
static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
{
        bool set;
        int pipe, bit, i;

        set = false;

        for (pipe = dqm->next_pipe_to_allocate, i = 0;
                        i < get_pipes_per_mec(dqm);
                        pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {

                if (!is_pipe_enabled(dqm, 0, pipe))
                        continue;

                if (dqm->allocated_queues[pipe] != 0) {
                        /* lowest set bit = first free queue slot on this pipe */
                        bit = ffs(dqm->allocated_queues[pipe]) - 1;
                        dqm->allocated_queues[pipe] &= ~(1 << bit);
                        q->pipe = pipe;
                        q->queue = bit;
                        set = true;
                        break;
                }
        }

        if (!set)
                return -EBUSY;

        pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue);
        /* horizontal hqd allocation */
        dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);

        return 0;
}

/* Return the HQD slot to the per-pipe free bitmap. */
static inline void deallocate_hqd(struct device_queue_manager *dqm,
                                struct queue *q)
{
        dqm->allocated_queues[q->pipe] |= (1 << q->queue);
}

#define SQ_IND_CMD_CMD_KILL             0x00000003
#define SQ_IND_CMD_MODE_BROADCAST       0x00000001

/*
 * Kill all wavefronts of a process by broadcasting a SQ KILL command
 * to the VMID the process is currently mapped to.
 */
static int dbgdev_wave_reset_wavefronts(struct kfd_node *dev, struct kfd_process *p)
{
        int status = 0;
        unsigned int vmid;
        uint16_t queried_pasid;
        union SQ_CMD_BITS reg_sq_cmd;
        union GRBM_GFX_INDEX_BITS
reg_gfx_index;
        struct kfd_process_device *pdd;
        int first_vmid_to_scan = dev->vm_info.first_vmid_kfd;
        int last_vmid_to_scan = dev->vm_info.last_vmid_kfd;
        uint32_t xcc_mask = dev->xcc_mask;
        int xcc_id;

        reg_sq_cmd.u32All = 0;
        reg_gfx_index.u32All = 0;

        pr_debug("Killing all process wavefronts\n");

        if (!dev->kfd2kgd->get_atc_vmid_pasid_mapping_info) {
                dev_err(dev->adev->dev, "no vmid pasid mapping supported\n");
                return -EOPNOTSUPP;
        }

        /* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
         * ATC_VMID15_PASID_MAPPING
         * to check which VMID the current process is mapped to.
         */

        for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
                status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info
                                (dev->adev, vmid, &queried_pasid);

                if (status && queried_pasid == p->pasid) {
                        pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n",
                                        vmid, p->pasid);
                        break;
                }
        }

        if (vmid > last_vmid_to_scan) {
                dev_err(dev->adev->dev, "Didn't find vmid for pasid 0x%x\n", p->pasid);
                return -EFAULT;
        }

        /* taking the VMID for that process on the safe way using PDD */
        pdd = kfd_get_process_device_data(dev, p);
        if (!pdd)
                return -EFAULT;

        /* Broadcast the KILL to all shader engines/arrays/instances */
        reg_gfx_index.bits.sh_broadcast_writes = 1;
        reg_gfx_index.bits.se_broadcast_writes = 1;
        reg_gfx_index.bits.instance_broadcast_writes = 1;
        reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
        reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
        reg_sq_cmd.bits.vm_id = vmid;

        for_each_inst(xcc_id, xcc_mask)
                dev->kfd2kgd->wave_control_execute(
                                dev->adev, reg_gfx_index.u32All,
                                reg_sq_cmd.u32All, xcc_id);

        return 0;
}

/* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked
 * to avoid asynchronized access
 */
static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
                                struct qcm_process_device *qpd,
                                struct queue *q)
{
        int retval;
        struct mqd_manager *mqd_mgr;

        mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
                        q->properties.type)];

        /* Release the HW slot first, according to the queue type */
        if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
                deallocate_hqd(dqm, q);
        else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
                deallocate_sdma_queue(dqm, q);
        else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
                deallocate_sdma_queue(dqm, q);
        else {
                pr_debug("q->properties.type %d is invalid\n",
                                q->properties.type);
                return -EINVAL;
        }
        dqm->total_queue_count--;

        deallocate_doorbell(qpd, q);

        if (!dqm->sched_running) {
                WARN_ONCE(1, "Destroy non-HWS queue while stopped\n");
                return 0;
        }

        retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
                                KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
                                KFD_UNMAP_LATENCY_MS,
                                q->pipe, q->queue);
        /* Preemption timed out: remember to reset wavefronts later */
        if (retval == -ETIME)
                qpd->reset_wavefronts = true;

        list_del(&q->list);
        if (list_empty(&qpd->queues_list)) {
                if (qpd->reset_wavefronts) {
                        pr_warn("Resetting wave fronts (nocpsch) on dev %p\n",
                                        dqm->dev);
                        /* dbgdev_wave_reset_wavefronts has to be called before
                         * deallocate_vmid(), i.e. when vmid is still in use.
                         */
                        dbgdev_wave_reset_wavefronts(dqm->dev,
                                        qpd->pqm->process);
                        qpd->reset_wavefronts = false;
                }

                deallocate_vmid(dqm, qpd, q);
        }
        qpd->queue_count--;
        if (q->properties.is_active)
                decrement_queue_count(dqm, qpd, q);

        return retval;
}

/* Destroy a queue in non-HWS mode: take the DQM lock and free the MQD. */
static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
                                struct qcm_process_device *qpd,
                                struct queue *q)
{
        int retval;
        uint64_t sdma_val = 0;
        struct device *dev = dqm->dev->adev->dev;
        struct kfd_process_device *pdd = qpd_to_pdd(qpd);
        struct mqd_manager *mqd_mgr =
                dqm->mqd_mgrs[get_mqd_type_from_queue_type(q->properties.type)];

        /* Get the SDMA queue stats */
        if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
            (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
                retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
                                                        &sdma_val);
                if (retval)
                        dev_err(dev, "Failed to read SDMA queue counter for queue: %d\n",
                                q->properties.queue_id);
        }

        dqm_lock(dqm);
        retval = destroy_queue_nocpsch_locked(dqm, qpd, q);
        if (!retval)
                pdd->sdma_past_activity_counter += sdma_val;
        dqm_unlock(dqm);

        mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);

        return retval;
}

/*
 * Update a queue's MQD with new properties. The queue must be unmapped
 * (HWS/MES) or its MQD destroyed (non-HWS) before the update, then
 * remapped/reloaded if still active.
 */
static int update_queue(struct device_queue_manager *dqm, struct queue *q,
                        struct mqd_update_info *minfo)
{
        int retval = 0;
        struct device *dev = dqm->dev->adev->dev;
        struct mqd_manager *mqd_mgr;
        struct kfd_process_device *pdd;
        bool prev_active = false;

        dqm_lock(dqm);
        pdd = kfd_get_process_device_data(q->device, q->process);
        if (!pdd) {
                retval = -ENODEV;
                goto out_unlock;
        }
        mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
                        q->properties.type)];

        /* Save previous activity state for counters */
        prev_active = q->properties.is_active;

        /* Make sure the queue is unmapped before updating the MQD */
        if
(dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
                if (!dqm->dev->kfd->shared_resources.enable_mes)
                        retval = unmap_queues_cpsch(dqm,
                                        KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD, false);
                else if (prev_active)
                        retval = remove_queue_mes(dqm, q, &pdd->qpd);

                if (retval) {
                        dev_err(dev, "unmap queue failed\n");
                        goto out_unlock;
                }
        } else if (prev_active &&
                   (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
                    q->properties.type == KFD_QUEUE_TYPE_SDMA ||
                    q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {

                if (!dqm->sched_running) {
                        WARN_ONCE(1, "Update non-HWS queue while stopped\n");
                        goto out_unlock;
                }

                /* Save waves if CWSR is on, otherwise drain them */
                retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
                                (dqm->dev->kfd->cwsr_enabled ?
                                 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE :
                                 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
                                KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
                if (retval) {
                        dev_err(dev, "destroy mqd failed\n");
                        goto out_unlock;
                }
        }

        mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties, minfo);

        /*
         * check active state vs. the previous state and modify
         * counter accordingly. map_queues_cpsch uses the
         * dqm->active_queue_count to determine whether a new runlist must be
         * uploaded.
         */
        if (q->properties.is_active && !prev_active) {
                increment_queue_count(dqm, &pdd->qpd, q);
        } else if (!q->properties.is_active && prev_active) {
                decrement_queue_count(dqm, &pdd->qpd, q);
        } else if (q->gws && !q->properties.is_gws) {
                /* GWS was attached to the queue: update GWS accounting */
                if (q->properties.is_active) {
                        dqm->gws_queue_count++;
                        pdd->qpd.mapped_gws_queue = true;
                }
                q->properties.is_gws = true;
        } else if (!q->gws && q->properties.is_gws) {
                /* GWS was detached from the queue */
                if (q->properties.is_active) {
                        dqm->gws_queue_count--;
                        pdd->qpd.mapped_gws_queue = false;
                }
                q->properties.is_gws = false;
        }

        /* Remap the queue (HWS/MES) or reload the MQD (non-HWS) */
        if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
                if (!dqm->dev->kfd->shared_resources.enable_mes)
                        retval = map_queues_cpsch(dqm);
                else if (q->properties.is_active)
                        retval = add_queue_mes(dqm, q, &pdd->qpd);
        } else if (q->properties.is_active &&
                 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
                  q->properties.type == KFD_QUEUE_TYPE_SDMA ||
                  q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
                if (WARN(q->process->mm != current->mm,
                         "should only run in user thread"))
                        retval = -EFAULT;
                else
                        retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd,
                                                   q->pipe, q->queue,
                                                   &q->properties, current->mm);
        }

out_unlock:
        dqm_unlock(dqm);
        return retval;
}

/* suspend_single_queue does not lock the dqm like the
 * evict_process_queues_cpsch or evict_process_queues_nocpsch. You should
 * lock the dqm before calling, and unlock after calling.
 *
 * The reason we don't lock the dqm is because this function may be
 * called on multiple queues in a loop, so rather than locking/unlocking
 * multiple times, we will just keep the dqm locked for all of the calls.
 */
static int suspend_single_queue(struct device_queue_manager *dqm,
                                struct kfd_process_device *pdd,
                                struct queue *q)
{
        bool is_new;

        if (q->properties.is_suspended)
                return 0;

        pr_debug("Suspending PASID %u queue [%i]\n",
                        pdd->process->pasid,
                        q->properties.queue_id);

        is_new = q->properties.exception_status & KFD_EC_MASK(EC_QUEUE_NEW);

        /* Brand-new or dying queues cannot be suspended */
        if (is_new || q->properties.is_being_destroyed) {
                pr_debug("Suspend: skip %s queue id %i\n",
                                is_new ? "new" : "destroyed",
                                q->properties.queue_id);
                return -EBUSY;
        }

        q->properties.is_suspended = true;
        if (q->properties.is_active) {
                if (dqm->dev->kfd->shared_resources.enable_mes) {
                        int r = remove_queue_mes(dqm, q, &pdd->qpd);

                        if (r)
                                return r;
                }

                decrement_queue_count(dqm, &pdd->qpd, q);
                q->properties.is_active = false;
        }

        return 0;
}

/* resume_single_queue does not lock the dqm like the functions
 * restore_process_queues_cpsch or restore_process_queues_nocpsch. You should
 * lock the dqm before calling, and unlock after calling.
 *
 * The reason we don't lock the dqm is because this function may be
 * called on multiple queues in a loop, so rather than locking/unlocking
 * multiple times, we will just keep the dqm locked for all of the calls.
1008 */ 1009 static int resume_single_queue(struct device_queue_manager *dqm, 1010 struct qcm_process_device *qpd, 1011 struct queue *q) 1012 { 1013 struct kfd_process_device *pdd; 1014 1015 if (!q->properties.is_suspended) 1016 return 0; 1017 1018 pdd = qpd_to_pdd(qpd); 1019 1020 pr_debug("Restoring from suspend PASID %u queue [%i]\n", 1021 pdd->process->pasid, 1022 q->properties.queue_id); 1023 1024 q->properties.is_suspended = false; 1025 1026 if (QUEUE_IS_ACTIVE(q->properties)) { 1027 if (dqm->dev->kfd->shared_resources.enable_mes) { 1028 int r = add_queue_mes(dqm, q, &pdd->qpd); 1029 1030 if (r) 1031 return r; 1032 } 1033 1034 q->properties.is_active = true; 1035 increment_queue_count(dqm, qpd, q); 1036 } 1037 1038 return 0; 1039 } 1040 1041 static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, 1042 struct qcm_process_device *qpd) 1043 { 1044 struct queue *q; 1045 struct mqd_manager *mqd_mgr; 1046 struct kfd_process_device *pdd; 1047 int retval, ret = 0; 1048 1049 dqm_lock(dqm); 1050 if (qpd->evicted++ > 0) /* already evicted, do nothing */ 1051 goto out; 1052 1053 pdd = qpd_to_pdd(qpd); 1054 pr_debug_ratelimited("Evicting PASID 0x%x queues\n", 1055 pdd->process->pasid); 1056 1057 pdd->last_evict_timestamp = get_jiffies_64(); 1058 /* Mark all queues as evicted. Deactivate all active queues on 1059 * the qpd. 1060 */ 1061 list_for_each_entry(q, &qpd->queues_list, list) { 1062 q->properties.is_evicted = true; 1063 if (!q->properties.is_active) 1064 continue; 1065 1066 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 1067 q->properties.type)]; 1068 q->properties.is_active = false; 1069 decrement_queue_count(dqm, qpd, q); 1070 1071 if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n")) 1072 continue; 1073 1074 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, 1075 (dqm->dev->kfd->cwsr_enabled ? 
1076 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE : 1077 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN), 1078 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); 1079 if (retval && !ret) 1080 /* Return the first error, but keep going to 1081 * maintain a consistent eviction state 1082 */ 1083 ret = retval; 1084 } 1085 1086 out: 1087 dqm_unlock(dqm); 1088 return ret; 1089 } 1090 1091 static int evict_process_queues_cpsch(struct device_queue_manager *dqm, 1092 struct qcm_process_device *qpd) 1093 { 1094 struct queue *q; 1095 struct device *dev = dqm->dev->adev->dev; 1096 struct kfd_process_device *pdd; 1097 int retval = 0; 1098 1099 dqm_lock(dqm); 1100 if (qpd->evicted++ > 0) /* already evicted, do nothing */ 1101 goto out; 1102 1103 pdd = qpd_to_pdd(qpd); 1104 1105 /* The debugger creates processes that temporarily have not acquired 1106 * all VMs for all devices and has no VMs itself. 1107 * Skip queue eviction on process eviction. 1108 */ 1109 if (!pdd->drm_priv) 1110 goto out; 1111 1112 pr_debug_ratelimited("Evicting PASID 0x%x queues\n", 1113 pdd->process->pasid); 1114 1115 /* Mark all queues as evicted. Deactivate all active queues on 1116 * the qpd. 1117 */ 1118 list_for_each_entry(q, &qpd->queues_list, list) { 1119 q->properties.is_evicted = true; 1120 if (!q->properties.is_active) 1121 continue; 1122 1123 q->properties.is_active = false; 1124 decrement_queue_count(dqm, qpd, q); 1125 1126 if (dqm->dev->kfd->shared_resources.enable_mes) { 1127 retval = remove_queue_mes(dqm, q, qpd); 1128 if (retval) { 1129 dev_err(dev, "Failed to evict queue %d\n", 1130 q->properties.queue_id); 1131 goto out; 1132 } 1133 } 1134 } 1135 pdd->last_evict_timestamp = get_jiffies_64(); 1136 if (!dqm->dev->kfd->shared_resources.enable_mes) 1137 retval = execute_queues_cpsch(dqm, 1138 qpd->is_debug ? 
1139 KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES : 1140 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, 1141 USE_DEFAULT_GRACE_PERIOD); 1142 1143 out: 1144 dqm_unlock(dqm); 1145 return retval; 1146 } 1147 1148 static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, 1149 struct qcm_process_device *qpd) 1150 { 1151 struct mm_struct *mm = NULL; 1152 struct queue *q; 1153 struct mqd_manager *mqd_mgr; 1154 struct kfd_process_device *pdd; 1155 uint64_t pd_base; 1156 uint64_t eviction_duration; 1157 int retval, ret = 0; 1158 1159 pdd = qpd_to_pdd(qpd); 1160 /* Retrieve PD base */ 1161 pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); 1162 1163 dqm_lock(dqm); 1164 if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */ 1165 goto out; 1166 if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */ 1167 qpd->evicted--; 1168 goto out; 1169 } 1170 1171 pr_debug_ratelimited("Restoring PASID 0x%x queues\n", 1172 pdd->process->pasid); 1173 1174 /* Update PD Base in QPD */ 1175 qpd->page_table_base = pd_base; 1176 pr_debug("Updated PD address to 0x%llx\n", pd_base); 1177 1178 if (!list_empty(&qpd->queues_list)) { 1179 dqm->dev->kfd2kgd->set_vm_context_page_table_base( 1180 dqm->dev->adev, 1181 qpd->vmid, 1182 qpd->page_table_base); 1183 kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY); 1184 } 1185 1186 /* Take a safe reference to the mm_struct, which may otherwise 1187 * disappear even while the kfd_process is still referenced. 1188 */ 1189 mm = get_task_mm(pdd->process->lead_thread); 1190 if (!mm) { 1191 ret = -EFAULT; 1192 goto out; 1193 } 1194 1195 /* Remove the eviction flags. Activate queues that are not 1196 * inactive for other reasons. 
1197 */ 1198 list_for_each_entry(q, &qpd->queues_list, list) { 1199 q->properties.is_evicted = false; 1200 if (!QUEUE_IS_ACTIVE(q->properties)) 1201 continue; 1202 1203 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 1204 q->properties.type)]; 1205 q->properties.is_active = true; 1206 increment_queue_count(dqm, qpd, q); 1207 1208 if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n")) 1209 continue; 1210 1211 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, 1212 q->queue, &q->properties, mm); 1213 if (retval && !ret) 1214 /* Return the first error, but keep going to 1215 * maintain a consistent eviction state 1216 */ 1217 ret = retval; 1218 } 1219 qpd->evicted = 0; 1220 eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp; 1221 atomic64_add(eviction_duration, &pdd->evict_duration_counter); 1222 out: 1223 if (mm) 1224 mmput(mm); 1225 dqm_unlock(dqm); 1226 return ret; 1227 } 1228 1229 static int restore_process_queues_cpsch(struct device_queue_manager *dqm, 1230 struct qcm_process_device *qpd) 1231 { 1232 struct queue *q; 1233 struct device *dev = dqm->dev->adev->dev; 1234 struct kfd_process_device *pdd; 1235 uint64_t eviction_duration; 1236 int retval = 0; 1237 1238 pdd = qpd_to_pdd(qpd); 1239 1240 dqm_lock(dqm); 1241 if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */ 1242 goto out; 1243 if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */ 1244 qpd->evicted--; 1245 goto out; 1246 } 1247 1248 /* The debugger creates processes that temporarily have not acquired 1249 * all VMs for all devices and has no VMs itself. 1250 * Skip queue restore on process restore. 
1251 */ 1252 if (!pdd->drm_priv) 1253 goto vm_not_acquired; 1254 1255 pr_debug_ratelimited("Restoring PASID 0x%x queues\n", 1256 pdd->process->pasid); 1257 1258 /* Update PD Base in QPD */ 1259 qpd->page_table_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); 1260 pr_debug("Updated PD address to 0x%llx\n", qpd->page_table_base); 1261 1262 /* activate all active queues on the qpd */ 1263 list_for_each_entry(q, &qpd->queues_list, list) { 1264 q->properties.is_evicted = false; 1265 if (!QUEUE_IS_ACTIVE(q->properties)) 1266 continue; 1267 1268 q->properties.is_active = true; 1269 increment_queue_count(dqm, &pdd->qpd, q); 1270 1271 if (dqm->dev->kfd->shared_resources.enable_mes) { 1272 retval = add_queue_mes(dqm, q, qpd); 1273 if (retval) { 1274 dev_err(dev, "Failed to restore queue %d\n", 1275 q->properties.queue_id); 1276 goto out; 1277 } 1278 } 1279 } 1280 if (!dqm->dev->kfd->shared_resources.enable_mes) 1281 retval = execute_queues_cpsch(dqm, 1282 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD); 1283 eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp; 1284 atomic64_add(eviction_duration, &pdd->evict_duration_counter); 1285 vm_not_acquired: 1286 qpd->evicted = 0; 1287 out: 1288 dqm_unlock(dqm); 1289 return retval; 1290 } 1291 1292 static int register_process(struct device_queue_manager *dqm, 1293 struct qcm_process_device *qpd) 1294 { 1295 struct device_process_node *n; 1296 struct kfd_process_device *pdd; 1297 uint64_t pd_base; 1298 int retval; 1299 1300 n = kzalloc(sizeof(*n), GFP_KERNEL); 1301 if (!n) 1302 return -ENOMEM; 1303 1304 n->qpd = qpd; 1305 1306 pdd = qpd_to_pdd(qpd); 1307 /* Retrieve PD base */ 1308 pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); 1309 1310 dqm_lock(dqm); 1311 list_add(&n->list, &dqm->queues); 1312 1313 /* Update PD Base in QPD */ 1314 qpd->page_table_base = pd_base; 1315 pr_debug("Updated PD address to 0x%llx\n", pd_base); 1316 1317 retval = 
dqm->asic_ops.update_qpd(dqm, qpd); 1318 1319 dqm->processes_count++; 1320 1321 dqm_unlock(dqm); 1322 1323 /* Outside the DQM lock because under the DQM lock we can't do 1324 * reclaim or take other locks that others hold while reclaiming. 1325 */ 1326 kfd_inc_compute_active(dqm->dev); 1327 1328 return retval; 1329 } 1330 1331 static int unregister_process(struct device_queue_manager *dqm, 1332 struct qcm_process_device *qpd) 1333 { 1334 int retval; 1335 struct device_process_node *cur, *next; 1336 1337 pr_debug("qpd->queues_list is %s\n", 1338 list_empty(&qpd->queues_list) ? "empty" : "not empty"); 1339 1340 retval = 0; 1341 dqm_lock(dqm); 1342 1343 list_for_each_entry_safe(cur, next, &dqm->queues, list) { 1344 if (qpd == cur->qpd) { 1345 list_del(&cur->list); 1346 kfree(cur); 1347 dqm->processes_count--; 1348 goto out; 1349 } 1350 } 1351 /* qpd not found in dqm list */ 1352 retval = 1; 1353 out: 1354 dqm_unlock(dqm); 1355 1356 /* Outside the DQM lock because under the DQM lock we can't do 1357 * reclaim or take other locks that others hold while reclaiming. 
1358 */ 1359 if (!retval) 1360 kfd_dec_compute_active(dqm->dev); 1361 1362 return retval; 1363 } 1364 1365 static int 1366 set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid, 1367 unsigned int vmid) 1368 { 1369 uint32_t xcc_mask = dqm->dev->xcc_mask; 1370 int xcc_id, ret; 1371 1372 for_each_inst(xcc_id, xcc_mask) { 1373 ret = dqm->dev->kfd2kgd->set_pasid_vmid_mapping( 1374 dqm->dev->adev, pasid, vmid, xcc_id); 1375 if (ret) 1376 break; 1377 } 1378 1379 return ret; 1380 } 1381 1382 static void init_interrupts(struct device_queue_manager *dqm) 1383 { 1384 uint32_t xcc_mask = dqm->dev->xcc_mask; 1385 unsigned int i, xcc_id; 1386 1387 for_each_inst(xcc_id, xcc_mask) { 1388 for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++) { 1389 if (is_pipe_enabled(dqm, 0, i)) { 1390 dqm->dev->kfd2kgd->init_interrupts( 1391 dqm->dev->adev, i, xcc_id); 1392 } 1393 } 1394 } 1395 } 1396 1397 static int initialize_nocpsch(struct device_queue_manager *dqm) 1398 { 1399 int pipe, queue; 1400 1401 pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm)); 1402 1403 dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm), 1404 sizeof(unsigned int), GFP_KERNEL); 1405 if (!dqm->allocated_queues) 1406 return -ENOMEM; 1407 1408 mutex_init(&dqm->lock_hidden); 1409 INIT_LIST_HEAD(&dqm->queues); 1410 dqm->active_queue_count = dqm->next_pipe_to_allocate = 0; 1411 dqm->active_cp_queue_count = 0; 1412 dqm->gws_queue_count = 0; 1413 1414 for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { 1415 int pipe_offset = pipe * get_queues_per_pipe(dqm); 1416 1417 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) 1418 if (test_bit(pipe_offset + queue, 1419 dqm->dev->kfd->shared_resources.cp_queue_bitmap)) 1420 dqm->allocated_queues[pipe] |= 1 << queue; 1421 } 1422 1423 memset(dqm->vmid_pasid, 0, sizeof(dqm->vmid_pasid)); 1424 1425 init_sdma_bitmaps(dqm); 1426 1427 return 0; 1428 } 1429 1430 static void uninitialize(struct device_queue_manager *dqm) 1431 { 1432 int i; 1433 1434 
WARN_ON(dqm->active_queue_count > 0 || dqm->processes_count > 0); 1435 1436 kfree(dqm->allocated_queues); 1437 for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++) 1438 kfree(dqm->mqd_mgrs[i]); 1439 mutex_destroy(&dqm->lock_hidden); 1440 } 1441 1442 static int start_nocpsch(struct device_queue_manager *dqm) 1443 { 1444 int r = 0; 1445 1446 pr_info("SW scheduler is used"); 1447 init_interrupts(dqm); 1448 1449 if (dqm->dev->adev->asic_type == CHIP_HAWAII) 1450 r = pm_init(&dqm->packet_mgr, dqm); 1451 if (!r) 1452 dqm->sched_running = true; 1453 1454 return r; 1455 } 1456 1457 static int stop_nocpsch(struct device_queue_manager *dqm) 1458 { 1459 dqm_lock(dqm); 1460 if (!dqm->sched_running) { 1461 dqm_unlock(dqm); 1462 return 0; 1463 } 1464 1465 if (dqm->dev->adev->asic_type == CHIP_HAWAII) 1466 pm_uninit(&dqm->packet_mgr); 1467 dqm->sched_running = false; 1468 dqm_unlock(dqm); 1469 1470 return 0; 1471 } 1472 1473 static int allocate_sdma_queue(struct device_queue_manager *dqm, 1474 struct queue *q, const uint32_t *restore_sdma_id) 1475 { 1476 struct device *dev = dqm->dev->adev->dev; 1477 int bit; 1478 1479 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { 1480 if (bitmap_empty(dqm->sdma_bitmap, KFD_MAX_SDMA_QUEUES)) { 1481 dev_err(dev, "No more SDMA queue to allocate\n"); 1482 return -ENOMEM; 1483 } 1484 1485 if (restore_sdma_id) { 1486 /* Re-use existing sdma_id */ 1487 if (!test_bit(*restore_sdma_id, dqm->sdma_bitmap)) { 1488 dev_err(dev, "SDMA queue already in use\n"); 1489 return -EBUSY; 1490 } 1491 clear_bit(*restore_sdma_id, dqm->sdma_bitmap); 1492 q->sdma_id = *restore_sdma_id; 1493 } else { 1494 /* Find first available sdma_id */ 1495 bit = find_first_bit(dqm->sdma_bitmap, 1496 get_num_sdma_queues(dqm)); 1497 clear_bit(bit, dqm->sdma_bitmap); 1498 q->sdma_id = bit; 1499 } 1500 1501 q->properties.sdma_engine_id = 1502 q->sdma_id % kfd_get_num_sdma_engines(dqm->dev); 1503 q->properties.sdma_queue_id = q->sdma_id / 1504 kfd_get_num_sdma_engines(dqm->dev); 1505 } else if 
		   (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
		if (bitmap_empty(dqm->xgmi_sdma_bitmap, KFD_MAX_SDMA_QUEUES)) {
			dev_err(dev, "No more XGMI SDMA queue to allocate\n");
			return -ENOMEM;
		}
		if (restore_sdma_id) {
			/* Re-use existing sdma_id */
			if (!test_bit(*restore_sdma_id, dqm->xgmi_sdma_bitmap)) {
				dev_err(dev, "SDMA queue already in use\n");
				return -EBUSY;
			}
			clear_bit(*restore_sdma_id, dqm->xgmi_sdma_bitmap);
			q->sdma_id = *restore_sdma_id;
		} else {
			/* Find first available sdma_id in the XGMI bitmap */
			bit = find_first_bit(dqm->xgmi_sdma_bitmap,
					     get_num_xgmi_sdma_queues(dqm));
			clear_bit(bit, dqm->xgmi_sdma_bitmap);
			q->sdma_id = bit;
		}
		/* sdma_engine_id is sdma id including
		 * both PCIe-optimized SDMAs and XGMI-
		 * optimized SDMAs. The calculation below
		 * assumes the first N engines are always
		 * PCIe-optimized ones
		 */
		q->properties.sdma_engine_id =
			kfd_get_num_sdma_engines(dqm->dev) +
			q->sdma_id % kfd_get_num_xgmi_sdma_engines(dqm->dev);
		q->properties.sdma_queue_id = q->sdma_id /
			kfd_get_num_xgmi_sdma_engines(dqm->dev);
	}

	pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
	pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);

	return 0;
}

/* Return an SDMA queue id to the appropriate bitmap. Caller must hold
 * dqm->lock. Out-of-range ids are silently ignored.
 */
static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				  struct queue *q)
{
	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		if (q->sdma_id >= get_num_sdma_queues(dqm))
			return;
		set_bit(q->sdma_id, dqm->sdma_bitmap);
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
		if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm))
			return;
		set_bit(q->sdma_id, dqm->xgmi_sdma_bitmap);
	}
}

/*
 * Device Queue Manager implementation for cp scheduler
 */

/* Tell the HWS (via the packet manager) which VMIDs and queues it owns */
static int set_sched_resources(struct device_queue_manager *dqm)
{
	int i, mec;
	struct scheduling_resources res;
	struct device *dev = dqm->dev->adev->dev;

	res.vmid_mask = dqm->dev->compute_vmid_bitmap;

	res.queue_mask = 0;
	for (i = 0; i < AMDGPU_MAX_QUEUES; ++i) {
		mec = (i / dqm->dev->kfd->shared_resources.num_queue_per_pipe)
			/ dqm->dev->kfd->shared_resources.num_pipe_per_mec;

		if (!test_bit(i, dqm->dev->kfd->shared_resources.cp_queue_bitmap))
			continue;

		/* only acquire queues from the first MEC */
		if (mec > 0)
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of res.queue_mask needs updating
		 */
		if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) {
			dev_err(dev, "Invalid queue enabled by amdgpu: %d\n", i);
			break;
		}

		res.queue_mask |= 1ull
			<< amdgpu_queue_mask_bit_to_set_resource_bit(
				dqm->dev->adev, i);
	}
	res.gws_mask = ~0ull;
	res.oac_mask = res.gds_heap_base = res.gds_heap_size = 0;

	pr_debug("Scheduling resources:\n"
		 "vmid mask: 0x%8X\n"
		 "queue mask: 0x%8llX\n",
		 res.vmid_mask, res.queue_mask);

	return pm_send_set_resources(&dqm->packet_mgr, &res);
}

/* One-time setup for the CP/HWS scheduling path */
static int initialize_cpsch(struct device_queue_manager *dqm)
{
	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));

	mutex_init(&dqm->lock_hidden);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->active_queue_count = dqm->processes_count = 0;
	dqm->active_cp_queue_count = 0;
	dqm->gws_queue_count = 0;
	dqm->active_runlist = false;
	INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);
	dqm->trap_debug_vmid = 0;

	init_sdma_bitmaps(dqm);

	/* Cache the firmware wait times for the first XCC instance */
	if (dqm->dev->kfd2kgd->get_iq_wait_times)
		dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev,
				&dqm->wait_times,
				ffs(dqm->dev->xcc_mask) - 1);
	return 0;
}

/* Start the CP/HWS scheduler: bring up the packet manager (unless MES is
 * used), allocate fence memory on the GART, enable interrupts and map the
 * initial runlist. Error paths unwind in reverse order via gotos.
 */
static int start_cpsch(struct device_queue_manager *dqm)
{
	struct device *dev = dqm->dev->adev->dev;
	int retval;

	retval = 0;

	dqm_lock(dqm);

	if (!dqm->dev->kfd->shared_resources.enable_mes) {
		retval = pm_init(&dqm->packet_mgr, dqm);
		if (retval)
			goto fail_packet_manager_init;

		retval = set_sched_resources(dqm);
		if (retval)
			goto fail_set_sched_resources;
	}
	pr_debug("Allocating fence memory\n");

	/* allocate fence memory on the gart */
	retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
				     &dqm->fence_mem);

	if (retval)
		goto fail_allocate_vidmem;

	dqm->fence_addr = (uint64_t *)dqm->fence_mem->cpu_ptr;
	dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;

	init_interrupts(dqm);

	/* clear hang status when driver try to start the hw scheduler */
	dqm->sched_running = true;

	if (!dqm->dev->kfd->shared_resources.enable_mes)
		execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD);

	/* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */
	if (amdgpu_emu_mode == 0 && dqm->dev->adev->gmc.is_app_apu &&
	    (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 3))) {
		uint32_t reg_offset = 0;
		uint32_t grace_period = 1;

		retval = pm_update_grace_period(&dqm->packet_mgr,
						grace_period);
		if (retval)
			dev_err(dev, "Setting grace timeout failed\n");
		else if (dqm->dev->kfd2kgd->build_grace_period_packet_info)
			/* Update dqm->wait_times maintained in software */
			dqm->dev->kfd2kgd->build_grace_period_packet_info(
					dqm->dev->adev,	dqm->wait_times,
					grace_period, &reg_offset,
					&dqm->wait_times);
	}

	dqm_unlock(dqm);

	return 0;
fail_allocate_vidmem:
fail_set_sched_resources:
	if (!dqm->dev->kfd->shared_resources.enable_mes)
		pm_uninit(&dqm->packet_mgr);
fail_packet_manager_init:
	dqm_unlock(dqm);
	return retval;
}

static int
stop_cpsch(struct device_queue_manager *dqm)
{
	/* Stop the CP/HWS scheduler: unmap everything (or remove all MES
	 * queues), then release packet-manager resources. Idempotent if
	 * the scheduler is already stopped.
	 */
	dqm_lock(dqm);
	if (!dqm->sched_running) {
		dqm_unlock(dqm);
		return 0;
	}

	if (!dqm->dev->kfd->shared_resources.enable_mes)
		unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD, false);
	else
		remove_all_queues_mes(dqm);

	dqm->sched_running = false;

	if (!dqm->dev->kfd->shared_resources.enable_mes)
		pm_release_ib(&dqm->packet_mgr);

	kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
	if (!dqm->dev->kfd->shared_resources.enable_mes)
		pm_uninit(&dqm->packet_mgr);
	dqm_unlock(dqm);

	return 0;
}

/* Register a kernel (debug) queue with the CP scheduler and reschedule */
static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
				     struct kernel_queue *kq,
				     struct qcm_process_device *qpd)
{
	dqm_lock(dqm);
	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new kernel queue because %d queues were already created\n",
			dqm->total_queue_count);
		dqm_unlock(dqm);
		return -EPERM;
	}

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
		 dqm->total_queue_count);

	list_add(&kq->list, &qpd->priv_queue_list);
	increment_queue_count(dqm, qpd, kq->queue);
	qpd->is_debug = true;
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
			     USE_DEFAULT_GRACE_PERIOD);
	dqm_unlock(dqm);

	return 0;
}

/* Undo create_kernel_queue_cpsch: remove the queue and reschedule */
static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
				       struct kernel_queue *kq,
				       struct qcm_process_device *qpd)
{
	dqm_lock(dqm);
	list_del(&kq->list);
	decrement_queue_count(dqm, qpd, kq->queue);
	qpd->is_debug = false;
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0,
			     USE_DEFAULT_GRACE_PERIOD);
	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type.
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
		 dqm->total_queue_count);
	dqm_unlock(dqm);
}

/* Create a user-mode queue under the CP scheduler. Optionally restores a
 * checkpointed queue (qd/restore_mqd/restore_ctl_stack, used by CRIU).
 * Resources are unwound through the goto chain on failure.
 */
static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
			      struct qcm_process_device *qpd,
			      const struct kfd_criu_queue_priv_data *qd,
			      const void *restore_mqd, const void *restore_ctl_stack)
{
	int retval;
	struct mqd_manager *mqd_mgr;

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new usermode queue because %d queues were already created\n",
			dqm->total_queue_count);
		retval = -EPERM;
		goto out;
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
	    q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
		dqm_lock(dqm);
		retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL);
		dqm_unlock(dqm);
		if (retval)
			goto out;
	}

	retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL);
	if (retval)
		goto out_deallocate_sdma_queue;

	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
			q->properties.type)];

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
	    q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
		dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
	q->properties.tba_addr = qpd->tba_addr;
	q->properties.tma_addr = qpd->tma_addr;
	q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties);
	if (!q->mqd_mem_obj) {
		retval = -ENOMEM;
		goto out_deallocate_doorbell;
	}

	dqm_lock(dqm);
	/*
	 * Eviction state logic: mark all queues as evicted, even ones
	 * not currently active. Restoring inactive queues later only
	 * updates the is_evicted flag but is a no-op otherwise.
	 */
	q->properties.is_evicted = !!qpd->evicted;
	q->properties.is_dbg_wa = qpd->pqm->process->debug_trap_enabled &&
				  kfd_dbg_has_cwsr_workaround(q->device);

	if (qd)
		mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr,
				     &q->properties, restore_mqd, restore_ctl_stack,
				     qd->ctl_stack_size);
	else
		mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
				  &q->gart_mqd_addr, &q->properties);

	list_add(&q->list, &qpd->queues_list);
	qpd->queue_count++;

	if (q->properties.is_active) {
		increment_queue_count(dqm, qpd, q);

		if (!dqm->dev->kfd->shared_resources.enable_mes)
			retval = execute_queues_cpsch(dqm,
					KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD);
		else
			retval = add_queue_mes(dqm, q, qpd);
		if (retval)
			goto cleanup_queue;
	}

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;

	pr_debug("Total of %d queues are accountable so far\n",
		 dqm->total_queue_count);

	dqm_unlock(dqm);
	return retval;

cleanup_queue:
	qpd->queue_count--;
	list_del(&q->list);
	if (q->properties.is_active)
		decrement_queue_count(dqm, qpd, q);
	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
	dqm_unlock(dqm);
out_deallocate_doorbell:
	deallocate_doorbell(qpd, q);
out_deallocate_sdma_queue:
	if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
	    q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
		dqm_lock(dqm);
		deallocate_sdma_queue(dqm, q);
		dqm_unlock(dqm);
	}
out:
	return retval;
}

/* Busy-wait (with rescheduling) until the GPU writes fence_value to the
 * DQM fence address, or the timeout expires.
 */
int amdkfd_fence_wait_timeout(struct device_queue_manager *dqm,
			      uint64_t fence_value,
			      unsigned int timeout_ms)
{
	unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies;
	struct device *dev = dqm->dev->adev->dev;
	uint64_t *fence_addr = dqm->fence_addr;

	while (*fence_addr != fence_value) {
		/* Fatal err detected, this response won't come */
		if (amdgpu_amdkfd_is_fed(dqm->dev->adev))
			return -EIO;

		if (time_after(jiffies, end_jiffies)) {
			dev_err(dev, "qcm fence wait loop timeout expired\n");
			/* In HWS case, this is used to halt the driver thread
			 * in order not to mess up CP states before doing
			 * scandumps for FW debugging.
1896 */ 1897 while (halt_if_hws_hang) 1898 schedule(); 1899 1900 return -ETIME; 1901 } 1902 schedule(); 1903 } 1904 1905 return 0; 1906 } 1907 1908 /* dqm->lock mutex has to be locked before calling this function */ 1909 static int map_queues_cpsch(struct device_queue_manager *dqm) 1910 { 1911 struct device *dev = dqm->dev->adev->dev; 1912 int retval; 1913 1914 if (!dqm->sched_running) 1915 return 0; 1916 if (dqm->active_queue_count <= 0 || dqm->processes_count <= 0) 1917 return 0; 1918 if (dqm->active_runlist) 1919 return 0; 1920 1921 retval = pm_send_runlist(&dqm->packet_mgr, &dqm->queues); 1922 pr_debug("%s sent runlist\n", __func__); 1923 if (retval) { 1924 dev_err(dev, "failed to execute runlist\n"); 1925 return retval; 1926 } 1927 dqm->active_runlist = true; 1928 1929 return retval; 1930 } 1931 1932 /* dqm->lock mutex has to be locked before calling this function */ 1933 static int unmap_queues_cpsch(struct device_queue_manager *dqm, 1934 enum kfd_unmap_queues_filter filter, 1935 uint32_t filter_param, 1936 uint32_t grace_period, 1937 bool reset) 1938 { 1939 struct device *dev = dqm->dev->adev->dev; 1940 struct mqd_manager *mqd_mgr; 1941 int retval; 1942 1943 if (!dqm->sched_running) 1944 return 0; 1945 if (!dqm->active_runlist) 1946 return 0; 1947 if (!down_read_trylock(&dqm->dev->adev->reset_domain->sem)) 1948 return -EIO; 1949 1950 if (grace_period != USE_DEFAULT_GRACE_PERIOD) { 1951 retval = pm_update_grace_period(&dqm->packet_mgr, grace_period); 1952 if (retval) 1953 goto out; 1954 } 1955 1956 retval = pm_send_unmap_queue(&dqm->packet_mgr, filter, filter_param, reset); 1957 if (retval) 1958 goto out; 1959 1960 *dqm->fence_addr = KFD_FENCE_INIT; 1961 pm_send_query_status(&dqm->packet_mgr, dqm->fence_gpu_addr, 1962 KFD_FENCE_COMPLETED); 1963 /* should be timed out */ 1964 retval = amdkfd_fence_wait_timeout(dqm, KFD_FENCE_COMPLETED, 1965 queue_preemption_timeout_ms); 1966 if (retval) { 1967 dev_err(dev, "The cp might be in an unrecoverable state due to an 
unsuccessful queues preemption\n"); 1968 kfd_hws_hang(dqm); 1969 goto out; 1970 } 1971 1972 /* In the current MEC firmware implementation, if compute queue 1973 * doesn't response to the preemption request in time, HIQ will 1974 * abandon the unmap request without returning any timeout error 1975 * to driver. Instead, MEC firmware will log the doorbell of the 1976 * unresponding compute queue to HIQ.MQD.queue_doorbell_id fields. 1977 * To make sure the queue unmap was successful, driver need to 1978 * check those fields 1979 */ 1980 mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]; 1981 if (mqd_mgr->check_preemption_failed(mqd_mgr, dqm->packet_mgr.priv_queue->queue->mqd)) { 1982 while (halt_if_hws_hang) 1983 schedule(); 1984 kfd_hws_hang(dqm); 1985 retval = -ETIME; 1986 goto out; 1987 } 1988 1989 /* We need to reset the grace period value for this device */ 1990 if (grace_period != USE_DEFAULT_GRACE_PERIOD) { 1991 if (pm_update_grace_period(&dqm->packet_mgr, 1992 USE_DEFAULT_GRACE_PERIOD)) 1993 dev_err(dev, "Failed to reset grace period\n"); 1994 } 1995 1996 pm_release_ib(&dqm->packet_mgr); 1997 dqm->active_runlist = false; 1998 1999 out: 2000 up_read(&dqm->dev->adev->reset_domain->sem); 2001 return retval; 2002 } 2003 2004 /* only for compute queue */ 2005 static int reset_queues_cpsch(struct device_queue_manager *dqm, 2006 uint16_t pasid) 2007 { 2008 int retval; 2009 2010 dqm_lock(dqm); 2011 2012 retval = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_BY_PASID, 2013 pasid, USE_DEFAULT_GRACE_PERIOD, true); 2014 2015 dqm_unlock(dqm); 2016 return retval; 2017 } 2018 2019 /* dqm->lock mutex has to be locked before calling this function */ 2020 static int execute_queues_cpsch(struct device_queue_manager *dqm, 2021 enum kfd_unmap_queues_filter filter, 2022 uint32_t filter_param, 2023 uint32_t grace_period) 2024 { 2025 int retval; 2026 2027 if (!down_read_trylock(&dqm->dev->adev->reset_domain->sem)) 2028 return -EIO; 2029 retval = unmap_queues_cpsch(dqm, filter, filter_param, 
grace_period, false); 2030 if (!retval) 2031 retval = map_queues_cpsch(dqm); 2032 up_read(&dqm->dev->adev->reset_domain->sem); 2033 return retval; 2034 } 2035 2036 static int wait_on_destroy_queue(struct device_queue_manager *dqm, 2037 struct queue *q) 2038 { 2039 struct kfd_process_device *pdd = kfd_get_process_device_data(q->device, 2040 q->process); 2041 int ret = 0; 2042 2043 if (pdd->qpd.is_debug) 2044 return ret; 2045 2046 q->properties.is_being_destroyed = true; 2047 2048 if (pdd->process->debug_trap_enabled && q->properties.is_suspended) { 2049 dqm_unlock(dqm); 2050 mutex_unlock(&q->process->mutex); 2051 ret = wait_event_interruptible(dqm->destroy_wait, 2052 !q->properties.is_suspended); 2053 2054 mutex_lock(&q->process->mutex); 2055 dqm_lock(dqm); 2056 } 2057 2058 return ret; 2059 } 2060 2061 static int destroy_queue_cpsch(struct device_queue_manager *dqm, 2062 struct qcm_process_device *qpd, 2063 struct queue *q) 2064 { 2065 int retval; 2066 struct mqd_manager *mqd_mgr; 2067 uint64_t sdma_val = 0; 2068 struct kfd_process_device *pdd = qpd_to_pdd(qpd); 2069 struct device *dev = dqm->dev->adev->dev; 2070 2071 /* Get the SDMA queue stats */ 2072 if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || 2073 (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 2074 retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr, 2075 &sdma_val); 2076 if (retval) 2077 dev_err(dev, "Failed to read SDMA queue counter for queue: %d\n", 2078 q->properties.queue_id); 2079 } 2080 2081 /* remove queue from list to prevent rescheduling after preemption */ 2082 dqm_lock(dqm); 2083 2084 retval = wait_on_destroy_queue(dqm, q); 2085 2086 if (retval) { 2087 dqm_unlock(dqm); 2088 return retval; 2089 } 2090 2091 if (qpd->is_debug) { 2092 /* 2093 * error, currently we do not allow to destroy a queue 2094 * of a currently debugged process 2095 */ 2096 retval = -EBUSY; 2097 goto failed_try_destroy_debugged_queue; 2098 2099 } 2100 2101 mqd_mgr = 
dqm->mqd_mgrs[get_mqd_type_from_queue_type(
			q->properties.type)];

	deallocate_doorbell(qpd, q);

	if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
	    (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
		deallocate_sdma_queue(dqm, q);
		/* fold the last HW counter sample into the per-process total */
		pdd->sdma_past_activity_counter += sdma_val;
	}

	list_del(&q->list);
	qpd->queue_count--;
	if (q->properties.is_active) {
		decrement_queue_count(dqm, qpd, q);
		if (!dqm->dev->kfd->shared_resources.enable_mes) {
			retval = execute_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
				USE_DEFAULT_GRACE_PERIOD);
			/* -ETIME: preemption timed out, wavefronts must be
			 * reset when the process is torn down.
			 */
			if (retval == -ETIME)
				qpd->reset_wavefronts = true;
		} else {
			retval = remove_queue_mes(dqm, q, qpd);
		}
	}

	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	dqm_unlock(dqm);

	/*
	 * Do free_mqd and raise delete event after dqm_unlock(dqm) to avoid
	 * circular locking
	 */
	kfd_dbg_ev_raise(KFD_EC_MASK(EC_DEVICE_QUEUE_DELETE),
			qpd->pqm->process, q->device,
			-1, false, NULL, 0);

	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);

	return retval;

failed_try_destroy_debugged_queue:

	dqm_unlock(dqm);
	return retval;
}

/*
 * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
 * stay in user mode.
 */
#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
/* APE1 limit is inclusive and 64K aligned.
 */
#define APE1_LIMIT_ALIGNMENT 0xFFFF

/* Validate and program the APE1 (alternate aperture) range and the cache
 * policies for a process, then apply them via the ASIC-specific hook.
 * Returns true on success, false if the requested aperture cannot be
 * represented.
 */
static bool set_cache_memory_policy(struct device_queue_manager *dqm,
				    struct qcm_process_device *qpd,
				    enum cache_policy default_policy,
				    enum cache_policy alternate_policy,
				    void __user *alternate_aperture_base,
				    uint64_t alternate_aperture_size)
{
	bool retval = true;

	if (!dqm->asic_ops.set_cache_memory_policy)
		return retval;

	dqm_lock(dqm);

	if (alternate_aperture_size == 0) {
		/* base > limit disables APE1 */
		qpd->sh_mem_ape1_base = 1;
		qpd->sh_mem_ape1_limit = 0;
	} else {
		/*
		 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
		 *			SH_MEM_APE1_BASE[31:0], 0x0000 }
		 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
		 *			SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
		 * Verify that the base and size parameters can be
		 * represented in this format and convert them.
		 * Additionally restrict APE1 to user-mode addresses.
		 */

		uint64_t base = (uintptr_t)alternate_aperture_base;
		uint64_t limit = base + alternate_aperture_size - 1;

		if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
		   (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
			retval = false;
			goto out;
		}

		qpd->sh_mem_ape1_base = base >> 16;
		qpd->sh_mem_ape1_limit = limit >> 16;
	}

	retval = dqm->asic_ops.set_cache_memory_policy(
			dqm,
			qpd,
			default_policy,
			alternate_policy,
			alternate_aperture_base,
			alternate_aperture_size);

	/* Without HWS the settings must be written to the HW registers
	 * directly for an already-registered process (vmid != 0).
	 */
	if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
		program_sh_mem_settings(dqm, qpd);

	pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
		qpd->sh_mem_config, qpd->sh_mem_ape1_base,
		qpd->sh_mem_ape1_limit);

out:
	dqm_unlock(dqm);
	return retval;
}

static int process_termination_nocpsch(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
	struct queue *q;
	struct device_process_node *cur, *next_dpn;
	int retval = 0;
	bool found = false;

	dqm_lock(dqm);

	/* Clear all user mode queues */
	while (!list_empty(&qpd->queues_list)) {
		struct mqd_manager *mqd_mgr;
		int ret;

		q = list_first_entry(&qpd->queues_list, struct queue, list);
		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
				q->properties.type)];
		ret = destroy_queue_nocpsch_locked(dqm, qpd, q);
		if (ret)
			retval = ret;
		/* free_mqd must run outside the DQM lock (see the reclaim
		 * note at the end of this function).
		 */
		dqm_unlock(dqm);
		mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
		dqm_lock(dqm);
	}

	/* Unregister process */
	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			found = true;
			break;
		}
	}

	dqm_unlock(dqm);

	/* Outside the DQM lock because under the DQM lock we can't do
	 * reclaim or take other locks that others hold while reclaiming.
	 */
	if (found)
		kfd_dec_compute_active(dqm->dev);

	return retval;
}

/* Copy a compute queue's saved wave state out to user space.  Only valid
 * for an inactive compute queue on a CWSR-enabled device.
 */
static int get_wave_state(struct device_queue_manager *dqm,
			  struct queue *q,
			  void __user *ctl_stack,
			  u32 *ctl_stack_used_size,
			  u32 *save_area_used_size)
{
	struct mqd_manager *mqd_mgr;

	dqm_lock(dqm);

	mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP];

	if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE ||
	    q->properties.is_active || !q->device->kfd->cwsr_enabled ||
	    !mqd_mgr->get_wave_state) {
		dqm_unlock(dqm);
		return -EINVAL;
	}

	dqm_unlock(dqm);

	/*
	 * get_wave_state is outside the dqm lock to prevent circular locking
	 * and the queue should be protected against destruction by the process
	 * lock.
	 */
	return mqd_mgr->get_wave_state(mqd_mgr, q->mqd, &q->properties,
			ctl_stack, ctl_stack_used_size, save_area_used_size);
}

/* Report the MQD size and (for compute queues) the control stack size
 * needed to checkpoint @q.
 */
static void get_queue_checkpoint_info(struct device_queue_manager *dqm,
				      const struct queue *q,
				      u32 *mqd_size,
				      u32 *ctl_stack_size)
{
	struct mqd_manager *mqd_mgr;
	enum KFD_MQD_TYPE mqd_type =
			get_mqd_type_from_queue_type(q->properties.type);

	dqm_lock(dqm);
	mqd_mgr = dqm->mqd_mgrs[mqd_type];
	*mqd_size = mqd_mgr->mqd_size;
	*ctl_stack_size = 0;

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE && mqd_mgr->get_checkpoint_info)
		mqd_mgr->get_checkpoint_info(mqd_mgr, q->mqd, ctl_stack_size);

	dqm_unlock(dqm);
}

/* Snapshot a queue's MQD and control stack for checkpoint/restore.  The
 * queue must be inactive and CWSR enabled on the device.
 */
static int checkpoint_mqd(struct device_queue_manager *dqm,
			  const struct queue *q,
			  void *mqd,
			  void *ctl_stack)
{
	struct mqd_manager *mqd_mgr;
	int r = 0;
	enum KFD_MQD_TYPE mqd_type =
			get_mqd_type_from_queue_type(q->properties.type);

	dqm_lock(dqm);

	if (q->properties.is_active || !q->device->kfd->cwsr_enabled) {
		r = -EINVAL;
		goto dqm_unlock;
	}

	mqd_mgr = dqm->mqd_mgrs[mqd_type];
	if (!mqd_mgr->checkpoint_mqd) {
		r = -EOPNOTSUPP;
		goto dqm_unlock;
	}

	mqd_mgr->checkpoint_mqd(mqd_mgr, q->mqd, mqd, ctl_stack);

dqm_unlock:
	dqm_unlock(dqm);
	return r;
}

/* Tear down all kernel and user queues of a terminating process under the
 * CP (HWS) scheduler and unregister the process from the DQM.
 */
static int process_termination_cpsch(struct device_queue_manager *dqm,
		struct qcm_process_device *qpd)
{
	int retval;
	struct queue *q;
	struct device *dev = dqm->dev->adev->dev;
	struct kernel_queue *kq, *kq_next;
	struct mqd_manager *mqd_mgr;
	struct device_process_node *cur, *next_dpn;
	enum kfd_unmap_queues_filter filter =
		KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;
	bool found = false;

	retval = 0;

	dqm_lock(dqm);

	/* Clean all kernel queues */
list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
		list_del(&kq->list);
		decrement_queue_count(dqm, qpd, kq->queue);
		qpd->is_debug = false;
		dqm->total_queue_count--;
		/* a kernel (debug) queue existed: unmap everything below */
		filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES;
	}

	/* Clear all user mode queues */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
			deallocate_sdma_queue(dqm, q);
		else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
			deallocate_sdma_queue(dqm, q);

		if (q->properties.is_active) {
			decrement_queue_count(dqm, qpd, q);

			if (dqm->dev->kfd->shared_resources.enable_mes) {
				retval = remove_queue_mes(dqm, q, qpd);
				if (retval)
					dev_err(dev, "Failed to remove queue %d\n",
						q->properties.queue_id);
			}
		}

		dqm->total_queue_count--;
	}

	/* Unregister process */
	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			found = true;
			break;
		}
	}

	/* without MES, one unmap covers all the queues removed above */
	if (!dqm->dev->kfd->shared_resources.enable_mes)
		retval = execute_queues_cpsch(dqm, filter, 0, USE_DEFAULT_GRACE_PERIOD);

	if ((retval || qpd->reset_wavefronts) &&
	    down_read_trylock(&dqm->dev->adev->reset_domain->sem)) {
		pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
		dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
		qpd->reset_wavefronts = false;
		up_read(&dqm->dev->adev->reset_domain->sem);
	}

	/* Lastly, free mqd resources.
	 * Do free_mqd() after dqm_unlock to avoid circular locking.
	 */
	while (!list_empty(&qpd->queues_list)) {
		q = list_first_entry(&qpd->queues_list, struct queue, list);
		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
				q->properties.type)];
		list_del(&q->list);
		qpd->queue_count--;
		dqm_unlock(dqm);
		mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
		dqm_lock(dqm);
	}
	dqm_unlock(dqm);

	/* Outside the DQM lock because under the DQM lock we can't do
	 * reclaim or take other locks that others hold while reclaiming.
	 */
	if (found)
		kfd_dec_compute_active(dqm->dev);

	return retval;
}

/* Create one MQD manager per MQD type; on failure free the managers that
 * were already created and return -ENOMEM.
 */
static int init_mqd_managers(struct device_queue_manager *dqm)
{
	int i, j;
	struct device *dev = dqm->dev->adev->dev;
	struct mqd_manager *mqd_mgr;

	for (i = 0; i < KFD_MQD_TYPE_MAX; i++) {
		mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev);
		if (!mqd_mgr) {
			dev_err(dev, "mqd manager [%d] initialization failed\n", i);
			goto out_free;
		}
		dqm->mqd_mgrs[i] = mqd_mgr;
	}

	return 0;

out_free:
	for (j = 0; j < i; j++) {
		kfree(dqm->mqd_mgrs[j]);
		dqm->mqd_mgrs[j] = NULL;
	}

	return -ENOMEM;
}

/* Allocate one hiq mqd (HWS) and all SDMA mqd in a continuous trunk*/
static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)
{
	int retval;
	struct kfd_node *dev = dqm->dev;
	struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd;
	/* all SDMA MQDs plus one HIQ MQD per XCC in a single GTT buffer */
	uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
		get_num_all_sdma_engines(dqm) *
		dev->kfd->device_info.num_sdma_queues_per_engine +
		(dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size *
		NUM_XCC(dqm->dev->xcc_mask));

	retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, size,
			&(mem_obj->gtt_mem), &(mem_obj->gpu_addr),
			(void *)&(mem_obj->cpu_ptr), false);

	return retval;
}

struct device_queue_manager
*device_queue_manager_init(struct kfd_node *dev)
{
	struct device_queue_manager *dqm;

	pr_debug("Loading device queue manager\n");

	dqm = kzalloc(sizeof(*dqm), GFP_KERNEL);
	if (!dqm)
		return NULL;

	/* Select the scheduling policy; some ASICs cannot use HWS. */
	switch (dev->adev->asic_type) {
	/* HWS is not available on Hawaii. */
	case CHIP_HAWAII:
	/* HWS depends on CWSR for timely dequeue. CWSR is not
	 * available on Tonga.
	 *
	 * FIXME: This argument also applies to Kaveri.
	 */
	case CHIP_TONGA:
		dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS;
		break;
	default:
		dqm->sched_policy = sched_policy;
		break;
	}

	dqm->dev = dev;
	switch (dqm->sched_policy) {
	case KFD_SCHED_POLICY_HWS:
	case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
		/* initialize dqm for cp scheduling */
		dqm->ops.create_queue = create_queue_cpsch;
		dqm->ops.initialize = initialize_cpsch;
		dqm->ops.start = start_cpsch;
		dqm->ops.stop = stop_cpsch;
		dqm->ops.destroy_queue = destroy_queue_cpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.register_process = register_process;
		dqm->ops.unregister_process = unregister_process;
		dqm->ops.uninitialize = uninitialize;
		dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
		dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		dqm->ops.process_termination = process_termination_cpsch;
		dqm->ops.evict_process_queues = evict_process_queues_cpsch;
		dqm->ops.restore_process_queues = restore_process_queues_cpsch;
		dqm->ops.get_wave_state = get_wave_state;
		dqm->ops.reset_queues = reset_queues_cpsch;
		dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info;
		dqm->ops.checkpoint_mqd = checkpoint_mqd;
		break;
	case KFD_SCHED_POLICY_NO_HWS:
		/* initialize dqm for no cp scheduling */
		dqm->ops.start = start_nocpsch;
		dqm->ops.stop = stop_nocpsch;
		dqm->ops.create_queue = create_queue_nocpsch;
		dqm->ops.destroy_queue = destroy_queue_nocpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.register_process = register_process;
		dqm->ops.unregister_process = unregister_process;
		dqm->ops.initialize = initialize_nocpsch;
		dqm->ops.uninitialize = uninitialize;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		dqm->ops.process_termination = process_termination_nocpsch;
		dqm->ops.evict_process_queues = evict_process_queues_nocpsch;
		dqm->ops.restore_process_queues =
			restore_process_queues_nocpsch;
		dqm->ops.get_wave_state = get_wave_state;
		dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info;
		dqm->ops.checkpoint_mqd = checkpoint_mqd;
		break;
	default:
		dev_err(dev->adev->dev, "Invalid scheduling policy %d\n", dqm->sched_policy);
		goto out_free;
	}

	/* Hook up the ASIC-family-specific callbacks. */
	switch (dev->adev->asic_type) {
	case CHIP_KAVERI:
	case CHIP_HAWAII:
		device_queue_manager_init_cik(&dqm->asic_ops);
		break;

	case CHIP_CARRIZO:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		device_queue_manager_init_vi(&dqm->asic_ops);
		break;

	default:
		if (KFD_GC_VERSION(dev) >= IP_VERSION(12, 0, 0))
			device_queue_manager_init_v12(&dqm->asic_ops);
		else if (KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0))
			device_queue_manager_init_v11(&dqm->asic_ops);
		else if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1))
			device_queue_manager_init_v10(&dqm->asic_ops);
		else if (KFD_GC_VERSION(dev) >= IP_VERSION(9, 0, 1))
			device_queue_manager_init_v9(&dqm->asic_ops);
		else {
			WARN(1, "Unexpected ASIC family %u",
			     dev->adev->asic_type);
			goto out_free;
		}
	}

	if (init_mqd_managers(dqm))
		goto out_free;

	if (!dev->kfd->shared_resources.enable_mes && allocate_hiq_sdma_mqd(dqm)) {
		dev_err(dev->adev->dev, "Failed to allocate hiq sdma mqd trunk buffer\n");
		goto out_free;
	}

	if (!dqm->ops.initialize(dqm)) {
		init_waitqueue_head(&dqm->destroy_wait);
		return dqm;
	}

out_free:
	kfree(dqm);
	return NULL;
}

/* Free the shared HIQ/SDMA MQD trunk allocated by allocate_hiq_sdma_mqd() */
static void deallocate_hiq_sdma_mqd(struct kfd_node *dev,
				    struct kfd_mem_obj *mqd)
{
	WARN(!mqd, "No hiq sdma mqd trunk to free");

	amdgpu_amdkfd_free_gtt_mem(dev->adev, &mqd->gtt_mem);
}

void device_queue_manager_uninit(struct device_queue_manager *dqm)
{
	dqm->ops.stop(dqm);
	dqm->ops.uninitialize(dqm);
	if (!dqm->dev->kfd->shared_resources.enable_mes)
		deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd);
	kfree(dqm);
}

/* Evict all queues of the process identified by @pasid from this device */
int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid)
{
	struct kfd_process_device *pdd;
	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
	int ret = 0;

	if (!p)
		return -EINVAL;
	WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
	pdd = kfd_get_process_device_data(dqm->dev, p);
	if (pdd)
		ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd);
	/* drop the reference taken by kfd_lookup_process_by_pasid() */
	kfd_unref_process(p);

	return ret;
}

/* Deferred-work handler: recover from a HW exception via full GPU reset */
static void kfd_process_hw_exception(struct work_struct *work)
{
	struct device_queue_manager *dqm = container_of(work,
			struct device_queue_manager, hw_exception_work);
	amdgpu_amdkfd_gpu_reset(dqm->dev->adev);
}

/* Carve the last KFD VMID out of the compute VMID bitmap and reserve it
 * for the trap debugger.  HWS-only.
 */
int reserve_debug_trap_vmid(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd)
{
	int r;
	struct device *dev = dqm->dev->adev->dev;
	int updated_vmid_mask;

	if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
		dev_err(dev, "Unsupported on sched_policy: %i\n", dqm->sched_policy);
		return -EINVAL;
	}

	dqm_lock(dqm);

	if (dqm->trap_debug_vmid != 0) {
dev_err(dev, "Trap debug id already reserved\n");
		r = -EBUSY;
		goto out_unlock;
	}

	/* all queues must be preempted before the VMID map can change */
	r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0,
			USE_DEFAULT_GRACE_PERIOD, false);
	if (r)
		goto out_unlock;

	updated_vmid_mask = dqm->dev->kfd->shared_resources.compute_vmid_bitmap;
	updated_vmid_mask &= ~(1 << dqm->dev->vm_info.last_vmid_kfd);

	dqm->dev->kfd->shared_resources.compute_vmid_bitmap = updated_vmid_mask;
	dqm->trap_debug_vmid = dqm->dev->vm_info.last_vmid_kfd;
	r = set_sched_resources(dqm);
	if (r)
		goto out_unlock;

	r = map_queues_cpsch(dqm);
	if (r)
		goto out_unlock;

	pr_debug("Reserved VMID for trap debug: %i\n", dqm->trap_debug_vmid);

out_unlock:
	dqm_unlock(dqm);
	return r;
}

/*
 * Releases vmid for the trap debugger
 */
int release_debug_trap_vmid(struct device_queue_manager *dqm,
			struct qcm_process_device *qpd)
{
	struct device *dev = dqm->dev->adev->dev;
	int r;
	int updated_vmid_mask;
	uint32_t trap_debug_vmid;

	if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
		dev_err(dev, "Unsupported on sched_policy: %i\n", dqm->sched_policy);
		return -EINVAL;
	}

	dqm_lock(dqm);
	trap_debug_vmid = dqm->trap_debug_vmid;
	if (dqm->trap_debug_vmid == 0) {
		dev_err(dev, "Trap debug id is not reserved\n");
		r = -EINVAL;
		goto out_unlock;
	}

	/* preempt everything before returning the VMID to the pool */
	r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0,
			USE_DEFAULT_GRACE_PERIOD, false);
	if (r)
		goto out_unlock;

	updated_vmid_mask = dqm->dev->kfd->shared_resources.compute_vmid_bitmap;
	updated_vmid_mask |= (1 << dqm->dev->vm_info.last_vmid_kfd);

	dqm->dev->kfd->shared_resources.compute_vmid_bitmap = updated_vmid_mask;
	dqm->trap_debug_vmid = 0;
	r = set_sched_resources(dqm);
	if (r)
		goto out_unlock;

	r = map_queues_cpsch(dqm);
	if (r)
		goto out_unlock;

	pr_debug("Released VMID for trap debug: %i\n", trap_debug_vmid);

out_unlock:
	dqm_unlock(dqm);
	return r;
}

#define QUEUE_NOT_FOUND -1
/* invalidate queue operation in array */
static void q_array_invalidate(uint32_t num_queues, uint32_t *queue_ids)
{
	int i;

	for (i = 0; i < num_queues; i++)
		queue_ids[i] |= KFD_DBG_QUEUE_INVALID_MASK;
}

/* find queue index in array */
static int q_array_get_index(unsigned int queue_id,
		uint32_t num_queues,
		uint32_t *queue_ids)
{
	int i;

	for (i = 0; i < num_queues; i++)
		if (queue_id == (queue_ids[i] & ~KFD_DBG_QUEUE_INVALID_MASK))
			return i;

	return QUEUE_NOT_FOUND;
}

/* On-stack work item used by suspend_queues() to copy wave context for
 * all queues of a process from a kthread with the process mm attached.
 */
struct copy_context_work_handler_workarea {
	struct work_struct copy_context_work;
	struct kfd_process *p;
};

static void copy_context_work_handler(struct work_struct *work)
{
	struct copy_context_work_handler_workarea *workarea;
	struct mqd_manager *mqd_mgr;
	struct queue *q;
	struct mm_struct *mm;
	struct kfd_process *p;
	uint32_t tmp_ctl_stack_used_size, tmp_save_area_used_size;
	int i;

	workarea = container_of(work,
			struct copy_context_work_handler_workarea,
			copy_context_work);

	p = workarea->p;
	mm = get_task_mm(p->lead_thread);

	if (!mm)
		return;

	/* borrow the process address space so the __user copy works */
	kthread_use_mm(mm);
	for (i = 0; i < p->n_pdds; i++) {
		struct kfd_process_device *pdd = p->pdds[i];
		struct device_queue_manager *dqm = pdd->dev->dqm;
		struct qcm_process_device *qpd = &pdd->qpd;

		list_for_each_entry(q, &qpd->queues_list, list) {
			mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP];

			/* We ignore the return value from get_wave_state
			 * because
			 * i) right now, it always returns 0, and
			 * ii) if we hit an error, we would continue to the
next queue anyway. 2815 */ 2816 mqd_mgr->get_wave_state(mqd_mgr, 2817 q->mqd, 2818 &q->properties, 2819 (void __user *) q->properties.ctx_save_restore_area_address, 2820 &tmp_ctl_stack_used_size, 2821 &tmp_save_area_used_size); 2822 } 2823 } 2824 kthread_unuse_mm(mm); 2825 mmput(mm); 2826 } 2827 2828 static uint32_t *get_queue_ids(uint32_t num_queues, uint32_t *usr_queue_id_array) 2829 { 2830 size_t array_size = num_queues * sizeof(uint32_t); 2831 2832 if (!usr_queue_id_array) 2833 return NULL; 2834 2835 return memdup_user(usr_queue_id_array, array_size); 2836 } 2837 2838 int resume_queues(struct kfd_process *p, 2839 uint32_t num_queues, 2840 uint32_t *usr_queue_id_array) 2841 { 2842 uint32_t *queue_ids = NULL; 2843 int total_resumed = 0; 2844 int i; 2845 2846 if (usr_queue_id_array) { 2847 queue_ids = get_queue_ids(num_queues, usr_queue_id_array); 2848 2849 if (IS_ERR(queue_ids)) 2850 return PTR_ERR(queue_ids); 2851 2852 /* mask all queues as invalid. unmask per successful request */ 2853 q_array_invalidate(num_queues, queue_ids); 2854 } 2855 2856 for (i = 0; i < p->n_pdds; i++) { 2857 struct kfd_process_device *pdd = p->pdds[i]; 2858 struct device_queue_manager *dqm = pdd->dev->dqm; 2859 struct device *dev = dqm->dev->adev->dev; 2860 struct qcm_process_device *qpd = &pdd->qpd; 2861 struct queue *q; 2862 int r, per_device_resumed = 0; 2863 2864 dqm_lock(dqm); 2865 2866 /* unmask queues that resume or already resumed as valid */ 2867 list_for_each_entry(q, &qpd->queues_list, list) { 2868 int q_idx = QUEUE_NOT_FOUND; 2869 2870 if (queue_ids) 2871 q_idx = q_array_get_index( 2872 q->properties.queue_id, 2873 num_queues, 2874 queue_ids); 2875 2876 if (!queue_ids || q_idx != QUEUE_NOT_FOUND) { 2877 int err = resume_single_queue(dqm, &pdd->qpd, q); 2878 2879 if (queue_ids) { 2880 if (!err) { 2881 queue_ids[q_idx] &= 2882 ~KFD_DBG_QUEUE_INVALID_MASK; 2883 } else { 2884 queue_ids[q_idx] |= 2885 KFD_DBG_QUEUE_ERROR_MASK; 2886 break; 2887 } 2888 } 2889 2890 if 
(dqm->dev->kfd->shared_resources.enable_mes) { 2891 wake_up_all(&dqm->destroy_wait); 2892 if (!err) 2893 total_resumed++; 2894 } else { 2895 per_device_resumed++; 2896 } 2897 } 2898 } 2899 2900 if (!per_device_resumed) { 2901 dqm_unlock(dqm); 2902 continue; 2903 } 2904 2905 r = execute_queues_cpsch(dqm, 2906 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 2907 0, 2908 USE_DEFAULT_GRACE_PERIOD); 2909 if (r) { 2910 dev_err(dev, "Failed to resume process queues\n"); 2911 if (queue_ids) { 2912 list_for_each_entry(q, &qpd->queues_list, list) { 2913 int q_idx = q_array_get_index( 2914 q->properties.queue_id, 2915 num_queues, 2916 queue_ids); 2917 2918 /* mask queue as error on resume fail */ 2919 if (q_idx != QUEUE_NOT_FOUND) 2920 queue_ids[q_idx] |= 2921 KFD_DBG_QUEUE_ERROR_MASK; 2922 } 2923 } 2924 } else { 2925 wake_up_all(&dqm->destroy_wait); 2926 total_resumed += per_device_resumed; 2927 } 2928 2929 dqm_unlock(dqm); 2930 } 2931 2932 if (queue_ids) { 2933 if (copy_to_user((void __user *)usr_queue_id_array, queue_ids, 2934 num_queues * sizeof(uint32_t))) 2935 pr_err("copy_to_user failed on queue resume\n"); 2936 2937 kfree(queue_ids); 2938 } 2939 2940 return total_resumed; 2941 } 2942 2943 int suspend_queues(struct kfd_process *p, 2944 uint32_t num_queues, 2945 uint32_t grace_period, 2946 uint64_t exception_clear_mask, 2947 uint32_t *usr_queue_id_array) 2948 { 2949 uint32_t *queue_ids = get_queue_ids(num_queues, usr_queue_id_array); 2950 int total_suspended = 0; 2951 int i; 2952 2953 if (IS_ERR(queue_ids)) 2954 return PTR_ERR(queue_ids); 2955 2956 /* mask all queues as invalid. 
unmask on successful request */
	q_array_invalidate(num_queues, queue_ids);

	for (i = 0; i < p->n_pdds; i++) {
		struct kfd_process_device *pdd = p->pdds[i];
		struct device_queue_manager *dqm = pdd->dev->dqm;
		struct device *dev = dqm->dev->adev->dev;
		struct qcm_process_device *qpd = &pdd->qpd;
		struct queue *q;
		int r, per_device_suspended = 0;

		mutex_lock(&p->event_mutex);
		dqm_lock(dqm);

		/* unmask queues that suspend or already suspended */
		list_for_each_entry(q, &qpd->queues_list, list) {
			int q_idx = q_array_get_index(q->properties.queue_id,
							num_queues,
							queue_ids);

			if (q_idx != QUEUE_NOT_FOUND) {
				int err = suspend_single_queue(dqm, pdd, q);
				bool is_mes = dqm->dev->kfd->shared_resources.enable_mes;

				if (!err) {
					queue_ids[q_idx] &= ~KFD_DBG_QUEUE_INVALID_MASK;
					if (exception_clear_mask && is_mes)
						q->properties.exception_status &=
							~exception_clear_mask;

					if (is_mes)
						total_suspended++;
					else
						per_device_suspended++;
				} else if (err != -EBUSY) {
					/* -EBUSY is tolerated (already being
					 * handled); anything else is an error.
					 */
					r = err;
					queue_ids[q_idx] |= KFD_DBG_QUEUE_ERROR_MASK;
					break;
				}
			}
		}

		if (!per_device_suspended) {
			dqm_unlock(dqm);
			mutex_unlock(&p->event_mutex);
			if (total_suspended)
				amdgpu_amdkfd_debug_mem_fence(dqm->dev->adev);
			continue;
		}

		r = execute_queues_cpsch(dqm,
			KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
			grace_period);

		if (r)
			dev_err(dev, "Failed to suspend process queues.\n");
		else
			total_suspended += per_device_suspended;

		list_for_each_entry(q, &qpd->queues_list, list) {
			int q_idx = q_array_get_index(q->properties.queue_id,
						num_queues, queue_ids);

			if (q_idx == QUEUE_NOT_FOUND)
				continue;

			/* mask queue as error on suspend fail */
			if (r)
				queue_ids[q_idx] |= KFD_DBG_QUEUE_ERROR_MASK;
			else if (exception_clear_mask)
				q->properties.exception_status &=
					~exception_clear_mask;
		}

		dqm_unlock(dqm);
		mutex_unlock(&p->event_mutex);
		amdgpu_device_flush_hdp(dqm->dev->adev, NULL);
	}

	if (total_suspended) {
		struct copy_context_work_handler_workarea copy_context_worker;

		/* copy the wave context from a worker that can attach the
		 * process mm (see copy_context_work_handler)
		 */
		INIT_WORK_ONSTACK(
				&copy_context_worker.copy_context_work,
				copy_context_work_handler);

		copy_context_worker.p = p;

		schedule_work(&copy_context_worker.copy_context_work);


		flush_work(&copy_context_worker.copy_context_work);
		destroy_work_on_stack(&copy_context_worker.copy_context_work);
	}

	if (copy_to_user((void __user *)usr_queue_id_array, queue_ids,
			num_queues * sizeof(uint32_t)))
		pr_err("copy_to_user failed on queue suspend\n");

	kfree(queue_ids);

	return total_suspended;
}

/* Translate an internal queue type/format into the UAPI queue-type value
 * reported in queue snapshots.
 */
static uint32_t set_queue_type_for_user(struct queue_properties *q_props)
{
	switch (q_props->type) {
	case KFD_QUEUE_TYPE_COMPUTE:
		return q_props->format == KFD_QUEUE_FORMAT_PM4
			?
KFD_IOC_QUEUE_TYPE_COMPUTE 3066 : KFD_IOC_QUEUE_TYPE_COMPUTE_AQL; 3067 case KFD_QUEUE_TYPE_SDMA: 3068 return KFD_IOC_QUEUE_TYPE_SDMA; 3069 case KFD_QUEUE_TYPE_SDMA_XGMI: 3070 return KFD_IOC_QUEUE_TYPE_SDMA_XGMI; 3071 default: 3072 WARN_ONCE(true, "queue type not recognized!"); 3073 return 0xffffffff; 3074 }; 3075 } 3076 3077 void set_queue_snapshot_entry(struct queue *q, 3078 uint64_t exception_clear_mask, 3079 struct kfd_queue_snapshot_entry *qss_entry) 3080 { 3081 qss_entry->ring_base_address = q->properties.queue_address; 3082 qss_entry->write_pointer_address = (uint64_t)q->properties.write_ptr; 3083 qss_entry->read_pointer_address = (uint64_t)q->properties.read_ptr; 3084 qss_entry->ctx_save_restore_address = 3085 q->properties.ctx_save_restore_area_address; 3086 qss_entry->ctx_save_restore_area_size = 3087 q->properties.ctx_save_restore_area_size; 3088 qss_entry->exception_status = q->properties.exception_status; 3089 qss_entry->queue_id = q->properties.queue_id; 3090 qss_entry->gpu_id = q->device->id; 3091 qss_entry->ring_size = (uint32_t)q->properties.queue_size; 3092 qss_entry->queue_type = set_queue_type_for_user(&q->properties); 3093 q->properties.exception_status &= ~exception_clear_mask; 3094 } 3095 3096 int debug_lock_and_unmap(struct device_queue_manager *dqm) 3097 { 3098 struct device *dev = dqm->dev->adev->dev; 3099 int r; 3100 3101 if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { 3102 dev_err(dev, "Unsupported on sched_policy: %i\n", dqm->sched_policy); 3103 return -EINVAL; 3104 } 3105 3106 if (!kfd_dbg_is_per_vmid_supported(dqm->dev)) 3107 return 0; 3108 3109 dqm_lock(dqm); 3110 3111 r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, 0, false); 3112 if (r) 3113 dqm_unlock(dqm); 3114 3115 return r; 3116 } 3117 3118 int debug_map_and_unlock(struct device_queue_manager *dqm) 3119 { 3120 struct device *dev = dqm->dev->adev->dev; 3121 int r; 3122 3123 if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { 3124 dev_err(dev, "Unsupported 
on sched_policy: %i\n", dqm->sched_policy); 3125 return -EINVAL; 3126 } 3127 3128 if (!kfd_dbg_is_per_vmid_supported(dqm->dev)) 3129 return 0; 3130 3131 r = map_queues_cpsch(dqm); 3132 3133 dqm_unlock(dqm); 3134 3135 return r; 3136 } 3137 3138 int debug_refresh_runlist(struct device_queue_manager *dqm) 3139 { 3140 int r = debug_lock_and_unmap(dqm); 3141 3142 if (r) 3143 return r; 3144 3145 return debug_map_and_unlock(dqm); 3146 } 3147 3148 #if defined(CONFIG_DEBUG_FS) 3149 3150 static void seq_reg_dump(struct seq_file *m, 3151 uint32_t (*dump)[2], uint32_t n_regs) 3152 { 3153 uint32_t i, count; 3154 3155 for (i = 0, count = 0; i < n_regs; i++) { 3156 if (count == 0 || 3157 dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) { 3158 seq_printf(m, "%s %08x: %08x", 3159 i ? "\n" : "", 3160 dump[i][0], dump[i][1]); 3161 count = 7; 3162 } else { 3163 seq_printf(m, " %08x", dump[i][1]); 3164 count--; 3165 } 3166 } 3167 3168 seq_puts(m, "\n"); 3169 } 3170 3171 int dqm_debugfs_hqds(struct seq_file *m, void *data) 3172 { 3173 struct device_queue_manager *dqm = data; 3174 uint32_t xcc_mask = dqm->dev->xcc_mask; 3175 uint32_t (*dump)[2], n_regs; 3176 int pipe, queue; 3177 int r = 0, xcc_id; 3178 uint32_t sdma_engine_start; 3179 3180 if (!dqm->sched_running) { 3181 seq_puts(m, " Device is stopped\n"); 3182 return 0; 3183 } 3184 3185 for_each_inst(xcc_id, xcc_mask) { 3186 r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev, 3187 KFD_CIK_HIQ_PIPE, 3188 KFD_CIK_HIQ_QUEUE, &dump, 3189 &n_regs, xcc_id); 3190 if (!r) { 3191 seq_printf( 3192 m, 3193 " Inst %d, HIQ on MEC %d Pipe %d Queue %d\n", 3194 xcc_id, 3195 KFD_CIK_HIQ_PIPE / get_pipes_per_mec(dqm) + 1, 3196 KFD_CIK_HIQ_PIPE % get_pipes_per_mec(dqm), 3197 KFD_CIK_HIQ_QUEUE); 3198 seq_reg_dump(m, dump, n_regs); 3199 3200 kfree(dump); 3201 } 3202 3203 for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { 3204 int pipe_offset = pipe * get_queues_per_pipe(dqm); 3205 3206 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) { 3207 if 
(!test_bit(pipe_offset + queue, 3208 dqm->dev->kfd->shared_resources.cp_queue_bitmap)) 3209 continue; 3210 3211 r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev, 3212 pipe, queue, 3213 &dump, &n_regs, 3214 xcc_id); 3215 if (r) 3216 break; 3217 3218 seq_printf(m, 3219 " Inst %d, CP Pipe %d, Queue %d\n", 3220 xcc_id, pipe, queue); 3221 seq_reg_dump(m, dump, n_regs); 3222 3223 kfree(dump); 3224 } 3225 } 3226 } 3227 3228 sdma_engine_start = dqm->dev->node_id * get_num_all_sdma_engines(dqm); 3229 for (pipe = sdma_engine_start; 3230 pipe < (sdma_engine_start + get_num_all_sdma_engines(dqm)); 3231 pipe++) { 3232 for (queue = 0; 3233 queue < dqm->dev->kfd->device_info.num_sdma_queues_per_engine; 3234 queue++) { 3235 r = dqm->dev->kfd2kgd->hqd_sdma_dump( 3236 dqm->dev->adev, pipe, queue, &dump, &n_regs); 3237 if (r) 3238 break; 3239 3240 seq_printf(m, " SDMA Engine %d, RLC %d\n", 3241 pipe, queue); 3242 seq_reg_dump(m, dump, n_regs); 3243 3244 kfree(dump); 3245 } 3246 } 3247 3248 return r; 3249 } 3250 3251 int dqm_debugfs_hang_hws(struct device_queue_manager *dqm) 3252 { 3253 int r = 0; 3254 3255 dqm_lock(dqm); 3256 r = pm_debugfs_hang_hws(&dqm->packet_mgr); 3257 if (r) { 3258 dqm_unlock(dqm); 3259 return r; 3260 } 3261 dqm->active_runlist = true; 3262 r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 3263 0, USE_DEFAULT_GRACE_PERIOD); 3264 dqm_unlock(dqm); 3265 3266 return r; 3267 } 3268 3269 #endif 3270