1 // SPDX-License-Identifier: GPL-2.0 OR MIT 2 /* 3 * Copyright 2014-2022 Advanced Micro Devices, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21 * OTHER DEALINGS IN THE SOFTWARE. 
 *
 */

#include <linux/ratelimit.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/sched.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_mqd_manager.h"
#include "cik_regs.h"
#include "kfd_kernel_queue.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_reset.h"
#include "mes_v11_api_def.h"
#include "kfd_debug.h"

/* Size of the per-pipe EOP queue */
#define CIK_HPD_EOP_BYTES_LOG2 11
#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)

/* Forward declarations for helpers defined later in this file */
static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
				  u32 pasid, unsigned int vmid);

static int execute_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param,
				uint32_t grace_period);
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
			      enum kfd_unmap_queues_filter filter,
			      uint32_t filter_param,
			      uint32_t grace_period,
			      bool reset);

static int map_queues_cpsch(struct device_queue_manager *dqm);

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				  struct queue *q);

static inline void deallocate_hqd(struct device_queue_manager *dqm,
				  struct queue *q);
static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q);
static int allocate_sdma_queue(struct device_queue_manager *dqm,
			       struct queue *q, const uint32_t *restore_sdma_id);
static void kfd_process_hw_exception(struct work_struct *work);

/* Map a KFD queue type onto the MQD (memory queue descriptor) type. */
static inline
enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
{
	/* Both SDMA variants share the SDMA MQD; everything else uses CP. */
	if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI)
		return KFD_MQD_TYPE_SDMA;
	return KFD_MQD_TYPE_CP;
}

/* Return true if at least one queue slot on @pipe of @mec is usable by KFD. */
static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
{
	int i;
	int pipe_offset = (mec * dqm->dev->kfd->shared_resources.num_pipe_per_mec
		+ pipe) * dqm->dev->kfd->shared_resources.num_queue_per_pipe;

	/* queue is available for KFD usage if bit is 1 */
	for (i = 0; i < dqm->dev->kfd->shared_resources.num_queue_per_pipe; ++i)
		if (test_bit(pipe_offset + i,
			     dqm->dev->kfd->shared_resources.cp_queue_bitmap))
			return true;
	return false;
}

/* Total number of CP queue slots reserved for KFD across all MECs. */
unsigned int get_cp_queues_num(struct device_queue_manager *dqm)
{
	return bitmap_weight(dqm->dev->kfd->shared_resources.cp_queue_bitmap,
			     AMDGPU_MAX_QUEUES);
}

unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
{
	return dqm->dev->kfd->shared_resources.num_queue_per_pipe;
}

unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
{
	return dqm->dev->kfd->shared_resources.num_pipe_per_mec;
}

/* Total SDMA engines: regular engines plus XGMI-dedicated engines. */
static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm)
{
	return kfd_get_num_sdma_engines(dqm->dev) +
		kfd_get_num_xgmi_sdma_engines(dqm->dev);
}

unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
{
	return kfd_get_num_sdma_engines(dqm->dev) *
		dqm->dev->kfd->device_info.num_sdma_queues_per_engine;
}

unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm)
{
	return kfd_get_num_xgmi_sdma_engines(dqm->dev) *
		dqm->dev->kfd->device_info.num_sdma_queues_per_engine;
}

/* Initialize the SDMA and XGMI-SDMA queue allocation bitmaps (1 = free). */
static void init_sdma_bitmaps(struct device_queue_manager *dqm)
{
	bitmap_zero(dqm->sdma_bitmap, KFD_MAX_SDMA_QUEUES);
	bitmap_set(dqm->sdma_bitmap, 0, get_num_sdma_queues(dqm));

	bitmap_zero(dqm->xgmi_sdma_bitmap, KFD_MAX_SDMA_QUEUES);
	bitmap_set(dqm->xgmi_sdma_bitmap, 0, get_num_xgmi_sdma_queues(dqm));

	/* Mask out the reserved queues */
	bitmap_andnot(dqm->sdma_bitmap, dqm->sdma_bitmap,
		      dqm->dev->kfd->device_info.reserved_sdma_queues_bitmap,
		      KFD_MAX_SDMA_QUEUES);
}

/* Program the per-VMID shared-memory configuration on every XCC instance. */
void program_sh_mem_settings(struct
			     device_queue_manager *dqm,
			     struct qcm_process_device *qpd)
{
	uint32_t xcc_mask = dqm->dev->xcc_mask;
	int xcc_id;

	for_each_inst(xcc_id, xcc_mask)
		dqm->dev->kfd2kgd->program_sh_mem_settings(
			dqm->dev->adev, qpd->vmid, qpd->sh_mem_config,
			qpd->sh_mem_ape1_base, qpd->sh_mem_ape1_limit,
			qpd->sh_mem_bases, xcc_id);
}

/*
 * The hardware scheduler (HWS/MES) is unresponsive: mark every device queue
 * as reset and schedule the hw_exception work item, which issues a GPU reset.
 */
static void kfd_hws_hang(struct device_queue_manager *dqm)
{
	struct device_process_node *cur;
	struct qcm_process_device *qpd;
	struct queue *q;

	/* Mark all device queues as reset. */
	list_for_each_entry(cur, &dqm->queues, list) {
		qpd = cur->qpd;
		list_for_each_entry(q, &qpd->queues_list, list) {
			struct kfd_process_device *pdd = qpd_to_pdd(qpd);

			pdd->has_reset_queue = true;
		}
	}

	/*
	 * Issue a GPU reset if HWS is unresponsive
	 */
	schedule_work(&dqm->hw_exception_work);
}

/*
 * Translate a KFD queue type to the MES API queue type.
 * Returns a negative errno (-EINVAL) and warns for unsupported types.
 */
static int convert_to_mes_queue_type(int queue_type)
{
	int mes_queue_type;

	switch (queue_type) {
	case KFD_QUEUE_TYPE_COMPUTE:
		mes_queue_type = MES_QUEUE_TYPE_COMPUTE;
		break;
	case KFD_QUEUE_TYPE_SDMA:
		mes_queue_type = MES_QUEUE_TYPE_SDMA;
		break;
	default:
		WARN(1, "Invalid queue type %d", queue_type);
		mes_queue_type = -EINVAL;
		break;
	}

	return mes_queue_type;
}

/*
 * Hand queue @q of process device @qpd to the MES firmware scheduler.
 * Returns 0 on success, -EIO if a GPU reset is in progress, or a negative
 * errno from MES; on MES failure a GPU reset is triggered via kfd_hws_hang().
 */
static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
			 struct qcm_process_device *qpd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
	struct kfd_process_device *pdd = qpd_to_pdd(qpd);
	struct mes_add_queue_input queue_input;
	int r, queue_type;
	uint64_t wptr_addr_off;

	/* Do not talk to MES while a reset is in flight. */
	if (!down_read_trylock(&adev->reset_domain->sem))
		return -EIO;

	memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input));
	queue_input.process_id = qpd->pqm->process->pasid;
	queue_input.page_table_base_addr = qpd->page_table_base;
	queue_input.process_va_start = 0;
	queue_input.process_va_end = adev->vm_manager.max_pfn - 1;
	/* MES unit for quantum is 100ns */
	queue_input.process_quantum = KFD_MES_PROCESS_QUANTUM; /* Equivalent to 10ms. */
	queue_input.process_context_addr = pdd->proc_ctx_gpu_addr;
	queue_input.gang_quantum = KFD_MES_GANG_QUANTUM; /* Equivalent to 1ms */
	queue_input.gang_context_addr = q->gang_ctx_gpu_addr;
	queue_input.inprocess_gang_priority = q->properties.priority;
	queue_input.gang_global_priority_level =
					AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
	queue_input.doorbell_offset = q->properties.doorbell_off;
	queue_input.mqd_addr = q->gart_mqd_addr;
	queue_input.wptr_addr = (uint64_t)q->properties.write_ptr;

	/* wptr MC address = BO base + offset of the wptr within its page */
	wptr_addr_off = (uint64_t)q->properties.write_ptr & (PAGE_SIZE - 1);
	queue_input.wptr_mc_addr = amdgpu_bo_gpu_offset(q->properties.wptr_bo) + wptr_addr_off;

	queue_input.is_kfd_process = 1;
	queue_input.is_aql_queue = (q->properties.format == KFD_QUEUE_FORMAT_AQL);
	queue_input.queue_size = q->properties.queue_size >> 2;

	queue_input.paging = false;
	queue_input.tba_addr = qpd->tba_addr;
	queue_input.tma_addr = qpd->tma_addr;
	queue_input.trap_en = !kfd_dbg_has_cwsr_workaround(q->device);
	queue_input.skip_process_ctx_clear =
		qpd->pqm->process->runtime_info.runtime_state == DEBUG_RUNTIME_STATE_ENABLED &&
						(qpd->pqm->process->debug_trap_enabled ||
						 kfd_dbg_has_ttmps_always_setup(q->device));

	queue_type = convert_to_mes_queue_type(q->properties.type);
	if (queue_type < 0) {
		dev_err(adev->dev, "Queue type not supported with MES, queue:%d\n",
			q->properties.type);
		up_read(&adev->reset_domain->sem);
		return -EINVAL;
	}
	queue_input.queue_type = (uint32_t)queue_type;

	queue_input.exclusively_scheduled = q->properties.is_gws;

	amdgpu_mes_lock(&adev->mes);
	r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
	amdgpu_mes_unlock(&adev->mes);
	up_read(&adev->reset_domain->sem);
	if (r) {
		dev_err(adev->dev, "failed to add hardware queue to MES, doorbell=0x%x\n",
			q->properties.doorbell_off);
		dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n");
		kfd_hws_hang(dqm);
	}

	return r;
}

/*
 * Remove queue @q from the MES firmware scheduler. Mirrors add_queue_mes():
 * -EIO if a reset is in progress; MES failure triggers kfd_hws_hang().
 */
static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q,
			    struct qcm_process_device *qpd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
	int r;
	struct mes_remove_queue_input queue_input;

	if (!down_read_trylock(&adev->reset_domain->sem))
		return -EIO;

	memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input));
	queue_input.doorbell_offset = q->properties.doorbell_off;
	queue_input.gang_context_addr = q->gang_ctx_gpu_addr;

	amdgpu_mes_lock(&adev->mes);
	r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input);
	amdgpu_mes_unlock(&adev->mes);
	up_read(&adev->reset_domain->sem);

	if (r) {
		dev_err(adev->dev, "failed to remove hardware queue from MES, doorbell=0x%x\n",
			q->properties.doorbell_off);
		dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n");
		kfd_hws_hang(dqm);
	}

	return r;
}

/*
 * Remove every active queue of every process on this dqm from MES.
 * Stops and returns on the first failure.
 */
static int remove_all_queues_mes(struct device_queue_manager *dqm)
{
	struct device_process_node *cur;
	struct device *dev = dqm->dev->adev->dev;
	struct qcm_process_device *qpd;
	struct queue *q;
	int retval = 0;

	list_for_each_entry(cur, &dqm->queues, list) {
		qpd = cur->qpd;
		list_for_each_entry(q, &qpd->queues_list, list) {
			if (q->properties.is_active) {
				retval = remove_queue_mes(dqm, q, qpd);
				if (retval) {
					dev_err(dev, "%s: Failed to remove queue %d for dev %d",
						__func__,
						q->properties.queue_id,
						dqm->dev->id);
					return retval;
				}
			}
		}
	}

	return retval;
}

/* Update dqm/qpd accounting when a queue becomes active. */
static void increment_queue_count(struct
				  device_queue_manager *dqm,
				  struct qcm_process_device *qpd,
				  struct queue *q)
{
	dqm->active_queue_count++;
	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
	    q->properties.type == KFD_QUEUE_TYPE_DIQ)
		dqm->active_cp_queue_count++;

	if (q->properties.is_gws) {
		dqm->gws_queue_count++;
		qpd->mapped_gws_queue = true;
	}
}

/* Update dqm/qpd accounting when a queue becomes inactive (inverse of above). */
static void decrement_queue_count(struct device_queue_manager *dqm,
				  struct qcm_process_device *qpd,
				  struct queue *q)
{
	dqm->active_queue_count--;
	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
	    q->properties.type == KFD_QUEUE_TYPE_DIQ)
		dqm->active_cp_queue_count--;

	if (q->properties.is_gws) {
		dqm->gws_queue_count--;
		qpd->mapped_gws_queue = false;
	}
}

/*
 * Allocate a doorbell ID to this queue.
 * If doorbell_id is passed in, make sure requested ID is valid then allocate it.
 */
static int allocate_doorbell(struct qcm_process_device *qpd,
			     struct queue *q,
			     uint32_t const *restore_id)
{
	struct kfd_node *dev = qpd->dqm->dev;

	if (!KFD_IS_SOC15(dev)) {
		/* On pre-SOC15 chips we need to use the queue ID to
		 * preserve the user mode ABI.
		 */

		if (restore_id && *restore_id != q->properties.queue_id)
			return -EINVAL;

		q->doorbell_id = q->properties.queue_id;
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
			q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
		/* For SDMA queues on SOC15 with 8-byte doorbell, use static
		 * doorbell assignments based on the engine and queue id.
		 * The doorbell index distance between RLC (2*i) and (2*i+1)
		 * for a SDMA engine is 512.
		 */

		uint32_t *idx_offset = dev->kfd->shared_resources.sdma_doorbell_idx;

		/*
		 * q->properties.sdma_engine_id corresponds to the virtual
		 * sdma engine number. However, for doorbell allocation,
		 * we need the physical sdma engine id in order to get the
		 * correct doorbell offset.
		 */
		uint32_t valid_id = idx_offset[qpd->dqm->dev->node_id *
					       get_num_all_sdma_engines(qpd->dqm) +
					       q->properties.sdma_engine_id]
						+ (q->properties.sdma_queue_id & 1)
						* KFD_QUEUE_DOORBELL_MIRROR_OFFSET
						+ (q->properties.sdma_queue_id >> 1);

		if (restore_id && *restore_id != valid_id)
			return -EINVAL;
		q->doorbell_id = valid_id;
	} else {
		/* For CP queues on SOC15 */
		if (restore_id) {
			/* make sure that ID is free  */
			if (__test_and_set_bit(*restore_id, qpd->doorbell_bitmap))
				return -EINVAL;

			q->doorbell_id = *restore_id;
		} else {
			/* or reserve a free doorbell ID */
			unsigned int found;

			found = find_first_zero_bit(qpd->doorbell_bitmap,
						    KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
			if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
				pr_debug("No doorbells available");
				return -EBUSY;
			}
			set_bit(found, qpd->doorbell_bitmap);
			q->doorbell_id = found;
		}
	}

	/* Translate the per-process doorbell slot into a BAR offset. */
	q->properties.doorbell_off = amdgpu_doorbell_index_on_bar(dev->adev,
								  qpd->proc_doorbells,
								  q->doorbell_id,
								  dev->kfd->device_info.doorbell_size);
	return 0;
}

/* Release a dynamically allocated CP doorbell; SDMA/pre-SOC15 IDs are static. */
static void deallocate_doorbell(struct qcm_process_device *qpd,
				struct queue *q)
{
	unsigned int old;
	struct kfd_node *dev = qpd->dqm->dev;

	if (!KFD_IS_SOC15(dev) ||
	    q->properties.type == KFD_QUEUE_TYPE_SDMA ||
	    q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
		return;

	old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap);
	WARN_ON(!old);
}

/* Program the CWSR trap handler addresses (TBA/TMA) on every XCC instance. */
static void program_trap_handler_settings(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd)
{
	uint32_t xcc_mask = dqm->dev->xcc_mask;
	int xcc_id;

	if (dqm->dev->kfd2kgd->program_trap_handler_settings)
		for_each_inst(xcc_id, xcc_mask)
			dqm->dev->kfd2kgd->program_trap_handler_settings(
				dqm->dev->adev, qpd->vmid, qpd->tba_addr,
				qpd->tma_addr, xcc_id);
}

/*
 * Pick a free VMID for @qpd (non-HWS path), bind its PASID, and program the
 * per-VMID state (SH_MEM config, trap handler, page table base, scratch).
 * Returns -ENOSPC when all KFD VMIDs are in use. Caller holds the dqm lock.
 */
static int allocate_vmid(struct device_queue_manager *dqm,
			 struct qcm_process_device *qpd,
			 struct queue *q)
{
	struct device *dev = dqm->dev->adev->dev;
	int allocated_vmid = -1, i;

	/* Scan the KFD VMID range for an unused slot. */
	for (i = dqm->dev->vm_info.first_vmid_kfd;
			i <= dqm->dev->vm_info.last_vmid_kfd; i++) {
		if (!dqm->vmid_pasid[i]) {
			allocated_vmid = i;
			break;
		}
	}

	if (allocated_vmid < 0) {
		dev_err(dev, "no more vmid to allocate\n");
		return -ENOSPC;
	}

	pr_debug("vmid allocated: %d\n", allocated_vmid);

	dqm->vmid_pasid[allocated_vmid] = q->process->pasid;

	set_pasid_vmid_mapping(dqm, q->process->pasid, allocated_vmid);

	qpd->vmid = allocated_vmid;
	q->properties.vmid = allocated_vmid;

	program_sh_mem_settings(dqm, qpd);

	if (KFD_IS_SOC15(dqm->dev) && dqm->dev->kfd->cwsr_enabled)
		program_trap_handler_settings(dqm, qpd);

	/* qpd->page_table_base is set earlier when register_process()
	 * is called, i.e. when the first queue is created.
	 */
	dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->adev,
			qpd->vmid,
			qpd->page_table_base);
	/* invalidate the VM context after pasid and vmid mapping is set up */
	kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);

	if (dqm->dev->kfd2kgd->set_scratch_backing_va)
		dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->adev,
				qpd->sh_hidden_private_base, qpd->vmid);

	return 0;
}

/*
 * Submit a RELEASE_MEM packet through the process IB to flush the texture
 * cache. Needed on ASICs where CP does not flush TC at dequeue (see caller).
 */
static int flush_texture_cache_nocpsch(struct kfd_node *kdev,
				struct qcm_process_device *qpd)
{
	const struct packet_manager_funcs *pmf = qpd->dqm->packet_mgr.pmf;
	int ret;

	if (!qpd->ib_kaddr)
		return -ENOMEM;

	ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
	if (ret)
		return ret;

	return amdgpu_amdkfd_submit_ib(kdev->adev, KGD_ENGINE_MEC1, qpd->vmid,
				qpd->ib_base, (uint32_t *)qpd->ib_kaddr,
				pmf->release_mem_size / sizeof(uint32_t));
}

/* Undo allocate_vmid(): flush caches/TLB and release the PASID/VMID binding. */
static void deallocate_vmid(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	struct device *dev = dqm->dev->adev->dev;

	/* On GFX v7, CP doesn't flush TC at dequeue */
	if (q->device->adev->asic_type == CHIP_HAWAII)
		if (flush_texture_cache_nocpsch(q->device, qpd))
			dev_err(dev, "Failed to flush TC\n");

	kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);

	/* Release the vmid mapping */
	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
	dqm->vmid_pasid[qpd->vmid] = 0;

	qpd->vmid = 0;
	q->properties.vmid = 0;
}

/*
 * Create a user-mode queue without the CP scheduler (non-HWS path):
 * allocate VMID (first queue of the process), HQD or SDMA slot, doorbell
 * and MQD, then load the MQD onto hardware if the queue is active.
 * @qd/@restore_mqd/@restore_ctl_stack are non-NULL on CRIU restore.
 */
static int create_queue_nocpsch(struct device_queue_manager *dqm,
				struct queue *q,
				struct qcm_process_device *qpd,
				const struct kfd_criu_queue_priv_data *qd,
				const void *restore_mqd, const void *restore_ctl_stack)
{
	struct mqd_manager *mqd_mgr;
	int retval;

	dqm_lock(dqm);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		retval = -EPERM;
		goto out_unlock;
	}

	/* First queue of this process on this device: bind a VMID. */
	if (list_empty(&qpd->queues_list)) {
		retval = allocate_vmid(dqm, qpd, q);
		if (retval)
			goto out_unlock;
	}
	q->properties.vmid = qpd->vmid;
	/*
	 * Eviction state logic: mark all queues as evicted, even ones
	 * not currently active. Restoring inactive queues later only
	 * updates the is_evicted flag but is a no-op otherwise.
	 */
	q->properties.is_evicted = !!qpd->evicted;

	q->properties.tba_addr = qpd->tba_addr;
	q->properties.tma_addr = qpd->tma_addr;

	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
			q->properties.type)];
	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
		retval = allocate_hqd(dqm, q);
		if (retval)
			goto deallocate_vmid;
		pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
			q->pipe, q->queue);
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
		retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL);
		if (retval)
			goto deallocate_vmid;
		dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
	}

	retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL);
	if (retval)
		goto out_deallocate_hqd;

	/* Temporarily release dqm lock to avoid a circular lock dependency */
	dqm_unlock(dqm);
	q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties);
	dqm_lock(dqm);

	if (!q->mqd_mem_obj) {
		retval = -ENOMEM;
		goto out_deallocate_doorbell;
	}

	if (qd)
		mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr,
				     &q->properties, restore_mqd, restore_ctl_stack,
				     qd->ctl_stack_size);
	else
		mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
					&q->gart_mqd_addr, &q->properties);

	if (q->properties.is_active) {
		if (!dqm->sched_running) {
			WARN_ONCE(1, "Load non-HWS mqd while stopped\n");
			goto add_queue_to_list;
		}

		/* load_mqd pins user pages; it must run in the owning thread */
		if (WARN(q->process->mm != current->mm,
					"should only run in user thread"))
			retval = -EFAULT;
		else
			retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
					q->queue, &q->properties, current->mm);
		if (retval)
			goto out_free_mqd;
	}

add_queue_to_list:
	list_add(&q->list, &qpd->queues_list);
	qpd->queue_count++;
	if (q->properties.is_active)
		increment_queue_count(dqm, qpd, q);

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);
	goto out_unlock;

out_free_mqd:
	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
out_deallocate_doorbell:
	deallocate_doorbell(qpd, q);
out_deallocate_hqd:
	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
		deallocate_hqd(dqm, q);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
			q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
		deallocate_sdma_queue(dqm, q);
deallocate_vmid:
	/* Only release the VMID if this queue never joined the list. */
	if (list_empty(&qpd->queues_list))
		deallocate_vmid(dqm, qpd, q);
out_unlock:
	dqm_unlock(dqm);
	return retval;
}

/*
 * Reserve an HQD slot (pipe, queue) for a CP queue, round-robin across
 * pipes starting at next_pipe_to_allocate. Returns -EBUSY when full.
 */
static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
{
	bool set;
	int pipe, bit, i;

	set = false;

	for (pipe = dqm->next_pipe_to_allocate, i = 0;
			i < get_pipes_per_mec(dqm);
			pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {

		if (!is_pipe_enabled(dqm, 0, pipe))
			continue;

		if (dqm->allocated_queues[pipe] != 0) {
			bit = ffs(dqm->allocated_queues[pipe]) - 1;
			dqm->allocated_queues[pipe] &= ~(1 << bit);
			q->pipe = pipe;
			q->queue = bit;
			set = true;
			break;
		}
	}

	if (!set)
		return -EBUSY;

	pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue);
	/* horizontal hqd allocation */
	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);

	return 0;
}

/* Return the queue's HQD slot to the per-pipe free mask. */
static inline void deallocate_hqd(struct device_queue_manager *dqm,
				struct queue *q)
{
	dqm->allocated_queues[q->pipe] |= (1 << q->queue);
}

#define SQ_IND_CMD_CMD_KILL		0x00000003
#define SQ_IND_CMD_MODE_BROADCAST	0x00000001

/*
 * Kill all wavefronts belonging to process @p by broadcasting a SQ_CMD KILL
 * to the VMID the process is currently mapped to, on every XCC instance.
 */
static int dbgdev_wave_reset_wavefronts(struct kfd_node *dev, struct kfd_process *p)
{
	int status = 0;
	unsigned int vmid;
	uint16_t queried_pasid;
	union SQ_CMD_BITS reg_sq_cmd;
	union GRBM_GFX_INDEX_BITS
				reg_gfx_index;
	struct kfd_process_device *pdd;
	int first_vmid_to_scan = dev->vm_info.first_vmid_kfd;
	int last_vmid_to_scan = dev->vm_info.last_vmid_kfd;
	uint32_t xcc_mask = dev->xcc_mask;
	int xcc_id;

	reg_sq_cmd.u32All = 0;
	reg_gfx_index.u32All = 0;

	pr_debug("Killing all process wavefronts\n");

	if (!dev->kfd2kgd->get_atc_vmid_pasid_mapping_info) {
		dev_err(dev->adev->dev, "no vmid pasid mapping supported\n");
		return -EOPNOTSUPP;
	}

	/* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
	 * ATC_VMID15_PASID_MAPPING
	 * to check which VMID the current process is mapped to.
	 */

	for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
		status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info
				(dev->adev, vmid, &queried_pasid);

		if (status && queried_pasid == p->pasid) {
			pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n",
					vmid, p->pasid);
			break;
		}
	}

	if (vmid > last_vmid_to_scan) {
		dev_err(dev->adev->dev, "Didn't find vmid for pasid 0x%x\n", p->pasid);
		return -EFAULT;
	}

	/* taking the VMID for that process on the safe way using PDD */
	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd)
		return -EFAULT;

	/* Broadcast the KILL command to every SE/SH/instance of this VMID. */
	reg_gfx_index.bits.sh_broadcast_writes = 1;
	reg_gfx_index.bits.se_broadcast_writes = 1;
	reg_gfx_index.bits.instance_broadcast_writes = 1;
	reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
	reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
	reg_sq_cmd.bits.vm_id = vmid;

	for_each_inst(xcc_id, xcc_mask)
		dev->kfd2kgd->wave_control_execute(
			dev->adev, reg_gfx_index.u32All,
			reg_sq_cmd.u32All, xcc_id);

	return 0;
}

/* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked
 * to avoid asynchronized access
 */
static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd_mgr;

	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
			q->properties.type)];

	/* Return the hardware slot first. */
	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
		deallocate_hqd(dqm, q);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		deallocate_sdma_queue(dqm, q);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
		deallocate_sdma_queue(dqm, q);
	else {
		pr_debug("q->properties.type %d is invalid\n",
				q->properties.type);
		return -EINVAL;
	}
	dqm->total_queue_count--;

	deallocate_doorbell(qpd, q);

	if (!dqm->sched_running) {
		WARN_ONCE(1, "Destroy non-HWS queue while stopped\n");
		return 0;
	}

	retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
				KFD_UNMAP_LATENCY_MS,
				q->pipe, q->queue);
	/* Preemption timed out: wavefronts must be reset before VMID reuse. */
	if (retval == -ETIME)
		qpd->reset_wavefronts = true;

	list_del(&q->list);
	if (list_empty(&qpd->queues_list)) {
		if (qpd->reset_wavefronts) {
			pr_warn("Resetting wave fronts (nocpsch) on dev %p\n",
					dqm->dev);
			/* dbgdev_wave_reset_wavefronts has to be called before
			 * deallocate_vmid(), i.e. when vmid is still in use.
			 */
			dbgdev_wave_reset_wavefronts(dqm->dev,
					qpd->pqm->process);
			qpd->reset_wavefronts = false;
		}

		deallocate_vmid(dqm, qpd, q);
	}
	qpd->queue_count--;
	if (q->properties.is_active)
		decrement_queue_count(dqm, qpd, q);

	return retval;
}

/*
 * Destroy a queue on the non-HWS path: snapshot the SDMA activity counter
 * (user pages must be read outside the dqm lock), then tear the queue down
 * and free its MQD.
 */
static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	uint64_t sdma_val = 0;
	struct device *dev = dqm->dev->adev->dev;
	struct kfd_process_device *pdd = qpd_to_pdd(qpd);
	struct mqd_manager *mqd_mgr =
		dqm->mqd_mgrs[get_mqd_type_from_queue_type(q->properties.type)];

	/* Get the SDMA queue stats */
	if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
	    (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
		retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
							&sdma_val);
		if (retval)
			dev_err(dev, "Failed to read SDMA queue counter for queue: %d\n",
				q->properties.queue_id);
	}

	dqm_lock(dqm);
	retval = destroy_queue_nocpsch_locked(dqm, qpd, q);
	if (!retval)
		pdd->sdma_past_activity_counter += sdma_val;
	dqm_unlock(dqm);

	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);

	return retval;
}

/*
 * Apply updated queue properties: unmap the queue (HWS/MES) or destroy its
 * MQD state (non-HWS), update the MQD, fix up activity/GWS accounting, then
 * remap or reload as appropriate.
 */
static int update_queue(struct device_queue_manager *dqm, struct queue *q,
			struct mqd_update_info *minfo)
{
	int retval = 0;
	struct device *dev = dqm->dev->adev->dev;
	struct mqd_manager *mqd_mgr;
	struct kfd_process_device *pdd;
	bool prev_active = false;

	dqm_lock(dqm);
	pdd = kfd_get_process_device_data(q->device, q->process);
	if (!pdd) {
		retval = -ENODEV;
		goto out_unlock;
	}
	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
			q->properties.type)];

	/* Save previous activity state for counters */
	prev_active = q->properties.is_active;

	/* Make sure the queue is unmapped before updating the MQD */
	if
 (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
		if (!dqm->dev->kfd->shared_resources.enable_mes)
			retval = unmap_queues_cpsch(dqm,
					KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD, false);
		else if (prev_active)
			retval = remove_queue_mes(dqm, q, &pdd->qpd);

		/* queue is reset so inaccessible */
		if (pdd->has_reset_queue) {
			retval = -EACCES;
			goto out_unlock;
		}

		if (retval) {
			dev_err(dev, "unmap queue failed\n");
			goto out_unlock;
		}
	} else if (prev_active &&
		   (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
		    q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		    q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {

		if (!dqm->sched_running) {
			WARN_ONCE(1, "Update non-HWS queue while stopped\n");
			goto out_unlock;
		}

		/* Save wavefront state (CWSR) if possible, otherwise drain. */
		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
				(dqm->dev->kfd->cwsr_enabled ?
				 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE :
				 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
		if (retval) {
			dev_err(dev, "destroy mqd failed\n");
			goto out_unlock;
		}
	}

	mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties, minfo);

	/*
	 * check active state vs. the previous state and modify
	 * counter accordingly. map_queues_cpsch uses the
	 * dqm->active_queue_count to determine whether a new runlist must be
	 * uploaded.
	 */
	if (q->properties.is_active && !prev_active) {
		increment_queue_count(dqm, &pdd->qpd, q);
	} else if (!q->properties.is_active && prev_active) {
		decrement_queue_count(dqm, &pdd->qpd, q);
	} else if (q->gws && !q->properties.is_gws) {
		/* GWS was attached to the queue: update the GWS accounting. */
		if (q->properties.is_active) {
			dqm->gws_queue_count++;
			pdd->qpd.mapped_gws_queue = true;
		}
		q->properties.is_gws = true;
	} else if (!q->gws && q->properties.is_gws) {
		/* GWS was detached from the queue. */
		if (q->properties.is_active) {
			dqm->gws_queue_count--;
			pdd->qpd.mapped_gws_queue = false;
		}
		q->properties.is_gws = false;
	}

	/* Remap (HWS/MES) or reload the MQD onto its HQD (non-HWS). */
	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
		if (!dqm->dev->kfd->shared_resources.enable_mes)
			retval = map_queues_cpsch(dqm);
		else if (q->properties.is_active)
			retval = add_queue_mes(dqm, q, &pdd->qpd);
	} else if (q->properties.is_active &&
		 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
		  q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		  q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
		if (WARN(q->process->mm != current->mm,
			 "should only run in user thread"))
			retval = -EFAULT;
		else
			retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd,
						   q->pipe, q->queue,
						   &q->properties, current->mm);
	}

out_unlock:
	dqm_unlock(dqm);
	return retval;
}

/* suspend_single_queue does not lock the dqm like the
 * evict_process_queues_cpsch or evict_process_queues_nocpsch. You should
 * lock the dqm before calling, and unlock after calling.
 *
 * The reason we don't lock the dqm is because this function may be
 * called on multiple queues in a loop, so rather than locking/unlocking
 * multiple times, we will just keep the dqm locked for all of the calls.
 */
static int suspend_single_queue(struct device_queue_manager *dqm,
				      struct kfd_process_device *pdd,
				      struct queue *q)
{
	bool is_new;

	/* Already suspended: nothing to do. */
	if (q->properties.is_suspended)
		return 0;

	pr_debug("Suspending PASID %u queue [%i]\n",
			pdd->process->pasid,
			q->properties.queue_id);

	is_new = q->properties.exception_status & KFD_EC_MASK(EC_QUEUE_NEW);

	/* Queues still being created or already being torn down are skipped. */
	if (is_new || q->properties.is_being_destroyed) {
		pr_debug("Suspend: skip %s queue id %i\n",
				is_new ? "new" : "destroyed",
				q->properties.queue_id);
		return -EBUSY;
	}

	q->properties.is_suspended = true;
	if (q->properties.is_active) {
		/* With MES the queue must also be removed from the firmware. */
		if (dqm->dev->kfd->shared_resources.enable_mes) {
			int r = remove_queue_mes(dqm, q, &pdd->qpd);

			if (r)
				return r;
		}

		decrement_queue_count(dqm, &pdd->qpd, q);
		q->properties.is_active = false;
	}

	return 0;
}

/* resume_single_queue does not lock the dqm like the functions
 * restore_process_queues_cpsch or restore_process_queues_nocpsch. You should
 * lock the dqm before calling, and unlock after calling.
 *
 * The reason we don't lock the dqm is because this function may be
 * called on multiple queues in a loop, so rather than locking/unlocking
 * multiple times, we will just keep the dqm locked for all of the calls.
1028 */ 1029 static int resume_single_queue(struct device_queue_manager *dqm, 1030 struct qcm_process_device *qpd, 1031 struct queue *q) 1032 { 1033 struct kfd_process_device *pdd; 1034 1035 if (!q->properties.is_suspended) 1036 return 0; 1037 1038 pdd = qpd_to_pdd(qpd); 1039 1040 pr_debug("Restoring from suspend PASID %u queue [%i]\n", 1041 pdd->process->pasid, 1042 q->properties.queue_id); 1043 1044 q->properties.is_suspended = false; 1045 1046 if (QUEUE_IS_ACTIVE(q->properties)) { 1047 if (dqm->dev->kfd->shared_resources.enable_mes) { 1048 int r = add_queue_mes(dqm, q, &pdd->qpd); 1049 1050 if (r) 1051 return r; 1052 } 1053 1054 q->properties.is_active = true; 1055 increment_queue_count(dqm, qpd, q); 1056 } 1057 1058 return 0; 1059 } 1060 1061 static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, 1062 struct qcm_process_device *qpd) 1063 { 1064 struct queue *q; 1065 struct mqd_manager *mqd_mgr; 1066 struct kfd_process_device *pdd; 1067 int retval, ret = 0; 1068 1069 dqm_lock(dqm); 1070 if (qpd->evicted++ > 0) /* already evicted, do nothing */ 1071 goto out; 1072 1073 pdd = qpd_to_pdd(qpd); 1074 pr_debug_ratelimited("Evicting PASID 0x%x queues\n", 1075 pdd->process->pasid); 1076 1077 pdd->last_evict_timestamp = get_jiffies_64(); 1078 /* Mark all queues as evicted. Deactivate all active queues on 1079 * the qpd. 1080 */ 1081 list_for_each_entry(q, &qpd->queues_list, list) { 1082 q->properties.is_evicted = true; 1083 if (!q->properties.is_active) 1084 continue; 1085 1086 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 1087 q->properties.type)]; 1088 q->properties.is_active = false; 1089 decrement_queue_count(dqm, qpd, q); 1090 1091 if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n")) 1092 continue; 1093 1094 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, 1095 (dqm->dev->kfd->cwsr_enabled ? 
1096 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE : 1097 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN), 1098 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); 1099 if (retval && !ret) 1100 /* Return the first error, but keep going to 1101 * maintain a consistent eviction state 1102 */ 1103 ret = retval; 1104 } 1105 1106 out: 1107 dqm_unlock(dqm); 1108 return ret; 1109 } 1110 1111 static int evict_process_queues_cpsch(struct device_queue_manager *dqm, 1112 struct qcm_process_device *qpd) 1113 { 1114 struct queue *q; 1115 struct device *dev = dqm->dev->adev->dev; 1116 struct kfd_process_device *pdd; 1117 int retval = 0; 1118 1119 dqm_lock(dqm); 1120 if (qpd->evicted++ > 0) /* already evicted, do nothing */ 1121 goto out; 1122 1123 pdd = qpd_to_pdd(qpd); 1124 1125 /* The debugger creates processes that temporarily have not acquired 1126 * all VMs for all devices and has no VMs itself. 1127 * Skip queue eviction on process eviction. 1128 */ 1129 if (!pdd->drm_priv) 1130 goto out; 1131 1132 pr_debug_ratelimited("Evicting PASID 0x%x queues\n", 1133 pdd->process->pasid); 1134 1135 /* Mark all queues as evicted. Deactivate all active queues on 1136 * the qpd. 1137 */ 1138 list_for_each_entry(q, &qpd->queues_list, list) { 1139 q->properties.is_evicted = true; 1140 if (!q->properties.is_active) 1141 continue; 1142 1143 q->properties.is_active = false; 1144 decrement_queue_count(dqm, qpd, q); 1145 1146 if (dqm->dev->kfd->shared_resources.enable_mes) { 1147 retval = remove_queue_mes(dqm, q, qpd); 1148 if (retval) { 1149 dev_err(dev, "Failed to evict queue %d\n", 1150 q->properties.queue_id); 1151 goto out; 1152 } 1153 } 1154 } 1155 pdd->last_evict_timestamp = get_jiffies_64(); 1156 if (!dqm->dev->kfd->shared_resources.enable_mes) 1157 retval = execute_queues_cpsch(dqm, 1158 qpd->is_debug ? 
					      KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
					      KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
					      USE_DEFAULT_GRACE_PERIOD);

out:
	dqm_unlock(dqm);
	return retval;
}

/* Restore all queues of a process in no-HWS mode: reloads each runnable
 * MQD directly onto its HW slot after re-establishing the page table base.
 */
static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
					  struct qcm_process_device *qpd)
{
	struct mm_struct *mm = NULL;
	struct queue *q;
	struct mqd_manager *mqd_mgr;
	struct kfd_process_device *pdd;
	uint64_t pd_base;
	uint64_t eviction_duration;
	int retval, ret = 0;

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);

	dqm_lock(dqm);
	if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
		goto out;
	if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
		qpd->evicted--;
		goto out;
	}

	pr_debug_ratelimited("Restoring PASID 0x%x queues\n",
			    pdd->process->pasid);

	/* Update PD Base in QPD */
	qpd->page_table_base = pd_base;
	pr_debug("Updated PD address to 0x%llx\n", pd_base);

	if (!list_empty(&qpd->queues_list)) {
		dqm->dev->kfd2kgd->set_vm_context_page_table_base(
				dqm->dev->adev,
				qpd->vmid,
				qpd->page_table_base);
		kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY);
	}

	/* Take a safe reference to the mm_struct, which may otherwise
	 * disappear even while the kfd_process is still referenced.
	 */
	mm = get_task_mm(pdd->process->lead_thread);
	if (!mm) {
		ret = -EFAULT;
		goto out;
	}

	/* Remove the eviction flags. Activate queues that are not
	 * inactive for other reasons.
	 */
	list_for_each_entry(q, &qpd->queues_list, list) {
		q->properties.is_evicted = false;
		if (!QUEUE_IS_ACTIVE(q->properties))
			continue;

		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
				q->properties.type)];
		q->properties.is_active = true;
		increment_queue_count(dqm, qpd, q);

		if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n"))
			continue;

		retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
				       q->queue, &q->properties, mm);
		if (retval && !ret)
			/* Return the first error, but keep going to
			 * maintain a consistent eviction state
			 */
			ret = retval;
	}
	qpd->evicted = 0;
	/* Account how long the process spent evicted. */
	eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp;
	atomic64_add(eviction_duration, &pdd->evict_duration_counter);
out:
	if (mm)
		mmput(mm);
	dqm_unlock(dqm);
	return ret;
}

/* Restore all queues of a process in HWS (CP scheduler) mode: flags are
 * cleared in software and the scheduler (or MES) re-maps runnable queues.
 */
static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct queue *q;
	struct device *dev = dqm->dev->adev->dev;
	struct kfd_process_device *pdd;
	uint64_t eviction_duration;
	int retval = 0;

	pdd = qpd_to_pdd(qpd);

	dqm_lock(dqm);
	if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
		goto out;
	if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
		qpd->evicted--;
		goto out;
	}

	/* The debugger creates processes that temporarily have not acquired
	 * all VMs for all devices and has no VMs itself.
	 * Skip queue restore on process restore.
	 */
	if (!pdd->drm_priv)
		goto vm_not_acquired;

	pr_debug_ratelimited("Restoring PASID 0x%x queues\n",
			    pdd->process->pasid);

	/* Update PD Base in QPD */
	qpd->page_table_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);
	pr_debug("Updated PD address to 0x%llx\n", qpd->page_table_base);

	/* activate all active queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		q->properties.is_evicted = false;
		if (!QUEUE_IS_ACTIVE(q->properties))
			continue;

		q->properties.is_active = true;
		increment_queue_count(dqm, &pdd->qpd, q);

		if (dqm->dev->kfd->shared_resources.enable_mes) {
			retval = add_queue_mes(dqm, q, qpd);
			if (retval) {
				dev_err(dev, "Failed to restore queue %d\n",
					q->properties.queue_id);
				goto out;
			}
		}
	}
	if (!dqm->dev->kfd->shared_resources.enable_mes)
		retval = execute_queues_cpsch(dqm,
					      KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD);
	eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp;
	atomic64_add(eviction_duration, &pdd->evict_duration_counter);
vm_not_acquired:
	qpd->evicted = 0;
out:
	dqm_unlock(dqm);
	return retval;
}

/* Register a process (its qcm_process_device) with this device queue
 * manager: adds it to dqm->queues, records the page-directory base and
 * lets the ASIC-specific hook update the QPD.
 */
static int register_process(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct device_process_node *n;
	struct kfd_process_device *pdd;
	uint64_t pd_base;
	int retval;

	n = kzalloc(sizeof(*n), GFP_KERNEL);
	if (!n)
		return -ENOMEM;

	n->qpd = qpd;

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);

	dqm_lock(dqm);
	list_add(&n->list, &dqm->queues);

	/* Update PD Base in QPD */
	qpd->page_table_base = pd_base;
	pr_debug("Updated PD address to 0x%llx\n", pd_base);

	retval =
dqm->asic_ops.update_qpd(dqm, qpd); 1338 1339 dqm->processes_count++; 1340 1341 dqm_unlock(dqm); 1342 1343 /* Outside the DQM lock because under the DQM lock we can't do 1344 * reclaim or take other locks that others hold while reclaiming. 1345 */ 1346 kfd_inc_compute_active(dqm->dev); 1347 1348 return retval; 1349 } 1350 1351 static int unregister_process(struct device_queue_manager *dqm, 1352 struct qcm_process_device *qpd) 1353 { 1354 int retval; 1355 struct device_process_node *cur, *next; 1356 1357 pr_debug("qpd->queues_list is %s\n", 1358 list_empty(&qpd->queues_list) ? "empty" : "not empty"); 1359 1360 retval = 0; 1361 dqm_lock(dqm); 1362 1363 list_for_each_entry_safe(cur, next, &dqm->queues, list) { 1364 if (qpd == cur->qpd) { 1365 list_del(&cur->list); 1366 kfree(cur); 1367 dqm->processes_count--; 1368 goto out; 1369 } 1370 } 1371 /* qpd not found in dqm list */ 1372 retval = 1; 1373 out: 1374 dqm_unlock(dqm); 1375 1376 /* Outside the DQM lock because under the DQM lock we can't do 1377 * reclaim or take other locks that others hold while reclaiming. 
1378 */ 1379 if (!retval) 1380 kfd_dec_compute_active(dqm->dev); 1381 1382 return retval; 1383 } 1384 1385 static int 1386 set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid, 1387 unsigned int vmid) 1388 { 1389 uint32_t xcc_mask = dqm->dev->xcc_mask; 1390 int xcc_id, ret; 1391 1392 for_each_inst(xcc_id, xcc_mask) { 1393 ret = dqm->dev->kfd2kgd->set_pasid_vmid_mapping( 1394 dqm->dev->adev, pasid, vmid, xcc_id); 1395 if (ret) 1396 break; 1397 } 1398 1399 return ret; 1400 } 1401 1402 static void init_interrupts(struct device_queue_manager *dqm) 1403 { 1404 uint32_t xcc_mask = dqm->dev->xcc_mask; 1405 unsigned int i, xcc_id; 1406 1407 for_each_inst(xcc_id, xcc_mask) { 1408 for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++) { 1409 if (is_pipe_enabled(dqm, 0, i)) { 1410 dqm->dev->kfd2kgd->init_interrupts( 1411 dqm->dev->adev, i, xcc_id); 1412 } 1413 } 1414 } 1415 } 1416 1417 static int initialize_nocpsch(struct device_queue_manager *dqm) 1418 { 1419 int pipe, queue; 1420 1421 pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm)); 1422 1423 dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm), 1424 sizeof(unsigned int), GFP_KERNEL); 1425 if (!dqm->allocated_queues) 1426 return -ENOMEM; 1427 1428 mutex_init(&dqm->lock_hidden); 1429 INIT_LIST_HEAD(&dqm->queues); 1430 dqm->active_queue_count = dqm->next_pipe_to_allocate = 0; 1431 dqm->active_cp_queue_count = 0; 1432 dqm->gws_queue_count = 0; 1433 1434 for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { 1435 int pipe_offset = pipe * get_queues_per_pipe(dqm); 1436 1437 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) 1438 if (test_bit(pipe_offset + queue, 1439 dqm->dev->kfd->shared_resources.cp_queue_bitmap)) 1440 dqm->allocated_queues[pipe] |= 1 << queue; 1441 } 1442 1443 memset(dqm->vmid_pasid, 0, sizeof(dqm->vmid_pasid)); 1444 1445 init_sdma_bitmaps(dqm); 1446 1447 return 0; 1448 } 1449 1450 static void uninitialize(struct device_queue_manager *dqm) 1451 { 1452 int i; 1453 1454 
	/* All queues and processes should be gone by tear-down time. */
	WARN_ON(dqm->active_queue_count > 0 || dqm->processes_count > 0);

	kfree(dqm->allocated_queues);
	for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
		kfree(dqm->mqd_mgrs[i]);
	mutex_destroy(&dqm->lock_hidden);
}

/* Start the software (no-HWS) scheduler; Hawaii additionally needs a
 * packet manager for its kernel queue.
 */
static int start_nocpsch(struct device_queue_manager *dqm)
{
	int r = 0;

	pr_info("SW scheduler is used");
	init_interrupts(dqm);

	if (dqm->dev->adev->asic_type == CHIP_HAWAII)
		r = pm_init(&dqm->packet_mgr, dqm);
	if (!r)
		dqm->sched_running = true;

	return r;
}

/* Stop the software (no-HWS) scheduler. Idempotent. */
static int stop_nocpsch(struct device_queue_manager *dqm)
{
	dqm_lock(dqm);
	if (!dqm->sched_running) {
		dqm_unlock(dqm);
		return 0;
	}

	if (dqm->dev->adev->asic_type == CHIP_HAWAII)
		pm_uninit(&dqm->packet_mgr);
	dqm->sched_running = false;
	dqm_unlock(dqm);

	return 0;
}

/* Allocate an SDMA queue slot (PCIe, XGMI, or pinned to a caller-chosen
 * engine ID) and derive the engine/queue IDs from the allocated bit.
 * restore_sdma_id, when non-NULL, requests re-use of a specific slot
 * (CRIU restore path).
 */
static int allocate_sdma_queue(struct device_queue_manager *dqm,
				struct queue *q, const uint32_t *restore_sdma_id)
{
	struct device *dev = dqm->dev->adev->dev;
	int bit;

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		if (bitmap_empty(dqm->sdma_bitmap, KFD_MAX_SDMA_QUEUES)) {
			dev_err(dev, "No more SDMA queue to allocate\n");
			return -ENOMEM;
		}

		if (restore_sdma_id) {
			/* Re-use existing sdma_id */
			if (!test_bit(*restore_sdma_id, dqm->sdma_bitmap)) {
				dev_err(dev, "SDMA queue already in use\n");
				return -EBUSY;
			}
			clear_bit(*restore_sdma_id, dqm->sdma_bitmap);
			q->sdma_id = *restore_sdma_id;
		} else {
			/* Find first available sdma_id */
			bit = find_first_bit(dqm->sdma_bitmap,
					     get_num_sdma_queues(dqm));
			clear_bit(bit, dqm->sdma_bitmap);
			q->sdma_id = bit;
		}

		/* Queues are striped round-robin across engines. */
		q->properties.sdma_engine_id =
			q->sdma_id % kfd_get_num_sdma_engines(dqm->dev);
		q->properties.sdma_queue_id = q->sdma_id /
				kfd_get_num_sdma_engines(dqm->dev);
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
		if (bitmap_empty(dqm->xgmi_sdma_bitmap, KFD_MAX_SDMA_QUEUES)) {
			dev_err(dev, "No more XGMI SDMA queue to allocate\n");
			return -ENOMEM;
		}
		if (restore_sdma_id) {
			/* Re-use existing sdma_id */
			if (!test_bit(*restore_sdma_id, dqm->xgmi_sdma_bitmap)) {
				dev_err(dev, "SDMA queue already in use\n");
				return -EBUSY;
			}
			clear_bit(*restore_sdma_id, dqm->xgmi_sdma_bitmap);
			q->sdma_id = *restore_sdma_id;
		} else {
			bit = find_first_bit(dqm->xgmi_sdma_bitmap,
					     get_num_xgmi_sdma_queues(dqm));
			clear_bit(bit, dqm->xgmi_sdma_bitmap);
			q->sdma_id = bit;
		}
		/* sdma_engine_id is sdma id including
		 * both PCIe-optimized SDMAs and XGMI-
		 * optimized SDMAs. The calculation below
		 * assumes the first N engines are always
		 * PCIe-optimized ones
		 */
		q->properties.sdma_engine_id =
			kfd_get_num_sdma_engines(dqm->dev) +
			q->sdma_id % kfd_get_num_xgmi_sdma_engines(dqm->dev);
		q->properties.sdma_queue_id = q->sdma_id /
			kfd_get_num_xgmi_sdma_engines(dqm->dev);
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) {
		int i, num_queues, num_engines, eng_offset = 0, start_engine;
		bool free_bit_found = false, is_xgmi = false;

		/* The requested engine ID selects which pool (PCIe vs XGMI)
		 * the queue is drawn from; the type is rewritten accordingly.
		 */
		if (q->properties.sdma_engine_id < kfd_get_num_sdma_engines(dqm->dev)) {
			num_queues = get_num_sdma_queues(dqm);
			num_engines = kfd_get_num_sdma_engines(dqm->dev);
			q->properties.type = KFD_QUEUE_TYPE_SDMA;
		} else {
			num_queues = get_num_xgmi_sdma_queues(dqm);
			num_engines = kfd_get_num_xgmi_sdma_engines(dqm->dev);
			eng_offset = kfd_get_num_sdma_engines(dqm->dev);
			q->properties.type = KFD_QUEUE_TYPE_SDMA_XGMI;
			is_xgmi = true;
		}

		/* Scan available bit based on target engine ID.
		 */
		start_engine = q->properties.sdma_engine_id - eng_offset;
		for (i = start_engine; i < num_queues; i += num_engines) {

			if (!test_bit(i, is_xgmi ? dqm->xgmi_sdma_bitmap : dqm->sdma_bitmap))
				continue;

			clear_bit(i, is_xgmi ? dqm->xgmi_sdma_bitmap : dqm->sdma_bitmap);
			q->sdma_id = i;
			q->properties.sdma_queue_id = q->sdma_id / num_engines;
			free_bit_found = true;
			break;
		}

		if (!free_bit_found) {
			dev_err(dev, "No more SDMA queue to allocate for target ID %i\n",
				q->properties.sdma_engine_id);
			return -ENOMEM;
		}
	}

	pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
	pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);

	return 0;
}

/* Return an SDMA queue slot to the matching bitmap. Out-of-range IDs are
 * ignored (the slot was never allocated from these pools).
 */
static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				struct queue *q)
{
	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		if (q->sdma_id >= get_num_sdma_queues(dqm))
			return;
		set_bit(q->sdma_id, dqm->sdma_bitmap);
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
		if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm))
			return;
		set_bit(q->sdma_id, dqm->xgmi_sdma_bitmap);
	}
}

/*
 * Device Queue Manager implementation for cp scheduler
 */

/* Tell the HW scheduler which VMIDs and HQD slots it owns. */
static int set_sched_resources(struct device_queue_manager *dqm)
{
	int i, mec;
	struct scheduling_resources res;
	struct device *dev = dqm->dev->adev->dev;

	res.vmid_mask = dqm->dev->compute_vmid_bitmap;

	res.queue_mask = 0;
	for (i = 0; i < AMDGPU_MAX_QUEUES; ++i) {
		mec = (i / dqm->dev->kfd->shared_resources.num_queue_per_pipe)
			/ dqm->dev->kfd->shared_resources.num_pipe_per_mec;

		if (!test_bit(i, dqm->dev->kfd->shared_resources.cp_queue_bitmap))
			continue;

		/* only acquire queues from the first MEC */
		if (mec > 0)
			continue;

		/* This situation may be hit in the future if a
new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of res.queue_mask needs updating
		 */
		if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) {
			dev_err(dev, "Invalid queue enabled by amdgpu: %d\n", i);
			break;
		}

		res.queue_mask |= 1ull
			<< amdgpu_queue_mask_bit_to_set_resource_bit(
				dqm->dev->adev, i);
	}
	res.gws_mask = ~0ull;
	res.oac_mask = res.gds_heap_base = res.gds_heap_size = 0;

	pr_debug("Scheduling resources:\n"
			"vmid mask: 0x%8X\n"
			"queue mask: 0x%8llX\n",
			res.vmid_mask, res.queue_mask);

	return pm_send_set_resources(&dqm->packet_mgr, &res);
}

/* Initialize DQM state for the HWS (CP scheduler) path. */
static int initialize_cpsch(struct device_queue_manager *dqm)
{
	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));

	mutex_init(&dqm->lock_hidden);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->active_queue_count = dqm->processes_count = 0;
	dqm->active_cp_queue_count = 0;
	dqm->gws_queue_count = 0;
	dqm->active_runlist = false;
	INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);
	dqm->trap_debug_vmid = 0;

	init_sdma_bitmaps(dqm);

	/* Cache HW wait times used when building grace-period packets. */
	if (dqm->dev->kfd2kgd->get_iq_wait_times)
		dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev,
					&dqm->wait_times,
					ffs(dqm->dev->xcc_mask) - 1);
	return 0;
}

/* Bring up the HW scheduler: packet manager, fence memory, interrupts,
 * initial runlist and the per-queue hang-detection buffer. Errors unwind
 * in reverse order via the goto ladder at the bottom.
 */
static int start_cpsch(struct device_queue_manager *dqm)
{
	struct device *dev = dqm->dev->adev->dev;
	int retval, num_hw_queue_slots;

	retval = 0;

	dqm_lock(dqm);

	if (!dqm->dev->kfd->shared_resources.enable_mes) {
		retval = pm_init(&dqm->packet_mgr, dqm);
		if (retval)
			goto fail_packet_manager_init;

		retval = set_sched_resources(dqm);
		if (retval)
			goto fail_set_sched_resources;
	}
	pr_debug("Allocating fence memory\n");

	/* allocate fence memory on the gart */
	retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
					&dqm->fence_mem);

	if (retval)
		goto fail_allocate_vidmem;

	dqm->fence_addr = (uint64_t *)dqm->fence_mem->cpu_ptr;
	dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;

	init_interrupts(dqm);

	/* clear hang status when driver try to start the hw scheduler */
	dqm->sched_running = true;

	if (!dqm->dev->kfd->shared_resources.enable_mes)
		execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD);

	/* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */
	if (amdgpu_emu_mode == 0 && dqm->dev->adev->gmc.is_app_apu &&
	    (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 3))) {
		uint32_t reg_offset = 0;
		uint32_t grace_period = 1;

		retval = pm_update_grace_period(&dqm->packet_mgr,
						grace_period);
		if (retval)
			dev_err(dev, "Setting grace timeout failed\n");
		else if (dqm->dev->kfd2kgd->build_grace_period_packet_info)
			/* Update dqm->wait_times maintained in software */
			dqm->dev->kfd2kgd->build_grace_period_packet_info(
					dqm->dev->adev, dqm->wait_times,
					grace_period, &reg_offset,
					&dqm->wait_times);
	}

	/* setup per-queue reset detection buffer */
	num_hw_queue_slots = dqm->dev->kfd->shared_resources.num_queue_per_pipe *
			     dqm->dev->kfd->shared_resources.num_pipe_per_mec *
			     NUM_XCC(dqm->dev->xcc_mask);

	dqm->detect_hang_info_size = num_hw_queue_slots * sizeof(struct dqm_detect_hang_info);
	dqm->detect_hang_info = kzalloc(dqm->detect_hang_info_size, GFP_KERNEL);

	if (!dqm->detect_hang_info) {
		retval = -ENOMEM;
		goto fail_detect_hang_buffer;
	}

	dqm_unlock(dqm);

	return 0;
fail_detect_hang_buffer:
	kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
fail_allocate_vidmem:
fail_set_sched_resources:
	if (!dqm->dev->kfd->shared_resources.enable_mes)
		pm_uninit(&dqm->packet_mgr);
fail_packet_manager_init:
	dqm_unlock(dqm);
	return retval;
}

/* Stop the HW scheduler: unmap all queues (or remove them via MES) and
 * release scheduler resources. Idempotent.
 */
static int stop_cpsch(struct device_queue_manager *dqm)
{
	dqm_lock(dqm);
	if (!dqm->sched_running) {
		dqm_unlock(dqm);
		return 0;
	}

	if (!dqm->dev->kfd->shared_resources.enable_mes)
		unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD, false);
	else
		remove_all_queues_mes(dqm);

	dqm->sched_running = false;

	if (!dqm->dev->kfd->shared_resources.enable_mes)
		pm_release_ib(&dqm->packet_mgr);

	kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
	if (!dqm->dev->kfd->shared_resources.enable_mes)
		pm_uninit(&dqm->packet_mgr);
	kfree(dqm->detect_hang_info);
	dqm->detect_hang_info = NULL;
	dqm_unlock(dqm);

	return 0;
}

/* Create a kernel (debug/DIQ) queue under the HW scheduler and mark the
 * process as a debug process.
 */
static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	dqm_lock(dqm);
	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new kernel queue because %d queues were already created\n",
				dqm->total_queue_count);
		dqm_unlock(dqm);
		return -EPERM;
	}

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	list_add(&kq->list, &qpd->priv_queue_list);
	increment_queue_count(dqm, qpd, kq->queue);
	qpd->is_debug = true;
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
			USE_DEFAULT_GRACE_PERIOD);
	dqm_unlock(dqm);

	return 0;
}

/* Destroy a kernel (debug/DIQ) queue and clear the process's debug flag. */
static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	dqm_lock(dqm);
	list_del(&kq->list);
	decrement_queue_count(dqm, qpd, kq->queue);
	qpd->is_debug = false;
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0,
			USE_DEFAULT_GRACE_PERIOD);
	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type.
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);
	dqm_unlock(dqm);
}

/* Create a user-mode queue under the HW scheduler. qd/restore_mqd/
 * restore_ctl_stack are non-NULL only on the CRIU restore path, where
 * the previous doorbell, SDMA slot and MQD contents are re-used.
 */
static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
			struct qcm_process_device *qpd,
			const struct kfd_criu_queue_priv_data *qd,
			const void *restore_mqd, const void *restore_ctl_stack)
{
	int retval;
	struct mqd_manager *mqd_mgr;

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		retval = -EPERM;
		goto out;
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI ||
		q->properties.type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) {
		dqm_lock(dqm);
		retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL);
		dqm_unlock(dqm);
		if (retval)
			goto out;
	}

	retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL);
	if (retval)
		goto out_deallocate_sdma_queue;

	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
			q->properties.type)];

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
		dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
	q->properties.tba_addr = qpd->tba_addr;
	q->properties.tma_addr = qpd->tma_addr;
	/* allocate_mqd may sleep, so it runs outside the dqm lock. */
	q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties);
	if (!q->mqd_mem_obj) {
		retval = -ENOMEM;
		goto out_deallocate_doorbell;
	}

	dqm_lock(dqm);
	/*
	 * Eviction state logic: mark all queues as evicted, even ones
	 * not currently active. Restoring inactive queues later only
	 * updates the is_evicted flag but is a no-op otherwise.
	 */
	q->properties.is_evicted = !!qpd->evicted;
	q->properties.is_dbg_wa = qpd->pqm->process->debug_trap_enabled &&
				  kfd_dbg_has_cwsr_workaround(q->device);

	if (qd)
		mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr,
				     &q->properties, restore_mqd, restore_ctl_stack,
				     qd->ctl_stack_size);
	else
		mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
					&q->gart_mqd_addr, &q->properties);

	list_add(&q->list, &qpd->queues_list);
	qpd->queue_count++;

	if (q->properties.is_active) {
		increment_queue_count(dqm, qpd, q);

		if (!dqm->dev->kfd->shared_resources.enable_mes)
			retval = execute_queues_cpsch(dqm,
					KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD);
		else
			retval = add_queue_mes(dqm, q, qpd);
		if (retval)
			goto cleanup_queue;
	}

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;

	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	dqm_unlock(dqm);
	return retval;

cleanup_queue:
	qpd->queue_count--;
	list_del(&q->list);
	if (q->properties.is_active)
		decrement_queue_count(dqm, qpd, q);
	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
	dqm_unlock(dqm);
out_deallocate_doorbell:
	deallocate_doorbell(qpd, q);
out_deallocate_sdma_queue:
	if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
		dqm_lock(dqm);
		deallocate_sdma_queue(dqm, q);
		dqm_unlock(dqm);
	}
out:
	return retval;
}

/* Busy-wait (with schedule()) until the GART fence reaches fence_value
 * or timeout_ms expires. Returns 0, -EIO on fatal error, -ETIME on
 * timeout.
 */
int amdkfd_fence_wait_timeout(struct device_queue_manager *dqm,
				uint64_t fence_value,
				unsigned int timeout_ms)
{
	unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies;
	struct device *dev = dqm->dev->adev->dev;
	uint64_t *fence_addr = dqm->fence_addr;

	while (*fence_addr != fence_value) {
		/* Fatal err detected, this response won't come */
		if (amdgpu_amdkfd_is_fed(dqm->dev->adev))
			return -EIO;

		if (time_after(jiffies, end_jiffies)) {
			dev_err(dev, "qcm fence wait loop timeout expired\n");
			/* In HWS case, this is used to halt the driver thread
			 * in order not to mess up CP states before doing
			 * scandumps for FW debugging.
			 */
			while (halt_if_hws_hang)
				schedule();

			return -ETIME;
		}
		schedule();
	}

	return 0;
}

/* dqm->lock mutex has to be locked before calling this function */
static int map_queues_cpsch(struct device_queue_manager *dqm)
{
	struct device *dev = dqm->dev->adev->dev;
	int retval;

	/* Nothing to map when stopped, idle, or a runlist is already live. */
	if (!dqm->sched_running)
		return 0;
	if (dqm->active_queue_count <= 0 || dqm->processes_count <= 0)
		return 0;
	if (dqm->active_runlist)
		return 0;

	retval = pm_send_runlist(&dqm->packet_mgr, &dqm->queues);
	pr_debug("%s sent runlist\n", __func__);
	if (retval) {
		dev_err(dev, "failed to execute runlist\n");
		return retval;
	}
	dqm->active_runlist = true;

	return retval;
}

/* Mark a queue as reset: flag the owning process and deactivate the
 * queue in software accounting.
 */
static void set_queue_as_reset(struct device_queue_manager *dqm, struct queue *q,
			       struct qcm_process_device *qpd)
{
	struct kfd_process_device *pdd = qpd_to_pdd(qpd);

	dev_err(dqm->dev->adev->dev, "queue id 0x%0x at pasid 0x%0x is reset\n",
		q->properties.queue_id, q->process->pasid);

	pdd->has_reset_queue = true;
	if (q->properties.is_active) {
		q->properties.is_active = false;
		decrement_queue_count(dqm, qpd, q);
	}
}

/* Snapshot every HW queue slot that still has a PQ address programmed
 * (i.e. was not cleanly unmapped) into dqm->detect_hang_info.
 * Returns the number of suspect slots found.
 */
static int detect_queue_hang(struct device_queue_manager *dqm)
{
	int i;

	/* detect should be used only in dqm locked queue reset */
	if (WARN_ON(dqm->detect_hang_count > 0))
		return 0;

	memset(dqm->detect_hang_info, 0, dqm->detect_hang_info_size);

	for (i = 0; i < AMDGPU_MAX_QUEUES; ++i) {
		uint32_t mec, pipe, queue;
		int xcc_id;

		mec = (i / dqm->dev->kfd->shared_resources.num_queue_per_pipe)
			/ dqm->dev->kfd->shared_resources.num_pipe_per_mec;

		/* Only MEC0 slots owned by KFD are considered. */
		if (mec || !test_bit(i, dqm->dev->kfd->shared_resources.cp_queue_bitmap))
			continue;

		amdgpu_queue_mask_bit_to_mec_queue(dqm->dev->adev, i, &mec, &pipe, &queue);

		for_each_inst(xcc_id, dqm->dev->xcc_mask) {
			uint64_t queue_addr = dqm->dev->kfd2kgd->hqd_get_pq_addr(
						dqm->dev->adev, pipe, queue, xcc_id);
			struct dqm_detect_hang_info hang_info;

			/* A zero PQ address means the slot is empty. */
			if (!queue_addr)
				continue;

			hang_info.pipe_id = pipe;
			hang_info.queue_id = queue;
			hang_info.xcc_id = xcc_id;
			hang_info.queue_address = queue_addr;

			dqm->detect_hang_info[dqm->detect_hang_count] = hang_info;
			dqm->detect_hang_count++;
		}
	}

	return dqm->detect_hang_count;
}

/* Look up the software queue object whose ring buffer matches the given
 * GPU address, across all registered processes. Returns NULL if none.
 */
static struct queue *find_queue_by_address(struct device_queue_manager *dqm, uint64_t queue_address)
{
	struct device_process_node *cur;
	struct qcm_process_device *qpd;
	struct queue *q;

	list_for_each_entry(cur, &dqm->queues, list) {
		qpd = cur->qpd;
		list_for_each_entry(q, &qpd->queues_list, list) {
			if (queue_address == q->properties.queue_address)
				return q;
		}
	}

	return NULL;
}

/* only for compute queue */
static int reset_queues_on_hws_hang(struct device_queue_manager *dqm)
{
	int r = 0, reset_count = 0, i;

	if (!dqm->detect_hang_info || dqm->is_hws_hang)
		return -EIO;

	/* assume dqm locked.
	 */
	if (!detect_queue_hang(dqm))
		return -ENOTRECOVERABLE;

	for (i = 0; i < dqm->detect_hang_count; i++) {
		struct dqm_detect_hang_info hang_info = dqm->detect_hang_info[i];
		struct queue *q = find_queue_by_address(dqm, hang_info.queue_address);
		struct kfd_process_device *pdd;
		uint64_t queue_addr = 0;

		if (!q) {
			r = -ENOTRECOVERABLE;
			goto reset_fail;
		}

		pdd = kfd_get_process_device_data(dqm->dev, q->process);
		if (!pdd) {
			r = -ENOTRECOVERABLE;
			goto reset_fail;
		}

		queue_addr = dqm->dev->kfd2kgd->hqd_reset(dqm->dev->adev,
				hang_info.pipe_id, hang_info.queue_id, hang_info.xcc_id,
				KFD_UNMAP_LATENCY_MS);

		/* either reset failed or we reset an unexpected queue. */
		if (queue_addr != q->properties.queue_address) {
			r = -ENOTRECOVERABLE;
			goto reset_fail;
		}

		set_queue_as_reset(dqm, q, &pdd->qpd);
		reset_count++;
	}

	/* Only report success if every suspect slot was reset. */
	if (reset_count == dqm->detect_hang_count)
		kfd_signal_reset_event(dqm->dev);
	else
		r = -ENOTRECOVERABLE;

reset_fail:
	dqm->detect_hang_count = 0;

	return r;
}

/* dqm->lock mutex has to be locked before calling this function */
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param,
				uint32_t grace_period,
				bool reset)
{
	struct device *dev = dqm->dev->adev->dev;
	struct mqd_manager *mqd_mgr;
	int retval;

	if (!dqm->sched_running)
		return 0;
	if (!dqm->active_runlist)
		return 0;
	/* Don't touch the HW while a GPU reset is in flight. */
	if (!down_read_trylock(&dqm->dev->adev->reset_domain->sem))
		return -EIO;

	if (grace_period != USE_DEFAULT_GRACE_PERIOD) {
		retval = pm_update_grace_period(&dqm->packet_mgr, grace_period);
		if (retval)
			goto out;
	}

	retval = pm_send_unmap_queue(&dqm->packet_mgr, filter, filter_param, reset);
	if (retval)
goto out; 2161 2162 *dqm->fence_addr = KFD_FENCE_INIT; 2163 pm_send_query_status(&dqm->packet_mgr, dqm->fence_gpu_addr, 2164 KFD_FENCE_COMPLETED); 2165 /* should be timed out */ 2166 retval = amdkfd_fence_wait_timeout(dqm, KFD_FENCE_COMPLETED, 2167 queue_preemption_timeout_ms); 2168 if (retval) { 2169 dev_err(dev, "The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n"); 2170 kfd_hws_hang(dqm); 2171 goto out; 2172 } 2173 2174 /* In the current MEC firmware implementation, if compute queue 2175 * doesn't response to the preemption request in time, HIQ will 2176 * abandon the unmap request without returning any timeout error 2177 * to driver. Instead, MEC firmware will log the doorbell of the 2178 * unresponding compute queue to HIQ.MQD.queue_doorbell_id fields. 2179 * To make sure the queue unmap was successful, driver need to 2180 * check those fields 2181 */ 2182 mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]; 2183 if (mqd_mgr->check_preemption_failed(mqd_mgr, dqm->packet_mgr.priv_queue->queue->mqd)) { 2184 if (reset_queues_on_hws_hang(dqm)) { 2185 while (halt_if_hws_hang) 2186 schedule(); 2187 dqm->is_hws_hang = true; 2188 kfd_hws_hang(dqm); 2189 retval = -ETIME; 2190 goto out; 2191 } 2192 } 2193 2194 /* We need to reset the grace period value for this device */ 2195 if (grace_period != USE_DEFAULT_GRACE_PERIOD) { 2196 if (pm_update_grace_period(&dqm->packet_mgr, 2197 USE_DEFAULT_GRACE_PERIOD)) 2198 dev_err(dev, "Failed to reset grace period\n"); 2199 } 2200 2201 pm_release_ib(&dqm->packet_mgr); 2202 dqm->active_runlist = false; 2203 2204 out: 2205 up_read(&dqm->dev->adev->reset_domain->sem); 2206 return retval; 2207 } 2208 2209 /* only for compute queue */ 2210 static int reset_queues_cpsch(struct device_queue_manager *dqm, uint16_t pasid) 2211 { 2212 int retval; 2213 2214 dqm_lock(dqm); 2215 2216 retval = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_BY_PASID, 2217 pasid, USE_DEFAULT_GRACE_PERIOD, true); 2218 2219 dqm_unlock(dqm); 
/* dqm->lock mutex has to be locked before calling this function */
static int execute_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param,
				uint32_t grace_period)
{
	int retval;

	/* don't touch the HW while a GPU reset is in flight */
	if (!down_read_trylock(&dqm->dev->adev->reset_domain->sem))
		return -EIO;
	retval = unmap_queues_cpsch(dqm, filter, filter_param, grace_period, false);
	if (!retval)
		retval = map_queues_cpsch(dqm);
	up_read(&dqm->dev->adev->reset_domain->sem);
	return retval;
}

/*
 * Wait for a suspended queue that is being destroyed to be resumed by the
 * debugger.  Both the process mutex and the dqm lock are dropped while
 * waiting and re-taken before returning, so callers must not rely on state
 * that could change under them across this call.
 */
static int wait_on_destroy_queue(struct device_queue_manager *dqm,
				 struct queue *q)
{
	struct kfd_process_device *pdd = kfd_get_process_device_data(q->device,
								q->process);
	int ret = 0;

	/* debug queues are never suspended; nothing to wait for */
	if (pdd->qpd.is_debug)
		return ret;

	q->properties.is_being_destroyed = true;

	if (pdd->process->debug_trap_enabled && q->properties.is_suspended) {
		dqm_unlock(dqm);
		mutex_unlock(&q->process->mutex);
		ret = wait_event_interruptible(dqm->destroy_wait,
					       !q->properties.is_suspended);

		mutex_lock(&q->process->mutex);
		dqm_lock(dqm);
	}

	return ret;
}

/* Destroy a user-mode queue under the CP scheduler (HWS or MES). */
static int destroy_queue_cpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd_mgr;
	uint64_t sdma_val = 0;
	struct kfd_process_device *pdd = qpd_to_pdd(qpd);
	struct device *dev = dqm->dev->adev->dev;

	/* Get the SDMA queue stats */
	if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
	    (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
		retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
						 &sdma_val);
		if (retval)
			dev_err(dev, "Failed to read SDMA queue counter for queue: %d\n",
				q->properties.queue_id);
	}

	/* remove queue from list to prevent rescheduling after preemption */
	dqm_lock(dqm);

	retval = wait_on_destroy_queue(dqm, q);

	if (retval) {
		dqm_unlock(dqm);
		return retval;
	}

	if (qpd->is_debug) {
		/*
		 * error, currently we do not allow to destroy a queue
		 * of a currently debugged process
		 */
		retval = -EBUSY;
		goto failed_try_destroy_debugged_queue;

	}

	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
			q->properties.type)];

	deallocate_doorbell(qpd, q);

	if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
	    (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
		deallocate_sdma_queue(dqm, q);
		pdd->sdma_past_activity_counter += sdma_val;
	}

	list_del(&q->list);
	qpd->queue_count--;
	if (q->properties.is_active) {
		decrement_queue_count(dqm, qpd, q);
		if (!dqm->dev->kfd->shared_resources.enable_mes) {
			retval = execute_queues_cpsch(dqm,
						      KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
						      USE_DEFAULT_GRACE_PERIOD);
			/* preemption timed out: wavefronts must be reset later */
			if (retval == -ETIME)
				qpd->reset_wavefronts = true;
		} else {
			retval = remove_queue_mes(dqm, q, qpd);
		}
	}

	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
		 dqm->total_queue_count);

	dqm_unlock(dqm);

	/*
	 * Do free_mqd and raise delete event after dqm_unlock(dqm) to avoid
	 * circular locking
	 */
	kfd_dbg_ev_raise(KFD_EC_MASK(EC_DEVICE_QUEUE_DELETE),
			 qpd->pqm->process, q->device,
			 -1, false, NULL, 0);

	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);

	return retval;

failed_try_destroy_debugged_queue:

	dqm_unlock(dqm);
	return retval;
}
/*
 * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
 * stay in user mode.
 */
#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
/* APE1 limit is inclusive and 64K aligned. */
#define APE1_LIMIT_ALIGNMENT 0xFFFF

/*
 * Program the default/alternate memory policy and the APE1 alternate
 * aperture for a process.  Returns false if the aperture parameters
 * cannot be represented in the SH_MEM_APE1_* register format.
 */
static bool set_cache_memory_policy(struct device_queue_manager *dqm,
				   struct qcm_process_device *qpd,
				   enum cache_policy default_policy,
				   enum cache_policy alternate_policy,
				   void __user *alternate_aperture_base,
				   uint64_t alternate_aperture_size)
{
	bool retval = true;

	if (!dqm->asic_ops.set_cache_memory_policy)
		return retval;

	dqm_lock(dqm);

	if (alternate_aperture_size == 0) {
		/* base > limit disables APE1 */
		qpd->sh_mem_ape1_base = 1;
		qpd->sh_mem_ape1_limit = 0;
	} else {
		/*
		 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
		 *			SH_MEM_APE1_BASE[31:0], 0x0000 }
		 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
		 *			SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
		 * Verify that the base and size parameters can be
		 * represented in this format and convert them.
		 * Additionally restrict APE1 to user-mode addresses.
		 */

		uint64_t base = (uintptr_t)alternate_aperture_base;
		uint64_t limit = base + alternate_aperture_size - 1;

		if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
		   (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
			retval = false;
			goto out;
		}

		/* registers hold bits [47:16] of the 64K-aligned addresses */
		qpd->sh_mem_ape1_base = base >> 16;
		qpd->sh_mem_ape1_limit = limit >> 16;
	}

	retval = dqm->asic_ops.set_cache_memory_policy(
			dqm,
			qpd,
			default_policy,
			alternate_policy,
			alternate_aperture_base,
			alternate_aperture_size);

	/* without HWS the driver programs the registers directly per-VMID */
	if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
		program_sh_mem_settings(dqm, qpd);

	pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
		qpd->sh_mem_config, qpd->sh_mem_ape1_base,
		qpd->sh_mem_ape1_limit);

out:
	dqm_unlock(dqm);
	return retval;
}

/* Tear down all queues of a terminating process (no-HWS path). */
static int process_termination_nocpsch(struct device_queue_manager *dqm,
		struct qcm_process_device *qpd)
{
	struct queue *q;
	struct device_process_node *cur, *next_dpn;
	int retval = 0;
	bool found = false;

	dqm_lock(dqm);

	/* Clear all user mode queues */
	while (!list_empty(&qpd->queues_list)) {
		struct mqd_manager *mqd_mgr;
		int ret;

		q = list_first_entry(&qpd->queues_list, struct queue, list);
		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
				q->properties.type)];
		ret = destroy_queue_nocpsch_locked(dqm, qpd, q);
		if (ret)
			retval = ret;
		/* free_mqd() can reclaim memory, so drop the lock around it */
		dqm_unlock(dqm);
		mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
		dqm_lock(dqm);
	}

	/* Unregister process */
	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			found = true;
			break;
		}
	}

	dqm_unlock(dqm);

	/* Outside the DQM lock because under the DQM lock we can't do
	 * reclaim or take other locks that others hold while reclaiming.
	 */
	if (found)
		kfd_dec_compute_active(dqm->dev);

	return retval;
}
/*
 * Copy a queue's saved wave state (control stack) to a user buffer.
 * Only valid for inactive CWSR-enabled compute queues.
 */
static int get_wave_state(struct device_queue_manager *dqm,
			  struct queue *q,
			  void __user *ctl_stack,
			  u32 *ctl_stack_used_size,
			  u32 *save_area_used_size)
{
	struct mqd_manager *mqd_mgr;

	dqm_lock(dqm);

	mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP];

	if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE ||
	    q->properties.is_active || !q->device->kfd->cwsr_enabled ||
	    !mqd_mgr->get_wave_state) {
		dqm_unlock(dqm);
		return -EINVAL;
	}

	dqm_unlock(dqm);

	/*
	 * get_wave_state is outside the dqm lock to prevent circular locking
	 * and the queue should be protected against destruction by the process
	 * lock.
	 */
	return mqd_mgr->get_wave_state(mqd_mgr, q->mqd, &q->properties,
			ctl_stack, ctl_stack_used_size, save_area_used_size);
}

/* Report the MQD and control-stack sizes needed to checkpoint a queue. */
static void get_queue_checkpoint_info(struct device_queue_manager *dqm,
			const struct queue *q,
			u32 *mqd_size,
			u32 *ctl_stack_size)
{
	struct mqd_manager *mqd_mgr;
	enum KFD_MQD_TYPE mqd_type =
			get_mqd_type_from_queue_type(q->properties.type);

	dqm_lock(dqm);
	mqd_mgr = dqm->mqd_mgrs[mqd_type];
	*mqd_size = mqd_mgr->mqd_size;
	*ctl_stack_size = 0;

	/* only compute queues carry a control stack */
	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE && mqd_mgr->get_checkpoint_info)
		mqd_mgr->get_checkpoint_info(mqd_mgr, q->mqd, ctl_stack_size);

	dqm_unlock(dqm);
}

/* Snapshot a queue's MQD and control stack for checkpoint/restore (CRIU). */
static int checkpoint_mqd(struct device_queue_manager *dqm,
			  const struct queue *q,
			  void *mqd,
			  void *ctl_stack)
{
	struct mqd_manager *mqd_mgr;
	int r = 0;
	enum KFD_MQD_TYPE mqd_type =
			get_mqd_type_from_queue_type(q->properties.type);

	dqm_lock(dqm);

	/* queue must be preempted and CWSR-capable to have consistent state */
	if (q->properties.is_active || !q->device->kfd->cwsr_enabled) {
		r = -EINVAL;
		goto dqm_unlock;
	}

	mqd_mgr = dqm->mqd_mgrs[mqd_type];
	if (!mqd_mgr->checkpoint_mqd) {
		r = -EOPNOTSUPP;
		goto dqm_unlock;
	}

	mqd_mgr->checkpoint_mqd(mqd_mgr, q->mqd, mqd, ctl_stack);

dqm_unlock:
	dqm_unlock(dqm);
	return r;
}

/* Tear down all queues of a terminating process (HWS/MES path). */
static int process_termination_cpsch(struct device_queue_manager *dqm,
		struct qcm_process_device *qpd)
{
	int retval;
	struct queue *q;
	struct device *dev = dqm->dev->adev->dev;
	struct kernel_queue *kq, *kq_next;
	struct mqd_manager *mqd_mgr;
	struct device_process_node *cur, *next_dpn;
	enum kfd_unmap_queues_filter filter =
		KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;
	bool found = false;

	retval = 0;

	dqm_lock(dqm);

	/* Clean all kernel queues */
	list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
		list_del(&kq->list);
		decrement_queue_count(dqm, qpd, kq->queue);
		qpd->is_debug = false;
		dqm->total_queue_count--;
		/* a debug (kernel) queue was present: unmap everything */
		filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES;
	}

	/* Clear all user mode queues */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
			deallocate_sdma_queue(dqm, q);
		else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
			deallocate_sdma_queue(dqm, q);

		if (q->properties.is_active) {
			decrement_queue_count(dqm, qpd, q);

			if (dqm->dev->kfd->shared_resources.enable_mes) {
				retval = remove_queue_mes(dqm, q, qpd);
				if (retval)
					dev_err(dev, "Failed to remove queue %d\n",
						q->properties.queue_id);
			}
		}

		dqm->total_queue_count--;
	}

	/* Unregister process */
	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			found = true;
			break;
		}
	}

	if (!dqm->dev->kfd->shared_resources.enable_mes)
		retval = execute_queues_cpsch(dqm, filter, 0, USE_DEFAULT_GRACE_PERIOD);

	if ((retval || qpd->reset_wavefronts) &&
	    down_read_trylock(&dqm->dev->adev->reset_domain->sem)) {
		pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
		dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
		qpd->reset_wavefronts = false;
		up_read(&dqm->dev->adev->reset_domain->sem);
	}

	/* Lastly, free mqd resources.
	 * Do free_mqd() after dqm_unlock to avoid circular locking.
	 */
	while (!list_empty(&qpd->queues_list)) {
		q = list_first_entry(&qpd->queues_list, struct queue, list);
		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
				q->properties.type)];
		list_del(&q->list);
		qpd->queue_count--;
		dqm_unlock(dqm);
		mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
		dqm_lock(dqm);
	}
	dqm_unlock(dqm);

	/* Outside the DQM lock because under the DQM lock we can't do
	 * reclaim or take other locks that others hold while reclaiming.
	 */
	if (found)
		kfd_dec_compute_active(dqm->dev);

	return retval;
}
dqm->processes_count--; 2610 found = true; 2611 break; 2612 } 2613 } 2614 2615 if (!dqm->dev->kfd->shared_resources.enable_mes) 2616 retval = execute_queues_cpsch(dqm, filter, 0, USE_DEFAULT_GRACE_PERIOD); 2617 2618 if ((retval || qpd->reset_wavefronts) && 2619 down_read_trylock(&dqm->dev->adev->reset_domain->sem)) { 2620 pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev); 2621 dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process); 2622 qpd->reset_wavefronts = false; 2623 up_read(&dqm->dev->adev->reset_domain->sem); 2624 } 2625 2626 /* Lastly, free mqd resources. 2627 * Do free_mqd() after dqm_unlock to avoid circular locking. 2628 */ 2629 while (!list_empty(&qpd->queues_list)) { 2630 q = list_first_entry(&qpd->queues_list, struct queue, list); 2631 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 2632 q->properties.type)]; 2633 list_del(&q->list); 2634 qpd->queue_count--; 2635 dqm_unlock(dqm); 2636 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 2637 dqm_lock(dqm); 2638 } 2639 dqm_unlock(dqm); 2640 2641 /* Outside the DQM lock because under the DQM lock we can't do 2642 * reclaim or take other locks that others hold while reclaiming. 
2643 */ 2644 if (found) 2645 kfd_dec_compute_active(dqm->dev); 2646 2647 return retval; 2648 } 2649 2650 static int init_mqd_managers(struct device_queue_manager *dqm) 2651 { 2652 int i, j; 2653 struct device *dev = dqm->dev->adev->dev; 2654 struct mqd_manager *mqd_mgr; 2655 2656 for (i = 0; i < KFD_MQD_TYPE_MAX; i++) { 2657 mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev); 2658 if (!mqd_mgr) { 2659 dev_err(dev, "mqd manager [%d] initialization failed\n", i); 2660 goto out_free; 2661 } 2662 dqm->mqd_mgrs[i] = mqd_mgr; 2663 } 2664 2665 return 0; 2666 2667 out_free: 2668 for (j = 0; j < i; j++) { 2669 kfree(dqm->mqd_mgrs[j]); 2670 dqm->mqd_mgrs[j] = NULL; 2671 } 2672 2673 return -ENOMEM; 2674 } 2675 2676 /* Allocate one hiq mqd (HWS) and all SDMA mqd in a continuous trunk*/ 2677 static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm) 2678 { 2679 int retval; 2680 struct kfd_node *dev = dqm->dev; 2681 struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd; 2682 uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size * 2683 get_num_all_sdma_engines(dqm) * 2684 dev->kfd->device_info.num_sdma_queues_per_engine + 2685 (dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size * 2686 NUM_XCC(dqm->dev->xcc_mask)); 2687 2688 retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, size, 2689 &(mem_obj->gtt_mem), &(mem_obj->gpu_addr), 2690 (void *)&(mem_obj->cpu_ptr), false); 2691 2692 return retval; 2693 } 2694 2695 struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev) 2696 { 2697 struct device_queue_manager *dqm; 2698 2699 pr_debug("Loading device queue manager\n"); 2700 2701 dqm = kzalloc(sizeof(*dqm), GFP_KERNEL); 2702 if (!dqm) 2703 return NULL; 2704 2705 switch (dev->adev->asic_type) { 2706 /* HWS is not available on Hawaii. */ 2707 case CHIP_HAWAII: 2708 /* HWS depends on CWSR for timely dequeue. CWSR is not 2709 * available on Tonga. 2710 * 2711 * FIXME: This argument also applies to Kaveri. 
2712 */ 2713 case CHIP_TONGA: 2714 dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS; 2715 break; 2716 default: 2717 dqm->sched_policy = sched_policy; 2718 break; 2719 } 2720 2721 dqm->dev = dev; 2722 switch (dqm->sched_policy) { 2723 case KFD_SCHED_POLICY_HWS: 2724 case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION: 2725 /* initialize dqm for cp scheduling */ 2726 dqm->ops.create_queue = create_queue_cpsch; 2727 dqm->ops.initialize = initialize_cpsch; 2728 dqm->ops.start = start_cpsch; 2729 dqm->ops.stop = stop_cpsch; 2730 dqm->ops.destroy_queue = destroy_queue_cpsch; 2731 dqm->ops.update_queue = update_queue; 2732 dqm->ops.register_process = register_process; 2733 dqm->ops.unregister_process = unregister_process; 2734 dqm->ops.uninitialize = uninitialize; 2735 dqm->ops.create_kernel_queue = create_kernel_queue_cpsch; 2736 dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch; 2737 dqm->ops.set_cache_memory_policy = set_cache_memory_policy; 2738 dqm->ops.process_termination = process_termination_cpsch; 2739 dqm->ops.evict_process_queues = evict_process_queues_cpsch; 2740 dqm->ops.restore_process_queues = restore_process_queues_cpsch; 2741 dqm->ops.get_wave_state = get_wave_state; 2742 dqm->ops.reset_queues = reset_queues_cpsch; 2743 dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info; 2744 dqm->ops.checkpoint_mqd = checkpoint_mqd; 2745 break; 2746 case KFD_SCHED_POLICY_NO_HWS: 2747 /* initialize dqm for no cp scheduling */ 2748 dqm->ops.start = start_nocpsch; 2749 dqm->ops.stop = stop_nocpsch; 2750 dqm->ops.create_queue = create_queue_nocpsch; 2751 dqm->ops.destroy_queue = destroy_queue_nocpsch; 2752 dqm->ops.update_queue = update_queue; 2753 dqm->ops.register_process = register_process; 2754 dqm->ops.unregister_process = unregister_process; 2755 dqm->ops.initialize = initialize_nocpsch; 2756 dqm->ops.uninitialize = uninitialize; 2757 dqm->ops.set_cache_memory_policy = set_cache_memory_policy; 2758 dqm->ops.process_termination = process_termination_nocpsch; 
2759 dqm->ops.evict_process_queues = evict_process_queues_nocpsch; 2760 dqm->ops.restore_process_queues = 2761 restore_process_queues_nocpsch; 2762 dqm->ops.get_wave_state = get_wave_state; 2763 dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info; 2764 dqm->ops.checkpoint_mqd = checkpoint_mqd; 2765 break; 2766 default: 2767 dev_err(dev->adev->dev, "Invalid scheduling policy %d\n", dqm->sched_policy); 2768 goto out_free; 2769 } 2770 2771 switch (dev->adev->asic_type) { 2772 case CHIP_KAVERI: 2773 case CHIP_HAWAII: 2774 device_queue_manager_init_cik(&dqm->asic_ops); 2775 break; 2776 2777 case CHIP_CARRIZO: 2778 case CHIP_TONGA: 2779 case CHIP_FIJI: 2780 case CHIP_POLARIS10: 2781 case CHIP_POLARIS11: 2782 case CHIP_POLARIS12: 2783 case CHIP_VEGAM: 2784 device_queue_manager_init_vi(&dqm->asic_ops); 2785 break; 2786 2787 default: 2788 if (KFD_GC_VERSION(dev) >= IP_VERSION(12, 0, 0)) 2789 device_queue_manager_init_v12(&dqm->asic_ops); 2790 else if (KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0)) 2791 device_queue_manager_init_v11(&dqm->asic_ops); 2792 else if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1)) 2793 device_queue_manager_init_v10(&dqm->asic_ops); 2794 else if (KFD_GC_VERSION(dev) >= IP_VERSION(9, 0, 1)) 2795 device_queue_manager_init_v9(&dqm->asic_ops); 2796 else { 2797 WARN(1, "Unexpected ASIC family %u", 2798 dev->adev->asic_type); 2799 goto out_free; 2800 } 2801 } 2802 2803 if (init_mqd_managers(dqm)) 2804 goto out_free; 2805 2806 if (!dev->kfd->shared_resources.enable_mes && allocate_hiq_sdma_mqd(dqm)) { 2807 dev_err(dev->adev->dev, "Failed to allocate hiq sdma mqd trunk buffer\n"); 2808 goto out_free; 2809 } 2810 2811 if (!dqm->ops.initialize(dqm)) { 2812 init_waitqueue_head(&dqm->destroy_wait); 2813 return dqm; 2814 } 2815 2816 out_free: 2817 kfree(dqm); 2818 return NULL; 2819 } 2820 2821 static void deallocate_hiq_sdma_mqd(struct kfd_node *dev, 2822 struct kfd_mem_obj *mqd) 2823 { 2824 WARN(!mqd, "No hiq sdma mqd trunk to free"); 2825 2826 
amdgpu_amdkfd_free_gtt_mem(dev->adev, &mqd->gtt_mem); 2827 } 2828 2829 void device_queue_manager_uninit(struct device_queue_manager *dqm) 2830 { 2831 dqm->ops.stop(dqm); 2832 dqm->ops.uninitialize(dqm); 2833 if (!dqm->dev->kfd->shared_resources.enable_mes) 2834 deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd); 2835 kfree(dqm); 2836 } 2837 2838 int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid) 2839 { 2840 struct kfd_process_device *pdd; 2841 struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); 2842 int ret = 0; 2843 2844 if (!p) 2845 return -EINVAL; 2846 WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid); 2847 pdd = kfd_get_process_device_data(dqm->dev, p); 2848 if (pdd) 2849 ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd); 2850 kfd_unref_process(p); 2851 2852 return ret; 2853 } 2854 2855 static void kfd_process_hw_exception(struct work_struct *work) 2856 { 2857 struct device_queue_manager *dqm = container_of(work, 2858 struct device_queue_manager, hw_exception_work); 2859 amdgpu_amdkfd_gpu_reset(dqm->dev->adev); 2860 } 2861 2862 int reserve_debug_trap_vmid(struct device_queue_manager *dqm, 2863 struct qcm_process_device *qpd) 2864 { 2865 int r; 2866 struct device *dev = dqm->dev->adev->dev; 2867 int updated_vmid_mask; 2868 2869 if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { 2870 dev_err(dev, "Unsupported on sched_policy: %i\n", dqm->sched_policy); 2871 return -EINVAL; 2872 } 2873 2874 dqm_lock(dqm); 2875 2876 if (dqm->trap_debug_vmid != 0) { 2877 dev_err(dev, "Trap debug id already reserved\n"); 2878 r = -EBUSY; 2879 goto out_unlock; 2880 } 2881 2882 r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, 2883 USE_DEFAULT_GRACE_PERIOD, false); 2884 if (r) 2885 goto out_unlock; 2886 2887 updated_vmid_mask = dqm->dev->kfd->shared_resources.compute_vmid_bitmap; 2888 updated_vmid_mask &= ~(1 << dqm->dev->vm_info.last_vmid_kfd); 2889 2890 dqm->dev->kfd->shared_resources.compute_vmid_bitmap = 
updated_vmid_mask; 2891 dqm->trap_debug_vmid = dqm->dev->vm_info.last_vmid_kfd; 2892 r = set_sched_resources(dqm); 2893 if (r) 2894 goto out_unlock; 2895 2896 r = map_queues_cpsch(dqm); 2897 if (r) 2898 goto out_unlock; 2899 2900 pr_debug("Reserved VMID for trap debug: %i\n", dqm->trap_debug_vmid); 2901 2902 out_unlock: 2903 dqm_unlock(dqm); 2904 return r; 2905 } 2906 2907 /* 2908 * Releases vmid for the trap debugger 2909 */ 2910 int release_debug_trap_vmid(struct device_queue_manager *dqm, 2911 struct qcm_process_device *qpd) 2912 { 2913 struct device *dev = dqm->dev->adev->dev; 2914 int r; 2915 int updated_vmid_mask; 2916 uint32_t trap_debug_vmid; 2917 2918 if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { 2919 dev_err(dev, "Unsupported on sched_policy: %i\n", dqm->sched_policy); 2920 return -EINVAL; 2921 } 2922 2923 dqm_lock(dqm); 2924 trap_debug_vmid = dqm->trap_debug_vmid; 2925 if (dqm->trap_debug_vmid == 0) { 2926 dev_err(dev, "Trap debug id is not reserved\n"); 2927 r = -EINVAL; 2928 goto out_unlock; 2929 } 2930 2931 r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, 2932 USE_DEFAULT_GRACE_PERIOD, false); 2933 if (r) 2934 goto out_unlock; 2935 2936 updated_vmid_mask = dqm->dev->kfd->shared_resources.compute_vmid_bitmap; 2937 updated_vmid_mask |= (1 << dqm->dev->vm_info.last_vmid_kfd); 2938 2939 dqm->dev->kfd->shared_resources.compute_vmid_bitmap = updated_vmid_mask; 2940 dqm->trap_debug_vmid = 0; 2941 r = set_sched_resources(dqm); 2942 if (r) 2943 goto out_unlock; 2944 2945 r = map_queues_cpsch(dqm); 2946 if (r) 2947 goto out_unlock; 2948 2949 pr_debug("Released VMID for trap debug: %i\n", trap_debug_vmid); 2950 2951 out_unlock: 2952 dqm_unlock(dqm); 2953 return r; 2954 } 2955 2956 #define QUEUE_NOT_FOUND -1 2957 /* invalidate queue operation in array */ 2958 static void q_array_invalidate(uint32_t num_queues, uint32_t *queue_ids) 2959 { 2960 int i; 2961 2962 for (i = 0; i < num_queues; i++) 2963 queue_ids[i] |= KFD_DBG_QUEUE_INVALID_MASK; 
/* find queue index in array */
static int q_array_get_index(unsigned int queue_id,
		uint32_t num_queues,
		uint32_t *queue_ids)
{
	int i;

	for (i = 0; i < num_queues; i++)
		/* ignore the invalid flag bit when matching ids */
		if (queue_id == (queue_ids[i] & ~KFD_DBG_QUEUE_INVALID_MASK))
			return i;

	return QUEUE_NOT_FOUND;
}

/* Work item context for copying wave state of all of a process's queues. */
struct copy_context_work_handler_workarea {
	struct work_struct copy_context_work;
	struct kfd_process *p;
};

/*
 * Worker: copy the control stack / save area of every queue of the
 * process into its user-mode CWSR save area.  Runs with the process's
 * mm borrowed via kthread_use_mm() so user pointers are dereferencable.
 */
static void copy_context_work_handler(struct work_struct *work)
{
	struct copy_context_work_handler_workarea *workarea;
	struct mqd_manager *mqd_mgr;
	struct queue *q;
	struct mm_struct *mm;
	struct kfd_process *p;
	uint32_t tmp_ctl_stack_used_size, tmp_save_area_used_size;
	int i;

	workarea = container_of(work,
			struct copy_context_work_handler_workarea,
			copy_context_work);

	p = workarea->p;
	mm = get_task_mm(p->lead_thread);

	if (!mm)
		return;

	kthread_use_mm(mm);
	for (i = 0; i < p->n_pdds; i++) {
		struct kfd_process_device *pdd = p->pdds[i];
		struct device_queue_manager *dqm = pdd->dev->dqm;
		struct qcm_process_device *qpd = &pdd->qpd;

		list_for_each_entry(q, &qpd->queues_list, list) {
			mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP];

			/* We ignore the return value from get_wave_state
			 * because
			 * i) right now, it always returns 0, and
			 * ii) if we hit an error, we would continue to the
			 *      next queue anyway.
			 */
			mqd_mgr->get_wave_state(mqd_mgr,
					q->mqd,
					&q->properties,
					(void __user *)	q->properties.ctx_save_restore_area_address,
					&tmp_ctl_stack_used_size,
					&tmp_save_area_used_size);
		}
	}
	kthread_unuse_mm(mm);
	mmput(mm);
}

/*
 * Duplicate a user-space queue-id array into kernel memory.
 * Returns NULL if the user pointer is NULL, or an ERR_PTR from memdup_user.
 */
static uint32_t *get_queue_ids(uint32_t num_queues, uint32_t *usr_queue_id_array)
{
	size_t array_size = num_queues * sizeof(uint32_t);

	if (!usr_queue_id_array)
		return NULL;

	return memdup_user(usr_queue_id_array, array_size);
}

/*
 * Resume debugger-suspended queues of process @p.  A NULL
 * usr_queue_id_array means "resume all queues".  Per-queue status flags
 * are reported back through the user array.  Returns the number of
 * queues resumed, or a negative errno.
 */
int resume_queues(struct kfd_process *p,
		uint32_t num_queues,
		uint32_t *usr_queue_id_array)
{
	uint32_t *queue_ids = NULL;
	int total_resumed = 0;
	int i;

	if (usr_queue_id_array) {
		queue_ids = get_queue_ids(num_queues, usr_queue_id_array);

		if (IS_ERR(queue_ids))
			return PTR_ERR(queue_ids);

		/* mask all queues as invalid.  unmask per successful request */
		q_array_invalidate(num_queues, queue_ids);
	}

	for (i = 0; i < p->n_pdds; i++) {
		struct kfd_process_device *pdd = p->pdds[i];
		struct device_queue_manager *dqm = pdd->dev->dqm;
		struct device *dev = dqm->dev->adev->dev;
		struct qcm_process_device *qpd = &pdd->qpd;
		struct queue *q;
		int r, per_device_resumed = 0;

		dqm_lock(dqm);

		/* unmask queues that resume or already resumed as valid */
		list_for_each_entry(q, &qpd->queues_list, list) {
			int q_idx = QUEUE_NOT_FOUND;

			if (queue_ids)
				q_idx = q_array_get_index(
						q->properties.queue_id,
						num_queues,
						queue_ids);

			if (!queue_ids || q_idx != QUEUE_NOT_FOUND) {
				int err = resume_single_queue(dqm, &pdd->qpd, q);

				if (queue_ids) {
					if (!err) {
						queue_ids[q_idx] &=
							~KFD_DBG_QUEUE_INVALID_MASK;
					} else {
						queue_ids[q_idx] |=
							KFD_DBG_QUEUE_ERROR_MASK;
						break;
					}
				}

				/* MES maps per queue; no runlist rebuild needed */
				if (dqm->dev->kfd->shared_resources.enable_mes) {
					wake_up_all(&dqm->destroy_wait);
					if (!err)
						total_resumed++;
				} else {
					per_device_resumed++;
				}
			}
		}

		if (!per_device_resumed) {
			dqm_unlock(dqm);
			continue;
		}

		/* HWS path: rebuild the runlist once per device */
		r = execute_queues_cpsch(dqm,
					KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES,
					0,
					USE_DEFAULT_GRACE_PERIOD);
		if (r) {
			dev_err(dev, "Failed to resume process queues\n");
			if (queue_ids) {
				list_for_each_entry(q, &qpd->queues_list, list) {
					int q_idx = q_array_get_index(
							q->properties.queue_id,
							num_queues,
							queue_ids);

					/* mask queue as error on resume fail */
					if (q_idx != QUEUE_NOT_FOUND)
						queue_ids[q_idx] |=
							KFD_DBG_QUEUE_ERROR_MASK;
				}
			}
		} else {
			wake_up_all(&dqm->destroy_wait);
			total_resumed += per_device_resumed;
		}

		dqm_unlock(dqm);
	}

	if (queue_ids) {
		if (copy_to_user((void __user *)usr_queue_id_array, queue_ids,
				num_queues * sizeof(uint32_t)))
			pr_err("copy_to_user failed on queue resume\n");

		kfree(queue_ids);
	}

	return total_resumed;
}
/*
 * Suspend the listed queues of process @p for the debugger, optionally
 * clearing exception status bits.  Per-queue status flags are reported
 * back through the user array.  Returns the number of queues suspended,
 * or a negative errno.
 *
 * NOTE(review): unlike resume_queues(), this path assumes
 * usr_queue_id_array is non-NULL (get_queue_ids() would return NULL and
 * q_array_invalidate() would dereference it) — presumably guaranteed by
 * the ioctl caller; confirm.
 */
int suspend_queues(struct kfd_process *p,
			uint32_t num_queues,
			uint32_t grace_period,
			uint64_t exception_clear_mask,
			uint32_t *usr_queue_id_array)
{
	uint32_t *queue_ids = get_queue_ids(num_queues, usr_queue_id_array);
	int total_suspended = 0;
	int i;

	if (IS_ERR(queue_ids))
		return PTR_ERR(queue_ids);

	/* mask all queues as invalid.  unmask on successful request */
	q_array_invalidate(num_queues, queue_ids);

	for (i = 0; i < p->n_pdds; i++) {
		struct kfd_process_device *pdd = p->pdds[i];
		struct device_queue_manager *dqm = pdd->dev->dqm;
		struct device *dev = dqm->dev->adev->dev;
		struct qcm_process_device *qpd = &pdd->qpd;
		struct queue *q;
		int r, per_device_suspended = 0;

		mutex_lock(&p->event_mutex);
		dqm_lock(dqm);

		/* unmask queues that suspend or already suspended */
		list_for_each_entry(q, &qpd->queues_list, list) {
			int q_idx = q_array_get_index(q->properties.queue_id,
							num_queues,
							queue_ids);

			if (q_idx != QUEUE_NOT_FOUND) {
				int err = suspend_single_queue(dqm, pdd, q);
				bool is_mes = dqm->dev->kfd->shared_resources.enable_mes;

				if (!err) {
					queue_ids[q_idx] &= ~KFD_DBG_QUEUE_INVALID_MASK;
					/* MES suspends per queue; exception bits
					 * can be cleared immediately
					 */
					if (exception_clear_mask && is_mes)
						q->properties.exception_status &=
							~exception_clear_mask;

					if (is_mes)
						total_suspended++;
					else
						per_device_suspended++;
				} else if (err != -EBUSY) {
					r = err;
					queue_ids[q_idx] |= KFD_DBG_QUEUE_ERROR_MASK;
					break;
				}
			}
		}

		if (!per_device_suspended) {
			dqm_unlock(dqm);
			mutex_unlock(&p->event_mutex);
			if (total_suspended)
				amdgpu_amdkfd_debug_mem_fence(dqm->dev->adev);
			continue;
		}

		/* HWS path: rebuild the runlist once per device */
		r = execute_queues_cpsch(dqm,
			KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
			grace_period);

		if (r)
			dev_err(dev, "Failed to suspend process queues.\n");
		else
			total_suspended += per_device_suspended;

		list_for_each_entry(q, &qpd->queues_list, list) {
			int q_idx = q_array_get_index(q->properties.queue_id,
						num_queues, queue_ids);

			if (q_idx == QUEUE_NOT_FOUND)
				continue;

			/* mask queue as error on suspend fail */
			if (r)
				queue_ids[q_idx] |= KFD_DBG_QUEUE_ERROR_MASK;
			else if (exception_clear_mask)
				q->properties.exception_status &=
							~exception_clear_mask;
		}

		dqm_unlock(dqm);
		mutex_unlock(&p->event_mutex);
		amdgpu_device_flush_hdp(dqm->dev->adev, NULL);
	}

	if (total_suspended) {
		struct copy_context_work_handler_workarea copy_context_worker;

		/* copy wave state in a worker that borrows the process mm */
		INIT_WORK_ONSTACK(
				&copy_context_worker.copy_context_work,
				copy_context_work_handler);

		copy_context_worker.p = p;

		schedule_work(&copy_context_worker.copy_context_work);


		flush_work(&copy_context_worker.copy_context_work);
		destroy_work_on_stack(&copy_context_worker.copy_context_work);
	}

	if (copy_to_user((void __user *)usr_queue_id_array, queue_ids,
			num_queues * sizeof(uint32_t)))
		pr_err("copy_to_user failed on queue suspend\n");

	kfree(queue_ids);

	return total_suspended;
}

/* Translate an internal queue type/format pair to the KFD ioctl queue type. */
static uint32_t set_queue_type_for_user(struct queue_properties *q_props)
{
	switch (q_props->type) {
	case KFD_QUEUE_TYPE_COMPUTE:
		return q_props->format == KFD_QUEUE_FORMAT_PM4
					? KFD_IOC_QUEUE_TYPE_COMPUTE
					: KFD_IOC_QUEUE_TYPE_COMPUTE_AQL;
	case KFD_QUEUE_TYPE_SDMA:
		return KFD_IOC_QUEUE_TYPE_SDMA;
	case KFD_QUEUE_TYPE_SDMA_XGMI:
		return KFD_IOC_QUEUE_TYPE_SDMA_XGMI;
	default:
		WARN_ONCE(true, "queue type not recognized!");
		return 0xffffffff;
	};
}
KFD_IOC_QUEUE_TYPE_COMPUTE 3270 : KFD_IOC_QUEUE_TYPE_COMPUTE_AQL; 3271 case KFD_QUEUE_TYPE_SDMA: 3272 return KFD_IOC_QUEUE_TYPE_SDMA; 3273 case KFD_QUEUE_TYPE_SDMA_XGMI: 3274 return KFD_IOC_QUEUE_TYPE_SDMA_XGMI; 3275 default: 3276 WARN_ONCE(true, "queue type not recognized!"); 3277 return 0xffffffff; 3278 }; 3279 } 3280 3281 void set_queue_snapshot_entry(struct queue *q, 3282 uint64_t exception_clear_mask, 3283 struct kfd_queue_snapshot_entry *qss_entry) 3284 { 3285 qss_entry->ring_base_address = q->properties.queue_address; 3286 qss_entry->write_pointer_address = (uint64_t)q->properties.write_ptr; 3287 qss_entry->read_pointer_address = (uint64_t)q->properties.read_ptr; 3288 qss_entry->ctx_save_restore_address = 3289 q->properties.ctx_save_restore_area_address; 3290 qss_entry->ctx_save_restore_area_size = 3291 q->properties.ctx_save_restore_area_size; 3292 qss_entry->exception_status = q->properties.exception_status; 3293 qss_entry->queue_id = q->properties.queue_id; 3294 qss_entry->gpu_id = q->device->id; 3295 qss_entry->ring_size = (uint32_t)q->properties.queue_size; 3296 qss_entry->queue_type = set_queue_type_for_user(&q->properties); 3297 q->properties.exception_status &= ~exception_clear_mask; 3298 } 3299 3300 int debug_lock_and_unmap(struct device_queue_manager *dqm) 3301 { 3302 struct device *dev = dqm->dev->adev->dev; 3303 int r; 3304 3305 if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { 3306 dev_err(dev, "Unsupported on sched_policy: %i\n", dqm->sched_policy); 3307 return -EINVAL; 3308 } 3309 3310 if (!kfd_dbg_is_per_vmid_supported(dqm->dev)) 3311 return 0; 3312 3313 dqm_lock(dqm); 3314 3315 r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, 0, false); 3316 if (r) 3317 dqm_unlock(dqm); 3318 3319 return r; 3320 } 3321 3322 int debug_map_and_unlock(struct device_queue_manager *dqm) 3323 { 3324 struct device *dev = dqm->dev->adev->dev; 3325 int r; 3326 3327 if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { 3328 dev_err(dev, "Unsupported 
on sched_policy: %i\n", dqm->sched_policy); 3329 return -EINVAL; 3330 } 3331 3332 if (!kfd_dbg_is_per_vmid_supported(dqm->dev)) 3333 return 0; 3334 3335 r = map_queues_cpsch(dqm); 3336 3337 dqm_unlock(dqm); 3338 3339 return r; 3340 } 3341 3342 int debug_refresh_runlist(struct device_queue_manager *dqm) 3343 { 3344 int r = debug_lock_and_unmap(dqm); 3345 3346 if (r) 3347 return r; 3348 3349 return debug_map_and_unlock(dqm); 3350 } 3351 3352 #if defined(CONFIG_DEBUG_FS) 3353 3354 static void seq_reg_dump(struct seq_file *m, 3355 uint32_t (*dump)[2], uint32_t n_regs) 3356 { 3357 uint32_t i, count; 3358 3359 for (i = 0, count = 0; i < n_regs; i++) { 3360 if (count == 0 || 3361 dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) { 3362 seq_printf(m, "%s %08x: %08x", 3363 i ? "\n" : "", 3364 dump[i][0], dump[i][1]); 3365 count = 7; 3366 } else { 3367 seq_printf(m, " %08x", dump[i][1]); 3368 count--; 3369 } 3370 } 3371 3372 seq_puts(m, "\n"); 3373 } 3374 3375 int dqm_debugfs_hqds(struct seq_file *m, void *data) 3376 { 3377 struct device_queue_manager *dqm = data; 3378 uint32_t xcc_mask = dqm->dev->xcc_mask; 3379 uint32_t (*dump)[2], n_regs; 3380 int pipe, queue; 3381 int r = 0, xcc_id; 3382 uint32_t sdma_engine_start; 3383 3384 if (!dqm->sched_running) { 3385 seq_puts(m, " Device is stopped\n"); 3386 return 0; 3387 } 3388 3389 for_each_inst(xcc_id, xcc_mask) { 3390 r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev, 3391 KFD_CIK_HIQ_PIPE, 3392 KFD_CIK_HIQ_QUEUE, &dump, 3393 &n_regs, xcc_id); 3394 if (!r) { 3395 seq_printf( 3396 m, 3397 " Inst %d, HIQ on MEC %d Pipe %d Queue %d\n", 3398 xcc_id, 3399 KFD_CIK_HIQ_PIPE / get_pipes_per_mec(dqm) + 1, 3400 KFD_CIK_HIQ_PIPE % get_pipes_per_mec(dqm), 3401 KFD_CIK_HIQ_QUEUE); 3402 seq_reg_dump(m, dump, n_regs); 3403 3404 kfree(dump); 3405 } 3406 3407 for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { 3408 int pipe_offset = pipe * get_queues_per_pipe(dqm); 3409 3410 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) { 3411 if 
(!test_bit(pipe_offset + queue, 3412 dqm->dev->kfd->shared_resources.cp_queue_bitmap)) 3413 continue; 3414 3415 r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev, 3416 pipe, queue, 3417 &dump, &n_regs, 3418 xcc_id); 3419 if (r) 3420 break; 3421 3422 seq_printf(m, 3423 " Inst %d, CP Pipe %d, Queue %d\n", 3424 xcc_id, pipe, queue); 3425 seq_reg_dump(m, dump, n_regs); 3426 3427 kfree(dump); 3428 } 3429 } 3430 } 3431 3432 sdma_engine_start = dqm->dev->node_id * get_num_all_sdma_engines(dqm); 3433 for (pipe = sdma_engine_start; 3434 pipe < (sdma_engine_start + get_num_all_sdma_engines(dqm)); 3435 pipe++) { 3436 for (queue = 0; 3437 queue < dqm->dev->kfd->device_info.num_sdma_queues_per_engine; 3438 queue++) { 3439 r = dqm->dev->kfd2kgd->hqd_sdma_dump( 3440 dqm->dev->adev, pipe, queue, &dump, &n_regs); 3441 if (r) 3442 break; 3443 3444 seq_printf(m, " SDMA Engine %d, RLC %d\n", 3445 pipe, queue); 3446 seq_reg_dump(m, dump, n_regs); 3447 3448 kfree(dump); 3449 } 3450 } 3451 3452 return r; 3453 } 3454 3455 int dqm_debugfs_hang_hws(struct device_queue_manager *dqm) 3456 { 3457 int r = 0; 3458 3459 dqm_lock(dqm); 3460 r = pm_debugfs_hang_hws(&dqm->packet_mgr); 3461 if (r) { 3462 dqm_unlock(dqm); 3463 return r; 3464 } 3465 dqm->active_runlist = true; 3466 r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 3467 0, USE_DEFAULT_GRACE_PERIOD); 3468 dqm_unlock(dqm); 3469 3470 return r; 3471 } 3472 3473 #endif 3474