1 // SPDX-License-Identifier: GPL-2.0 OR MIT 2 /* 3 * Copyright 2014-2022 Advanced Micro Devices, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21 * OTHER DEALINGS IN THE SOFTWARE. 22 * 23 */ 24 25 #include <linux/ratelimit.h> 26 #include <linux/printk.h> 27 #include <linux/slab.h> 28 #include <linux/list.h> 29 #include <linux/types.h> 30 #include <linux/bitops.h> 31 #include <linux/sched.h> 32 #include "kfd_priv.h" 33 #include "kfd_device_queue_manager.h" 34 #include "kfd_mqd_manager.h" 35 #include "cik_regs.h" 36 #include "kfd_kernel_queue.h" 37 #include "amdgpu_amdkfd.h" 38 #include "mes_api_def.h" 39 40 /* Size of the per-pipe EOP queue */ 41 #define CIK_HPD_EOP_BYTES_LOG2 11 42 #define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2) 43 44 static int set_pasid_vmid_mapping(struct device_queue_manager *dqm, 45 u32 pasid, unsigned int vmid); 46 47 static int execute_queues_cpsch(struct device_queue_manager *dqm, 48 enum kfd_unmap_queues_filter filter, 49 uint32_t filter_param); 50 static int unmap_queues_cpsch(struct device_queue_manager *dqm, 51 enum kfd_unmap_queues_filter filter, 52 uint32_t filter_param, bool reset); 53 54 static int map_queues_cpsch(struct device_queue_manager *dqm); 55 56 static void deallocate_sdma_queue(struct device_queue_manager *dqm, 57 struct queue *q); 58 59 static inline void deallocate_hqd(struct device_queue_manager *dqm, 60 struct queue *q); 61 static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q); 62 static int allocate_sdma_queue(struct device_queue_manager *dqm, 63 struct queue *q, const uint32_t *restore_sdma_id); 64 static void kfd_process_hw_exception(struct work_struct *work); 65 66 static inline 67 enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type) 68 { 69 if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI) 70 return KFD_MQD_TYPE_SDMA; 71 return KFD_MQD_TYPE_CP; 72 } 73 74 static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe) 75 { 76 int i; 77 int pipe_offset = (mec * dqm->dev->shared_resources.num_pipe_per_mec 78 + pipe) * dqm->dev->shared_resources.num_queue_per_pipe; 79 80 /* queue is available for KFD usage if bit is 1 */ 81 for (i = 0; i < dqm->dev->shared_resources.num_queue_per_pipe; ++i) 82 if (test_bit(pipe_offset + i, 83 dqm->dev->shared_resources.cp_queue_bitmap)) 84 return true; 85 return false; 86 } 87 88 unsigned int get_cp_queues_num(struct device_queue_manager *dqm) 89 { 90 return bitmap_weight(dqm->dev->shared_resources.cp_queue_bitmap, 91 KGD_MAX_QUEUES); 92 } 93 94 unsigned int get_queues_per_pipe(struct device_queue_manager *dqm) 95 { 96 return dqm->dev->shared_resources.num_queue_per_pipe; 97 } 98 99 unsigned int get_pipes_per_mec(struct device_queue_manager *dqm) 100 { 101 return dqm->dev->shared_resources.num_pipe_per_mec; 102 } 103 104 static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm) 105 { 106 return kfd_get_num_sdma_engines(dqm->dev) + 107 kfd_get_num_xgmi_sdma_engines(dqm->dev); 108 } 109 110 unsigned int get_num_sdma_queues(struct device_queue_manager *dqm) 111 { 112 return kfd_get_num_sdma_engines(dqm->dev) * 113 dqm->dev->device_info.num_sdma_queues_per_engine; 114 } 115 116 unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm) 117 { 118 return kfd_get_num_xgmi_sdma_engines(dqm->dev) * 119 dqm->dev->device_info.num_sdma_queues_per_engine; 120 } 121 122 static inline uint64_t get_reserved_sdma_queues_bitmap(struct device_queue_manager *dqm) 123 { 124 return dqm->dev->device_info.reserved_sdma_queues_bitmap; 125 } 126 127 void program_sh_mem_settings(struct device_queue_manager *dqm, 128 struct qcm_process_device *qpd) 129 { 130 return dqm->dev->kfd2kgd->program_sh_mem_settings( 131 dqm->dev->adev, qpd->vmid, 132 qpd->sh_mem_config, 133 qpd->sh_mem_ape1_base, 134 qpd->sh_mem_ape1_limit, 135 qpd->sh_mem_bases); 136 } 137 138 static void kfd_hws_hang(struct device_queue_manager *dqm) 139 { 140 /* 141 * Issue a GPU reset if HWS is unresponsive 142 */ 143 dqm->is_hws_hang = true; 144 145 /* It's possible we're detecting a HWS hang in the 146 * middle of a GPU reset. No need to schedule another 147 * reset in this case. 148 */ 149 if (!dqm->is_resetting) 150 schedule_work(&dqm->hw_exception_work); 151 } 152 153 static int convert_to_mes_queue_type(int queue_type) 154 { 155 int mes_queue_type; 156 157 switch (queue_type) { 158 case KFD_QUEUE_TYPE_COMPUTE: 159 mes_queue_type = MES_QUEUE_TYPE_COMPUTE; 160 break; 161 case KFD_QUEUE_TYPE_SDMA: 162 mes_queue_type = MES_QUEUE_TYPE_SDMA; 163 break; 164 default: 165 WARN(1, "Invalid queue type %d", queue_type); 166 mes_queue_type = -EINVAL; 167 break; 168 } 169 170 return mes_queue_type; 171 } 172 173 static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q, 174 struct qcm_process_device *qpd) 175 { 176 struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev; 177 struct kfd_process_device *pdd = qpd_to_pdd(qpd); 178 struct mes_add_queue_input queue_input; 179 int r, queue_type; 180 181 if (dqm->is_hws_hang) 182 return -EIO; 183 184 memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input)); 185 queue_input.process_id = qpd->pqm->process->pasid; 186 queue_input.page_table_base_addr = qpd->page_table_base; 187 queue_input.process_va_start = 0; 188 queue_input.process_va_end = adev->vm_manager.max_pfn - 1; 189 /* MES unit for quantum is 100ns */ 190 queue_input.process_quantum = KFD_MES_PROCESS_QUANTUM; /* Equivalent to 10ms. */ 191 queue_input.process_context_addr = pdd->proc_ctx_gpu_addr; 192 queue_input.gang_quantum = KFD_MES_GANG_QUANTUM; /* Equivalent to 1ms */ 193 queue_input.gang_context_addr = q->gang_ctx_gpu_addr; 194 queue_input.inprocess_gang_priority = q->properties.priority; 195 queue_input.gang_global_priority_level = 196 AMDGPU_MES_PRIORITY_LEVEL_NORMAL; 197 queue_input.doorbell_offset = q->properties.doorbell_off; 198 queue_input.mqd_addr = q->gart_mqd_addr; 199 queue_input.wptr_addr = (uint64_t)q->properties.write_ptr; 200 queue_input.paging = false; 201 queue_input.tba_addr = qpd->tba_addr; 202 queue_input.tma_addr = qpd->tma_addr; 203 204 queue_type = convert_to_mes_queue_type(q->properties.type); 205 if (queue_type < 0) { 206 pr_err("Queue type not supported with MES, queue:%d\n", 207 q->properties.type); 208 return -EINVAL; 209 } 210 queue_input.queue_type = (uint32_t)queue_type; 211 212 if (q->gws) { 213 queue_input.gws_base = 0; 214 queue_input.gws_size = qpd->num_gws; 215 } 216 217 amdgpu_mes_lock(&adev->mes); 218 r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input); 219 amdgpu_mes_unlock(&adev->mes); 220 if (r) { 221 pr_err("failed to add hardware queue to MES, doorbell=0x%x\n", 222 q->properties.doorbell_off); 223 pr_err("MES might be in unrecoverable state, issue a GPU reset\n"); 224 kfd_hws_hang(dqm); 225 } 226 227 return r; 228 } 229 230 static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q, 231 struct qcm_process_device *qpd) 232 { 233 struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev; 234 int r; 235 struct mes_remove_queue_input queue_input; 236 237 if (dqm->is_hws_hang) 238 return -EIO; 239 240 memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input)); 241 queue_input.doorbell_offset = q->properties.doorbell_off; 242 queue_input.gang_context_addr = q->gang_ctx_gpu_addr; 243 244 amdgpu_mes_lock(&adev->mes); 245 r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input); 246 amdgpu_mes_unlock(&adev->mes); 247 248 if (r) { 249 pr_err("failed to remove hardware queue from MES, doorbell=0x%x\n", 250 q->properties.doorbell_off); 251 pr_err("MES might be in unrecoverable state, issue a GPU reset\n"); 252 kfd_hws_hang(dqm); 253 } 254 255 return r; 256 } 257 258 static int remove_all_queues_mes(struct device_queue_manager *dqm) 259 { 260 struct device_process_node *cur; 261 struct qcm_process_device *qpd; 262 struct queue *q; 263 int retval = 0; 264 265 list_for_each_entry(cur, &dqm->queues, list) { 266 qpd = cur->qpd; 267 list_for_each_entry(q, &qpd->queues_list, list) { 268 if (q->properties.is_active) { 269 retval = remove_queue_mes(dqm, q, qpd); 270 if (retval) { 271 pr_err("%s: Failed to remove queue %d for dev %d", 272 __func__, 273 q->properties.queue_id, 274 dqm->dev->id); 275 return retval; 276 } 277 } 278 } 279 } 280 281 return retval; 282 } 283 284 static void increment_queue_count(struct device_queue_manager *dqm, 285 struct qcm_process_device *qpd, 286 struct queue *q) 287 { 288 dqm->active_queue_count++; 289 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || 290 q->properties.type == KFD_QUEUE_TYPE_DIQ) 291 dqm->active_cp_queue_count++; 292 293 if (q->properties.is_gws) { 294 dqm->gws_queue_count++; 295 qpd->mapped_gws_queue = true; 296 } 297 } 298 299 static void decrement_queue_count(struct device_queue_manager *dqm, 300 struct qcm_process_device *qpd, 301 struct queue *q) 302 { 303 dqm->active_queue_count--; 304 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || 305 q->properties.type == KFD_QUEUE_TYPE_DIQ) 306 dqm->active_cp_queue_count--; 307 308 if (q->properties.is_gws) { 309 dqm->gws_queue_count--; 310 qpd->mapped_gws_queue = false; 311 } 312 } 313 314 /* 315 * Allocate a doorbell ID to this queue. 316 * If doorbell_id is passed in, make sure requested ID is valid then allocate it. 317 */ 318 static int allocate_doorbell(struct qcm_process_device *qpd, 319 struct queue *q, 320 uint32_t const *restore_id) 321 { 322 struct kfd_dev *dev = qpd->dqm->dev; 323 324 if (!KFD_IS_SOC15(dev)) { 325 /* On pre-SOC15 chips we need to use the queue ID to 326 * preserve the user mode ABI. 327 */ 328 329 if (restore_id && *restore_id != q->properties.queue_id) 330 return -EINVAL; 331 332 q->doorbell_id = q->properties.queue_id; 333 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 334 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 335 /* For SDMA queues on SOC15 with 8-byte doorbell, use static 336 * doorbell assignments based on the engine and queue id. 337 * The doobell index distance between RLC (2*i) and (2*i+1) 338 * for a SDMA engine is 512. 339 */ 340 341 uint32_t *idx_offset = dev->shared_resources.sdma_doorbell_idx; 342 uint32_t valid_id = idx_offset[q->properties.sdma_engine_id] 343 + (q->properties.sdma_queue_id & 1) 344 * KFD_QUEUE_DOORBELL_MIRROR_OFFSET 345 + (q->properties.sdma_queue_id >> 1); 346 347 if (restore_id && *restore_id != valid_id) 348 return -EINVAL; 349 q->doorbell_id = valid_id; 350 } else { 351 /* For CP queues on SOC15 */ 352 if (restore_id) { 353 /* make sure that ID is free */ 354 if (__test_and_set_bit(*restore_id, qpd->doorbell_bitmap)) 355 return -EINVAL; 356 357 q->doorbell_id = *restore_id; 358 } else { 359 /* or reserve a free doorbell ID */ 360 unsigned int found; 361 362 found = find_first_zero_bit(qpd->doorbell_bitmap, 363 KFD_MAX_NUM_OF_QUEUES_PER_PROCESS); 364 if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) { 365 pr_debug("No doorbells available"); 366 return -EBUSY; 367 } 368 set_bit(found, qpd->doorbell_bitmap); 369 q->doorbell_id = found; 370 } 371 } 372 373 q->properties.doorbell_off = 374 kfd_get_doorbell_dw_offset_in_bar(dev, qpd_to_pdd(qpd), 375 q->doorbell_id); 376 return 0; 377 } 378 379 static void deallocate_doorbell(struct qcm_process_device *qpd, 380 struct queue *q) 381 { 382 unsigned int old; 383 struct kfd_dev *dev = qpd->dqm->dev; 384 385 if (!KFD_IS_SOC15(dev) || 386 q->properties.type == KFD_QUEUE_TYPE_SDMA || 387 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 388 return; 389 390 old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap); 391 WARN_ON(!old); 392 } 393 394 static void program_trap_handler_settings(struct device_queue_manager *dqm, 395 struct qcm_process_device *qpd) 396 { 397 if (dqm->dev->kfd2kgd->program_trap_handler_settings) 398 dqm->dev->kfd2kgd->program_trap_handler_settings( 399 dqm->dev->adev, qpd->vmid, 400 qpd->tba_addr, qpd->tma_addr); 401 } 402 403 static int allocate_vmid(struct device_queue_manager *dqm, 404 struct qcm_process_device *qpd, 405 struct queue *q) 406 { 407 int allocated_vmid = -1, i; 408 409 for (i = dqm->dev->vm_info.first_vmid_kfd; 410 i <= dqm->dev->vm_info.last_vmid_kfd; i++) { 411 if (!dqm->vmid_pasid[i]) { 412 allocated_vmid = i; 413 break; 414 } 415 } 416 417 if (allocated_vmid < 0) { 418 pr_err("no more vmid to allocate\n"); 419 return -ENOSPC; 420 } 421 422 pr_debug("vmid allocated: %d\n", allocated_vmid); 423 424 dqm->vmid_pasid[allocated_vmid] = q->process->pasid; 425 426 set_pasid_vmid_mapping(dqm, q->process->pasid, allocated_vmid); 427 428 qpd->vmid = allocated_vmid; 429 q->properties.vmid = allocated_vmid; 430 431 program_sh_mem_settings(dqm, qpd); 432 433 if (KFD_IS_SOC15(dqm->dev) && dqm->dev->cwsr_enabled) 434 program_trap_handler_settings(dqm, qpd); 435 436 /* qpd->page_table_base is set earlier when register_process() 437 * is called, i.e. when the first queue is created. 438 */ 439 dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->adev, 440 qpd->vmid, 441 qpd->page_table_base); 442 /* invalidate the VM context after pasid and vmid mapping is set up */ 443 kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY); 444 445 if (dqm->dev->kfd2kgd->set_scratch_backing_va) 446 dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->adev, 447 qpd->sh_hidden_private_base, qpd->vmid); 448 449 return 0; 450 } 451 452 static int flush_texture_cache_nocpsch(struct kfd_dev *kdev, 453 struct qcm_process_device *qpd) 454 { 455 const struct packet_manager_funcs *pmf = qpd->dqm->packet_mgr.pmf; 456 int ret; 457 458 if (!qpd->ib_kaddr) 459 return -ENOMEM; 460 461 ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr); 462 if (ret) 463 return ret; 464 465 return amdgpu_amdkfd_submit_ib(kdev->adev, KGD_ENGINE_MEC1, qpd->vmid, 466 qpd->ib_base, (uint32_t *)qpd->ib_kaddr, 467 pmf->release_mem_size / sizeof(uint32_t)); 468 } 469 470 static void deallocate_vmid(struct device_queue_manager *dqm, 471 struct qcm_process_device *qpd, 472 struct queue *q) 473 { 474 /* On GFX v7, CP doesn't flush TC at dequeue */ 475 if (q->device->adev->asic_type == CHIP_HAWAII) 476 if (flush_texture_cache_nocpsch(q->device, qpd)) 477 pr_err("Failed to flush TC\n"); 478 479 kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY); 480 481 /* Release the vmid mapping */ 482 set_pasid_vmid_mapping(dqm, 0, qpd->vmid); 483 dqm->vmid_pasid[qpd->vmid] = 0; 484 485 qpd->vmid = 0; 486 q->properties.vmid = 0; 487 } 488 489 static int create_queue_nocpsch(struct device_queue_manager *dqm, 490 struct queue *q, 491 struct qcm_process_device *qpd, 492 const struct kfd_criu_queue_priv_data *qd, 493 const void *restore_mqd, const void *restore_ctl_stack) 494 { 495 struct mqd_manager *mqd_mgr; 496 int retval; 497 498 dqm_lock(dqm); 499 500 if (dqm->total_queue_count >= max_num_of_queues_per_device) { 501 pr_warn("Can't create new usermode queue because %d queues were already created\n", 502 dqm->total_queue_count); 503 retval = -EPERM; 504 goto out_unlock; 505 } 506 507 if (list_empty(&qpd->queues_list)) { 508 retval = allocate_vmid(dqm, qpd, q); 509 if (retval) 510 goto out_unlock; 511 } 512 q->properties.vmid = qpd->vmid; 513 /* 514 * Eviction state logic: mark all queues as evicted, even ones 515 * not currently active. Restoring inactive queues later only 516 * updates the is_evicted flag but is a no-op otherwise. 517 */ 518 q->properties.is_evicted = !!qpd->evicted; 519 520 q->properties.tba_addr = qpd->tba_addr; 521 q->properties.tma_addr = qpd->tma_addr; 522 523 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 524 q->properties.type)]; 525 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) { 526 retval = allocate_hqd(dqm, q); 527 if (retval) 528 goto deallocate_vmid; 529 pr_debug("Loading mqd to hqd on pipe %d, queue %d\n", 530 q->pipe, q->queue); 531 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 532 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 533 retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL); 534 if (retval) 535 goto deallocate_vmid; 536 dqm->asic_ops.init_sdma_vm(dqm, q, qpd); 537 } 538 539 retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL); 540 if (retval) 541 goto out_deallocate_hqd; 542 543 /* Temporarily release dqm lock to avoid a circular lock dependency */ 544 dqm_unlock(dqm); 545 q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties); 546 dqm_lock(dqm); 547 548 if (!q->mqd_mem_obj) { 549 retval = -ENOMEM; 550 goto out_deallocate_doorbell; 551 } 552 553 if (qd) 554 mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr, 555 &q->properties, restore_mqd, restore_ctl_stack, 556 qd->ctl_stack_size); 557 else 558 mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, 559 &q->gart_mqd_addr, &q->properties); 560 561 if (q->properties.is_active) { 562 if (!dqm->sched_running) { 563 WARN_ONCE(1, "Load non-HWS mqd while stopped\n"); 564 goto add_queue_to_list; 565 } 566 567 if (WARN(q->process->mm != current->mm, 568 "should only run in user thread")) 569 retval = -EFAULT; 570 else 571 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, 572 q->queue, &q->properties, current->mm); 573 if (retval) 574 goto out_free_mqd; 575 } 576 577 add_queue_to_list: 578 list_add(&q->list, &qpd->queues_list); 579 qpd->queue_count++; 580 if (q->properties.is_active) 581 increment_queue_count(dqm, qpd, q); 582 583 /* 584 * Unconditionally increment this counter, regardless of the queue's 585 * type or whether the queue is active. 586 */ 587 dqm->total_queue_count++; 588 pr_debug("Total of %d queues are accountable so far\n", 589 dqm->total_queue_count); 590 goto out_unlock; 591 592 out_free_mqd: 593 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 594 out_deallocate_doorbell: 595 deallocate_doorbell(qpd, q); 596 out_deallocate_hqd: 597 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) 598 deallocate_hqd(dqm, q); 599 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 600 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 601 deallocate_sdma_queue(dqm, q); 602 deallocate_vmid: 603 if (list_empty(&qpd->queues_list)) 604 deallocate_vmid(dqm, qpd, q); 605 out_unlock: 606 dqm_unlock(dqm); 607 return retval; 608 } 609 610 static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q) 611 { 612 bool set; 613 int pipe, bit, i; 614 615 set = false; 616 617 for (pipe = dqm->next_pipe_to_allocate, i = 0; 618 i < get_pipes_per_mec(dqm); 619 pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) { 620 621 if (!is_pipe_enabled(dqm, 0, pipe)) 622 continue; 623 624 if (dqm->allocated_queues[pipe] != 0) { 625 bit = ffs(dqm->allocated_queues[pipe]) - 1; 626 dqm->allocated_queues[pipe] &= ~(1 << bit); 627 q->pipe = pipe; 628 q->queue = bit; 629 set = true; 630 break; 631 } 632 } 633 634 if (!set) 635 return -EBUSY; 636 637 pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue); 638 /* horizontal hqd allocation */ 639 dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm); 640 641 return 0; 642 } 643 644 static inline void deallocate_hqd(struct device_queue_manager *dqm, 645 struct queue *q) 646 { 647 dqm->allocated_queues[q->pipe] |= (1 << q->queue); 648 } 649 650 #define SQ_IND_CMD_CMD_KILL 0x00000003 651 #define SQ_IND_CMD_MODE_BROADCAST 0x00000001 652 653 static int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p) 654 { 655 int status = 0; 656 unsigned int vmid; 657 uint16_t queried_pasid; 658 union SQ_CMD_BITS reg_sq_cmd; 659 union GRBM_GFX_INDEX_BITS reg_gfx_index; 660 struct kfd_process_device *pdd; 661 int first_vmid_to_scan = dev->vm_info.first_vmid_kfd; 662 int last_vmid_to_scan = dev->vm_info.last_vmid_kfd; 663 664 reg_sq_cmd.u32All = 0; 665 reg_gfx_index.u32All = 0; 666 667 pr_debug("Killing all process wavefronts\n"); 668 669 if (!dev->kfd2kgd->get_atc_vmid_pasid_mapping_info) { 670 pr_err("no vmid pasid mapping supported \n"); 671 return -EOPNOTSUPP; 672 } 673 674 /* Scan all registers in the range ATC_VMID8_PASID_MAPPING .. 675 * ATC_VMID15_PASID_MAPPING 676 * to check which VMID the current process is mapped to. 677 */ 678 679 for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) { 680 status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info 681 (dev->adev, vmid, &queried_pasid); 682 683 if (status && queried_pasid == p->pasid) { 684 pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n", 685 vmid, p->pasid); 686 break; 687 } 688 } 689 690 if (vmid > last_vmid_to_scan) { 691 pr_err("Didn't find vmid for pasid 0x%x\n", p->pasid); 692 return -EFAULT; 693 } 694 695 /* taking the VMID for that process on the safe way using PDD */ 696 pdd = kfd_get_process_device_data(dev, p); 697 if (!pdd) 698 return -EFAULT; 699 700 reg_gfx_index.bits.sh_broadcast_writes = 1; 701 reg_gfx_index.bits.se_broadcast_writes = 1; 702 reg_gfx_index.bits.instance_broadcast_writes = 1; 703 reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST; 704 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL; 705 reg_sq_cmd.bits.vm_id = vmid; 706 707 dev->kfd2kgd->wave_control_execute(dev->adev, 708 reg_gfx_index.u32All, 709 reg_sq_cmd.u32All); 710 711 return 0; 712 } 713 714 /* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked 715 * to avoid asynchronized access 716 */ 717 static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm, 718 struct qcm_process_device *qpd, 719 struct queue *q) 720 { 721 int retval; 722 struct mqd_manager *mqd_mgr; 723 724 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 725 q->properties.type)]; 726 727 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) 728 deallocate_hqd(dqm, q); 729 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) 730 deallocate_sdma_queue(dqm, q); 731 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 732 deallocate_sdma_queue(dqm, q); 733 else { 734 pr_debug("q->properties.type %d is invalid\n", 735 q->properties.type); 736 return -EINVAL; 737 } 738 dqm->total_queue_count--; 739 740 deallocate_doorbell(qpd, q); 741 742 if (!dqm->sched_running) { 743 WARN_ONCE(1, "Destroy non-HWS queue while stopped\n"); 744 return 0; 745 } 746 747 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, 748 KFD_PREEMPT_TYPE_WAVEFRONT_RESET, 749 KFD_UNMAP_LATENCY_MS, 750 q->pipe, q->queue); 751 if (retval == -ETIME) 752 qpd->reset_wavefronts = true; 753 754 list_del(&q->list); 755 if (list_empty(&qpd->queues_list)) { 756 if (qpd->reset_wavefronts) { 757 pr_warn("Resetting wave fronts (nocpsch) on dev %p\n", 758 dqm->dev); 759 /* dbgdev_wave_reset_wavefronts has to be called before 760 * deallocate_vmid(), i.e. when vmid is still in use. 761 */ 762 dbgdev_wave_reset_wavefronts(dqm->dev, 763 qpd->pqm->process); 764 qpd->reset_wavefronts = false; 765 } 766 767 deallocate_vmid(dqm, qpd, q); 768 } 769 qpd->queue_count--; 770 if (q->properties.is_active) 771 decrement_queue_count(dqm, qpd, q); 772 773 return retval; 774 } 775 776 static int destroy_queue_nocpsch(struct device_queue_manager *dqm, 777 struct qcm_process_device *qpd, 778 struct queue *q) 779 { 780 int retval; 781 uint64_t sdma_val = 0; 782 struct kfd_process_device *pdd = qpd_to_pdd(qpd); 783 struct mqd_manager *mqd_mgr = 784 dqm->mqd_mgrs[get_mqd_type_from_queue_type(q->properties.type)]; 785 786 /* Get the SDMA queue stats */ 787 if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || 788 (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 789 retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr, 790 &sdma_val); 791 if (retval) 792 pr_err("Failed to read SDMA queue counter for queue: %d\n", 793 q->properties.queue_id); 794 } 795 796 dqm_lock(dqm); 797 retval = destroy_queue_nocpsch_locked(dqm, qpd, q); 798 if (!retval) 799 pdd->sdma_past_activity_counter += sdma_val; 800 dqm_unlock(dqm); 801 802 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 803 804 return retval; 805 } 806 807 static int update_queue(struct device_queue_manager *dqm, struct queue *q, 808 struct mqd_update_info *minfo) 809 { 810 int retval = 0; 811 struct mqd_manager *mqd_mgr; 812 struct kfd_process_device *pdd; 813 bool prev_active = false; 814 bool add_queue = false; 815 816 dqm_lock(dqm); 817 pdd = kfd_get_process_device_data(q->device, q->process); 818 if (!pdd) { 819 retval = -ENODEV; 820 goto out_unlock; 821 } 822 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 823 q->properties.type)]; 824 825 /* Save previous activity state for counters */ 826 prev_active = q->properties.is_active; 827 828 /* Make sure the queue is unmapped before updating the MQD */ 829 if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) { 830 if (!dqm->dev->shared_resources.enable_mes) 831 retval = unmap_queues_cpsch(dqm, 832 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false); 833 else if (prev_active) 834 retval = remove_queue_mes(dqm, q, &pdd->qpd); 835 836 if (retval) { 837 pr_err("unmap queue failed\n"); 838 goto out_unlock; 839 } 840 } else if (prev_active && 841 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || 842 q->properties.type == KFD_QUEUE_TYPE_SDMA || 843 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 844 845 if (!dqm->sched_running) { 846 WARN_ONCE(1, "Update non-HWS queue while stopped\n"); 847 goto out_unlock; 848 } 849 850 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, 851 (dqm->dev->cwsr_enabled ? 852 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE : 853 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN), 854 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); 855 if (retval) { 856 pr_err("destroy mqd failed\n"); 857 goto out_unlock; 858 } 859 } 860 861 mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties, minfo); 862 863 /* 864 * check active state vs. the previous state and modify 865 * counter accordingly. map_queues_cpsch uses the 866 * dqm->active_queue_count to determine whether a new runlist must be 867 * uploaded. 868 */ 869 if (q->properties.is_active && !prev_active) { 870 increment_queue_count(dqm, &pdd->qpd, q); 871 } else if (!q->properties.is_active && prev_active) { 872 decrement_queue_count(dqm, &pdd->qpd, q); 873 } else if (q->gws && !q->properties.is_gws) { 874 if (q->properties.is_active) { 875 dqm->gws_queue_count++; 876 pdd->qpd.mapped_gws_queue = true; 877 } 878 q->properties.is_gws = true; 879 } else if (!q->gws && q->properties.is_gws) { 880 if (q->properties.is_active) { 881 dqm->gws_queue_count--; 882 pdd->qpd.mapped_gws_queue = false; 883 } 884 q->properties.is_gws = false; 885 } 886 887 if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) { 888 if (!dqm->dev->shared_resources.enable_mes) 889 retval = map_queues_cpsch(dqm); 890 else if (add_queue) 891 retval = add_queue_mes(dqm, q, &pdd->qpd); 892 } else if (q->properties.is_active && 893 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || 894 q->properties.type == KFD_QUEUE_TYPE_SDMA || 895 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 896 if (WARN(q->process->mm != current->mm, 897 "should only run in user thread")) 898 retval = -EFAULT; 899 else 900 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, 901 q->pipe, q->queue, 902 &q->properties, current->mm); 903 } 904 905 out_unlock: 906 dqm_unlock(dqm); 907 return retval; 908 } 909 910 static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, 911 struct qcm_process_device *qpd) 912 { 913 struct queue *q; 914 struct mqd_manager *mqd_mgr; 915 struct kfd_process_device *pdd; 916 int retval, ret = 0; 917 918 dqm_lock(dqm); 919 if (qpd->evicted++ > 0) /* already evicted, do nothing */ 920 goto out; 921 922 pdd = qpd_to_pdd(qpd); 923 pr_debug_ratelimited("Evicting PASID 0x%x queues\n", 924 pdd->process->pasid); 925 926 pdd->last_evict_timestamp = get_jiffies_64(); 927 /* Mark all queues as evicted. Deactivate all active queues on 928 * the qpd. 929 */ 930 list_for_each_entry(q, &qpd->queues_list, list) { 931 q->properties.is_evicted = true; 932 if (!q->properties.is_active) 933 continue; 934 935 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 936 q->properties.type)]; 937 q->properties.is_active = false; 938 decrement_queue_count(dqm, qpd, q); 939 940 if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n")) 941 continue; 942 943 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, 944 (dqm->dev->cwsr_enabled ? 945 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE : 946 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN), 947 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); 948 if (retval && !ret) 949 /* Return the first error, but keep going to 950 * maintain a consistent eviction state 951 */ 952 ret = retval; 953 } 954 955 out: 956 dqm_unlock(dqm); 957 return ret; 958 } 959 960 static int evict_process_queues_cpsch(struct device_queue_manager *dqm, 961 struct qcm_process_device *qpd) 962 { 963 struct queue *q; 964 struct kfd_process_device *pdd; 965 int retval = 0; 966 967 dqm_lock(dqm); 968 if (qpd->evicted++ > 0) /* already evicted, do nothing */ 969 goto out; 970 971 pdd = qpd_to_pdd(qpd); 972 pr_debug_ratelimited("Evicting PASID 0x%x queues\n", 973 pdd->process->pasid); 974 975 /* Mark all queues as evicted. Deactivate all active queues on 976 * the qpd. 977 */ 978 list_for_each_entry(q, &qpd->queues_list, list) { 979 q->properties.is_evicted = true; 980 if (!q->properties.is_active) 981 continue; 982 983 q->properties.is_active = false; 984 decrement_queue_count(dqm, qpd, q); 985 986 if (dqm->dev->shared_resources.enable_mes) { 987 retval = remove_queue_mes(dqm, q, qpd); 988 if (retval) { 989 pr_err("Failed to evict queue %d\n", 990 q->properties.queue_id); 991 goto out; 992 } 993 } 994 } 995 pdd->last_evict_timestamp = get_jiffies_64(); 996 if (!dqm->dev->shared_resources.enable_mes) 997 retval = execute_queues_cpsch(dqm, 998 qpd->is_debug ? 999 KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES : 1000 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 1001 1002 out: 1003 dqm_unlock(dqm); 1004 return retval; 1005 } 1006 1007 static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, 1008 struct qcm_process_device *qpd) 1009 { 1010 struct mm_struct *mm = NULL; 1011 struct queue *q; 1012 struct mqd_manager *mqd_mgr; 1013 struct kfd_process_device *pdd; 1014 uint64_t pd_base; 1015 uint64_t eviction_duration; 1016 int retval, ret = 0; 1017 1018 pdd = qpd_to_pdd(qpd); 1019 /* Retrieve PD base */ 1020 pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); 1021 1022 dqm_lock(dqm); 1023 if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */ 1024 goto out; 1025 if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */ 1026 qpd->evicted--; 1027 goto out; 1028 } 1029 1030 pr_debug_ratelimited("Restoring PASID 0x%x queues\n", 1031 pdd->process->pasid); 1032 1033 /* Update PD Base in QPD */ 1034 qpd->page_table_base = pd_base; 1035 pr_debug("Updated PD address to 0x%llx\n", pd_base); 1036 1037 if (!list_empty(&qpd->queues_list)) { 1038 dqm->dev->kfd2kgd->set_vm_context_page_table_base( 1039 dqm->dev->adev, 1040 qpd->vmid, 1041 qpd->page_table_base); 1042 kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY); 1043 } 1044 1045 /* Take a safe reference to the mm_struct, which may otherwise 1046 * disappear even while the kfd_process is still referenced. 1047 */ 1048 mm = get_task_mm(pdd->process->lead_thread); 1049 if (!mm) { 1050 ret = -EFAULT; 1051 goto out; 1052 } 1053 1054 /* Remove the eviction flags. Activate queues that are not 1055 * inactive for other reasons. 1056 */ 1057 list_for_each_entry(q, &qpd->queues_list, list) { 1058 q->properties.is_evicted = false; 1059 if (!QUEUE_IS_ACTIVE(q->properties)) 1060 continue; 1061 1062 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 1063 q->properties.type)]; 1064 q->properties.is_active = true; 1065 increment_queue_count(dqm, qpd, q); 1066 1067 if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n")) 1068 continue; 1069 1070 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, 1071 q->queue, &q->properties, mm); 1072 if (retval && !ret) 1073 /* Return the first error, but keep going to 1074 * maintain a consistent eviction state 1075 */ 1076 ret = retval; 1077 } 1078 qpd->evicted = 0; 1079 eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp; 1080 atomic64_add(eviction_duration, &pdd->evict_duration_counter); 1081 out: 1082 if (mm) 1083 mmput(mm); 1084 dqm_unlock(dqm); 1085 return ret; 1086 } 1087 1088 static int restore_process_queues_cpsch(struct device_queue_manager *dqm, 1089 struct qcm_process_device *qpd) 1090 { 1091 struct queue *q; 1092 struct kfd_process_device *pdd; 1093 uint64_t pd_base; 1094 uint64_t eviction_duration; 1095 int retval = 0; 1096 1097 pdd = qpd_to_pdd(qpd); 1098 /* Retrieve PD base */ 1099 pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); 1100 1101 dqm_lock(dqm); 1102 if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */ 1103 goto out; 1104 if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */ 1105 qpd->evicted--; 1106 goto out; 1107 } 1108 1109 pr_debug_ratelimited("Restoring PASID 0x%x queues\n", 1110 pdd->process->pasid); 1111 1112 /* Update PD Base in QPD */ 1113 qpd->page_table_base = pd_base; 1114 pr_debug("Updated PD address to 0x%llx\n", pd_base); 1115 1116 /* activate all active queues on the qpd */ 1117 list_for_each_entry(q, &qpd->queues_list, list) { 1118 q->properties.is_evicted = false; 1119 if (!QUEUE_IS_ACTIVE(q->properties)) 1120 continue; 1121 1122 q->properties.is_active = true; 1123 increment_queue_count(dqm, &pdd->qpd, q); 1124 1125 if (dqm->dev->shared_resources.enable_mes) { 1126 retval = add_queue_mes(dqm, q, qpd); 1127 if (retval) { 1128 pr_err("Failed to restore queue %d\n", 1129 q->properties.queue_id); 1130 goto out; 1131 } 1132 } 1133 } 1134 if (!dqm->dev->shared_resources.enable_mes) 1135 retval = execute_queues_cpsch(dqm, 1136 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 1137 qpd->evicted = 0; 1138 eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp; 1139 atomic64_add(eviction_duration, &pdd->evict_duration_counter); 1140 out: 1141 dqm_unlock(dqm); 1142 return retval; 1143 } 1144 1145 static int register_process(struct device_queue_manager *dqm, 1146 struct qcm_process_device *qpd) 1147 { 1148 struct device_process_node *n; 1149 struct kfd_process_device *pdd; 1150 uint64_t pd_base; 1151 int retval; 1152 1153 n = kzalloc(sizeof(*n), GFP_KERNEL); 1154 if (!n) 1155 return -ENOMEM; 1156 1157 n->qpd = qpd; 1158 1159 pdd = qpd_to_pdd(qpd); 1160 /* Retrieve PD base */ 1161 pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); 1162 1163 dqm_lock(dqm); 1164 list_add(&n->list, &dqm->queues); 1165 1166 /* Update PD Base in QPD */ 1167 qpd->page_table_base = pd_base; 1168 pr_debug("Updated PD address to 0x%llx\n", pd_base); 1169 1170 retval = dqm->asic_ops.update_qpd(dqm, qpd); 1171 1172 dqm->processes_count++; 1173 1174 dqm_unlock(dqm); 1175 1176 /* Outside the DQM lock because under the DQM lock we can't do 1177 * reclaim or take other locks that others hold while reclaiming. 1178 */ 1179 kfd_inc_compute_active(dqm->dev); 1180 1181 return retval; 1182 } 1183 1184 static int unregister_process(struct device_queue_manager *dqm, 1185 struct qcm_process_device *qpd) 1186 { 1187 int retval; 1188 struct device_process_node *cur, *next; 1189 1190 pr_debug("qpd->queues_list is %s\n", 1191 list_empty(&qpd->queues_list) ? "empty" : "not empty"); 1192 1193 retval = 0; 1194 dqm_lock(dqm); 1195 1196 list_for_each_entry_safe(cur, next, &dqm->queues, list) { 1197 if (qpd == cur->qpd) { 1198 list_del(&cur->list); 1199 kfree(cur); 1200 dqm->processes_count--; 1201 goto out; 1202 } 1203 } 1204 /* qpd not found in dqm list */ 1205 retval = 1; 1206 out: 1207 dqm_unlock(dqm); 1208 1209 /* Outside the DQM lock because under the DQM lock we can't do 1210 * reclaim or take other locks that others hold while reclaiming. 1211 */ 1212 if (!retval) 1213 kfd_dec_compute_active(dqm->dev); 1214 1215 return retval; 1216 } 1217 1218 static int 1219 set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid, 1220 unsigned int vmid) 1221 { 1222 return dqm->dev->kfd2kgd->set_pasid_vmid_mapping( 1223 dqm->dev->adev, pasid, vmid); 1224 } 1225 1226 static void init_interrupts(struct device_queue_manager *dqm) 1227 { 1228 unsigned int i; 1229 1230 for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++) 1231 if (is_pipe_enabled(dqm, 0, i)) 1232 dqm->dev->kfd2kgd->init_interrupts(dqm->dev->adev, i); 1233 } 1234 1235 static int initialize_nocpsch(struct device_queue_manager *dqm) 1236 { 1237 int pipe, queue; 1238 1239 pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm)); 1240 1241 dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm), 1242 sizeof(unsigned int), GFP_KERNEL); 1243 if (!dqm->allocated_queues) 1244 return -ENOMEM; 1245 1246 mutex_init(&dqm->lock_hidden); 1247 INIT_LIST_HEAD(&dqm->queues); 1248 dqm->active_queue_count = dqm->next_pipe_to_allocate = 0; 1249 dqm->active_cp_queue_count = 0; 1250 dqm->gws_queue_count = 0; 1251 1252 for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { 1253 int pipe_offset = pipe * get_queues_per_pipe(dqm); 1254 1255 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) 1256 if (test_bit(pipe_offset + queue, 1257 dqm->dev->shared_resources.cp_queue_bitmap)) 1258 dqm->allocated_queues[pipe] |= 1 << queue; 1259 } 1260 1261 memset(dqm->vmid_pasid, 0, sizeof(dqm->vmid_pasid)); 1262 1263 dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm)); 1264 dqm->sdma_bitmap &= ~(get_reserved_sdma_queues_bitmap(dqm)); 1265 pr_info("sdma_bitmap: %llx\n", dqm->sdma_bitmap); 1266 1267 dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm)); 1268 1269 return 0; 1270 } 1271 1272 static void uninitialize(struct device_queue_manager *dqm) 1273 { 1274 int i; 1275 1276 WARN_ON(dqm->active_queue_count > 0 || dqm->processes_count > 0); 1277 1278 kfree(dqm->allocated_queues); 1279 for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++) 1280 kfree(dqm->mqd_mgrs[i]); 1281 mutex_destroy(&dqm->lock_hidden); 1282 } 1283 1284 static int start_nocpsch(struct device_queue_manager *dqm) 1285 { 1286 int r = 0; 1287 1288 pr_info("SW scheduler is used"); 1289 init_interrupts(dqm); 1290 1291 if (dqm->dev->adev->asic_type == CHIP_HAWAII) 1292 r = pm_init(&dqm->packet_mgr, dqm); 1293 if (!r) 1294 dqm->sched_running = true; 1295 1296 return r; 1297 } 1298 1299 static int stop_nocpsch(struct device_queue_manager *dqm) 1300 { 1301 if (dqm->dev->adev->asic_type == CHIP_HAWAII) 1302 pm_uninit(&dqm->packet_mgr, false); 1303 dqm->sched_running = false; 1304 1305 return 0; 1306 } 1307 1308 static void pre_reset(struct device_queue_manager *dqm) 1309 { 1310 dqm_lock(dqm); 1311 dqm->is_resetting = true; 1312 dqm_unlock(dqm); 1313 } 1314 1315 static int allocate_sdma_queue(struct device_queue_manager *dqm, 1316 struct queue *q, const uint32_t *restore_sdma_id) 1317 { 1318 int bit; 1319 1320 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { 1321 if (dqm->sdma_bitmap == 0) { 1322 pr_err("No more SDMA queue to allocate\n"); 1323 return -ENOMEM; 1324 } 1325 1326 if (restore_sdma_id) { 1327 /* Re-use existing sdma_id */ 1328 if (!(dqm->sdma_bitmap & (1ULL << *restore_sdma_id))) { 1329 pr_err("SDMA queue already in use\n"); 1330 return -EBUSY; 1331 } 1332 dqm->sdma_bitmap &= ~(1ULL << *restore_sdma_id); 1333 q->sdma_id = *restore_sdma_id; 1334 } else { 1335 /* Find first available sdma_id */ 1336 bit = __ffs64(dqm->sdma_bitmap); 1337 dqm->sdma_bitmap &= ~(1ULL << bit); 1338 q->sdma_id = bit; 1339 } 1340 1341 q->properties.sdma_engine_id = q->sdma_id % 1342 kfd_get_num_sdma_engines(dqm->dev); 1343 q->properties.sdma_queue_id = q->sdma_id / 1344 kfd_get_num_sdma_engines(dqm->dev); 1345 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 1346 if (dqm->xgmi_sdma_bitmap == 0) { 1347 pr_err("No more XGMI SDMA queue to allocate\n"); 1348 return -ENOMEM; 1349 } 1350 if (restore_sdma_id) { 1351 /* Re-use existing sdma_id */ 1352 if (!(dqm->xgmi_sdma_bitmap & (1ULL << *restore_sdma_id))) { 1353 pr_err("SDMA queue already in use\n"); 1354 return -EBUSY; 1355 } 1356 dqm->xgmi_sdma_bitmap &= ~(1ULL << *restore_sdma_id); 1357 q->sdma_id = *restore_sdma_id; 1358 } else { 1359 bit = __ffs64(dqm->xgmi_sdma_bitmap); 1360 dqm->xgmi_sdma_bitmap &= ~(1ULL << bit); 1361 q->sdma_id = bit; 1362 } 1363 /* sdma_engine_id is sdma id including 1364 * both PCIe-optimized SDMAs and XGMI- 1365 * optimized SDMAs. The calculation below 1366 * assumes the first N engines are always 1367 * PCIe-optimized ones 1368 */ 1369 q->properties.sdma_engine_id = 1370 kfd_get_num_sdma_engines(dqm->dev) + 1371 q->sdma_id % kfd_get_num_xgmi_sdma_engines(dqm->dev); 1372 q->properties.sdma_queue_id = q->sdma_id / 1373 kfd_get_num_xgmi_sdma_engines(dqm->dev); 1374 } 1375 1376 pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); 1377 pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id); 1378 1379 return 0; 1380 } 1381 1382 static void deallocate_sdma_queue(struct device_queue_manager *dqm, 1383 struct queue *q) 1384 { 1385 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { 1386 if (q->sdma_id >= get_num_sdma_queues(dqm)) 1387 return; 1388 dqm->sdma_bitmap |= (1ULL << q->sdma_id); 1389 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 1390 if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm)) 1391 return; 1392 dqm->xgmi_sdma_bitmap |= (1ULL << q->sdma_id); 1393 } 1394 } 1395 1396 /* 1397 * Device Queue Manager implementation for cp scheduler 1398 */ 1399 1400 static int set_sched_resources(struct device_queue_manager *dqm) 1401 { 1402 int i, mec; 1403 struct scheduling_resources res; 1404 1405 res.vmid_mask = dqm->dev->shared_resources.compute_vmid_bitmap; 1406 1407 res.queue_mask = 0; 1408 for (i = 0; i < KGD_MAX_QUEUES; ++i) { 1409 mec = (i / dqm->dev->shared_resources.num_queue_per_pipe) 1410 / dqm->dev->shared_resources.num_pipe_per_mec; 1411 1412 if (!test_bit(i, dqm->dev->shared_resources.cp_queue_bitmap)) 1413 continue; 1414 1415 /* only acquire queues from the first MEC */ 1416 if (mec > 0) 1417 continue; 1418 1419 /* This situation may be hit in the future if a new HW 1420 * generation exposes more than 64 queues. If so, the 1421 * definition of res.queue_mask needs updating 1422 */ 1423 if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) { 1424 pr_err("Invalid queue enabled by amdgpu: %d\n", i); 1425 break; 1426 } 1427 1428 res.queue_mask |= 1ull 1429 << amdgpu_queue_mask_bit_to_set_resource_bit( 1430 dqm->dev->adev, i); 1431 } 1432 res.gws_mask = ~0ull; 1433 res.oac_mask = res.gds_heap_base = res.gds_heap_size = 0; 1434 1435 pr_debug("Scheduling resources:\n" 1436 "vmid mask: 0x%8X\n" 1437 "queue mask: 0x%8llX\n", 1438 res.vmid_mask, res.queue_mask); 1439 1440 return pm_send_set_resources(&dqm->packet_mgr, &res); 1441 } 1442 1443 static int initialize_cpsch(struct device_queue_manager *dqm) 1444 { 1445 uint64_t num_sdma_queues; 1446 uint64_t num_xgmi_sdma_queues; 1447 1448 pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm)); 1449 1450 mutex_init(&dqm->lock_hidden); 1451 INIT_LIST_HEAD(&dqm->queues); 1452 dqm->active_queue_count = dqm->processes_count = 0; 1453 dqm->active_cp_queue_count = 0; 1454 dqm->gws_queue_count = 0; 1455 dqm->active_runlist = false; 1456 1457 num_sdma_queues = get_num_sdma_queues(dqm); 1458 if (num_sdma_queues >= BITS_PER_TYPE(dqm->sdma_bitmap)) 1459 dqm->sdma_bitmap = ULLONG_MAX; 1460 else 1461 dqm->sdma_bitmap = (BIT_ULL(num_sdma_queues) - 1); 1462 1463 dqm->sdma_bitmap &= ~(get_reserved_sdma_queues_bitmap(dqm)); 1464 pr_info("sdma_bitmap: %llx\n", dqm->sdma_bitmap); 1465 1466 num_xgmi_sdma_queues = get_num_xgmi_sdma_queues(dqm); 1467 if (num_xgmi_sdma_queues >= BITS_PER_TYPE(dqm->xgmi_sdma_bitmap)) 1468 dqm->xgmi_sdma_bitmap = ULLONG_MAX; 1469 else 1470 dqm->xgmi_sdma_bitmap = (BIT_ULL(num_xgmi_sdma_queues) - 1); 1471 1472 INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception); 1473 1474 return 0; 1475 } 1476 1477 static int start_cpsch(struct device_queue_manager *dqm) 1478 { 1479 int retval; 1480 1481 retval = 0; 1482 1483 dqm_lock(dqm); 1484 1485 if (!dqm->dev->shared_resources.enable_mes) { 1486 retval = pm_init(&dqm->packet_mgr, dqm); 1487 if (retval) 1488 goto fail_packet_manager_init; 1489 1490 retval = set_sched_resources(dqm); 1491 if (retval) 1492 goto fail_set_sched_resources; 1493 } 1494 pr_debug("Allocating fence memory\n"); 1495 1496 /* allocate fence memory on the gart */ 1497 retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr), 1498 &dqm->fence_mem); 1499 1500 if (retval) 1501 goto fail_allocate_vidmem; 1502 1503 dqm->fence_addr = (uint64_t *)dqm->fence_mem->cpu_ptr; 1504 dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr; 1505 1506 init_interrupts(dqm); 1507 1508 /* clear hang status when driver try to start the hw scheduler */ 1509 dqm->is_hws_hang = false; 1510 dqm->is_resetting = false; 1511 dqm->sched_running = true; 1512 if (!dqm->dev->shared_resources.enable_mes) 1513 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 1514 dqm_unlock(dqm); 1515 1516 return 0; 1517 fail_allocate_vidmem: 1518 fail_set_sched_resources: 1519 if (!dqm->dev->shared_resources.enable_mes) 1520 pm_uninit(&dqm->packet_mgr, false); 1521 fail_packet_manager_init: 1522 dqm_unlock(dqm); 1523 return retval; 1524 } 1525 1526 static int stop_cpsch(struct device_queue_manager *dqm) 1527 { 1528 bool hanging; 1529 1530 dqm_lock(dqm); 1531 if (!dqm->sched_running) { 1532 dqm_unlock(dqm); 1533 return 0; 1534 } 1535 1536 if (!dqm->is_hws_hang) { 1537 if (!dqm->dev->shared_resources.enable_mes) 1538 unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, false); 1539 else 1540 remove_all_queues_mes(dqm); 1541 } 1542 1543 hanging = dqm->is_hws_hang || dqm->is_resetting; 1544 dqm->sched_running = false; 1545 1546 if (!dqm->dev->shared_resources.enable_mes) 1547 pm_release_ib(&dqm->packet_mgr); 1548 1549 kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); 1550 if (!dqm->dev->shared_resources.enable_mes) 1551 pm_uninit(&dqm->packet_mgr, hanging); 1552 dqm_unlock(dqm); 1553 1554 return 0; 1555 } 1556 1557 static int create_kernel_queue_cpsch(struct device_queue_manager *dqm, 1558 struct kernel_queue *kq, 1559 struct qcm_process_device *qpd) 1560 { 1561 dqm_lock(dqm); 1562 if (dqm->total_queue_count >= max_num_of_queues_per_device) { 1563 pr_warn("Can't create new kernel queue because %d queues were already created\n", 1564 dqm->total_queue_count); 1565 dqm_unlock(dqm); 1566 return -EPERM; 1567 } 1568 1569 /* 1570 * Unconditionally increment this counter, regardless of the queue's 1571 * type or whether the queue is active. 1572 */ 1573 dqm->total_queue_count++; 1574 pr_debug("Total of %d queues are accountable so far\n", 1575 dqm->total_queue_count); 1576 1577 list_add(&kq->list, &qpd->priv_queue_list); 1578 increment_queue_count(dqm, qpd, kq->queue); 1579 qpd->is_debug = true; 1580 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 1581 dqm_unlock(dqm); 1582 1583 return 0; 1584 } 1585 1586 static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm, 1587 struct kernel_queue *kq, 1588 struct qcm_process_device *qpd) 1589 { 1590 dqm_lock(dqm); 1591 list_del(&kq->list); 1592 decrement_queue_count(dqm, qpd, kq->queue); 1593 qpd->is_debug = false; 1594 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); 1595 /* 1596 * Unconditionally decrement this counter, regardless of the queue's 1597 * type. 1598 */ 1599 dqm->total_queue_count--; 1600 pr_debug("Total of %d queues are accountable so far\n", 1601 dqm->total_queue_count); 1602 dqm_unlock(dqm); 1603 } 1604 1605 static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, 1606 struct qcm_process_device *qpd, 1607 const struct kfd_criu_queue_priv_data *qd, 1608 const void *restore_mqd, const void *restore_ctl_stack) 1609 { 1610 int retval; 1611 struct mqd_manager *mqd_mgr; 1612 1613 if (dqm->total_queue_count >= max_num_of_queues_per_device) { 1614 pr_warn("Can't create new usermode queue because %d queues were already created\n", 1615 dqm->total_queue_count); 1616 retval = -EPERM; 1617 goto out; 1618 } 1619 1620 if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 1621 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 1622 dqm_lock(dqm); 1623 retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL); 1624 dqm_unlock(dqm); 1625 if (retval) 1626 goto out; 1627 } 1628 1629 retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL); 1630 if (retval) 1631 goto out_deallocate_sdma_queue; 1632 1633 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 1634 q->properties.type)]; 1635 1636 if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 1637 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 1638 dqm->asic_ops.init_sdma_vm(dqm, q, qpd); 1639 q->properties.tba_addr = qpd->tba_addr; 1640 q->properties.tma_addr = qpd->tma_addr; 1641 q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties); 1642 if (!q->mqd_mem_obj) { 1643 retval = -ENOMEM; 1644 goto out_deallocate_doorbell; 1645 } 1646 1647 dqm_lock(dqm); 1648 /* 1649 * Eviction state logic: mark all queues as evicted, even ones 1650 * not currently active. Restoring inactive queues later only 1651 * updates the is_evicted flag but is a no-op otherwise. 1652 */ 1653 q->properties.is_evicted = !!qpd->evicted; 1654 1655 if (qd) 1656 mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr, 1657 &q->properties, restore_mqd, restore_ctl_stack, 1658 qd->ctl_stack_size); 1659 else 1660 mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, 1661 &q->gart_mqd_addr, &q->properties); 1662 1663 list_add(&q->list, &qpd->queues_list); 1664 qpd->queue_count++; 1665 1666 if (q->properties.is_active) { 1667 increment_queue_count(dqm, qpd, q); 1668 1669 if (!dqm->dev->shared_resources.enable_mes) { 1670 retval = execute_queues_cpsch(dqm, 1671 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 1672 } else { 1673 retval = add_queue_mes(dqm, q, qpd); 1674 if (retval) 1675 goto cleanup_queue; 1676 } 1677 } 1678 1679 /* 1680 * Unconditionally increment this counter, regardless of the queue's 1681 * type or whether the queue is active. 1682 */ 1683 dqm->total_queue_count++; 1684 1685 pr_debug("Total of %d queues are accountable so far\n", 1686 dqm->total_queue_count); 1687 1688 dqm_unlock(dqm); 1689 return retval; 1690 1691 cleanup_queue: 1692 qpd->queue_count--; 1693 list_del(&q->list); 1694 if (q->properties.is_active) 1695 decrement_queue_count(dqm, qpd, q); 1696 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 1697 dqm_unlock(dqm); 1698 out_deallocate_doorbell: 1699 deallocate_doorbell(qpd, q); 1700 out_deallocate_sdma_queue: 1701 if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 1702 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 1703 dqm_lock(dqm); 1704 deallocate_sdma_queue(dqm, q); 1705 dqm_unlock(dqm); 1706 } 1707 out: 1708 return retval; 1709 } 1710 1711 int amdkfd_fence_wait_timeout(uint64_t *fence_addr, 1712 uint64_t fence_value, 1713 unsigned int timeout_ms) 1714 { 1715 unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies; 1716 1717 while (*fence_addr != fence_value) { 1718 if (time_after(jiffies, end_jiffies)) { 1719 pr_err("qcm fence wait loop timeout expired\n"); 1720 /* In HWS case, this is used to halt the driver thread 1721 * in order not to mess up CP states before doing 1722 * scandumps for FW debugging. 1723 */ 1724 while (halt_if_hws_hang) 1725 schedule(); 1726 1727 return -ETIME; 1728 } 1729 schedule(); 1730 } 1731 1732 return 0; 1733 } 1734 1735 /* dqm->lock mutex has to be locked before calling this function */ 1736 static int map_queues_cpsch(struct device_queue_manager *dqm) 1737 { 1738 int retval; 1739 1740 if (!dqm->sched_running) 1741 return 0; 1742 if (dqm->active_queue_count <= 0 || dqm->processes_count <= 0) 1743 return 0; 1744 if (dqm->active_runlist) 1745 return 0; 1746 1747 retval = pm_send_runlist(&dqm->packet_mgr, &dqm->queues); 1748 pr_debug("%s sent runlist\n", __func__); 1749 if (retval) { 1750 pr_err("failed to execute runlist\n"); 1751 return retval; 1752 } 1753 dqm->active_runlist = true; 1754 1755 return retval; 1756 } 1757 1758 /* dqm->lock mutex has to be locked before calling this function */ 1759 static int unmap_queues_cpsch(struct device_queue_manager *dqm, 1760 enum kfd_unmap_queues_filter filter, 1761 uint32_t filter_param, bool reset) 1762 { 1763 int retval = 0; 1764 struct mqd_manager *mqd_mgr; 1765 1766 if (!dqm->sched_running) 1767 return 0; 1768 if (dqm->is_hws_hang || dqm->is_resetting) 1769 return -EIO; 1770 if (!dqm->active_runlist) 1771 return retval; 1772 1773 retval = pm_send_unmap_queue(&dqm->packet_mgr, filter, filter_param, reset); 1774 if (retval) 1775 return retval; 1776 1777 *dqm->fence_addr = KFD_FENCE_INIT; 1778 pm_send_query_status(&dqm->packet_mgr, dqm->fence_gpu_addr, 1779 KFD_FENCE_COMPLETED); 1780 /* should be timed out */ 1781 retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED, 1782 queue_preemption_timeout_ms); 1783 if (retval) { 1784 pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n"); 1785 kfd_hws_hang(dqm); 1786 return retval; 1787 } 1788 1789 /* In the current MEC firmware implementation, if compute queue 1790 * doesn't response to the preemption request in time, HIQ will 1791 * abandon the unmap request without returning any timeout error 1792 * to driver. Instead, MEC firmware will log the doorbell of the 1793 * unresponding compute queue to HIQ.MQD.queue_doorbell_id fields. 1794 * To make sure the queue unmap was successful, driver need to 1795 * check those fields 1796 */ 1797 mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]; 1798 if (mqd_mgr->read_doorbell_id(dqm->packet_mgr.priv_queue->queue->mqd)) { 1799 pr_err("HIQ MQD's queue_doorbell_id0 is not 0, Queue preemption time out\n"); 1800 while (halt_if_hws_hang) 1801 schedule(); 1802 return -ETIME; 1803 } 1804 1805 pm_release_ib(&dqm->packet_mgr); 1806 dqm->active_runlist = false; 1807 1808 return retval; 1809 } 1810 1811 /* only for compute queue */ 1812 static int reset_queues_cpsch(struct device_queue_manager *dqm, 1813 uint16_t pasid) 1814 { 1815 int retval; 1816 1817 dqm_lock(dqm); 1818 1819 retval = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_BY_PASID, 1820 pasid, true); 1821 1822 dqm_unlock(dqm); 1823 return retval; 1824 } 1825 1826 /* dqm->lock mutex has to be locked before calling this function */ 1827 static int execute_queues_cpsch(struct device_queue_manager *dqm, 1828 enum kfd_unmap_queues_filter filter, 1829 uint32_t filter_param) 1830 { 1831 int retval; 1832 1833 if (dqm->is_hws_hang) 1834 return -EIO; 1835 retval = unmap_queues_cpsch(dqm, filter, filter_param, false); 1836 if (retval) 1837 return retval; 1838 1839 return map_queues_cpsch(dqm); 1840 } 1841 1842 static int destroy_queue_cpsch(struct device_queue_manager *dqm, 1843 struct qcm_process_device *qpd, 1844 struct queue *q) 1845 { 1846 int retval; 1847 struct mqd_manager *mqd_mgr; 1848 uint64_t sdma_val = 0; 1849 struct kfd_process_device *pdd = qpd_to_pdd(qpd); 1850 1851 /* Get the SDMA queue stats */ 1852 if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || 1853 (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 1854 retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr, 1855 &sdma_val); 1856 if (retval) 1857 pr_err("Failed to read SDMA queue counter for queue: %d\n", 1858 q->properties.queue_id); 1859 } 1860 1861 retval = 0; 1862 1863 /* remove queue from list to prevent rescheduling after preemption */ 1864 dqm_lock(dqm); 1865 1866 if (qpd->is_debug) { 1867 /* 1868 * error, currently we do not allow to destroy a queue 1869 * of a currently debugged process 1870 */ 1871 retval = -EBUSY; 1872 goto failed_try_destroy_debugged_queue; 1873 1874 } 1875 1876 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 1877 q->properties.type)]; 1878 1879 deallocate_doorbell(qpd, q); 1880 1881 if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || 1882 (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 1883 deallocate_sdma_queue(dqm, q); 1884 pdd->sdma_past_activity_counter += sdma_val; 1885 } 1886 1887 list_del(&q->list); 1888 qpd->queue_count--; 1889 if (q->properties.is_active) { 1890 if (!dqm->dev->shared_resources.enable_mes) { 1891 decrement_queue_count(dqm, qpd, q); 1892 retval = execute_queues_cpsch(dqm, 1893 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 1894 if (retval == -ETIME) 1895 qpd->reset_wavefronts = true; 1896 } else { 1897 retval = remove_queue_mes(dqm, q, qpd); 1898 } 1899 } 1900 1901 /* 1902 * Unconditionally decrement this counter, regardless of the queue's 1903 * type 1904 */ 1905 dqm->total_queue_count--; 1906 pr_debug("Total of %d queues are accountable so far\n", 1907 dqm->total_queue_count); 1908 1909 dqm_unlock(dqm); 1910 1911 /* Do free_mqd after dqm_unlock(dqm) to avoid circular locking */ 1912 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 1913 1914 return retval; 1915 1916 failed_try_destroy_debugged_queue: 1917 1918 dqm_unlock(dqm); 1919 return retval; 1920 } 1921 1922 /* 1923 * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to 1924 * stay in user mode. 1925 */ 1926 #define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL 1927 /* APE1 limit is inclusive and 64K aligned. */ 1928 #define APE1_LIMIT_ALIGNMENT 0xFFFF 1929 1930 static bool set_cache_memory_policy(struct device_queue_manager *dqm, 1931 struct qcm_process_device *qpd, 1932 enum cache_policy default_policy, 1933 enum cache_policy alternate_policy, 1934 void __user *alternate_aperture_base, 1935 uint64_t alternate_aperture_size) 1936 { 1937 bool retval = true; 1938 1939 if (!dqm->asic_ops.set_cache_memory_policy) 1940 return retval; 1941 1942 dqm_lock(dqm); 1943 1944 if (alternate_aperture_size == 0) { 1945 /* base > limit disables APE1 */ 1946 qpd->sh_mem_ape1_base = 1; 1947 qpd->sh_mem_ape1_limit = 0; 1948 } else { 1949 /* 1950 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]}, 1951 * SH_MEM_APE1_BASE[31:0], 0x0000 } 1952 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]}, 1953 * SH_MEM_APE1_LIMIT[31:0], 0xFFFF } 1954 * Verify that the base and size parameters can be 1955 * represented in this format and convert them. 1956 * Additionally restrict APE1 to user-mode addresses. 1957 */ 1958 1959 uint64_t base = (uintptr_t)alternate_aperture_base; 1960 uint64_t limit = base + alternate_aperture_size - 1; 1961 1962 if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 || 1963 (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) { 1964 retval = false; 1965 goto out; 1966 } 1967 1968 qpd->sh_mem_ape1_base = base >> 16; 1969 qpd->sh_mem_ape1_limit = limit >> 16; 1970 } 1971 1972 retval = dqm->asic_ops.set_cache_memory_policy( 1973 dqm, 1974 qpd, 1975 default_policy, 1976 alternate_policy, 1977 alternate_aperture_base, 1978 alternate_aperture_size); 1979 1980 if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0)) 1981 program_sh_mem_settings(dqm, qpd); 1982 1983 pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n", 1984 qpd->sh_mem_config, qpd->sh_mem_ape1_base, 1985 qpd->sh_mem_ape1_limit); 1986 1987 out: 1988 dqm_unlock(dqm); 1989 return retval; 1990 } 1991 1992 static int process_termination_nocpsch(struct device_queue_manager *dqm, 1993 struct qcm_process_device *qpd) 1994 { 1995 struct queue *q; 1996 struct device_process_node *cur, *next_dpn; 1997 int retval = 0; 1998 bool found = false; 1999 2000 dqm_lock(dqm); 2001 2002 /* Clear all user mode queues */ 2003 while (!list_empty(&qpd->queues_list)) { 2004 struct mqd_manager *mqd_mgr; 2005 int ret; 2006 2007 q = list_first_entry(&qpd->queues_list, struct queue, list); 2008 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 2009 q->properties.type)]; 2010 ret = destroy_queue_nocpsch_locked(dqm, qpd, q); 2011 if (ret) 2012 retval = ret; 2013 dqm_unlock(dqm); 2014 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 2015 dqm_lock(dqm); 2016 } 2017 2018 /* Unregister process */ 2019 list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) { 2020 if (qpd == cur->qpd) { 2021 list_del(&cur->list); 2022 kfree(cur); 2023 dqm->processes_count--; 2024 found = true; 2025 break; 2026 } 2027 } 2028 2029 dqm_unlock(dqm); 2030 2031 /* Outside the DQM lock because under the DQM lock we can't do 2032 * reclaim or take other locks that others hold while reclaiming. 2033 */ 2034 if (found) 2035 kfd_dec_compute_active(dqm->dev); 2036 2037 return retval; 2038 } 2039 2040 static int get_wave_state(struct device_queue_manager *dqm, 2041 struct queue *q, 2042 void __user *ctl_stack, 2043 u32 *ctl_stack_used_size, 2044 u32 *save_area_used_size) 2045 { 2046 struct mqd_manager *mqd_mgr; 2047 2048 dqm_lock(dqm); 2049 2050 mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP]; 2051 2052 if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE || 2053 q->properties.is_active || !q->device->cwsr_enabled || 2054 !mqd_mgr->get_wave_state) { 2055 dqm_unlock(dqm); 2056 return -EINVAL; 2057 } 2058 2059 dqm_unlock(dqm); 2060 2061 /* 2062 * get_wave_state is outside the dqm lock to prevent circular locking 2063 * and the queue should be protected against destruction by the process 2064 * lock. 2065 */ 2066 return mqd_mgr->get_wave_state(mqd_mgr, q->mqd, ctl_stack, 2067 ctl_stack_used_size, save_area_used_size); 2068 } 2069 2070 static void get_queue_checkpoint_info(struct device_queue_manager *dqm, 2071 const struct queue *q, 2072 u32 *mqd_size, 2073 u32 *ctl_stack_size) 2074 { 2075 struct mqd_manager *mqd_mgr; 2076 enum KFD_MQD_TYPE mqd_type = 2077 get_mqd_type_from_queue_type(q->properties.type); 2078 2079 dqm_lock(dqm); 2080 mqd_mgr = dqm->mqd_mgrs[mqd_type]; 2081 *mqd_size = mqd_mgr->mqd_size; 2082 *ctl_stack_size = 0; 2083 2084 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE && mqd_mgr->get_checkpoint_info) 2085 mqd_mgr->get_checkpoint_info(mqd_mgr, q->mqd, ctl_stack_size); 2086 2087 dqm_unlock(dqm); 2088 } 2089 2090 static int checkpoint_mqd(struct device_queue_manager *dqm, 2091 const struct queue *q, 2092 void *mqd, 2093 void *ctl_stack) 2094 { 2095 struct mqd_manager *mqd_mgr; 2096 int r = 0; 2097 enum KFD_MQD_TYPE mqd_type = 2098 get_mqd_type_from_queue_type(q->properties.type); 2099 2100 dqm_lock(dqm); 2101 2102 if (q->properties.is_active || !q->device->cwsr_enabled) { 2103 r = -EINVAL; 2104 goto dqm_unlock; 2105 } 2106 2107 mqd_mgr = dqm->mqd_mgrs[mqd_type]; 2108 if (!mqd_mgr->checkpoint_mqd) { 2109 r = -EOPNOTSUPP; 2110 goto dqm_unlock; 2111 } 2112 2113 mqd_mgr->checkpoint_mqd(mqd_mgr, q->mqd, mqd, ctl_stack); 2114 2115 dqm_unlock: 2116 dqm_unlock(dqm); 2117 return r; 2118 } 2119 2120 static int process_termination_cpsch(struct device_queue_manager *dqm, 2121 struct qcm_process_device *qpd) 2122 { 2123 int retval; 2124 struct queue *q; 2125 struct kernel_queue *kq, *kq_next; 2126 struct mqd_manager *mqd_mgr; 2127 struct device_process_node *cur, *next_dpn; 2128 enum kfd_unmap_queues_filter filter = 2129 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES; 2130 bool found = false; 2131 2132 retval = 0; 2133 2134 dqm_lock(dqm); 2135 2136 /* Clean all kernel queues */ 2137 list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) { 2138 list_del(&kq->list); 2139 decrement_queue_count(dqm, qpd, kq->queue); 2140 qpd->is_debug = false; 2141 dqm->total_queue_count--; 2142 filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES; 2143 } 2144 2145 /* Clear all user mode queues */ 2146 list_for_each_entry(q, &qpd->queues_list, list) { 2147 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) 2148 deallocate_sdma_queue(dqm, q); 2149 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 2150 deallocate_sdma_queue(dqm, q); 2151 2152 if (q->properties.is_active) { 2153 decrement_queue_count(dqm, qpd, q); 2154 2155 if (dqm->dev->shared_resources.enable_mes) { 2156 retval = remove_queue_mes(dqm, q, qpd); 2157 if (retval) 2158 pr_err("Failed to remove queue %d\n", 2159 q->properties.queue_id); 2160 } 2161 } 2162 2163 dqm->total_queue_count--; 2164 } 2165 2166 /* Unregister process */ 2167 list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) { 2168 if (qpd == cur->qpd) { 2169 list_del(&cur->list); 2170 kfree(cur); 2171 dqm->processes_count--; 2172 found = true; 2173 break; 2174 } 2175 } 2176 2177 if (!dqm->dev->shared_resources.enable_mes) 2178 retval = execute_queues_cpsch(dqm, filter, 0); 2179 2180 if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) { 2181 pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev); 2182 dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process); 2183 qpd->reset_wavefronts = false; 2184 } 2185 2186 /* Lastly, free mqd resources. 2187 * Do free_mqd() after dqm_unlock to avoid circular locking. 2188 */ 2189 while (!list_empty(&qpd->queues_list)) { 2190 q = list_first_entry(&qpd->queues_list, struct queue, list); 2191 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 2192 q->properties.type)]; 2193 list_del(&q->list); 2194 qpd->queue_count--; 2195 dqm_unlock(dqm); 2196 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 2197 dqm_lock(dqm); 2198 } 2199 dqm_unlock(dqm); 2200 2201 /* Outside the DQM lock because under the DQM lock we can't do 2202 * reclaim or take other locks that others hold while reclaiming. 2203 */ 2204 if (found) 2205 kfd_dec_compute_active(dqm->dev); 2206 2207 return retval; 2208 } 2209 2210 static int init_mqd_managers(struct device_queue_manager *dqm) 2211 { 2212 int i, j; 2213 struct mqd_manager *mqd_mgr; 2214 2215 for (i = 0; i < KFD_MQD_TYPE_MAX; i++) { 2216 mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev); 2217 if (!mqd_mgr) { 2218 pr_err("mqd manager [%d] initialization failed\n", i); 2219 goto out_free; 2220 } 2221 dqm->mqd_mgrs[i] = mqd_mgr; 2222 } 2223 2224 return 0; 2225 2226 out_free: 2227 for (j = 0; j < i; j++) { 2228 kfree(dqm->mqd_mgrs[j]); 2229 dqm->mqd_mgrs[j] = NULL; 2230 } 2231 2232 return -ENOMEM; 2233 } 2234 2235 /* Allocate one hiq mqd (HWS) and all SDMA mqd in a continuous trunk*/ 2236 static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm) 2237 { 2238 int retval; 2239 struct kfd_dev *dev = dqm->dev; 2240 struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd; 2241 uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size * 2242 get_num_all_sdma_engines(dqm) * 2243 dev->device_info.num_sdma_queues_per_engine + 2244 dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size; 2245 2246 retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, size, 2247 &(mem_obj->gtt_mem), &(mem_obj->gpu_addr), 2248 (void *)&(mem_obj->cpu_ptr), false); 2249 2250 return retval; 2251 } 2252 2253 struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) 2254 { 2255 struct device_queue_manager *dqm; 2256 2257 pr_debug("Loading device queue manager\n"); 2258 2259 dqm = kzalloc(sizeof(*dqm), GFP_KERNEL); 2260 if (!dqm) 2261 return NULL; 2262 2263 switch (dev->adev->asic_type) { 2264 /* HWS is not available on Hawaii. */ 2265 case CHIP_HAWAII: 2266 /* HWS depends on CWSR for timely dequeue. CWSR is not 2267 * available on Tonga. 2268 * 2269 * FIXME: This argument also applies to Kaveri. 2270 */ 2271 case CHIP_TONGA: 2272 dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS; 2273 break; 2274 default: 2275 dqm->sched_policy = sched_policy; 2276 break; 2277 } 2278 2279 dqm->dev = dev; 2280 switch (dqm->sched_policy) { 2281 case KFD_SCHED_POLICY_HWS: 2282 case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION: 2283 /* initialize dqm for cp scheduling */ 2284 dqm->ops.create_queue = create_queue_cpsch; 2285 dqm->ops.initialize = initialize_cpsch; 2286 dqm->ops.start = start_cpsch; 2287 dqm->ops.stop = stop_cpsch; 2288 dqm->ops.pre_reset = pre_reset; 2289 dqm->ops.destroy_queue = destroy_queue_cpsch; 2290 dqm->ops.update_queue = update_queue; 2291 dqm->ops.register_process = register_process; 2292 dqm->ops.unregister_process = unregister_process; 2293 dqm->ops.uninitialize = uninitialize; 2294 dqm->ops.create_kernel_queue = create_kernel_queue_cpsch; 2295 dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch; 2296 dqm->ops.set_cache_memory_policy = set_cache_memory_policy; 2297 dqm->ops.process_termination = process_termination_cpsch; 2298 dqm->ops.evict_process_queues = evict_process_queues_cpsch; 2299 dqm->ops.restore_process_queues = restore_process_queues_cpsch; 2300 dqm->ops.get_wave_state = get_wave_state; 2301 dqm->ops.reset_queues = reset_queues_cpsch; 2302 dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info; 2303 dqm->ops.checkpoint_mqd = checkpoint_mqd; 2304 break; 2305 case KFD_SCHED_POLICY_NO_HWS: 2306 /* initialize dqm for no cp scheduling */ 2307 dqm->ops.start = start_nocpsch; 2308 dqm->ops.stop = stop_nocpsch; 2309 dqm->ops.pre_reset = pre_reset; 2310 dqm->ops.create_queue = create_queue_nocpsch; 2311 dqm->ops.destroy_queue = destroy_queue_nocpsch; 2312 dqm->ops.update_queue = update_queue; 2313 dqm->ops.register_process = register_process; 2314 dqm->ops.unregister_process = unregister_process; 2315 dqm->ops.initialize = initialize_nocpsch; 2316 dqm->ops.uninitialize = uninitialize; 2317 dqm->ops.set_cache_memory_policy = set_cache_memory_policy; 2318 dqm->ops.process_termination = process_termination_nocpsch; 2319 dqm->ops.evict_process_queues = evict_process_queues_nocpsch; 2320 dqm->ops.restore_process_queues = 2321 restore_process_queues_nocpsch; 2322 dqm->ops.get_wave_state = get_wave_state; 2323 dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info; 2324 dqm->ops.checkpoint_mqd = checkpoint_mqd; 2325 break; 2326 default: 2327 pr_err("Invalid scheduling policy %d\n", dqm->sched_policy); 2328 goto out_free; 2329 } 2330 2331 switch (dev->adev->asic_type) { 2332 case CHIP_CARRIZO: 2333 device_queue_manager_init_vi(&dqm->asic_ops); 2334 break; 2335 2336 case CHIP_KAVERI: 2337 device_queue_manager_init_cik(&dqm->asic_ops); 2338 break; 2339 2340 case CHIP_HAWAII: 2341 device_queue_manager_init_cik_hawaii(&dqm->asic_ops); 2342 break; 2343 2344 case CHIP_TONGA: 2345 case CHIP_FIJI: 2346 case CHIP_POLARIS10: 2347 case CHIP_POLARIS11: 2348 case CHIP_POLARIS12: 2349 case CHIP_VEGAM: 2350 device_queue_manager_init_vi_tonga(&dqm->asic_ops); 2351 break; 2352 2353 default: 2354 if (KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0)) 2355 device_queue_manager_init_v11(&dqm->asic_ops); 2356 else if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1)) 2357 device_queue_manager_init_v10_navi10(&dqm->asic_ops); 2358 else if (KFD_GC_VERSION(dev) >= IP_VERSION(9, 0, 1)) 2359 device_queue_manager_init_v9(&dqm->asic_ops); 2360 else { 2361 WARN(1, "Unexpected ASIC family %u", 2362 dev->adev->asic_type); 2363 goto out_free; 2364 } 2365 } 2366 2367 if (init_mqd_managers(dqm)) 2368 goto out_free; 2369 2370 if (allocate_hiq_sdma_mqd(dqm)) { 2371 pr_err("Failed to allocate hiq sdma mqd trunk buffer\n"); 2372 goto out_free; 2373 } 2374 2375 if (!dqm->ops.initialize(dqm)) 2376 return dqm; 2377 2378 out_free: 2379 kfree(dqm); 2380 return NULL; 2381 } 2382 2383 static void deallocate_hiq_sdma_mqd(struct kfd_dev *dev, 2384 struct kfd_mem_obj *mqd) 2385 { 2386 WARN(!mqd, "No hiq sdma mqd trunk to free"); 2387 2388 amdgpu_amdkfd_free_gtt_mem(dev->adev, mqd->gtt_mem); 2389 } 2390 2391 void device_queue_manager_uninit(struct device_queue_manager *dqm) 2392 { 2393 dqm->ops.uninitialize(dqm); 2394 deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd); 2395 kfree(dqm); 2396 } 2397 2398 int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid) 2399 { 2400 struct kfd_process_device *pdd; 2401 struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); 2402 int ret = 0; 2403 2404 if (!p) 2405 return -EINVAL; 2406 WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid); 2407 pdd = kfd_get_process_device_data(dqm->dev, p); 2408 if (pdd) 2409 ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd); 2410 kfd_unref_process(p); 2411 2412 return ret; 2413 } 2414 2415 static void kfd_process_hw_exception(struct work_struct *work) 2416 { 2417 struct device_queue_manager *dqm = container_of(work, 2418 struct device_queue_manager, hw_exception_work); 2419 amdgpu_amdkfd_gpu_reset(dqm->dev->adev); 2420 } 2421 2422 #if defined(CONFIG_DEBUG_FS) 2423 2424 static void seq_reg_dump(struct seq_file *m, 2425 uint32_t (*dump)[2], uint32_t n_regs) 2426 { 2427 uint32_t i, count; 2428 2429 for (i = 0, count = 0; i < n_regs; i++) { 2430 if (count == 0 || 2431 dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) { 2432 seq_printf(m, "%s %08x: %08x", 2433 i ? "\n" : "", 2434 dump[i][0], dump[i][1]); 2435 count = 7; 2436 } else { 2437 seq_printf(m, " %08x", dump[i][1]); 2438 count--; 2439 } 2440 } 2441 2442 seq_puts(m, "\n"); 2443 } 2444 2445 int dqm_debugfs_hqds(struct seq_file *m, void *data) 2446 { 2447 struct device_queue_manager *dqm = data; 2448 uint32_t (*dump)[2], n_regs; 2449 int pipe, queue; 2450 int r = 0; 2451 2452 if (!dqm->sched_running) { 2453 seq_puts(m, " Device is stopped\n"); 2454 return 0; 2455 } 2456 2457 r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev, 2458 KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE, 2459 &dump, &n_regs); 2460 if (!r) { 2461 seq_printf(m, " HIQ on MEC %d Pipe %d Queue %d\n", 2462 KFD_CIK_HIQ_PIPE/get_pipes_per_mec(dqm)+1, 2463 KFD_CIK_HIQ_PIPE%get_pipes_per_mec(dqm), 2464 KFD_CIK_HIQ_QUEUE); 2465 seq_reg_dump(m, dump, n_regs); 2466 2467 kfree(dump); 2468 } 2469 2470 for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { 2471 int pipe_offset = pipe * get_queues_per_pipe(dqm); 2472 2473 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) { 2474 if (!test_bit(pipe_offset + queue, 2475 dqm->dev->shared_resources.cp_queue_bitmap)) 2476 continue; 2477 2478 r = dqm->dev->kfd2kgd->hqd_dump( 2479 dqm->dev->adev, pipe, queue, &dump, &n_regs); 2480 if (r) 2481 break; 2482 2483 seq_printf(m, " CP Pipe %d, Queue %d\n", 2484 pipe, queue); 2485 seq_reg_dump(m, dump, n_regs); 2486 2487 kfree(dump); 2488 } 2489 } 2490 2491 for (pipe = 0; pipe < get_num_all_sdma_engines(dqm); pipe++) { 2492 for (queue = 0; 2493 queue < dqm->dev->device_info.num_sdma_queues_per_engine; 2494 queue++) { 2495 r = dqm->dev->kfd2kgd->hqd_sdma_dump( 2496 dqm->dev->adev, pipe, queue, &dump, &n_regs); 2497 if (r) 2498 break; 2499 2500 seq_printf(m, " SDMA Engine %d, RLC %d\n", 2501 pipe, queue); 2502 seq_reg_dump(m, dump, n_regs); 2503 2504 kfree(dump); 2505 } 2506 } 2507 2508 return r; 2509 } 2510 2511 int dqm_debugfs_hang_hws(struct device_queue_manager *dqm) 2512 { 2513 int r = 0; 2514 2515 dqm_lock(dqm); 2516 r = pm_debugfs_hang_hws(&dqm->packet_mgr); 2517 if (r) { 2518 dqm_unlock(dqm); 2519 return r; 2520 } 2521 dqm->active_runlist = true; 2522 r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); 2523 dqm_unlock(dqm); 2524 2525 return r; 2526 } 2527 2528 #endif 2529