1 // SPDX-License-Identifier: GPL-2.0 OR MIT 2 /* 3 * Copyright 2014-2022 Advanced Micro Devices, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21 * OTHER DEALINGS IN THE SOFTWARE. 
 *
 */

#include <linux/ratelimit.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/sched.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_mqd_manager.h"
#include "cik_regs.h"
#include "kfd_kernel_queue.h"
#include "amdgpu_amdkfd.h"
#include "mes_api_def.h"
#include "kfd_debug.h"

/* Size of the per-pipe EOP queue */
#define CIK_HPD_EOP_BYTES_LOG2 11
#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)

static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
				  u32 pasid, unsigned int vmid);

static int execute_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param,
				uint32_t grace_period);
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
			      enum kfd_unmap_queues_filter filter,
			      uint32_t filter_param,
			      uint32_t grace_period,
			      bool reset);

static int map_queues_cpsch(struct device_queue_manager *dqm);

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				  struct queue *q);

static inline void deallocate_hqd(struct device_queue_manager *dqm,
				  struct queue *q);
static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q);
static int allocate_sdma_queue(struct device_queue_manager *dqm,
			       struct queue *q, const uint32_t *restore_sdma_id);
static void kfd_process_hw_exception(struct work_struct *work);

/* Map a KFD queue type to the MQD manager type that services it.
 * Both SDMA flavors share the SDMA MQD manager; everything else uses CP.
 */
static inline
enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
{
	if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI)
		return KFD_MQD_TYPE_SDMA;
	return KFD_MQD_TYPE_CP;
}

/* A pipe is usable by KFD if at least one of its queues is reserved for
 * KFD in the shared cp_queue_bitmap.
 */
static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
{
	int i;
	int pipe_offset = (mec * dqm->dev->kfd->shared_resources.num_pipe_per_mec
		+ pipe) * dqm->dev->kfd->shared_resources.num_queue_per_pipe;

	/* queue is available for KFD usage if bit is 1 */
	for (i = 0; i < dqm->dev->kfd->shared_resources.num_queue_per_pipe; ++i)
		if (test_bit(pipe_offset + i,
			      dqm->dev->kfd->shared_resources.cp_queue_bitmap))
			return true;
	return false;
}

/* Number of CP queues reserved for KFD across all MECs/pipes */
unsigned int get_cp_queues_num(struct device_queue_manager *dqm)
{
	return bitmap_weight(dqm->dev->kfd->shared_resources.cp_queue_bitmap,
				KGD_MAX_QUEUES);
}

unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
{
	return dqm->dev->kfd->shared_resources.num_queue_per_pipe;
}

unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
{
	return dqm->dev->kfd->shared_resources.num_pipe_per_mec;
}

/* Total SDMA engine count: regular engines plus XGMI-dedicated engines */
static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm)
{
	return kfd_get_num_sdma_engines(dqm->dev) +
		kfd_get_num_xgmi_sdma_engines(dqm->dev);
}

unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
{
	return kfd_get_num_sdma_engines(dqm->dev) *
		dqm->dev->kfd->device_info.num_sdma_queues_per_engine;
}

unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm)
{
	return kfd_get_num_xgmi_sdma_engines(dqm->dev) *
		dqm->dev->kfd->device_info.num_sdma_queues_per_engine;
}

/* Initialize the allocation bitmaps for regular and XGMI SDMA queues:
 * set one bit per available queue, then clear bits reserved by the
 * device info (those queues are not available for KFD allocation).
 */
static void init_sdma_bitmaps(struct device_queue_manager *dqm)
{
	bitmap_zero(dqm->sdma_bitmap, KFD_MAX_SDMA_QUEUES);
	bitmap_set(dqm->sdma_bitmap, 0, get_num_sdma_queues(dqm));

	bitmap_zero(dqm->xgmi_sdma_bitmap, KFD_MAX_SDMA_QUEUES);
	bitmap_set(dqm->xgmi_sdma_bitmap, 0, get_num_xgmi_sdma_queues(dqm));

	/* Mask out the reserved queues */
	bitmap_andnot(dqm->sdma_bitmap, dqm->sdma_bitmap,
		      dqm->dev->kfd->device_info.reserved_sdma_queues_bitmap,
		      KFD_MAX_SDMA_QUEUES);
}

/* Program per-VMID shared memory apertures/config on every XCC instance */
void program_sh_mem_settings(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	uint32_t xcc_mask = dqm->dev->xcc_mask;
	int xcc_id;

	for_each_inst(xcc_id, xcc_mask)
		dqm->dev->kfd2kgd->program_sh_mem_settings(
			dqm->dev->adev, qpd->vmid, qpd->sh_mem_config,
			qpd->sh_mem_ape1_base, qpd->sh_mem_ape1_limit,
			qpd->sh_mem_bases, xcc_id);
}

/* Mark the HWS as hung and, unless a GPU reset is already in flight,
 * schedule the hw_exception work to trigger one.
 */
static void kfd_hws_hang(struct device_queue_manager *dqm)
{
	/*
	 * Issue a GPU reset if HWS is unresponsive
	 */
	dqm->is_hws_hang = true;

	/* It's possible we're detecting a HWS hang in the
	 * middle of a GPU reset. No need to schedule another
	 * reset in this case.
	 */
	if (!dqm->is_resetting)
		schedule_work(&dqm->hw_exception_work);
}

/* Translate a KFD queue type into the MES queue type enum.
 * Only COMPUTE and SDMA are supported with MES; anything else returns
 * -EINVAL (and warns), which callers treat as "not supported".
 */
static int convert_to_mes_queue_type(int queue_type)
{
	int mes_queue_type;

	switch (queue_type) {
	case KFD_QUEUE_TYPE_COMPUTE:
		mes_queue_type = MES_QUEUE_TYPE_COMPUTE;
		break;
	case KFD_QUEUE_TYPE_SDMA:
		mes_queue_type = MES_QUEUE_TYPE_SDMA;
		break;
	default:
		WARN(1, "Invalid queue type %d", queue_type);
		mes_queue_type = -EINVAL;
		break;
	}

	return mes_queue_type;
}

/* Hand a queue to the MES scheduler: fill a mes_add_queue_input from the
 * queue/process properties and call the MES add_hw_queue op under the MES
 * lock. On failure the HWS is assumed unrecoverable and a reset is queued.
 */
static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
			 struct qcm_process_device *qpd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
	struct kfd_process_device *pdd = qpd_to_pdd(qpd);
	struct mes_add_queue_input queue_input;
	int r, queue_type;
	uint64_t wptr_addr_off;

	if (dqm->is_hws_hang)
		return -EIO;

	memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input));
	queue_input.process_id = qpd->pqm->process->pasid;
	queue_input.page_table_base_addr = qpd->page_table_base;
	queue_input.process_va_start = 0;
	queue_input.process_va_end = adev->vm_manager.max_pfn - 1;
	/* MES unit for quantum is 100ns */
	queue_input.process_quantum = KFD_MES_PROCESS_QUANTUM;  /* Equivalent to 10ms. */
	queue_input.process_context_addr = pdd->proc_ctx_gpu_addr;
	queue_input.gang_quantum = KFD_MES_GANG_QUANTUM; /* Equivalent to 1ms */
	queue_input.gang_context_addr = q->gang_ctx_gpu_addr;
	queue_input.inprocess_gang_priority = q->properties.priority;
	queue_input.gang_global_priority_level =
					AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
	queue_input.doorbell_offset = q->properties.doorbell_off;
	queue_input.mqd_addr = q->gart_mqd_addr;
	queue_input.wptr_addr = (uint64_t)q->properties.write_ptr;

	if (q->wptr_bo) {
		/* MC address of the wptr: BO page address plus the page offset
		 * of the user-mode write pointer.
		 */
		wptr_addr_off = (uint64_t)q->properties.write_ptr & (PAGE_SIZE - 1);
		queue_input.wptr_mc_addr = ((uint64_t)q->wptr_bo->tbo.resource->start << PAGE_SHIFT) + wptr_addr_off;
	}

	queue_input.is_kfd_process = 1;
	queue_input.is_aql_queue = (q->properties.format == KFD_QUEUE_FORMAT_AQL);
	/* queue size is passed to MES in dwords */
	queue_input.queue_size = q->properties.queue_size >> 2;

	queue_input.paging = false;
	queue_input.tba_addr = qpd->tba_addr;
	queue_input.tma_addr = qpd->tma_addr;
	queue_input.trap_en = !kfd_dbg_has_cwsr_workaround(q->device);
	queue_input.skip_process_ctx_clear = qpd->pqm->process->debug_trap_enabled;

	queue_type = convert_to_mes_queue_type(q->properties.type);
	if (queue_type < 0) {
		pr_err("Queue type not supported with MES, queue:%d\n",
			q->properties.type);
		return -EINVAL;
	}
	queue_input.queue_type = (uint32_t)queue_type;

	if (q->gws) {
		queue_input.gws_base = 0;
		queue_input.gws_size = qpd->num_gws;
	}

	amdgpu_mes_lock(&adev->mes);
	r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
	amdgpu_mes_unlock(&adev->mes);
	if (r) {
		pr_err("failed to add hardware queue to MES, doorbell=0x%x\n",
			q->properties.doorbell_off);
		pr_err("MES might be in unrecoverable state, issue a GPU reset\n");
		kfd_hws_hang(dqm);
	}

	return r;
}

/* Remove a queue from the MES scheduler (counterpart of add_queue_mes) */
static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q,
			struct qcm_process_device *qpd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
	int r;
	struct mes_remove_queue_input queue_input;

	if (dqm->is_hws_hang)
		return -EIO;

	/* MES identifies the queue by doorbell offset and gang context */
	memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input));
	queue_input.doorbell_offset = q->properties.doorbell_off;
	queue_input.gang_context_addr = q->gang_ctx_gpu_addr;

	amdgpu_mes_lock(&adev->mes);
	r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input);
	amdgpu_mes_unlock(&adev->mes);

	if (r) {
		pr_err("failed to remove hardware queue from MES, doorbell=0x%x\n",
			q->properties.doorbell_off);
		pr_err("MES might be in unrecoverable state, issue a GPU reset\n");
		kfd_hws_hang(dqm);
	}

	return r;
}

/* Remove every active queue of every process on this dqm from MES.
 * Stops at the first failure and returns its error code.
 */
static int remove_all_queues_mes(struct device_queue_manager *dqm)
{
	struct device_process_node *cur;
	struct qcm_process_device *qpd;
	struct queue *q;
	int retval = 0;

	list_for_each_entry(cur, &dqm->queues, list) {
		qpd = cur->qpd;
		list_for_each_entry(q, &qpd->queues_list, list) {
			if (q->properties.is_active) {
				retval = remove_queue_mes(dqm, q, qpd);
				if (retval) {
					pr_err("%s: Failed to remove queue %d for dev %d",
						__func__,
						q->properties.queue_id,
						dqm->dev->id);
					return retval;
				}
			}
		}
	}

	return retval;
}

/* Bump the dqm's active-queue accounting for a newly activated queue.
 * CP-type queues (compute/DIQ) and GWS users have dedicated counters.
 */
static void increment_queue_count(struct device_queue_manager *dqm,
				  struct qcm_process_device *qpd,
				  struct queue *q)
{
	dqm->active_queue_count++;
	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
	    q->properties.type == KFD_QUEUE_TYPE_DIQ)
		dqm->active_cp_queue_count++;

	if (q->properties.is_gws) {
		dqm->gws_queue_count++;
		qpd->mapped_gws_queue = true;
	}
}

/* Reverse of increment_queue_count for a deactivated queue */
static void decrement_queue_count(struct device_queue_manager *dqm,
				  struct qcm_process_device *qpd,
				  struct queue *q)
{
	dqm->active_queue_count--;
	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
	    q->properties.type == KFD_QUEUE_TYPE_DIQ)
		dqm->active_cp_queue_count--;

	if (q->properties.is_gws) {
		dqm->gws_queue_count--;
		qpd->mapped_gws_queue = false;
	}
}

/*
 * Allocate a doorbell ID to this queue.
 * If doorbell_id is passed in, make sure requested ID is valid then allocate it.
 */
static int allocate_doorbell(struct qcm_process_device *qpd,
			     struct queue *q,
			     uint32_t const *restore_id)
{
	struct kfd_node *dev = qpd->dqm->dev;

	if (!KFD_IS_SOC15(dev)) {
		/* On pre-SOC15 chips we need to use the queue ID to
		 * preserve the user mode ABI.
		 */

		if (restore_id && *restore_id != q->properties.queue_id)
			return -EINVAL;

		q->doorbell_id = q->properties.queue_id;
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
			q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
		/* For SDMA queues on SOC15 with 8-byte doorbell, use static
		 * doorbell assignments based on the engine and queue id.
		 * The doobell index distance between RLC (2*i) and (2*i+1)
		 * for a SDMA engine is 512.
		 */

		uint32_t *idx_offset = dev->kfd->shared_resources.sdma_doorbell_idx;

		/*
		 * q->properties.sdma_engine_id corresponds to the virtual
		 * sdma engine number. However, for doorbell allocation,
		 * we need the physical sdma engine id in order to get the
		 * correct doorbell offset.
		 */
		uint32_t valid_id = idx_offset[qpd->dqm->dev->node_id *
					       get_num_all_sdma_engines(qpd->dqm) +
					       q->properties.sdma_engine_id]
						+ (q->properties.sdma_queue_id & 1)
						* KFD_QUEUE_DOORBELL_MIRROR_OFFSET
						+ (q->properties.sdma_queue_id >> 1);

		if (restore_id && *restore_id != valid_id)
			return -EINVAL;
		q->doorbell_id = valid_id;
	} else {
		/* For CP queues on SOC15 */
		if (restore_id) {
			/* make sure that ID is free  */
			if (__test_and_set_bit(*restore_id, qpd->doorbell_bitmap))
				return -EINVAL;

			q->doorbell_id = *restore_id;
		} else {
			/* or reserve a free doorbell ID */
			unsigned int found;

			found = find_first_zero_bit(qpd->doorbell_bitmap,
						    KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
			if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
				pr_debug("No doorbells available");
				return -EBUSY;
			}
			set_bit(found, qpd->doorbell_bitmap);
			q->doorbell_id = found;
		}
	}

	q->properties.doorbell_off =
		kfd_get_doorbell_dw_offset_in_bar(dev->kfd, qpd_to_pdd(qpd),
						  q->doorbell_id);
	return 0;
}

/* Release a dynamically allocated doorbell ID. Pre-SOC15 and SDMA
 * doorbells are statically assigned, so there is nothing to free there.
 */
static void deallocate_doorbell(struct qcm_process_device *qpd,
				struct queue *q)
{
	unsigned int old;
	struct kfd_node *dev = qpd->dqm->dev;

	if (!KFD_IS_SOC15(dev) ||
	    q->properties.type == KFD_QUEUE_TYPE_SDMA ||
	    q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
		return;

	old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap);
	WARN_ON(!old);
}

/* Program trap handler (TBA/TMA) addresses for this VMID on all XCC
 * instances, when the KGD interface provides the hook.
 */
static void program_trap_handler_settings(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd)
{
	uint32_t xcc_mask = dqm->dev->xcc_mask;
	int xcc_id;

	if (dqm->dev->kfd2kgd->program_trap_handler_settings)
		for_each_inst(xcc_id, xcc_mask)
			dqm->dev->kfd2kgd->program_trap_handler_settings(
				dqm->dev->adev, qpd->vmid, qpd->tba_addr,
				qpd->tma_addr, xcc_id);
}

/* Allocate a free KFD VMID for this process (no-HWS path), program the
 * PASID<->VMID mapping, memory apertures, trap handler and page table
 * base, then flush the TLB for the new mapping.
 */
static int allocate_vmid(struct device_queue_manager *dqm,
			struct qcm_process_device *qpd,
			struct queue *q)
{
	int allocated_vmid = -1, i;

	/* scan the KFD VMID range for an unused slot */
	for (i = dqm->dev->vm_info.first_vmid_kfd;
			i <= dqm->dev->vm_info.last_vmid_kfd; i++) {
		if (!dqm->vmid_pasid[i]) {
			allocated_vmid = i;
			break;
		}
	}

	if (allocated_vmid < 0) {
		pr_err("no more vmid to allocate\n");
		return -ENOSPC;
	}

	pr_debug("vmid allocated: %d\n", allocated_vmid);

	dqm->vmid_pasid[allocated_vmid] = q->process->pasid;

	set_pasid_vmid_mapping(dqm, q->process->pasid, allocated_vmid);

	qpd->vmid = allocated_vmid;
	q->properties.vmid = allocated_vmid;

	program_sh_mem_settings(dqm, qpd);

	if (KFD_IS_SOC15(dqm->dev) && dqm->dev->kfd->cwsr_enabled)
		program_trap_handler_settings(dqm, qpd);

	/* qpd->page_table_base is set earlier when register_process()
	 * is called, i.e. when the first queue is created.
	 */
	dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->adev,
			qpd->vmid,
			qpd->page_table_base);
	/* invalidate the VM context after pasid and vmid mapping is set up */
	kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);

	if (dqm->dev->kfd2kgd->set_scratch_backing_va)
		dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->adev,
				qpd->sh_hidden_private_base, qpd->vmid);

	return 0;
}

/* Submit a release_mem packet through the process IB buffer to flush the
 * texture cache (needed on ASICs where CP does not flush TC at dequeue).
 */
static int flush_texture_cache_nocpsch(struct kfd_node *kdev,
				       struct qcm_process_device *qpd)
{
	const struct packet_manager_funcs *pmf = qpd->dqm->packet_mgr.pmf;
	int ret;

	if (!qpd->ib_kaddr)
		return -ENOMEM;

	ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
	if (ret)
		return ret;

	return amdgpu_amdkfd_submit_ib(kdev->adev, KGD_ENGINE_MEC1, qpd->vmid,
				qpd->ib_base, (uint32_t *)qpd->ib_kaddr,
				pmf->release_mem_size / sizeof(uint32_t));
}

/* Release the process VMID (no-HWS path): flush caches/TLB as needed and
 * clear the PASID<->VMID mapping.
 */
static void deallocate_vmid(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	/* On GFX v7, CP doesn't flush TC at dequeue */
	if (q->device->adev->asic_type == CHIP_HAWAII)
		if (flush_texture_cache_nocpsch(q->device, qpd))
			pr_err("Failed to flush TC\n");

	kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);

	/* Release the vmid mapping */
	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
	dqm->vmid_pasid[qpd->vmid] = 0;

	qpd->vmid = 0;
	q->properties.vmid = 0;
}

/* Create a queue in the no-HWS (non-CP-scheduled) mode: allocate a VMID
 * for the process's first queue, an HQD slot or SDMA queue, a doorbell
 * and an MQD, then load the MQD to HW if the queue is active. qd/restore_*
 * are non-NULL on the CRIU restore path to re-create prior allocations.
 */
static int create_queue_nocpsch(struct device_queue_manager *dqm,
				struct queue *q,
				struct qcm_process_device *qpd,
				const struct kfd_criu_queue_priv_data *qd,
				const void *restore_mqd, const void *restore_ctl_stack)
{
	struct mqd_manager *mqd_mgr;
	int retval;

	dqm_lock(dqm);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		retval = -EPERM;
		goto out_unlock;
	}

	/* first queue of the process on this device: grab a VMID */
	if (list_empty(&qpd->queues_list)) {
		retval = allocate_vmid(dqm, qpd, q);
		if (retval)
			goto out_unlock;
	}
	q->properties.vmid = qpd->vmid;
	/*
	 * Eviction state logic: mark all queues as evicted, even ones
	 * not currently active. Restoring inactive queues later only
	 * updates the is_evicted flag but is a no-op otherwise.
	 */
	q->properties.is_evicted = !!qpd->evicted;

	q->properties.tba_addr = qpd->tba_addr;
	q->properties.tma_addr = qpd->tma_addr;

	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
			q->properties.type)];
	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
		retval = allocate_hqd(dqm, q);
		if (retval)
			goto deallocate_vmid;
		pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
			q->pipe, q->queue);
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
		retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL);
		if (retval)
			goto deallocate_vmid;
		dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
	}

	retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL);
	if (retval)
		goto out_deallocate_hqd;

	/* Temporarily release dqm lock to avoid a circular lock dependency */
	dqm_unlock(dqm);
	q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties);
	dqm_lock(dqm);

	if (!q->mqd_mem_obj) {
		retval = -ENOMEM;
		goto out_deallocate_doorbell;
	}

	if (qd)
		mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr,
				     &q->properties, restore_mqd, restore_ctl_stack,
				     qd->ctl_stack_size);
	else
		mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
					&q->gart_mqd_addr, &q->properties);

	if (q->properties.is_active) {
		if (!dqm->sched_running) {
			WARN_ONCE(1, "Load non-HWS mqd while stopped\n");
			goto add_queue_to_list;
		}

		if (WARN(q->process->mm != current->mm,
					"should only run in user thread"))
			retval = -EFAULT;
		else
			retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
					q->queue, &q->properties, current->mm);
		if (retval)
			goto out_free_mqd;
	}

add_queue_to_list:
	list_add(&q->list, &qpd->queues_list);
	qpd->queue_count++;
	if (q->properties.is_active)
		increment_queue_count(dqm, qpd, q);

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);
	goto out_unlock;

out_free_mqd:
	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
out_deallocate_doorbell:
	deallocate_doorbell(qpd, q);
out_deallocate_hqd:
	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
		deallocate_hqd(dqm, q);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
			q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
		deallocate_sdma_queue(dqm, q);
deallocate_vmid:
	if (list_empty(&qpd->queues_list))
		deallocate_vmid(dqm, qpd, q);
out_unlock:
	dqm_unlock(dqm);
	return retval;
}

/* Pick a free HQD (pipe, queue) slot round-robin across enabled pipes,
 * starting from next_pipe_to_allocate. Returns -EBUSY when all slots on
 * enabled pipes are taken.
 */
static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
{
	bool set;
	int pipe, bit, i;

	set = false;

	for (pipe = dqm->next_pipe_to_allocate, i = 0;
			i < get_pipes_per_mec(dqm);
			pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {

		if (!is_pipe_enabled(dqm, 0, pipe))
			continue;

		if (dqm->allocated_queues[pipe] != 0) {
			/* claim the lowest free queue bit on this pipe */
			bit = ffs(dqm->allocated_queues[pipe]) - 1;
			dqm->allocated_queues[pipe] &= ~(1 << bit);
			q->pipe = pipe;
			q->queue = bit;
			set = true;
			break;
		}
	}

	if (!set)
		return -EBUSY;

	pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue);
	/* horizontal hqd allocation */
	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);

	return 0;
}

/* Return the queue's HQD slot to the per-pipe free mask */
static inline void deallocate_hqd(struct device_queue_manager *dqm,
				struct queue *q)
{
	dqm->allocated_queues[q->pipe] |= (1 << q->queue);
}

#define SQ_IND_CMD_CMD_KILL		0x00000003
#define SQ_IND_CMD_MODE_BROADCAST	0x00000001

/* Kill all wavefronts of a process: find the VMID the process's PASID is
 * mapped to, then broadcast an SQ KILL command for that VMID on every XCC.
 */
static int dbgdev_wave_reset_wavefronts(struct kfd_node *dev, struct kfd_process *p)
{
	int status = 0;
	unsigned int vmid;
	uint16_t queried_pasid;
	union SQ_CMD_BITS reg_sq_cmd;
	union GRBM_GFX_INDEX_BITS
							reg_gfx_index;
	struct kfd_process_device *pdd;
	int first_vmid_to_scan = dev->vm_info.first_vmid_kfd;
	int last_vmid_to_scan = dev->vm_info.last_vmid_kfd;
	uint32_t xcc_mask = dev->xcc_mask;
	int xcc_id;

	reg_sq_cmd.u32All = 0;
	reg_gfx_index.u32All = 0;

	pr_debug("Killing all process wavefronts\n");

	if (!dev->kfd2kgd->get_atc_vmid_pasid_mapping_info) {
		pr_err("no vmid pasid mapping supported \n");
		return -EOPNOTSUPP;
	}

	/* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
	 * ATC_VMID15_PASID_MAPPING
	 * to check which VMID the current process is mapped to.
	 */

	for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
		status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info
				(dev->adev, vmid, &queried_pasid);

		if (status && queried_pasid == p->pasid) {
			pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n",
					vmid, p->pasid);
			break;
		}
	}

	if (vmid > last_vmid_to_scan) {
		pr_err("Didn't find vmid for pasid 0x%x\n", p->pasid);
		return -EFAULT;
	}

	/* taking the VMID for that process on the safe way using PDD */
	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd)
		return -EFAULT;

	reg_gfx_index.bits.sh_broadcast_writes = 1;
	reg_gfx_index.bits.se_broadcast_writes = 1;
	reg_gfx_index.bits.instance_broadcast_writes = 1;
	reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
	reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
	reg_sq_cmd.bits.vm_id = vmid;

	for_each_inst(xcc_id, xcc_mask)
		dev->kfd2kgd->wave_control_execute(
			dev->adev, reg_gfx_index.u32All,
			reg_sq_cmd.u32All, xcc_id);

	return 0;
}

/* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked
 * to avoid asynchronized access
 */
static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd_mgr;

	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
			q->properties.type)];

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
		deallocate_hqd(dqm, q);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		deallocate_sdma_queue(dqm, q);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
		deallocate_sdma_queue(dqm, q);
	else {
		pr_debug("q->properties.type %d is invalid\n",
				q->properties.type);
		return -EINVAL;
	}
	dqm->total_queue_count--;

	deallocate_doorbell(qpd, q);

	if (!dqm->sched_running) {
		WARN_ONCE(1, "Destroy non-HWS queue while stopped\n");
		return 0;
	}

	retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
				KFD_UNMAP_LATENCY_MS,
				q->pipe, q->queue);
	/* -ETIME: the HQD did not drain in time; remember to reset waves */
	if (retval == -ETIME)
		qpd->reset_wavefronts = true;

	list_del(&q->list);
	if (list_empty(&qpd->queues_list)) {
		if (qpd->reset_wavefronts) {
			pr_warn("Resetting wave fronts (nocpsch) on dev %p\n",
					dqm->dev);
			/* dbgdev_wave_reset_wavefronts has to be called before
			 * deallocate_vmid(), i.e. when vmid is still in use.
			 */
			dbgdev_wave_reset_wavefronts(dqm->dev,
					qpd->pqm->process);
			qpd->reset_wavefronts = false;
		}

		deallocate_vmid(dqm, qpd, q);
	}
	qpd->queue_count--;
	if (q->properties.is_active)
		decrement_queue_count(dqm, qpd, q);

	return retval;
}

/* Destroy a queue in no-HWS mode: snapshot SDMA usage counters first,
 * then tear the queue down under the dqm lock and free its MQD.
 */
static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	uint64_t sdma_val = 0;
	struct kfd_process_device *pdd = qpd_to_pdd(qpd);
	struct mqd_manager *mqd_mgr =
		dqm->mqd_mgrs[get_mqd_type_from_queue_type(q->properties.type)];

	/* Get the SDMA queue stats */
	if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
	    (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
		retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
							&sdma_val);
		if (retval)
			pr_err("Failed to read SDMA queue counter for queue: %d\n",
				q->properties.queue_id);
	}

	dqm_lock(dqm);
	retval = destroy_queue_nocpsch_locked(dqm, qpd, q);
	if (!retval)
		pdd->sdma_past_activity_counter += sdma_val;
	dqm_unlock(dqm);

	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);

	return retval;
}

/* Update a queue's properties/MQD. The queue must be unmapped (HWS/MES)
 * or its HQD destroyed (no-HWS) before touching the MQD, then re-mapped
 * or re-loaded afterwards. Also reconciles active/GWS counters.
 */
static int update_queue(struct device_queue_manager *dqm, struct queue *q,
			struct mqd_update_info *minfo)
{
	int retval = 0;
	struct mqd_manager *mqd_mgr;
	struct kfd_process_device *pdd;
	bool prev_active = false;

	dqm_lock(dqm);
	pdd = kfd_get_process_device_data(q->device, q->process);
	if (!pdd) {
		retval = -ENODEV;
		goto out_unlock;
	}
	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
			q->properties.type)];

	/* Save previous activity state for counters */
	prev_active = q->properties.is_active;

	/* Make sure the queue is unmapped before updating the MQD */
	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
		if (!dqm->dev->kfd->shared_resources.enable_mes)
			retval = unmap_queues_cpsch(dqm,
					KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD, false);
		else if (prev_active)
			retval = remove_queue_mes(dqm, q, &pdd->qpd);

		if (retval) {
			pr_err("unmap queue failed\n");
			goto out_unlock;
		}
	} else if (prev_active &&
		   (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
		    q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		    q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {

		if (!dqm->sched_running) {
			WARN_ONCE(1, "Update non-HWS queue while stopped\n");
			goto out_unlock;
		}

		/* no-HWS: preempt the HQD; save waves if CWSR is on,
		 * otherwise drain them.
		 */
		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
				(dqm->dev->kfd->cwsr_enabled ?
				 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE :
				 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
		if (retval) {
			pr_err("destroy mqd failed\n");
			goto out_unlock;
		}
	}

	mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties, minfo);

	/*
	 * check active state vs. the previous state and modify
	 * counter accordingly. map_queues_cpsch uses the
	 * dqm->active_queue_count to determine whether a new runlist must be
	 * uploaded.
	 */
	if (q->properties.is_active && !prev_active) {
		increment_queue_count(dqm, &pdd->qpd, q);
	} else if (!q->properties.is_active && prev_active) {
		decrement_queue_count(dqm, &pdd->qpd, q);
	} else if (q->gws && !q->properties.is_gws) {
		/* queue gained a GWS allocation since the last update */
		if (q->properties.is_active) {
			dqm->gws_queue_count++;
			pdd->qpd.mapped_gws_queue = true;
		}
		q->properties.is_gws = true;
	} else if (!q->gws && q->properties.is_gws) {
		/* queue lost its GWS allocation since the last update */
		if (q->properties.is_active) {
			dqm->gws_queue_count--;
			pdd->qpd.mapped_gws_queue = false;
		}
		q->properties.is_gws = false;
	}

	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
		if (!dqm->dev->kfd->shared_resources.enable_mes)
			retval = map_queues_cpsch(dqm);
		else if (q->properties.is_active)
			retval = add_queue_mes(dqm, q, &pdd->qpd);
	} else if (q->properties.is_active &&
		 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
		  q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		  q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
		if (WARN(q->process->mm != current->mm,
			 "should only run in user thread"))
			retval = -EFAULT;
		else
			retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd,
						   q->pipe, q->queue,
						   &q->properties, current->mm);
	}

out_unlock:
	dqm_unlock(dqm);
	return retval;
}

/* suspend_single_queue does not lock the dqm like the
 * evict_process_queues_cpsch or evict_process_queues_nocpsch. You should
 * lock the dqm before calling, and unlock after calling.
 *
 * The reason we don't lock the dqm is because this function may be
 * called on multiple queues in a loop, so rather than locking/unlocking
 * multiple times, we will just keep the dqm locked for all of the calls.
 */
static int suspend_single_queue(struct device_queue_manager *dqm,
				      struct kfd_process_device *pdd,
				      struct queue *q)
{
	bool is_new;

	if (q->properties.is_suspended)
		return 0;

	pr_debug("Suspending PASID %u queue [%i]\n",
			pdd->process->pasid,
			q->properties.queue_id);

	is_new = q->properties.exception_status & KFD_EC_MASK(EC_QUEUE_NEW);

	/* brand-new or dying queues cannot be suspended */
	if (is_new || q->properties.is_being_destroyed) {
		pr_debug("Suspend: skip %s queue id %i\n",
				is_new ? "new" : "destroyed",
				q->properties.queue_id);
		return -EBUSY;
	}

	q->properties.is_suspended = true;
	if (q->properties.is_active) {
		if (dqm->dev->kfd->shared_resources.enable_mes) {
			int r = remove_queue_mes(dqm, q, &pdd->qpd);

			if (r)
				return r;
		}

		decrement_queue_count(dqm, &pdd->qpd, q);
		q->properties.is_active = false;
	}

	return 0;
}

/* resume_single_queue does not lock the dqm like the functions
 * restore_process_queues_cpsch or restore_process_queues_nocpsch. You should
 * lock the dqm before calling, and unlock after calling.
 *
 * The reason we don't lock the dqm is because this function may be
 * called on multiple queues in a loop, so rather than locking/unlocking
 * multiple times, we will just keep the dqm locked for all of the calls.
1006 */ 1007 static int resume_single_queue(struct device_queue_manager *dqm, 1008 struct qcm_process_device *qpd, 1009 struct queue *q) 1010 { 1011 struct kfd_process_device *pdd; 1012 1013 if (!q->properties.is_suspended) 1014 return 0; 1015 1016 pdd = qpd_to_pdd(qpd); 1017 1018 pr_debug("Restoring from suspend PASID %u queue [%i]\n", 1019 pdd->process->pasid, 1020 q->properties.queue_id); 1021 1022 q->properties.is_suspended = false; 1023 1024 if (QUEUE_IS_ACTIVE(q->properties)) { 1025 if (dqm->dev->kfd->shared_resources.enable_mes) { 1026 int r = add_queue_mes(dqm, q, &pdd->qpd); 1027 1028 if (r) 1029 return r; 1030 } 1031 1032 q->properties.is_active = true; 1033 increment_queue_count(dqm, qpd, q); 1034 } 1035 1036 return 0; 1037 } 1038 1039 static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, 1040 struct qcm_process_device *qpd) 1041 { 1042 struct queue *q; 1043 struct mqd_manager *mqd_mgr; 1044 struct kfd_process_device *pdd; 1045 int retval, ret = 0; 1046 1047 dqm_lock(dqm); 1048 if (qpd->evicted++ > 0) /* already evicted, do nothing */ 1049 goto out; 1050 1051 pdd = qpd_to_pdd(qpd); 1052 pr_debug_ratelimited("Evicting PASID 0x%x queues\n", 1053 pdd->process->pasid); 1054 1055 pdd->last_evict_timestamp = get_jiffies_64(); 1056 /* Mark all queues as evicted. Deactivate all active queues on 1057 * the qpd. 1058 */ 1059 list_for_each_entry(q, &qpd->queues_list, list) { 1060 q->properties.is_evicted = true; 1061 if (!q->properties.is_active) 1062 continue; 1063 1064 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 1065 q->properties.type)]; 1066 q->properties.is_active = false; 1067 decrement_queue_count(dqm, qpd, q); 1068 1069 if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n")) 1070 continue; 1071 1072 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, 1073 (dqm->dev->kfd->cwsr_enabled ? 
				 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE :
				 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
		if (retval && !ret)
			/* Return the first error, but keep going to
			 * maintain a consistent eviction state
			 */
			ret = retval;
	}

out:
	dqm_unlock(dqm);
	return ret;
}

/* Evict all queues of a process (HWS path): mark every queue evicted,
 * deactivate the active ones, and unmap them via MES or the CP runlist.
 */
static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
				      struct qcm_process_device *qpd)
{
	struct queue *q;
	struct kfd_process_device *pdd;
	int retval = 0;

	dqm_lock(dqm);
	if (qpd->evicted++ > 0) /* already evicted, do nothing */
		goto out;

	pdd = qpd_to_pdd(qpd);

	/* The debugger creates processes that temporarily have not acquired
	 * all VMs for all devices and has no VMs itself.
	 * Skip queue eviction on process eviction.
	 */
	if (!pdd->drm_priv)
		goto out;

	pr_debug_ratelimited("Evicting PASID 0x%x queues\n",
			    pdd->process->pasid);

	/* Mark all queues as evicted. Deactivate all active queues on
	 * the qpd.
	 */
	list_for_each_entry(q, &qpd->queues_list, list) {
		q->properties.is_evicted = true;
		if (!q->properties.is_active)
			continue;

		q->properties.is_active = false;
		decrement_queue_count(dqm, qpd, q);

		/* MES removes queues one by one; the non-MES path batches
		 * the unmap in the single execute_queues_cpsch() below.
		 */
		if (dqm->dev->kfd->shared_resources.enable_mes) {
			retval = remove_queue_mes(dqm, q, qpd);
			if (retval) {
				pr_err("Failed to evict queue %d\n",
					q->properties.queue_id);
				goto out;
			}
		}
	}
	pdd->last_evict_timestamp = get_jiffies_64();
	if (!dqm->dev->kfd->shared_resources.enable_mes)
		retval = execute_queues_cpsch(dqm,
					      qpd->is_debug ?
					      KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
					      KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
					      USE_DEFAULT_GRACE_PERIOD);

out:
	dqm_unlock(dqm);
	return retval;
}

/* Restore all queues of a process (non-HWS path): undo an eviction once the
 * eviction refcount drops to zero, update the page-table base, flush the
 * TLB and reload the MQD of every queue that should be active again.
 */
static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
					  struct qcm_process_device *qpd)
{
	struct mm_struct *mm = NULL;
	struct queue *q;
	struct mqd_manager *mqd_mgr;
	struct kfd_process_device *pdd;
	uint64_t pd_base;
	uint64_t eviction_duration;
	int retval, ret = 0;

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);

	dqm_lock(dqm);
	if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
		goto out;
	if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
		qpd->evicted--;
		goto out;
	}

	pr_debug_ratelimited("Restoring PASID 0x%x queues\n",
			    pdd->process->pasid);

	/* Update PD Base in QPD */
	qpd->page_table_base = pd_base;
	pr_debug("Updated PD address to 0x%llx\n", pd_base);

	if (!list_empty(&qpd->queues_list)) {
		dqm->dev->kfd2kgd->set_vm_context_page_table_base(
				dqm->dev->adev,
				qpd->vmid,
				qpd->page_table_base);
		kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY);
	}

	/* Take a safe reference to the mm_struct, which may otherwise
	 * disappear even while the kfd_process is still referenced.
	 */
	mm = get_task_mm(pdd->process->lead_thread);
	if (!mm) {
		ret = -EFAULT;
		goto out;
	}

	/* Remove the eviction flags. Activate queues that are not
	 * inactive for other reasons.
	 */
	list_for_each_entry(q, &qpd->queues_list, list) {
		q->properties.is_evicted = false;
		if (!QUEUE_IS_ACTIVE(q->properties))
			continue;

		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
				q->properties.type)];
		q->properties.is_active = true;
		increment_queue_count(dqm, qpd, q);

		if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n"))
			continue;

		retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
				       q->queue, &q->properties, mm);
		if (retval && !ret)
			/* Return the first error, but keep going to
			 * maintain a consistent eviction state
			 */
			ret = retval;
	}
	qpd->evicted = 0;
	/* Account how long this process was kept off the hardware. */
	eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp;
	atomic64_add(eviction_duration, &pdd->evict_duration_counter);
out:
	if (mm)
		mmput(mm);
	dqm_unlock(dqm);
	return ret;
}

/* Restore all queues of a process (HWS path): undo an eviction once the
 * eviction refcount drops to zero and re-map queues via MES or the runlist.
 */
static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct queue *q;
	struct kfd_process_device *pdd;
	uint64_t eviction_duration;
	int retval = 0;

	pdd = qpd_to_pdd(qpd);

	dqm_lock(dqm);
	if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
		goto out;
	if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
		qpd->evicted--;
		goto out;
	}

	/* The debugger creates processes that temporarily have not acquired
	 * all VMs for all devices and has no VMs itself.
	 * Skip queue restore on process restore.
	 */
	if (!pdd->drm_priv)
		goto vm_not_acquired;

	pr_debug_ratelimited("Restoring PASID 0x%x queues\n",
			    pdd->process->pasid);

	/* Update PD Base in QPD */
	qpd->page_table_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);
	pr_debug("Updated PD address to 0x%llx\n", qpd->page_table_base);

	/* activate all active queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		q->properties.is_evicted = false;
		if (!QUEUE_IS_ACTIVE(q->properties))
			continue;

		q->properties.is_active = true;
		/* &pdd->qpd is the same qpd passed in (pdd = qpd_to_pdd(qpd)) */
		increment_queue_count(dqm, &pdd->qpd, q);

		if (dqm->dev->kfd->shared_resources.enable_mes) {
			retval = add_queue_mes(dqm, q, qpd);
			if (retval) {
				pr_err("Failed to restore queue %d\n",
					q->properties.queue_id);
				goto out;
			}
		}
	}
	if (!dqm->dev->kfd->shared_resources.enable_mes)
		retval = execute_queues_cpsch(dqm,
					      KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD);
	eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp;
	atomic64_add(eviction_duration, &pdd->evict_duration_counter);
vm_not_acquired:
	qpd->evicted = 0;
out:
	dqm_unlock(dqm);
	return retval;
}

/* Register a process (its qcm_process_device) with this device queue
 * manager: link it into dqm->queues, record its page-directory base and
 * apply ASIC-specific qpd setup. Returns the asic_ops.update_qpd() result,
 * or -ENOMEM if the list node allocation fails.
 */
static int register_process(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct device_process_node *n;
	struct kfd_process_device *pdd;
	uint64_t pd_base;
	int retval;

	n = kzalloc(sizeof(*n), GFP_KERNEL);
	if (!n)
		return -ENOMEM;

	n->qpd = qpd;

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);

	dqm_lock(dqm);
	list_add(&n->list, &dqm->queues);

	/* Update PD Base in QPD */
	qpd->page_table_base = pd_base;
	pr_debug("Updated PD address to 0x%llx\n", pd_base);

	retval = dqm->asic_ops.update_qpd(dqm,
					  qpd);

	dqm->processes_count++;

	dqm_unlock(dqm);

	/* Outside the DQM lock because under the DQM lock we can't do
	 * reclaim or take other locks that others hold while reclaiming.
	 */
	kfd_inc_compute_active(dqm->dev);

	return retval;
}

/* Unregister a process from this device queue manager: unlink its node
 * from dqm->queues and drop the compute-active count. Returns 0 on
 * success, or 1 if the qpd was not found in the dqm list.
 */
static int unregister_process(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	int retval;
	struct device_process_node *cur, *next;

	pr_debug("qpd->queues_list is %s\n",
			list_empty(&qpd->queues_list) ? "empty" : "not empty");

	retval = 0;
	dqm_lock(dqm);

	list_for_each_entry_safe(cur, next, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			goto out;
		}
	}
	/* qpd not found in dqm list */
	retval = 1;
out:
	dqm_unlock(dqm);

	/* Outside the DQM lock because under the DQM lock we can't do
	 * reclaim or take other locks that others hold while reclaiming.
1354 */ 1355 if (!retval) 1356 kfd_dec_compute_active(dqm->dev); 1357 1358 return retval; 1359 } 1360 1361 static int 1362 set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid, 1363 unsigned int vmid) 1364 { 1365 uint32_t xcc_mask = dqm->dev->xcc_mask; 1366 int xcc_id, ret; 1367 1368 for_each_inst(xcc_id, xcc_mask) { 1369 ret = dqm->dev->kfd2kgd->set_pasid_vmid_mapping( 1370 dqm->dev->adev, pasid, vmid, xcc_id); 1371 if (ret) 1372 break; 1373 } 1374 1375 return ret; 1376 } 1377 1378 static void init_interrupts(struct device_queue_manager *dqm) 1379 { 1380 uint32_t xcc_mask = dqm->dev->xcc_mask; 1381 unsigned int i, xcc_id; 1382 1383 for_each_inst(xcc_id, xcc_mask) { 1384 for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++) { 1385 if (is_pipe_enabled(dqm, 0, i)) { 1386 dqm->dev->kfd2kgd->init_interrupts( 1387 dqm->dev->adev, i, xcc_id); 1388 } 1389 } 1390 } 1391 } 1392 1393 static int initialize_nocpsch(struct device_queue_manager *dqm) 1394 { 1395 int pipe, queue; 1396 1397 pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm)); 1398 1399 dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm), 1400 sizeof(unsigned int), GFP_KERNEL); 1401 if (!dqm->allocated_queues) 1402 return -ENOMEM; 1403 1404 mutex_init(&dqm->lock_hidden); 1405 INIT_LIST_HEAD(&dqm->queues); 1406 dqm->active_queue_count = dqm->next_pipe_to_allocate = 0; 1407 dqm->active_cp_queue_count = 0; 1408 dqm->gws_queue_count = 0; 1409 1410 for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { 1411 int pipe_offset = pipe * get_queues_per_pipe(dqm); 1412 1413 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) 1414 if (test_bit(pipe_offset + queue, 1415 dqm->dev->kfd->shared_resources.cp_queue_bitmap)) 1416 dqm->allocated_queues[pipe] |= 1 << queue; 1417 } 1418 1419 memset(dqm->vmid_pasid, 0, sizeof(dqm->vmid_pasid)); 1420 1421 init_sdma_bitmaps(dqm); 1422 1423 return 0; 1424 } 1425 1426 static void uninitialize(struct device_queue_manager *dqm) 1427 { 1428 int i; 1429 1430 
	WARN_ON(dqm->active_queue_count > 0 || dqm->processes_count > 0);

	kfree(dqm->allocated_queues);
	for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
		kfree(dqm->mqd_mgrs[i]);
	mutex_destroy(&dqm->lock_hidden);
}

/* Start the non-HWS scheduler: enable interrupts and, on Hawaii only,
 * bring up the packet manager before marking the scheduler running.
 */
static int start_nocpsch(struct device_queue_manager *dqm)
{
	int r = 0;

	pr_info("SW scheduler is used");
	init_interrupts(dqm);

	if (dqm->dev->adev->asic_type == CHIP_HAWAII)
		r = pm_init(&dqm->packet_mgr, dqm);
	if (!r)
		dqm->sched_running = true;

	return r;
}

/* Stop the non-HWS scheduler; no-op if it is not running. */
static int stop_nocpsch(struct device_queue_manager *dqm)
{
	dqm_lock(dqm);
	if (!dqm->sched_running) {
		dqm_unlock(dqm);
		return 0;
	}

	if (dqm->dev->adev->asic_type == CHIP_HAWAII)
		pm_uninit(&dqm->packet_mgr, false);
	dqm->sched_running = false;
	dqm_unlock(dqm);

	return 0;
}

/* Flag an impending GPU reset so in-flight dqm operations can bail out. */
static void pre_reset(struct device_queue_manager *dqm)
{
	dqm_lock(dqm);
	dqm->is_resetting = true;
	dqm_unlock(dqm);
}

/* Allocate an SDMA (or XGMI-SDMA) queue slot for q, either reusing a
 * CRIU-restored id or taking the first free bit, and derive the engine/queue
 * id pair from the flat sdma_id. Returns 0, -ENOMEM or -EBUSY.
 *
 * NOTE(review): bitmap_empty() is checked over KFD_MAX_SDMA_QUEUES while
 * find_first_bit() searches only get_num_sdma_queues(dqm) bits — presumably
 * only the first get_num_sdma_queues() bits are ever set; confirm against
 * init_sdma_bitmaps().
 */
static int allocate_sdma_queue(struct device_queue_manager *dqm,
				struct queue *q, const uint32_t *restore_sdma_id)
{
	int bit;

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		if (bitmap_empty(dqm->sdma_bitmap, KFD_MAX_SDMA_QUEUES)) {
			pr_err("No more SDMA queue to allocate\n");
			return -ENOMEM;
		}

		if (restore_sdma_id) {
			/* Re-use existing sdma_id */
			if (!test_bit(*restore_sdma_id, dqm->sdma_bitmap)) {
				pr_err("SDMA queue already in use\n");
				return -EBUSY;
			}
			clear_bit(*restore_sdma_id, dqm->sdma_bitmap);
			q->sdma_id = *restore_sdma_id;
		} else {
			/* Find first available sdma_id */
			bit = find_first_bit(dqm->sdma_bitmap,
					     get_num_sdma_queues(dqm));
			clear_bit(bit, dqm->sdma_bitmap);
			q->sdma_id = bit;
		}

		q->properties.sdma_engine_id =
			q->sdma_id % kfd_get_num_sdma_engines(dqm->dev);
		q->properties.sdma_queue_id = q->sdma_id /
				kfd_get_num_sdma_engines(dqm->dev);
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
		if (bitmap_empty(dqm->xgmi_sdma_bitmap, KFD_MAX_SDMA_QUEUES)) {
			pr_err("No more XGMI SDMA queue to allocate\n");
			return -ENOMEM;
		}
		if (restore_sdma_id) {
			/* Re-use existing sdma_id */
			if (!test_bit(*restore_sdma_id, dqm->xgmi_sdma_bitmap)) {
				pr_err("SDMA queue already in use\n");
				return -EBUSY;
			}
			clear_bit(*restore_sdma_id, dqm->xgmi_sdma_bitmap);
			q->sdma_id = *restore_sdma_id;
		} else {
			bit = find_first_bit(dqm->xgmi_sdma_bitmap,
					     get_num_xgmi_sdma_queues(dqm));
			clear_bit(bit, dqm->xgmi_sdma_bitmap);
			q->sdma_id = bit;
		}
		/* sdma_engine_id is sdma id including
		 * both PCIe-optimized SDMAs and XGMI-
		 * optimized SDMAs. The calculation below
		 * assumes the first N engines are always
		 * PCIe-optimized ones
		 */
		q->properties.sdma_engine_id =
			kfd_get_num_sdma_engines(dqm->dev) +
			q->sdma_id % kfd_get_num_xgmi_sdma_engines(dqm->dev);
		q->properties.sdma_queue_id = q->sdma_id /
			kfd_get_num_xgmi_sdma_engines(dqm->dev);
	}

	pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
	pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);

	return 0;
}

/* Return q's SDMA slot to the matching bitmap; out-of-range ids are
 * silently ignored (defensive against a corrupted sdma_id).
 */
static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				struct queue *q)
{
	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		if (q->sdma_id >= get_num_sdma_queues(dqm))
			return;
		set_bit(q->sdma_id, dqm->sdma_bitmap);
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
		if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm))
			return;
		set_bit(q->sdma_id, dqm->xgmi_sdma_bitmap);
	}
}

/*
 * Device Queue Manager implementation for cp scheduler
 */

/* Report the VMID mask and first-MEC queue mask to the HWS firmware. */
static int set_sched_resources(struct device_queue_manager
				*dqm)
{
	int i, mec;
	struct scheduling_resources res;

	res.vmid_mask = dqm->dev->compute_vmid_bitmap;

	res.queue_mask = 0;
	for (i = 0; i < KGD_MAX_QUEUES; ++i) {
		mec = (i / dqm->dev->kfd->shared_resources.num_queue_per_pipe)
			/ dqm->dev->kfd->shared_resources.num_pipe_per_mec;

		if (!test_bit(i, dqm->dev->kfd->shared_resources.cp_queue_bitmap))
			continue;

		/* only acquire queues from the first MEC */
		if (mec > 0)
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of res.queue_mask needs updating
		 */
		if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) {
			pr_err("Invalid queue enabled by amdgpu: %d\n", i);
			break;
		}

		res.queue_mask |= 1ull
			<< amdgpu_queue_mask_bit_to_set_resource_bit(
				dqm->dev->adev, i);
	}
	res.gws_mask = ~0ull;
	res.oac_mask = res.gds_heap_base = res.gds_heap_size = 0;

	pr_debug("Scheduling resources:\n"
		"vmid mask: 0x%8X\n"
		"queue mask: 0x%8llX\n",
		res.vmid_mask, res.queue_mask);

	return pm_send_set_resources(&dqm->packet_mgr, &res);
}

/* Initialize the dqm for the HWS (CP scheduling) path: reset counters,
 * set up the hw-exception worker, the SDMA bitmaps and cache the
 * inter-queue wait times if the KGD interface provides them.
 */
static int initialize_cpsch(struct device_queue_manager *dqm)
{
	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));

	mutex_init(&dqm->lock_hidden);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->active_queue_count = dqm->processes_count = 0;
	dqm->active_cp_queue_count = 0;
	dqm->gws_queue_count = 0;
	dqm->active_runlist = false;
	INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);
	dqm->trap_debug_vmid = 0;

	init_sdma_bitmaps(dqm);

	if (dqm->dev->kfd2kgd->get_iq_wait_times)
		dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev,
					&dqm->wait_times);
	return 0;
}

/* Start the HWS scheduler: bring up the packet manager and scheduling
 * resources (non-MES only), allocate the preemption fence in GART memory,
 * enable interrupts and map the initial runlist.
 */
static int start_cpsch(struct device_queue_manager *dqm)
{
	int retval;

	retval = 0;

	dqm_lock(dqm);

	if (!dqm->dev->kfd->shared_resources.enable_mes) {
		retval = pm_init(&dqm->packet_mgr, dqm);
		if (retval)
			goto fail_packet_manager_init;

		retval = set_sched_resources(dqm);
		if (retval)
			goto fail_set_sched_resources;
	}
	pr_debug("Allocating fence memory\n");

	/* allocate fence memory on the gart */
	retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
					&dqm->fence_mem);

	if (retval)
		goto fail_allocate_vidmem;

	dqm->fence_addr = (uint64_t *)dqm->fence_mem->cpu_ptr;
	dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;

	init_interrupts(dqm);

	/* clear hang status when driver try to start the hw scheduler */
	dqm->is_hws_hang = false;
	dqm->is_resetting = false;
	dqm->sched_running = true;

	if (!dqm->dev->kfd->shared_resources.enable_mes)
		execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD);
	dqm_unlock(dqm);

	return 0;
fail_allocate_vidmem:
fail_set_sched_resources:
	if (!dqm->dev->kfd->shared_resources.enable_mes)
		pm_uninit(&dqm->packet_mgr, false);
fail_packet_manager_init:
	dqm_unlock(dqm);
	return retval;
}

/* Stop the HWS scheduler: unmap (or MES-remove) all queues unless HWS is
 * already hung, release the runlist IB and fence memory, and shut down the
 * packet manager. No-op if the scheduler is not running.
 */
static int stop_cpsch(struct device_queue_manager *dqm)
{
	bool hanging;

	dqm_lock(dqm);
	if (!dqm->sched_running) {
		dqm_unlock(dqm);
		return 0;
	}

	if (!dqm->is_hws_hang) {
		if (!dqm->dev->kfd->shared_resources.enable_mes)
			unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD, false);
		else
			remove_all_queues_mes(dqm);
	}

	hanging = dqm->is_hws_hang || dqm->is_resetting;
	dqm->sched_running = false;

	if (!dqm->dev->kfd->shared_resources.enable_mes)
		pm_release_ib(&dqm->packet_mgr);

	kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
	if
	   (!dqm->dev->kfd->shared_resources.enable_mes)
		pm_uninit(&dqm->packet_mgr, hanging);
	dqm_unlock(dqm);

	return 0;
}

/* Account and register a kernel queue (used by the debugger) with the HWS
 * path and re-execute the runlist. Marks the process as being debugged.
 */
static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	dqm_lock(dqm);
	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new kernel queue because %d queues were already created\n",
				dqm->total_queue_count);
		dqm_unlock(dqm);
		return -EPERM;
	}

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	list_add(&kq->list, &qpd->priv_queue_list);
	increment_queue_count(dqm, qpd, kq->queue);
	qpd->is_debug = true;
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
			USE_DEFAULT_GRACE_PERIOD);
	dqm_unlock(dqm);

	return 0;
}

/* Unregister a kernel queue, clear the process's debug flag and unmap all
 * queues so the runlist is rebuilt without it.
 */
static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	dqm_lock(dqm);
	list_del(&kq->list);
	decrement_queue_count(dqm, qpd, kq->queue);
	qpd->is_debug = false;
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0,
			USE_DEFAULT_GRACE_PERIOD);
	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type.
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);
	dqm_unlock(dqm);
}

/* Create a user-mode queue on the HWS path: allocate SDMA slot and doorbell
 * as needed, build (or CRIU-restore) the MQD, register the queue with the
 * qpd and map it via the runlist or MES. On failure unwinds all allocations.
 */
static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
			struct qcm_process_device *qpd,
			const struct kfd_criu_queue_priv_data *qd,
			const void *restore_mqd, const void *restore_ctl_stack)
{
	int retval;
	struct mqd_manager *mqd_mgr;

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		retval = -EPERM;
		goto out;
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
		dqm_lock(dqm);
		retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL);
		dqm_unlock(dqm);
		if (retval)
			goto out;
	}

	retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL);
	if (retval)
		goto out_deallocate_sdma_queue;

	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
			q->properties.type)];

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
		dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
	q->properties.tba_addr = qpd->tba_addr;
	q->properties.tma_addr = qpd->tma_addr;
	q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties);
	if (!q->mqd_mem_obj) {
		retval = -ENOMEM;
		goto out_deallocate_doorbell;
	}

	dqm_lock(dqm);
	/*
	 * Eviction state logic: mark all queues as evicted, even ones
	 * not currently active. Restoring inactive queues later only
	 * updates the is_evicted flag but is a no-op otherwise.
	 */
	q->properties.is_evicted = !!qpd->evicted;
	q->properties.is_dbg_wa = qpd->pqm->process->debug_trap_enabled &&
				  kfd_dbg_has_cwsr_workaround(q->device);

	if (qd)
		mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr,
				     &q->properties, restore_mqd, restore_ctl_stack,
				     qd->ctl_stack_size);
	else
		mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
					&q->gart_mqd_addr, &q->properties);

	list_add(&q->list, &qpd->queues_list);
	qpd->queue_count++;

	if (q->properties.is_active) {
		increment_queue_count(dqm, qpd, q);

		if (!dqm->dev->kfd->shared_resources.enable_mes)
			retval = execute_queues_cpsch(dqm,
					KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD);
		else
			retval = add_queue_mes(dqm, q, qpd);
		if (retval)
			goto cleanup_queue;
	}

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;

	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	dqm_unlock(dqm);
	return retval;

cleanup_queue:
	qpd->queue_count--;
	list_del(&q->list);
	if (q->properties.is_active)
		decrement_queue_count(dqm, qpd, q);
	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
	dqm_unlock(dqm);
out_deallocate_doorbell:
	deallocate_doorbell(qpd, q);
out_deallocate_sdma_queue:
	if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
		dqm_lock(dqm);
		deallocate_sdma_queue(dqm, q);
		dqm_unlock(dqm);
	}
out:
	return retval;
}

/* Poll *fence_addr until it reaches fence_value or timeout_ms elapses.
 * Yields the CPU with schedule() on every iteration. Returns 0 on success
 * or -ETIME on timeout; with halt_if_hws_hang set it spins forever to
 * preserve CP state for firmware debugging.
 */
int amdkfd_fence_wait_timeout(uint64_t *fence_addr,
				uint64_t fence_value,
				unsigned int timeout_ms)
{
	unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies;

	while (*fence_addr != fence_value) {
		if (time_after(jiffies, end_jiffies)) {
			pr_err("qcm fence wait loop timeout expired\n");
			/* In HWS case, this is used to halt the driver thread
			 * in order not to mess up CP states before doing
			 * scandumps for FW debugging.
			 */
			while (halt_if_hws_hang)
				schedule();

			return -ETIME;
		}
		schedule();
	}

	return 0;
}

/* dqm->lock mutex has to be locked before calling this function */
/* Submit the current runlist to the CP if there is anything to schedule
 * and no runlist is already active. Returns 0 when there is nothing to do.
 */
static int map_queues_cpsch(struct device_queue_manager *dqm)
{
	int retval;

	if (!dqm->sched_running)
		return 0;
	if (dqm->active_queue_count <= 0 || dqm->processes_count <= 0)
		return 0;
	if (dqm->active_runlist)
		return 0;

	retval = pm_send_runlist(&dqm->packet_mgr, &dqm->queues);
	pr_debug("%s sent runlist\n", __func__);
	if (retval) {
		pr_err("failed to execute runlist\n");
		return retval;
	}
	dqm->active_runlist = true;

	return retval;
}

/* dqm->lock mutex has to be locked before calling this function */
/* Ask the CP to preempt queues matching the filter, optionally with a
 * custom preemption grace period, and wait on the GART fence for
 * completion. Declares an HWS hang if the preemption does not finish
 * within queue_preemption_timeout_ms.
 */
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param,
				uint32_t grace_period,
				bool reset)
{
	int retval = 0;
	struct mqd_manager *mqd_mgr;

	if (!dqm->sched_running)
		return 0;
	if (dqm->is_hws_hang || dqm->is_resetting)
		return -EIO;
	if (!dqm->active_runlist)
		return retval;

	if (grace_period != USE_DEFAULT_GRACE_PERIOD) {
		retval = pm_update_grace_period(&dqm->packet_mgr, grace_period);
		if (retval)
			return retval;
	}

	retval = pm_send_unmap_queue(&dqm->packet_mgr, filter, filter_param, reset);
	if (retval)
		return retval;

	*dqm->fence_addr = KFD_FENCE_INIT;
	pm_send_query_status(&dqm->packet_mgr, dqm->fence_gpu_addr,
				KFD_FENCE_COMPLETED);
	/* should be timed out */
	retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
					queue_preemption_timeout_ms);
	if (retval) {
		pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
		kfd_hws_hang(dqm);
		return retval;
	}

	/* In the current MEC firmware implementation, if compute queue
	 * doesn't response to the preemption request in time, HIQ will
	 * abandon the unmap request without returning any timeout error
	 * to driver. Instead, MEC firmware will log the doorbell of the
	 * unresponding compute queue to HIQ.MQD.queue_doorbell_id fields.
	 * To make sure the queue unmap was successful, driver need to
	 * check those fields
	 */
	mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];
	if (mqd_mgr->read_doorbell_id(dqm->packet_mgr.priv_queue->queue->mqd)) {
		pr_err("HIQ MQD's queue_doorbell_id0 is not 0, Queue preemption time out\n");
		while (halt_if_hws_hang)
			schedule();
		return -ETIME;
	}

	/* We need to reset the grace period value for this device */
	if (grace_period != USE_DEFAULT_GRACE_PERIOD) {
		if (pm_update_grace_period(&dqm->packet_mgr,
					USE_DEFAULT_GRACE_PERIOD))
			pr_err("Failed to reset grace period\n");
	}

	pm_release_ib(&dqm->packet_mgr);
	dqm->active_runlist = false;

	return retval;
}

/* only for compute queue */
/* Unmap-with-reset for all queues of one PASID; used on queue hangs. */
static int reset_queues_cpsch(struct device_queue_manager *dqm,
			uint16_t pasid)
{
	int retval;

	dqm_lock(dqm);

	retval = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_BY_PASID,
			pasid, USE_DEFAULT_GRACE_PERIOD, true);

	dqm_unlock(dqm);
	return retval;
}

/* dqm->lock mutex has to be locked before calling this function */
/* Preempt the filtered queues, then re-submit the runlist: the standard
 * way to apply queue list changes on the HWS path.
 */
static int execute_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param,
				uint32_t grace_period)
{
	int retval;

	if (dqm->is_hws_hang)
		return -EIO;
	retval = unmap_queues_cpsch(dqm, filter, filter_param, grace_period, false);
	if (retval)
		return retval;

	return map_queues_cpsch(dqm);
}

/* Block queue destruction while the debugger holds the queue suspended:
 * drop the dqm and process locks and wait until it is resumed.
 */
static int wait_on_destroy_queue(struct
				 device_queue_manager *dqm,
				 struct queue *q)
{
	struct kfd_process_device *pdd = kfd_get_process_device_data(q->device,
								q->process);
	int ret = 0;

	if (pdd->qpd.is_debug)
		return ret;

	q->properties.is_being_destroyed = true;

	if (pdd->process->debug_trap_enabled && q->properties.is_suspended) {
		/* Drop both locks so the debugger can resume the queue;
		 * reacquire them in the same order afterwards.
		 */
		dqm_unlock(dqm);
		mutex_unlock(&q->process->mutex);
		ret = wait_event_interruptible(dqm->destroy_wait,
						!q->properties.is_suspended);

		mutex_lock(&q->process->mutex);
		dqm_lock(dqm);
	}

	return ret;
}

/* Destroy a user-mode queue on the HWS path: collect SDMA usage stats,
 * release doorbell and SDMA slot, unmap the queue (runlist or MES) and
 * free its MQD outside the dqm lock. Fails with -EBUSY while the owning
 * process is being debugged.
 */
static int destroy_queue_cpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd_mgr;
	uint64_t sdma_val = 0;
	struct kfd_process_device *pdd = qpd_to_pdd(qpd);

	/* Get the SDMA queue stats */
	if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
	    (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
		retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
							&sdma_val);
		if (retval)
			pr_err("Failed to read SDMA queue counter for queue: %d\n",
				q->properties.queue_id);
	}

	/* remove queue from list to prevent rescheduling after preemption */
	dqm_lock(dqm);

	retval = wait_on_destroy_queue(dqm, q);

	if (retval) {
		dqm_unlock(dqm);
		return retval;
	}

	if (qpd->is_debug) {
		/*
		 * error, currently we do not allow to destroy a queue
		 * of a currently debugged process
		 */
		retval = -EBUSY;
		goto failed_try_destroy_debugged_queue;

	}

	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
			q->properties.type)];

	deallocate_doorbell(qpd, q);

	if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
	    (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
		deallocate_sdma_queue(dqm, q);
		pdd->sdma_past_activity_counter += sdma_val;
	}

	list_del(&q->list);
	qpd->queue_count--;
	if (q->properties.is_active) {
		decrement_queue_count(dqm, qpd, q);
		if (!dqm->dev->kfd->shared_resources.enable_mes) {
			retval = execute_queues_cpsch(dqm,
						      KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
						      USE_DEFAULT_GRACE_PERIOD);
			/* -ETIME means preemption timed out; request a
			 * wavefront reset when the process terminates.
			 */
			if (retval == -ETIME)
				qpd->reset_wavefronts = true;
		} else {
			retval = remove_queue_mes(dqm, q, qpd);
		}
	}

	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	dqm_unlock(dqm);

	/*
	 * Do free_mqd and raise delete event after dqm_unlock(dqm) to avoid
	 * circular locking
	 */
	kfd_dbg_ev_raise(KFD_EC_MASK(EC_DEVICE_QUEUE_DELETE),
			qpd->pqm->process, q->device,
			-1, false, NULL, 0);

	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);

	return retval;

failed_try_destroy_debugged_queue:

	dqm_unlock(dqm);
	return retval;
}

/*
 * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
 * stay in user mode.
 */
#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
/* APE1 limit is inclusive and 64K aligned.
 */
#define APE1_LIMIT_ALIGNMENT 0xFFFF

/* Configure the default/alternate cache policy and the APE1 aperture for a
 * process, validating that base/limit fit the SH_MEM_APE1 register format.
 * Returns true on success, false for an unrepresentable aperture.
 */
static bool set_cache_memory_policy(struct device_queue_manager *dqm,
				   struct qcm_process_device *qpd,
				   enum cache_policy default_policy,
				   enum cache_policy alternate_policy,
				   void __user *alternate_aperture_base,
				   uint64_t alternate_aperture_size)
{
	bool retval = true;

	if (!dqm->asic_ops.set_cache_memory_policy)
		return retval;

	dqm_lock(dqm);

	if (alternate_aperture_size == 0) {
		/* base > limit disables APE1 */
		qpd->sh_mem_ape1_base = 1;
		qpd->sh_mem_ape1_limit = 0;
	} else {
		/*
		 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
		 *			SH_MEM_APE1_BASE[31:0], 0x0000 }
		 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
		 *			SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
		 * Verify that the base and size parameters can be
		 * represented in this format and convert them.
		 * Additionally restrict APE1 to user-mode addresses.
		 */

		uint64_t base = (uintptr_t)alternate_aperture_base;
		uint64_t limit = base + alternate_aperture_size - 1;

		if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
		   (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
			retval = false;
			goto out;
		}

		qpd->sh_mem_ape1_base = base >> 16;
		qpd->sh_mem_ape1_limit = limit >> 16;
	}

	retval = dqm->asic_ops.set_cache_memory_policy(
			dqm,
			qpd,
			default_policy,
			alternate_policy,
			alternate_aperture_base,
			alternate_aperture_size);

	if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
		program_sh_mem_settings(dqm, qpd);

	pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
		qpd->sh_mem_config, qpd->sh_mem_ape1_base,
		qpd->sh_mem_ape1_limit);

out:
	dqm_unlock(dqm);
	return retval;
}

/* Process teardown on the non-HWS path: destroy every remaining user-mode
 * queue (freeing MQDs outside the dqm lock) and unregister the process.
 * Returns the first destroy error encountered, 0 otherwise.
 */
static int process_termination_nocpsch(struct device_queue_manager *dqm,
		struct qcm_process_device *qpd)
{
	struct queue *q;
	struct device_process_node *cur, *next_dpn;
	int retval = 0;
	bool found = false;

	dqm_lock(dqm);

	/* Clear all user mode queues */
	while (!list_empty(&qpd->queues_list)) {
		struct mqd_manager *mqd_mgr;
		int ret;

		q = list_first_entry(&qpd->queues_list, struct queue, list);
		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
				q->properties.type)];
		ret = destroy_queue_nocpsch_locked(dqm, qpd, q);
		if (ret)
			retval = ret;
		/* free_mqd must run unlocked; the list head is re-read
		 * after relocking, so dropping the lock here is safe.
		 */
		dqm_unlock(dqm);
		mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
		dqm_lock(dqm);
	}

	/* Unregister process */
	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			found = true;
			break;
		}
	}

	dqm_unlock(dqm);

	/* Outside the DQM lock because under the DQM lock we can't do
	 * reclaim or take other locks that others hold while reclaiming.
	 */
	if (found)
		kfd_dec_compute_active(dqm->dev);

	return retval;
}

/* Copy the control stack / save area of an inactive CWSR-enabled compute
 * queue to user space. Only valid while the queue is preempted.
 */
static int get_wave_state(struct device_queue_manager *dqm,
			  struct queue *q,
			  void __user *ctl_stack,
			  u32 *ctl_stack_used_size,
			  u32 *save_area_used_size)
{
	struct mqd_manager *mqd_mgr;

	dqm_lock(dqm);

	mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP];

	if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE ||
	    q->properties.is_active || !q->device->kfd->cwsr_enabled ||
	    !mqd_mgr->get_wave_state) {
		dqm_unlock(dqm);
		return -EINVAL;
	}

	dqm_unlock(dqm);

	/*
	 * get_wave_state is outside the dqm lock to prevent circular locking
	 * and the queue should be protected against destruction by the process
	 * lock.
	 */
	return mqd_mgr->get_wave_state(mqd_mgr, q->mqd, &q->properties,
			ctl_stack, ctl_stack_used_size, save_area_used_size);
}

/*
 * Report the MQD size and (for compute queues) control stack size needed
 * to checkpoint the given queue, e.g. for CRIU-style save/restore.
 */
static void get_queue_checkpoint_info(struct device_queue_manager *dqm,
			const struct queue *q,
			u32 *mqd_size,
			u32 *ctl_stack_size)
{
	struct mqd_manager *mqd_mgr;
	enum KFD_MQD_TYPE mqd_type =
			get_mqd_type_from_queue_type(q->properties.type);

	dqm_lock(dqm);
	mqd_mgr = dqm->mqd_mgrs[mqd_type];
	*mqd_size = mqd_mgr->mqd_size;
	*ctl_stack_size = 0;

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE && mqd_mgr->get_checkpoint_info)
		mqd_mgr->get_checkpoint_info(mqd_mgr, q->mqd, ctl_stack_size);

	dqm_unlock(dqm);
}

/*
 * Snapshot a queue's MQD and control stack into caller-provided buffers.
 *
 * Returns 0 on success, -EINVAL if the queue is active or CWSR is disabled,
 * -EOPNOTSUPP if the MQD manager cannot checkpoint this queue type.
 */
static int checkpoint_mqd(struct device_queue_manager *dqm,
			  const struct queue *q,
			  void *mqd,
			  void *ctl_stack)
{
	struct mqd_manager *mqd_mgr;
	int r = 0;
	enum KFD_MQD_TYPE mqd_type =
			get_mqd_type_from_queue_type(q->properties.type);

	dqm_lock(dqm);

	if (q->properties.is_active || !q->device->kfd->cwsr_enabled) {
		r = -EINVAL;
		goto dqm_unlock;
	}

	mqd_mgr = dqm->mqd_mgrs[mqd_type];
	if (!mqd_mgr->checkpoint_mqd) {
		r = -EOPNOTSUPP;
		goto dqm_unlock;
	}

	mqd_mgr->checkpoint_mqd(mqd_mgr, q->mqd, mqd, ctl_stack);

dqm_unlock:
	dqm_unlock(dqm);
	return r;
}

/*
 * Tear down all kernel and user queues of a terminating process (HWS path)
 * and unregister the process from the DQM.
 *
 * Returns 0 on success, or the last error from unmapping/removing queues.
 */
static int process_termination_cpsch(struct device_queue_manager *dqm,
		struct qcm_process_device *qpd)
{
	int retval;
	struct queue *q;
	struct kernel_queue *kq, *kq_next;
	struct mqd_manager *mqd_mgr;
	struct device_process_node *cur, *next_dpn;
	enum kfd_unmap_queues_filter filter =
		KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;
	bool found = false;

	retval = 0;

	dqm_lock(dqm);

	/* Clean all kernel queues */
	list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list,
				 list) {
		list_del(&kq->list);
		decrement_queue_count(dqm, qpd, kq->queue);
		qpd->is_debug = false;
		dqm->total_queue_count--;
		/* a kernel (debug) queue existed: unmap everything below */
		filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES;
	}

	/* Clear all user mode queues */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
			deallocate_sdma_queue(dqm, q);
		else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
			deallocate_sdma_queue(dqm, q);

		if (q->properties.is_active) {
			decrement_queue_count(dqm, qpd, q);

			if (dqm->dev->kfd->shared_resources.enable_mes) {
				retval = remove_queue_mes(dqm, q, qpd);
				if (retval)
					pr_err("Failed to remove queue %d\n",
						q->properties.queue_id);
			}
		}

		dqm->total_queue_count--;
	}

	/* Unregister process */
	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			found = true;
			break;
		}
	}

	/* non-MES: one batched unmap for all the queues counted above */
	if (!dqm->dev->kfd->shared_resources.enable_mes)
		retval = execute_queues_cpsch(dqm, filter, 0, USE_DEFAULT_GRACE_PERIOD);

	if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) {
		pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
		dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
		qpd->reset_wavefronts = false;
	}

	/* Lastly, free mqd resources.
	 * Do free_mqd() after dqm_unlock to avoid circular locking.
	 */
	while (!list_empty(&qpd->queues_list)) {
		q = list_first_entry(&qpd->queues_list, struct queue, list);
		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
				q->properties.type)];
		list_del(&q->list);
		qpd->queue_count--;
		dqm_unlock(dqm);
		mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
		dqm_lock(dqm);
	}
	dqm_unlock(dqm);

	/* Outside the DQM lock because under the DQM lock we can't do
	 * reclaim or take other locks that others hold while reclaiming.
	 */
	if (found)
		kfd_dec_compute_active(dqm->dev);

	return retval;
}

/*
 * Instantiate one MQD manager per MQD type via the ASIC-specific factory.
 * On failure, frees the managers created so far and returns -ENOMEM.
 */
static int init_mqd_managers(struct device_queue_manager *dqm)
{
	int i, j;
	struct mqd_manager *mqd_mgr;

	for (i = 0; i < KFD_MQD_TYPE_MAX; i++) {
		mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev);
		if (!mqd_mgr) {
			pr_err("mqd manager [%d] initialization failed\n", i);
			goto out_free;
		}
		dqm->mqd_mgrs[i] = mqd_mgr;
	}

	return 0;

out_free:
	for (j = 0; j < i; j++) {
		kfree(dqm->mqd_mgrs[j]);
		dqm->mqd_mgrs[j] = NULL;
	}

	return -ENOMEM;
}

/* Allocate one hiq mqd (HWS) and all SDMA mqd in a continuous trunk*/
static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)
{
	int retval;
	struct kfd_node *dev = dqm->dev;
	struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd;
	/* all SDMA engines * queues-per-engine, plus one HIQ per XCC */
	uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
		get_num_all_sdma_engines(dqm) *
		dev->kfd->device_info.num_sdma_queues_per_engine +
		(dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size *
		NUM_XCC(dqm->dev->xcc_mask));

	retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, size,
		&(mem_obj->gtt_mem), &(mem_obj->gpu_addr),
		(void *)&(mem_obj->cpu_ptr), false);

	return retval;
}

/*
 * Allocate and initialize a device queue manager for the given KFD node,
 * selecting the scheduling policy and wiring the ops/asic_ops tables.
 * Returns NULL on any failure.
 */
struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev)
{
	struct
device_queue_manager *dqm;

	pr_debug("Loading device queue manager\n");

	dqm = kzalloc(sizeof(*dqm), GFP_KERNEL);
	if (!dqm)
		return NULL;

	switch (dev->adev->asic_type) {
	/* HWS is not available on Hawaii. */
	case CHIP_HAWAII:
	/* HWS depends on CWSR for timely dequeue. CWSR is not
	 * available on Tonga.
	 *
	 * FIXME: This argument also applies to Kaveri.
	 */
	case CHIP_TONGA:
		dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS;
		break;
	default:
		dqm->sched_policy = sched_policy;
		break;
	}

	dqm->dev = dev;
	switch (dqm->sched_policy) {
	case KFD_SCHED_POLICY_HWS:
	case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
		/* initialize dqm for cp scheduling */
		dqm->ops.create_queue = create_queue_cpsch;
		dqm->ops.initialize = initialize_cpsch;
		dqm->ops.start = start_cpsch;
		dqm->ops.stop = stop_cpsch;
		dqm->ops.pre_reset = pre_reset;
		dqm->ops.destroy_queue = destroy_queue_cpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.register_process = register_process;
		dqm->ops.unregister_process = unregister_process;
		dqm->ops.uninitialize = uninitialize;
		dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
		dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		dqm->ops.process_termination = process_termination_cpsch;
		dqm->ops.evict_process_queues = evict_process_queues_cpsch;
		dqm->ops.restore_process_queues = restore_process_queues_cpsch;
		dqm->ops.get_wave_state = get_wave_state;
		dqm->ops.reset_queues = reset_queues_cpsch;
		dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info;
		dqm->ops.checkpoint_mqd = checkpoint_mqd;
		break;
	case KFD_SCHED_POLICY_NO_HWS:
		/* initialize dqm for no cp scheduling */
		dqm->ops.start = start_nocpsch;
		dqm->ops.stop = stop_nocpsch;
		dqm->ops.pre_reset = pre_reset;
		dqm->ops.create_queue = create_queue_nocpsch;
		dqm->ops.destroy_queue = destroy_queue_nocpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.register_process = register_process;
		dqm->ops.unregister_process = unregister_process;
		dqm->ops.initialize = initialize_nocpsch;
		dqm->ops.uninitialize = uninitialize;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		dqm->ops.process_termination = process_termination_nocpsch;
		dqm->ops.evict_process_queues = evict_process_queues_nocpsch;
		dqm->ops.restore_process_queues =
			restore_process_queues_nocpsch;
		dqm->ops.get_wave_state = get_wave_state;
		dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info;
		dqm->ops.checkpoint_mqd = checkpoint_mqd;
		break;
	default:
		pr_err("Invalid scheduling policy %d\n", dqm->sched_policy);
		goto out_free;
	}

	/* Pick the ASIC-generation-specific callback table */
	switch (dev->adev->asic_type) {
	case CHIP_CARRIZO:
		device_queue_manager_init_vi(&dqm->asic_ops);
		break;

	case CHIP_KAVERI:
		device_queue_manager_init_cik(&dqm->asic_ops);
		break;

	case CHIP_HAWAII:
		device_queue_manager_init_cik_hawaii(&dqm->asic_ops);
		break;

	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		device_queue_manager_init_vi_tonga(&dqm->asic_ops);
		break;

	default:
		/* newer ASICs are dispatched on GC IP version instead */
		if (KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0))
			device_queue_manager_init_v11(&dqm->asic_ops);
		else if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1))
			device_queue_manager_init_v10_navi10(&dqm->asic_ops);
		else if (KFD_GC_VERSION(dev) >= IP_VERSION(9, 0, 1))
			device_queue_manager_init_v9(&dqm->asic_ops);
		else {
			WARN(1, "Unexpected ASIC family %u",
			     dev->adev->asic_type);
			goto out_free;
		}
	}

	if (init_mqd_managers(dqm))
		goto out_free;

	/* non-MES needs the contiguous HIQ+SDMA MQD trunk */
	if (!dev->kfd->shared_resources.enable_mes && allocate_hiq_sdma_mqd(dqm)) {
		pr_err("Failed to allocate hiq sdma mqd trunk buffer\n");
		goto out_free;
	}

	/* ops.initialize returns 0 on success */
	if (!dqm->ops.initialize(dqm)) {
		init_waitqueue_head(&dqm->destroy_wait);
		return dqm;
	}

out_free:
	kfree(dqm);
	return NULL;
}

/* Free the contiguous HIQ+SDMA MQD trunk allocated at init time. */
static void deallocate_hiq_sdma_mqd(struct kfd_node *dev,
				    struct kfd_mem_obj *mqd)
{
	WARN(!mqd, "No hiq sdma mqd trunk to free");

	amdgpu_amdkfd_free_gtt_mem(dev->adev, mqd->gtt_mem);
}

/* Stop the DQM and release all resources created by device_queue_manager_init. */
void device_queue_manager_uninit(struct device_queue_manager *dqm)
{
	dqm->ops.stop(dqm);
	dqm->ops.uninitialize(dqm);
	if (!dqm->dev->kfd->shared_resources.enable_mes)
		deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd);
	kfree(dqm);
}

/*
 * Evict all queues of the process identified by @pasid on this device.
 * Returns -EINVAL if no such process exists.
 */
int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid)
{
	struct kfd_process_device *pdd;
	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
	int ret = 0;

	if (!p)
		return -EINVAL;
	WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
	pdd = kfd_get_process_device_data(dqm->dev, p);
	if (pdd)
		ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd);
	kfd_unref_process(p);

	return ret;
}

/* Deferred-work handler: escalate an HWS hardware exception to a GPU reset. */
static void kfd_process_hw_exception(struct work_struct *work)
{
	struct device_queue_manager *dqm = container_of(work,
			struct device_queue_manager, hw_exception_work);
	amdgpu_amdkfd_gpu_reset(dqm->dev->adev);
}

/*
 * Carve the last KFD VMID out of the compute VMID bitmap and dedicate it to
 * the trap debugger.  All queues are unmapped, the scheduler resources are
 * reprogrammed with the shrunk bitmap, and the queues are remapped.
 *
 * HWS only; returns -EBUSY if a trap debug VMID is already reserved.
 */
int reserve_debug_trap_vmid(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd)
{
	int r;
	int updated_vmid_mask;

	if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
		pr_err("Unsupported on sched_policy: %i\n", dqm->sched_policy);
		return -EINVAL;
	}

	dqm_lock(dqm);

	if (dqm->trap_debug_vmid != 0) {
		pr_err("Trap debug id already reserved\n");
		r = -EBUSY;
		goto out_unlock;
	}

	r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0,
			USE_DEFAULT_GRACE_PERIOD, false);
	if (r)
		goto out_unlock;

	updated_vmid_mask = dqm->dev->kfd->shared_resources.compute_vmid_bitmap;
	updated_vmid_mask &= ~(1 << dqm->dev->vm_info.last_vmid_kfd);

	dqm->dev->kfd->shared_resources.compute_vmid_bitmap = updated_vmid_mask;
	dqm->trap_debug_vmid = dqm->dev->vm_info.last_vmid_kfd;
	r = set_sched_resources(dqm);
	if (r)
		goto out_unlock;

	r = map_queues_cpsch(dqm);
	if (r)
		goto out_unlock;

	pr_debug("Reserved VMID for trap debug: %i\n", dqm->trap_debug_vmid);

out_unlock:
	dqm_unlock(dqm);
	return r;
}

/*
 * Releases vmid for the trap debugger
 */
int release_debug_trap_vmid(struct device_queue_manager *dqm,
			struct qcm_process_device *qpd)
{
	int r;
	int updated_vmid_mask;
	uint32_t trap_debug_vmid;

	if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
		pr_err("Unsupported on sched_policy: %i\n", dqm->sched_policy);
		return -EINVAL;
	}

	dqm_lock(dqm);
	trap_debug_vmid = dqm->trap_debug_vmid;
	if (dqm->trap_debug_vmid == 0) {
		pr_err("Trap debug id is not reserved\n");
		r = -EINVAL;
		goto out_unlock;
	}

	r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0,
			USE_DEFAULT_GRACE_PERIOD, false);
	if (r)
		goto out_unlock;

	/* return the reserved VMID to the compute pool */
	updated_vmid_mask = dqm->dev->kfd->shared_resources.compute_vmid_bitmap;
	updated_vmid_mask |= (1 << dqm->dev->vm_info.last_vmid_kfd);

	dqm->dev->kfd->shared_resources.compute_vmid_bitmap = updated_vmid_mask;
	dqm->trap_debug_vmid = 0;
	r = set_sched_resources(dqm);
	if (r)
		goto out_unlock;

	r = map_queues_cpsch(dqm);
	if (r)
		goto out_unlock;

	pr_debug("Released VMID for trap debug: %i\n", trap_debug_vmid);

out_unlock:
	dqm_unlock(dqm);
	return r;
}

#define QUEUE_NOT_FOUND -1
/* invalidate queue operation in array */
static void q_array_invalidate(uint32_t num_queues, uint32_t *queue_ids)
{
	int i;

	for (i = 0; i < num_queues; i++)
		queue_ids[i] |= KFD_DBG_QUEUE_INVALID_MASK;
}

/* find queue index in array */
static int q_array_get_index(unsigned int queue_id,
		uint32_t num_queues,
		uint32_t *queue_ids)
{
	int i;

	/* compare ignoring the invalid-mask bit set by q_array_invalidate */
	for (i = 0; i < num_queues; i++)
		if (queue_id == (queue_ids[i] & ~KFD_DBG_QUEUE_INVALID_MASK))
			return i;

	return QUEUE_NOT_FOUND;
}

/* On-stack context passed to copy_context_work_handler via schedule_work */
struct copy_context_work_handler_workarea {
	struct work_struct copy_context_work;
	struct kfd_process *p;
};

/*
 * Worker that copies each queue's wave state into the process's user-mode
 * context save/restore area, executing with the target process's mm.
 */
static void copy_context_work_handler (struct work_struct *work)
{
	struct copy_context_work_handler_workarea *workarea;
	struct mqd_manager *mqd_mgr;
	struct queue *q;
	struct mm_struct *mm;
	struct kfd_process *p;
	uint32_t tmp_ctl_stack_used_size, tmp_save_area_used_size;
	int i;

	workarea = container_of(work,
			struct copy_context_work_handler_workarea,
			copy_context_work);

	p = workarea->p;
	mm = get_task_mm(p->lead_thread);

	if (!mm)
		return;

	kthread_use_mm(mm);
	for (i = 0; i < p->n_pdds; i++) {
		struct kfd_process_device *pdd = p->pdds[i];
		struct device_queue_manager *dqm = pdd->dev->dqm;
		struct qcm_process_device *qpd = &pdd->qpd;

		list_for_each_entry(q, &qpd->queues_list, list) {
			mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP];

			/* We ignore the return value from get_wave_state
			 * because
			 * i) right now, it always returns 0, and
			 * ii) if we hit an error, we would continue to the
			 * next queue anyway.
2790 */ 2791 mqd_mgr->get_wave_state(mqd_mgr, 2792 q->mqd, 2793 &q->properties, 2794 (void __user *) q->properties.ctx_save_restore_area_address, 2795 &tmp_ctl_stack_used_size, 2796 &tmp_save_area_used_size); 2797 } 2798 } 2799 kthread_unuse_mm(mm); 2800 mmput(mm); 2801 } 2802 2803 static uint32_t *get_queue_ids(uint32_t num_queues, uint32_t *usr_queue_id_array) 2804 { 2805 size_t array_size = num_queues * sizeof(uint32_t); 2806 uint32_t *queue_ids = NULL; 2807 2808 if (!usr_queue_id_array) 2809 return NULL; 2810 2811 queue_ids = kzalloc(array_size, GFP_KERNEL); 2812 if (!queue_ids) 2813 return ERR_PTR(-ENOMEM); 2814 2815 if (copy_from_user(queue_ids, usr_queue_id_array, array_size)) 2816 return ERR_PTR(-EFAULT); 2817 2818 return queue_ids; 2819 } 2820 2821 int resume_queues(struct kfd_process *p, 2822 uint32_t num_queues, 2823 uint32_t *usr_queue_id_array) 2824 { 2825 uint32_t *queue_ids = NULL; 2826 int total_resumed = 0; 2827 int i; 2828 2829 if (usr_queue_id_array) { 2830 queue_ids = get_queue_ids(num_queues, usr_queue_id_array); 2831 2832 if (IS_ERR(queue_ids)) 2833 return PTR_ERR(queue_ids); 2834 2835 /* mask all queues as invalid. 
unmask per successful request */
		q_array_invalidate(num_queues, queue_ids);
	}

	for (i = 0; i < p->n_pdds; i++) {
		struct kfd_process_device *pdd = p->pdds[i];
		struct device_queue_manager *dqm = pdd->dev->dqm;
		struct qcm_process_device *qpd = &pdd->qpd;
		struct queue *q;
		int r, per_device_resumed = 0;

		dqm_lock(dqm);

		/* unmask queues that resume or already resumed as valid */
		list_for_each_entry(q, &qpd->queues_list, list) {
			int q_idx = QUEUE_NOT_FOUND;

			if (queue_ids)
				q_idx = q_array_get_index(
						q->properties.queue_id,
						num_queues,
						queue_ids);

			if (!queue_ids || q_idx != QUEUE_NOT_FOUND) {
				int err = resume_single_queue(dqm, &pdd->qpd, q);

				if (queue_ids) {
					if (!err) {
						queue_ids[q_idx] &=
							~KFD_DBG_QUEUE_INVALID_MASK;
					} else {
						queue_ids[q_idx] |=
							KFD_DBG_QUEUE_ERROR_MASK;
						break;
					}
				}

				/* MES resumes immediately; non-MES batches a
				 * runlist update below per device.
				 */
				if (dqm->dev->kfd->shared_resources.enable_mes) {
					wake_up_all(&dqm->destroy_wait);
					if (!err)
						total_resumed++;
				} else {
					per_device_resumed++;
				}
			}
		}

		if (!per_device_resumed) {
			dqm_unlock(dqm);
			continue;
		}

		r = execute_queues_cpsch(dqm,
					KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES,
					0,
					USE_DEFAULT_GRACE_PERIOD);
		if (r) {
			pr_err("Failed to resume process queues\n");
			if (queue_ids) {
				list_for_each_entry(q, &qpd->queues_list, list) {
					int q_idx = q_array_get_index(
							q->properties.queue_id,
							num_queues,
							queue_ids);

					/* mask queue as error on resume fail */
					if (q_idx != QUEUE_NOT_FOUND)
						queue_ids[q_idx] |=
							KFD_DBG_QUEUE_ERROR_MASK;
				}
			}
		} else {
			wake_up_all(&dqm->destroy_wait);
			total_resumed += per_device_resumed;
		}

		dqm_unlock(dqm);
	}

	/* report per-queue status bits back to the caller */
	if (queue_ids) {
		if (copy_to_user((void __user *)usr_queue_id_array, queue_ids,
				num_queues * sizeof(uint32_t)))
			pr_err("copy_to_user failed on queue resume\n");

		kfree(queue_ids);
	}

	return total_resumed;
}

/*
 * Suspend the listed queues of process @p so the debugger can inspect their
 * wave state.  Per-queue status bits are written back to user space, and a
 * worker copies each suspended queue's context to its save/restore area.
 *
 * Returns the number of queues successfully suspended, or a negative errno.
 */
int suspend_queues(struct kfd_process *p,
			uint32_t num_queues,
			uint32_t grace_period,
			uint64_t exception_clear_mask,
			uint32_t *usr_queue_id_array)
{
	uint32_t *queue_ids = get_queue_ids(num_queues, usr_queue_id_array);
	int total_suspended = 0;
	int i;

	if (IS_ERR(queue_ids))
		return PTR_ERR(queue_ids);

	/* mask all queues as invalid. umask on successful request */
	q_array_invalidate(num_queues, queue_ids);

	for (i = 0; i < p->n_pdds; i++) {
		struct kfd_process_device *pdd = p->pdds[i];
		struct device_queue_manager *dqm = pdd->dev->dqm;
		struct qcm_process_device *qpd = &pdd->qpd;
		struct queue *q;
		int r, per_device_suspended = 0;

		mutex_lock(&p->event_mutex);
		dqm_lock(dqm);

		/* unmask queues that suspend or already suspended */
		list_for_each_entry(q, &qpd->queues_list, list) {
			int q_idx = q_array_get_index(q->properties.queue_id,
							num_queues,
							queue_ids);

			if (q_idx != QUEUE_NOT_FOUND) {
				int err = suspend_single_queue(dqm, pdd, q);
				bool is_mes = dqm->dev->kfd->shared_resources.enable_mes;

				if (!err) {
					queue_ids[q_idx] &= ~KFD_DBG_QUEUE_INVALID_MASK;
					if (exception_clear_mask && is_mes)
						q->properties.exception_status &=
							~exception_clear_mask;

					if (is_mes)
						total_suspended++;
					else
						per_device_suspended++;
				} else if (err != -EBUSY) {
					r = err;
					queue_ids[q_idx] |= KFD_DBG_QUEUE_ERROR_MASK;
					break;
				}
			}
		}

		if (!per_device_suspended) {
			dqm_unlock(dqm);
			mutex_unlock(&p->event_mutex);
			if (total_suspended)
				amdgpu_amdkfd_debug_mem_fence(dqm->dev->adev);
			continue;
		}

		r = execute_queues_cpsch(dqm,
			KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
			grace_period);

		if (r)
			pr_err("Failed to suspend process queues.\n");
		else
			total_suspended += per_device_suspended;

		list_for_each_entry(q, &qpd->queues_list, list) {
			int q_idx = q_array_get_index(q->properties.queue_id,
						num_queues, queue_ids);

			if (q_idx == QUEUE_NOT_FOUND)
				continue;

			/* mask queue as error on suspend fail */
			if (r)
				queue_ids[q_idx] |= KFD_DBG_QUEUE_ERROR_MASK;
			else if (exception_clear_mask)
				q->properties.exception_status &=
						~exception_clear_mask;
		}

		dqm_unlock(dqm);
		mutex_unlock(&p->event_mutex);
		amdgpu_device_flush_hdp(dqm->dev->adev, NULL);
	}

	/* Copy each suspended queue's context to its user-mode save area;
	 * runs as on-stack work so it can borrow the process's mm.
	 */
	if (total_suspended) {
		struct copy_context_work_handler_workarea copy_context_worker;

		INIT_WORK_ONSTACK(
				&copy_context_worker.copy_context_work,
				copy_context_work_handler);

		copy_context_worker.p = p;

		schedule_work(&copy_context_worker.copy_context_work);


		flush_work(&copy_context_worker.copy_context_work);
		destroy_work_on_stack(&copy_context_worker.copy_context_work);
	}

	if (copy_to_user((void __user *)usr_queue_id_array, queue_ids,
			num_queues * sizeof(uint32_t)))
		pr_err("copy_to_user failed on queue suspend\n");

	kfree(queue_ids);

	return total_suspended;
}

/* Translate an internal queue type/format to the uAPI KFD_IOC_QUEUE_TYPE_*. */
static uint32_t set_queue_type_for_user(struct queue_properties *q_props)
{
	switch (q_props->type) {
	case KFD_QUEUE_TYPE_COMPUTE:
		return q_props->format == KFD_QUEUE_FORMAT_PM4
					? KFD_IOC_QUEUE_TYPE_COMPUTE
					: KFD_IOC_QUEUE_TYPE_COMPUTE_AQL;
	case KFD_QUEUE_TYPE_SDMA:
		return KFD_IOC_QUEUE_TYPE_SDMA;
	case KFD_QUEUE_TYPE_SDMA_XGMI:
		return KFD_IOC_QUEUE_TYPE_SDMA_XGMI;
	default:
		WARN_ONCE(true, "queue type not recognized!");
		return 0xffffffff;
	};
}

/*
 * Fill one debugger queue-snapshot entry from a queue's properties and
 * clear the requested exception bits on the queue.
 */
void set_queue_snapshot_entry(struct queue *q,
			      uint64_t exception_clear_mask,
			      struct kfd_queue_snapshot_entry *qss_entry)
{
	qss_entry->ring_base_address = q->properties.queue_address;
	qss_entry->write_pointer_address = (uint64_t)q->properties.write_ptr;
	qss_entry->read_pointer_address = (uint64_t)q->properties.read_ptr;
	qss_entry->ctx_save_restore_address =
				q->properties.ctx_save_restore_area_address;
	qss_entry->ctx_save_restore_area_size =
				q->properties.ctx_save_restore_area_size;
	qss_entry->exception_status = q->properties.exception_status;
	qss_entry->queue_id = q->properties.queue_id;
	qss_entry->gpu_id = q->device->id;
	qss_entry->ring_size = (uint32_t)q->properties.queue_size;
	qss_entry->queue_type = set_queue_type_for_user(&q->properties);
	q->properties.exception_status &= ~exception_clear_mask;
}

/*
 * Unmap all queues and return with the DQM lock HELD on success (released
 * only on failure).  Paired with debug_map_and_unlock(), which remaps and
 * drops the lock.  HWS only; a no-op on per-VMID-debug-capable devices.
 */
int debug_lock_and_unmap(struct device_queue_manager *dqm)
{
	int r;

	if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
		pr_err("Unsupported on sched_policy: %i\n", dqm->sched_policy);
		return -EINVAL;
	}

	if (!kfd_dbg_is_per_vmid_supported(dqm->dev))
		return 0;

	dqm_lock(dqm);

	r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, 0, false);
	if (r)
		dqm_unlock(dqm);

	return r;
}

/*
 * Remap all queues and release the DQM lock taken by debug_lock_and_unmap().
 */
int debug_map_and_unlock(struct device_queue_manager *dqm)
{
	int r;

	if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
		pr_err("Unsupported on sched_policy: %i\n", dqm->sched_policy);
		return -EINVAL;
	}

	if
(!kfd_dbg_is_per_vmid_supported(dqm->dev))
		return 0;

	r = map_queues_cpsch(dqm);

	dqm_unlock(dqm);

	return r;
}

/* Unmap and remap all queues so the HWS picks up updated debug state. */
int debug_refresh_runlist(struct device_queue_manager *dqm)
{
	int r = debug_lock_and_unmap(dqm);

	if (r)
		return r;

	return debug_map_and_unlock(dqm);
}

#if defined(CONFIG_DEBUG_FS)

/*
 * Print (address, value) register pairs, coalescing runs of consecutive
 * addresses onto one line (up to 8 values per line).
 */
static void seq_reg_dump(struct seq_file *m,
			 uint32_t (*dump)[2], uint32_t n_regs)
{
	uint32_t i, count;

	for (i = 0, count = 0; i < n_regs; i++) {
		if (count == 0 ||
		    dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) {
			seq_printf(m, "%s %08x: %08x",
				   i ? "\n" : "",
				   dump[i][0], dump[i][1]);
			count = 7;
		} else {
			seq_printf(m, " %08x", dump[i][1]);
			count--;
		}
	}

	seq_puts(m, "\n");
}

/*
 * debugfs dump of all HQDs: per-XCC HIQ and CP queues, then SDMA RLC
 * queues.  Returns 0 on success or the first hqd_dump error.
 */
int dqm_debugfs_hqds(struct seq_file *m, void *data)
{
	struct device_queue_manager *dqm = data;
	uint32_t xcc_mask = dqm->dev->xcc_mask;
	uint32_t (*dump)[2], n_regs;
	int pipe, queue;
	int r = 0, xcc_id;
	uint32_t sdma_engine_start;

	if (!dqm->sched_running) {
		seq_puts(m, " Device is stopped\n");
		return 0;
	}

	for_each_inst(xcc_id, xcc_mask) {
		r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev,
						KFD_CIK_HIQ_PIPE,
						KFD_CIK_HIQ_QUEUE, &dump,
						&n_regs, xcc_id);
		if (!r) {
			seq_printf(
				m,
				"   Inst %d, HIQ on MEC %d Pipe %d Queue %d\n",
				xcc_id,
				KFD_CIK_HIQ_PIPE / get_pipes_per_mec(dqm) + 1,
				KFD_CIK_HIQ_PIPE % get_pipes_per_mec(dqm),
				KFD_CIK_HIQ_QUEUE);
			seq_reg_dump(m, dump, n_regs);

			kfree(dump);
		}

		for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
			int pipe_offset = pipe * get_queues_per_pipe(dqm);

			for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) {
				/* skip queues not owned by KFD */
				if (!test_bit(pipe_offset + queue,
					      dqm->dev->kfd->shared_resources.cp_queue_bitmap))
					continue;

				r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev,
								pipe, queue,
								&dump, &n_regs,
								xcc_id);
				if (r)
					break;

				seq_printf(m,
					   " Inst %d,  CP Pipe %d, Queue %d\n",
					   xcc_id, pipe, queue);
				seq_reg_dump(m, dump, n_regs);

				kfree(dump);
			}
		}
	}

	sdma_engine_start = dqm->dev->node_id * get_num_all_sdma_engines(dqm);
	for (pipe = sdma_engine_start;
	     pipe < (sdma_engine_start + get_num_all_sdma_engines(dqm));
	     pipe++) {
		for (queue = 0;
		     queue < dqm->dev->kfd->device_info.num_sdma_queues_per_engine;
		     queue++) {
			r = dqm->dev->kfd2kgd->hqd_sdma_dump(
				dqm->dev->adev, pipe, queue, &dump, &n_regs);
			if (r)
				break;

			seq_printf(m, " SDMA Engine %d, RLC %d\n",
				  pipe, queue);
			seq_reg_dump(m, dump, n_regs);

			kfree(dump);
		}
	}

	return r;
}

/*
 * debugfs hook: deliberately hang the HWS (for testing recovery paths) and
 * resubmit the runlist.
 */
int dqm_debugfs_hang_hws(struct device_queue_manager *dqm)
{
	int r = 0;

	dqm_lock(dqm);
	r = pm_debugfs_hang_hws(&dqm->packet_mgr);
	if (r) {
		dqm_unlock(dqm);
		return r;
	}
	dqm->active_runlist = true;
	r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES,
				 0, USE_DEFAULT_GRACE_PERIOD);
	dqm_unlock(dqm);

	return r;
}

#endif