1 // SPDX-License-Identifier: GPL-2.0 OR MIT 2 /* 3 * Copyright 2014-2022 Advanced Micro Devices, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21 * OTHER DEALINGS IN THE SOFTWARE. 22 * 23 */ 24 25 #include <linux/ratelimit.h> 26 #include <linux/printk.h> 27 #include <linux/slab.h> 28 #include <linux/list.h> 29 #include <linux/types.h> 30 #include <linux/bitops.h> 31 #include <linux/sched.h> 32 #include "kfd_priv.h" 33 #include "kfd_device_queue_manager.h" 34 #include "kfd_mqd_manager.h" 35 #include "cik_regs.h" 36 #include "kfd_kernel_queue.h" 37 #include "amdgpu_amdkfd.h" 38 #include "mes_api_def.h" 39 40 /* Size of the per-pipe EOP queue */ 41 #define CIK_HPD_EOP_BYTES_LOG2 11 42 #define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2) 43 44 static int set_pasid_vmid_mapping(struct device_queue_manager *dqm, 45 u32 pasid, unsigned int vmid); 46 47 static int execute_queues_cpsch(struct device_queue_manager *dqm, 48 enum kfd_unmap_queues_filter filter, 49 uint32_t filter_param); 50 static int unmap_queues_cpsch(struct device_queue_manager *dqm, 51 enum kfd_unmap_queues_filter filter, 52 uint32_t filter_param, bool reset); 53 54 static int map_queues_cpsch(struct device_queue_manager *dqm); 55 56 static void deallocate_sdma_queue(struct device_queue_manager *dqm, 57 struct queue *q); 58 59 static inline void deallocate_hqd(struct device_queue_manager *dqm, 60 struct queue *q); 61 static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q); 62 static int allocate_sdma_queue(struct device_queue_manager *dqm, 63 struct queue *q, const uint32_t *restore_sdma_id); 64 static void kfd_process_hw_exception(struct work_struct *work); 65 66 static inline 67 enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type) 68 { 69 if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI) 70 return KFD_MQD_TYPE_SDMA; 71 return KFD_MQD_TYPE_CP; 72 } 73 74 static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe) 75 { 76 int i; 77 int pipe_offset = (mec * dqm->dev->kfd->shared_resources.num_pipe_per_mec 78 + pipe) * dqm->dev->kfd->shared_resources.num_queue_per_pipe; 79 80 /* queue is available for KFD usage if bit is 1 */ 81 for (i = 0; i < dqm->dev->kfd->shared_resources.num_queue_per_pipe; ++i) 82 if (test_bit(pipe_offset + i, 83 dqm->dev->kfd->shared_resources.cp_queue_bitmap)) 84 return true; 85 return false; 86 } 87 88 unsigned int get_cp_queues_num(struct device_queue_manager *dqm) 89 { 90 return 
bitmap_weight(dqm->dev->kfd->shared_resources.cp_queue_bitmap, 91 KGD_MAX_QUEUES); 92 } 93 94 unsigned int get_queues_per_pipe(struct device_queue_manager *dqm) 95 { 96 return dqm->dev->kfd->shared_resources.num_queue_per_pipe; 97 } 98 99 unsigned int get_pipes_per_mec(struct device_queue_manager *dqm) 100 { 101 return dqm->dev->kfd->shared_resources.num_pipe_per_mec; 102 } 103 104 static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm) 105 { 106 return kfd_get_num_sdma_engines(dqm->dev) + 107 kfd_get_num_xgmi_sdma_engines(dqm->dev); 108 } 109 110 unsigned int get_num_sdma_queues(struct device_queue_manager *dqm) 111 { 112 return kfd_get_num_sdma_engines(dqm->dev) * 113 dqm->dev->kfd->device_info.num_sdma_queues_per_engine; 114 } 115 116 unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm) 117 { 118 return kfd_get_num_xgmi_sdma_engines(dqm->dev) * 119 dqm->dev->kfd->device_info.num_sdma_queues_per_engine; 120 } 121 122 static inline uint64_t get_reserved_sdma_queues_bitmap(struct device_queue_manager *dqm) 123 { 124 return dqm->dev->kfd->device_info.reserved_sdma_queues_bitmap; 125 } 126 127 static void init_sdma_bitmaps(struct device_queue_manager *dqm) 128 { 129 bitmap_zero(dqm->sdma_bitmap, KFD_MAX_SDMA_QUEUES); 130 bitmap_set(dqm->sdma_bitmap, 0, get_num_sdma_queues(dqm)); 131 132 bitmap_zero(dqm->xgmi_sdma_bitmap, KFD_MAX_SDMA_QUEUES); 133 bitmap_set(dqm->xgmi_sdma_bitmap, 0, get_num_xgmi_sdma_queues(dqm)); 134 } 135 136 void program_sh_mem_settings(struct device_queue_manager *dqm, 137 struct qcm_process_device *qpd) 138 { 139 uint32_t xcc_mask = dqm->dev->xcc_mask; 140 int xcc_id; 141 142 for_each_inst(xcc_id, xcc_mask) 143 dqm->dev->kfd2kgd->program_sh_mem_settings( 144 dqm->dev->adev, qpd->vmid, qpd->sh_mem_config, 145 qpd->sh_mem_ape1_base, qpd->sh_mem_ape1_limit, 146 qpd->sh_mem_bases, xcc_id); 147 } 148 149 static void kfd_hws_hang(struct device_queue_manager *dqm) 150 { 151 /* 152 * Issue a GPU reset if HWS is unresponsive 153 */ 154 dqm->is_hws_hang = true; 155 156 /* It's possible we're detecting a HWS hang in the 157 * middle of a GPU reset. No need to schedule another 158 * reset in this case. 159 */ 160 if (!dqm->is_resetting) 161 schedule_work(&dqm->hw_exception_work); 162 } 163 164 static int convert_to_mes_queue_type(int queue_type) 165 { 166 int mes_queue_type; 167 168 switch (queue_type) { 169 case KFD_QUEUE_TYPE_COMPUTE: 170 mes_queue_type = MES_QUEUE_TYPE_COMPUTE; 171 break; 172 case KFD_QUEUE_TYPE_SDMA: 173 mes_queue_type = MES_QUEUE_TYPE_SDMA; 174 break; 175 default: 176 WARN(1, "Invalid queue type %d", queue_type); 177 mes_queue_type = -EINVAL; 178 break; 179 } 180 181 return mes_queue_type; 182 } 183 184 static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q, 185 struct qcm_process_device *qpd) 186 { 187 struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev; 188 struct kfd_process_device *pdd = qpd_to_pdd(qpd); 189 struct mes_add_queue_input queue_input; 190 int r, queue_type; 191 uint64_t wptr_addr_off; 192 193 if (dqm->is_hws_hang) 194 return -EIO; 195 196 memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input)); 197 queue_input.process_id = qpd->pqm->process->pasid; 198 queue_input.page_table_base_addr = qpd->page_table_base; 199 queue_input.process_va_start = 0; 200 queue_input.process_va_end = adev->vm_manager.max_pfn - 1; 201 /* MES unit for quantum is 100ns */ 202 queue_input.process_quantum = KFD_MES_PROCESS_QUANTUM; /* Equivalent to 10ms. 
*/ 203 queue_input.process_context_addr = pdd->proc_ctx_gpu_addr; 204 queue_input.gang_quantum = KFD_MES_GANG_QUANTUM; /* Equivalent to 1ms */ 205 queue_input.gang_context_addr = q->gang_ctx_gpu_addr; 206 queue_input.inprocess_gang_priority = q->properties.priority; 207 queue_input.gang_global_priority_level = 208 AMDGPU_MES_PRIORITY_LEVEL_NORMAL; 209 queue_input.doorbell_offset = q->properties.doorbell_off; 210 queue_input.mqd_addr = q->gart_mqd_addr; 211 queue_input.wptr_addr = (uint64_t)q->properties.write_ptr; 212 213 if (q->wptr_bo) { 214 wptr_addr_off = (uint64_t)q->properties.write_ptr & (PAGE_SIZE - 1); 215 queue_input.wptr_mc_addr = ((uint64_t)q->wptr_bo->tbo.resource->start << PAGE_SHIFT) + wptr_addr_off; 216 } 217 218 queue_input.is_kfd_process = 1; 219 queue_input.is_aql_queue = (q->properties.format == KFD_QUEUE_FORMAT_AQL); 220 queue_input.queue_size = q->properties.queue_size >> 2; 221 222 queue_input.paging = false; 223 queue_input.tba_addr = qpd->tba_addr; 224 queue_input.tma_addr = qpd->tma_addr; 225 226 queue_type = convert_to_mes_queue_type(q->properties.type); 227 if (queue_type < 0) { 228 pr_err("Queue type not supported with MES, queue:%d\n", 229 q->properties.type); 230 return -EINVAL; 231 } 232 queue_input.queue_type = (uint32_t)queue_type; 233 234 if (q->gws) { 235 queue_input.gws_base = 0; 236 queue_input.gws_size = qpd->num_gws; 237 } 238 239 amdgpu_mes_lock(&adev->mes); 240 r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input); 241 amdgpu_mes_unlock(&adev->mes); 242 if (r) { 243 pr_err("failed to add hardware queue to MES, doorbell=0x%x\n", 244 q->properties.doorbell_off); 245 pr_err("MES might be in unrecoverable state, issue a GPU reset\n"); 246 kfd_hws_hang(dqm); 247 } 248 249 return r; 250 } 251 252 static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q, 253 struct qcm_process_device *qpd) 254 { 255 struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev; 256 int r; 257 struct mes_remove_queue_input queue_input; 258 259 if (dqm->is_hws_hang) 260 return -EIO; 261 262 memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input)); 263 queue_input.doorbell_offset = q->properties.doorbell_off; 264 queue_input.gang_context_addr = q->gang_ctx_gpu_addr; 265 266 amdgpu_mes_lock(&adev->mes); 267 r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input); 268 amdgpu_mes_unlock(&adev->mes); 269 270 if (r) { 271 pr_err("failed to remove hardware queue from MES, doorbell=0x%x\n", 272 q->properties.doorbell_off); 273 pr_err("MES might be in unrecoverable state, issue a GPU reset\n"); 274 kfd_hws_hang(dqm); 275 } 276 277 return r; 278 } 279 280 static int remove_all_queues_mes(struct device_queue_manager *dqm) 281 { 282 struct device_process_node *cur; 283 struct qcm_process_device *qpd; 284 struct queue *q; 285 int retval = 0; 286 287 list_for_each_entry(cur, &dqm->queues, list) { 288 qpd = cur->qpd; 289 list_for_each_entry(q, &qpd->queues_list, list) { 290 if (q->properties.is_active) { 291 retval = remove_queue_mes(dqm, q, qpd); 292 if (retval) { 293 pr_err("%s: Failed to remove queue %d for dev %d", 294 __func__, 295 q->properties.queue_id, 296 dqm->dev->id); 297 return retval; 298 } 299 } 300 } 301 } 302 303 return retval; 304 } 305 306 static void increment_queue_count(struct device_queue_manager *dqm, 307 struct qcm_process_device *qpd, 308 struct queue *q) 309 { 310 dqm->active_queue_count++; 311 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || 312 q->properties.type == KFD_QUEUE_TYPE_DIQ) 313 
dqm->active_cp_queue_count++; 314 315 if (q->properties.is_gws) { 316 dqm->gws_queue_count++; 317 qpd->mapped_gws_queue = true; 318 } 319 } 320 321 static void decrement_queue_count(struct device_queue_manager *dqm, 322 struct qcm_process_device *qpd, 323 struct queue *q) 324 { 325 dqm->active_queue_count--; 326 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || 327 q->properties.type == KFD_QUEUE_TYPE_DIQ) 328 dqm->active_cp_queue_count--; 329 330 if (q->properties.is_gws) { 331 dqm->gws_queue_count--; 332 qpd->mapped_gws_queue = false; 333 } 334 } 335 336 /* 337 * Allocate a doorbell ID to this queue. 338 * If doorbell_id is passed in, make sure requested ID is valid then allocate it. 339 */ 340 static int allocate_doorbell(struct qcm_process_device *qpd, 341 struct queue *q, 342 uint32_t const *restore_id) 343 { 344 struct kfd_node *dev = qpd->dqm->dev; 345 346 if (!KFD_IS_SOC15(dev)) { 347 /* On pre-SOC15 chips we need to use the queue ID to 348 * preserve the user mode ABI. 349 */ 350 351 if (restore_id && *restore_id != q->properties.queue_id) 352 return -EINVAL; 353 354 q->doorbell_id = q->properties.queue_id; 355 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 356 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 357 /* For SDMA queues on SOC15 with 8-byte doorbell, use static 358 * doorbell assignments based on the engine and queue id. 359 * The doobell index distance between RLC (2*i) and (2*i+1) 360 * for a SDMA engine is 512. 361 */ 362 363 uint32_t *idx_offset = dev->kfd->shared_resources.sdma_doorbell_idx; 364 365 /* 366 * q->properties.sdma_engine_id corresponds to the virtual 367 * sdma engine number. However, for doorbell allocation, 368 * we need the physical sdma engine id in order to get the 369 * correct doorbell offset. 
370 */ 371 uint32_t valid_id = idx_offset[qpd->dqm->dev->node_id * 372 get_num_all_sdma_engines(qpd->dqm) + 373 q->properties.sdma_engine_id] 374 + (q->properties.sdma_queue_id & 1) 375 * KFD_QUEUE_DOORBELL_MIRROR_OFFSET 376 + (q->properties.sdma_queue_id >> 1); 377 378 if (restore_id && *restore_id != valid_id) 379 return -EINVAL; 380 q->doorbell_id = valid_id; 381 } else { 382 /* For CP queues on SOC15 */ 383 if (restore_id) { 384 /* make sure that ID is free */ 385 if (__test_and_set_bit(*restore_id, qpd->doorbell_bitmap)) 386 return -EINVAL; 387 388 q->doorbell_id = *restore_id; 389 } else { 390 /* or reserve a free doorbell ID */ 391 unsigned int found; 392 393 found = find_first_zero_bit(qpd->doorbell_bitmap, 394 KFD_MAX_NUM_OF_QUEUES_PER_PROCESS); 395 if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) { 396 pr_debug("No doorbells available"); 397 return -EBUSY; 398 } 399 set_bit(found, qpd->doorbell_bitmap); 400 q->doorbell_id = found; 401 } 402 } 403 404 q->properties.doorbell_off = 405 kfd_get_doorbell_dw_offset_in_bar(dev->kfd, qpd_to_pdd(qpd), 406 q->doorbell_id); 407 return 0; 408 } 409 410 static void deallocate_doorbell(struct qcm_process_device *qpd, 411 struct queue *q) 412 { 413 unsigned int old; 414 struct kfd_node *dev = qpd->dqm->dev; 415 416 if (!KFD_IS_SOC15(dev) || 417 q->properties.type == KFD_QUEUE_TYPE_SDMA || 418 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 419 return; 420 421 old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap); 422 WARN_ON(!old); 423 } 424 425 static void program_trap_handler_settings(struct device_queue_manager *dqm, 426 struct qcm_process_device *qpd) 427 { 428 uint32_t xcc_mask = dqm->dev->xcc_mask; 429 int xcc_id; 430 431 if (dqm->dev->kfd2kgd->program_trap_handler_settings) 432 for_each_inst(xcc_id, xcc_mask) 433 dqm->dev->kfd2kgd->program_trap_handler_settings( 434 dqm->dev->adev, qpd->vmid, qpd->tba_addr, 435 qpd->tma_addr, xcc_id); 436 } 437 438 static int allocate_vmid(struct device_queue_manager *dqm, 439 struct qcm_process_device *qpd, 440 struct queue *q) 441 { 442 int allocated_vmid = -1, i; 443 444 for (i = dqm->dev->vm_info.first_vmid_kfd; 445 i <= dqm->dev->vm_info.last_vmid_kfd; i++) { 446 if (!dqm->vmid_pasid[i]) { 447 allocated_vmid = i; 448 break; 449 } 450 } 451 452 if (allocated_vmid < 0) { 453 pr_err("no more vmid to allocate\n"); 454 return -ENOSPC; 455 } 456 457 pr_debug("vmid allocated: %d\n", allocated_vmid); 458 459 dqm->vmid_pasid[allocated_vmid] = q->process->pasid; 460 461 set_pasid_vmid_mapping(dqm, q->process->pasid, allocated_vmid); 462 463 qpd->vmid = allocated_vmid; 464 q->properties.vmid = allocated_vmid; 465 466 program_sh_mem_settings(dqm, qpd); 467 468 if (KFD_IS_SOC15(dqm->dev) && dqm->dev->kfd->cwsr_enabled) 469 program_trap_handler_settings(dqm, qpd); 470 471 /* qpd->page_table_base is set earlier when register_process() 472 * is called, i.e. when the first queue is created. 
473 */ 474 dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->adev, 475 qpd->vmid, 476 qpd->page_table_base); 477 /* invalidate the VM context after pasid and vmid mapping is set up */ 478 kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY); 479 480 if (dqm->dev->kfd2kgd->set_scratch_backing_va) 481 dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->adev, 482 qpd->sh_hidden_private_base, qpd->vmid); 483 484 return 0; 485 } 486 487 static int flush_texture_cache_nocpsch(struct kfd_node *kdev, 488 struct qcm_process_device *qpd) 489 { 490 const struct packet_manager_funcs *pmf = qpd->dqm->packet_mgr.pmf; 491 int ret; 492 493 if (!qpd->ib_kaddr) 494 return -ENOMEM; 495 496 ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr); 497 if (ret) 498 return ret; 499 500 return amdgpu_amdkfd_submit_ib(kdev->adev, KGD_ENGINE_MEC1, qpd->vmid, 501 qpd->ib_base, (uint32_t *)qpd->ib_kaddr, 502 pmf->release_mem_size / sizeof(uint32_t)); 503 } 504 505 static void deallocate_vmid(struct device_queue_manager *dqm, 506 struct qcm_process_device *qpd, 507 struct queue *q) 508 { 509 /* On GFX v7, CP doesn't flush TC at dequeue */ 510 if (q->device->adev->asic_type == CHIP_HAWAII) 511 if (flush_texture_cache_nocpsch(q->device, qpd)) 512 pr_err("Failed to flush TC\n"); 513 514 kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY); 515 516 /* Release the vmid mapping */ 517 set_pasid_vmid_mapping(dqm, 0, qpd->vmid); 518 dqm->vmid_pasid[qpd->vmid] = 0; 519 520 qpd->vmid = 0; 521 q->properties.vmid = 0; 522 } 523 524 static int create_queue_nocpsch(struct device_queue_manager *dqm, 525 struct queue *q, 526 struct qcm_process_device *qpd, 527 const struct kfd_criu_queue_priv_data *qd, 528 const void *restore_mqd, const void *restore_ctl_stack) 529 { 530 struct mqd_manager *mqd_mgr; 531 int retval; 532 533 dqm_lock(dqm); 534 535 if (dqm->total_queue_count >= max_num_of_queues_per_device) { 536 pr_warn("Can't create new usermode queue because %d queues were already created\n", 537 dqm->total_queue_count); 538 retval = -EPERM; 539 goto out_unlock; 540 } 541 542 if (list_empty(&qpd->queues_list)) { 543 retval = allocate_vmid(dqm, qpd, q); 544 if (retval) 545 goto out_unlock; 546 } 547 q->properties.vmid = qpd->vmid; 548 /* 549 * Eviction state logic: mark all queues as evicted, even ones 550 * not currently active. Restoring inactive queues later only 551 * updates the is_evicted flag but is a no-op otherwise. 552 */ 553 q->properties.is_evicted = !!qpd->evicted; 554 555 q->properties.tba_addr = qpd->tba_addr; 556 q->properties.tma_addr = qpd->tma_addr; 557 558 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 559 q->properties.type)]; 560 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) { 561 retval = allocate_hqd(dqm, q); 562 if (retval) 563 goto deallocate_vmid; 564 pr_debug("Loading mqd to hqd on pipe %d, queue %d\n", 565 q->pipe, q->queue); 566 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 567 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 568 retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL); 569 if (retval) 570 goto deallocate_vmid; 571 dqm->asic_ops.init_sdma_vm(dqm, q, qpd); 572 } 573 574 retval = allocate_doorbell(qpd, q, qd ? 
&qd->doorbell_id : NULL); 575 if (retval) 576 goto out_deallocate_hqd; 577 578 /* Temporarily release dqm lock to avoid a circular lock dependency */ 579 dqm_unlock(dqm); 580 q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties); 581 dqm_lock(dqm); 582 583 if (!q->mqd_mem_obj) { 584 retval = -ENOMEM; 585 goto out_deallocate_doorbell; 586 } 587 588 if (qd) 589 mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr, 590 &q->properties, restore_mqd, restore_ctl_stack, 591 qd->ctl_stack_size); 592 else 593 mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, 594 &q->gart_mqd_addr, &q->properties); 595 596 if (q->properties.is_active) { 597 if (!dqm->sched_running) { 598 WARN_ONCE(1, "Load non-HWS mqd while stopped\n"); 599 goto add_queue_to_list; 600 } 601 602 if (WARN(q->process->mm != current->mm, 603 "should only run in user thread")) 604 retval = -EFAULT; 605 else 606 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, 607 q->queue, &q->properties, current->mm); 608 if (retval) 609 goto out_free_mqd; 610 } 611 612 add_queue_to_list: 613 list_add(&q->list, &qpd->queues_list); 614 qpd->queue_count++; 615 if (q->properties.is_active) 616 increment_queue_count(dqm, qpd, q); 617 618 /* 619 * Unconditionally increment this counter, regardless of the queue's 620 * type or whether the queue is active. 621 */ 622 dqm->total_queue_count++; 623 pr_debug("Total of %d queues are accountable so far\n", 624 dqm->total_queue_count); 625 goto out_unlock; 626 627 out_free_mqd: 628 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 629 out_deallocate_doorbell: 630 deallocate_doorbell(qpd, q); 631 out_deallocate_hqd: 632 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) 633 deallocate_hqd(dqm, q); 634 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 635 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 636 deallocate_sdma_queue(dqm, q); 637 deallocate_vmid: 638 if (list_empty(&qpd->queues_list)) 639 deallocate_vmid(dqm, qpd, q); 640 out_unlock: 641 dqm_unlock(dqm); 642 return retval; 643 } 644 645 static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q) 646 { 647 bool set; 648 int pipe, bit, i; 649 650 set = false; 651 652 for (pipe = dqm->next_pipe_to_allocate, i = 0; 653 i < get_pipes_per_mec(dqm); 654 pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) { 655 656 if (!is_pipe_enabled(dqm, 0, pipe)) 657 continue; 658 659 if (dqm->allocated_queues[pipe] != 0) { 660 bit = ffs(dqm->allocated_queues[pipe]) - 1; 661 dqm->allocated_queues[pipe] &= ~(1 << bit); 662 q->pipe = pipe; 663 q->queue = bit; 664 set = true; 665 break; 666 } 667 } 668 669 if (!set) 670 return -EBUSY; 671 672 pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue); 673 /* horizontal hqd allocation */ 674 dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm); 675 676 return 0; 677 } 678 679 static inline void deallocate_hqd(struct device_queue_manager *dqm, 680 struct queue *q) 681 { 682 dqm->allocated_queues[q->pipe] |= (1 << q->queue); 683 } 684 685 #define SQ_IND_CMD_CMD_KILL 0x00000003 686 #define SQ_IND_CMD_MODE_BROADCAST 0x00000001 687 688 static int dbgdev_wave_reset_wavefronts(struct kfd_node *dev, struct kfd_process *p) 689 { 690 int status = 0; 691 unsigned int vmid; 692 uint16_t queried_pasid; 693 union SQ_CMD_BITS reg_sq_cmd; 694 union GRBM_GFX_INDEX_BITS reg_gfx_index; 695 struct kfd_process_device *pdd; 696 int first_vmid_to_scan = dev->vm_info.first_vmid_kfd; 697 int last_vmid_to_scan = dev->vm_info.last_vmid_kfd; 698 uint32_t xcc_mask = dev->xcc_mask; 699 
int xcc_id; 700 701 reg_sq_cmd.u32All = 0; 702 reg_gfx_index.u32All = 0; 703 704 pr_debug("Killing all process wavefronts\n"); 705 706 if (!dev->kfd2kgd->get_atc_vmid_pasid_mapping_info) { 707 pr_err("no vmid pasid mapping supported \n"); 708 return -EOPNOTSUPP; 709 } 710 711 /* Scan all registers in the range ATC_VMID8_PASID_MAPPING .. 712 * ATC_VMID15_PASID_MAPPING 713 * to check which VMID the current process is mapped to. 714 */ 715 716 for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) { 717 status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info 718 (dev->adev, vmid, &queried_pasid); 719 720 if (status && queried_pasid == p->pasid) { 721 pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n", 722 vmid, p->pasid); 723 break; 724 } 725 } 726 727 if (vmid > last_vmid_to_scan) { 728 pr_err("Didn't find vmid for pasid 0x%x\n", p->pasid); 729 return -EFAULT; 730 } 731 732 /* taking the VMID for that process on the safe way using PDD */ 733 pdd = kfd_get_process_device_data(dev, p); 734 if (!pdd) 735 return -EFAULT; 736 737 reg_gfx_index.bits.sh_broadcast_writes = 1; 738 reg_gfx_index.bits.se_broadcast_writes = 1; 739 reg_gfx_index.bits.instance_broadcast_writes = 1; 740 reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST; 741 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL; 742 reg_sq_cmd.bits.vm_id = vmid; 743 744 for_each_inst(xcc_id, xcc_mask) 745 dev->kfd2kgd->wave_control_execute( 746 dev->adev, reg_gfx_index.u32All, 747 reg_sq_cmd.u32All, xcc_id); 748 749 return 0; 750 } 751 752 /* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked 753 * to avoid asynchronized access 754 */ 755 static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm, 756 struct qcm_process_device *qpd, 757 struct queue *q) 758 { 759 int retval; 760 struct mqd_manager *mqd_mgr; 761 762 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 763 q->properties.type)]; 764 765 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) 766 deallocate_hqd(dqm, q); 767 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) 768 deallocate_sdma_queue(dqm, q); 769 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 770 deallocate_sdma_queue(dqm, q); 771 else { 772 pr_debug("q->properties.type %d is invalid\n", 773 q->properties.type); 774 return -EINVAL; 775 } 776 dqm->total_queue_count--; 777 778 deallocate_doorbell(qpd, q); 779 780 if (!dqm->sched_running) { 781 WARN_ONCE(1, "Destroy non-HWS queue while stopped\n"); 782 return 0; 783 } 784 785 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, 786 KFD_PREEMPT_TYPE_WAVEFRONT_RESET, 787 KFD_UNMAP_LATENCY_MS, 788 q->pipe, q->queue); 789 if (retval == -ETIME) 790 qpd->reset_wavefronts = true; 791 792 list_del(&q->list); 793 if (list_empty(&qpd->queues_list)) { 794 if (qpd->reset_wavefronts) { 795 pr_warn("Resetting wave fronts (nocpsch) on dev %p\n", 796 dqm->dev); 797 /* dbgdev_wave_reset_wavefronts has to be called before 798 * deallocate_vmid(), i.e. when vmid is still in use. 
799 */ 800 dbgdev_wave_reset_wavefronts(dqm->dev, 801 qpd->pqm->process); 802 qpd->reset_wavefronts = false; 803 } 804 805 deallocate_vmid(dqm, qpd, q); 806 } 807 qpd->queue_count--; 808 if (q->properties.is_active) 809 decrement_queue_count(dqm, qpd, q); 810 811 return retval; 812 } 813 814 static int destroy_queue_nocpsch(struct device_queue_manager *dqm, 815 struct qcm_process_device *qpd, 816 struct queue *q) 817 { 818 int retval; 819 uint64_t sdma_val = 0; 820 struct kfd_process_device *pdd = qpd_to_pdd(qpd); 821 struct mqd_manager *mqd_mgr = 822 dqm->mqd_mgrs[get_mqd_type_from_queue_type(q->properties.type)]; 823 824 /* Get the SDMA queue stats */ 825 if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || 826 (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 827 retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr, 828 &sdma_val); 829 if (retval) 830 pr_err("Failed to read SDMA queue counter for queue: %d\n", 831 q->properties.queue_id); 832 } 833 834 dqm_lock(dqm); 835 retval = destroy_queue_nocpsch_locked(dqm, qpd, q); 836 if (!retval) 837 pdd->sdma_past_activity_counter += sdma_val; 838 dqm_unlock(dqm); 839 840 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 841 842 return retval; 843 } 844 845 static int update_queue(struct device_queue_manager *dqm, struct queue *q, 846 struct mqd_update_info *minfo) 847 { 848 int retval = 0; 849 struct mqd_manager *mqd_mgr; 850 struct kfd_process_device *pdd; 851 bool prev_active = false; 852 853 dqm_lock(dqm); 854 pdd = kfd_get_process_device_data(q->device, q->process); 855 if (!pdd) { 856 retval = -ENODEV; 857 goto out_unlock; 858 } 859 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 860 q->properties.type)]; 861 862 /* Save previous activity state for counters */ 863 prev_active = q->properties.is_active; 864 865 /* Make sure the queue is unmapped before updating the MQD */ 866 if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) { 867 if (!dqm->dev->kfd->shared_resources.enable_mes) 868 retval = unmap_queues_cpsch(dqm, 869 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false); 870 else if (prev_active) 871 retval = remove_queue_mes(dqm, q, &pdd->qpd); 872 873 if (retval) { 874 pr_err("unmap queue failed\n"); 875 goto out_unlock; 876 } 877 } else if (prev_active && 878 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || 879 q->properties.type == KFD_QUEUE_TYPE_SDMA || 880 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 881 882 if (!dqm->sched_running) { 883 WARN_ONCE(1, "Update non-HWS queue while stopped\n"); 884 goto out_unlock; 885 } 886 887 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, 888 (dqm->dev->kfd->cwsr_enabled ? 889 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE : 890 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN), 891 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); 892 if (retval) { 893 pr_err("destroy mqd failed\n"); 894 goto out_unlock; 895 } 896 } 897 898 mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties, minfo); 899 900 /* 901 * check active state vs. the previous state and modify 902 * counter accordingly. map_queues_cpsch uses the 903 * dqm->active_queue_count to determine whether a new runlist must be 904 * uploaded. 
905 */ 906 if (q->properties.is_active && !prev_active) { 907 increment_queue_count(dqm, &pdd->qpd, q); 908 } else if (!q->properties.is_active && prev_active) { 909 decrement_queue_count(dqm, &pdd->qpd, q); 910 } else if (q->gws && !q->properties.is_gws) { 911 if (q->properties.is_active) { 912 dqm->gws_queue_count++; 913 pdd->qpd.mapped_gws_queue = true; 914 } 915 q->properties.is_gws = true; 916 } else if (!q->gws && q->properties.is_gws) { 917 if (q->properties.is_active) { 918 dqm->gws_queue_count--; 919 pdd->qpd.mapped_gws_queue = false; 920 } 921 q->properties.is_gws = false; 922 } 923 924 if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) { 925 if (!dqm->dev->kfd->shared_resources.enable_mes) 926 retval = map_queues_cpsch(dqm); 927 else if (q->properties.is_active) 928 retval = add_queue_mes(dqm, q, &pdd->qpd); 929 } else if (q->properties.is_active && 930 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || 931 q->properties.type == KFD_QUEUE_TYPE_SDMA || 932 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 933 if (WARN(q->process->mm != current->mm, 934 "should only run in user thread")) 935 retval = -EFAULT; 936 else 937 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, 938 q->pipe, q->queue, 939 &q->properties, current->mm); 940 } 941 942 out_unlock: 943 dqm_unlock(dqm); 944 return retval; 945 } 946 947 static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, 948 struct qcm_process_device *qpd) 949 { 950 struct queue *q; 951 struct mqd_manager *mqd_mgr; 952 struct kfd_process_device *pdd; 953 int retval, ret = 0; 954 955 dqm_lock(dqm); 956 if (qpd->evicted++ > 0) /* already evicted, do nothing */ 957 goto out; 958 959 pdd = qpd_to_pdd(qpd); 960 pr_debug_ratelimited("Evicting PASID 0x%x queues\n", 961 pdd->process->pasid); 962 963 pdd->last_evict_timestamp = get_jiffies_64(); 964 /* Mark all queues as evicted. Deactivate all active queues on 965 * the qpd. 966 */ 967 list_for_each_entry(q, &qpd->queues_list, list) { 968 q->properties.is_evicted = true; 969 if (!q->properties.is_active) 970 continue; 971 972 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 973 q->properties.type)]; 974 q->properties.is_active = false; 975 decrement_queue_count(dqm, qpd, q); 976 977 if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n")) 978 continue; 979 980 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, 981 (dqm->dev->kfd->cwsr_enabled ? 982 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE : 983 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN), 984 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); 985 if (retval && !ret) 986 /* Return the first error, but keep going to 987 * maintain a consistent eviction state 988 */ 989 ret = retval; 990 } 991 992 out: 993 dqm_unlock(dqm); 994 return ret; 995 } 996 997 static int evict_process_queues_cpsch(struct device_queue_manager *dqm, 998 struct qcm_process_device *qpd) 999 { 1000 struct queue *q; 1001 struct kfd_process_device *pdd; 1002 int retval = 0; 1003 1004 dqm_lock(dqm); 1005 if (qpd->evicted++ > 0) /* already evicted, do nothing */ 1006 goto out; 1007 1008 pdd = qpd_to_pdd(qpd); 1009 pr_debug_ratelimited("Evicting PASID 0x%x queues\n", 1010 pdd->process->pasid); 1011 1012 /* Mark all queues as evicted. Deactivate all active queues on 1013 * the qpd. 
1014 */ 1015 list_for_each_entry(q, &qpd->queues_list, list) { 1016 q->properties.is_evicted = true; 1017 if (!q->properties.is_active) 1018 continue; 1019 1020 q->properties.is_active = false; 1021 decrement_queue_count(dqm, qpd, q); 1022 1023 if (dqm->dev->kfd->shared_resources.enable_mes) { 1024 retval = remove_queue_mes(dqm, q, qpd); 1025 if (retval) { 1026 pr_err("Failed to evict queue %d\n", 1027 q->properties.queue_id); 1028 goto out; 1029 } 1030 } 1031 } 1032 pdd->last_evict_timestamp = get_jiffies_64(); 1033 if (!dqm->dev->kfd->shared_resources.enable_mes) 1034 retval = execute_queues_cpsch(dqm, 1035 qpd->is_debug ? 1036 KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES : 1037 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 1038 1039 out: 1040 dqm_unlock(dqm); 1041 return retval; 1042 } 1043 1044 static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, 1045 struct qcm_process_device *qpd) 1046 { 1047 struct mm_struct *mm = NULL; 1048 struct queue *q; 1049 struct mqd_manager *mqd_mgr; 1050 struct kfd_process_device *pdd; 1051 uint64_t pd_base; 1052 uint64_t eviction_duration; 1053 int retval, ret = 0; 1054 1055 pdd = qpd_to_pdd(qpd); 1056 /* Retrieve PD base */ 1057 pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); 1058 1059 dqm_lock(dqm); 1060 if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */ 1061 goto out; 1062 if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */ 1063 qpd->evicted--; 1064 goto out; 1065 } 1066 1067 pr_debug_ratelimited("Restoring PASID 0x%x queues\n", 1068 pdd->process->pasid); 1069 1070 /* Update PD Base in QPD */ 1071 qpd->page_table_base = pd_base; 1072 pr_debug("Updated PD address to 0x%llx\n", pd_base); 1073 1074 if (!list_empty(&qpd->queues_list)) { 1075 dqm->dev->kfd2kgd->set_vm_context_page_table_base( 1076 dqm->dev->adev, 1077 qpd->vmid, 1078 qpd->page_table_base); 1079 kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY); 1080 } 1081 1082 /* Take a safe reference to the mm_struct, which may otherwise 1083 * disappear even while the kfd_process is still referenced. 1084 */ 1085 mm = get_task_mm(pdd->process->lead_thread); 1086 if (!mm) { 1087 ret = -EFAULT; 1088 goto out; 1089 } 1090 1091 /* Remove the eviction flags. Activate queues that are not 1092 * inactive for other reasons. 
1093 */ 1094 list_for_each_entry(q, &qpd->queues_list, list) { 1095 q->properties.is_evicted = false; 1096 if (!QUEUE_IS_ACTIVE(q->properties)) 1097 continue; 1098 1099 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 1100 q->properties.type)]; 1101 q->properties.is_active = true; 1102 increment_queue_count(dqm, qpd, q); 1103 1104 if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n")) 1105 continue; 1106 1107 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, 1108 q->queue, &q->properties, mm); 1109 if (retval && !ret) 1110 /* Return the first error, but keep going to 1111 * maintain a consistent eviction state 1112 */ 1113 ret = retval; 1114 } 1115 qpd->evicted = 0; 1116 eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp; 1117 atomic64_add(eviction_duration, &pdd->evict_duration_counter); 1118 out: 1119 if (mm) 1120 mmput(mm); 1121 dqm_unlock(dqm); 1122 return ret; 1123 } 1124 1125 static int restore_process_queues_cpsch(struct device_queue_manager *dqm, 1126 struct qcm_process_device *qpd) 1127 { 1128 struct queue *q; 1129 struct kfd_process_device *pdd; 1130 uint64_t pd_base; 1131 uint64_t eviction_duration; 1132 int retval = 0; 1133 1134 pdd = qpd_to_pdd(qpd); 1135 /* Retrieve PD base */ 1136 pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); 1137 1138 dqm_lock(dqm); 1139 if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */ 1140 goto out; 1141 if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */ 1142 qpd->evicted--; 1143 goto out; 1144 } 1145 1146 pr_debug_ratelimited("Restoring PASID 0x%x queues\n", 1147 pdd->process->pasid); 1148 1149 /* Update PD Base in QPD */ 1150 qpd->page_table_base = pd_base; 1151 pr_debug("Updated PD address to 0x%llx\n", pd_base); 1152 1153 /* activate all active queues on the qpd */ 1154 list_for_each_entry(q, &qpd->queues_list, list) { 1155 q->properties.is_evicted = false; 1156 if (!QUEUE_IS_ACTIVE(q->properties)) 1157 continue; 1158 1159 q->properties.is_active = true; 1160 increment_queue_count(dqm, &pdd->qpd, q); 1161 1162 if (dqm->dev->kfd->shared_resources.enable_mes) { 1163 retval = add_queue_mes(dqm, q, qpd); 1164 if (retval) { 1165 pr_err("Failed to restore queue %d\n", 1166 q->properties.queue_id); 1167 goto out; 1168 } 1169 } 1170 } 1171 if (!dqm->dev->kfd->shared_resources.enable_mes) 1172 retval = execute_queues_cpsch(dqm, 1173 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 1174 qpd->evicted = 0; 1175 eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp; 1176 atomic64_add(eviction_duration, &pdd->evict_duration_counter); 1177 out: 1178 dqm_unlock(dqm); 1179 return retval; 1180 } 1181 1182 static int register_process(struct device_queue_manager *dqm, 1183 struct qcm_process_device *qpd) 1184 { 1185 struct device_process_node *n; 1186 struct kfd_process_device *pdd; 1187 uint64_t pd_base; 1188 int retval; 1189 1190 n = kzalloc(sizeof(*n), GFP_KERNEL); 1191 if (!n) 1192 return -ENOMEM; 1193 1194 n->qpd = qpd; 1195 1196 pdd = qpd_to_pdd(qpd); 1197 /* Retrieve PD base */ 1198 pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); 1199 1200 dqm_lock(dqm); 1201 list_add(&n->list, &dqm->queues); 1202 1203 /* Update PD Base in QPD */ 1204 qpd->page_table_base = pd_base; 1205 pr_debug("Updated PD address to 0x%llx\n", pd_base); 1206 1207 retval = dqm->asic_ops.update_qpd(dqm, qpd); 1208 1209 dqm->processes_count++; 1210 1211 dqm_unlock(dqm); 1212 1213 /* Outside the DQM lock because under the DQM lock we can't do 1214 * reclaim or take other locks that others 
hold while reclaiming. 1215 */ 1216 kfd_inc_compute_active(dqm->dev); 1217 1218 return retval; 1219 } 1220 1221 static int unregister_process(struct device_queue_manager *dqm, 1222 struct qcm_process_device *qpd) 1223 { 1224 int retval; 1225 struct device_process_node *cur, *next; 1226 1227 pr_debug("qpd->queues_list is %s\n", 1228 list_empty(&qpd->queues_list) ? "empty" : "not empty"); 1229 1230 retval = 0; 1231 dqm_lock(dqm); 1232 1233 list_for_each_entry_safe(cur, next, &dqm->queues, list) { 1234 if (qpd == cur->qpd) { 1235 list_del(&cur->list); 1236 kfree(cur); 1237 dqm->processes_count--; 1238 goto out; 1239 } 1240 } 1241 /* qpd not found in dqm list */ 1242 retval = 1; 1243 out: 1244 dqm_unlock(dqm); 1245 1246 /* Outside the DQM lock because under the DQM lock we can't do 1247 * reclaim or take other locks that others hold while reclaiming. 1248 */ 1249 if (!retval) 1250 kfd_dec_compute_active(dqm->dev); 1251 1252 return retval; 1253 } 1254 1255 static int 1256 set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid, 1257 unsigned int vmid) 1258 { 1259 uint32_t xcc_mask = dqm->dev->xcc_mask; 1260 int xcc_id, ret; 1261 1262 for_each_inst(xcc_id, xcc_mask) { 1263 ret = dqm->dev->kfd2kgd->set_pasid_vmid_mapping( 1264 dqm->dev->adev, pasid, vmid, xcc_id); 1265 if (ret) 1266 break; 1267 } 1268 1269 return ret; 1270 } 1271 1272 static void init_interrupts(struct device_queue_manager *dqm) 1273 { 1274 uint32_t xcc_mask = dqm->dev->xcc_mask; 1275 unsigned int i, xcc_id; 1276 1277 for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++) { 1278 if (is_pipe_enabled(dqm, 0, i)) { 1279 for_each_inst(xcc_id, xcc_mask) 1280 dqm->dev->kfd2kgd->init_interrupts( 1281 dqm->dev->adev, i, xcc_id); 1282 } 1283 } 1284 } 1285 1286 static int initialize_nocpsch(struct device_queue_manager *dqm) 1287 { 1288 int pipe, queue; 1289 1290 pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm)); 1291 1292 dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm), 1293 sizeof(unsigned int), GFP_KERNEL); 1294 if (!dqm->allocated_queues) 1295 return -ENOMEM; 1296 1297 mutex_init(&dqm->lock_hidden); 1298 INIT_LIST_HEAD(&dqm->queues); 1299 dqm->active_queue_count = dqm->next_pipe_to_allocate = 0; 1300 dqm->active_cp_queue_count = 0; 1301 dqm->gws_queue_count = 0; 1302 1303 for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { 1304 int pipe_offset = pipe * get_queues_per_pipe(dqm); 1305 1306 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) 1307 if (test_bit(pipe_offset + queue, 1308 dqm->dev->kfd->shared_resources.cp_queue_bitmap)) 1309 dqm->allocated_queues[pipe] |= 1 << queue; 1310 } 1311 1312 memset(dqm->vmid_pasid, 0, sizeof(dqm->vmid_pasid)); 1313 1314 init_sdma_bitmaps(dqm); 1315 1316 return 0; 1317 } 1318 1319 static void uninitialize(struct device_queue_manager *dqm) 1320 { 1321 int i; 1322 1323 WARN_ON(dqm->active_queue_count > 0 || dqm->processes_count > 0); 1324 1325 kfree(dqm->allocated_queues); 1326 for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++) 1327 kfree(dqm->mqd_mgrs[i]); 1328 mutex_destroy(&dqm->lock_hidden); 1329 } 1330 1331 static int start_nocpsch(struct device_queue_manager *dqm) 1332 { 1333 int r = 0; 1334 1335 pr_info("SW scheduler is used"); 1336 init_interrupts(dqm); 1337 1338 if (dqm->dev->adev->asic_type == CHIP_HAWAII) 1339 r = pm_init(&dqm->packet_mgr, dqm); 1340 if (!r) 1341 dqm->sched_running = true; 1342 1343 return r; 1344 } 1345 1346 static int stop_nocpsch(struct device_queue_manager *dqm) 1347 { 1348 dqm_lock(dqm); 1349 if (!dqm->sched_running) { 1350 dqm_unlock(dqm); 1351 return 0; 
1352 } 1353 1354 if (dqm->dev->adev->asic_type == CHIP_HAWAII) 1355 pm_uninit(&dqm->packet_mgr, false); 1356 dqm->sched_running = false; 1357 dqm_unlock(dqm); 1358 1359 return 0; 1360 } 1361 1362 static void pre_reset(struct device_queue_manager *dqm) 1363 { 1364 dqm_lock(dqm); 1365 dqm->is_resetting = true; 1366 dqm_unlock(dqm); 1367 } 1368 1369 static int allocate_sdma_queue(struct device_queue_manager *dqm, 1370 struct queue *q, const uint32_t *restore_sdma_id) 1371 { 1372 int bit; 1373 1374 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { 1375 if (bitmap_empty(dqm->sdma_bitmap, KFD_MAX_SDMA_QUEUES)) { 1376 pr_err("No more SDMA queue to allocate\n"); 1377 return -ENOMEM; 1378 } 1379 1380 if (restore_sdma_id) { 1381 /* Re-use existing sdma_id */ 1382 if (!test_bit(*restore_sdma_id, dqm->sdma_bitmap)) { 1383 pr_err("SDMA queue already in use\n"); 1384 return -EBUSY; 1385 } 1386 clear_bit(*restore_sdma_id, dqm->sdma_bitmap); 1387 q->sdma_id = *restore_sdma_id; 1388 } else { 1389 /* Find first available sdma_id */ 1390 bit = find_first_bit(dqm->sdma_bitmap, 1391 get_num_sdma_queues(dqm)); 1392 clear_bit(bit, dqm->sdma_bitmap); 1393 q->sdma_id = bit; 1394 } 1395 1396 q->properties.sdma_engine_id = 1397 q->sdma_id % kfd_get_num_sdma_engines(dqm->dev); 1398 q->properties.sdma_queue_id = q->sdma_id / 1399 kfd_get_num_sdma_engines(dqm->dev); 1400 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 1401 if (bitmap_empty(dqm->xgmi_sdma_bitmap, KFD_MAX_SDMA_QUEUES)) { 1402 pr_err("No more XGMI SDMA queue to allocate\n"); 1403 return -ENOMEM; 1404 } 1405 if (restore_sdma_id) { 1406 /* Re-use existing sdma_id */ 1407 if (!test_bit(*restore_sdma_id, dqm->xgmi_sdma_bitmap)) { 1408 pr_err("SDMA queue already in use\n"); 1409 return -EBUSY; 1410 } 1411 clear_bit(*restore_sdma_id, dqm->xgmi_sdma_bitmap); 1412 q->sdma_id = *restore_sdma_id; 1413 } else { 1414 bit = find_first_bit(dqm->xgmi_sdma_bitmap, 1415 get_num_xgmi_sdma_queues(dqm)); 1416 clear_bit(bit, dqm->xgmi_sdma_bitmap); 1417 q->sdma_id = bit; 1418 } 1419 /* sdma_engine_id is sdma id including 1420 * both PCIe-optimized SDMAs and XGMI- 1421 * optimized SDMAs. 
The calculation below 1422 * assumes the first N engines are always 1423 * PCIe-optimized ones 1424 */ 1425 q->properties.sdma_engine_id = 1426 kfd_get_num_sdma_engines(dqm->dev) + 1427 q->sdma_id % kfd_get_num_xgmi_sdma_engines(dqm->dev); 1428 q->properties.sdma_queue_id = q->sdma_id / 1429 kfd_get_num_xgmi_sdma_engines(dqm->dev); 1430 } 1431 1432 pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); 1433 pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id); 1434 1435 return 0; 1436 } 1437 1438 static void deallocate_sdma_queue(struct device_queue_manager *dqm, 1439 struct queue *q) 1440 { 1441 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { 1442 if (q->sdma_id >= get_num_sdma_queues(dqm)) 1443 return; 1444 set_bit(q->sdma_id, dqm->sdma_bitmap); 1445 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 1446 if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm)) 1447 return; 1448 set_bit(q->sdma_id, dqm->xgmi_sdma_bitmap); 1449 } 1450 } 1451 1452 /* 1453 * Device Queue Manager implementation for cp scheduler 1454 */ 1455 1456 static int set_sched_resources(struct device_queue_manager *dqm) 1457 { 1458 int i, mec; 1459 struct scheduling_resources res; 1460 1461 res.vmid_mask = dqm->dev->compute_vmid_bitmap; 1462 1463 res.queue_mask = 0; 1464 for (i = 0; i < KGD_MAX_QUEUES; ++i) { 1465 mec = (i / dqm->dev->kfd->shared_resources.num_queue_per_pipe) 1466 / dqm->dev->kfd->shared_resources.num_pipe_per_mec; 1467 1468 if (!test_bit(i, dqm->dev->kfd->shared_resources.cp_queue_bitmap)) 1469 continue; 1470 1471 /* only acquire queues from the first MEC */ 1472 if (mec > 0) 1473 continue; 1474 1475 /* This situation may be hit in the future if a new HW 1476 * generation exposes more than 64 queues. If so, the 1477 * definition of res.queue_mask needs updating 1478 */ 1479 if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) { 1480 pr_err("Invalid queue enabled by amdgpu: %d\n", i); 1481 break; 1482 } 1483 1484 res.queue_mask |= 1ull 1485 << amdgpu_queue_mask_bit_to_set_resource_bit( 1486 dqm->dev->adev, i); 1487 } 1488 res.gws_mask = ~0ull; 1489 res.oac_mask = res.gds_heap_base = res.gds_heap_size = 0; 1490 1491 pr_debug("Scheduling resources:\n" 1492 "vmid mask: 0x%8X\n" 1493 "queue mask: 0x%8llX\n", 1494 res.vmid_mask, res.queue_mask); 1495 1496 return pm_send_set_resources(&dqm->packet_mgr, &res); 1497 } 1498 1499 static int initialize_cpsch(struct device_queue_manager *dqm) 1500 { 1501 pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm)); 1502 1503 mutex_init(&dqm->lock_hidden); 1504 INIT_LIST_HEAD(&dqm->queues); 1505 dqm->active_queue_count = dqm->processes_count = 0; 1506 dqm->active_cp_queue_count = 0; 1507 dqm->gws_queue_count = 0; 1508 dqm->active_runlist = false; 1509 INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception); 1510 1511 init_sdma_bitmaps(dqm); 1512 1513 return 0; 1514 } 1515 1516 static int start_cpsch(struct device_queue_manager *dqm) 1517 { 1518 int retval; 1519 1520 retval = 0; 1521 1522 dqm_lock(dqm); 1523 1524 if (!dqm->dev->kfd->shared_resources.enable_mes) { 1525 retval = pm_init(&dqm->packet_mgr, dqm); 1526 if (retval) 1527 goto fail_packet_manager_init; 1528 1529 retval = set_sched_resources(dqm); 1530 if (retval) 1531 goto fail_set_sched_resources; 1532 } 1533 pr_debug("Allocating fence memory\n"); 1534 1535 /* allocate fence memory on the gart */ 1536 retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr), 1537 &dqm->fence_mem); 1538 1539 if (retval) 1540 goto fail_allocate_vidmem; 1541 1542 dqm->fence_addr = (uint64_t 
*)dqm->fence_mem->cpu_ptr; 1543 dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr; 1544 1545 init_interrupts(dqm); 1546 1547 /* clear hang status when driver try to start the hw scheduler */ 1548 dqm->is_hws_hang = false; 1549 dqm->is_resetting = false; 1550 dqm->sched_running = true; 1551 if (!dqm->dev->kfd->shared_resources.enable_mes) 1552 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 1553 dqm_unlock(dqm); 1554 1555 return 0; 1556 fail_allocate_vidmem: 1557 fail_set_sched_resources: 1558 if (!dqm->dev->kfd->shared_resources.enable_mes) 1559 pm_uninit(&dqm->packet_mgr, false); 1560 fail_packet_manager_init: 1561 dqm_unlock(dqm); 1562 return retval; 1563 } 1564 1565 static int stop_cpsch(struct device_queue_manager *dqm) 1566 { 1567 bool hanging; 1568 1569 dqm_lock(dqm); 1570 if (!dqm->sched_running) { 1571 dqm_unlock(dqm); 1572 return 0; 1573 } 1574 1575 if (!dqm->is_hws_hang) { 1576 if (!dqm->dev->kfd->shared_resources.enable_mes) 1577 unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, false); 1578 else 1579 remove_all_queues_mes(dqm); 1580 } 1581 1582 hanging = dqm->is_hws_hang || dqm->is_resetting; 1583 dqm->sched_running = false; 1584 1585 if (!dqm->dev->kfd->shared_resources.enable_mes) 1586 pm_release_ib(&dqm->packet_mgr); 1587 1588 kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); 1589 if (!dqm->dev->kfd->shared_resources.enable_mes) 1590 pm_uninit(&dqm->packet_mgr, hanging); 1591 dqm_unlock(dqm); 1592 1593 return 0; 1594 } 1595 1596 static int create_kernel_queue_cpsch(struct device_queue_manager *dqm, 1597 struct kernel_queue *kq, 1598 struct qcm_process_device *qpd) 1599 { 1600 dqm_lock(dqm); 1601 if (dqm->total_queue_count >= max_num_of_queues_per_device) { 1602 pr_warn("Can't create new kernel queue because %d queues were already created\n", 1603 dqm->total_queue_count); 1604 dqm_unlock(dqm); 1605 return -EPERM; 1606 } 1607 1608 /* 1609 * Unconditionally increment this counter, regardless of the queue's 1610 * type or whether the queue is active. 1611 */ 1612 dqm->total_queue_count++; 1613 pr_debug("Total of %d queues are accountable so far\n", 1614 dqm->total_queue_count); 1615 1616 list_add(&kq->list, &qpd->priv_queue_list); 1617 increment_queue_count(dqm, qpd, kq->queue); 1618 qpd->is_debug = true; 1619 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 1620 dqm_unlock(dqm); 1621 1622 return 0; 1623 } 1624 1625 static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm, 1626 struct kernel_queue *kq, 1627 struct qcm_process_device *qpd) 1628 { 1629 dqm_lock(dqm); 1630 list_del(&kq->list); 1631 decrement_queue_count(dqm, qpd, kq->queue); 1632 qpd->is_debug = false; 1633 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); 1634 /* 1635 * Unconditionally decrement this counter, regardless of the queue's 1636 * type. 
1637 */ 1638 dqm->total_queue_count--; 1639 pr_debug("Total of %d queues are accountable so far\n", 1640 dqm->total_queue_count); 1641 dqm_unlock(dqm); 1642 } 1643 1644 static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, 1645 struct qcm_process_device *qpd, 1646 const struct kfd_criu_queue_priv_data *qd, 1647 const void *restore_mqd, const void *restore_ctl_stack) 1648 { 1649 int retval; 1650 struct mqd_manager *mqd_mgr; 1651 1652 if (dqm->total_queue_count >= max_num_of_queues_per_device) { 1653 pr_warn("Can't create new usermode queue because %d queues were already created\n", 1654 dqm->total_queue_count); 1655 retval = -EPERM; 1656 goto out; 1657 } 1658 1659 if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 1660 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 1661 dqm_lock(dqm); 1662 retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL); 1663 dqm_unlock(dqm); 1664 if (retval) 1665 goto out; 1666 } 1667 1668 retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL); 1669 if (retval) 1670 goto out_deallocate_sdma_queue; 1671 1672 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 1673 q->properties.type)]; 1674 1675 if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 1676 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 1677 dqm->asic_ops.init_sdma_vm(dqm, q, qpd); 1678 q->properties.tba_addr = qpd->tba_addr; 1679 q->properties.tma_addr = qpd->tma_addr; 1680 q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties); 1681 if (!q->mqd_mem_obj) { 1682 retval = -ENOMEM; 1683 goto out_deallocate_doorbell; 1684 } 1685 1686 dqm_lock(dqm); 1687 /* 1688 * Eviction state logic: mark all queues as evicted, even ones 1689 * not currently active. Restoring inactive queues later only 1690 * updates the is_evicted flag but is a no-op otherwise. 1691 */ 1692 q->properties.is_evicted = !!qpd->evicted; 1693 1694 if (qd) 1695 mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr, 1696 &q->properties, restore_mqd, restore_ctl_stack, 1697 qd->ctl_stack_size); 1698 else 1699 mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, 1700 &q->gart_mqd_addr, &q->properties); 1701 1702 list_add(&q->list, &qpd->queues_list); 1703 qpd->queue_count++; 1704 1705 if (q->properties.is_active) { 1706 increment_queue_count(dqm, qpd, q); 1707 1708 if (!dqm->dev->kfd->shared_resources.enable_mes) 1709 retval = execute_queues_cpsch(dqm, 1710 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 1711 else 1712 retval = add_queue_mes(dqm, q, qpd); 1713 if (retval) 1714 goto cleanup_queue; 1715 } 1716 1717 /* 1718 * Unconditionally increment this counter, regardless of the queue's 1719 * type or whether the queue is active. 
1720 */ 1721 dqm->total_queue_count++; 1722 1723 pr_debug("Total of %d queues are accountable so far\n", 1724 dqm->total_queue_count); 1725 1726 dqm_unlock(dqm); 1727 return retval; 1728 1729 cleanup_queue: 1730 qpd->queue_count--; 1731 list_del(&q->list); 1732 if (q->properties.is_active) 1733 decrement_queue_count(dqm, qpd, q); 1734 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 1735 dqm_unlock(dqm); 1736 out_deallocate_doorbell: 1737 deallocate_doorbell(qpd, q); 1738 out_deallocate_sdma_queue: 1739 if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 1740 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 1741 dqm_lock(dqm); 1742 deallocate_sdma_queue(dqm, q); 1743 dqm_unlock(dqm); 1744 } 1745 out: 1746 return retval; 1747 } 1748 1749 int amdkfd_fence_wait_timeout(uint64_t *fence_addr, 1750 uint64_t fence_value, 1751 unsigned int timeout_ms) 1752 { 1753 unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies; 1754 1755 while (*fence_addr != fence_value) { 1756 if (time_after(jiffies, end_jiffies)) { 1757 pr_err("qcm fence wait loop timeout expired\n"); 1758 /* In HWS case, this is used to halt the driver thread 1759 * in order not to mess up CP states before doing 1760 * scandumps for FW debugging. 1761 */ 1762 while (halt_if_hws_hang) 1763 schedule(); 1764 1765 return -ETIME; 1766 } 1767 schedule(); 1768 } 1769 1770 return 0; 1771 } 1772 1773 /* dqm->lock mutex has to be locked before calling this function */ 1774 static int map_queues_cpsch(struct device_queue_manager *dqm) 1775 { 1776 int retval; 1777 1778 if (!dqm->sched_running) 1779 return 0; 1780 if (dqm->active_queue_count <= 0 || dqm->processes_count <= 0) 1781 return 0; 1782 if (dqm->active_runlist) 1783 return 0; 1784 1785 retval = pm_send_runlist(&dqm->packet_mgr, &dqm->queues); 1786 pr_debug("%s sent runlist\n", __func__); 1787 if (retval) { 1788 pr_err("failed to execute runlist\n"); 1789 return retval; 1790 } 1791 dqm->active_runlist = true; 1792 1793 return retval; 1794 } 1795 1796 /* dqm->lock mutex has to be locked before calling this function */ 1797 static int unmap_queues_cpsch(struct device_queue_manager *dqm, 1798 enum kfd_unmap_queues_filter filter, 1799 uint32_t filter_param, bool reset) 1800 { 1801 int retval = 0; 1802 struct mqd_manager *mqd_mgr; 1803 1804 if (!dqm->sched_running) 1805 return 0; 1806 if (dqm->is_hws_hang || dqm->is_resetting) 1807 return -EIO; 1808 if (!dqm->active_runlist) 1809 return retval; 1810 1811 retval = pm_send_unmap_queue(&dqm->packet_mgr, filter, filter_param, reset); 1812 if (retval) 1813 return retval; 1814 1815 *dqm->fence_addr = KFD_FENCE_INIT; 1816 pm_send_query_status(&dqm->packet_mgr, dqm->fence_gpu_addr, 1817 KFD_FENCE_COMPLETED); 1818 /* should be timed out */ 1819 retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED, 1820 queue_preemption_timeout_ms); 1821 if (retval) { 1822 pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n"); 1823 kfd_hws_hang(dqm); 1824 return retval; 1825 } 1826 1827 /* In the current MEC firmware implementation, if compute queue 1828 * doesn't response to the preemption request in time, HIQ will 1829 * abandon the unmap request without returning any timeout error 1830 * to driver. Instead, MEC firmware will log the doorbell of the 1831 * unresponding compute queue to HIQ.MQD.queue_doorbell_id fields. 
1832 * To make sure the queue unmap was successful, driver need to 1833 * check those fields 1834 */ 1835 mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]; 1836 if (mqd_mgr->read_doorbell_id(dqm->packet_mgr.priv_queue->queue->mqd)) { 1837 pr_err("HIQ MQD's queue_doorbell_id0 is not 0, Queue preemption time out\n"); 1838 while (halt_if_hws_hang) 1839 schedule(); 1840 return -ETIME; 1841 } 1842 1843 pm_release_ib(&dqm->packet_mgr); 1844 dqm->active_runlist = false; 1845 1846 return retval; 1847 } 1848 1849 /* only for compute queue */ 1850 static int reset_queues_cpsch(struct device_queue_manager *dqm, 1851 uint16_t pasid) 1852 { 1853 int retval; 1854 1855 dqm_lock(dqm); 1856 1857 retval = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_BY_PASID, 1858 pasid, true); 1859 1860 dqm_unlock(dqm); 1861 return retval; 1862 } 1863 1864 /* dqm->lock mutex has to be locked before calling this function */ 1865 static int execute_queues_cpsch(struct device_queue_manager *dqm, 1866 enum kfd_unmap_queues_filter filter, 1867 uint32_t filter_param) 1868 { 1869 int retval; 1870 1871 if (dqm->is_hws_hang) 1872 return -EIO; 1873 retval = unmap_queues_cpsch(dqm, filter, filter_param, false); 1874 if (retval) 1875 return retval; 1876 1877 return map_queues_cpsch(dqm); 1878 } 1879 1880 static int destroy_queue_cpsch(struct device_queue_manager *dqm, 1881 struct qcm_process_device *qpd, 1882 struct queue *q) 1883 { 1884 int retval; 1885 struct mqd_manager *mqd_mgr; 1886 uint64_t sdma_val = 0; 1887 struct kfd_process_device *pdd = qpd_to_pdd(qpd); 1888 1889 /* Get the SDMA queue stats */ 1890 if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || 1891 (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 1892 retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr, 1893 &sdma_val); 1894 if (retval) 1895 pr_err("Failed to read SDMA queue counter for queue: %d\n", 1896 q->properties.queue_id); 1897 } 1898 1899 retval = 0; 1900 1901 /* remove queue from list to prevent rescheduling after preemption */ 1902 dqm_lock(dqm); 1903 1904 if (qpd->is_debug) { 1905 /* 1906 * error, currently we do not allow to destroy a queue 1907 * of a currently debugged process 1908 */ 1909 retval = -EBUSY; 1910 goto failed_try_destroy_debugged_queue; 1911 1912 } 1913 1914 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 1915 q->properties.type)]; 1916 1917 deallocate_doorbell(qpd, q); 1918 1919 if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || 1920 (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 1921 deallocate_sdma_queue(dqm, q); 1922 pdd->sdma_past_activity_counter += sdma_val; 1923 } 1924 1925 list_del(&q->list); 1926 qpd->queue_count--; 1927 if (q->properties.is_active) { 1928 if (!dqm->dev->kfd->shared_resources.enable_mes) { 1929 decrement_queue_count(dqm, qpd, q); 1930 retval = execute_queues_cpsch(dqm, 1931 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 1932 if (retval == -ETIME) 1933 qpd->reset_wavefronts = true; 1934 } else { 1935 retval = remove_queue_mes(dqm, q, qpd); 1936 } 1937 } 1938 1939 /* 1940 * Unconditionally decrement this counter, regardless of the queue's 1941 * type 1942 */ 1943 dqm->total_queue_count--; 1944 pr_debug("Total of %d queues are accountable so far\n", 1945 dqm->total_queue_count); 1946 1947 dqm_unlock(dqm); 1948 1949 /* Do free_mqd after dqm_unlock(dqm) to avoid circular locking */ 1950 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 1951 1952 return retval; 1953 1954 failed_try_destroy_debugged_queue: 1955 1956 dqm_unlock(dqm); 1957 return retval; 1958 } 1959 1960 /* 1961 * Low bits 
static bool set_cache_memory_policy(struct device_queue_manager *dqm,
				   struct qcm_process_device *qpd,
				   enum cache_policy default_policy,
				   enum cache_policy alternate_policy,
				   void __user *alternate_aperture_base,
				   uint64_t alternate_aperture_size)
{
	bool retval = true;

	if (!dqm->asic_ops.set_cache_memory_policy)
		return retval;

	dqm_lock(dqm);

	if (alternate_aperture_size == 0) {
		/* base > limit disables APE1 */
		qpd->sh_mem_ape1_base = 1;
		qpd->sh_mem_ape1_limit = 0;
	} else {
		/*
		 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
		 *			SH_MEM_APE1_BASE[31:0], 0x0000 }
		 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
		 *			SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
		 * Verify that the base and size parameters can be
		 * represented in this format and convert them.
		 * Additionally restrict APE1 to user-mode addresses.
		 */

		uint64_t base = (uintptr_t)alternate_aperture_base;
		uint64_t limit = base + alternate_aperture_size - 1;

		if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
		   (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
			retval = false;
			goto out;
		}

		qpd->sh_mem_ape1_base = base >> 16;
		qpd->sh_mem_ape1_limit = limit >> 16;
	}

	retval = dqm->asic_ops.set_cache_memory_policy(
			dqm,
			qpd,
			default_policy,
			alternate_policy,
			alternate_aperture_base,
			alternate_aperture_size);

	if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
		program_sh_mem_settings(dqm, qpd);

	pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
		qpd->sh_mem_config, qpd->sh_mem_ape1_base,
		qpd->sh_mem_ape1_limit);

out:
	dqm_unlock(dqm);
	return retval;
}

static int process_termination_nocpsch(struct device_queue_manager *dqm,
		struct qcm_process_device *qpd)
{
	struct queue *q;
	struct device_process_node *cur, *next_dpn;
	int retval = 0;
	bool found = false;

	dqm_lock(dqm);

	/* Clear all user mode queues */
	while (!list_empty(&qpd->queues_list)) {
		struct mqd_manager *mqd_mgr;
		int ret;

		q = list_first_entry(&qpd->queues_list, struct queue, list);
		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
				q->properties.type)];
		ret = destroy_queue_nocpsch_locked(dqm, qpd, q);
		if (ret)
			retval = ret;
		dqm_unlock(dqm);
		mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
		dqm_lock(dqm);
	}

	/* Unregister process */
	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			found = true;
			break;
		}
	}

	dqm_unlock(dqm);

	/* Outside the DQM lock because under the DQM lock we can't do
	 * reclaim or take other locks that others hold while reclaiming.
	 */
	if (found)
		kfd_dec_compute_active(dqm->dev);

	return retval;
}

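/*
 * Copy the wave control stack of a user compute queue to @ctl_stack and
 * report how much of the control stack and context save area is in use.
 * Per the checks below, this is only supported for an inactive compute
 * queue on a device with CWSR enabled; anything else returns -EINVAL.
 */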
static int get_wave_state(struct device_queue_manager *dqm,
			  struct queue *q,
			  void __user *ctl_stack,
			  u32 *ctl_stack_used_size,
			  u32 *save_area_used_size)
{
	struct mqd_manager *mqd_mgr;

	dqm_lock(dqm);

	mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP];

	if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE ||
	    q->properties.is_active || !q->device->kfd->cwsr_enabled ||
	    !mqd_mgr->get_wave_state) {
		dqm_unlock(dqm);
		return -EINVAL;
	}

	dqm_unlock(dqm);

	/*
	 * get_wave_state is outside the dqm lock to prevent circular locking
	 * and the queue should be protected against destruction by the process
	 * lock.
	 */
	return mqd_mgr->get_wave_state(mqd_mgr, q->mqd, &q->properties,
			ctl_stack, ctl_stack_used_size, save_area_used_size);
}

static void get_queue_checkpoint_info(struct device_queue_manager *dqm,
			const struct queue *q,
			u32 *mqd_size,
			u32 *ctl_stack_size)
{
	struct mqd_manager *mqd_mgr;
	enum KFD_MQD_TYPE mqd_type =
			get_mqd_type_from_queue_type(q->properties.type);

	dqm_lock(dqm);
	mqd_mgr = dqm->mqd_mgrs[mqd_type];
	*mqd_size = mqd_mgr->mqd_size;
	*ctl_stack_size = 0;

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE && mqd_mgr->get_checkpoint_info)
		mqd_mgr->get_checkpoint_info(mqd_mgr, q->mqd, ctl_stack_size);

	dqm_unlock(dqm);
}

static int checkpoint_mqd(struct device_queue_manager *dqm,
			  const struct queue *q,
			  void *mqd,
			  void *ctl_stack)
{
	struct mqd_manager *mqd_mgr;
	int r = 0;
	enum KFD_MQD_TYPE mqd_type =
			get_mqd_type_from_queue_type(q->properties.type);

	dqm_lock(dqm);

	if (q->properties.is_active || !q->device->kfd->cwsr_enabled) {
		r = -EINVAL;
		goto dqm_unlock;
	}

	mqd_mgr = dqm->mqd_mgrs[mqd_type];
	if (!mqd_mgr->checkpoint_mqd) {
		r = -EOPNOTSUPP;
		goto dqm_unlock;
	}

	mqd_mgr->checkpoint_mqd(mqd_mgr, q->mqd, mqd, ctl_stack);

dqm_unlock:
	dqm_unlock(dqm);
	return r;
}

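/*
 * Process teardown for the HWS path: drop any remaining kernel (debug)
 * queues, release SDMA allocations of user queues, unmap whatever is still
 * active, and finally free the MQDs outside the DQM lock to avoid circular
 * locking with memory reclaim.
 */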
static int process_termination_cpsch(struct device_queue_manager *dqm,
		struct qcm_process_device *qpd)
{
	int retval;
	struct queue *q;
	struct kernel_queue *kq, *kq_next;
	struct mqd_manager *mqd_mgr;
	struct device_process_node *cur, *next_dpn;
	enum kfd_unmap_queues_filter filter =
		KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;
	bool found = false;

	retval = 0;

	dqm_lock(dqm);

	/* Clean all kernel queues */
	list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
		list_del(&kq->list);
		decrement_queue_count(dqm, qpd, kq->queue);
		qpd->is_debug = false;
		dqm->total_queue_count--;
		filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES;
	}

	/* Clear all user mode queues */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
			deallocate_sdma_queue(dqm, q);
		else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
			deallocate_sdma_queue(dqm, q);

		if (q->properties.is_active) {
			decrement_queue_count(dqm, qpd, q);

			if (dqm->dev->kfd->shared_resources.enable_mes) {
				retval = remove_queue_mes(dqm, q, qpd);
				if (retval)
					pr_err("Failed to remove queue %d\n",
						q->properties.queue_id);
			}
		}

		dqm->total_queue_count--;
	}

	/* Unregister process */
	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			found = true;
			break;
		}
	}

	if (!dqm->dev->kfd->shared_resources.enable_mes)
		retval = execute_queues_cpsch(dqm, filter, 0);

	if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) {
		pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
		dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
		qpd->reset_wavefronts = false;
	}

	/* Lastly, free mqd resources.
	 * Do free_mqd() after dqm_unlock to avoid circular locking.
	 */
	while (!list_empty(&qpd->queues_list)) {
		q = list_first_entry(&qpd->queues_list, struct queue, list);
		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
				q->properties.type)];
		list_del(&q->list);
		qpd->queue_count--;
		dqm_unlock(dqm);
		mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
		dqm_lock(dqm);
	}
	dqm_unlock(dqm);

	/* Outside the DQM lock because under the DQM lock we can't do
	 * reclaim or take other locks that others hold while reclaiming.
	 */
	if (found)
		kfd_dec_compute_active(dqm->dev);

	return retval;
}

static int init_mqd_managers(struct device_queue_manager *dqm)
{
	int i, j;
	struct mqd_manager *mqd_mgr;

	for (i = 0; i < KFD_MQD_TYPE_MAX; i++) {
		mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev);
		if (!mqd_mgr) {
			pr_err("mqd manager [%d] initialization failed\n", i);
			goto out_free;
		}
		dqm->mqd_mgrs[i] = mqd_mgr;
	}

	return 0;

out_free:
	for (j = 0; j < i; j++) {
		kfree(dqm->mqd_mgrs[j]);
		dqm->mqd_mgrs[j] = NULL;
	}

	return -ENOMEM;
}

/* Allocate one hiq mqd (HWS) and all SDMA mqds in one contiguous chunk */
static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)
{
	int retval;
	struct kfd_node *dev = dqm->dev;
	struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd;
	uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
		get_num_all_sdma_engines(dqm) *
		dev->kfd->device_info.num_sdma_queues_per_engine +
		(dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size *
		NUM_XCC(dqm->dev->xcc_mask));

	retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, size,
		&(mem_obj->gtt_mem), &(mem_obj->gpu_addr),
		(void *)&(mem_obj->cpu_ptr), false);

	return retval;
}

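/*
 * Create and initialize the per-device queue manager: choose the scheduling
 * policy (HWS is forced off on ASICs where it is not usable), fill in the
 * matching ops table, hook up the per-ASIC callbacks, and set up the MQD
 * managers plus the HIQ/SDMA MQD buffer used by the HWS path.
 */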
struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev)
{
	struct device_queue_manager *dqm;

	pr_debug("Loading device queue manager\n");

	dqm = kzalloc(sizeof(*dqm), GFP_KERNEL);
	if (!dqm)
		return NULL;

	switch (dev->adev->asic_type) {
	/* HWS is not available on Hawaii. */
	case CHIP_HAWAII:
	/* HWS depends on CWSR for timely dequeue. CWSR is not
	 * available on Tonga.
	 *
	 * FIXME: This argument also applies to Kaveri.
	 */
	case CHIP_TONGA:
		dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS;
		break;
	default:
		dqm->sched_policy = sched_policy;
		break;
	}

	dqm->dev = dev;
	switch (dqm->sched_policy) {
	case KFD_SCHED_POLICY_HWS:
	case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
		/* initialize dqm for cp scheduling */
		dqm->ops.create_queue = create_queue_cpsch;
		dqm->ops.initialize = initialize_cpsch;
		dqm->ops.start = start_cpsch;
		dqm->ops.stop = stop_cpsch;
		dqm->ops.pre_reset = pre_reset;
		dqm->ops.destroy_queue = destroy_queue_cpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.register_process = register_process;
		dqm->ops.unregister_process = unregister_process;
		dqm->ops.uninitialize = uninitialize;
		dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
		dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		dqm->ops.process_termination = process_termination_cpsch;
		dqm->ops.evict_process_queues = evict_process_queues_cpsch;
		dqm->ops.restore_process_queues = restore_process_queues_cpsch;
		dqm->ops.get_wave_state = get_wave_state;
		dqm->ops.reset_queues = reset_queues_cpsch;
		dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info;
		dqm->ops.checkpoint_mqd = checkpoint_mqd;
		break;
	case KFD_SCHED_POLICY_NO_HWS:
		/* initialize dqm for no cp scheduling */
		dqm->ops.start = start_nocpsch;
		dqm->ops.stop = stop_nocpsch;
		dqm->ops.pre_reset = pre_reset;
		dqm->ops.create_queue = create_queue_nocpsch;
		dqm->ops.destroy_queue = destroy_queue_nocpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.register_process = register_process;
		dqm->ops.unregister_process = unregister_process;
		dqm->ops.initialize = initialize_nocpsch;
		dqm->ops.uninitialize = uninitialize;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		dqm->ops.process_termination = process_termination_nocpsch;
		dqm->ops.evict_process_queues = evict_process_queues_nocpsch;
		dqm->ops.restore_process_queues =
			restore_process_queues_nocpsch;
		dqm->ops.get_wave_state = get_wave_state;
		dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info;
		dqm->ops.checkpoint_mqd = checkpoint_mqd;
		break;
	default:
		pr_err("Invalid scheduling policy %d\n", dqm->sched_policy);
		goto out_free;
	}

	switch (dev->adev->asic_type) {
	case CHIP_CARRIZO:
		device_queue_manager_init_vi(&dqm->asic_ops);
		break;

	case CHIP_KAVERI:
		device_queue_manager_init_cik(&dqm->asic_ops);
		break;

	case CHIP_HAWAII:
		device_queue_manager_init_cik_hawaii(&dqm->asic_ops);
		break;

	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		device_queue_manager_init_vi_tonga(&dqm->asic_ops);
		break;

	default:
		if (KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0))
			device_queue_manager_init_v11(&dqm->asic_ops);
		else if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1))
			device_queue_manager_init_v10_navi10(&dqm->asic_ops);
		else if (KFD_GC_VERSION(dev) >= IP_VERSION(9, 0, 1))
			device_queue_manager_init_v9(&dqm->asic_ops);
		else {
			WARN(1, "Unexpected ASIC family %u",
			     dev->adev->asic_type);
			goto out_free;
		}
	}

	if (init_mqd_managers(dqm))
		goto out_free;

	if (!dev->kfd->shared_resources.enable_mes && allocate_hiq_sdma_mqd(dqm)) {
		pr_err("Failed to allocate hiq sdma mqd trunk buffer\n");
		goto out_free;
	}

	if (!dqm->ops.initialize(dqm))
		return dqm;

out_free:
	kfree(dqm);
	return NULL;
}

static void deallocate_hiq_sdma_mqd(struct kfd_node *dev,
				    struct kfd_mem_obj *mqd)
{
	WARN(!mqd, "No hiq sdma mqd trunk to free");

	amdgpu_amdkfd_free_gtt_mem(dev->adev, mqd->gtt_mem);
}

void device_queue_manager_uninit(struct device_queue_manager *dqm)
{
	dqm->ops.stop(dqm);
	dqm->ops.uninitialize(dqm);
	if (!dqm->dev->kfd->shared_resources.enable_mes)
		deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd);
	kfree(dqm);
}

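/*
 * Evict all queues that the process identified by @pasid owns on this
 * device.  Returns -EINVAL if the PASID does not map to a known process,
 * 0 if the process has no per-device data here, otherwise the result of
 * the policy-specific evict_process_queues() op.
 */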
int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid)
{
	struct kfd_process_device *pdd;
	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
	int ret = 0;

	if (!p)
		return -EINVAL;
	WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
	pdd = kfd_get_process_device_data(dqm->dev, p);
	if (pdd)
		ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd);
	kfd_unref_process(p);

	return ret;
}

static void kfd_process_hw_exception(struct work_struct *work)
{
	struct device_queue_manager *dqm = container_of(work,
			struct device_queue_manager, hw_exception_work);
	amdgpu_amdkfd_gpu_reset(dqm->dev->adev);
}

#if defined(CONFIG_DEBUG_FS)

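/*
 * Print a register dump as rows of up to eight values.  Each row is
 * prefixed with the offset of its first register, and a new row is started
 * whenever the dumped offsets stop being consecutive.
 */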
"\n" : "", 2475 dump[i][0], dump[i][1]); 2476 count = 7; 2477 } else { 2478 seq_printf(m, " %08x", dump[i][1]); 2479 count--; 2480 } 2481 } 2482 2483 seq_puts(m, "\n"); 2484 } 2485 2486 int dqm_debugfs_hqds(struct seq_file *m, void *data) 2487 { 2488 struct device_queue_manager *dqm = data; 2489 uint32_t xcc_mask = dqm->dev->xcc_mask; 2490 uint32_t (*dump)[2], n_regs; 2491 int pipe, queue; 2492 int r = 0, xcc_id; 2493 uint32_t sdma_engine_start; 2494 2495 if (!dqm->sched_running) { 2496 seq_puts(m, " Device is stopped\n"); 2497 return 0; 2498 } 2499 2500 for_each_inst(xcc_id, xcc_mask) { 2501 r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev, 2502 KFD_CIK_HIQ_PIPE, 2503 KFD_CIK_HIQ_QUEUE, &dump, 2504 &n_regs, xcc_id); 2505 if (!r) { 2506 seq_printf( 2507 m, 2508 " Inst %d, HIQ on MEC %d Pipe %d Queue %d\n", 2509 xcc_id, 2510 KFD_CIK_HIQ_PIPE / get_pipes_per_mec(dqm) + 1, 2511 KFD_CIK_HIQ_PIPE % get_pipes_per_mec(dqm), 2512 KFD_CIK_HIQ_QUEUE); 2513 seq_reg_dump(m, dump, n_regs); 2514 2515 kfree(dump); 2516 } 2517 2518 for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { 2519 int pipe_offset = pipe * get_queues_per_pipe(dqm); 2520 2521 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) { 2522 if (!test_bit(pipe_offset + queue, 2523 dqm->dev->kfd->shared_resources.cp_queue_bitmap)) 2524 continue; 2525 2526 r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev, 2527 pipe, queue, 2528 &dump, &n_regs, 2529 xcc_id); 2530 if (r) 2531 break; 2532 2533 seq_printf(m, 2534 " Inst %d, CP Pipe %d, Queue %d\n", 2535 xcc_id, pipe, queue); 2536 seq_reg_dump(m, dump, n_regs); 2537 2538 kfree(dump); 2539 } 2540 } 2541 } 2542 2543 sdma_engine_start = dqm->dev->node_id * get_num_all_sdma_engines(dqm); 2544 for (pipe = sdma_engine_start; 2545 pipe < (sdma_engine_start + get_num_all_sdma_engines(dqm)); 2546 pipe++) { 2547 for (queue = 0; 2548 queue < dqm->dev->kfd->device_info.num_sdma_queues_per_engine; 2549 queue++) { 2550 r = dqm->dev->kfd2kgd->hqd_sdma_dump( 2551 dqm->dev->adev, pipe, queue, &dump, &n_regs); 2552 if (r) 2553 break; 2554 2555 seq_printf(m, " SDMA Engine %d, RLC %d\n", 2556 pipe, queue); 2557 seq_reg_dump(m, dump, n_regs); 2558 2559 kfree(dump); 2560 } 2561 } 2562 2563 return r; 2564 } 2565 2566 int dqm_debugfs_hang_hws(struct device_queue_manager *dqm) 2567 { 2568 int r = 0; 2569 2570 dqm_lock(dqm); 2571 r = pm_debugfs_hang_hws(&dqm->packet_mgr); 2572 if (r) { 2573 dqm_unlock(dqm); 2574 return r; 2575 } 2576 dqm->active_runlist = true; 2577 r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); 2578 dqm_unlock(dqm); 2579 2580 return r; 2581 } 2582 2583 #endif 2584