1 // SPDX-License-Identifier: GPL-2.0 OR MIT 2 /* 3 * Copyright 2014-2022 Advanced Micro Devices, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21 * OTHER DEALINGS IN THE SOFTWARE. 22 * 23 */ 24 25 #include <linux/ratelimit.h> 26 #include <linux/printk.h> 27 #include <linux/slab.h> 28 #include <linux/list.h> 29 #include <linux/types.h> 30 #include <linux/bitops.h> 31 #include <linux/sched.h> 32 #include "kfd_priv.h" 33 #include "kfd_device_queue_manager.h" 34 #include "kfd_mqd_manager.h" 35 #include "cik_regs.h" 36 #include "kfd_kernel_queue.h" 37 #include "amdgpu_amdkfd.h" 38 #include "mes_api_def.h" 39 40 /* Size of the per-pipe EOP queue */ 41 #define CIK_HPD_EOP_BYTES_LOG2 11 42 #define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2) 43 44 static int set_pasid_vmid_mapping(struct device_queue_manager *dqm, 45 u32 pasid, unsigned int vmid); 46 47 static int execute_queues_cpsch(struct device_queue_manager *dqm, 48 enum kfd_unmap_queues_filter filter, 49 uint32_t filter_param); 50 static int unmap_queues_cpsch(struct device_queue_manager *dqm, 51 enum kfd_unmap_queues_filter filter, 52 uint32_t filter_param, bool reset); 53 54 static int map_queues_cpsch(struct device_queue_manager *dqm); 55 56 static void deallocate_sdma_queue(struct device_queue_manager *dqm, 57 struct queue *q); 58 59 static inline void deallocate_hqd(struct device_queue_manager *dqm, 60 struct queue *q); 61 static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q); 62 static int allocate_sdma_queue(struct device_queue_manager *dqm, 63 struct queue *q, const uint32_t *restore_sdma_id); 64 static void kfd_process_hw_exception(struct work_struct *work); 65 66 static inline 67 enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type) 68 { 69 if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI) 70 return KFD_MQD_TYPE_SDMA; 71 return KFD_MQD_TYPE_CP; 72 } 73 74 static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe) 75 { 76 int i; 77 int pipe_offset = (mec * dqm->dev->kfd->shared_resources.num_pipe_per_mec 78 + pipe) * dqm->dev->kfd->shared_resources.num_queue_per_pipe; 79 80 /* queue is available for KFD usage if bit is 1 */ 81 for (i = 0; i < dqm->dev->kfd->shared_resources.num_queue_per_pipe; ++i) 82 if (test_bit(pipe_offset + i, 83 dqm->dev->kfd->shared_resources.cp_queue_bitmap)) 84 return true; 85 return false; 86 } 87 88 unsigned int get_cp_queues_num(struct device_queue_manager *dqm) 89 { 90 return 
bitmap_weight(dqm->dev->kfd->shared_resources.cp_queue_bitmap, 91 KGD_MAX_QUEUES); 92 } 93 94 unsigned int get_queues_per_pipe(struct device_queue_manager *dqm) 95 { 96 return dqm->dev->kfd->shared_resources.num_queue_per_pipe; 97 } 98 99 unsigned int get_pipes_per_mec(struct device_queue_manager *dqm) 100 { 101 return dqm->dev->kfd->shared_resources.num_pipe_per_mec; 102 } 103 104 static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm) 105 { 106 return kfd_get_num_sdma_engines(dqm->dev) + 107 kfd_get_num_xgmi_sdma_engines(dqm->dev); 108 } 109 110 unsigned int get_num_sdma_queues(struct device_queue_manager *dqm) 111 { 112 return kfd_get_num_sdma_engines(dqm->dev) * 113 dqm->dev->kfd->device_info.num_sdma_queues_per_engine; 114 } 115 116 unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm) 117 { 118 return kfd_get_num_xgmi_sdma_engines(dqm->dev) * 119 dqm->dev->kfd->device_info.num_sdma_queues_per_engine; 120 } 121 122 static inline uint64_t get_reserved_sdma_queues_bitmap(struct device_queue_manager *dqm) 123 { 124 return dqm->dev->kfd->device_info.reserved_sdma_queues_bitmap; 125 } 126 127 static void init_sdma_bitmaps(struct device_queue_manager *dqm) 128 { 129 bitmap_zero(dqm->sdma_bitmap, KFD_MAX_SDMA_QUEUES); 130 bitmap_set(dqm->sdma_bitmap, 0, get_num_sdma_queues(dqm)); 131 132 bitmap_zero(dqm->xgmi_sdma_bitmap, KFD_MAX_SDMA_QUEUES); 133 bitmap_set(dqm->xgmi_sdma_bitmap, 0, get_num_xgmi_sdma_queues(dqm)); 134 } 135 136 void program_sh_mem_settings(struct device_queue_manager *dqm, 137 struct qcm_process_device *qpd) 138 { 139 uint32_t xcc_mask = dqm->dev->xcc_mask; 140 int xcc_id; 141 142 for_each_inst(xcc_id, xcc_mask) 143 dqm->dev->kfd2kgd->program_sh_mem_settings( 144 dqm->dev->adev, qpd->vmid, qpd->sh_mem_config, 145 qpd->sh_mem_ape1_base, qpd->sh_mem_ape1_limit, 146 qpd->sh_mem_bases, xcc_id); 147 } 148 149 static void kfd_hws_hang(struct device_queue_manager *dqm) 150 { 151 /* 152 * Issue a GPU reset if HWS is unresponsive 153 */ 154 dqm->is_hws_hang = true; 155 156 /* It's possible we're detecting a HWS hang in the 157 * middle of a GPU reset. No need to schedule another 158 * reset in this case. 159 */ 160 if (!dqm->is_resetting) 161 schedule_work(&dqm->hw_exception_work); 162 } 163 164 static int convert_to_mes_queue_type(int queue_type) 165 { 166 int mes_queue_type; 167 168 switch (queue_type) { 169 case KFD_QUEUE_TYPE_COMPUTE: 170 mes_queue_type = MES_QUEUE_TYPE_COMPUTE; 171 break; 172 case KFD_QUEUE_TYPE_SDMA: 173 mes_queue_type = MES_QUEUE_TYPE_SDMA; 174 break; 175 default: 176 WARN(1, "Invalid queue type %d", queue_type); 177 mes_queue_type = -EINVAL; 178 break; 179 } 180 181 return mes_queue_type; 182 } 183 184 static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q, 185 struct qcm_process_device *qpd) 186 { 187 struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev; 188 struct kfd_process_device *pdd = qpd_to_pdd(qpd); 189 struct mes_add_queue_input queue_input; 190 int r, queue_type; 191 uint64_t wptr_addr_off; 192 193 if (dqm->is_hws_hang) 194 return -EIO; 195 196 memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input)); 197 queue_input.process_id = qpd->pqm->process->pasid; 198 queue_input.page_table_base_addr = qpd->page_table_base; 199 queue_input.process_va_start = 0; 200 queue_input.process_va_end = adev->vm_manager.max_pfn - 1; 201 /* MES unit for quantum is 100ns */ 202 queue_input.process_quantum = KFD_MES_PROCESS_QUANTUM; /* Equivalent to 10ms. 
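	 * With the 100 ns quantum unit noted above, 10 ms corresponds to a
	 * quantum value of 100000; the 1 ms gang quantum below likewise
	 * corresponds to 10000 units.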
*/ 203 queue_input.process_context_addr = pdd->proc_ctx_gpu_addr; 204 queue_input.gang_quantum = KFD_MES_GANG_QUANTUM; /* Equivalent to 1ms */ 205 queue_input.gang_context_addr = q->gang_ctx_gpu_addr; 206 queue_input.inprocess_gang_priority = q->properties.priority; 207 queue_input.gang_global_priority_level = 208 AMDGPU_MES_PRIORITY_LEVEL_NORMAL; 209 queue_input.doorbell_offset = q->properties.doorbell_off; 210 queue_input.mqd_addr = q->gart_mqd_addr; 211 queue_input.wptr_addr = (uint64_t)q->properties.write_ptr; 212 213 if (q->wptr_bo) { 214 wptr_addr_off = (uint64_t)q->properties.write_ptr & (PAGE_SIZE - 1); 215 queue_input.wptr_mc_addr = ((uint64_t)q->wptr_bo->tbo.resource->start << PAGE_SHIFT) + wptr_addr_off; 216 } 217 218 queue_input.is_kfd_process = 1; 219 queue_input.is_aql_queue = (q->properties.format == KFD_QUEUE_FORMAT_AQL); 220 queue_input.queue_size = q->properties.queue_size >> 2; 221 222 queue_input.paging = false; 223 queue_input.tba_addr = qpd->tba_addr; 224 queue_input.tma_addr = qpd->tma_addr; 225 226 queue_type = convert_to_mes_queue_type(q->properties.type); 227 if (queue_type < 0) { 228 pr_err("Queue type not supported with MES, queue:%d\n", 229 q->properties.type); 230 return -EINVAL; 231 } 232 queue_input.queue_type = (uint32_t)queue_type; 233 234 if (q->gws) { 235 queue_input.gws_base = 0; 236 queue_input.gws_size = qpd->num_gws; 237 } 238 239 amdgpu_mes_lock(&adev->mes); 240 r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input); 241 amdgpu_mes_unlock(&adev->mes); 242 if (r) { 243 pr_err("failed to add hardware queue to MES, doorbell=0x%x\n", 244 q->properties.doorbell_off); 245 pr_err("MES might be in unrecoverable state, issue a GPU reset\n"); 246 kfd_hws_hang(dqm); 247 } 248 249 return r; 250 } 251 252 static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q, 253 struct qcm_process_device *qpd) 254 { 255 struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev; 256 int r; 257 struct mes_remove_queue_input queue_input; 258 259 if (dqm->is_hws_hang) 260 return -EIO; 261 262 memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input)); 263 queue_input.doorbell_offset = q->properties.doorbell_off; 264 queue_input.gang_context_addr = q->gang_ctx_gpu_addr; 265 266 amdgpu_mes_lock(&adev->mes); 267 r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input); 268 amdgpu_mes_unlock(&adev->mes); 269 270 if (r) { 271 pr_err("failed to remove hardware queue from MES, doorbell=0x%x\n", 272 q->properties.doorbell_off); 273 pr_err("MES might be in unrecoverable state, issue a GPU reset\n"); 274 kfd_hws_hang(dqm); 275 } 276 277 return r; 278 } 279 280 static int remove_all_queues_mes(struct device_queue_manager *dqm) 281 { 282 struct device_process_node *cur; 283 struct qcm_process_device *qpd; 284 struct queue *q; 285 int retval = 0; 286 287 list_for_each_entry(cur, &dqm->queues, list) { 288 qpd = cur->qpd; 289 list_for_each_entry(q, &qpd->queues_list, list) { 290 if (q->properties.is_active) { 291 retval = remove_queue_mes(dqm, q, qpd); 292 if (retval) { 293 pr_err("%s: Failed to remove queue %d for dev %d", 294 __func__, 295 q->properties.queue_id, 296 dqm->dev->id); 297 return retval; 298 } 299 } 300 } 301 } 302 303 return retval; 304 } 305 306 static void increment_queue_count(struct device_queue_manager *dqm, 307 struct qcm_process_device *qpd, 308 struct queue *q) 309 { 310 dqm->active_queue_count++; 311 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || 312 q->properties.type == KFD_QUEUE_TYPE_DIQ) 313 
	dqm->active_cp_queue_count++;

	if (q->properties.is_gws) {
		dqm->gws_queue_count++;
		qpd->mapped_gws_queue = true;
	}
}

static void decrement_queue_count(struct device_queue_manager *dqm,
				  struct qcm_process_device *qpd,
				  struct queue *q)
{
	dqm->active_queue_count--;
	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
	    q->properties.type == KFD_QUEUE_TYPE_DIQ)
		dqm->active_cp_queue_count--;

	if (q->properties.is_gws) {
		dqm->gws_queue_count--;
		qpd->mapped_gws_queue = false;
	}
}

/*
 * Allocate a doorbell ID to this queue.
 * If doorbell_id is passed in, make sure the requested ID is valid, then allocate it.
 */
static int allocate_doorbell(struct qcm_process_device *qpd,
			     struct queue *q,
			     uint32_t const *restore_id)
{
	struct kfd_node *dev = qpd->dqm->dev;

	if (!KFD_IS_SOC15(dev)) {
		/* On pre-SOC15 chips we need to use the queue ID to
		 * preserve the user mode ABI.
		 */

		if (restore_id && *restore_id != q->properties.queue_id)
			return -EINVAL;

		q->doorbell_id = q->properties.queue_id;
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		   q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
		/* For SDMA queues on SOC15 with 8-byte doorbell, use static
		 * doorbell assignments based on the engine and queue id.
		 * The doorbell index distance between RLC (2*i) and (2*i+1)
		 * for an SDMA engine is 512.
		 */

		uint32_t *idx_offset = dev->kfd->shared_resources.sdma_doorbell_idx;

		/*
		 * q->properties.sdma_engine_id corresponds to the virtual
		 * sdma engine number. However, for doorbell allocation,
		 * we need the physical sdma engine id in order to get the
		 * correct doorbell offset.
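		 *
		 * For example, with the formula below sdma_queue_id 5 yields
		 * valid_id = base + 1 * KFD_QUEUE_DOORBELL_MIRROR_OFFSET + 2:
		 * odd queue IDs select the mirrored doorbell range and each
		 * even/odd queue pair shares one slot offset.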
370 */ 371 uint32_t valid_id = idx_offset[qpd->dqm->dev->node_id * 372 get_num_all_sdma_engines(qpd->dqm) + 373 q->properties.sdma_engine_id] 374 + (q->properties.sdma_queue_id & 1) 375 * KFD_QUEUE_DOORBELL_MIRROR_OFFSET 376 + (q->properties.sdma_queue_id >> 1); 377 378 if (restore_id && *restore_id != valid_id) 379 return -EINVAL; 380 q->doorbell_id = valid_id; 381 } else { 382 /* For CP queues on SOC15 */ 383 if (restore_id) { 384 /* make sure that ID is free */ 385 if (__test_and_set_bit(*restore_id, qpd->doorbell_bitmap)) 386 return -EINVAL; 387 388 q->doorbell_id = *restore_id; 389 } else { 390 /* or reserve a free doorbell ID */ 391 unsigned int found; 392 393 found = find_first_zero_bit(qpd->doorbell_bitmap, 394 KFD_MAX_NUM_OF_QUEUES_PER_PROCESS); 395 if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) { 396 pr_debug("No doorbells available"); 397 return -EBUSY; 398 } 399 set_bit(found, qpd->doorbell_bitmap); 400 q->doorbell_id = found; 401 } 402 } 403 404 q->properties.doorbell_off = 405 kfd_get_doorbell_dw_offset_in_bar(dev->kfd, qpd_to_pdd(qpd), 406 q->doorbell_id); 407 return 0; 408 } 409 410 static void deallocate_doorbell(struct qcm_process_device *qpd, 411 struct queue *q) 412 { 413 unsigned int old; 414 struct kfd_node *dev = qpd->dqm->dev; 415 416 if (!KFD_IS_SOC15(dev) || 417 q->properties.type == KFD_QUEUE_TYPE_SDMA || 418 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 419 return; 420 421 old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap); 422 WARN_ON(!old); 423 } 424 425 static void program_trap_handler_settings(struct device_queue_manager *dqm, 426 struct qcm_process_device *qpd) 427 { 428 uint32_t xcc_mask = dqm->dev->xcc_mask; 429 int xcc_id; 430 431 if (dqm->dev->kfd2kgd->program_trap_handler_settings) 432 for_each_inst(xcc_id, xcc_mask) 433 dqm->dev->kfd2kgd->program_trap_handler_settings( 434 dqm->dev->adev, qpd->vmid, qpd->tba_addr, 435 qpd->tma_addr, xcc_id); 436 } 437 438 static int allocate_vmid(struct device_queue_manager *dqm, 439 struct qcm_process_device *qpd, 440 struct queue *q) 441 { 442 int allocated_vmid = -1, i; 443 444 for (i = dqm->dev->vm_info.first_vmid_kfd; 445 i <= dqm->dev->vm_info.last_vmid_kfd; i++) { 446 if (!dqm->vmid_pasid[i]) { 447 allocated_vmid = i; 448 break; 449 } 450 } 451 452 if (allocated_vmid < 0) { 453 pr_err("no more vmid to allocate\n"); 454 return -ENOSPC; 455 } 456 457 pr_debug("vmid allocated: %d\n", allocated_vmid); 458 459 dqm->vmid_pasid[allocated_vmid] = q->process->pasid; 460 461 set_pasid_vmid_mapping(dqm, q->process->pasid, allocated_vmid); 462 463 qpd->vmid = allocated_vmid; 464 q->properties.vmid = allocated_vmid; 465 466 program_sh_mem_settings(dqm, qpd); 467 468 if (KFD_IS_SOC15(dqm->dev) && dqm->dev->kfd->cwsr_enabled) 469 program_trap_handler_settings(dqm, qpd); 470 471 /* qpd->page_table_base is set earlier when register_process() 472 * is called, i.e. when the first queue is created. 
473 */ 474 dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->adev, 475 qpd->vmid, 476 qpd->page_table_base); 477 /* invalidate the VM context after pasid and vmid mapping is set up */ 478 kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY); 479 480 if (dqm->dev->kfd2kgd->set_scratch_backing_va) 481 dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->adev, 482 qpd->sh_hidden_private_base, qpd->vmid); 483 484 return 0; 485 } 486 487 static int flush_texture_cache_nocpsch(struct kfd_node *kdev, 488 struct qcm_process_device *qpd) 489 { 490 const struct packet_manager_funcs *pmf = qpd->dqm->packet_mgr.pmf; 491 int ret; 492 493 if (!qpd->ib_kaddr) 494 return -ENOMEM; 495 496 ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr); 497 if (ret) 498 return ret; 499 500 return amdgpu_amdkfd_submit_ib(kdev->adev, KGD_ENGINE_MEC1, qpd->vmid, 501 qpd->ib_base, (uint32_t *)qpd->ib_kaddr, 502 pmf->release_mem_size / sizeof(uint32_t)); 503 } 504 505 static void deallocate_vmid(struct device_queue_manager *dqm, 506 struct qcm_process_device *qpd, 507 struct queue *q) 508 { 509 /* On GFX v7, CP doesn't flush TC at dequeue */ 510 if (q->device->adev->asic_type == CHIP_HAWAII) 511 if (flush_texture_cache_nocpsch(q->device, qpd)) 512 pr_err("Failed to flush TC\n"); 513 514 kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY); 515 516 /* Release the vmid mapping */ 517 set_pasid_vmid_mapping(dqm, 0, qpd->vmid); 518 dqm->vmid_pasid[qpd->vmid] = 0; 519 520 qpd->vmid = 0; 521 q->properties.vmid = 0; 522 } 523 524 static int create_queue_nocpsch(struct device_queue_manager *dqm, 525 struct queue *q, 526 struct qcm_process_device *qpd, 527 const struct kfd_criu_queue_priv_data *qd, 528 const void *restore_mqd, const void *restore_ctl_stack) 529 { 530 struct mqd_manager *mqd_mgr; 531 int retval; 532 533 dqm_lock(dqm); 534 535 if (dqm->total_queue_count >= max_num_of_queues_per_device) { 536 pr_warn("Can't create new usermode queue because %d queues were already created\n", 537 dqm->total_queue_count); 538 retval = -EPERM; 539 goto out_unlock; 540 } 541 542 if (list_empty(&qpd->queues_list)) { 543 retval = allocate_vmid(dqm, qpd, q); 544 if (retval) 545 goto out_unlock; 546 } 547 q->properties.vmid = qpd->vmid; 548 /* 549 * Eviction state logic: mark all queues as evicted, even ones 550 * not currently active. Restoring inactive queues later only 551 * updates the is_evicted flag but is a no-op otherwise. 552 */ 553 q->properties.is_evicted = !!qpd->evicted; 554 555 q->properties.tba_addr = qpd->tba_addr; 556 q->properties.tma_addr = qpd->tma_addr; 557 558 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 559 q->properties.type)]; 560 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) { 561 retval = allocate_hqd(dqm, q); 562 if (retval) 563 goto deallocate_vmid; 564 pr_debug("Loading mqd to hqd on pipe %d, queue %d\n", 565 q->pipe, q->queue); 566 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 567 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 568 retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL); 569 if (retval) 570 goto deallocate_vmid; 571 dqm->asic_ops.init_sdma_vm(dqm, q, qpd); 572 } 573 574 retval = allocate_doorbell(qpd, q, qd ? 
&qd->doorbell_id : NULL); 575 if (retval) 576 goto out_deallocate_hqd; 577 578 /* Temporarily release dqm lock to avoid a circular lock dependency */ 579 dqm_unlock(dqm); 580 q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties); 581 dqm_lock(dqm); 582 583 if (!q->mqd_mem_obj) { 584 retval = -ENOMEM; 585 goto out_deallocate_doorbell; 586 } 587 588 if (qd) 589 mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr, 590 &q->properties, restore_mqd, restore_ctl_stack, 591 qd->ctl_stack_size); 592 else 593 mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, 594 &q->gart_mqd_addr, &q->properties); 595 596 if (q->properties.is_active) { 597 if (!dqm->sched_running) { 598 WARN_ONCE(1, "Load non-HWS mqd while stopped\n"); 599 goto add_queue_to_list; 600 } 601 602 if (WARN(q->process->mm != current->mm, 603 "should only run in user thread")) 604 retval = -EFAULT; 605 else 606 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, 607 q->queue, &q->properties, current->mm); 608 if (retval) 609 goto out_free_mqd; 610 } 611 612 add_queue_to_list: 613 list_add(&q->list, &qpd->queues_list); 614 qpd->queue_count++; 615 if (q->properties.is_active) 616 increment_queue_count(dqm, qpd, q); 617 618 /* 619 * Unconditionally increment this counter, regardless of the queue's 620 * type or whether the queue is active. 621 */ 622 dqm->total_queue_count++; 623 pr_debug("Total of %d queues are accountable so far\n", 624 dqm->total_queue_count); 625 goto out_unlock; 626 627 out_free_mqd: 628 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 629 out_deallocate_doorbell: 630 deallocate_doorbell(qpd, q); 631 out_deallocate_hqd: 632 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) 633 deallocate_hqd(dqm, q); 634 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 635 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 636 deallocate_sdma_queue(dqm, q); 637 deallocate_vmid: 638 if (list_empty(&qpd->queues_list)) 639 deallocate_vmid(dqm, qpd, q); 640 out_unlock: 641 dqm_unlock(dqm); 642 return retval; 643 } 644 645 static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q) 646 { 647 bool set; 648 int pipe, bit, i; 649 650 set = false; 651 652 for (pipe = dqm->next_pipe_to_allocate, i = 0; 653 i < get_pipes_per_mec(dqm); 654 pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) { 655 656 if (!is_pipe_enabled(dqm, 0, pipe)) 657 continue; 658 659 if (dqm->allocated_queues[pipe] != 0) { 660 bit = ffs(dqm->allocated_queues[pipe]) - 1; 661 dqm->allocated_queues[pipe] &= ~(1 << bit); 662 q->pipe = pipe; 663 q->queue = bit; 664 set = true; 665 break; 666 } 667 } 668 669 if (!set) 670 return -EBUSY; 671 672 pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue); 673 /* horizontal hqd allocation */ 674 dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm); 675 676 return 0; 677 } 678 679 static inline void deallocate_hqd(struct device_queue_manager *dqm, 680 struct queue *q) 681 { 682 dqm->allocated_queues[q->pipe] |= (1 << q->queue); 683 } 684 685 #define SQ_IND_CMD_CMD_KILL 0x00000003 686 #define SQ_IND_CMD_MODE_BROADCAST 0x00000001 687 688 static int dbgdev_wave_reset_wavefronts(struct kfd_node *dev, struct kfd_process *p) 689 { 690 int status = 0; 691 unsigned int vmid; 692 uint16_t queried_pasid; 693 union SQ_CMD_BITS reg_sq_cmd; 694 union GRBM_GFX_INDEX_BITS reg_gfx_index; 695 struct kfd_process_device *pdd; 696 int first_vmid_to_scan = dev->vm_info.first_vmid_kfd; 697 int last_vmid_to_scan = dev->vm_info.last_vmid_kfd; 698 uint32_t xcc_mask = dev->xcc_mask; 699 
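	/* reg_gfx_index selects which shader engines the SQ command below is
	 * broadcast to; reg_sq_cmd carries the actual KILL request.
	 */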
	int xcc_id;

	reg_sq_cmd.u32All = 0;
	reg_gfx_index.u32All = 0;

	pr_debug("Killing all process wavefronts\n");

	if (!dev->kfd2kgd->get_atc_vmid_pasid_mapping_info) {
		pr_err("no vmid pasid mapping supported\n");
		return -EOPNOTSUPP;
	}

	/* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
	 * ATC_VMID15_PASID_MAPPING
	 * to check which VMID the current process is mapped to.
	 */

	for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
		status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info
				(dev->adev, vmid, &queried_pasid);

		if (status && queried_pasid == p->pasid) {
			pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n",
					vmid, p->pasid);
			break;
		}
	}

	if (vmid > last_vmid_to_scan) {
		pr_err("Didn't find vmid for pasid 0x%x\n", p->pasid);
		return -EFAULT;
	}

	/* Take the VMID for that process the safe way, via its PDD */
	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd)
		return -EFAULT;

	reg_gfx_index.bits.sh_broadcast_writes = 1;
	reg_gfx_index.bits.se_broadcast_writes = 1;
	reg_gfx_index.bits.instance_broadcast_writes = 1;
	reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
	reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
	reg_sq_cmd.bits.vm_id = vmid;

	for_each_inst(xcc_id, xcc_mask)
		dev->kfd2kgd->wave_control_execute(
				dev->adev, reg_gfx_index.u32All,
				reg_sq_cmd.u32All, xcc_id);

	return 0;
}

/* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked
 * to avoid unsynchronized access
 */
static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd_mgr;

	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
			q->properties.type)];

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
		deallocate_hqd(dqm, q);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		deallocate_sdma_queue(dqm, q);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
		deallocate_sdma_queue(dqm, q);
	else {
		pr_debug("q->properties.type %d is invalid\n",
				q->properties.type);
		return -EINVAL;
	}
	dqm->total_queue_count--;

	deallocate_doorbell(qpd, q);

	if (!dqm->sched_running) {
		WARN_ONCE(1, "Destroy non-HWS queue while stopped\n");
		return 0;
	}

	retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
				KFD_UNMAP_LATENCY_MS,
				q->pipe, q->queue);
	if (retval == -ETIME)
		qpd->reset_wavefronts = true;

	list_del(&q->list);
	if (list_empty(&qpd->queues_list)) {
		if (qpd->reset_wavefronts) {
			pr_warn("Resetting wave fronts (nocpsch) on dev %p\n",
					dqm->dev);
			/* dbgdev_wave_reset_wavefronts has to be called before
			 * deallocate_vmid(), i.e. when vmid is still in use.
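			 * (The wave kill is addressed by VMID, and
			 * deallocate_vmid() below both clears the PASID-VMID
			 * mapping and releases the VMID.)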
799 */ 800 dbgdev_wave_reset_wavefronts(dqm->dev, 801 qpd->pqm->process); 802 qpd->reset_wavefronts = false; 803 } 804 805 deallocate_vmid(dqm, qpd, q); 806 } 807 qpd->queue_count--; 808 if (q->properties.is_active) 809 decrement_queue_count(dqm, qpd, q); 810 811 return retval; 812 } 813 814 static int destroy_queue_nocpsch(struct device_queue_manager *dqm, 815 struct qcm_process_device *qpd, 816 struct queue *q) 817 { 818 int retval; 819 uint64_t sdma_val = 0; 820 struct kfd_process_device *pdd = qpd_to_pdd(qpd); 821 struct mqd_manager *mqd_mgr = 822 dqm->mqd_mgrs[get_mqd_type_from_queue_type(q->properties.type)]; 823 824 /* Get the SDMA queue stats */ 825 if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || 826 (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 827 retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr, 828 &sdma_val); 829 if (retval) 830 pr_err("Failed to read SDMA queue counter for queue: %d\n", 831 q->properties.queue_id); 832 } 833 834 dqm_lock(dqm); 835 retval = destroy_queue_nocpsch_locked(dqm, qpd, q); 836 if (!retval) 837 pdd->sdma_past_activity_counter += sdma_val; 838 dqm_unlock(dqm); 839 840 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 841 842 return retval; 843 } 844 845 static int update_queue(struct device_queue_manager *dqm, struct queue *q, 846 struct mqd_update_info *minfo) 847 { 848 int retval = 0; 849 struct mqd_manager *mqd_mgr; 850 struct kfd_process_device *pdd; 851 bool prev_active = false; 852 853 dqm_lock(dqm); 854 pdd = kfd_get_process_device_data(q->device, q->process); 855 if (!pdd) { 856 retval = -ENODEV; 857 goto out_unlock; 858 } 859 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 860 q->properties.type)]; 861 862 /* Save previous activity state for counters */ 863 prev_active = q->properties.is_active; 864 865 /* Make sure the queue is unmapped before updating the MQD */ 866 if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) { 867 if (!dqm->dev->kfd->shared_resources.enable_mes) 868 retval = unmap_queues_cpsch(dqm, 869 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false); 870 else if (prev_active) 871 retval = remove_queue_mes(dqm, q, &pdd->qpd); 872 873 if (retval) { 874 pr_err("unmap queue failed\n"); 875 goto out_unlock; 876 } 877 } else if (prev_active && 878 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || 879 q->properties.type == KFD_QUEUE_TYPE_SDMA || 880 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 881 882 if (!dqm->sched_running) { 883 WARN_ONCE(1, "Update non-HWS queue while stopped\n"); 884 goto out_unlock; 885 } 886 887 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, 888 (dqm->dev->kfd->cwsr_enabled ? 889 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE : 890 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN), 891 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); 892 if (retval) { 893 pr_err("destroy mqd failed\n"); 894 goto out_unlock; 895 } 896 } 897 898 mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties, minfo); 899 900 /* 901 * check active state vs. the previous state and modify 902 * counter accordingly. map_queues_cpsch uses the 903 * dqm->active_queue_count to determine whether a new runlist must be 904 * uploaded. 
905 */ 906 if (q->properties.is_active && !prev_active) { 907 increment_queue_count(dqm, &pdd->qpd, q); 908 } else if (!q->properties.is_active && prev_active) { 909 decrement_queue_count(dqm, &pdd->qpd, q); 910 } else if (q->gws && !q->properties.is_gws) { 911 if (q->properties.is_active) { 912 dqm->gws_queue_count++; 913 pdd->qpd.mapped_gws_queue = true; 914 } 915 q->properties.is_gws = true; 916 } else if (!q->gws && q->properties.is_gws) { 917 if (q->properties.is_active) { 918 dqm->gws_queue_count--; 919 pdd->qpd.mapped_gws_queue = false; 920 } 921 q->properties.is_gws = false; 922 } 923 924 if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) { 925 if (!dqm->dev->kfd->shared_resources.enable_mes) 926 retval = map_queues_cpsch(dqm); 927 else if (q->properties.is_active) 928 retval = add_queue_mes(dqm, q, &pdd->qpd); 929 } else if (q->properties.is_active && 930 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || 931 q->properties.type == KFD_QUEUE_TYPE_SDMA || 932 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 933 if (WARN(q->process->mm != current->mm, 934 "should only run in user thread")) 935 retval = -EFAULT; 936 else 937 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, 938 q->pipe, q->queue, 939 &q->properties, current->mm); 940 } 941 942 out_unlock: 943 dqm_unlock(dqm); 944 return retval; 945 } 946 947 static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, 948 struct qcm_process_device *qpd) 949 { 950 struct queue *q; 951 struct mqd_manager *mqd_mgr; 952 struct kfd_process_device *pdd; 953 int retval, ret = 0; 954 955 dqm_lock(dqm); 956 if (qpd->evicted++ > 0) /* already evicted, do nothing */ 957 goto out; 958 959 pdd = qpd_to_pdd(qpd); 960 pr_debug_ratelimited("Evicting PASID 0x%x queues\n", 961 pdd->process->pasid); 962 963 pdd->last_evict_timestamp = get_jiffies_64(); 964 /* Mark all queues as evicted. Deactivate all active queues on 965 * the qpd. 966 */ 967 list_for_each_entry(q, &qpd->queues_list, list) { 968 q->properties.is_evicted = true; 969 if (!q->properties.is_active) 970 continue; 971 972 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 973 q->properties.type)]; 974 q->properties.is_active = false; 975 decrement_queue_count(dqm, qpd, q); 976 977 if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n")) 978 continue; 979 980 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, 981 (dqm->dev->kfd->cwsr_enabled ? 982 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE : 983 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN), 984 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); 985 if (retval && !ret) 986 /* Return the first error, but keep going to 987 * maintain a consistent eviction state 988 */ 989 ret = retval; 990 } 991 992 out: 993 dqm_unlock(dqm); 994 return ret; 995 } 996 997 static int evict_process_queues_cpsch(struct device_queue_manager *dqm, 998 struct qcm_process_device *qpd) 999 { 1000 struct queue *q; 1001 struct kfd_process_device *pdd; 1002 int retval = 0; 1003 1004 dqm_lock(dqm); 1005 if (qpd->evicted++ > 0) /* already evicted, do nothing */ 1006 goto out; 1007 1008 pdd = qpd_to_pdd(qpd); 1009 1010 /* The debugger creates processes that temporarily have not acquired 1011 * all VMs for all devices and has no VMs itself. 1012 * Skip queue eviction on process eviction. 1013 */ 1014 if (!pdd->drm_priv) 1015 goto out; 1016 1017 pr_debug_ratelimited("Evicting PASID 0x%x queues\n", 1018 pdd->process->pasid); 1019 1020 /* Mark all queues as evicted. Deactivate all active queues on 1021 * the qpd. 
1022 */ 1023 list_for_each_entry(q, &qpd->queues_list, list) { 1024 q->properties.is_evicted = true; 1025 if (!q->properties.is_active) 1026 continue; 1027 1028 q->properties.is_active = false; 1029 decrement_queue_count(dqm, qpd, q); 1030 1031 if (dqm->dev->kfd->shared_resources.enable_mes) { 1032 retval = remove_queue_mes(dqm, q, qpd); 1033 if (retval) { 1034 pr_err("Failed to evict queue %d\n", 1035 q->properties.queue_id); 1036 goto out; 1037 } 1038 } 1039 } 1040 pdd->last_evict_timestamp = get_jiffies_64(); 1041 if (!dqm->dev->kfd->shared_resources.enable_mes) 1042 retval = execute_queues_cpsch(dqm, 1043 qpd->is_debug ? 1044 KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES : 1045 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 1046 1047 out: 1048 dqm_unlock(dqm); 1049 return retval; 1050 } 1051 1052 static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, 1053 struct qcm_process_device *qpd) 1054 { 1055 struct mm_struct *mm = NULL; 1056 struct queue *q; 1057 struct mqd_manager *mqd_mgr; 1058 struct kfd_process_device *pdd; 1059 uint64_t pd_base; 1060 uint64_t eviction_duration; 1061 int retval, ret = 0; 1062 1063 pdd = qpd_to_pdd(qpd); 1064 /* Retrieve PD base */ 1065 pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); 1066 1067 dqm_lock(dqm); 1068 if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */ 1069 goto out; 1070 if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */ 1071 qpd->evicted--; 1072 goto out; 1073 } 1074 1075 pr_debug_ratelimited("Restoring PASID 0x%x queues\n", 1076 pdd->process->pasid); 1077 1078 /* Update PD Base in QPD */ 1079 qpd->page_table_base = pd_base; 1080 pr_debug("Updated PD address to 0x%llx\n", pd_base); 1081 1082 if (!list_empty(&qpd->queues_list)) { 1083 dqm->dev->kfd2kgd->set_vm_context_page_table_base( 1084 dqm->dev->adev, 1085 qpd->vmid, 1086 qpd->page_table_base); 1087 kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY); 1088 } 1089 1090 /* Take a safe reference to the mm_struct, which may otherwise 1091 * disappear even while the kfd_process is still referenced. 1092 */ 1093 mm = get_task_mm(pdd->process->lead_thread); 1094 if (!mm) { 1095 ret = -EFAULT; 1096 goto out; 1097 } 1098 1099 /* Remove the eviction flags. Activate queues that are not 1100 * inactive for other reasons. 
1101 */ 1102 list_for_each_entry(q, &qpd->queues_list, list) { 1103 q->properties.is_evicted = false; 1104 if (!QUEUE_IS_ACTIVE(q->properties)) 1105 continue; 1106 1107 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 1108 q->properties.type)]; 1109 q->properties.is_active = true; 1110 increment_queue_count(dqm, qpd, q); 1111 1112 if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n")) 1113 continue; 1114 1115 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, 1116 q->queue, &q->properties, mm); 1117 if (retval && !ret) 1118 /* Return the first error, but keep going to 1119 * maintain a consistent eviction state 1120 */ 1121 ret = retval; 1122 } 1123 qpd->evicted = 0; 1124 eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp; 1125 atomic64_add(eviction_duration, &pdd->evict_duration_counter); 1126 out: 1127 if (mm) 1128 mmput(mm); 1129 dqm_unlock(dqm); 1130 return ret; 1131 } 1132 1133 static int restore_process_queues_cpsch(struct device_queue_manager *dqm, 1134 struct qcm_process_device *qpd) 1135 { 1136 struct queue *q; 1137 struct kfd_process_device *pdd; 1138 uint64_t eviction_duration; 1139 int retval = 0; 1140 1141 pdd = qpd_to_pdd(qpd); 1142 1143 dqm_lock(dqm); 1144 if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */ 1145 goto out; 1146 if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */ 1147 qpd->evicted--; 1148 goto out; 1149 } 1150 1151 /* The debugger creates processes that temporarily have not acquired 1152 * all VMs for all devices and has no VMs itself. 1153 * Skip queue restore on process restore. 1154 */ 1155 if (!pdd->drm_priv) 1156 goto vm_not_acquired; 1157 1158 pr_debug_ratelimited("Restoring PASID 0x%x queues\n", 1159 pdd->process->pasid); 1160 1161 /* Update PD Base in QPD */ 1162 qpd->page_table_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); 1163 pr_debug("Updated PD address to 0x%llx\n", qpd->page_table_base); 1164 1165 /* activate all active queues on the qpd */ 1166 list_for_each_entry(q, &qpd->queues_list, list) { 1167 q->properties.is_evicted = false; 1168 if (!QUEUE_IS_ACTIVE(q->properties)) 1169 continue; 1170 1171 q->properties.is_active = true; 1172 increment_queue_count(dqm, &pdd->qpd, q); 1173 1174 if (dqm->dev->kfd->shared_resources.enable_mes) { 1175 retval = add_queue_mes(dqm, q, qpd); 1176 if (retval) { 1177 pr_err("Failed to restore queue %d\n", 1178 q->properties.queue_id); 1179 goto out; 1180 } 1181 } 1182 } 1183 if (!dqm->dev->kfd->shared_resources.enable_mes) 1184 retval = execute_queues_cpsch(dqm, 1185 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 1186 1187 eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp; 1188 atomic64_add(eviction_duration, &pdd->evict_duration_counter); 1189 vm_not_acquired: 1190 qpd->evicted = 0; 1191 out: 1192 dqm_unlock(dqm); 1193 return retval; 1194 } 1195 1196 static int register_process(struct device_queue_manager *dqm, 1197 struct qcm_process_device *qpd) 1198 { 1199 struct device_process_node *n; 1200 struct kfd_process_device *pdd; 1201 uint64_t pd_base; 1202 int retval; 1203 1204 n = kzalloc(sizeof(*n), GFP_KERNEL); 1205 if (!n) 1206 return -ENOMEM; 1207 1208 n->qpd = qpd; 1209 1210 pdd = qpd_to_pdd(qpd); 1211 /* Retrieve PD base */ 1212 pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); 1213 1214 dqm_lock(dqm); 1215 list_add(&n->list, &dqm->queues); 1216 1217 /* Update PD Base in QPD */ 1218 qpd->page_table_base = pd_base; 1219 pr_debug("Updated PD address to 0x%llx\n", pd_base); 1220 1221 retval = 
dqm->asic_ops.update_qpd(dqm, qpd); 1222 1223 dqm->processes_count++; 1224 1225 dqm_unlock(dqm); 1226 1227 /* Outside the DQM lock because under the DQM lock we can't do 1228 * reclaim or take other locks that others hold while reclaiming. 1229 */ 1230 kfd_inc_compute_active(dqm->dev); 1231 1232 return retval; 1233 } 1234 1235 static int unregister_process(struct device_queue_manager *dqm, 1236 struct qcm_process_device *qpd) 1237 { 1238 int retval; 1239 struct device_process_node *cur, *next; 1240 1241 pr_debug("qpd->queues_list is %s\n", 1242 list_empty(&qpd->queues_list) ? "empty" : "not empty"); 1243 1244 retval = 0; 1245 dqm_lock(dqm); 1246 1247 list_for_each_entry_safe(cur, next, &dqm->queues, list) { 1248 if (qpd == cur->qpd) { 1249 list_del(&cur->list); 1250 kfree(cur); 1251 dqm->processes_count--; 1252 goto out; 1253 } 1254 } 1255 /* qpd not found in dqm list */ 1256 retval = 1; 1257 out: 1258 dqm_unlock(dqm); 1259 1260 /* Outside the DQM lock because under the DQM lock we can't do 1261 * reclaim or take other locks that others hold while reclaiming. 1262 */ 1263 if (!retval) 1264 kfd_dec_compute_active(dqm->dev); 1265 1266 return retval; 1267 } 1268 1269 static int 1270 set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid, 1271 unsigned int vmid) 1272 { 1273 uint32_t xcc_mask = dqm->dev->xcc_mask; 1274 int xcc_id, ret; 1275 1276 for_each_inst(xcc_id, xcc_mask) { 1277 ret = dqm->dev->kfd2kgd->set_pasid_vmid_mapping( 1278 dqm->dev->adev, pasid, vmid, xcc_id); 1279 if (ret) 1280 break; 1281 } 1282 1283 return ret; 1284 } 1285 1286 static void init_interrupts(struct device_queue_manager *dqm) 1287 { 1288 uint32_t xcc_mask = dqm->dev->xcc_mask; 1289 unsigned int i, xcc_id; 1290 1291 for_each_inst(xcc_id, xcc_mask) { 1292 for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++) { 1293 if (is_pipe_enabled(dqm, 0, i)) { 1294 dqm->dev->kfd2kgd->init_interrupts( 1295 dqm->dev->adev, i, xcc_id); 1296 } 1297 } 1298 } 1299 } 1300 1301 static int initialize_nocpsch(struct device_queue_manager *dqm) 1302 { 1303 int pipe, queue; 1304 1305 pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm)); 1306 1307 dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm), 1308 sizeof(unsigned int), GFP_KERNEL); 1309 if (!dqm->allocated_queues) 1310 return -ENOMEM; 1311 1312 mutex_init(&dqm->lock_hidden); 1313 INIT_LIST_HEAD(&dqm->queues); 1314 dqm->active_queue_count = dqm->next_pipe_to_allocate = 0; 1315 dqm->active_cp_queue_count = 0; 1316 dqm->gws_queue_count = 0; 1317 1318 for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { 1319 int pipe_offset = pipe * get_queues_per_pipe(dqm); 1320 1321 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) 1322 if (test_bit(pipe_offset + queue, 1323 dqm->dev->kfd->shared_resources.cp_queue_bitmap)) 1324 dqm->allocated_queues[pipe] |= 1 << queue; 1325 } 1326 1327 memset(dqm->vmid_pasid, 0, sizeof(dqm->vmid_pasid)); 1328 1329 init_sdma_bitmaps(dqm); 1330 1331 return 0; 1332 } 1333 1334 static void uninitialize(struct device_queue_manager *dqm) 1335 { 1336 int i; 1337 1338 WARN_ON(dqm->active_queue_count > 0 || dqm->processes_count > 0); 1339 1340 kfree(dqm->allocated_queues); 1341 for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++) 1342 kfree(dqm->mqd_mgrs[i]); 1343 mutex_destroy(&dqm->lock_hidden); 1344 } 1345 1346 static int start_nocpsch(struct device_queue_manager *dqm) 1347 { 1348 int r = 0; 1349 1350 pr_info("SW scheduler is used"); 1351 init_interrupts(dqm); 1352 1353 if (dqm->dev->adev->asic_type == CHIP_HAWAII) 1354 r = pm_init(&dqm->packet_mgr, dqm); 1355 if 
(!r) 1356 dqm->sched_running = true; 1357 1358 return r; 1359 } 1360 1361 static int stop_nocpsch(struct device_queue_manager *dqm) 1362 { 1363 dqm_lock(dqm); 1364 if (!dqm->sched_running) { 1365 dqm_unlock(dqm); 1366 return 0; 1367 } 1368 1369 if (dqm->dev->adev->asic_type == CHIP_HAWAII) 1370 pm_uninit(&dqm->packet_mgr, false); 1371 dqm->sched_running = false; 1372 dqm_unlock(dqm); 1373 1374 return 0; 1375 } 1376 1377 static void pre_reset(struct device_queue_manager *dqm) 1378 { 1379 dqm_lock(dqm); 1380 dqm->is_resetting = true; 1381 dqm_unlock(dqm); 1382 } 1383 1384 static int allocate_sdma_queue(struct device_queue_manager *dqm, 1385 struct queue *q, const uint32_t *restore_sdma_id) 1386 { 1387 int bit; 1388 1389 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { 1390 if (bitmap_empty(dqm->sdma_bitmap, KFD_MAX_SDMA_QUEUES)) { 1391 pr_err("No more SDMA queue to allocate\n"); 1392 return -ENOMEM; 1393 } 1394 1395 if (restore_sdma_id) { 1396 /* Re-use existing sdma_id */ 1397 if (!test_bit(*restore_sdma_id, dqm->sdma_bitmap)) { 1398 pr_err("SDMA queue already in use\n"); 1399 return -EBUSY; 1400 } 1401 clear_bit(*restore_sdma_id, dqm->sdma_bitmap); 1402 q->sdma_id = *restore_sdma_id; 1403 } else { 1404 /* Find first available sdma_id */ 1405 bit = find_first_bit(dqm->sdma_bitmap, 1406 get_num_sdma_queues(dqm)); 1407 clear_bit(bit, dqm->sdma_bitmap); 1408 q->sdma_id = bit; 1409 } 1410 1411 q->properties.sdma_engine_id = 1412 q->sdma_id % kfd_get_num_sdma_engines(dqm->dev); 1413 q->properties.sdma_queue_id = q->sdma_id / 1414 kfd_get_num_sdma_engines(dqm->dev); 1415 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 1416 if (bitmap_empty(dqm->xgmi_sdma_bitmap, KFD_MAX_SDMA_QUEUES)) { 1417 pr_err("No more XGMI SDMA queue to allocate\n"); 1418 return -ENOMEM; 1419 } 1420 if (restore_sdma_id) { 1421 /* Re-use existing sdma_id */ 1422 if (!test_bit(*restore_sdma_id, dqm->xgmi_sdma_bitmap)) { 1423 pr_err("SDMA queue already in use\n"); 1424 return -EBUSY; 1425 } 1426 clear_bit(*restore_sdma_id, dqm->xgmi_sdma_bitmap); 1427 q->sdma_id = *restore_sdma_id; 1428 } else { 1429 bit = find_first_bit(dqm->xgmi_sdma_bitmap, 1430 get_num_xgmi_sdma_queues(dqm)); 1431 clear_bit(bit, dqm->xgmi_sdma_bitmap); 1432 q->sdma_id = bit; 1433 } 1434 /* sdma_engine_id is sdma id including 1435 * both PCIe-optimized SDMAs and XGMI- 1436 * optimized SDMAs. 
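	 * For example, assuming 2 PCIe-optimized and 6 XGMI-optimized
	 * engines, an XGMI queue with sdma_id 3 is placed on engine
	 * 2 + (3 % 6) = 5.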
The calculation below 1437 * assumes the first N engines are always 1438 * PCIe-optimized ones 1439 */ 1440 q->properties.sdma_engine_id = 1441 kfd_get_num_sdma_engines(dqm->dev) + 1442 q->sdma_id % kfd_get_num_xgmi_sdma_engines(dqm->dev); 1443 q->properties.sdma_queue_id = q->sdma_id / 1444 kfd_get_num_xgmi_sdma_engines(dqm->dev); 1445 } 1446 1447 pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); 1448 pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id); 1449 1450 return 0; 1451 } 1452 1453 static void deallocate_sdma_queue(struct device_queue_manager *dqm, 1454 struct queue *q) 1455 { 1456 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { 1457 if (q->sdma_id >= get_num_sdma_queues(dqm)) 1458 return; 1459 set_bit(q->sdma_id, dqm->sdma_bitmap); 1460 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 1461 if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm)) 1462 return; 1463 set_bit(q->sdma_id, dqm->xgmi_sdma_bitmap); 1464 } 1465 } 1466 1467 /* 1468 * Device Queue Manager implementation for cp scheduler 1469 */ 1470 1471 static int set_sched_resources(struct device_queue_manager *dqm) 1472 { 1473 int i, mec; 1474 struct scheduling_resources res; 1475 1476 res.vmid_mask = dqm->dev->compute_vmid_bitmap; 1477 1478 res.queue_mask = 0; 1479 for (i = 0; i < KGD_MAX_QUEUES; ++i) { 1480 mec = (i / dqm->dev->kfd->shared_resources.num_queue_per_pipe) 1481 / dqm->dev->kfd->shared_resources.num_pipe_per_mec; 1482 1483 if (!test_bit(i, dqm->dev->kfd->shared_resources.cp_queue_bitmap)) 1484 continue; 1485 1486 /* only acquire queues from the first MEC */ 1487 if (mec > 0) 1488 continue; 1489 1490 /* This situation may be hit in the future if a new HW 1491 * generation exposes more than 64 queues. If so, the 1492 * definition of res.queue_mask needs updating 1493 */ 1494 if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) { 1495 pr_err("Invalid queue enabled by amdgpu: %d\n", i); 1496 break; 1497 } 1498 1499 res.queue_mask |= 1ull 1500 << amdgpu_queue_mask_bit_to_set_resource_bit( 1501 dqm->dev->adev, i); 1502 } 1503 res.gws_mask = ~0ull; 1504 res.oac_mask = res.gds_heap_base = res.gds_heap_size = 0; 1505 1506 pr_debug("Scheduling resources:\n" 1507 "vmid mask: 0x%8X\n" 1508 "queue mask: 0x%8llX\n", 1509 res.vmid_mask, res.queue_mask); 1510 1511 return pm_send_set_resources(&dqm->packet_mgr, &res); 1512 } 1513 1514 static int initialize_cpsch(struct device_queue_manager *dqm) 1515 { 1516 pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm)); 1517 1518 mutex_init(&dqm->lock_hidden); 1519 INIT_LIST_HEAD(&dqm->queues); 1520 dqm->active_queue_count = dqm->processes_count = 0; 1521 dqm->active_cp_queue_count = 0; 1522 dqm->gws_queue_count = 0; 1523 dqm->active_runlist = false; 1524 INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception); 1525 1526 init_sdma_bitmaps(dqm); 1527 1528 return 0; 1529 } 1530 1531 static int start_cpsch(struct device_queue_manager *dqm) 1532 { 1533 int retval; 1534 1535 retval = 0; 1536 1537 dqm_lock(dqm); 1538 1539 if (!dqm->dev->kfd->shared_resources.enable_mes) { 1540 retval = pm_init(&dqm->packet_mgr, dqm); 1541 if (retval) 1542 goto fail_packet_manager_init; 1543 1544 retval = set_sched_resources(dqm); 1545 if (retval) 1546 goto fail_set_sched_resources; 1547 } 1548 pr_debug("Allocating fence memory\n"); 1549 1550 /* allocate fence memory on the gart */ 1551 retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr), 1552 &dqm->fence_mem); 1553 1554 if (retval) 1555 goto fail_allocate_vidmem; 1556 1557 dqm->fence_addr = (uint64_t 
		*)dqm->fence_mem->cpu_ptr;
	dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;

	init_interrupts(dqm);

	/* clear hang status when the driver tries to start the HW scheduler */
	dqm->is_hws_hang = false;
	dqm->is_resetting = false;
	dqm->sched_running = true;
	if (!dqm->dev->kfd->shared_resources.enable_mes)
		execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	dqm_unlock(dqm);

	return 0;
fail_allocate_vidmem:
fail_set_sched_resources:
	if (!dqm->dev->kfd->shared_resources.enable_mes)
		pm_uninit(&dqm->packet_mgr, false);
fail_packet_manager_init:
	dqm_unlock(dqm);
	return retval;
}

static int stop_cpsch(struct device_queue_manager *dqm)
{
	bool hanging;

	dqm_lock(dqm);
	if (!dqm->sched_running) {
		dqm_unlock(dqm);
		return 0;
	}

	if (!dqm->is_hws_hang) {
		if (!dqm->dev->kfd->shared_resources.enable_mes)
			unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, false);
		else
			remove_all_queues_mes(dqm);
	}

	hanging = dqm->is_hws_hang || dqm->is_resetting;
	dqm->sched_running = false;

	if (!dqm->dev->kfd->shared_resources.enable_mes)
		pm_release_ib(&dqm->packet_mgr);

	kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
	if (!dqm->dev->kfd->shared_resources.enable_mes)
		pm_uninit(&dqm->packet_mgr, hanging);
	dqm_unlock(dqm);

	return 0;
}

static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	dqm_lock(dqm);
	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new kernel queue because %d queues were already created\n",
				dqm->total_queue_count);
		dqm_unlock(dqm);
		return -EPERM;
	}

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	list_add(&kq->list, &qpd->priv_queue_list);
	increment_queue_count(dqm, qpd, kq->queue);
	qpd->is_debug = true;
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	dqm_unlock(dqm);

	return 0;
}

static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	dqm_lock(dqm);
	list_del(&kq->list);
	decrement_queue_count(dqm, qpd, kq->queue);
	qpd->is_debug = false;
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type.
1652 */ 1653 dqm->total_queue_count--; 1654 pr_debug("Total of %d queues are accountable so far\n", 1655 dqm->total_queue_count); 1656 dqm_unlock(dqm); 1657 } 1658 1659 static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, 1660 struct qcm_process_device *qpd, 1661 const struct kfd_criu_queue_priv_data *qd, 1662 const void *restore_mqd, const void *restore_ctl_stack) 1663 { 1664 int retval; 1665 struct mqd_manager *mqd_mgr; 1666 1667 if (dqm->total_queue_count >= max_num_of_queues_per_device) { 1668 pr_warn("Can't create new usermode queue because %d queues were already created\n", 1669 dqm->total_queue_count); 1670 retval = -EPERM; 1671 goto out; 1672 } 1673 1674 if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 1675 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 1676 dqm_lock(dqm); 1677 retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL); 1678 dqm_unlock(dqm); 1679 if (retval) 1680 goto out; 1681 } 1682 1683 retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL); 1684 if (retval) 1685 goto out_deallocate_sdma_queue; 1686 1687 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 1688 q->properties.type)]; 1689 1690 if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 1691 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 1692 dqm->asic_ops.init_sdma_vm(dqm, q, qpd); 1693 q->properties.tba_addr = qpd->tba_addr; 1694 q->properties.tma_addr = qpd->tma_addr; 1695 q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties); 1696 if (!q->mqd_mem_obj) { 1697 retval = -ENOMEM; 1698 goto out_deallocate_doorbell; 1699 } 1700 1701 dqm_lock(dqm); 1702 /* 1703 * Eviction state logic: mark all queues as evicted, even ones 1704 * not currently active. Restoring inactive queues later only 1705 * updates the is_evicted flag but is a no-op otherwise. 1706 */ 1707 q->properties.is_evicted = !!qpd->evicted; 1708 1709 if (qd) 1710 mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr, 1711 &q->properties, restore_mqd, restore_ctl_stack, 1712 qd->ctl_stack_size); 1713 else 1714 mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, 1715 &q->gart_mqd_addr, &q->properties); 1716 1717 list_add(&q->list, &qpd->queues_list); 1718 qpd->queue_count++; 1719 1720 if (q->properties.is_active) { 1721 increment_queue_count(dqm, qpd, q); 1722 1723 if (!dqm->dev->kfd->shared_resources.enable_mes) 1724 retval = execute_queues_cpsch(dqm, 1725 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 1726 else 1727 retval = add_queue_mes(dqm, q, qpd); 1728 if (retval) 1729 goto cleanup_queue; 1730 } 1731 1732 /* 1733 * Unconditionally increment this counter, regardless of the queue's 1734 * type or whether the queue is active. 
1735 */ 1736 dqm->total_queue_count++; 1737 1738 pr_debug("Total of %d queues are accountable so far\n", 1739 dqm->total_queue_count); 1740 1741 dqm_unlock(dqm); 1742 return retval; 1743 1744 cleanup_queue: 1745 qpd->queue_count--; 1746 list_del(&q->list); 1747 if (q->properties.is_active) 1748 decrement_queue_count(dqm, qpd, q); 1749 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 1750 dqm_unlock(dqm); 1751 out_deallocate_doorbell: 1752 deallocate_doorbell(qpd, q); 1753 out_deallocate_sdma_queue: 1754 if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 1755 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 1756 dqm_lock(dqm); 1757 deallocate_sdma_queue(dqm, q); 1758 dqm_unlock(dqm); 1759 } 1760 out: 1761 return retval; 1762 } 1763 1764 int amdkfd_fence_wait_timeout(uint64_t *fence_addr, 1765 uint64_t fence_value, 1766 unsigned int timeout_ms) 1767 { 1768 unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies; 1769 1770 while (*fence_addr != fence_value) { 1771 if (time_after(jiffies, end_jiffies)) { 1772 pr_err("qcm fence wait loop timeout expired\n"); 1773 /* In HWS case, this is used to halt the driver thread 1774 * in order not to mess up CP states before doing 1775 * scandumps for FW debugging. 1776 */ 1777 while (halt_if_hws_hang) 1778 schedule(); 1779 1780 return -ETIME; 1781 } 1782 schedule(); 1783 } 1784 1785 return 0; 1786 } 1787 1788 /* dqm->lock mutex has to be locked before calling this function */ 1789 static int map_queues_cpsch(struct device_queue_manager *dqm) 1790 { 1791 int retval; 1792 1793 if (!dqm->sched_running) 1794 return 0; 1795 if (dqm->active_queue_count <= 0 || dqm->processes_count <= 0) 1796 return 0; 1797 if (dqm->active_runlist) 1798 return 0; 1799 1800 retval = pm_send_runlist(&dqm->packet_mgr, &dqm->queues); 1801 pr_debug("%s sent runlist\n", __func__); 1802 if (retval) { 1803 pr_err("failed to execute runlist\n"); 1804 return retval; 1805 } 1806 dqm->active_runlist = true; 1807 1808 return retval; 1809 } 1810 1811 /* dqm->lock mutex has to be locked before calling this function */ 1812 static int unmap_queues_cpsch(struct device_queue_manager *dqm, 1813 enum kfd_unmap_queues_filter filter, 1814 uint32_t filter_param, bool reset) 1815 { 1816 int retval = 0; 1817 struct mqd_manager *mqd_mgr; 1818 1819 if (!dqm->sched_running) 1820 return 0; 1821 if (dqm->is_hws_hang || dqm->is_resetting) 1822 return -EIO; 1823 if (!dqm->active_runlist) 1824 return retval; 1825 1826 retval = pm_send_unmap_queue(&dqm->packet_mgr, filter, filter_param, reset); 1827 if (retval) 1828 return retval; 1829 1830 *dqm->fence_addr = KFD_FENCE_INIT; 1831 pm_send_query_status(&dqm->packet_mgr, dqm->fence_gpu_addr, 1832 KFD_FENCE_COMPLETED); 1833 /* should be timed out */ 1834 retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED, 1835 queue_preemption_timeout_ms); 1836 if (retval) { 1837 pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n"); 1838 kfd_hws_hang(dqm); 1839 return retval; 1840 } 1841 1842 /* In the current MEC firmware implementation, if compute queue 1843 * doesn't response to the preemption request in time, HIQ will 1844 * abandon the unmap request without returning any timeout error 1845 * to driver. Instead, MEC firmware will log the doorbell of the 1846 * unresponding compute queue to HIQ.MQD.queue_doorbell_id fields. 
1847 * To make sure the queue unmap was successful, driver need to 1848 * check those fields 1849 */ 1850 mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]; 1851 if (mqd_mgr->read_doorbell_id(dqm->packet_mgr.priv_queue->queue->mqd)) { 1852 pr_err("HIQ MQD's queue_doorbell_id0 is not 0, Queue preemption time out\n"); 1853 while (halt_if_hws_hang) 1854 schedule(); 1855 return -ETIME; 1856 } 1857 1858 pm_release_ib(&dqm->packet_mgr); 1859 dqm->active_runlist = false; 1860 1861 return retval; 1862 } 1863 1864 /* only for compute queue */ 1865 static int reset_queues_cpsch(struct device_queue_manager *dqm, 1866 uint16_t pasid) 1867 { 1868 int retval; 1869 1870 dqm_lock(dqm); 1871 1872 retval = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_BY_PASID, 1873 pasid, true); 1874 1875 dqm_unlock(dqm); 1876 return retval; 1877 } 1878 1879 /* dqm->lock mutex has to be locked before calling this function */ 1880 static int execute_queues_cpsch(struct device_queue_manager *dqm, 1881 enum kfd_unmap_queues_filter filter, 1882 uint32_t filter_param) 1883 { 1884 int retval; 1885 1886 if (dqm->is_hws_hang) 1887 return -EIO; 1888 retval = unmap_queues_cpsch(dqm, filter, filter_param, false); 1889 if (retval) 1890 return retval; 1891 1892 return map_queues_cpsch(dqm); 1893 } 1894 1895 static int destroy_queue_cpsch(struct device_queue_manager *dqm, 1896 struct qcm_process_device *qpd, 1897 struct queue *q) 1898 { 1899 int retval; 1900 struct mqd_manager *mqd_mgr; 1901 uint64_t sdma_val = 0; 1902 struct kfd_process_device *pdd = qpd_to_pdd(qpd); 1903 1904 /* Get the SDMA queue stats */ 1905 if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || 1906 (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 1907 retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr, 1908 &sdma_val); 1909 if (retval) 1910 pr_err("Failed to read SDMA queue counter for queue: %d\n", 1911 q->properties.queue_id); 1912 } 1913 1914 retval = 0; 1915 1916 /* remove queue from list to prevent rescheduling after preemption */ 1917 dqm_lock(dqm); 1918 1919 if (qpd->is_debug) { 1920 /* 1921 * error, currently we do not allow to destroy a queue 1922 * of a currently debugged process 1923 */ 1924 retval = -EBUSY; 1925 goto failed_try_destroy_debugged_queue; 1926 1927 } 1928 1929 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 1930 q->properties.type)]; 1931 1932 deallocate_doorbell(qpd, q); 1933 1934 if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || 1935 (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 1936 deallocate_sdma_queue(dqm, q); 1937 pdd->sdma_past_activity_counter += sdma_val; 1938 } 1939 1940 list_del(&q->list); 1941 qpd->queue_count--; 1942 if (q->properties.is_active) { 1943 if (!dqm->dev->kfd->shared_resources.enable_mes) { 1944 decrement_queue_count(dqm, qpd, q); 1945 retval = execute_queues_cpsch(dqm, 1946 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 1947 if (retval == -ETIME) 1948 qpd->reset_wavefronts = true; 1949 } else { 1950 retval = remove_queue_mes(dqm, q, qpd); 1951 } 1952 } 1953 1954 /* 1955 * Unconditionally decrement this counter, regardless of the queue's 1956 * type 1957 */ 1958 dqm->total_queue_count--; 1959 pr_debug("Total of %d queues are accountable so far\n", 1960 dqm->total_queue_count); 1961 1962 dqm_unlock(dqm); 1963 1964 /* Do free_mqd after dqm_unlock(dqm) to avoid circular locking */ 1965 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 1966 1967 return retval; 1968 1969 failed_try_destroy_debugged_queue: 1970 1971 dqm_unlock(dqm); 1972 return retval; 1973 } 1974 1975 /* 1976 * Low bits 
static bool set_cache_memory_policy(struct device_queue_manager *dqm,
				   struct qcm_process_device *qpd,
				   enum cache_policy default_policy,
				   enum cache_policy alternate_policy,
				   void __user *alternate_aperture_base,
				   uint64_t alternate_aperture_size)
{
	bool retval = true;

	if (!dqm->asic_ops.set_cache_memory_policy)
		return retval;

	dqm_lock(dqm);

	if (alternate_aperture_size == 0) {
		/* base > limit disables APE1 */
		qpd->sh_mem_ape1_base = 1;
		qpd->sh_mem_ape1_limit = 0;
	} else {
		/*
		 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
		 *			SH_MEM_APE1_BASE[31:0], 0x0000 }
		 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
		 *			SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
		 * Verify that the base and size parameters can be
		 * represented in this format and convert them.
		 * Additionally restrict APE1 to user-mode addresses.
		 */

		uint64_t base = (uintptr_t)alternate_aperture_base;
		uint64_t limit = base + alternate_aperture_size - 1;

		if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
		   (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
			retval = false;
			goto out;
		}

		qpd->sh_mem_ape1_base = base >> 16;
		qpd->sh_mem_ape1_limit = limit >> 16;
	}

	retval = dqm->asic_ops.set_cache_memory_policy(
			dqm,
			qpd,
			default_policy,
			alternate_policy,
			alternate_aperture_base,
			alternate_aperture_size);

	if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
		program_sh_mem_settings(dqm, qpd);

	pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
		qpd->sh_mem_config, qpd->sh_mem_ape1_base,
		qpd->sh_mem_ape1_limit);

out:
	dqm_unlock(dqm);
	return retval;
}

static int process_termination_nocpsch(struct device_queue_manager *dqm,
		struct qcm_process_device *qpd)
{
	struct queue *q;
	struct device_process_node *cur, *next_dpn;
	int retval = 0;
	bool found = false;

	dqm_lock(dqm);

	/* Clear all user mode queues */
	while (!list_empty(&qpd->queues_list)) {
		struct mqd_manager *mqd_mgr;
		int ret;

		q = list_first_entry(&qpd->queues_list, struct queue, list);
		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
				q->properties.type)];
		ret = destroy_queue_nocpsch_locked(dqm, qpd, q);
		if (ret)
			retval = ret;
		dqm_unlock(dqm);
		mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
		dqm_lock(dqm);
	}

	/* Unregister process */
	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			found = true;
			break;
		}
	}

	dqm_unlock(dqm);

	/* Outside the DQM lock because under the DQM lock we can't do
	 * reclaim or take other locks that others hold while reclaiming.
	 */
	if (found)
		kfd_dec_compute_active(dqm->dev);

	return retval;
}
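/*
 * Note: wave state (the CWSR control stack and save area) only exists
 * for compute queues on devices with CWSR enabled, and reading it from
 * a queue that is still active would race with the hardware; the checks
 * at the top of get_wave_state() below reject anything else with
 * -EINVAL.
 */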
static int get_wave_state(struct device_queue_manager *dqm,
			  struct queue *q,
			  void __user *ctl_stack,
			  u32 *ctl_stack_used_size,
			  u32 *save_area_used_size)
{
	struct mqd_manager *mqd_mgr;

	dqm_lock(dqm);

	mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP];

	if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE ||
	    q->properties.is_active || !q->device->kfd->cwsr_enabled ||
	    !mqd_mgr->get_wave_state) {
		dqm_unlock(dqm);
		return -EINVAL;
	}

	dqm_unlock(dqm);

	/*
	 * get_wave_state() is called outside the dqm lock to prevent
	 * circular locking; the queue is expected to be protected against
	 * destruction by the process lock.
	 */
	return mqd_mgr->get_wave_state(mqd_mgr, q->mqd, &q->properties,
			ctl_stack, ctl_stack_used_size, save_area_used_size);
}

static void get_queue_checkpoint_info(struct device_queue_manager *dqm,
			const struct queue *q,
			u32 *mqd_size,
			u32 *ctl_stack_size)
{
	struct mqd_manager *mqd_mgr;
	enum KFD_MQD_TYPE mqd_type =
			get_mqd_type_from_queue_type(q->properties.type);

	dqm_lock(dqm);
	mqd_mgr = dqm->mqd_mgrs[mqd_type];
	*mqd_size = mqd_mgr->mqd_size;
	*ctl_stack_size = 0;

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE && mqd_mgr->get_checkpoint_info)
		mqd_mgr->get_checkpoint_info(mqd_mgr, q->mqd, ctl_stack_size);

	dqm_unlock(dqm);
}

static int checkpoint_mqd(struct device_queue_manager *dqm,
			  const struct queue *q,
			  void *mqd,
			  void *ctl_stack)
{
	struct mqd_manager *mqd_mgr;
	int r = 0;
	enum KFD_MQD_TYPE mqd_type =
			get_mqd_type_from_queue_type(q->properties.type);

	dqm_lock(dqm);

	if (q->properties.is_active || !q->device->kfd->cwsr_enabled) {
		r = -EINVAL;
		goto dqm_unlock;
	}

	mqd_mgr = dqm->mqd_mgrs[mqd_type];
	if (!mqd_mgr->checkpoint_mqd) {
		r = -EOPNOTSUPP;
		goto dqm_unlock;
	}

	mqd_mgr->checkpoint_mqd(mqd_mgr, q->mqd, mqd, ctl_stack);

dqm_unlock:
	dqm_unlock(dqm);
	return r;
}
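/*
 * A minimal usage sketch for the two checkpoint hooks above, assuming a
 * compute queue q (the real caller lives in KFD's CRIU checkpoint path;
 * the local names here are illustrative only):
 *
 *	u32 mqd_size, ctl_stack_size;
 *	void *mqd_buf, *ctl_stack_buf;
 *	int r;
 *
 *	dqm->ops.get_queue_checkpoint_info(dqm, q, &mqd_size,
 *					   &ctl_stack_size);
 *	mqd_buf = kvzalloc(mqd_size, GFP_KERNEL);
 *	ctl_stack_buf = kvzalloc(ctl_stack_size, GFP_KERNEL);
 *	if (mqd_buf && ctl_stack_buf)
 *		r = dqm->ops.checkpoint_mqd(dqm, q, mqd_buf, ctl_stack_buf);
 */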
static int process_termination_cpsch(struct device_queue_manager *dqm,
		struct qcm_process_device *qpd)
{
	int retval;
	struct queue *q;
	struct kernel_queue *kq, *kq_next;
	struct mqd_manager *mqd_mgr;
	struct device_process_node *cur, *next_dpn;
	enum kfd_unmap_queues_filter filter =
		KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;
	bool found = false;

	retval = 0;

	dqm_lock(dqm);

	/* Clean all kernel queues */
	list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
		list_del(&kq->list);
		decrement_queue_count(dqm, qpd, kq->queue);
		qpd->is_debug = false;
		dqm->total_queue_count--;
		filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES;
	}

	/* Clear all user mode queues */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		    q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
			deallocate_sdma_queue(dqm, q);

		if (q->properties.is_active) {
			decrement_queue_count(dqm, qpd, q);

			if (dqm->dev->kfd->shared_resources.enable_mes) {
				retval = remove_queue_mes(dqm, q, qpd);
				if (retval)
					pr_err("Failed to remove queue %d\n",
						q->properties.queue_id);
			}
		}

		dqm->total_queue_count--;
	}

	/* Unregister process */
	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			found = true;
			break;
		}
	}

	if (!dqm->dev->kfd->shared_resources.enable_mes)
		retval = execute_queues_cpsch(dqm, filter, 0);

	if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) {
		pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
		dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
		qpd->reset_wavefronts = false;
	}

	/* Lastly, free mqd resources.
	 * Do free_mqd() after dqm_unlock to avoid circular locking.
	 */
	while (!list_empty(&qpd->queues_list)) {
		q = list_first_entry(&qpd->queues_list, struct queue, list);
		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
				q->properties.type)];
		list_del(&q->list);
		qpd->queue_count--;
		dqm_unlock(dqm);
		mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
		dqm_lock(dqm);
	}
	dqm_unlock(dqm);

	/* Outside the DQM lock because under the DQM lock we can't do
	 * reclaim or take other locks that others hold while reclaiming.
	 */
	if (found)
		kfd_dec_compute_active(dqm->dev);

	return retval;
}

static int init_mqd_managers(struct device_queue_manager *dqm)
{
	int i, j;
	struct mqd_manager *mqd_mgr;

	for (i = 0; i < KFD_MQD_TYPE_MAX; i++) {
		mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev);
		if (!mqd_mgr) {
			pr_err("mqd manager [%d] initialization failed\n", i);
			goto out_free;
		}
		dqm->mqd_mgrs[i] = mqd_mgr;
	}

	return 0;

out_free:
	for (j = 0; j < i; j++) {
		kfree(dqm->mqd_mgrs[j]);
		dqm->mqd_mgrs[j] = NULL;
	}

	return -ENOMEM;
}

/* Allocate one hiq mqd (HWS) and all SDMA mqds in one contiguous chunk */
static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)
{
	int retval;
	struct kfd_node *dev = dqm->dev;
	struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd;
	uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
		get_num_all_sdma_engines(dqm) *
		dev->kfd->device_info.num_sdma_queues_per_engine +
		(dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size *
		 NUM_XCC(dqm->dev->xcc_mask));

	retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, size,
		&(mem_obj->gtt_mem), &(mem_obj->gpu_addr),
		(void *)&(mem_obj->cpu_ptr), false);

	return retval;
}

struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev)
{
	struct device_queue_manager *dqm;

	pr_debug("Loading device queue manager\n");

	dqm = kzalloc(sizeof(*dqm), GFP_KERNEL);
	if (!dqm)
		return NULL;

	switch (dev->adev->asic_type) {
	/* HWS is not available on Hawaii. */
	case CHIP_HAWAII:
	/* HWS depends on CWSR for timely dequeue. CWSR is not
	 * available on Tonga.
	 *
	 * FIXME: This argument also applies to Kaveri.
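	 *
	 * (Without CWSR an in-flight wave cannot be saved and restored, so
	 * a dequeue request may only complete once the wave drains on its
	 * own; that is why HWS dequeue is not considered timely on these
	 * ASICs.)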
	 */
	case CHIP_TONGA:
		dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS;
		break;
	default:
		dqm->sched_policy = sched_policy;
		break;
	}

	dqm->dev = dev;
	switch (dqm->sched_policy) {
	case KFD_SCHED_POLICY_HWS:
	case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
		/* initialize dqm for cp scheduling */
		dqm->ops.create_queue = create_queue_cpsch;
		dqm->ops.initialize = initialize_cpsch;
		dqm->ops.start = start_cpsch;
		dqm->ops.stop = stop_cpsch;
		dqm->ops.pre_reset = pre_reset;
		dqm->ops.destroy_queue = destroy_queue_cpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.register_process = register_process;
		dqm->ops.unregister_process = unregister_process;
		dqm->ops.uninitialize = uninitialize;
		dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
		dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		dqm->ops.process_termination = process_termination_cpsch;
		dqm->ops.evict_process_queues = evict_process_queues_cpsch;
		dqm->ops.restore_process_queues = restore_process_queues_cpsch;
		dqm->ops.get_wave_state = get_wave_state;
		dqm->ops.reset_queues = reset_queues_cpsch;
		dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info;
		dqm->ops.checkpoint_mqd = checkpoint_mqd;
		break;
	case KFD_SCHED_POLICY_NO_HWS:
		/* initialize dqm for no cp scheduling */
		dqm->ops.start = start_nocpsch;
		dqm->ops.stop = stop_nocpsch;
		dqm->ops.pre_reset = pre_reset;
		dqm->ops.create_queue = create_queue_nocpsch;
		dqm->ops.destroy_queue = destroy_queue_nocpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.register_process = register_process;
		dqm->ops.unregister_process = unregister_process;
		dqm->ops.initialize = initialize_nocpsch;
		dqm->ops.uninitialize = uninitialize;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		dqm->ops.process_termination = process_termination_nocpsch;
		dqm->ops.evict_process_queues = evict_process_queues_nocpsch;
		dqm->ops.restore_process_queues =
			restore_process_queues_nocpsch;
		dqm->ops.get_wave_state = get_wave_state;
		dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info;
		dqm->ops.checkpoint_mqd = checkpoint_mqd;
		break;
	default:
		pr_err("Invalid scheduling policy %d\n", dqm->sched_policy);
		goto out_free;
	}

	switch (dev->adev->asic_type) {
	case CHIP_CARRIZO:
		device_queue_manager_init_vi(&dqm->asic_ops);
		break;

	case CHIP_KAVERI:
		device_queue_manager_init_cik(&dqm->asic_ops);
		break;

	case CHIP_HAWAII:
		device_queue_manager_init_cik_hawaii(&dqm->asic_ops);
		break;

	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		device_queue_manager_init_vi_tonga(&dqm->asic_ops);
		break;

	default:
		if (KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0))
			device_queue_manager_init_v11(&dqm->asic_ops);
		else if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1))
			device_queue_manager_init_v10_navi10(&dqm->asic_ops);
		else if (KFD_GC_VERSION(dev) >= IP_VERSION(9, 0, 1))
			device_queue_manager_init_v9(&dqm->asic_ops);
		else {
			WARN(1, "Unexpected ASIC family %u",
			     dev->adev->asic_type);
			goto out_free;
		}
	}

	if (init_mqd_managers(dqm))
		goto out_free;
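	/*
	 * The contiguous HIQ/SDMA MQD chunk is only needed for the HWS
	 * path; with MES enabled, queue management moves into the MES
	 * firmware (an assumption consistent with the enable_mes checks
	 * here and in device_queue_manager_uninit()), so the allocation
	 * is skipped.
	 */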
	if (!dev->kfd->shared_resources.enable_mes && allocate_hiq_sdma_mqd(dqm)) {
		pr_err("Failed to allocate hiq sdma mqd chunk buffer\n");
		goto out_free;
	}

	/* ops.initialize() returns 0 on success */
	if (!dqm->ops.initialize(dqm))
		return dqm;

out_free:
	kfree(dqm);
	return NULL;
}

static void deallocate_hiq_sdma_mqd(struct kfd_node *dev,
				    struct kfd_mem_obj *mqd)
{
	WARN(!mqd, "No hiq sdma mqd chunk to free");

	amdgpu_amdkfd_free_gtt_mem(dev->adev, mqd->gtt_mem);
}

void device_queue_manager_uninit(struct device_queue_manager *dqm)
{
	dqm->ops.stop(dqm);
	dqm->ops.uninitialize(dqm);
	if (!dqm->dev->kfd->shared_resources.enable_mes)
		deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd);
	kfree(dqm);
}

int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid)
{
	struct kfd_process_device *pdd;
	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
	int ret = 0;

	if (!p)
		return -EINVAL;
	WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
	pdd = kfd_get_process_device_data(dqm->dev, p);
	if (pdd)
		ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd);
	kfd_unref_process(p);

	return ret;
}

static void kfd_process_hw_exception(struct work_struct *work)
{
	struct device_queue_manager *dqm = container_of(work,
			struct device_queue_manager, hw_exception_work);
	amdgpu_amdkfd_gpu_reset(dqm->dev->adev);
}

#if defined(CONFIG_DEBUG_FS)
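/*
 * Pretty-print a register dump as returned by the kfd2kgd dump hooks:
 * dump[i][0] is a register offset and dump[i][1] its value.  Runs of
 * consecutive offsets share one line, with up to eight values per line.
 * Example output (values illustrative):
 *
 *	 00001a00: 00000001 00000000 00000000 00000000
 *	 00001b40: 00000002
 */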
"\n" : "", 2490 dump[i][0], dump[i][1]); 2491 count = 7; 2492 } else { 2493 seq_printf(m, " %08x", dump[i][1]); 2494 count--; 2495 } 2496 } 2497 2498 seq_puts(m, "\n"); 2499 } 2500 2501 int dqm_debugfs_hqds(struct seq_file *m, void *data) 2502 { 2503 struct device_queue_manager *dqm = data; 2504 uint32_t xcc_mask = dqm->dev->xcc_mask; 2505 uint32_t (*dump)[2], n_regs; 2506 int pipe, queue; 2507 int r = 0, xcc_id; 2508 uint32_t sdma_engine_start; 2509 2510 if (!dqm->sched_running) { 2511 seq_puts(m, " Device is stopped\n"); 2512 return 0; 2513 } 2514 2515 for_each_inst(xcc_id, xcc_mask) { 2516 r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev, 2517 KFD_CIK_HIQ_PIPE, 2518 KFD_CIK_HIQ_QUEUE, &dump, 2519 &n_regs, xcc_id); 2520 if (!r) { 2521 seq_printf( 2522 m, 2523 " Inst %d, HIQ on MEC %d Pipe %d Queue %d\n", 2524 xcc_id, 2525 KFD_CIK_HIQ_PIPE / get_pipes_per_mec(dqm) + 1, 2526 KFD_CIK_HIQ_PIPE % get_pipes_per_mec(dqm), 2527 KFD_CIK_HIQ_QUEUE); 2528 seq_reg_dump(m, dump, n_regs); 2529 2530 kfree(dump); 2531 } 2532 2533 for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { 2534 int pipe_offset = pipe * get_queues_per_pipe(dqm); 2535 2536 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) { 2537 if (!test_bit(pipe_offset + queue, 2538 dqm->dev->kfd->shared_resources.cp_queue_bitmap)) 2539 continue; 2540 2541 r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev, 2542 pipe, queue, 2543 &dump, &n_regs, 2544 xcc_id); 2545 if (r) 2546 break; 2547 2548 seq_printf(m, 2549 " Inst %d, CP Pipe %d, Queue %d\n", 2550 xcc_id, pipe, queue); 2551 seq_reg_dump(m, dump, n_regs); 2552 2553 kfree(dump); 2554 } 2555 } 2556 } 2557 2558 sdma_engine_start = dqm->dev->node_id * get_num_all_sdma_engines(dqm); 2559 for (pipe = sdma_engine_start; 2560 pipe < (sdma_engine_start + get_num_all_sdma_engines(dqm)); 2561 pipe++) { 2562 for (queue = 0; 2563 queue < dqm->dev->kfd->device_info.num_sdma_queues_per_engine; 2564 queue++) { 2565 r = dqm->dev->kfd2kgd->hqd_sdma_dump( 2566 dqm->dev->adev, pipe, queue, &dump, &n_regs); 2567 if (r) 2568 break; 2569 2570 seq_printf(m, " SDMA Engine %d, RLC %d\n", 2571 pipe, queue); 2572 seq_reg_dump(m, dump, n_regs); 2573 2574 kfree(dump); 2575 } 2576 } 2577 2578 return r; 2579 } 2580 2581 int dqm_debugfs_hang_hws(struct device_queue_manager *dqm) 2582 { 2583 int r = 0; 2584 2585 dqm_lock(dqm); 2586 r = pm_debugfs_hang_hws(&dqm->packet_mgr); 2587 if (r) { 2588 dqm_unlock(dqm); 2589 return r; 2590 } 2591 dqm->active_runlist = true; 2592 r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); 2593 dqm_unlock(dqm); 2594 2595 return r; 2596 } 2597 2598 #endif 2599