// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright 2014-2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/ratelimit.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/sched.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_mqd_manager.h"
#include "cik_regs.h"
#include "kfd_kernel_queue.h"
#include "amdgpu_amdkfd.h"
#include "mes_api_def.h"

/* Size of the per-pipe EOP queue */
#define CIK_HPD_EOP_BYTES_LOG2 11
#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)

static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
				  u32 pasid, unsigned int vmid);

static int execute_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param);
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
			      enum kfd_unmap_queues_filter filter,
			      uint32_t filter_param, bool reset);

static int map_queues_cpsch(struct device_queue_manager *dqm);

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				  struct queue *q);

static inline void deallocate_hqd(struct device_queue_manager *dqm,
				  struct queue *q);
static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q);
static int allocate_sdma_queue(struct device_queue_manager *dqm,
			       struct queue *q, const uint32_t *restore_sdma_id);
static void kfd_process_hw_exception(struct work_struct *work);

static inline
enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
{
	if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI)
		return KFD_MQD_TYPE_SDMA;
	return KFD_MQD_TYPE_CP;
}

static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
{
	int i;
	int pipe_offset = (mec * dqm->dev->kfd->shared_resources.num_pipe_per_mec
		+ pipe) * dqm->dev->kfd->shared_resources.num_queue_per_pipe;

	/* queue is available for KFD usage if bit is 1 */
	for (i = 0; i < dqm->dev->kfd->shared_resources.num_queue_per_pipe; ++i)
		if (test_bit(pipe_offset + i,
			     dqm->dev->kfd->shared_resources.cp_queue_bitmap))
			return true;
	return false;
}

unsigned int get_cp_queues_num(struct device_queue_manager *dqm)
{
	return bitmap_weight(dqm->dev->kfd->shared_resources.cp_queue_bitmap,
			     KGD_MAX_QUEUES);
}

unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
{
	return dqm->dev->kfd->shared_resources.num_queue_per_pipe;
}

unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
{
	return dqm->dev->kfd->shared_resources.num_pipe_per_mec;
}

static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm)
{
	return kfd_get_num_sdma_engines(dqm->dev) +
		kfd_get_num_xgmi_sdma_engines(dqm->dev);
}

unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
{
	return kfd_get_num_sdma_engines(dqm->dev) *
		dqm->dev->kfd->device_info.num_sdma_queues_per_engine;
}

unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm)
{
	return kfd_get_num_xgmi_sdma_engines(dqm->dev) *
		dqm->dev->kfd->device_info.num_sdma_queues_per_engine;
}

static inline uint64_t get_reserved_sdma_queues_bitmap(struct device_queue_manager *dqm)
{
	return dqm->dev->kfd->device_info.reserved_sdma_queues_bitmap;
}

static void init_sdma_bitmaps(struct device_queue_manager *dqm)
{
	bitmap_zero(dqm->sdma_bitmap, KFD_MAX_SDMA_QUEUES);
	bitmap_set(dqm->sdma_bitmap, 0, get_num_sdma_queues(dqm));

	bitmap_zero(dqm->xgmi_sdma_bitmap, KFD_MAX_SDMA_QUEUES);
	bitmap_set(dqm->xgmi_sdma_bitmap, 0, get_num_xgmi_sdma_queues(dqm));
}

void program_sh_mem_settings(struct device_queue_manager *dqm,
			     struct qcm_process_device *qpd)
{
	int xcc = 0;

	for (xcc = 0; xcc < dqm->dev->num_xcc_per_node; xcc++)
		dqm->dev->kfd2kgd->program_sh_mem_settings(
			dqm->dev->adev, qpd->vmid,
			qpd->sh_mem_config,
			qpd->sh_mem_ape1_base,
			qpd->sh_mem_ape1_limit,
			qpd->sh_mem_bases,
			dqm->dev->start_xcc_id + xcc);
}

static void kfd_hws_hang(struct device_queue_manager *dqm)
{
	/*
	 * Issue a GPU reset if HWS is unresponsive
	 */
	dqm->is_hws_hang = true;

	/* It's possible we're detecting a HWS hang in the
	 * middle of a GPU reset. No need to schedule another
	 * reset in this case.
	 */
	if (!dqm->is_resetting)
		schedule_work(&dqm->hw_exception_work);
}

static int convert_to_mes_queue_type(int queue_type)
{
	int mes_queue_type;

	switch (queue_type) {
	case KFD_QUEUE_TYPE_COMPUTE:
		mes_queue_type = MES_QUEUE_TYPE_COMPUTE;
		break;
	case KFD_QUEUE_TYPE_SDMA:
		mes_queue_type = MES_QUEUE_TYPE_SDMA;
		break;
	default:
		WARN(1, "Invalid queue type %d", queue_type);
		mes_queue_type = -EINVAL;
		break;
	}

	return mes_queue_type;
}
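
/*
 * add_queue_mes() below translates a KFD queue into a MES add-queue request:
 * the state MES needs (page table base, process/gang context addresses,
 * doorbell offset, MQD and write-pointer addresses) is copied into a
 * mes_add_queue_input and handed to the MES firmware scheduler under the MES
 * lock. A failure is treated like an HWS hang and schedules a GPU reset via
 * kfd_hws_hang().
 */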
static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
			 struct qcm_process_device *qpd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
	struct kfd_process_device *pdd = qpd_to_pdd(qpd);
	struct mes_add_queue_input queue_input;
	int r, queue_type;
	uint64_t wptr_addr_off;

	if (dqm->is_hws_hang)
		return -EIO;

	memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input));
	queue_input.process_id = qpd->pqm->process->pasid;
	queue_input.page_table_base_addr = qpd->page_table_base;
	queue_input.process_va_start = 0;
	queue_input.process_va_end = adev->vm_manager.max_pfn - 1;
	/* MES unit for quantum is 100ns */
	queue_input.process_quantum = KFD_MES_PROCESS_QUANTUM;  /* Equivalent to 10ms. */
	queue_input.process_context_addr = pdd->proc_ctx_gpu_addr;
	queue_input.gang_quantum = KFD_MES_GANG_QUANTUM; /* Equivalent to 1ms */
	queue_input.gang_context_addr = q->gang_ctx_gpu_addr;
	queue_input.inprocess_gang_priority = q->properties.priority;
	queue_input.gang_global_priority_level =
					AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
	queue_input.doorbell_offset = q->properties.doorbell_off;
	queue_input.mqd_addr = q->gart_mqd_addr;
	queue_input.wptr_addr = (uint64_t)q->properties.write_ptr;

	if (q->wptr_bo) {
		wptr_addr_off = (uint64_t)q->properties.write_ptr & (PAGE_SIZE - 1);
		queue_input.wptr_mc_addr = ((uint64_t)q->wptr_bo->tbo.resource->start << PAGE_SHIFT) + wptr_addr_off;
	}

	queue_input.is_kfd_process = 1;
	queue_input.is_aql_queue = (q->properties.format == KFD_QUEUE_FORMAT_AQL);
	queue_input.queue_size = q->properties.queue_size >> 2;

	queue_input.paging = false;
	queue_input.tba_addr = qpd->tba_addr;
	queue_input.tma_addr = qpd->tma_addr;

	queue_type = convert_to_mes_queue_type(q->properties.type);
	if (queue_type < 0) {
		pr_err("Queue type not supported with MES, queue:%d\n",
			q->properties.type);
		return -EINVAL;
	}
	queue_input.queue_type = (uint32_t)queue_type;

	if (q->gws) {
		queue_input.gws_base = 0;
		queue_input.gws_size = qpd->num_gws;
	}

	amdgpu_mes_lock(&adev->mes);
	r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
	amdgpu_mes_unlock(&adev->mes);
	if (r) {
		pr_err("failed to add hardware queue to MES, doorbell=0x%x\n",
			q->properties.doorbell_off);
		pr_err("MES might be in unrecoverable state, issue a GPU reset\n");
		kfd_hws_hang(dqm);
	}

	return r;
}

static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q,
			    struct qcm_process_device *qpd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
	int r;
	struct mes_remove_queue_input queue_input;

	if (dqm->is_hws_hang)
		return -EIO;

	memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input));
	queue_input.doorbell_offset = q->properties.doorbell_off;
	queue_input.gang_context_addr = q->gang_ctx_gpu_addr;

	amdgpu_mes_lock(&adev->mes);
	r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input);
	amdgpu_mes_unlock(&adev->mes);

	if (r) {
		pr_err("failed to remove hardware queue from MES, doorbell=0x%x\n",
			q->properties.doorbell_off);
		pr_err("MES might be in unrecoverable state, issue a GPU reset\n");
		kfd_hws_hang(dqm);
	}

	return r;
}

static int remove_all_queues_mes(struct device_queue_manager *dqm)
{
	struct device_process_node *cur;
	struct qcm_process_device *qpd;
	struct queue *q;
	int retval = 0;

	list_for_each_entry(cur, &dqm->queues, list) {
		qpd = cur->qpd;
		list_for_each_entry(q, &qpd->queues_list, list) {
			if (q->properties.is_active) {
				retval = remove_queue_mes(dqm, q, qpd);
				if (retval) {
					pr_err("%s: Failed to remove queue %d for dev %d",
						__func__,
						q->properties.queue_id,
						dqm->dev->id);
					return retval;
				}
			}
		}
	}

	return retval;
}
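
/*
 * Bookkeeping helpers for the scheduler counters: active_queue_count is what
 * map_queues_cpsch() consults to decide whether a new runlist needs to be
 * uploaded, active_cp_queue_count only tracks compute/DIQ queues, and
 * gws_queue_count (together with qpd->mapped_gws_queue) tracks queues that
 * currently hold the GWS resource.
 */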
static void increment_queue_count(struct device_queue_manager *dqm,
				  struct qcm_process_device *qpd,
				  struct queue *q)
{
	dqm->active_queue_count++;
	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
	    q->properties.type == KFD_QUEUE_TYPE_DIQ)
		dqm->active_cp_queue_count++;

	if (q->properties.is_gws) {
		dqm->gws_queue_count++;
		qpd->mapped_gws_queue = true;
	}
}

static void decrement_queue_count(struct device_queue_manager *dqm,
				  struct qcm_process_device *qpd,
				  struct queue *q)
{
	dqm->active_queue_count--;
	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
	    q->properties.type == KFD_QUEUE_TYPE_DIQ)
		dqm->active_cp_queue_count--;

	if (q->properties.is_gws) {
		dqm->gws_queue_count--;
		qpd->mapped_gws_queue = false;
	}
}
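
/*
 * Worked example for the SDMA doorbell index computed in allocate_doorbell()
 * below, with illustrative values only: on node 0, physical engine 0,
 * sdma_queue_id 3 maps to
 *   idx_offset[0] + (3 & 1) * KFD_QUEUE_DOORBELL_MIRROR_OFFSET + (3 >> 1)
 * i.e. the engine's base index, plus one mirror-bank offset, plus 1, because
 * the even and odd RLC queues of an engine live in two banks that are
 * KFD_QUEUE_DOORBELL_MIRROR_OFFSET doorbells apart.
 */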
/*
 * Allocate a doorbell ID to this queue.
 * If doorbell_id is passed in, make sure requested ID is valid then allocate it.
 */
static int allocate_doorbell(struct qcm_process_device *qpd,
			     struct queue *q,
			     uint32_t const *restore_id)
{
	struct kfd_node *dev = qpd->dqm->dev;

	if (!KFD_IS_SOC15(dev)) {
		/* On pre-SOC15 chips we need to use the queue ID to
		 * preserve the user mode ABI.
		 */

		if (restore_id && *restore_id != q->properties.queue_id)
			return -EINVAL;

		q->doorbell_id = q->properties.queue_id;
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
			q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
		/* For SDMA queues on SOC15 with 8-byte doorbell, use static
		 * doorbell assignments based on the engine and queue id.
		 * The doorbell index distance between RLC (2*i) and (2*i+1)
		 * for an SDMA engine is 512.
		 */

		uint32_t *idx_offset = dev->kfd->shared_resources.sdma_doorbell_idx;

		/*
		 * q->properties.sdma_engine_id corresponds to the virtual
		 * sdma engine number. However, for doorbell allocation,
		 * we need the physical sdma engine id in order to get the
		 * correct doorbell offset.
		 */
		uint32_t valid_id = idx_offset[qpd->dqm->dev->node_id *
					       get_num_all_sdma_engines(qpd->dqm) +
					       q->properties.sdma_engine_id]
					       + (q->properties.sdma_queue_id & 1)
					       * KFD_QUEUE_DOORBELL_MIRROR_OFFSET
					       + (q->properties.sdma_queue_id >> 1);

		if (restore_id && *restore_id != valid_id)
			return -EINVAL;
		q->doorbell_id = valid_id;
	} else {
		/* For CP queues on SOC15 */
		if (restore_id) {
			/* make sure that ID is free */
			if (__test_and_set_bit(*restore_id, qpd->doorbell_bitmap))
				return -EINVAL;

			q->doorbell_id = *restore_id;
		} else {
			/* or reserve a free doorbell ID */
			unsigned int found;

			found = find_first_zero_bit(qpd->doorbell_bitmap,
						    KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
			if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
				pr_debug("No doorbells available");
				return -EBUSY;
			}
			set_bit(found, qpd->doorbell_bitmap);
			q->doorbell_id = found;
		}
	}

	q->properties.doorbell_off =
		kfd_get_doorbell_dw_offset_in_bar(dev->kfd, qpd_to_pdd(qpd),
						  q->doorbell_id);
	return 0;
}

static void deallocate_doorbell(struct qcm_process_device *qpd,
				struct queue *q)
{
	unsigned int old;
	struct kfd_node *dev = qpd->dqm->dev;

	if (!KFD_IS_SOC15(dev) ||
	    q->properties.type == KFD_QUEUE_TYPE_SDMA ||
	    q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
		return;

	old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap);
	WARN_ON(!old);
}

static void program_trap_handler_settings(struct device_queue_manager *dqm,
					  struct qcm_process_device *qpd)
{
	int xcc = 0;

	if (dqm->dev->kfd2kgd->program_trap_handler_settings)
		for (xcc = 0; xcc < dqm->dev->num_xcc_per_node; xcc++)
			dqm->dev->kfd2kgd->program_trap_handler_settings(
				dqm->dev->adev, qpd->vmid,
				qpd->tba_addr, qpd->tma_addr,
				dqm->dev->start_xcc_id + xcc);
}

static int allocate_vmid(struct device_queue_manager *dqm,
			 struct qcm_process_device *qpd,
			 struct queue *q)
{
	int allocated_vmid = -1, i;

	for (i = dqm->dev->vm_info.first_vmid_kfd;
			i <= dqm->dev->vm_info.last_vmid_kfd; i++) {
		if (!dqm->vmid_pasid[i]) {
			allocated_vmid = i;
			break;
		}
	}

	if (allocated_vmid < 0) {
		pr_err("no more vmid to allocate\n");
		return -ENOSPC;
	}

	pr_debug("vmid allocated: %d\n", allocated_vmid);

	dqm->vmid_pasid[allocated_vmid] = q->process->pasid;

	set_pasid_vmid_mapping(dqm, q->process->pasid, allocated_vmid);

	qpd->vmid = allocated_vmid;
	q->properties.vmid = allocated_vmid;

	program_sh_mem_settings(dqm, qpd);

	if (KFD_IS_SOC15(dqm->dev) && dqm->dev->kfd->cwsr_enabled)
		program_trap_handler_settings(dqm, qpd);

	/* qpd->page_table_base is set earlier when register_process()
	 * is called, i.e. when the first queue is created.
	 */
	dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->adev,
			qpd->vmid,
			qpd->page_table_base);
	/* invalidate the VM context after pasid and vmid mapping is set up */
	kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);

	if (dqm->dev->kfd2kgd->set_scratch_backing_va)
		dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->adev,
				qpd->sh_hidden_private_base, qpd->vmid);

	return 0;
}

static int flush_texture_cache_nocpsch(struct kfd_node *kdev,
				       struct qcm_process_device *qpd)
{
	const struct packet_manager_funcs *pmf = qpd->dqm->packet_mgr.pmf;
	int ret;

	if (!qpd->ib_kaddr)
		return -ENOMEM;

	ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
	if (ret)
		return ret;

	return amdgpu_amdkfd_submit_ib(kdev->adev, KGD_ENGINE_MEC1, qpd->vmid,
				qpd->ib_base, (uint32_t *)qpd->ib_kaddr,
				pmf->release_mem_size / sizeof(uint32_t));
}

static void deallocate_vmid(struct device_queue_manager *dqm,
			    struct qcm_process_device *qpd,
			    struct queue *q)
{
	/* On GFX v7, CP doesn't flush TC at dequeue */
	if (q->device->adev->asic_type == CHIP_HAWAII)
		if (flush_texture_cache_nocpsch(q->device, qpd))
			pr_err("Failed to flush TC\n");

	kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);

	/* Release the vmid mapping */
	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
	dqm->vmid_pasid[qpd->vmid] = 0;

	qpd->vmid = 0;
	q->properties.vmid = 0;
}

static int create_queue_nocpsch(struct device_queue_manager *dqm,
				struct queue *q,
				struct qcm_process_device *qpd,
				const struct kfd_criu_queue_priv_data *qd,
				const void *restore_mqd, const void *restore_ctl_stack)
{
	struct mqd_manager *mqd_mgr;
	int retval;

	dqm_lock(dqm);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		retval = -EPERM;
		goto out_unlock;
	}

	if (list_empty(&qpd->queues_list)) {
		retval = allocate_vmid(dqm, qpd, q);
		if (retval)
			goto out_unlock;
	}
	q->properties.vmid = qpd->vmid;
	/*
	 * Eviction state logic: mark all queues as evicted, even ones
	 * not currently active. Restoring inactive queues later only
	 * updates the is_evicted flag but is a no-op otherwise.
	 */
	q->properties.is_evicted = !!qpd->evicted;

	q->properties.tba_addr = qpd->tba_addr;
	q->properties.tma_addr = qpd->tma_addr;

	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
			q->properties.type)];
	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
		retval = allocate_hqd(dqm, q);
		if (retval)
			goto deallocate_vmid;
		pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
			q->pipe, q->queue);
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
		retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL);
		if (retval)
			goto deallocate_vmid;
		dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
	}

	retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL);
	if (retval)
		goto out_deallocate_hqd;

	/* Temporarily release dqm lock to avoid a circular lock dependency */
	dqm_unlock(dqm);
	q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties);
	dqm_lock(dqm);

	if (!q->mqd_mem_obj) {
		retval = -ENOMEM;
		goto out_deallocate_doorbell;
	}

	if (qd)
		mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr,
				     &q->properties, restore_mqd, restore_ctl_stack,
				     qd->ctl_stack_size);
	else
		mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
					&q->gart_mqd_addr, &q->properties);

	if (q->properties.is_active) {
		if (!dqm->sched_running) {
			WARN_ONCE(1, "Load non-HWS mqd while stopped\n");
			goto add_queue_to_list;
		}

		if (WARN(q->process->mm != current->mm,
					"should only run in user thread"))
			retval = -EFAULT;
		else
			retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
					q->queue, &q->properties, current->mm);
		if (retval)
			goto out_free_mqd;
	}

add_queue_to_list:
	list_add(&q->list, &qpd->queues_list);
	qpd->queue_count++;
	if (q->properties.is_active)
		increment_queue_count(dqm, qpd, q);

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);
	goto out_unlock;

out_free_mqd:
	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
out_deallocate_doorbell:
	deallocate_doorbell(qpd, q);
out_deallocate_hqd:
	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
		deallocate_hqd(dqm, q);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
			q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
		deallocate_sdma_queue(dqm, q);
deallocate_vmid:
	if (list_empty(&qpd->queues_list))
		deallocate_vmid(dqm, qpd, q);
out_unlock:
	dqm_unlock(dqm);
	return retval;
}
static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
{
	bool set;
	int pipe, bit, i;

	set = false;

	for (pipe = dqm->next_pipe_to_allocate, i = 0;
			i < get_pipes_per_mec(dqm);
			pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {

		if (!is_pipe_enabled(dqm, 0, pipe))
			continue;

		if (dqm->allocated_queues[pipe] != 0) {
			bit = ffs(dqm->allocated_queues[pipe]) - 1;
			dqm->allocated_queues[pipe] &= ~(1 << bit);
			q->pipe = pipe;
			q->queue = bit;
			set = true;
			break;
		}
	}

	if (!set)
		return -EBUSY;

	pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue);
	/* horizontal hqd allocation */
	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);

	return 0;
}

static inline void deallocate_hqd(struct device_queue_manager *dqm,
				  struct queue *q)
{
	dqm->allocated_queues[q->pipe] |= (1 << q->queue);
}
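
/*
 * dbgdev_wave_reset_wavefronts() below looks up which VMID the process'
 * PASID is currently mapped to and then broadcasts a SQ KILL command to
 * every shader engine/array instance on each XCC, terminating all
 * wavefronts still running for that VMID.
 */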
#define SQ_IND_CMD_CMD_KILL		0x00000003
#define SQ_IND_CMD_MODE_BROADCAST	0x00000001

static int dbgdev_wave_reset_wavefronts(struct kfd_node *dev, struct kfd_process *p)
{
	int status = 0;
	unsigned int vmid;
	uint16_t queried_pasid;
	union SQ_CMD_BITS reg_sq_cmd;
	union GRBM_GFX_INDEX_BITS reg_gfx_index;
	struct kfd_process_device *pdd;
	int first_vmid_to_scan = dev->vm_info.first_vmid_kfd;
	int last_vmid_to_scan = dev->vm_info.last_vmid_kfd;
	int xcc = 0;

	reg_sq_cmd.u32All = 0;
	reg_gfx_index.u32All = 0;

	pr_debug("Killing all process wavefronts\n");

	if (!dev->kfd2kgd->get_atc_vmid_pasid_mapping_info) {
		pr_err("no vmid pasid mapping supported\n");
		return -EOPNOTSUPP;
	}

	/* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
	 * ATC_VMID15_PASID_MAPPING
	 * to check which VMID the current process is mapped to.
	 */

	for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
		status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info
				(dev->adev, vmid, &queried_pasid);

		if (status && queried_pasid == p->pasid) {
			pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n",
					vmid, p->pasid);
			break;
		}
	}

	if (vmid > last_vmid_to_scan) {
		pr_err("Didn't find vmid for pasid 0x%x\n", p->pasid);
		return -EFAULT;
	}

	/* take the VMID for that process the safe way, via the PDD */
	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd)
		return -EFAULT;

	reg_gfx_index.bits.sh_broadcast_writes = 1;
	reg_gfx_index.bits.se_broadcast_writes = 1;
	reg_gfx_index.bits.instance_broadcast_writes = 1;
	reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
	reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
	reg_sq_cmd.bits.vm_id = vmid;

	for (xcc = 0; xcc < dev->num_xcc_per_node; xcc++)
		dev->kfd2kgd->wave_control_execute(dev->adev,
					reg_gfx_index.u32All,
					reg_sq_cmd.u32All,
					dev->start_xcc_id + xcc);

	return 0;
}

/* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked
 * to avoid unsynchronized access
 */
static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd,
					struct queue *q)
{
	int retval;
	struct mqd_manager *mqd_mgr;

	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
			q->properties.type)];

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
		deallocate_hqd(dqm, q);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		deallocate_sdma_queue(dqm, q);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
		deallocate_sdma_queue(dqm, q);
	else {
		pr_debug("q->properties.type %d is invalid\n",
				q->properties.type);
		return -EINVAL;
	}
	dqm->total_queue_count--;

	deallocate_doorbell(qpd, q);

	if (!dqm->sched_running) {
		WARN_ONCE(1, "Destroy non-HWS queue while stopped\n");
		return 0;
	}

	retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
				KFD_UNMAP_LATENCY_MS,
				q->pipe, q->queue);
	if (retval == -ETIME)
		qpd->reset_wavefronts = true;

	list_del(&q->list);
	if (list_empty(&qpd->queues_list)) {
		if (qpd->reset_wavefronts) {
			pr_warn("Resetting wave fronts (nocpsch) on dev %p\n",
					dqm->dev);
			/* dbgdev_wave_reset_wavefronts has to be called before
			 * deallocate_vmid(), i.e. when vmid is still in use.
			 */
			dbgdev_wave_reset_wavefronts(dqm->dev,
					qpd->pqm->process);
			qpd->reset_wavefronts = false;
		}

		deallocate_vmid(dqm, qpd, q);
	}
	qpd->queue_count--;
	if (q->properties.is_active)
		decrement_queue_count(dqm, qpd, q);

	return retval;
}

static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
				 struct qcm_process_device *qpd,
				 struct queue *q)
{
	int retval;
	uint64_t sdma_val = 0;
	struct kfd_process_device *pdd = qpd_to_pdd(qpd);
	struct mqd_manager *mqd_mgr =
		dqm->mqd_mgrs[get_mqd_type_from_queue_type(q->properties.type)];

	/* Get the SDMA queue stats */
	if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
	    (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
		retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
							&sdma_val);
		if (retval)
			pr_err("Failed to read SDMA queue counter for queue: %d\n",
				q->properties.queue_id);
	}

	dqm_lock(dqm);
	retval = destroy_queue_nocpsch_locked(dqm, qpd, q);
	if (!retval)
		pdd->sdma_past_activity_counter += sdma_val;
	dqm_unlock(dqm);

	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);

	return retval;
}

static int update_queue(struct device_queue_manager *dqm, struct queue *q,
			struct mqd_update_info *minfo)
{
	int retval = 0;
	struct mqd_manager *mqd_mgr;
	struct kfd_process_device *pdd;
	bool prev_active = false;

	dqm_lock(dqm);
	pdd = kfd_get_process_device_data(q->device, q->process);
	if (!pdd) {
		retval = -ENODEV;
		goto out_unlock;
	}
	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
			q->properties.type)];

	/* Save previous activity state for counters */
	prev_active = q->properties.is_active;

	/* Make sure the queue is unmapped before updating the MQD */
	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
		if (!dqm->dev->kfd->shared_resources.enable_mes)
			retval = unmap_queues_cpsch(dqm,
					KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false);
		else if (prev_active)
			retval = remove_queue_mes(dqm, q, &pdd->qpd);

		if (retval) {
			pr_err("unmap queue failed\n");
			goto out_unlock;
		}
	} else if (prev_active &&
		   (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
		    q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		    q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {

		if (!dqm->sched_running) {
			WARN_ONCE(1, "Update non-HWS queue while stopped\n");
			goto out_unlock;
		}

		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
				(dqm->dev->kfd->cwsr_enabled ?
				 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE :
				 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
		if (retval) {
			pr_err("destroy mqd failed\n");
			goto out_unlock;
		}
	}

	mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties, minfo);

	/*
	 * check active state vs. the previous state and modify
	 * counter accordingly. map_queues_cpsch uses the
	 * dqm->active_queue_count to determine whether a new runlist must be
	 * uploaded.
	 */
	if (q->properties.is_active && !prev_active) {
		increment_queue_count(dqm, &pdd->qpd, q);
	} else if (!q->properties.is_active && prev_active) {
		decrement_queue_count(dqm, &pdd->qpd, q);
	} else if (q->gws && !q->properties.is_gws) {
		if (q->properties.is_active) {
			dqm->gws_queue_count++;
			pdd->qpd.mapped_gws_queue = true;
		}
		q->properties.is_gws = true;
	} else if (!q->gws && q->properties.is_gws) {
		if (q->properties.is_active) {
			dqm->gws_queue_count--;
			pdd->qpd.mapped_gws_queue = false;
		}
		q->properties.is_gws = false;
	}

	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
		if (!dqm->dev->kfd->shared_resources.enable_mes)
			retval = map_queues_cpsch(dqm);
		else if (q->properties.is_active)
			retval = add_queue_mes(dqm, q, &pdd->qpd);
	} else if (q->properties.is_active &&
		 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
		  q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		  q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
		if (WARN(q->process->mm != current->mm,
			 "should only run in user thread"))
			retval = -EFAULT;
		else
			retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd,
						   q->pipe, q->queue,
						   &q->properties, current->mm);
	}

out_unlock:
	dqm_unlock(dqm);
	return retval;
}
static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct queue *q;
	struct mqd_manager *mqd_mgr;
	struct kfd_process_device *pdd;
	int retval, ret = 0;

	dqm_lock(dqm);
	if (qpd->evicted++ > 0) /* already evicted, do nothing */
		goto out;

	pdd = qpd_to_pdd(qpd);
	pr_debug_ratelimited("Evicting PASID 0x%x queues\n",
			    pdd->process->pasid);

	pdd->last_evict_timestamp = get_jiffies_64();
	/* Mark all queues as evicted. Deactivate all active queues on
	 * the qpd.
	 */
	list_for_each_entry(q, &qpd->queues_list, list) {
		q->properties.is_evicted = true;
		if (!q->properties.is_active)
			continue;

		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
				q->properties.type)];
		q->properties.is_active = false;
		decrement_queue_count(dqm, qpd, q);

		if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n"))
			continue;

		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
				(dqm->dev->kfd->cwsr_enabled ?
				 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE :
				 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
		if (retval && !ret)
			/* Return the first error, but keep going to
			 * maintain a consistent eviction state
			 */
			ret = retval;
	}

out:
	dqm_unlock(dqm);
	return ret;
}

static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
				      struct qcm_process_device *qpd)
{
	struct queue *q;
	struct kfd_process_device *pdd;
	int retval = 0;

	dqm_lock(dqm);
	if (qpd->evicted++ > 0) /* already evicted, do nothing */
		goto out;

	pdd = qpd_to_pdd(qpd);
	pr_debug_ratelimited("Evicting PASID 0x%x queues\n",
			    pdd->process->pasid);

	/* Mark all queues as evicted. Deactivate all active queues on
	 * the qpd.
	 */
	list_for_each_entry(q, &qpd->queues_list, list) {
		q->properties.is_evicted = true;
		if (!q->properties.is_active)
			continue;

		q->properties.is_active = false;
		decrement_queue_count(dqm, qpd, q);

		if (dqm->dev->kfd->shared_resources.enable_mes) {
			retval = remove_queue_mes(dqm, q, qpd);
			if (retval) {
				pr_err("Failed to evict queue %d\n",
					q->properties.queue_id);
				goto out;
			}
		}
	}
	pdd->last_evict_timestamp = get_jiffies_64();
	if (!dqm->dev->kfd->shared_resources.enable_mes)
		retval = execute_queues_cpsch(dqm,
					      qpd->is_debug ?
					      KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
					      KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);

out:
	dqm_unlock(dqm);
	return retval;
}

static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
					  struct qcm_process_device *qpd)
{
	struct mm_struct *mm = NULL;
	struct queue *q;
	struct mqd_manager *mqd_mgr;
	struct kfd_process_device *pdd;
	uint64_t pd_base;
	uint64_t eviction_duration;
	int retval, ret = 0;

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);

	dqm_lock(dqm);
	if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
		goto out;
	if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
		qpd->evicted--;
		goto out;
	}

	pr_debug_ratelimited("Restoring PASID 0x%x queues\n",
			    pdd->process->pasid);

	/* Update PD Base in QPD */
	qpd->page_table_base = pd_base;
	pr_debug("Updated PD address to 0x%llx\n", pd_base);

	if (!list_empty(&qpd->queues_list)) {
		dqm->dev->kfd2kgd->set_vm_context_page_table_base(
				dqm->dev->adev,
				qpd->vmid,
				qpd->page_table_base);
		kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY);
	}

	/* Take a safe reference to the mm_struct, which may otherwise
	 * disappear even while the kfd_process is still referenced.
	 */
	mm = get_task_mm(pdd->process->lead_thread);
	if (!mm) {
		ret = -EFAULT;
		goto out;
	}

	/* Remove the eviction flags. Activate queues that are not
	 * inactive for other reasons.
	 */
	list_for_each_entry(q, &qpd->queues_list, list) {
		q->properties.is_evicted = false;
		if (!QUEUE_IS_ACTIVE(q->properties))
			continue;

		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
				q->properties.type)];
		q->properties.is_active = true;
		increment_queue_count(dqm, qpd, q);

		if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n"))
			continue;

		retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
				       q->queue, &q->properties, mm);
		if (retval && !ret)
			/* Return the first error, but keep going to
			 * maintain a consistent eviction state
			 */
			ret = retval;
	}
	qpd->evicted = 0;
	eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp;
	atomic64_add(eviction_duration, &pdd->evict_duration_counter);
out:
	if (mm)
		mmput(mm);
	dqm_unlock(dqm);
	return ret;
}

static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct queue *q;
	struct kfd_process_device *pdd;
	uint64_t pd_base;
	uint64_t eviction_duration;
	int retval = 0;

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);

	dqm_lock(dqm);
	if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
		goto out;
	if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
		qpd->evicted--;
		goto out;
	}

	pr_debug_ratelimited("Restoring PASID 0x%x queues\n",
			    pdd->process->pasid);

	/* Update PD Base in QPD */
	qpd->page_table_base = pd_base;
	pr_debug("Updated PD address to 0x%llx\n", pd_base);

	/* activate all active queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		q->properties.is_evicted = false;
		if (!QUEUE_IS_ACTIVE(q->properties))
			continue;

		q->properties.is_active = true;
		increment_queue_count(dqm, &pdd->qpd, q);

		if (dqm->dev->kfd->shared_resources.enable_mes) {
			retval = add_queue_mes(dqm, q, qpd);
			if (retval) {
				pr_err("Failed to restore queue %d\n",
					q->properties.queue_id);
				goto out;
			}
		}
	}
	if (!dqm->dev->kfd->shared_resources.enable_mes)
		retval = execute_queues_cpsch(dqm,
					      KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	qpd->evicted = 0;
	eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp;
	atomic64_add(eviction_duration, &pdd->evict_duration_counter);
out:
	dqm_unlock(dqm);
	return retval;
}

static int register_process(struct device_queue_manager *dqm,
			    struct qcm_process_device *qpd)
{
	struct device_process_node *n;
	struct kfd_process_device *pdd;
	uint64_t pd_base;
	int retval;

	n = kzalloc(sizeof(*n), GFP_KERNEL);
	if (!n)
		return -ENOMEM;

	n->qpd = qpd;

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);

	dqm_lock(dqm);
	list_add(&n->list, &dqm->queues);

	/* Update PD Base in QPD */
	qpd->page_table_base = pd_base;
	pr_debug("Updated PD address to 0x%llx\n", pd_base);

	retval = dqm->asic_ops.update_qpd(dqm, qpd);

	dqm->processes_count++;

	dqm_unlock(dqm);

	/* Outside the DQM lock because under the DQM lock we can't do
	 * reclaim or take other locks that others hold while reclaiming.
	 */
	kfd_inc_compute_active(dqm->dev);

	return retval;
}

static int unregister_process(struct device_queue_manager *dqm,
			      struct qcm_process_device *qpd)
{
	int retval;
	struct device_process_node *cur, *next;

	pr_debug("qpd->queues_list is %s\n",
			list_empty(&qpd->queues_list) ? "empty" : "not empty");

	retval = 0;
	dqm_lock(dqm);

	list_for_each_entry_safe(cur, next, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			goto out;
		}
	}
	/* qpd not found in dqm list */
	retval = 1;
out:
	dqm_unlock(dqm);

	/* Outside the DQM lock because under the DQM lock we can't do
	 * reclaim or take other locks that others hold while reclaiming.
	 */
	if (!retval)
		kfd_dec_compute_active(dqm->dev);

	return retval;
}

static int
set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid,
		       unsigned int vmid)
{
	int xcc = 0, ret;

	for (xcc = 0; xcc < dqm->dev->num_xcc_per_node; xcc++) {
		ret = dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
			dqm->dev->adev, pasid, vmid,
			dqm->dev->start_xcc_id + xcc);
		if (ret)
			break;
	}

	return ret;
}

static void init_interrupts(struct device_queue_manager *dqm)
{
	unsigned int i, xcc;

	for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++) {
		if (is_pipe_enabled(dqm, 0, i)) {
			for (xcc = 0; xcc < dqm->dev->num_xcc_per_node; xcc++)
				dqm->dev->kfd2kgd->init_interrupts(
					dqm->dev->adev, i,
					dqm->dev->start_xcc_id +
					xcc);
		}
	}
}

static int initialize_nocpsch(struct device_queue_manager *dqm)
{
	int pipe, queue;

	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));

	dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm),
					sizeof(unsigned int), GFP_KERNEL);
	if (!dqm->allocated_queues)
		return -ENOMEM;

	mutex_init(&dqm->lock_hidden);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->active_queue_count = dqm->next_pipe_to_allocate = 0;
	dqm->active_cp_queue_count = 0;
	dqm->gws_queue_count = 0;

	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
		int pipe_offset = pipe * get_queues_per_pipe(dqm);

		for (queue = 0; queue < get_queues_per_pipe(dqm); queue++)
			if (test_bit(pipe_offset + queue,
				     dqm->dev->kfd->shared_resources.cp_queue_bitmap))
				dqm->allocated_queues[pipe] |= 1 << queue;
	}

	memset(dqm->vmid_pasid, 0, sizeof(dqm->vmid_pasid));

	init_sdma_bitmaps(dqm);

	return 0;
}

static void uninitialize(struct device_queue_manager *dqm)
{
	int i;

	WARN_ON(dqm->active_queue_count > 0 || dqm->processes_count > 0);

	kfree(dqm->allocated_queues);
	for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
		kfree(dqm->mqd_mgrs[i]);
	mutex_destroy(&dqm->lock_hidden);
}

static int start_nocpsch(struct device_queue_manager *dqm)
{
	int r = 0;

	pr_info("SW scheduler is used");
	init_interrupts(dqm);

	if (dqm->dev->adev->asic_type == CHIP_HAWAII)
		r = pm_init(&dqm->packet_mgr, dqm);
	if (!r)
		dqm->sched_running = true;

	return r;
}

static int stop_nocpsch(struct device_queue_manager *dqm)
{
	dqm_lock(dqm);
	if (!dqm->sched_running) {
		dqm_unlock(dqm);
		return 0;
	}

	if (dqm->dev->adev->asic_type == CHIP_HAWAII)
		pm_uninit(&dqm->packet_mgr, false);
	dqm->sched_running = false;
	dqm_unlock(dqm);

	return 0;
}

static void pre_reset(struct device_queue_manager *dqm)
{
	dqm_lock(dqm);
	dqm->is_resetting = true;
	dqm_unlock(dqm);
}
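
/*
 * SDMA queue IDs are allocated out of a flat per-type bitmap and then
 * decomposed into an engine/queue pair: sdma_id modulo the number of
 * engines of that type selects the engine, the quotient selects the queue
 * slot on that engine. XGMI-optimized engines are numbered after the
 * PCIe-optimized ones.
 */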
static int allocate_sdma_queue(struct device_queue_manager *dqm,
			       struct queue *q, const uint32_t *restore_sdma_id)
{
	int bit;

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		if (bitmap_empty(dqm->sdma_bitmap, KFD_MAX_SDMA_QUEUES)) {
			pr_err("No more SDMA queue to allocate\n");
			return -ENOMEM;
		}

		if (restore_sdma_id) {
			/* Re-use existing sdma_id */
			if (!test_bit(*restore_sdma_id, dqm->sdma_bitmap)) {
				pr_err("SDMA queue already in use\n");
				return -EBUSY;
			}
			clear_bit(*restore_sdma_id, dqm->sdma_bitmap);
			q->sdma_id = *restore_sdma_id;
		} else {
			/* Find first available sdma_id */
			bit = find_first_bit(dqm->sdma_bitmap,
					     get_num_sdma_queues(dqm));
			clear_bit(bit, dqm->sdma_bitmap);
			q->sdma_id = bit;
		}

		q->properties.sdma_engine_id =
			q->sdma_id % kfd_get_num_sdma_engines(dqm->dev);
		q->properties.sdma_queue_id = q->sdma_id /
				kfd_get_num_sdma_engines(dqm->dev);
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
		if (bitmap_empty(dqm->xgmi_sdma_bitmap, KFD_MAX_SDMA_QUEUES)) {
			pr_err("No more XGMI SDMA queue to allocate\n");
			return -ENOMEM;
		}
		if (restore_sdma_id) {
			/* Re-use existing sdma_id */
			if (!test_bit(*restore_sdma_id, dqm->xgmi_sdma_bitmap)) {
				pr_err("SDMA queue already in use\n");
				return -EBUSY;
			}
			clear_bit(*restore_sdma_id, dqm->xgmi_sdma_bitmap);
			q->sdma_id = *restore_sdma_id;
		} else {
			bit = find_first_bit(dqm->xgmi_sdma_bitmap,
					     get_num_xgmi_sdma_queues(dqm));
			clear_bit(bit, dqm->xgmi_sdma_bitmap);
			q->sdma_id = bit;
		}
		/* sdma_engine_id is sdma id including
		 * both PCIe-optimized SDMAs and XGMI-
		 * optimized SDMAs. The calculation below
		 * assumes the first N engines are always
		 * PCIe-optimized ones
		 */
		q->properties.sdma_engine_id =
			kfd_get_num_sdma_engines(dqm->dev) +
			q->sdma_id % kfd_get_num_xgmi_sdma_engines(dqm->dev);
		q->properties.sdma_queue_id = q->sdma_id /
			kfd_get_num_xgmi_sdma_engines(dqm->dev);
	}

	pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
	pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);

	return 0;
}

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				  struct queue *q)
{
	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		if (q->sdma_id >= get_num_sdma_queues(dqm))
			return;
		set_bit(q->sdma_id, dqm->sdma_bitmap);
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
		if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm))
			return;
		set_bit(q->sdma_id, dqm->xgmi_sdma_bitmap);
	}
}

/*
 * Device Queue Manager implementation for cp scheduler
 */
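
/*
 * set_sched_resources() tells the HWS which VMIDs and CP queues it owns:
 * the queue mask is rebuilt from cp_queue_bitmap, restricted to the first
 * MEC, and converted bit by bit into the firmware's set-resources layout.
 * Only 64 queue bits fit in res.queue_mask, hence the WARN_ON below.
 */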
static int set_sched_resources(struct device_queue_manager *dqm)
{
	int i, mec;
	struct scheduling_resources res;

	res.vmid_mask = dqm->dev->compute_vmid_bitmap;

	res.queue_mask = 0;
	for (i = 0; i < KGD_MAX_QUEUES; ++i) {
		mec = (i / dqm->dev->kfd->shared_resources.num_queue_per_pipe)
			/ dqm->dev->kfd->shared_resources.num_pipe_per_mec;

		if (!test_bit(i, dqm->dev->kfd->shared_resources.cp_queue_bitmap))
			continue;

		/* only acquire queues from the first MEC */
		if (mec > 0)
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of res.queue_mask needs updating
		 */
		if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) {
			pr_err("Invalid queue enabled by amdgpu: %d\n", i);
			break;
		}

		res.queue_mask |= 1ull
			<< amdgpu_queue_mask_bit_to_set_resource_bit(
				dqm->dev->adev, i);
	}
	res.gws_mask = ~0ull;
	res.oac_mask = res.gds_heap_base = res.gds_heap_size = 0;

	pr_debug("Scheduling resources:\n"
		"vmid mask: 0x%8X\n"
		"queue mask: 0x%8llX\n",
		res.vmid_mask, res.queue_mask);

	return pm_send_set_resources(&dqm->packet_mgr, &res);
}

static int initialize_cpsch(struct device_queue_manager *dqm)
{
	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));

	mutex_init(&dqm->lock_hidden);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->active_queue_count = dqm->processes_count = 0;
	dqm->active_cp_queue_count = 0;
	dqm->gws_queue_count = 0;
	dqm->active_runlist = false;
	INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);

	init_sdma_bitmaps(dqm);

	return 0;
}

static int start_cpsch(struct device_queue_manager *dqm)
{
	int retval;

	retval = 0;

	dqm_lock(dqm);

	if (!dqm->dev->kfd->shared_resources.enable_mes) {
		retval = pm_init(&dqm->packet_mgr, dqm);
		if (retval)
			goto fail_packet_manager_init;

		retval = set_sched_resources(dqm);
		if (retval)
			goto fail_set_sched_resources;
	}
	pr_debug("Allocating fence memory\n");

	/* allocate fence memory on the gart */
	retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
					&dqm->fence_mem);

	if (retval)
		goto fail_allocate_vidmem;

	dqm->fence_addr = (uint64_t *)dqm->fence_mem->cpu_ptr;
	dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;

	init_interrupts(dqm);

	/* clear hang status when the driver tries to start the hw scheduler */
	dqm->is_hws_hang = false;
	dqm->is_resetting = false;
	dqm->sched_running = true;
	if (!dqm->dev->kfd->shared_resources.enable_mes)
		execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	dqm_unlock(dqm);

	return 0;
fail_allocate_vidmem:
fail_set_sched_resources:
	if (!dqm->dev->kfd->shared_resources.enable_mes)
		pm_uninit(&dqm->packet_mgr, false);
fail_packet_manager_init:
	dqm_unlock(dqm);
	return retval;
}

static int stop_cpsch(struct device_queue_manager *dqm)
{
	bool hanging;

	dqm_lock(dqm);
	if (!dqm->sched_running) {
		dqm_unlock(dqm);
		return 0;
	}

	if (!dqm->is_hws_hang) {
		if (!dqm->dev->kfd->shared_resources.enable_mes)
			unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, false);
		else
			remove_all_queues_mes(dqm);
	}

	hanging = dqm->is_hws_hang || dqm->is_resetting;
	dqm->sched_running = false;

	if (!dqm->dev->kfd->shared_resources.enable_mes)
		pm_release_ib(&dqm->packet_mgr);

	kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
	if (!dqm->dev->kfd->shared_resources.enable_mes)
		pm_uninit(&dqm->packet_mgr, hanging);
	dqm_unlock(dqm);

	return 0;
}

static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
				     struct kernel_queue *kq,
				     struct qcm_process_device *qpd)
{
	dqm_lock(dqm);
	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new kernel queue because %d queues were already created\n",
				dqm->total_queue_count);
		dqm_unlock(dqm);
		return -EPERM;
	}

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	list_add(&kq->list, &qpd->priv_queue_list);
	increment_queue_count(dqm, qpd, kq->queue);
	qpd->is_debug = true;
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	dqm_unlock(dqm);

	return 0;
}

static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
				       struct kernel_queue *kq,
				       struct qcm_process_device *qpd)
{
	dqm_lock(dqm);
	list_del(&kq->list);
	decrement_queue_count(dqm, qpd, kq->queue);
	qpd->is_debug = false;
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type.
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);
	dqm_unlock(dqm);
}

static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
			      struct qcm_process_device *qpd,
			      const struct kfd_criu_queue_priv_data *qd,
			      const void *restore_mqd, const void *restore_ctl_stack)
{
	int retval;
	struct mqd_manager *mqd_mgr;

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		retval = -EPERM;
		goto out;
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
		dqm_lock(dqm);
		retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL);
		dqm_unlock(dqm);
		if (retval)
			goto out;
	}

	retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL);
	if (retval)
		goto out_deallocate_sdma_queue;

	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
			q->properties.type)];

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
		dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
	q->properties.tba_addr = qpd->tba_addr;
	q->properties.tma_addr = qpd->tma_addr;
	q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties);
	if (!q->mqd_mem_obj) {
		retval = -ENOMEM;
		goto out_deallocate_doorbell;
	}

	dqm_lock(dqm);
	/*
	 * Eviction state logic: mark all queues as evicted, even ones
	 * not currently active. Restoring inactive queues later only
	 * updates the is_evicted flag but is a no-op otherwise.
	 */
	q->properties.is_evicted = !!qpd->evicted;

	if (qd)
		mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr,
				     &q->properties, restore_mqd, restore_ctl_stack,
				     qd->ctl_stack_size);
	else
		mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
					&q->gart_mqd_addr, &q->properties);

	list_add(&q->list, &qpd->queues_list);
	qpd->queue_count++;

	if (q->properties.is_active) {
		increment_queue_count(dqm, qpd, q);

		if (!dqm->dev->kfd->shared_resources.enable_mes)
			retval = execute_queues_cpsch(dqm,
					KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
		else
			retval = add_queue_mes(dqm, q, qpd);
		if (retval)
			goto cleanup_queue;
	}

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;

	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	dqm_unlock(dqm);
	return retval;

cleanup_queue:
	qpd->queue_count--;
	list_del(&q->list);
	if (q->properties.is_active)
		decrement_queue_count(dqm, qpd, q);
	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
	dqm_unlock(dqm);
out_deallocate_doorbell:
	deallocate_doorbell(qpd, q);
out_deallocate_sdma_queue:
	if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
		dqm_lock(dqm);
		deallocate_sdma_queue(dqm, q);
		dqm_unlock(dqm);
	}
out:
	return retval;
}

int amdkfd_fence_wait_timeout(uint64_t *fence_addr,
			      uint64_t fence_value,
			      unsigned int timeout_ms)
{
	unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies;

	while (*fence_addr != fence_value) {
		if (time_after(jiffies, end_jiffies)) {
			pr_err("qcm fence wait loop timeout expired\n");
			/* In HWS case, this is used to halt the driver thread
			 * in order not to mess up CP states before doing
			 * scandumps for FW debugging.
			 */
			while (halt_if_hws_hang)
				schedule();

			return -ETIME;
		}
		schedule();
	}

	return 0;
}

/* dqm->lock mutex has to be locked before calling this function */
static int map_queues_cpsch(struct device_queue_manager *dqm)
{
	int retval;

	if (!dqm->sched_running)
		return 0;
	if (dqm->active_queue_count <= 0 || dqm->processes_count <= 0)
		return 0;
	if (dqm->active_runlist)
		return 0;

	retval = pm_send_runlist(&dqm->packet_mgr, &dqm->queues);
	pr_debug("%s sent runlist\n", __func__);
	if (retval) {
		pr_err("failed to execute runlist\n");
		return retval;
	}
	dqm->active_runlist = true;

	return retval;
}

/* dqm->lock mutex has to be locked before calling this function */
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
			      enum kfd_unmap_queues_filter filter,
			      uint32_t filter_param, bool reset)
{
	int retval = 0;
	struct mqd_manager *mqd_mgr;

	if (!dqm->sched_running)
		return 0;
	if (dqm->is_hws_hang || dqm->is_resetting)
		return -EIO;
	if (!dqm->active_runlist)
		return retval;

	retval = pm_send_unmap_queue(&dqm->packet_mgr, filter, filter_param, reset);
	if (retval)
		return retval;

	*dqm->fence_addr = KFD_FENCE_INIT;
	pm_send_query_status(&dqm->packet_mgr, dqm->fence_gpu_addr,
				KFD_FENCE_COMPLETED);
	/* should be timed out */
	retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
				queue_preemption_timeout_ms);
	if (retval) {
		pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
		kfd_hws_hang(dqm);
		return retval;
	}

	/* In the current MEC firmware implementation, if a compute queue
	 * doesn't respond to the preemption request in time, HIQ will
	 * abandon the unmap request without returning any timeout error
	 * to the driver. Instead, MEC firmware will log the doorbell of the
	 * unresponsive compute queue to HIQ.MQD.queue_doorbell_id fields.
	 * To make sure the queue unmap was successful, the driver needs to
	 * check those fields.
	 */
	mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];
	if (mqd_mgr->read_doorbell_id(dqm->packet_mgr.priv_queue->queue->mqd)) {
		pr_err("HIQ MQD's queue_doorbell_id0 is not 0, Queue preemption time out\n");
		while (halt_if_hws_hang)
			schedule();
		return -ETIME;
	}

	pm_release_ib(&dqm->packet_mgr);
	dqm->active_runlist = false;

	return retval;
}

/* only for compute queue */
static int reset_queues_cpsch(struct device_queue_manager *dqm,
			      uint16_t pasid)
{
	int retval;

	dqm_lock(dqm);

	retval = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_BY_PASID,
			pasid, true);

	dqm_unlock(dqm);
	return retval;
}

/* dqm->lock mutex has to be locked before calling this function */
static int execute_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param)
{
	int retval;

	if (dqm->is_hws_hang)
		return -EIO;
	retval = unmap_queues_cpsch(dqm, filter, filter_param, false);
	if (retval)
		return retval;

	return map_queues_cpsch(dqm);
}

static int destroy_queue_cpsch(struct device_queue_manager *dqm,
			       struct qcm_process_device *qpd,
			       struct queue *q)
{
	int retval;
	struct mqd_manager *mqd_mgr;
	uint64_t sdma_val = 0;
	struct kfd_process_device *pdd = qpd_to_pdd(qpd);

	/* Get the SDMA queue stats */
	if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
	    (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
		retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
							&sdma_val);
		if (retval)
			pr_err("Failed to read SDMA queue counter for queue: %d\n",
				q->properties.queue_id);
	}

	retval = 0;

	/* remove queue from list to prevent rescheduling after preemption */
	dqm_lock(dqm);

	if (qpd->is_debug) {
		/*
		 * error, currently we do not allow destroying a queue
		 * of a process that is being debugged
		 */
		retval = -EBUSY;
		goto failed_try_destroy_debugged_queue;

	}

	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
			q->properties.type)];

	deallocate_doorbell(qpd, q);

	if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
	    (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
		deallocate_sdma_queue(dqm, q);
		pdd->sdma_past_activity_counter += sdma_val;
	}

	list_del(&q->list);
	qpd->queue_count--;
	if (q->properties.is_active) {
		if (!dqm->dev->kfd->shared_resources.enable_mes) {
			decrement_queue_count(dqm, qpd, q);
			retval = execute_queues_cpsch(dqm,
						      KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
			if (retval == -ETIME)
				qpd->reset_wavefronts = true;
		} else {
			retval = remove_queue_mes(dqm, q, qpd);
		}
	}

	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	dqm_unlock(dqm);

	/* Do free_mqd after dqm_unlock(dqm) to avoid circular locking */
	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);

	return retval;

failed_try_destroy_debugged_queue:

	dqm_unlock(dqm);
	return retval;
}

/*
 * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
 * stay in user mode.
 */
#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
/* APE1 limit is inclusive and 64K aligned. */
#define APE1_LIMIT_ALIGNMENT 0xFFFF
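
/*
 * Illustrative check against the masks above (example values only): an
 * alternate aperture at base 0x1'0000'0000 with a 64K size gives
 * limit = base + size - 1 = 0x1'0000'FFFF; base & APE1_FIXED_BITS_MASK is 0
 * and limit & APE1_FIXED_BITS_MASK is 0xFFFF == APE1_LIMIT_ALIGNMENT, so the
 * pair is accepted and stored as sh_mem_ape1_base = base >> 16,
 * sh_mem_ape1_limit = limit >> 16.
 */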

static bool set_cache_memory_policy(struct device_queue_manager *dqm,
				   struct qcm_process_device *qpd,
				   enum cache_policy default_policy,
				   enum cache_policy alternate_policy,
				   void __user *alternate_aperture_base,
				   uint64_t alternate_aperture_size)
{
	bool retval = true;

	if (!dqm->asic_ops.set_cache_memory_policy)
		return retval;

	dqm_lock(dqm);

	if (alternate_aperture_size == 0) {
		/* base > limit disables APE1 */
		qpd->sh_mem_ape1_base = 1;
		qpd->sh_mem_ape1_limit = 0;
	} else {
		/*
		 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
		 *			SH_MEM_APE1_BASE[31:0], 0x0000 }
		 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
		 *			SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
		 * Verify that the base and size parameters can be
		 * represented in this format and convert them.
		 * Additionally restrict APE1 to user-mode addresses.
		 */

		uint64_t base = (uintptr_t)alternate_aperture_base;
		uint64_t limit = base + alternate_aperture_size - 1;

		if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
		   (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
			retval = false;
			goto out;
		}

		qpd->sh_mem_ape1_base = base >> 16;
		qpd->sh_mem_ape1_limit = limit >> 16;
	}

	retval = dqm->asic_ops.set_cache_memory_policy(
			dqm,
			qpd,
			default_policy,
			alternate_policy,
			alternate_aperture_base,
			alternate_aperture_size);

	if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
		program_sh_mem_settings(dqm, qpd);

	pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
		qpd->sh_mem_config, qpd->sh_mem_ape1_base,
		qpd->sh_mem_ape1_limit);

out:
	dqm_unlock(dqm);
	return retval;
}

static int process_termination_nocpsch(struct device_queue_manager *dqm,
		struct qcm_process_device *qpd)
{
	struct queue *q;
	struct device_process_node *cur, *next_dpn;
	int retval = 0;
	bool found = false;

	dqm_lock(dqm);

	/* Clear all user mode queues */
	while (!list_empty(&qpd->queues_list)) {
		struct mqd_manager *mqd_mgr;
		int ret;

		q = list_first_entry(&qpd->queues_list, struct queue, list);
		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
				q->properties.type)];
		ret = destroy_queue_nocpsch_locked(dqm, qpd, q);
		if (ret)
			retval = ret;
		dqm_unlock(dqm);
		mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
		dqm_lock(dqm);
	}

	/* Unregister process */
	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			found = true;
			break;
		}
	}

	dqm_unlock(dqm);

	/* Outside the DQM lock because under the DQM lock we can't do
	 * reclaim or take other locks that others hold while reclaiming.
	 */
	if (found)
		kfd_dec_compute_active(dqm->dev);

	return retval;
}

static int get_wave_state(struct device_queue_manager *dqm,
			  struct queue *q,
			  void __user *ctl_stack,
			  u32 *ctl_stack_used_size,
			  u32 *save_area_used_size)
{
	struct mqd_manager *mqd_mgr;

	dqm_lock(dqm);

	mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP];

	if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE ||
	    q->properties.is_active || !q->device->kfd->cwsr_enabled ||
	    !mqd_mgr->get_wave_state) {
		dqm_unlock(dqm);
		return -EINVAL;
	}

	dqm_unlock(dqm);

	/*
	 * get_wave_state is outside the dqm lock to prevent circular locking
	 * and the queue should be protected against destruction by the process
	 * lock.
	 */
	return mqd_mgr->get_wave_state(mqd_mgr, q->mqd, &q->properties,
			ctl_stack, ctl_stack_used_size, save_area_used_size);
}

static void get_queue_checkpoint_info(struct device_queue_manager *dqm,
			const struct queue *q,
			u32 *mqd_size,
			u32 *ctl_stack_size)
{
	struct mqd_manager *mqd_mgr;
	enum KFD_MQD_TYPE mqd_type =
			get_mqd_type_from_queue_type(q->properties.type);

	dqm_lock(dqm);
	mqd_mgr = dqm->mqd_mgrs[mqd_type];
	*mqd_size = mqd_mgr->mqd_size;
	*ctl_stack_size = 0;

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE && mqd_mgr->get_checkpoint_info)
		mqd_mgr->get_checkpoint_info(mqd_mgr, q->mqd, ctl_stack_size);

	dqm_unlock(dqm);
}

static int checkpoint_mqd(struct device_queue_manager *dqm,
			  const struct queue *q,
			  void *mqd,
			  void *ctl_stack)
{
	struct mqd_manager *mqd_mgr;
	int r = 0;
	enum KFD_MQD_TYPE mqd_type =
			get_mqd_type_from_queue_type(q->properties.type);

	dqm_lock(dqm);

	if (q->properties.is_active || !q->device->kfd->cwsr_enabled) {
		r = -EINVAL;
		goto dqm_unlock;
	}

	mqd_mgr = dqm->mqd_mgrs[mqd_type];
	if (!mqd_mgr->checkpoint_mqd) {
		r = -EOPNOTSUPP;
		goto dqm_unlock;
	}

	mqd_mgr->checkpoint_mqd(mqd_mgr, q->mqd, mqd, ctl_stack);

dqm_unlock:
	dqm_unlock(dqm);
	return r;
}

static int process_termination_cpsch(struct device_queue_manager *dqm,
		struct qcm_process_device *qpd)
{
	int retval;
	struct queue *q;
	struct kernel_queue *kq, *kq_next;
	struct mqd_manager *mqd_mgr;
	struct device_process_node *cur, *next_dpn;
	enum kfd_unmap_queues_filter filter =
		KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;
	bool found = false;

	retval = 0;

	dqm_lock(dqm);

	/* Clean all kernel queues */
	list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
		list_del(&kq->list);
		decrement_queue_count(dqm, qpd, kq->queue);
		qpd->is_debug = false;
		dqm->total_queue_count--;
		filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES;
	}

	/* Clear all user mode queues */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
			deallocate_sdma_queue(dqm, q);
		else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
			deallocate_sdma_queue(dqm, q);

		if (q->properties.is_active) {
			decrement_queue_count(dqm, qpd, q);

			if (dqm->dev->kfd->shared_resources.enable_mes) {
				retval = remove_queue_mes(dqm, q, qpd);
				if (retval)
					pr_err("Failed to remove queue %d\n",
						q->properties.queue_id);
			}
		}

		dqm->total_queue_count--;
	}

	/* Unregister process */
	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			found = true;
			break;
		}
	}

	if (!dqm->dev->kfd->shared_resources.enable_mes)
		retval = execute_queues_cpsch(dqm, filter, 0);

	if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) {
		pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
		dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
		qpd->reset_wavefronts = false;
	}

	/* Lastly, free mqd resources.
	 * Do free_mqd() after dqm_unlock to avoid circular locking.
	 */
	while (!list_empty(&qpd->queues_list)) {
		q = list_first_entry(&qpd->queues_list, struct queue, list);
		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
				q->properties.type)];
		list_del(&q->list);
		qpd->queue_count--;
		dqm_unlock(dqm);
		mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
		dqm_lock(dqm);
	}
	dqm_unlock(dqm);

	/* Outside the DQM lock because under the DQM lock we can't do
	 * reclaim or take other locks that others hold while reclaiming.
	 */
	if (found)
		kfd_dec_compute_active(dqm->dev);

	return retval;
}

static int init_mqd_managers(struct device_queue_manager *dqm)
{
	int i, j;
	struct mqd_manager *mqd_mgr;

	for (i = 0; i < KFD_MQD_TYPE_MAX; i++) {
		mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev);
		if (!mqd_mgr) {
			pr_err("mqd manager [%d] initialization failed\n", i);
			goto out_free;
		}
		dqm->mqd_mgrs[i] = mqd_mgr;
	}

	return 0;

out_free:
	for (j = 0; j < i; j++) {
		kfree(dqm->mqd_mgrs[j]);
		dqm->mqd_mgrs[j] = NULL;
	}

	return -ENOMEM;
}

/* Allocate one HIQ MQD (HWS) and all SDMA MQDs in a contiguous trunk */
static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)
{
	int retval;
	struct kfd_node *dev = dqm->dev;
	struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd;
	uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
		get_num_all_sdma_engines(dqm) *
		dev->kfd->device_info.num_sdma_queues_per_engine +
		(dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size *
		dqm->dev->num_xcc_per_node);

	retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, size,
		&(mem_obj->gtt_mem), &(mem_obj->gpu_addr),
		(void *)&(mem_obj->cpu_ptr), false);

	return retval;
}
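
/*
 * Illustrative sizing of the trunk above (hypothetical numbers, actual
 * values come from the MQD managers and device_info): with a 2 KiB SDMA
 * MQD, 8 SDMA engines in total and 8 queues per engine, the SDMA portion
 * is 2 KiB * 8 * 8 = 128 KiB, followed by one HIQ MQD per XCC. The whole
 * trunk is reserved in a single GTT allocation.
 */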

struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev)
{
	struct device_queue_manager *dqm;

	pr_debug("Loading device queue manager\n");

	dqm = kzalloc(sizeof(*dqm), GFP_KERNEL);
	if (!dqm)
		return NULL;

	switch (dev->adev->asic_type) {
	/* HWS is not available on Hawaii. */
	case CHIP_HAWAII:
	/* HWS depends on CWSR for timely dequeue. CWSR is not
	 * available on Tonga.
	 *
	 * FIXME: This argument also applies to Kaveri.
	 */
	case CHIP_TONGA:
		dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS;
		break;
	default:
		dqm->sched_policy = sched_policy;
		break;
	}

	dqm->dev = dev;
	switch (dqm->sched_policy) {
	case KFD_SCHED_POLICY_HWS:
	case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
		/* initialize dqm for cp scheduling */
		dqm->ops.create_queue = create_queue_cpsch;
		dqm->ops.initialize = initialize_cpsch;
		dqm->ops.start = start_cpsch;
		dqm->ops.stop = stop_cpsch;
		dqm->ops.pre_reset = pre_reset;
		dqm->ops.destroy_queue = destroy_queue_cpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.register_process = register_process;
		dqm->ops.unregister_process = unregister_process;
		dqm->ops.uninitialize = uninitialize;
		dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
		dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		dqm->ops.process_termination = process_termination_cpsch;
		dqm->ops.evict_process_queues = evict_process_queues_cpsch;
		dqm->ops.restore_process_queues = restore_process_queues_cpsch;
		dqm->ops.get_wave_state = get_wave_state;
		dqm->ops.reset_queues = reset_queues_cpsch;
		dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info;
		dqm->ops.checkpoint_mqd = checkpoint_mqd;
		break;
	case KFD_SCHED_POLICY_NO_HWS:
		/* initialize dqm for no cp scheduling */
		dqm->ops.start = start_nocpsch;
		dqm->ops.stop = stop_nocpsch;
		dqm->ops.pre_reset = pre_reset;
		dqm->ops.create_queue = create_queue_nocpsch;
		dqm->ops.destroy_queue = destroy_queue_nocpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.register_process = register_process;
		dqm->ops.unregister_process = unregister_process;
		dqm->ops.initialize = initialize_nocpsch;
		dqm->ops.uninitialize = uninitialize;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		dqm->ops.process_termination = process_termination_nocpsch;
		dqm->ops.evict_process_queues = evict_process_queues_nocpsch;
		dqm->ops.restore_process_queues =
			restore_process_queues_nocpsch;
		dqm->ops.get_wave_state = get_wave_state;
		dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info;
		dqm->ops.checkpoint_mqd = checkpoint_mqd;
		break;
	default:
		pr_err("Invalid scheduling policy %d\n", dqm->sched_policy);
		goto out_free;
	}

	switch (dev->adev->asic_type) {
	case CHIP_CARRIZO:
		device_queue_manager_init_vi(&dqm->asic_ops);
		break;

	case CHIP_KAVERI:
		device_queue_manager_init_cik(&dqm->asic_ops);
		break;

	case CHIP_HAWAII:
		device_queue_manager_init_cik_hawaii(&dqm->asic_ops);
		break;

	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		device_queue_manager_init_vi_tonga(&dqm->asic_ops);
		break;

	default:
		if (KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0))
			device_queue_manager_init_v11(&dqm->asic_ops);
		else if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1))
			device_queue_manager_init_v10_navi10(&dqm->asic_ops);
		else if (KFD_GC_VERSION(dev) >= IP_VERSION(9, 0, 1))
			device_queue_manager_init_v9(&dqm->asic_ops);
		else {
			WARN(1, "Unexpected ASIC family %u",
			     dev->adev->asic_type);
			goto out_free;
		}
	}

	if (init_mqd_managers(dqm))
		goto out_free;

	if (!dev->kfd->shared_resources.enable_mes && allocate_hiq_sdma_mqd(dqm)) {
		pr_err("Failed to allocate hiq sdma mqd trunk buffer\n");
		goto out_free;
	}

	if (!dqm->ops.initialize(dqm))
		return dqm;

out_free:
	kfree(dqm);
	return NULL;
}

static void deallocate_hiq_sdma_mqd(struct kfd_node *dev,
				    struct kfd_mem_obj *mqd)
{
	WARN(!mqd, "No hiq sdma mqd trunk to free");

	amdgpu_amdkfd_free_gtt_mem(dev->adev, mqd->gtt_mem);
}

void device_queue_manager_uninit(struct device_queue_manager *dqm)
{
	dqm->ops.stop(dqm);
	dqm->ops.uninitialize(dqm);
	if (!dqm->dev->kfd->shared_resources.enable_mes)
		deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd);
	kfree(dqm);
}

int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid)
{
	struct kfd_process_device *pdd;
	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
	int ret = 0;

	if (!p)
		return -EINVAL;
	WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
	pdd = kfd_get_process_device_data(dqm->dev, p);
	if (pdd)
		ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd);
	kfd_unref_process(p);

	return ret;
}

static void kfd_process_hw_exception(struct work_struct *work)
{
	struct device_queue_manager *dqm = container_of(work,
			struct device_queue_manager, hw_exception_work);
	amdgpu_amdkfd_gpu_reset(dqm->dev->adev);
}

#if defined(CONFIG_DEBUG_FS)

/*
 * Print a register dump as "address: value" pairs, packing up to eight
 * consecutive registers on a line; a gap in the register addresses starts
 * a new line.
 */
static void seq_reg_dump(struct seq_file *m,
			 uint32_t (*dump)[2], uint32_t n_regs)
{
	uint32_t i, count;

	for (i = 0, count = 0; i < n_regs; i++) {
		if (count == 0 ||
		    dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) {
			seq_printf(m, "%s %08x: %08x",
				   i ? "\n" : "",
				   dump[i][0], dump[i][1]);
			count = 7;
		} else {
			seq_printf(m, " %08x", dump[i][1]);
			count--;
		}
	}

	seq_puts(m, "\n");
}

int dqm_debugfs_hqds(struct seq_file *m, void *data)
{
	struct device_queue_manager *dqm = data;
	uint32_t (*dump)[2], n_regs;
	int pipe, queue;
	int r = 0, xcc;
	uint32_t inst;
	uint32_t sdma_engine_start;

	if (!dqm->sched_running) {
		seq_puts(m, " Device is stopped\n");
		return 0;
	}

	for (xcc = 0; xcc < dqm->dev->num_xcc_per_node; xcc++) {
		inst = dqm->dev->start_xcc_id + xcc;
		r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev,
						KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE,
						&dump, &n_regs, inst);
		if (!r) {
			seq_printf(m,
				   " Inst %d, HIQ on MEC %d Pipe %d Queue %d\n",
				   inst, KFD_CIK_HIQ_PIPE/get_pipes_per_mec(dqm)+1,
				   KFD_CIK_HIQ_PIPE%get_pipes_per_mec(dqm),
				   KFD_CIK_HIQ_QUEUE);
			seq_reg_dump(m, dump, n_regs);

			kfree(dump);
		}

		for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
			int pipe_offset = pipe * get_queues_per_pipe(dqm);

			for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) {
				if (!test_bit(pipe_offset + queue,
					      dqm->dev->kfd->shared_resources.cp_queue_bitmap))
					continue;

				r = dqm->dev->kfd2kgd->hqd_dump(
					dqm->dev->adev, pipe, queue, &dump, &n_regs, inst);
				if (r)
					break;

				seq_printf(m, " Inst %d, CP Pipe %d, Queue %d\n",
					   inst, pipe, queue);
				seq_reg_dump(m, dump, n_regs);

				kfree(dump);
			}
		}
	}

	sdma_engine_start = dqm->dev->node_id * get_num_all_sdma_engines(dqm);
	for (pipe = sdma_engine_start;
	     pipe < (sdma_engine_start + get_num_all_sdma_engines(dqm));
	     pipe++) {
		for (queue = 0;
		     queue < dqm->dev->kfd->device_info.num_sdma_queues_per_engine;
		     queue++) {
			r = dqm->dev->kfd2kgd->hqd_sdma_dump(
				dqm->dev->adev, pipe, queue, &dump, &n_regs);
			if (r)
				break;

			seq_printf(m, " SDMA Engine %d, RLC %d\n",
				   pipe, queue);
			seq_reg_dump(m, dump, n_regs);

			kfree(dump);
		}
	}

	return r;
}

int dqm_debugfs_hang_hws(struct device_queue_manager *dqm)
{
	int r = 0;

	dqm_lock(dqm);
	r = pm_debugfs_hang_hws(&dqm->packet_mgr);
	if (r) {
		dqm_unlock(dqm);
		return r;
	}
	dqm->active_runlist = true;
	r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
	dqm_unlock(dqm);

	return r;
}

#endif