1 // SPDX-License-Identifier: GPL-2.0 OR MIT 2 /* 3 * Copyright 2014-2022 Advanced Micro Devices, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21 * OTHER DEALINGS IN THE SOFTWARE. 22 * 23 */ 24 25 #include <linux/ratelimit.h> 26 #include <linux/printk.h> 27 #include <linux/slab.h> 28 #include <linux/list.h> 29 #include <linux/types.h> 30 #include <linux/bitops.h> 31 #include <linux/sched.h> 32 #include "kfd_priv.h" 33 #include "kfd_device_queue_manager.h" 34 #include "kfd_mqd_manager.h" 35 #include "cik_regs.h" 36 #include "kfd_kernel_queue.h" 37 #include "amdgpu_amdkfd.h" 38 39 /* Size of the per-pipe EOP queue */ 40 #define CIK_HPD_EOP_BYTES_LOG2 11 41 #define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2) 42 43 static int set_pasid_vmid_mapping(struct device_queue_manager *dqm, 44 u32 pasid, unsigned int vmid); 45 46 static int execute_queues_cpsch(struct device_queue_manager *dqm, 47 enum kfd_unmap_queues_filter filter, 48 uint32_t filter_param); 49 static int unmap_queues_cpsch(struct device_queue_manager *dqm, 50 enum kfd_unmap_queues_filter filter, 51 uint32_t filter_param, bool reset); 52 53 static int map_queues_cpsch(struct device_queue_manager *dqm); 54 55 static void deallocate_sdma_queue(struct device_queue_manager *dqm, 56 struct queue *q); 57 58 static inline void deallocate_hqd(struct device_queue_manager *dqm, 59 struct queue *q); 60 static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q); 61 static int allocate_sdma_queue(struct device_queue_manager *dqm, 62 struct queue *q, const uint32_t *restore_sdma_id); 63 static void kfd_process_hw_exception(struct work_struct *work); 64 65 static inline 66 enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type) 67 { 68 if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI) 69 return KFD_MQD_TYPE_SDMA; 70 return KFD_MQD_TYPE_CP; 71 } 72 73 static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe) 74 { 75 int i; 76 int pipe_offset = (mec * dqm->dev->shared_resources.num_pipe_per_mec 77 + pipe) * dqm->dev->shared_resources.num_queue_per_pipe; 78 79 /* queue is available for KFD usage if bit is 1 */ 80 for (i = 0; i < dqm->dev->shared_resources.num_queue_per_pipe; ++i) 81 if (test_bit(pipe_offset + i, 82 dqm->dev->shared_resources.cp_queue_bitmap)) 83 return true; 84 return false; 85 } 86 87 unsigned int get_cp_queues_num(struct device_queue_manager *dqm) 88 { 89 return bitmap_weight(dqm->dev->shared_resources.cp_queue_bitmap, 90 KGD_MAX_QUEUES); 91 } 92 93 unsigned int get_queues_per_pipe(struct device_queue_manager *dqm) 94 { 95 return dqm->dev->shared_resources.num_queue_per_pipe; 96 } 97 98 unsigned int get_pipes_per_mec(struct device_queue_manager *dqm) 99 { 100 return dqm->dev->shared_resources.num_pipe_per_mec; 101 } 102 103 static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm) 104 { 105 return kfd_get_num_sdma_engines(dqm->dev) + 106 kfd_get_num_xgmi_sdma_engines(dqm->dev); 107 } 108 109 unsigned int get_num_sdma_queues(struct device_queue_manager *dqm) 110 { 111 return kfd_get_num_sdma_engines(dqm->dev) * 112 dqm->dev->device_info.num_sdma_queues_per_engine; 113 } 114 115 unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm) 116 { 117 return kfd_get_num_xgmi_sdma_engines(dqm->dev) * 118 dqm->dev->device_info.num_sdma_queues_per_engine; 119 } 120 121 void program_sh_mem_settings(struct device_queue_manager *dqm, 122 struct qcm_process_device *qpd) 123 { 124 return dqm->dev->kfd2kgd->program_sh_mem_settings( 125 dqm->dev->adev, qpd->vmid, 126 qpd->sh_mem_config, 127 qpd->sh_mem_ape1_base, 128 qpd->sh_mem_ape1_limit, 129 qpd->sh_mem_bases); 130 } 131 132 static void increment_queue_count(struct device_queue_manager *dqm, 133 struct qcm_process_device *qpd, 134 struct queue *q) 135 { 136 dqm->active_queue_count++; 137 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || 138 q->properties.type == KFD_QUEUE_TYPE_DIQ) 139 dqm->active_cp_queue_count++; 140 141 if (q->properties.is_gws) { 142 dqm->gws_queue_count++; 143 qpd->mapped_gws_queue = true; 144 } 145 } 146 147 static void decrement_queue_count(struct device_queue_manager *dqm, 148 struct qcm_process_device *qpd, 149 struct queue *q) 150 { 151 dqm->active_queue_count--; 152 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || 153 q->properties.type == KFD_QUEUE_TYPE_DIQ) 154 dqm->active_cp_queue_count--; 155 156 if (q->properties.is_gws) { 157 dqm->gws_queue_count--; 158 qpd->mapped_gws_queue = false; 159 } 160 } 161 162 /* 163 * Allocate a doorbell ID to this queue. 164 * If doorbell_id is passed in, make sure requested ID is valid then allocate it. 165 */ 166 static int allocate_doorbell(struct qcm_process_device *qpd, 167 struct queue *q, 168 uint32_t const *restore_id) 169 { 170 struct kfd_dev *dev = qpd->dqm->dev; 171 172 if (!KFD_IS_SOC15(dev)) { 173 /* On pre-SOC15 chips we need to use the queue ID to 174 * preserve the user mode ABI. 175 */ 176 177 if (restore_id && *restore_id != q->properties.queue_id) 178 return -EINVAL; 179 180 q->doorbell_id = q->properties.queue_id; 181 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 182 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 183 /* For SDMA queues on SOC15 with 8-byte doorbell, use static 184 * doorbell assignments based on the engine and queue id. 185 * The doobell index distance between RLC (2*i) and (2*i+1) 186 * for a SDMA engine is 512. 187 */ 188 189 uint32_t *idx_offset = dev->shared_resources.sdma_doorbell_idx; 190 uint32_t valid_id = idx_offset[q->properties.sdma_engine_id] 191 + (q->properties.sdma_queue_id & 1) 192 * KFD_QUEUE_DOORBELL_MIRROR_OFFSET 193 + (q->properties.sdma_queue_id >> 1); 194 195 if (restore_id && *restore_id != valid_id) 196 return -EINVAL; 197 q->doorbell_id = valid_id; 198 } else { 199 /* For CP queues on SOC15 */ 200 if (restore_id) { 201 /* make sure that ID is free */ 202 if (__test_and_set_bit(*restore_id, qpd->doorbell_bitmap)) 203 return -EINVAL; 204 205 q->doorbell_id = *restore_id; 206 } else { 207 /* or reserve a free doorbell ID */ 208 unsigned int found; 209 210 found = find_first_zero_bit(qpd->doorbell_bitmap, 211 KFD_MAX_NUM_OF_QUEUES_PER_PROCESS); 212 if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) { 213 pr_debug("No doorbells available"); 214 return -EBUSY; 215 } 216 set_bit(found, qpd->doorbell_bitmap); 217 q->doorbell_id = found; 218 } 219 } 220 221 q->properties.doorbell_off = 222 kfd_get_doorbell_dw_offset_in_bar(dev, qpd_to_pdd(qpd), 223 q->doorbell_id); 224 return 0; 225 } 226 227 static void deallocate_doorbell(struct qcm_process_device *qpd, 228 struct queue *q) 229 { 230 unsigned int old; 231 struct kfd_dev *dev = qpd->dqm->dev; 232 233 if (!KFD_IS_SOC15(dev) || 234 q->properties.type == KFD_QUEUE_TYPE_SDMA || 235 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 236 return; 237 238 old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap); 239 WARN_ON(!old); 240 } 241 242 static void program_trap_handler_settings(struct device_queue_manager *dqm, 243 struct qcm_process_device *qpd) 244 { 245 if (dqm->dev->kfd2kgd->program_trap_handler_settings) 246 dqm->dev->kfd2kgd->program_trap_handler_settings( 247 dqm->dev->adev, qpd->vmid, 248 qpd->tba_addr, qpd->tma_addr); 249 } 250 251 static int allocate_vmid(struct device_queue_manager *dqm, 252 struct qcm_process_device *qpd, 253 struct queue *q) 254 { 255 int allocated_vmid = -1, i; 256 257 for (i = dqm->dev->vm_info.first_vmid_kfd; 258 i <= dqm->dev->vm_info.last_vmid_kfd; i++) { 259 if (!dqm->vmid_pasid[i]) { 260 allocated_vmid = i; 261 break; 262 } 263 } 264 265 if (allocated_vmid < 0) { 266 pr_err("no more vmid to allocate\n"); 267 return -ENOSPC; 268 } 269 270 pr_debug("vmid allocated: %d\n", allocated_vmid); 271 272 dqm->vmid_pasid[allocated_vmid] = q->process->pasid; 273 274 set_pasid_vmid_mapping(dqm, q->process->pasid, allocated_vmid); 275 276 qpd->vmid = allocated_vmid; 277 q->properties.vmid = allocated_vmid; 278 279 program_sh_mem_settings(dqm, qpd); 280 281 if (KFD_IS_SOC15(dqm->dev) && dqm->dev->cwsr_enabled) 282 program_trap_handler_settings(dqm, qpd); 283 284 /* qpd->page_table_base is set earlier when register_process() 285 * is called, i.e. when the first queue is created. 286 */ 287 dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->adev, 288 qpd->vmid, 289 qpd->page_table_base); 290 /* invalidate the VM context after pasid and vmid mapping is set up */ 291 kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY); 292 293 if (dqm->dev->kfd2kgd->set_scratch_backing_va) 294 dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->adev, 295 qpd->sh_hidden_private_base, qpd->vmid); 296 297 return 0; 298 } 299 300 static int flush_texture_cache_nocpsch(struct kfd_dev *kdev, 301 struct qcm_process_device *qpd) 302 { 303 const struct packet_manager_funcs *pmf = qpd->dqm->packet_mgr.pmf; 304 int ret; 305 306 if (!qpd->ib_kaddr) 307 return -ENOMEM; 308 309 ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr); 310 if (ret) 311 return ret; 312 313 return amdgpu_amdkfd_submit_ib(kdev->adev, KGD_ENGINE_MEC1, qpd->vmid, 314 qpd->ib_base, (uint32_t *)qpd->ib_kaddr, 315 pmf->release_mem_size / sizeof(uint32_t)); 316 } 317 318 static void deallocate_vmid(struct device_queue_manager *dqm, 319 struct qcm_process_device *qpd, 320 struct queue *q) 321 { 322 /* On GFX v7, CP doesn't flush TC at dequeue */ 323 if (q->device->adev->asic_type == CHIP_HAWAII) 324 if (flush_texture_cache_nocpsch(q->device, qpd)) 325 pr_err("Failed to flush TC\n"); 326 327 kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY); 328 329 /* Release the vmid mapping */ 330 set_pasid_vmid_mapping(dqm, 0, qpd->vmid); 331 dqm->vmid_pasid[qpd->vmid] = 0; 332 333 qpd->vmid = 0; 334 q->properties.vmid = 0; 335 } 336 337 static int create_queue_nocpsch(struct device_queue_manager *dqm, 338 struct queue *q, 339 struct qcm_process_device *qpd, 340 const struct kfd_criu_queue_priv_data *qd, 341 const void *restore_mqd, const void *restore_ctl_stack) 342 { 343 struct mqd_manager *mqd_mgr; 344 int retval; 345 346 dqm_lock(dqm); 347 348 if (dqm->total_queue_count >= max_num_of_queues_per_device) { 349 pr_warn("Can't create new usermode queue because %d queues were already created\n", 350 dqm->total_queue_count); 351 retval = -EPERM; 352 goto out_unlock; 353 } 354 355 if (list_empty(&qpd->queues_list)) { 356 retval = allocate_vmid(dqm, qpd, q); 357 if (retval) 358 goto out_unlock; 359 } 360 q->properties.vmid = qpd->vmid; 361 /* 362 * Eviction state logic: mark all queues as evicted, even ones 363 * not currently active. Restoring inactive queues later only 364 * updates the is_evicted flag but is a no-op otherwise. 365 */ 366 q->properties.is_evicted = !!qpd->evicted; 367 368 q->properties.tba_addr = qpd->tba_addr; 369 q->properties.tma_addr = qpd->tma_addr; 370 371 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 372 q->properties.type)]; 373 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) { 374 retval = allocate_hqd(dqm, q); 375 if (retval) 376 goto deallocate_vmid; 377 pr_debug("Loading mqd to hqd on pipe %d, queue %d\n", 378 q->pipe, q->queue); 379 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 380 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 381 retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL); 382 if (retval) 383 goto deallocate_vmid; 384 dqm->asic_ops.init_sdma_vm(dqm, q, qpd); 385 } 386 387 retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL); 388 if (retval) 389 goto out_deallocate_hqd; 390 391 /* Temporarily release dqm lock to avoid a circular lock dependency */ 392 dqm_unlock(dqm); 393 q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties); 394 dqm_lock(dqm); 395 396 if (!q->mqd_mem_obj) { 397 retval = -ENOMEM; 398 goto out_deallocate_doorbell; 399 } 400 401 if (qd) 402 mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr, 403 &q->properties, restore_mqd, restore_ctl_stack, 404 qd->ctl_stack_size); 405 else 406 mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, 407 &q->gart_mqd_addr, &q->properties); 408 409 if (q->properties.is_active) { 410 if (!dqm->sched_running) { 411 WARN_ONCE(1, "Load non-HWS mqd while stopped\n"); 412 goto add_queue_to_list; 413 } 414 415 if (WARN(q->process->mm != current->mm, 416 "should only run in user thread")) 417 retval = -EFAULT; 418 else 419 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, 420 q->queue, &q->properties, current->mm); 421 if (retval) 422 goto out_free_mqd; 423 } 424 425 add_queue_to_list: 426 list_add(&q->list, &qpd->queues_list); 427 qpd->queue_count++; 428 if (q->properties.is_active) 429 increment_queue_count(dqm, qpd, q); 430 431 /* 432 * Unconditionally increment this counter, regardless of the queue's 433 * type or whether the queue is active. 434 */ 435 dqm->total_queue_count++; 436 pr_debug("Total of %d queues are accountable so far\n", 437 dqm->total_queue_count); 438 goto out_unlock; 439 440 out_free_mqd: 441 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 442 out_deallocate_doorbell: 443 deallocate_doorbell(qpd, q); 444 out_deallocate_hqd: 445 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) 446 deallocate_hqd(dqm, q); 447 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 448 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 449 deallocate_sdma_queue(dqm, q); 450 deallocate_vmid: 451 if (list_empty(&qpd->queues_list)) 452 deallocate_vmid(dqm, qpd, q); 453 out_unlock: 454 dqm_unlock(dqm); 455 return retval; 456 } 457 458 static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q) 459 { 460 bool set; 461 int pipe, bit, i; 462 463 set = false; 464 465 for (pipe = dqm->next_pipe_to_allocate, i = 0; 466 i < get_pipes_per_mec(dqm); 467 pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) { 468 469 if (!is_pipe_enabled(dqm, 0, pipe)) 470 continue; 471 472 if (dqm->allocated_queues[pipe] != 0) { 473 bit = ffs(dqm->allocated_queues[pipe]) - 1; 474 dqm->allocated_queues[pipe] &= ~(1 << bit); 475 q->pipe = pipe; 476 q->queue = bit; 477 set = true; 478 break; 479 } 480 } 481 482 if (!set) 483 return -EBUSY; 484 485 pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue); 486 /* horizontal hqd allocation */ 487 dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm); 488 489 return 0; 490 } 491 492 static inline void deallocate_hqd(struct device_queue_manager *dqm, 493 struct queue *q) 494 { 495 dqm->allocated_queues[q->pipe] |= (1 << q->queue); 496 } 497 498 #define SQ_IND_CMD_CMD_KILL 0x00000003 499 #define SQ_IND_CMD_MODE_BROADCAST 0x00000001 500 501 static int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p) 502 { 503 int status = 0; 504 unsigned int vmid; 505 uint16_t queried_pasid; 506 union SQ_CMD_BITS reg_sq_cmd; 507 union GRBM_GFX_INDEX_BITS reg_gfx_index; 508 struct kfd_process_device *pdd; 509 int first_vmid_to_scan = dev->vm_info.first_vmid_kfd; 510 int last_vmid_to_scan = dev->vm_info.last_vmid_kfd; 511 512 reg_sq_cmd.u32All = 0; 513 reg_gfx_index.u32All = 0; 514 515 pr_debug("Killing all process wavefronts\n"); 516 517 if (!dev->kfd2kgd->get_atc_vmid_pasid_mapping_info) { 518 pr_err("no vmid pasid mapping supported \n"); 519 return -EOPNOTSUPP; 520 } 521 522 /* Scan all registers in the range ATC_VMID8_PASID_MAPPING .. 523 * ATC_VMID15_PASID_MAPPING 524 * to check which VMID the current process is mapped to. 525 */ 526 527 for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) { 528 status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info 529 (dev->adev, vmid, &queried_pasid); 530 531 if (status && queried_pasid == p->pasid) { 532 pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n", 533 vmid, p->pasid); 534 break; 535 } 536 } 537 538 if (vmid > last_vmid_to_scan) { 539 pr_err("Didn't find vmid for pasid 0x%x\n", p->pasid); 540 return -EFAULT; 541 } 542 543 /* taking the VMID for that process on the safe way using PDD */ 544 pdd = kfd_get_process_device_data(dev, p); 545 if (!pdd) 546 return -EFAULT; 547 548 reg_gfx_index.bits.sh_broadcast_writes = 1; 549 reg_gfx_index.bits.se_broadcast_writes = 1; 550 reg_gfx_index.bits.instance_broadcast_writes = 1; 551 reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST; 552 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL; 553 reg_sq_cmd.bits.vm_id = vmid; 554 555 dev->kfd2kgd->wave_control_execute(dev->adev, 556 reg_gfx_index.u32All, 557 reg_sq_cmd.u32All); 558 559 return 0; 560 } 561 562 /* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked 563 * to avoid asynchronized access 564 */ 565 static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm, 566 struct qcm_process_device *qpd, 567 struct queue *q) 568 { 569 int retval; 570 struct mqd_manager *mqd_mgr; 571 572 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 573 q->properties.type)]; 574 575 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) 576 deallocate_hqd(dqm, q); 577 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) 578 deallocate_sdma_queue(dqm, q); 579 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 580 deallocate_sdma_queue(dqm, q); 581 else { 582 pr_debug("q->properties.type %d is invalid\n", 583 q->properties.type); 584 return -EINVAL; 585 } 586 dqm->total_queue_count--; 587 588 deallocate_doorbell(qpd, q); 589 590 if (!dqm->sched_running) { 591 WARN_ONCE(1, "Destroy non-HWS queue while stopped\n"); 592 return 0; 593 } 594 595 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, 596 KFD_PREEMPT_TYPE_WAVEFRONT_RESET, 597 KFD_UNMAP_LATENCY_MS, 598 q->pipe, q->queue); 599 if (retval == -ETIME) 600 qpd->reset_wavefronts = true; 601 602 list_del(&q->list); 603 if (list_empty(&qpd->queues_list)) { 604 if (qpd->reset_wavefronts) { 605 pr_warn("Resetting wave fronts (nocpsch) on dev %p\n", 606 dqm->dev); 607 /* dbgdev_wave_reset_wavefronts has to be called before 608 * deallocate_vmid(), i.e. when vmid is still in use. 609 */ 610 dbgdev_wave_reset_wavefronts(dqm->dev, 611 qpd->pqm->process); 612 qpd->reset_wavefronts = false; 613 } 614 615 deallocate_vmid(dqm, qpd, q); 616 } 617 qpd->queue_count--; 618 if (q->properties.is_active) 619 decrement_queue_count(dqm, qpd, q); 620 621 return retval; 622 } 623 624 static int destroy_queue_nocpsch(struct device_queue_manager *dqm, 625 struct qcm_process_device *qpd, 626 struct queue *q) 627 { 628 int retval; 629 uint64_t sdma_val = 0; 630 struct kfd_process_device *pdd = qpd_to_pdd(qpd); 631 struct mqd_manager *mqd_mgr = 632 dqm->mqd_mgrs[get_mqd_type_from_queue_type(q->properties.type)]; 633 634 /* Get the SDMA queue stats */ 635 if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || 636 (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 637 retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr, 638 &sdma_val); 639 if (retval) 640 pr_err("Failed to read SDMA queue counter for queue: %d\n", 641 q->properties.queue_id); 642 } 643 644 dqm_lock(dqm); 645 retval = destroy_queue_nocpsch_locked(dqm, qpd, q); 646 if (!retval) 647 pdd->sdma_past_activity_counter += sdma_val; 648 dqm_unlock(dqm); 649 650 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 651 652 return retval; 653 } 654 655 static int update_queue(struct device_queue_manager *dqm, struct queue *q, 656 struct mqd_update_info *minfo) 657 { 658 int retval = 0; 659 struct mqd_manager *mqd_mgr; 660 struct kfd_process_device *pdd; 661 bool prev_active = false; 662 663 dqm_lock(dqm); 664 pdd = kfd_get_process_device_data(q->device, q->process); 665 if (!pdd) { 666 retval = -ENODEV; 667 goto out_unlock; 668 } 669 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 670 q->properties.type)]; 671 672 /* Save previous activity state for counters */ 673 prev_active = q->properties.is_active; 674 675 /* Make sure the queue is unmapped before updating the MQD */ 676 if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) { 677 retval = unmap_queues_cpsch(dqm, 678 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false); 679 if (retval) { 680 pr_err("unmap queue failed\n"); 681 goto out_unlock; 682 } 683 } else if (prev_active && 684 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || 685 q->properties.type == KFD_QUEUE_TYPE_SDMA || 686 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 687 688 if (!dqm->sched_running) { 689 WARN_ONCE(1, "Update non-HWS queue while stopped\n"); 690 goto out_unlock; 691 } 692 693 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, 694 (dqm->dev->cwsr_enabled ? 695 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE : 696 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN), 697 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); 698 if (retval) { 699 pr_err("destroy mqd failed\n"); 700 goto out_unlock; 701 } 702 } 703 704 mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties, minfo); 705 706 /* 707 * check active state vs. the previous state and modify 708 * counter accordingly. map_queues_cpsch uses the 709 * dqm->active_queue_count to determine whether a new runlist must be 710 * uploaded. 711 */ 712 if (q->properties.is_active && !prev_active) { 713 increment_queue_count(dqm, &pdd->qpd, q); 714 } else if (!q->properties.is_active && prev_active) { 715 decrement_queue_count(dqm, &pdd->qpd, q); 716 } else if (q->gws && !q->properties.is_gws) { 717 if (q->properties.is_active) { 718 dqm->gws_queue_count++; 719 pdd->qpd.mapped_gws_queue = true; 720 } 721 q->properties.is_gws = true; 722 } else if (!q->gws && q->properties.is_gws) { 723 if (q->properties.is_active) { 724 dqm->gws_queue_count--; 725 pdd->qpd.mapped_gws_queue = false; 726 } 727 q->properties.is_gws = false; 728 } 729 730 if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) 731 retval = map_queues_cpsch(dqm); 732 else if (q->properties.is_active && 733 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || 734 q->properties.type == KFD_QUEUE_TYPE_SDMA || 735 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 736 if (WARN(q->process->mm != current->mm, 737 "should only run in user thread")) 738 retval = -EFAULT; 739 else 740 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, 741 q->pipe, q->queue, 742 &q->properties, current->mm); 743 } 744 745 out_unlock: 746 dqm_unlock(dqm); 747 return retval; 748 } 749 750 static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, 751 struct qcm_process_device *qpd) 752 { 753 struct queue *q; 754 struct mqd_manager *mqd_mgr; 755 struct kfd_process_device *pdd; 756 int retval, ret = 0; 757 758 dqm_lock(dqm); 759 if (qpd->evicted++ > 0) /* already evicted, do nothing */ 760 goto out; 761 762 pdd = qpd_to_pdd(qpd); 763 pr_debug_ratelimited("Evicting PASID 0x%x queues\n", 764 pdd->process->pasid); 765 766 pdd->last_evict_timestamp = get_jiffies_64(); 767 /* Mark all queues as evicted. Deactivate all active queues on 768 * the qpd. 769 */ 770 list_for_each_entry(q, &qpd->queues_list, list) { 771 q->properties.is_evicted = true; 772 if (!q->properties.is_active) 773 continue; 774 775 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 776 q->properties.type)]; 777 q->properties.is_active = false; 778 decrement_queue_count(dqm, qpd, q); 779 780 if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n")) 781 continue; 782 783 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, 784 (dqm->dev->cwsr_enabled ? 785 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE : 786 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN), 787 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); 788 if (retval && !ret) 789 /* Return the first error, but keep going to 790 * maintain a consistent eviction state 791 */ 792 ret = retval; 793 } 794 795 out: 796 dqm_unlock(dqm); 797 return ret; 798 } 799 800 static int evict_process_queues_cpsch(struct device_queue_manager *dqm, 801 struct qcm_process_device *qpd) 802 { 803 struct queue *q; 804 struct kfd_process_device *pdd; 805 int retval = 0; 806 807 dqm_lock(dqm); 808 if (qpd->evicted++ > 0) /* already evicted, do nothing */ 809 goto out; 810 811 pdd = qpd_to_pdd(qpd); 812 pr_debug_ratelimited("Evicting PASID 0x%x queues\n", 813 pdd->process->pasid); 814 815 /* Mark all queues as evicted. Deactivate all active queues on 816 * the qpd. 817 */ 818 list_for_each_entry(q, &qpd->queues_list, list) { 819 q->properties.is_evicted = true; 820 if (!q->properties.is_active) 821 continue; 822 823 q->properties.is_active = false; 824 decrement_queue_count(dqm, qpd, q); 825 } 826 pdd->last_evict_timestamp = get_jiffies_64(); 827 retval = execute_queues_cpsch(dqm, 828 qpd->is_debug ? 829 KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES : 830 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 831 832 out: 833 dqm_unlock(dqm); 834 return retval; 835 } 836 837 static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, 838 struct qcm_process_device *qpd) 839 { 840 struct mm_struct *mm = NULL; 841 struct queue *q; 842 struct mqd_manager *mqd_mgr; 843 struct kfd_process_device *pdd; 844 uint64_t pd_base; 845 uint64_t eviction_duration; 846 int retval, ret = 0; 847 848 pdd = qpd_to_pdd(qpd); 849 /* Retrieve PD base */ 850 pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); 851 852 dqm_lock(dqm); 853 if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */ 854 goto out; 855 if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */ 856 qpd->evicted--; 857 goto out; 858 } 859 860 pr_debug_ratelimited("Restoring PASID 0x%x queues\n", 861 pdd->process->pasid); 862 863 /* Update PD Base in QPD */ 864 qpd->page_table_base = pd_base; 865 pr_debug("Updated PD address to 0x%llx\n", pd_base); 866 867 if (!list_empty(&qpd->queues_list)) { 868 dqm->dev->kfd2kgd->set_vm_context_page_table_base( 869 dqm->dev->adev, 870 qpd->vmid, 871 qpd->page_table_base); 872 kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY); 873 } 874 875 /* Take a safe reference to the mm_struct, which may otherwise 876 * disappear even while the kfd_process is still referenced. 877 */ 878 mm = get_task_mm(pdd->process->lead_thread); 879 if (!mm) { 880 ret = -EFAULT; 881 goto out; 882 } 883 884 /* Remove the eviction flags. Activate queues that are not 885 * inactive for other reasons. 886 */ 887 list_for_each_entry(q, &qpd->queues_list, list) { 888 q->properties.is_evicted = false; 889 if (!QUEUE_IS_ACTIVE(q->properties)) 890 continue; 891 892 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 893 q->properties.type)]; 894 q->properties.is_active = true; 895 increment_queue_count(dqm, qpd, q); 896 897 if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n")) 898 continue; 899 900 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, 901 q->queue, &q->properties, mm); 902 if (retval && !ret) 903 /* Return the first error, but keep going to 904 * maintain a consistent eviction state 905 */ 906 ret = retval; 907 } 908 qpd->evicted = 0; 909 eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp; 910 atomic64_add(eviction_duration, &pdd->evict_duration_counter); 911 out: 912 if (mm) 913 mmput(mm); 914 dqm_unlock(dqm); 915 return ret; 916 } 917 918 static int restore_process_queues_cpsch(struct device_queue_manager *dqm, 919 struct qcm_process_device *qpd) 920 { 921 struct queue *q; 922 struct kfd_process_device *pdd; 923 uint64_t pd_base; 924 uint64_t eviction_duration; 925 int retval = 0; 926 927 pdd = qpd_to_pdd(qpd); 928 /* Retrieve PD base */ 929 pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); 930 931 dqm_lock(dqm); 932 if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */ 933 goto out; 934 if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */ 935 qpd->evicted--; 936 goto out; 937 } 938 939 pr_debug_ratelimited("Restoring PASID 0x%x queues\n", 940 pdd->process->pasid); 941 942 /* Update PD Base in QPD */ 943 qpd->page_table_base = pd_base; 944 pr_debug("Updated PD address to 0x%llx\n", pd_base); 945 946 /* activate all active queues on the qpd */ 947 list_for_each_entry(q, &qpd->queues_list, list) { 948 q->properties.is_evicted = false; 949 if (!QUEUE_IS_ACTIVE(q->properties)) 950 continue; 951 952 q->properties.is_active = true; 953 increment_queue_count(dqm, &pdd->qpd, q); 954 } 955 retval = execute_queues_cpsch(dqm, 956 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 957 qpd->evicted = 0; 958 eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp; 959 atomic64_add(eviction_duration, &pdd->evict_duration_counter); 960 out: 961 dqm_unlock(dqm); 962 return retval; 963 } 964 965 static int register_process(struct device_queue_manager *dqm, 966 struct qcm_process_device *qpd) 967 { 968 struct device_process_node *n; 969 struct kfd_process_device *pdd; 970 uint64_t pd_base; 971 int retval; 972 973 n = kzalloc(sizeof(*n), GFP_KERNEL); 974 if (!n) 975 return -ENOMEM; 976 977 n->qpd = qpd; 978 979 pdd = qpd_to_pdd(qpd); 980 /* Retrieve PD base */ 981 pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); 982 983 dqm_lock(dqm); 984 list_add(&n->list, &dqm->queues); 985 986 /* Update PD Base in QPD */ 987 qpd->page_table_base = pd_base; 988 pr_debug("Updated PD address to 0x%llx\n", pd_base); 989 990 retval = dqm->asic_ops.update_qpd(dqm, qpd); 991 992 dqm->processes_count++; 993 994 dqm_unlock(dqm); 995 996 /* Outside the DQM lock because under the DQM lock we can't do 997 * reclaim or take other locks that others hold while reclaiming. 998 */ 999 kfd_inc_compute_active(dqm->dev); 1000 1001 return retval; 1002 } 1003 1004 static int unregister_process(struct device_queue_manager *dqm, 1005 struct qcm_process_device *qpd) 1006 { 1007 int retval; 1008 struct device_process_node *cur, *next; 1009 1010 pr_debug("qpd->queues_list is %s\n", 1011 list_empty(&qpd->queues_list) ? "empty" : "not empty"); 1012 1013 retval = 0; 1014 dqm_lock(dqm); 1015 1016 list_for_each_entry_safe(cur, next, &dqm->queues, list) { 1017 if (qpd == cur->qpd) { 1018 list_del(&cur->list); 1019 kfree(cur); 1020 dqm->processes_count--; 1021 goto out; 1022 } 1023 } 1024 /* qpd not found in dqm list */ 1025 retval = 1; 1026 out: 1027 dqm_unlock(dqm); 1028 1029 /* Outside the DQM lock because under the DQM lock we can't do 1030 * reclaim or take other locks that others hold while reclaiming. 1031 */ 1032 if (!retval) 1033 kfd_dec_compute_active(dqm->dev); 1034 1035 return retval; 1036 } 1037 1038 static int 1039 set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid, 1040 unsigned int vmid) 1041 { 1042 return dqm->dev->kfd2kgd->set_pasid_vmid_mapping( 1043 dqm->dev->adev, pasid, vmid); 1044 } 1045 1046 static void init_interrupts(struct device_queue_manager *dqm) 1047 { 1048 unsigned int i; 1049 1050 for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++) 1051 if (is_pipe_enabled(dqm, 0, i)) 1052 dqm->dev->kfd2kgd->init_interrupts(dqm->dev->adev, i); 1053 } 1054 1055 static int initialize_nocpsch(struct device_queue_manager *dqm) 1056 { 1057 int pipe, queue; 1058 1059 pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm)); 1060 1061 dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm), 1062 sizeof(unsigned int), GFP_KERNEL); 1063 if (!dqm->allocated_queues) 1064 return -ENOMEM; 1065 1066 mutex_init(&dqm->lock_hidden); 1067 INIT_LIST_HEAD(&dqm->queues); 1068 dqm->active_queue_count = dqm->next_pipe_to_allocate = 0; 1069 dqm->active_cp_queue_count = 0; 1070 dqm->gws_queue_count = 0; 1071 1072 for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { 1073 int pipe_offset = pipe * get_queues_per_pipe(dqm); 1074 1075 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) 1076 if (test_bit(pipe_offset + queue, 1077 dqm->dev->shared_resources.cp_queue_bitmap)) 1078 dqm->allocated_queues[pipe] |= 1 << queue; 1079 } 1080 1081 memset(dqm->vmid_pasid, 0, sizeof(dqm->vmid_pasid)); 1082 1083 dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm)); 1084 dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm)); 1085 1086 return 0; 1087 } 1088 1089 static void uninitialize(struct device_queue_manager *dqm) 1090 { 1091 int i; 1092 1093 WARN_ON(dqm->active_queue_count > 0 || dqm->processes_count > 0); 1094 1095 kfree(dqm->allocated_queues); 1096 for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++) 1097 kfree(dqm->mqd_mgrs[i]); 1098 mutex_destroy(&dqm->lock_hidden); 1099 } 1100 1101 static int start_nocpsch(struct device_queue_manager *dqm) 1102 { 1103 int r = 0; 1104 1105 pr_info("SW scheduler is used"); 1106 init_interrupts(dqm); 1107 1108 if (dqm->dev->adev->asic_type == CHIP_HAWAII) 1109 r = pm_init(&dqm->packet_mgr, dqm); 1110 if (!r) 1111 dqm->sched_running = true; 1112 1113 return r; 1114 } 1115 1116 static int stop_nocpsch(struct device_queue_manager *dqm) 1117 { 1118 if (dqm->dev->adev->asic_type == CHIP_HAWAII) 1119 pm_uninit(&dqm->packet_mgr, false); 1120 dqm->sched_running = false; 1121 1122 return 0; 1123 } 1124 1125 static void pre_reset(struct device_queue_manager *dqm) 1126 { 1127 dqm_lock(dqm); 1128 dqm->is_resetting = true; 1129 dqm_unlock(dqm); 1130 } 1131 1132 static int allocate_sdma_queue(struct device_queue_manager *dqm, 1133 struct queue *q, const uint32_t *restore_sdma_id) 1134 { 1135 int bit; 1136 1137 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { 1138 if (dqm->sdma_bitmap == 0) { 1139 pr_err("No more SDMA queue to allocate\n"); 1140 return -ENOMEM; 1141 } 1142 1143 if (restore_sdma_id) { 1144 /* Re-use existing sdma_id */ 1145 if (!(dqm->sdma_bitmap & (1ULL << *restore_sdma_id))) { 1146 pr_err("SDMA queue already in use\n"); 1147 return -EBUSY; 1148 } 1149 dqm->sdma_bitmap &= ~(1ULL << *restore_sdma_id); 1150 q->sdma_id = *restore_sdma_id; 1151 } else { 1152 /* Find first available sdma_id */ 1153 bit = __ffs64(dqm->sdma_bitmap); 1154 dqm->sdma_bitmap &= ~(1ULL << bit); 1155 q->sdma_id = bit; 1156 } 1157 1158 q->properties.sdma_engine_id = q->sdma_id % 1159 kfd_get_num_sdma_engines(dqm->dev); 1160 q->properties.sdma_queue_id = q->sdma_id / 1161 kfd_get_num_sdma_engines(dqm->dev); 1162 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 1163 if (dqm->xgmi_sdma_bitmap == 0) { 1164 pr_err("No more XGMI SDMA queue to allocate\n"); 1165 return -ENOMEM; 1166 } 1167 if (restore_sdma_id) { 1168 /* Re-use existing sdma_id */ 1169 if (!(dqm->xgmi_sdma_bitmap & (1ULL << *restore_sdma_id))) { 1170 pr_err("SDMA queue already in use\n"); 1171 return -EBUSY; 1172 } 1173 dqm->xgmi_sdma_bitmap &= ~(1ULL << *restore_sdma_id); 1174 q->sdma_id = *restore_sdma_id; 1175 } else { 1176 bit = __ffs64(dqm->xgmi_sdma_bitmap); 1177 dqm->xgmi_sdma_bitmap &= ~(1ULL << bit); 1178 q->sdma_id = bit; 1179 } 1180 /* sdma_engine_id is sdma id including 1181 * both PCIe-optimized SDMAs and XGMI- 1182 * optimized SDMAs. The calculation below 1183 * assumes the first N engines are always 1184 * PCIe-optimized ones 1185 */ 1186 q->properties.sdma_engine_id = 1187 kfd_get_num_sdma_engines(dqm->dev) + 1188 q->sdma_id % kfd_get_num_xgmi_sdma_engines(dqm->dev); 1189 q->properties.sdma_queue_id = q->sdma_id / 1190 kfd_get_num_xgmi_sdma_engines(dqm->dev); 1191 } 1192 1193 pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); 1194 pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id); 1195 1196 return 0; 1197 } 1198 1199 static void deallocate_sdma_queue(struct device_queue_manager *dqm, 1200 struct queue *q) 1201 { 1202 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { 1203 if (q->sdma_id >= get_num_sdma_queues(dqm)) 1204 return; 1205 dqm->sdma_bitmap |= (1ULL << q->sdma_id); 1206 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 1207 if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm)) 1208 return; 1209 dqm->xgmi_sdma_bitmap |= (1ULL << q->sdma_id); 1210 } 1211 } 1212 1213 /* 1214 * Device Queue Manager implementation for cp scheduler 1215 */ 1216 1217 static int set_sched_resources(struct device_queue_manager *dqm) 1218 { 1219 int i, mec; 1220 struct scheduling_resources res; 1221 1222 res.vmid_mask = dqm->dev->shared_resources.compute_vmid_bitmap; 1223 1224 res.queue_mask = 0; 1225 for (i = 0; i < KGD_MAX_QUEUES; ++i) { 1226 mec = (i / dqm->dev->shared_resources.num_queue_per_pipe) 1227 / dqm->dev->shared_resources.num_pipe_per_mec; 1228 1229 if (!test_bit(i, dqm->dev->shared_resources.cp_queue_bitmap)) 1230 continue; 1231 1232 /* only acquire queues from the first MEC */ 1233 if (mec > 0) 1234 continue; 1235 1236 /* This situation may be hit in the future if a new HW 1237 * generation exposes more than 64 queues. If so, the 1238 * definition of res.queue_mask needs updating 1239 */ 1240 if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) { 1241 pr_err("Invalid queue enabled by amdgpu: %d\n", i); 1242 break; 1243 } 1244 1245 res.queue_mask |= 1ull 1246 << amdgpu_queue_mask_bit_to_set_resource_bit( 1247 dqm->dev->adev, i); 1248 } 1249 res.gws_mask = ~0ull; 1250 res.oac_mask = res.gds_heap_base = res.gds_heap_size = 0; 1251 1252 pr_debug("Scheduling resources:\n" 1253 "vmid mask: 0x%8X\n" 1254 "queue mask: 0x%8llX\n", 1255 res.vmid_mask, res.queue_mask); 1256 1257 return pm_send_set_resources(&dqm->packet_mgr, &res); 1258 } 1259 1260 static int initialize_cpsch(struct device_queue_manager *dqm) 1261 { 1262 uint64_t num_sdma_queues; 1263 uint64_t num_xgmi_sdma_queues; 1264 1265 pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm)); 1266 1267 mutex_init(&dqm->lock_hidden); 1268 INIT_LIST_HEAD(&dqm->queues); 1269 dqm->active_queue_count = dqm->processes_count = 0; 1270 dqm->active_cp_queue_count = 0; 1271 dqm->gws_queue_count = 0; 1272 dqm->active_runlist = false; 1273 1274 num_sdma_queues = get_num_sdma_queues(dqm); 1275 if (num_sdma_queues >= BITS_PER_TYPE(dqm->sdma_bitmap)) 1276 dqm->sdma_bitmap = ULLONG_MAX; 1277 else 1278 dqm->sdma_bitmap = (BIT_ULL(num_sdma_queues) - 1); 1279 1280 num_xgmi_sdma_queues = get_num_xgmi_sdma_queues(dqm); 1281 if (num_xgmi_sdma_queues >= BITS_PER_TYPE(dqm->xgmi_sdma_bitmap)) 1282 dqm->xgmi_sdma_bitmap = ULLONG_MAX; 1283 else 1284 dqm->xgmi_sdma_bitmap = (BIT_ULL(num_xgmi_sdma_queues) - 1); 1285 1286 INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception); 1287 1288 return 0; 1289 } 1290 1291 static int start_cpsch(struct device_queue_manager *dqm) 1292 { 1293 int retval; 1294 1295 retval = 0; 1296 1297 dqm_lock(dqm); 1298 retval = pm_init(&dqm->packet_mgr, dqm); 1299 if (retval) 1300 goto fail_packet_manager_init; 1301 1302 retval = set_sched_resources(dqm); 1303 if (retval) 1304 goto fail_set_sched_resources; 1305 1306 pr_debug("Allocating fence memory\n"); 1307 1308 /* allocate fence memory on the gart */ 1309 retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr), 1310 &dqm->fence_mem); 1311 1312 if (retval) 1313 goto fail_allocate_vidmem; 1314 1315 dqm->fence_addr = (uint64_t *)dqm->fence_mem->cpu_ptr; 1316 dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr; 1317 1318 init_interrupts(dqm); 1319 1320 /* clear hang status when driver try to start the hw scheduler */ 1321 dqm->is_hws_hang = false; 1322 dqm->is_resetting = false; 1323 dqm->sched_running = true; 1324 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 1325 dqm_unlock(dqm); 1326 1327 return 0; 1328 fail_allocate_vidmem: 1329 fail_set_sched_resources: 1330 pm_uninit(&dqm->packet_mgr, false); 1331 fail_packet_manager_init: 1332 dqm_unlock(dqm); 1333 return retval; 1334 } 1335 1336 static int stop_cpsch(struct device_queue_manager *dqm) 1337 { 1338 bool hanging; 1339 1340 dqm_lock(dqm); 1341 if (!dqm->sched_running) { 1342 dqm_unlock(dqm); 1343 return 0; 1344 } 1345 1346 if (!dqm->is_hws_hang) 1347 unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, false); 1348 hanging = dqm->is_hws_hang || dqm->is_resetting; 1349 dqm->sched_running = false; 1350 1351 pm_release_ib(&dqm->packet_mgr); 1352 1353 kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); 1354 pm_uninit(&dqm->packet_mgr, hanging); 1355 dqm_unlock(dqm); 1356 1357 return 0; 1358 } 1359 1360 static int create_kernel_queue_cpsch(struct device_queue_manager *dqm, 1361 struct kernel_queue *kq, 1362 struct qcm_process_device *qpd) 1363 { 1364 dqm_lock(dqm); 1365 if (dqm->total_queue_count >= max_num_of_queues_per_device) { 1366 pr_warn("Can't create new kernel queue because %d queues were already created\n", 1367 dqm->total_queue_count); 1368 dqm_unlock(dqm); 1369 return -EPERM; 1370 } 1371 1372 /* 1373 * Unconditionally increment this counter, regardless of the queue's 1374 * type or whether the queue is active. 1375 */ 1376 dqm->total_queue_count++; 1377 pr_debug("Total of %d queues are accountable so far\n", 1378 dqm->total_queue_count); 1379 1380 list_add(&kq->list, &qpd->priv_queue_list); 1381 increment_queue_count(dqm, qpd, kq->queue); 1382 qpd->is_debug = true; 1383 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 1384 dqm_unlock(dqm); 1385 1386 return 0; 1387 } 1388 1389 static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm, 1390 struct kernel_queue *kq, 1391 struct qcm_process_device *qpd) 1392 { 1393 dqm_lock(dqm); 1394 list_del(&kq->list); 1395 decrement_queue_count(dqm, qpd, kq->queue); 1396 qpd->is_debug = false; 1397 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); 1398 /* 1399 * Unconditionally decrement this counter, regardless of the queue's 1400 * type. 1401 */ 1402 dqm->total_queue_count--; 1403 pr_debug("Total of %d queues are accountable so far\n", 1404 dqm->total_queue_count); 1405 dqm_unlock(dqm); 1406 } 1407 1408 static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, 1409 struct qcm_process_device *qpd, 1410 const struct kfd_criu_queue_priv_data *qd, 1411 const void *restore_mqd, const void *restore_ctl_stack) 1412 { 1413 int retval; 1414 struct mqd_manager *mqd_mgr; 1415 1416 if (dqm->total_queue_count >= max_num_of_queues_per_device) { 1417 pr_warn("Can't create new usermode queue because %d queues were already created\n", 1418 dqm->total_queue_count); 1419 retval = -EPERM; 1420 goto out; 1421 } 1422 1423 if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 1424 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 1425 dqm_lock(dqm); 1426 retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL); 1427 dqm_unlock(dqm); 1428 if (retval) 1429 goto out; 1430 } 1431 1432 retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL); 1433 if (retval) 1434 goto out_deallocate_sdma_queue; 1435 1436 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 1437 q->properties.type)]; 1438 1439 if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 1440 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 1441 dqm->asic_ops.init_sdma_vm(dqm, q, qpd); 1442 q->properties.tba_addr = qpd->tba_addr; 1443 q->properties.tma_addr = qpd->tma_addr; 1444 q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties); 1445 if (!q->mqd_mem_obj) { 1446 retval = -ENOMEM; 1447 goto out_deallocate_doorbell; 1448 } 1449 1450 dqm_lock(dqm); 1451 /* 1452 * Eviction state logic: mark all queues as evicted, even ones 1453 * not currently active. Restoring inactive queues later only 1454 * updates the is_evicted flag but is a no-op otherwise. 1455 */ 1456 q->properties.is_evicted = !!qpd->evicted; 1457 1458 if (qd) 1459 mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr, 1460 &q->properties, restore_mqd, restore_ctl_stack, 1461 qd->ctl_stack_size); 1462 else 1463 mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, 1464 &q->gart_mqd_addr, &q->properties); 1465 1466 list_add(&q->list, &qpd->queues_list); 1467 qpd->queue_count++; 1468 1469 if (q->properties.is_active) { 1470 increment_queue_count(dqm, qpd, q); 1471 1472 execute_queues_cpsch(dqm, 1473 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 1474 } 1475 1476 /* 1477 * Unconditionally increment this counter, regardless of the queue's 1478 * type or whether the queue is active. 1479 */ 1480 dqm->total_queue_count++; 1481 1482 pr_debug("Total of %d queues are accountable so far\n", 1483 dqm->total_queue_count); 1484 1485 dqm_unlock(dqm); 1486 return retval; 1487 1488 out_deallocate_doorbell: 1489 deallocate_doorbell(qpd, q); 1490 out_deallocate_sdma_queue: 1491 if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 1492 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 1493 dqm_lock(dqm); 1494 deallocate_sdma_queue(dqm, q); 1495 dqm_unlock(dqm); 1496 } 1497 out: 1498 return retval; 1499 } 1500 1501 int amdkfd_fence_wait_timeout(uint64_t *fence_addr, 1502 uint64_t fence_value, 1503 unsigned int timeout_ms) 1504 { 1505 unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies; 1506 1507 while (*fence_addr != fence_value) { 1508 if (time_after(jiffies, end_jiffies)) { 1509 pr_err("qcm fence wait loop timeout expired\n"); 1510 /* In HWS case, this is used to halt the driver thread 1511 * in order not to mess up CP states before doing 1512 * scandumps for FW debugging. 1513 */ 1514 while (halt_if_hws_hang) 1515 schedule(); 1516 1517 return -ETIME; 1518 } 1519 schedule(); 1520 } 1521 1522 return 0; 1523 } 1524 1525 /* dqm->lock mutex has to be locked before calling this function */ 1526 static int map_queues_cpsch(struct device_queue_manager *dqm) 1527 { 1528 int retval; 1529 1530 if (!dqm->sched_running) 1531 return 0; 1532 if (dqm->active_queue_count <= 0 || dqm->processes_count <= 0) 1533 return 0; 1534 if (dqm->active_runlist) 1535 return 0; 1536 1537 retval = pm_send_runlist(&dqm->packet_mgr, &dqm->queues); 1538 pr_debug("%s sent runlist\n", __func__); 1539 if (retval) { 1540 pr_err("failed to execute runlist\n"); 1541 return retval; 1542 } 1543 dqm->active_runlist = true; 1544 1545 return retval; 1546 } 1547 1548 /* dqm->lock mutex has to be locked before calling this function */ 1549 static int unmap_queues_cpsch(struct device_queue_manager *dqm, 1550 enum kfd_unmap_queues_filter filter, 1551 uint32_t filter_param, bool reset) 1552 { 1553 int retval = 0; 1554 struct mqd_manager *mqd_mgr; 1555 1556 if (!dqm->sched_running) 1557 return 0; 1558 if (dqm->is_hws_hang || dqm->is_resetting) 1559 return -EIO; 1560 if (!dqm->active_runlist) 1561 return retval; 1562 1563 retval = pm_send_unmap_queue(&dqm->packet_mgr, filter, filter_param, reset); 1564 if (retval) 1565 return retval; 1566 1567 *dqm->fence_addr = KFD_FENCE_INIT; 1568 pm_send_query_status(&dqm->packet_mgr, dqm->fence_gpu_addr, 1569 KFD_FENCE_COMPLETED); 1570 /* should be timed out */ 1571 retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED, 1572 queue_preemption_timeout_ms); 1573 if (retval) { 1574 pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n"); 1575 dqm->is_hws_hang = true; 1576 /* It's possible we're detecting a HWS hang in the 1577 * middle of a GPU reset. No need to schedule another 1578 * reset in this case. 1579 */ 1580 if (!dqm->is_resetting) 1581 schedule_work(&dqm->hw_exception_work); 1582 return retval; 1583 } 1584 1585 /* In the current MEC firmware implementation, if compute queue 1586 * doesn't response to the preemption request in time, HIQ will 1587 * abandon the unmap request without returning any timeout error 1588 * to driver. Instead, MEC firmware will log the doorbell of the 1589 * unresponding compute queue to HIQ.MQD.queue_doorbell_id fields. 1590 * To make sure the queue unmap was successful, driver need to 1591 * check those fields 1592 */ 1593 mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]; 1594 if (mqd_mgr->read_doorbell_id(dqm->packet_mgr.priv_queue->queue->mqd)) { 1595 pr_err("HIQ MQD's queue_doorbell_id0 is not 0, Queue preemption time out\n"); 1596 while (halt_if_hws_hang) 1597 schedule(); 1598 return -ETIME; 1599 } 1600 1601 pm_release_ib(&dqm->packet_mgr); 1602 dqm->active_runlist = false; 1603 1604 return retval; 1605 } 1606 1607 /* only for compute queue */ 1608 static int reset_queues_cpsch(struct device_queue_manager *dqm, 1609 uint16_t pasid) 1610 { 1611 int retval; 1612 1613 dqm_lock(dqm); 1614 1615 retval = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_BY_PASID, 1616 pasid, true); 1617 1618 dqm_unlock(dqm); 1619 return retval; 1620 } 1621 1622 /* dqm->lock mutex has to be locked before calling this function */ 1623 static int execute_queues_cpsch(struct device_queue_manager *dqm, 1624 enum kfd_unmap_queues_filter filter, 1625 uint32_t filter_param) 1626 { 1627 int retval; 1628 1629 if (dqm->is_hws_hang) 1630 return -EIO; 1631 retval = unmap_queues_cpsch(dqm, filter, filter_param, false); 1632 if (retval) 1633 return retval; 1634 1635 return map_queues_cpsch(dqm); 1636 } 1637 1638 static int destroy_queue_cpsch(struct device_queue_manager *dqm, 1639 struct qcm_process_device *qpd, 1640 struct queue *q) 1641 { 1642 int retval; 1643 struct mqd_manager *mqd_mgr; 1644 uint64_t sdma_val = 0; 1645 struct kfd_process_device *pdd = qpd_to_pdd(qpd); 1646 1647 /* Get the SDMA queue stats */ 1648 if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || 1649 (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 1650 retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr, 1651 &sdma_val); 1652 if (retval) 1653 pr_err("Failed to read SDMA queue counter for queue: %d\n", 1654 q->properties.queue_id); 1655 } 1656 1657 retval = 0; 1658 1659 /* remove queue from list to prevent rescheduling after preemption */ 1660 dqm_lock(dqm); 1661 1662 if (qpd->is_debug) { 1663 /* 1664 * error, currently we do not allow to destroy a queue 1665 * of a currently debugged process 1666 */ 1667 retval = -EBUSY; 1668 goto failed_try_destroy_debugged_queue; 1669 1670 } 1671 1672 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 1673 q->properties.type)]; 1674 1675 deallocate_doorbell(qpd, q); 1676 1677 if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || 1678 (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 1679 deallocate_sdma_queue(dqm, q); 1680 pdd->sdma_past_activity_counter += sdma_val; 1681 } 1682 1683 list_del(&q->list); 1684 qpd->queue_count--; 1685 if (q->properties.is_active) { 1686 decrement_queue_count(dqm, qpd, q); 1687 retval = execute_queues_cpsch(dqm, 1688 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 1689 if (retval == -ETIME) 1690 qpd->reset_wavefronts = true; 1691 } 1692 1693 /* 1694 * Unconditionally decrement this counter, regardless of the queue's 1695 * type 1696 */ 1697 dqm->total_queue_count--; 1698 pr_debug("Total of %d queues are accountable so far\n", 1699 dqm->total_queue_count); 1700 1701 dqm_unlock(dqm); 1702 1703 /* Do free_mqd after dqm_unlock(dqm) to avoid circular locking */ 1704 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 1705 1706 return retval; 1707 1708 failed_try_destroy_debugged_queue: 1709 1710 dqm_unlock(dqm); 1711 return retval; 1712 } 1713 1714 /* 1715 * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to 1716 * stay in user mode. 1717 */ 1718 #define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL 1719 /* APE1 limit is inclusive and 64K aligned. */ 1720 #define APE1_LIMIT_ALIGNMENT 0xFFFF 1721 1722 static bool set_cache_memory_policy(struct device_queue_manager *dqm, 1723 struct qcm_process_device *qpd, 1724 enum cache_policy default_policy, 1725 enum cache_policy alternate_policy, 1726 void __user *alternate_aperture_base, 1727 uint64_t alternate_aperture_size) 1728 { 1729 bool retval = true; 1730 1731 if (!dqm->asic_ops.set_cache_memory_policy) 1732 return retval; 1733 1734 dqm_lock(dqm); 1735 1736 if (alternate_aperture_size == 0) { 1737 /* base > limit disables APE1 */ 1738 qpd->sh_mem_ape1_base = 1; 1739 qpd->sh_mem_ape1_limit = 0; 1740 } else { 1741 /* 1742 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]}, 1743 * SH_MEM_APE1_BASE[31:0], 0x0000 } 1744 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]}, 1745 * SH_MEM_APE1_LIMIT[31:0], 0xFFFF } 1746 * Verify that the base and size parameters can be 1747 * represented in this format and convert them. 1748 * Additionally restrict APE1 to user-mode addresses. 1749 */ 1750 1751 uint64_t base = (uintptr_t)alternate_aperture_base; 1752 uint64_t limit = base + alternate_aperture_size - 1; 1753 1754 if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 || 1755 (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) { 1756 retval = false; 1757 goto out; 1758 } 1759 1760 qpd->sh_mem_ape1_base = base >> 16; 1761 qpd->sh_mem_ape1_limit = limit >> 16; 1762 } 1763 1764 retval = dqm->asic_ops.set_cache_memory_policy( 1765 dqm, 1766 qpd, 1767 default_policy, 1768 alternate_policy, 1769 alternate_aperture_base, 1770 alternate_aperture_size); 1771 1772 if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0)) 1773 program_sh_mem_settings(dqm, qpd); 1774 1775 pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n", 1776 qpd->sh_mem_config, qpd->sh_mem_ape1_base, 1777 qpd->sh_mem_ape1_limit); 1778 1779 out: 1780 dqm_unlock(dqm); 1781 return retval; 1782 } 1783 1784 static int process_termination_nocpsch(struct device_queue_manager *dqm, 1785 struct qcm_process_device *qpd) 1786 { 1787 struct queue *q; 1788 struct device_process_node *cur, *next_dpn; 1789 int retval = 0; 1790 bool found = false; 1791 1792 dqm_lock(dqm); 1793 1794 /* Clear all user mode queues */ 1795 while (!list_empty(&qpd->queues_list)) { 1796 struct mqd_manager *mqd_mgr; 1797 int ret; 1798 1799 q = list_first_entry(&qpd->queues_list, struct queue, list); 1800 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 1801 q->properties.type)]; 1802 ret = destroy_queue_nocpsch_locked(dqm, qpd, q); 1803 if (ret) 1804 retval = ret; 1805 dqm_unlock(dqm); 1806 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 1807 dqm_lock(dqm); 1808 } 1809 1810 /* Unregister process */ 1811 list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) { 1812 if (qpd == cur->qpd) { 1813 list_del(&cur->list); 1814 kfree(cur); 1815 dqm->processes_count--; 1816 found = true; 1817 break; 1818 } 1819 } 1820 1821 dqm_unlock(dqm); 1822 1823 /* Outside the DQM lock because under the DQM lock we can't do 1824 * reclaim or take other locks that others hold while reclaiming. 1825 */ 1826 if (found) 1827 kfd_dec_compute_active(dqm->dev); 1828 1829 return retval; 1830 } 1831 1832 static int get_wave_state(struct device_queue_manager *dqm, 1833 struct queue *q, 1834 void __user *ctl_stack, 1835 u32 *ctl_stack_used_size, 1836 u32 *save_area_used_size) 1837 { 1838 struct mqd_manager *mqd_mgr; 1839 1840 dqm_lock(dqm); 1841 1842 mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP]; 1843 1844 if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE || 1845 q->properties.is_active || !q->device->cwsr_enabled || 1846 !mqd_mgr->get_wave_state) { 1847 dqm_unlock(dqm); 1848 return -EINVAL; 1849 } 1850 1851 dqm_unlock(dqm); 1852 1853 /* 1854 * get_wave_state is outside the dqm lock to prevent circular locking 1855 * and the queue should be protected against destruction by the process 1856 * lock. 1857 */ 1858 return mqd_mgr->get_wave_state(mqd_mgr, q->mqd, ctl_stack, 1859 ctl_stack_used_size, save_area_used_size); 1860 } 1861 1862 static void get_queue_checkpoint_info(struct device_queue_manager *dqm, 1863 const struct queue *q, 1864 u32 *mqd_size, 1865 u32 *ctl_stack_size) 1866 { 1867 struct mqd_manager *mqd_mgr; 1868 enum KFD_MQD_TYPE mqd_type = 1869 get_mqd_type_from_queue_type(q->properties.type); 1870 1871 dqm_lock(dqm); 1872 mqd_mgr = dqm->mqd_mgrs[mqd_type]; 1873 *mqd_size = mqd_mgr->mqd_size; 1874 *ctl_stack_size = 0; 1875 1876 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE && mqd_mgr->get_checkpoint_info) 1877 mqd_mgr->get_checkpoint_info(mqd_mgr, q->mqd, ctl_stack_size); 1878 1879 dqm_unlock(dqm); 1880 } 1881 1882 static int checkpoint_mqd(struct device_queue_manager *dqm, 1883 const struct queue *q, 1884 void *mqd, 1885 void *ctl_stack) 1886 { 1887 struct mqd_manager *mqd_mgr; 1888 int r = 0; 1889 enum KFD_MQD_TYPE mqd_type = 1890 get_mqd_type_from_queue_type(q->properties.type); 1891 1892 dqm_lock(dqm); 1893 1894 if (q->properties.is_active || !q->device->cwsr_enabled) { 1895 r = -EINVAL; 1896 goto dqm_unlock; 1897 } 1898 1899 mqd_mgr = dqm->mqd_mgrs[mqd_type]; 1900 if (!mqd_mgr->checkpoint_mqd) { 1901 r = -EOPNOTSUPP; 1902 goto dqm_unlock; 1903 } 1904 1905 mqd_mgr->checkpoint_mqd(mqd_mgr, q->mqd, mqd, ctl_stack); 1906 1907 dqm_unlock: 1908 dqm_unlock(dqm); 1909 return r; 1910 } 1911 1912 static int process_termination_cpsch(struct device_queue_manager *dqm, 1913 struct qcm_process_device *qpd) 1914 { 1915 int retval; 1916 struct queue *q; 1917 struct kernel_queue *kq, *kq_next; 1918 struct mqd_manager *mqd_mgr; 1919 struct device_process_node *cur, *next_dpn; 1920 enum kfd_unmap_queues_filter filter = 1921 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES; 1922 bool found = false; 1923 1924 retval = 0; 1925 1926 dqm_lock(dqm); 1927 1928 /* Clean all kernel queues */ 1929 list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) { 1930 list_del(&kq->list); 1931 decrement_queue_count(dqm, qpd, kq->queue); 1932 qpd->is_debug = false; 1933 dqm->total_queue_count--; 1934 filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES; 1935 } 1936 1937 /* Clear all user mode queues */ 1938 list_for_each_entry(q, &qpd->queues_list, list) { 1939 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) 1940 deallocate_sdma_queue(dqm, q); 1941 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 1942 deallocate_sdma_queue(dqm, q); 1943 1944 if (q->properties.is_active) 1945 decrement_queue_count(dqm, qpd, q); 1946 1947 dqm->total_queue_count--; 1948 } 1949 1950 /* Unregister process */ 1951 list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) { 1952 if (qpd == cur->qpd) { 1953 list_del(&cur->list); 1954 kfree(cur); 1955 dqm->processes_count--; 1956 found = true; 1957 break; 1958 } 1959 } 1960 1961 retval = execute_queues_cpsch(dqm, filter, 0); 1962 if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) { 1963 pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev); 1964 dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process); 1965 qpd->reset_wavefronts = false; 1966 } 1967 1968 /* Lastly, free mqd resources. 1969 * Do free_mqd() after dqm_unlock to avoid circular locking. 1970 */ 1971 while (!list_empty(&qpd->queues_list)) { 1972 q = list_first_entry(&qpd->queues_list, struct queue, list); 1973 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 1974 q->properties.type)]; 1975 list_del(&q->list); 1976 qpd->queue_count--; 1977 dqm_unlock(dqm); 1978 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 1979 dqm_lock(dqm); 1980 } 1981 dqm_unlock(dqm); 1982 1983 /* Outside the DQM lock because under the DQM lock we can't do 1984 * reclaim or take other locks that others hold while reclaiming. 1985 */ 1986 if (found) 1987 kfd_dec_compute_active(dqm->dev); 1988 1989 return retval; 1990 } 1991 1992 static int init_mqd_managers(struct device_queue_manager *dqm) 1993 { 1994 int i, j; 1995 struct mqd_manager *mqd_mgr; 1996 1997 for (i = 0; i < KFD_MQD_TYPE_MAX; i++) { 1998 mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev); 1999 if (!mqd_mgr) { 2000 pr_err("mqd manager [%d] initialization failed\n", i); 2001 goto out_free; 2002 } 2003 dqm->mqd_mgrs[i] = mqd_mgr; 2004 } 2005 2006 return 0; 2007 2008 out_free: 2009 for (j = 0; j < i; j++) { 2010 kfree(dqm->mqd_mgrs[j]); 2011 dqm->mqd_mgrs[j] = NULL; 2012 } 2013 2014 return -ENOMEM; 2015 } 2016 2017 /* Allocate one hiq mqd (HWS) and all SDMA mqd in a continuous trunk*/ 2018 static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm) 2019 { 2020 int retval; 2021 struct kfd_dev *dev = dqm->dev; 2022 struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd; 2023 uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size * 2024 get_num_all_sdma_engines(dqm) * 2025 dev->device_info.num_sdma_queues_per_engine + 2026 dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size; 2027 2028 retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, size, 2029 &(mem_obj->gtt_mem), &(mem_obj->gpu_addr), 2030 (void *)&(mem_obj->cpu_ptr), false); 2031 2032 return retval; 2033 } 2034 2035 struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) 2036 { 2037 struct device_queue_manager *dqm; 2038 2039 pr_debug("Loading device queue manager\n"); 2040 2041 dqm = kzalloc(sizeof(*dqm), GFP_KERNEL); 2042 if (!dqm) 2043 return NULL; 2044 2045 switch (dev->adev->asic_type) { 2046 /* HWS is not available on Hawaii. */ 2047 case CHIP_HAWAII: 2048 /* HWS depends on CWSR for timely dequeue. CWSR is not 2049 * available on Tonga. 2050 * 2051 * FIXME: This argument also applies to Kaveri. 2052 */ 2053 case CHIP_TONGA: 2054 dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS; 2055 break; 2056 default: 2057 dqm->sched_policy = sched_policy; 2058 break; 2059 } 2060 2061 dqm->dev = dev; 2062 switch (dqm->sched_policy) { 2063 case KFD_SCHED_POLICY_HWS: 2064 case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION: 2065 /* initialize dqm for cp scheduling */ 2066 dqm->ops.create_queue = create_queue_cpsch; 2067 dqm->ops.initialize = initialize_cpsch; 2068 dqm->ops.start = start_cpsch; 2069 dqm->ops.stop = stop_cpsch; 2070 dqm->ops.pre_reset = pre_reset; 2071 dqm->ops.destroy_queue = destroy_queue_cpsch; 2072 dqm->ops.update_queue = update_queue; 2073 dqm->ops.register_process = register_process; 2074 dqm->ops.unregister_process = unregister_process; 2075 dqm->ops.uninitialize = uninitialize; 2076 dqm->ops.create_kernel_queue = create_kernel_queue_cpsch; 2077 dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch; 2078 dqm->ops.set_cache_memory_policy = set_cache_memory_policy; 2079 dqm->ops.process_termination = process_termination_cpsch; 2080 dqm->ops.evict_process_queues = evict_process_queues_cpsch; 2081 dqm->ops.restore_process_queues = restore_process_queues_cpsch; 2082 dqm->ops.get_wave_state = get_wave_state; 2083 dqm->ops.reset_queues = reset_queues_cpsch; 2084 dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info; 2085 dqm->ops.checkpoint_mqd = checkpoint_mqd; 2086 break; 2087 case KFD_SCHED_POLICY_NO_HWS: 2088 /* initialize dqm for no cp scheduling */ 2089 dqm->ops.start = start_nocpsch; 2090 dqm->ops.stop = stop_nocpsch; 2091 dqm->ops.pre_reset = pre_reset; 2092 dqm->ops.create_queue = create_queue_nocpsch; 2093 dqm->ops.destroy_queue = destroy_queue_nocpsch; 2094 dqm->ops.update_queue = update_queue; 2095 dqm->ops.register_process = register_process; 2096 dqm->ops.unregister_process = unregister_process; 2097 dqm->ops.initialize = initialize_nocpsch; 2098 dqm->ops.uninitialize = uninitialize; 2099 dqm->ops.set_cache_memory_policy = set_cache_memory_policy; 2100 dqm->ops.process_termination = process_termination_nocpsch; 2101 dqm->ops.evict_process_queues = evict_process_queues_nocpsch; 2102 dqm->ops.restore_process_queues = 2103 restore_process_queues_nocpsch; 2104 dqm->ops.get_wave_state = get_wave_state; 2105 dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info; 2106 dqm->ops.checkpoint_mqd = checkpoint_mqd; 2107 break; 2108 default: 2109 pr_err("Invalid scheduling policy %d\n", dqm->sched_policy); 2110 goto out_free; 2111 } 2112 2113 switch (dev->adev->asic_type) { 2114 case CHIP_CARRIZO: 2115 device_queue_manager_init_vi(&dqm->asic_ops); 2116 break; 2117 2118 case CHIP_KAVERI: 2119 device_queue_manager_init_cik(&dqm->asic_ops); 2120 break; 2121 2122 case CHIP_HAWAII: 2123 device_queue_manager_init_cik_hawaii(&dqm->asic_ops); 2124 break; 2125 2126 case CHIP_TONGA: 2127 case CHIP_FIJI: 2128 case CHIP_POLARIS10: 2129 case CHIP_POLARIS11: 2130 case CHIP_POLARIS12: 2131 case CHIP_VEGAM: 2132 device_queue_manager_init_vi_tonga(&dqm->asic_ops); 2133 break; 2134 2135 default: 2136 if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1)) 2137 device_queue_manager_init_v10_navi10(&dqm->asic_ops); 2138 else if (KFD_GC_VERSION(dev) >= IP_VERSION(9, 0, 1)) 2139 device_queue_manager_init_v9(&dqm->asic_ops); 2140 else { 2141 WARN(1, "Unexpected ASIC family %u", 2142 dev->adev->asic_type); 2143 goto out_free; 2144 } 2145 } 2146 2147 if (init_mqd_managers(dqm)) 2148 goto out_free; 2149 2150 if (allocate_hiq_sdma_mqd(dqm)) { 2151 pr_err("Failed to allocate hiq sdma mqd trunk buffer\n"); 2152 goto out_free; 2153 } 2154 2155 if (!dqm->ops.initialize(dqm)) 2156 return dqm; 2157 2158 out_free: 2159 kfree(dqm); 2160 return NULL; 2161 } 2162 2163 static void deallocate_hiq_sdma_mqd(struct kfd_dev *dev, 2164 struct kfd_mem_obj *mqd) 2165 { 2166 WARN(!mqd, "No hiq sdma mqd trunk to free"); 2167 2168 amdgpu_amdkfd_free_gtt_mem(dev->adev, mqd->gtt_mem); 2169 } 2170 2171 void device_queue_manager_uninit(struct device_queue_manager *dqm) 2172 { 2173 dqm->ops.uninitialize(dqm); 2174 deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd); 2175 kfree(dqm); 2176 } 2177 2178 int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid) 2179 { 2180 struct kfd_process_device *pdd; 2181 struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); 2182 int ret = 0; 2183 2184 if (!p) 2185 return -EINVAL; 2186 WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid); 2187 pdd = kfd_get_process_device_data(dqm->dev, p); 2188 if (pdd) 2189 ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd); 2190 kfd_unref_process(p); 2191 2192 return ret; 2193 } 2194 2195 static void kfd_process_hw_exception(struct work_struct *work) 2196 { 2197 struct device_queue_manager *dqm = container_of(work, 2198 struct device_queue_manager, hw_exception_work); 2199 amdgpu_amdkfd_gpu_reset(dqm->dev->adev); 2200 } 2201 2202 #if defined(CONFIG_DEBUG_FS) 2203 2204 static void seq_reg_dump(struct seq_file *m, 2205 uint32_t (*dump)[2], uint32_t n_regs) 2206 { 2207 uint32_t i, count; 2208 2209 for (i = 0, count = 0; i < n_regs; i++) { 2210 if (count == 0 || 2211 dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) { 2212 seq_printf(m, "%s %08x: %08x", 2213 i ? "\n" : "", 2214 dump[i][0], dump[i][1]); 2215 count = 7; 2216 } else { 2217 seq_printf(m, " %08x", dump[i][1]); 2218 count--; 2219 } 2220 } 2221 2222 seq_puts(m, "\n"); 2223 } 2224 2225 int dqm_debugfs_hqds(struct seq_file *m, void *data) 2226 { 2227 struct device_queue_manager *dqm = data; 2228 uint32_t (*dump)[2], n_regs; 2229 int pipe, queue; 2230 int r = 0; 2231 2232 if (!dqm->sched_running) { 2233 seq_puts(m, " Device is stopped\n"); 2234 return 0; 2235 } 2236 2237 r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev, 2238 KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE, 2239 &dump, &n_regs); 2240 if (!r) { 2241 seq_printf(m, " HIQ on MEC %d Pipe %d Queue %d\n", 2242 KFD_CIK_HIQ_PIPE/get_pipes_per_mec(dqm)+1, 2243 KFD_CIK_HIQ_PIPE%get_pipes_per_mec(dqm), 2244 KFD_CIK_HIQ_QUEUE); 2245 seq_reg_dump(m, dump, n_regs); 2246 2247 kfree(dump); 2248 } 2249 2250 for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { 2251 int pipe_offset = pipe * get_queues_per_pipe(dqm); 2252 2253 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) { 2254 if (!test_bit(pipe_offset + queue, 2255 dqm->dev->shared_resources.cp_queue_bitmap)) 2256 continue; 2257 2258 r = dqm->dev->kfd2kgd->hqd_dump( 2259 dqm->dev->adev, pipe, queue, &dump, &n_regs); 2260 if (r) 2261 break; 2262 2263 seq_printf(m, " CP Pipe %d, Queue %d\n", 2264 pipe, queue); 2265 seq_reg_dump(m, dump, n_regs); 2266 2267 kfree(dump); 2268 } 2269 } 2270 2271 for (pipe = 0; pipe < get_num_all_sdma_engines(dqm); pipe++) { 2272 for (queue = 0; 2273 queue < dqm->dev->device_info.num_sdma_queues_per_engine; 2274 queue++) { 2275 r = dqm->dev->kfd2kgd->hqd_sdma_dump( 2276 dqm->dev->adev, pipe, queue, &dump, &n_regs); 2277 if (r) 2278 break; 2279 2280 seq_printf(m, " SDMA Engine %d, RLC %d\n", 2281 pipe, queue); 2282 seq_reg_dump(m, dump, n_regs); 2283 2284 kfree(dump); 2285 } 2286 } 2287 2288 return r; 2289 } 2290 2291 int dqm_debugfs_hang_hws(struct device_queue_manager *dqm) 2292 { 2293 int r = 0; 2294 2295 dqm_lock(dqm); 2296 r = pm_debugfs_hang_hws(&dqm->packet_mgr); 2297 if (r) { 2298 dqm_unlock(dqm); 2299 return r; 2300 } 2301 dqm->active_runlist = true; 2302 r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); 2303 dqm_unlock(dqm); 2304 2305 return r; 2306 } 2307 2308 #endif 2309