1 // SPDX-License-Identifier: GPL-2.0 OR MIT 2 /* 3 * Copyright 2014-2022 Advanced Micro Devices, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21 * OTHER DEALINGS IN THE SOFTWARE. 22 * 23 */ 24 25 #include <linux/ratelimit.h> 26 #include <linux/printk.h> 27 #include <linux/slab.h> 28 #include <linux/list.h> 29 #include <linux/types.h> 30 #include <linux/bitops.h> 31 #include <linux/sched.h> 32 #include "kfd_priv.h" 33 #include "kfd_device_queue_manager.h" 34 #include "kfd_mqd_manager.h" 35 #include "cik_regs.h" 36 #include "kfd_kernel_queue.h" 37 #include "amdgpu_amdkfd.h" 38 39 /* Size of the per-pipe EOP queue */ 40 #define CIK_HPD_EOP_BYTES_LOG2 11 41 #define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2) 42 43 static int set_pasid_vmid_mapping(struct device_queue_manager *dqm, 44 u32 pasid, unsigned int vmid); 45 46 static int execute_queues_cpsch(struct device_queue_manager *dqm, 47 enum kfd_unmap_queues_filter filter, 48 uint32_t filter_param); 49 static int unmap_queues_cpsch(struct device_queue_manager *dqm, 50 enum kfd_unmap_queues_filter filter, 51 uint32_t filter_param, bool reset); 52 53 static int map_queues_cpsch(struct device_queue_manager *dqm); 54 55 static void deallocate_sdma_queue(struct device_queue_manager *dqm, 56 struct queue *q); 57 58 static inline void deallocate_hqd(struct device_queue_manager *dqm, 59 struct queue *q); 60 static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q); 61 static int allocate_sdma_queue(struct device_queue_manager *dqm, 62 struct queue *q, const uint32_t *restore_sdma_id); 63 static void kfd_process_hw_exception(struct work_struct *work); 64 65 static inline 66 enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type) 67 { 68 if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI) 69 return KFD_MQD_TYPE_SDMA; 70 return KFD_MQD_TYPE_CP; 71 } 72 73 static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe) 74 { 75 int i; 76 int pipe_offset = (mec * dqm->dev->shared_resources.num_pipe_per_mec 77 + pipe) * dqm->dev->shared_resources.num_queue_per_pipe; 78 79 /* queue is available for KFD usage if bit is 1 */ 80 for (i = 0; i < dqm->dev->shared_resources.num_queue_per_pipe; ++i) 81 if (test_bit(pipe_offset + i, 82 dqm->dev->shared_resources.cp_queue_bitmap)) 83 return true; 84 return false; 85 } 86 87 unsigned int get_cp_queues_num(struct device_queue_manager *dqm) 88 { 89 return bitmap_weight(dqm->dev->shared_resources.cp_queue_bitmap, 90 KGD_MAX_QUEUES); 91 
}

unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
{
	return dqm->dev->shared_resources.num_queue_per_pipe;
}

unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
{
	return dqm->dev->shared_resources.num_pipe_per_mec;
}

static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm)
{
	return kfd_get_num_sdma_engines(dqm->dev) +
		kfd_get_num_xgmi_sdma_engines(dqm->dev);
}

unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
{
	return kfd_get_num_sdma_engines(dqm->dev) *
		dqm->dev->device_info.num_sdma_queues_per_engine;
}

unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm)
{
	return kfd_get_num_xgmi_sdma_engines(dqm->dev) *
		dqm->dev->device_info.num_sdma_queues_per_engine;
}

void program_sh_mem_settings(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd)
{
	return dqm->dev->kfd2kgd->program_sh_mem_settings(
						dqm->dev->adev, qpd->vmid,
						qpd->sh_mem_config,
						qpd->sh_mem_ape1_base,
						qpd->sh_mem_ape1_limit,
						qpd->sh_mem_bases);
}

static void increment_queue_count(struct device_queue_manager *dqm,
				enum kfd_queue_type type)
{
	dqm->active_queue_count++;
	if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ)
		dqm->active_cp_queue_count++;
}

static void decrement_queue_count(struct device_queue_manager *dqm,
				enum kfd_queue_type type)
{
	dqm->active_queue_count--;
	if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ)
		dqm->active_cp_queue_count--;
}

/*
 * Allocate a doorbell ID to this queue.
 * If restore_id is passed in, make sure the requested ID is valid, then allocate it.
 */
static int allocate_doorbell(struct qcm_process_device *qpd,
			     struct queue *q,
			     uint32_t const *restore_id)
{
	struct kfd_dev *dev = qpd->dqm->dev;

	if (!KFD_IS_SOC15(dev)) {
		/* On pre-SOC15 chips we need to use the queue ID to
		 * preserve the user mode ABI.
		 */

		if (restore_id && *restore_id != q->properties.queue_id)
			return -EINVAL;

		q->doorbell_id = q->properties.queue_id;
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
			q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
		/* For SDMA queues on SOC15 with 8-byte doorbell, use static
		 * doorbell assignments based on the engine and queue id.
		 * The doorbell index distance between RLC (2*i) and (2*i+1)
		 * for a SDMA engine is 512.
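		 *
		 * As a purely illustrative example (the offset value is made
		 * up, not taken from any particular ASIC): with
		 * idx_offset[sdma_engine_id] == 0 and sdma_queue_id == 3, the
		 * computation below yields 0 + (3 & 1) * 512 + (3 >> 1) = 513,
		 * assuming KFD_QUEUE_DOORBELL_MIRROR_OFFSET is the 512
		 * mentioned above.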
173 */ 174 175 uint32_t *idx_offset = dev->shared_resources.sdma_doorbell_idx; 176 uint32_t valid_id = idx_offset[q->properties.sdma_engine_id] 177 + (q->properties.sdma_queue_id & 1) 178 * KFD_QUEUE_DOORBELL_MIRROR_OFFSET 179 + (q->properties.sdma_queue_id >> 1); 180 181 if (restore_id && *restore_id != valid_id) 182 return -EINVAL; 183 q->doorbell_id = valid_id; 184 } else { 185 /* For CP queues on SOC15 */ 186 if (restore_id) { 187 /* make sure that ID is free */ 188 if (__test_and_set_bit(*restore_id, qpd->doorbell_bitmap)) 189 return -EINVAL; 190 191 q->doorbell_id = *restore_id; 192 } else { 193 /* or reserve a free doorbell ID */ 194 unsigned int found; 195 196 found = find_first_zero_bit(qpd->doorbell_bitmap, 197 KFD_MAX_NUM_OF_QUEUES_PER_PROCESS); 198 if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) { 199 pr_debug("No doorbells available"); 200 return -EBUSY; 201 } 202 set_bit(found, qpd->doorbell_bitmap); 203 q->doorbell_id = found; 204 } 205 } 206 207 q->properties.doorbell_off = 208 kfd_get_doorbell_dw_offset_in_bar(dev, qpd_to_pdd(qpd), 209 q->doorbell_id); 210 return 0; 211 } 212 213 static void deallocate_doorbell(struct qcm_process_device *qpd, 214 struct queue *q) 215 { 216 unsigned int old; 217 struct kfd_dev *dev = qpd->dqm->dev; 218 219 if (!KFD_IS_SOC15(dev) || 220 q->properties.type == KFD_QUEUE_TYPE_SDMA || 221 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 222 return; 223 224 old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap); 225 WARN_ON(!old); 226 } 227 228 static void program_trap_handler_settings(struct device_queue_manager *dqm, 229 struct qcm_process_device *qpd) 230 { 231 if (dqm->dev->kfd2kgd->program_trap_handler_settings) 232 dqm->dev->kfd2kgd->program_trap_handler_settings( 233 dqm->dev->adev, qpd->vmid, 234 qpd->tba_addr, qpd->tma_addr); 235 } 236 237 static int allocate_vmid(struct device_queue_manager *dqm, 238 struct qcm_process_device *qpd, 239 struct queue *q) 240 { 241 int allocated_vmid = -1, i; 242 243 for (i = dqm->dev->vm_info.first_vmid_kfd; 244 i <= dqm->dev->vm_info.last_vmid_kfd; i++) { 245 if (!dqm->vmid_pasid[i]) { 246 allocated_vmid = i; 247 break; 248 } 249 } 250 251 if (allocated_vmid < 0) { 252 pr_err("no more vmid to allocate\n"); 253 return -ENOSPC; 254 } 255 256 pr_debug("vmid allocated: %d\n", allocated_vmid); 257 258 dqm->vmid_pasid[allocated_vmid] = q->process->pasid; 259 260 set_pasid_vmid_mapping(dqm, q->process->pasid, allocated_vmid); 261 262 qpd->vmid = allocated_vmid; 263 q->properties.vmid = allocated_vmid; 264 265 program_sh_mem_settings(dqm, qpd); 266 267 if (KFD_IS_SOC15(dqm->dev) && dqm->dev->cwsr_enabled) 268 program_trap_handler_settings(dqm, qpd); 269 270 /* qpd->page_table_base is set earlier when register_process() 271 * is called, i.e. when the first queue is created. 
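	 * (register_process() reads the process page directory base via
	 * amdgpu_amdkfd_gpuvm_get_process_page_dir() and stores it in
	 * qpd->page_table_base; see register_process() further down in
	 * this file.)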
272 */ 273 dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->adev, 274 qpd->vmid, 275 qpd->page_table_base); 276 /* invalidate the VM context after pasid and vmid mapping is set up */ 277 kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY); 278 279 if (dqm->dev->kfd2kgd->set_scratch_backing_va) 280 dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->adev, 281 qpd->sh_hidden_private_base, qpd->vmid); 282 283 return 0; 284 } 285 286 static int flush_texture_cache_nocpsch(struct kfd_dev *kdev, 287 struct qcm_process_device *qpd) 288 { 289 const struct packet_manager_funcs *pmf = qpd->dqm->packet_mgr.pmf; 290 int ret; 291 292 if (!qpd->ib_kaddr) 293 return -ENOMEM; 294 295 ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr); 296 if (ret) 297 return ret; 298 299 return amdgpu_amdkfd_submit_ib(kdev->adev, KGD_ENGINE_MEC1, qpd->vmid, 300 qpd->ib_base, (uint32_t *)qpd->ib_kaddr, 301 pmf->release_mem_size / sizeof(uint32_t)); 302 } 303 304 static void deallocate_vmid(struct device_queue_manager *dqm, 305 struct qcm_process_device *qpd, 306 struct queue *q) 307 { 308 /* On GFX v7, CP doesn't flush TC at dequeue */ 309 if (q->device->adev->asic_type == CHIP_HAWAII) 310 if (flush_texture_cache_nocpsch(q->device, qpd)) 311 pr_err("Failed to flush TC\n"); 312 313 kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY); 314 315 /* Release the vmid mapping */ 316 set_pasid_vmid_mapping(dqm, 0, qpd->vmid); 317 dqm->vmid_pasid[qpd->vmid] = 0; 318 319 qpd->vmid = 0; 320 q->properties.vmid = 0; 321 } 322 323 static int create_queue_nocpsch(struct device_queue_manager *dqm, 324 struct queue *q, 325 struct qcm_process_device *qpd, 326 const struct kfd_criu_queue_priv_data *qd, 327 const void *restore_mqd, const void *restore_ctl_stack) 328 { 329 struct mqd_manager *mqd_mgr; 330 int retval; 331 332 dqm_lock(dqm); 333 334 if (dqm->total_queue_count >= max_num_of_queues_per_device) { 335 pr_warn("Can't create new usermode queue because %d queues were already created\n", 336 dqm->total_queue_count); 337 retval = -EPERM; 338 goto out_unlock; 339 } 340 341 if (list_empty(&qpd->queues_list)) { 342 retval = allocate_vmid(dqm, qpd, q); 343 if (retval) 344 goto out_unlock; 345 } 346 q->properties.vmid = qpd->vmid; 347 /* 348 * Eviction state logic: mark all queues as evicted, even ones 349 * not currently active. Restoring inactive queues later only 350 * updates the is_evicted flag but is a no-op otherwise. 351 */ 352 q->properties.is_evicted = !!qpd->evicted; 353 354 q->properties.tba_addr = qpd->tba_addr; 355 q->properties.tma_addr = qpd->tma_addr; 356 357 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 358 q->properties.type)]; 359 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) { 360 retval = allocate_hqd(dqm, q); 361 if (retval) 362 goto deallocate_vmid; 363 pr_debug("Loading mqd to hqd on pipe %d, queue %d\n", 364 q->pipe, q->queue); 365 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 366 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 367 retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL); 368 if (retval) 369 goto deallocate_vmid; 370 dqm->asic_ops.init_sdma_vm(dqm, q, qpd); 371 } 372 373 retval = allocate_doorbell(qpd, q, qd ? 
&qd->doorbell_id : NULL); 374 if (retval) 375 goto out_deallocate_hqd; 376 377 /* Temporarily release dqm lock to avoid a circular lock dependency */ 378 dqm_unlock(dqm); 379 q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties); 380 dqm_lock(dqm); 381 382 if (!q->mqd_mem_obj) { 383 retval = -ENOMEM; 384 goto out_deallocate_doorbell; 385 } 386 387 if (qd) 388 mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr, 389 &q->properties, restore_mqd, restore_ctl_stack, 390 qd->ctl_stack_size); 391 else 392 mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, 393 &q->gart_mqd_addr, &q->properties); 394 395 if (q->properties.is_active) { 396 if (!dqm->sched_running) { 397 WARN_ONCE(1, "Load non-HWS mqd while stopped\n"); 398 goto add_queue_to_list; 399 } 400 401 if (WARN(q->process->mm != current->mm, 402 "should only run in user thread")) 403 retval = -EFAULT; 404 else 405 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, 406 q->queue, &q->properties, current->mm); 407 if (retval) 408 goto out_free_mqd; 409 } 410 411 add_queue_to_list: 412 list_add(&q->list, &qpd->queues_list); 413 qpd->queue_count++; 414 if (q->properties.is_active) 415 increment_queue_count(dqm, q->properties.type); 416 417 /* 418 * Unconditionally increment this counter, regardless of the queue's 419 * type or whether the queue is active. 420 */ 421 dqm->total_queue_count++; 422 pr_debug("Total of %d queues are accountable so far\n", 423 dqm->total_queue_count); 424 goto out_unlock; 425 426 out_free_mqd: 427 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 428 out_deallocate_doorbell: 429 deallocate_doorbell(qpd, q); 430 out_deallocate_hqd: 431 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) 432 deallocate_hqd(dqm, q); 433 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 434 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 435 deallocate_sdma_queue(dqm, q); 436 deallocate_vmid: 437 if (list_empty(&qpd->queues_list)) 438 deallocate_vmid(dqm, qpd, q); 439 out_unlock: 440 dqm_unlock(dqm); 441 return retval; 442 } 443 444 static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q) 445 { 446 bool set; 447 int pipe, bit, i; 448 449 set = false; 450 451 for (pipe = dqm->next_pipe_to_allocate, i = 0; 452 i < get_pipes_per_mec(dqm); 453 pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) { 454 455 if (!is_pipe_enabled(dqm, 0, pipe)) 456 continue; 457 458 if (dqm->allocated_queues[pipe] != 0) { 459 bit = ffs(dqm->allocated_queues[pipe]) - 1; 460 dqm->allocated_queues[pipe] &= ~(1 << bit); 461 q->pipe = pipe; 462 q->queue = bit; 463 set = true; 464 break; 465 } 466 } 467 468 if (!set) 469 return -EBUSY; 470 471 pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue); 472 /* horizontal hqd allocation */ 473 dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm); 474 475 return 0; 476 } 477 478 static inline void deallocate_hqd(struct device_queue_manager *dqm, 479 struct queue *q) 480 { 481 dqm->allocated_queues[q->pipe] |= (1 << q->queue); 482 } 483 484 #define SQ_IND_CMD_CMD_KILL 0x00000003 485 #define SQ_IND_CMD_MODE_BROADCAST 0x00000001 486 487 static int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p) 488 { 489 int status = 0; 490 unsigned int vmid; 491 uint16_t queried_pasid; 492 union SQ_CMD_BITS reg_sq_cmd; 493 union GRBM_GFX_INDEX_BITS reg_gfx_index; 494 struct kfd_process_device *pdd; 495 int first_vmid_to_scan = dev->vm_info.first_vmid_kfd; 496 int last_vmid_to_scan = dev->vm_info.last_vmid_kfd; 497 498 reg_sq_cmd.u32All = 0; 
	reg_gfx_index.u32All = 0;

	pr_debug("Killing all process wavefronts\n");

	if (!dev->kfd2kgd->get_atc_vmid_pasid_mapping_info) {
		pr_err("no vmid pasid mapping supported\n");
		return -EOPNOTSUPP;
	}

	/* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
	 * ATC_VMID15_PASID_MAPPING
	 * to check which VMID the current process is mapped to.
	 */

	for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
		status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info
				(dev->adev, vmid, &queried_pasid);

		if (status && queried_pasid == p->pasid) {
			pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n",
					vmid, p->pasid);
			break;
		}
	}

	if (vmid > last_vmid_to_scan) {
		pr_err("Didn't find vmid for pasid 0x%x\n", p->pasid);
		return -EFAULT;
	}

	/* taking the VMID for that process the safe way, using the PDD */
	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd)
		return -EFAULT;

	reg_gfx_index.bits.sh_broadcast_writes = 1;
	reg_gfx_index.bits.se_broadcast_writes = 1;
	reg_gfx_index.bits.instance_broadcast_writes = 1;
	reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
	reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
	reg_sq_cmd.bits.vm_id = vmid;

	dev->kfd2kgd->wave_control_execute(dev->adev,
					reg_gfx_index.u32All,
					reg_sq_cmd.u32All);

	return 0;
}

/* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked
 * to avoid unsynchronized access
 */
static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd_mgr;

	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
			q->properties.type)];

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
		deallocate_hqd(dqm, q);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		deallocate_sdma_queue(dqm, q);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
		deallocate_sdma_queue(dqm, q);
	else {
		pr_debug("q->properties.type %d is invalid\n",
				q->properties.type);
		return -EINVAL;
	}
	dqm->total_queue_count--;

	deallocate_doorbell(qpd, q);

	if (!dqm->sched_running) {
		WARN_ONCE(1, "Destroy non-HWS queue while stopped\n");
		return 0;
	}

	retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
				KFD_UNMAP_LATENCY_MS,
				q->pipe, q->queue);
	if (retval == -ETIME)
		qpd->reset_wavefronts = true;

	list_del(&q->list);
	if (list_empty(&qpd->queues_list)) {
		if (qpd->reset_wavefronts) {
			pr_warn("Resetting wave fronts (nocpsch) on dev %p\n",
					dqm->dev);
			/* dbgdev_wave_reset_wavefronts has to be called before
			 * deallocate_vmid(), i.e. when vmid is still in use.
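			 * (dbgdev_wave_reset_wavefronts() above resolves the
			 * VMID through the ATC VMID/PASID mapping and issues a
			 * broadcast SQ KILL for it, so the mapping must still
			 * be in place when it runs.)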
595 */ 596 dbgdev_wave_reset_wavefronts(dqm->dev, 597 qpd->pqm->process); 598 qpd->reset_wavefronts = false; 599 } 600 601 deallocate_vmid(dqm, qpd, q); 602 } 603 qpd->queue_count--; 604 if (q->properties.is_active) { 605 decrement_queue_count(dqm, q->properties.type); 606 if (q->properties.is_gws) { 607 dqm->gws_queue_count--; 608 qpd->mapped_gws_queue = false; 609 } 610 } 611 612 return retval; 613 } 614 615 static int destroy_queue_nocpsch(struct device_queue_manager *dqm, 616 struct qcm_process_device *qpd, 617 struct queue *q) 618 { 619 int retval; 620 uint64_t sdma_val = 0; 621 struct kfd_process_device *pdd = qpd_to_pdd(qpd); 622 struct mqd_manager *mqd_mgr = 623 dqm->mqd_mgrs[get_mqd_type_from_queue_type(q->properties.type)]; 624 625 /* Get the SDMA queue stats */ 626 if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || 627 (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 628 retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr, 629 &sdma_val); 630 if (retval) 631 pr_err("Failed to read SDMA queue counter for queue: %d\n", 632 q->properties.queue_id); 633 } 634 635 dqm_lock(dqm); 636 retval = destroy_queue_nocpsch_locked(dqm, qpd, q); 637 if (!retval) 638 pdd->sdma_past_activity_counter += sdma_val; 639 dqm_unlock(dqm); 640 641 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 642 643 return retval; 644 } 645 646 static int update_queue(struct device_queue_manager *dqm, struct queue *q, 647 struct mqd_update_info *minfo) 648 { 649 int retval = 0; 650 struct mqd_manager *mqd_mgr; 651 struct kfd_process_device *pdd; 652 bool prev_active = false; 653 654 dqm_lock(dqm); 655 pdd = kfd_get_process_device_data(q->device, q->process); 656 if (!pdd) { 657 retval = -ENODEV; 658 goto out_unlock; 659 } 660 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 661 q->properties.type)]; 662 663 /* Save previous activity state for counters */ 664 prev_active = q->properties.is_active; 665 666 /* Make sure the queue is unmapped before updating the MQD */ 667 if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) { 668 retval = unmap_queues_cpsch(dqm, 669 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false); 670 if (retval) { 671 pr_err("unmap queue failed\n"); 672 goto out_unlock; 673 } 674 } else if (prev_active && 675 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || 676 q->properties.type == KFD_QUEUE_TYPE_SDMA || 677 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 678 679 if (!dqm->sched_running) { 680 WARN_ONCE(1, "Update non-HWS queue while stopped\n"); 681 goto out_unlock; 682 } 683 684 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, 685 (dqm->dev->cwsr_enabled ? 686 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE : 687 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN), 688 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); 689 if (retval) { 690 pr_err("destroy mqd failed\n"); 691 goto out_unlock; 692 } 693 } 694 695 mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties, minfo); 696 697 /* 698 * check active state vs. the previous state and modify 699 * counter accordingly. map_queues_cpsch uses the 700 * dqm->active_queue_count to determine whether a new runlist must be 701 * uploaded. 
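	 * (map_queues_cpsch() returns early when active_queue_count or
	 * processes_count is zero, so keeping these counters accurate
	 * avoids sending runlists with nothing to schedule.)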
702 */ 703 if (q->properties.is_active && !prev_active) 704 increment_queue_count(dqm, q->properties.type); 705 else if (!q->properties.is_active && prev_active) 706 decrement_queue_count(dqm, q->properties.type); 707 708 if (q->gws && !q->properties.is_gws) { 709 if (q->properties.is_active) { 710 dqm->gws_queue_count++; 711 pdd->qpd.mapped_gws_queue = true; 712 } 713 q->properties.is_gws = true; 714 } else if (!q->gws && q->properties.is_gws) { 715 if (q->properties.is_active) { 716 dqm->gws_queue_count--; 717 pdd->qpd.mapped_gws_queue = false; 718 } 719 q->properties.is_gws = false; 720 } 721 722 if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) 723 retval = map_queues_cpsch(dqm); 724 else if (q->properties.is_active && 725 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || 726 q->properties.type == KFD_QUEUE_TYPE_SDMA || 727 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 728 if (WARN(q->process->mm != current->mm, 729 "should only run in user thread")) 730 retval = -EFAULT; 731 else 732 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, 733 q->pipe, q->queue, 734 &q->properties, current->mm); 735 } 736 737 out_unlock: 738 dqm_unlock(dqm); 739 return retval; 740 } 741 742 static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, 743 struct qcm_process_device *qpd) 744 { 745 struct queue *q; 746 struct mqd_manager *mqd_mgr; 747 struct kfd_process_device *pdd; 748 int retval, ret = 0; 749 750 dqm_lock(dqm); 751 if (qpd->evicted++ > 0) /* already evicted, do nothing */ 752 goto out; 753 754 pdd = qpd_to_pdd(qpd); 755 pr_debug_ratelimited("Evicting PASID 0x%x queues\n", 756 pdd->process->pasid); 757 758 pdd->last_evict_timestamp = get_jiffies_64(); 759 /* Mark all queues as evicted. Deactivate all active queues on 760 * the qpd. 761 */ 762 list_for_each_entry(q, &qpd->queues_list, list) { 763 q->properties.is_evicted = true; 764 if (!q->properties.is_active) 765 continue; 766 767 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 768 q->properties.type)]; 769 q->properties.is_active = false; 770 decrement_queue_count(dqm, q->properties.type); 771 if (q->properties.is_gws) { 772 dqm->gws_queue_count--; 773 qpd->mapped_gws_queue = false; 774 } 775 776 if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n")) 777 continue; 778 779 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, 780 (dqm->dev->cwsr_enabled ? 781 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE : 782 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN), 783 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); 784 if (retval && !ret) 785 /* Return the first error, but keep going to 786 * maintain a consistent eviction state 787 */ 788 ret = retval; 789 } 790 791 out: 792 dqm_unlock(dqm); 793 return ret; 794 } 795 796 static int evict_process_queues_cpsch(struct device_queue_manager *dqm, 797 struct qcm_process_device *qpd) 798 { 799 struct queue *q; 800 struct kfd_process_device *pdd; 801 int retval = 0; 802 803 dqm_lock(dqm); 804 if (qpd->evicted++ > 0) /* already evicted, do nothing */ 805 goto out; 806 807 pdd = qpd_to_pdd(qpd); 808 pr_debug_ratelimited("Evicting PASID 0x%x queues\n", 809 pdd->process->pasid); 810 811 /* Mark all queues as evicted. Deactivate all active queues on 812 * the qpd. 813 */ 814 list_for_each_entry(q, &qpd->queues_list, list) { 815 q->properties.is_evicted = true; 816 if (!q->properties.is_active) 817 continue; 818 819 q->properties.is_active = false; 820 decrement_queue_count(dqm, q->properties.type); 821 } 822 pdd->last_evict_timestamp = get_jiffies_64(); 823 retval = execute_queues_cpsch(dqm, 824 qpd->is_debug ? 
825 KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES : 826 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 827 828 out: 829 dqm_unlock(dqm); 830 return retval; 831 } 832 833 static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, 834 struct qcm_process_device *qpd) 835 { 836 struct mm_struct *mm = NULL; 837 struct queue *q; 838 struct mqd_manager *mqd_mgr; 839 struct kfd_process_device *pdd; 840 uint64_t pd_base; 841 uint64_t eviction_duration; 842 int retval, ret = 0; 843 844 pdd = qpd_to_pdd(qpd); 845 /* Retrieve PD base */ 846 pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); 847 848 dqm_lock(dqm); 849 if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */ 850 goto out; 851 if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */ 852 qpd->evicted--; 853 goto out; 854 } 855 856 pr_debug_ratelimited("Restoring PASID 0x%x queues\n", 857 pdd->process->pasid); 858 859 /* Update PD Base in QPD */ 860 qpd->page_table_base = pd_base; 861 pr_debug("Updated PD address to 0x%llx\n", pd_base); 862 863 if (!list_empty(&qpd->queues_list)) { 864 dqm->dev->kfd2kgd->set_vm_context_page_table_base( 865 dqm->dev->adev, 866 qpd->vmid, 867 qpd->page_table_base); 868 kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY); 869 } 870 871 /* Take a safe reference to the mm_struct, which may otherwise 872 * disappear even while the kfd_process is still referenced. 873 */ 874 mm = get_task_mm(pdd->process->lead_thread); 875 if (!mm) { 876 ret = -EFAULT; 877 goto out; 878 } 879 880 /* Remove the eviction flags. Activate queues that are not 881 * inactive for other reasons. 882 */ 883 list_for_each_entry(q, &qpd->queues_list, list) { 884 q->properties.is_evicted = false; 885 if (!QUEUE_IS_ACTIVE(q->properties)) 886 continue; 887 888 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 889 q->properties.type)]; 890 q->properties.is_active = true; 891 increment_queue_count(dqm, q->properties.type); 892 if (q->properties.is_gws) { 893 dqm->gws_queue_count++; 894 qpd->mapped_gws_queue = true; 895 } 896 897 if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n")) 898 continue; 899 900 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, 901 q->queue, &q->properties, mm); 902 if (retval && !ret) 903 /* Return the first error, but keep going to 904 * maintain a consistent eviction state 905 */ 906 ret = retval; 907 } 908 qpd->evicted = 0; 909 eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp; 910 atomic64_add(eviction_duration, &pdd->evict_duration_counter); 911 out: 912 if (mm) 913 mmput(mm); 914 dqm_unlock(dqm); 915 return ret; 916 } 917 918 static int restore_process_queues_cpsch(struct device_queue_manager *dqm, 919 struct qcm_process_device *qpd) 920 { 921 struct queue *q; 922 struct kfd_process_device *pdd; 923 uint64_t pd_base; 924 uint64_t eviction_duration; 925 int retval = 0; 926 927 pdd = qpd_to_pdd(qpd); 928 /* Retrieve PD base */ 929 pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); 930 931 dqm_lock(dqm); 932 if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */ 933 goto out; 934 if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */ 935 qpd->evicted--; 936 goto out; 937 } 938 939 pr_debug_ratelimited("Restoring PASID 0x%x queues\n", 940 pdd->process->pasid); 941 942 /* Update PD Base in QPD */ 943 qpd->page_table_base = pd_base; 944 pr_debug("Updated PD address to 0x%llx\n", pd_base); 945 946 /* activate all active queues on the qpd */ 947 list_for_each_entry(q, &qpd->queues_list, list) { 948 q->properties.is_evicted = 
false; 949 if (!QUEUE_IS_ACTIVE(q->properties)) 950 continue; 951 952 q->properties.is_active = true; 953 increment_queue_count(dqm, q->properties.type); 954 } 955 retval = execute_queues_cpsch(dqm, 956 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 957 qpd->evicted = 0; 958 eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp; 959 atomic64_add(eviction_duration, &pdd->evict_duration_counter); 960 out: 961 dqm_unlock(dqm); 962 return retval; 963 } 964 965 static int register_process(struct device_queue_manager *dqm, 966 struct qcm_process_device *qpd) 967 { 968 struct device_process_node *n; 969 struct kfd_process_device *pdd; 970 uint64_t pd_base; 971 int retval; 972 973 n = kzalloc(sizeof(*n), GFP_KERNEL); 974 if (!n) 975 return -ENOMEM; 976 977 n->qpd = qpd; 978 979 pdd = qpd_to_pdd(qpd); 980 /* Retrieve PD base */ 981 pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); 982 983 dqm_lock(dqm); 984 list_add(&n->list, &dqm->queues); 985 986 /* Update PD Base in QPD */ 987 qpd->page_table_base = pd_base; 988 pr_debug("Updated PD address to 0x%llx\n", pd_base); 989 990 retval = dqm->asic_ops.update_qpd(dqm, qpd); 991 992 dqm->processes_count++; 993 994 dqm_unlock(dqm); 995 996 /* Outside the DQM lock because under the DQM lock we can't do 997 * reclaim or take other locks that others hold while reclaiming. 998 */ 999 kfd_inc_compute_active(dqm->dev); 1000 1001 return retval; 1002 } 1003 1004 static int unregister_process(struct device_queue_manager *dqm, 1005 struct qcm_process_device *qpd) 1006 { 1007 int retval; 1008 struct device_process_node *cur, *next; 1009 1010 pr_debug("qpd->queues_list is %s\n", 1011 list_empty(&qpd->queues_list) ? "empty" : "not empty"); 1012 1013 retval = 0; 1014 dqm_lock(dqm); 1015 1016 list_for_each_entry_safe(cur, next, &dqm->queues, list) { 1017 if (qpd == cur->qpd) { 1018 list_del(&cur->list); 1019 kfree(cur); 1020 dqm->processes_count--; 1021 goto out; 1022 } 1023 } 1024 /* qpd not found in dqm list */ 1025 retval = 1; 1026 out: 1027 dqm_unlock(dqm); 1028 1029 /* Outside the DQM lock because under the DQM lock we can't do 1030 * reclaim or take other locks that others hold while reclaiming. 
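	 * (Memory reclaim can reach the KFD eviction path, which takes this
	 * same DQM lock, so blocking on reclaim while holding it could
	 * deadlock.)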
1031 */ 1032 if (!retval) 1033 kfd_dec_compute_active(dqm->dev); 1034 1035 return retval; 1036 } 1037 1038 static int 1039 set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid, 1040 unsigned int vmid) 1041 { 1042 return dqm->dev->kfd2kgd->set_pasid_vmid_mapping( 1043 dqm->dev->adev, pasid, vmid); 1044 } 1045 1046 static void init_interrupts(struct device_queue_manager *dqm) 1047 { 1048 unsigned int i; 1049 1050 for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++) 1051 if (is_pipe_enabled(dqm, 0, i)) 1052 dqm->dev->kfd2kgd->init_interrupts(dqm->dev->adev, i); 1053 } 1054 1055 static int initialize_nocpsch(struct device_queue_manager *dqm) 1056 { 1057 int pipe, queue; 1058 1059 pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm)); 1060 1061 dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm), 1062 sizeof(unsigned int), GFP_KERNEL); 1063 if (!dqm->allocated_queues) 1064 return -ENOMEM; 1065 1066 mutex_init(&dqm->lock_hidden); 1067 INIT_LIST_HEAD(&dqm->queues); 1068 dqm->active_queue_count = dqm->next_pipe_to_allocate = 0; 1069 dqm->active_cp_queue_count = 0; 1070 dqm->gws_queue_count = 0; 1071 1072 for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { 1073 int pipe_offset = pipe * get_queues_per_pipe(dqm); 1074 1075 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) 1076 if (test_bit(pipe_offset + queue, 1077 dqm->dev->shared_resources.cp_queue_bitmap)) 1078 dqm->allocated_queues[pipe] |= 1 << queue; 1079 } 1080 1081 memset(dqm->vmid_pasid, 0, sizeof(dqm->vmid_pasid)); 1082 1083 dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm)); 1084 dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm)); 1085 1086 return 0; 1087 } 1088 1089 static void uninitialize(struct device_queue_manager *dqm) 1090 { 1091 int i; 1092 1093 WARN_ON(dqm->active_queue_count > 0 || dqm->processes_count > 0); 1094 1095 kfree(dqm->allocated_queues); 1096 for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++) 1097 kfree(dqm->mqd_mgrs[i]); 1098 mutex_destroy(&dqm->lock_hidden); 1099 } 1100 1101 static int start_nocpsch(struct device_queue_manager *dqm) 1102 { 1103 int r = 0; 1104 1105 pr_info("SW scheduler is used"); 1106 init_interrupts(dqm); 1107 1108 if (dqm->dev->adev->asic_type == CHIP_HAWAII) 1109 r = pm_init(&dqm->packet_mgr, dqm); 1110 if (!r) 1111 dqm->sched_running = true; 1112 1113 return r; 1114 } 1115 1116 static int stop_nocpsch(struct device_queue_manager *dqm) 1117 { 1118 if (dqm->dev->adev->asic_type == CHIP_HAWAII) 1119 pm_uninit(&dqm->packet_mgr, false); 1120 dqm->sched_running = false; 1121 1122 return 0; 1123 } 1124 1125 static void pre_reset(struct device_queue_manager *dqm) 1126 { 1127 dqm_lock(dqm); 1128 dqm->is_resetting = true; 1129 dqm_unlock(dqm); 1130 } 1131 1132 static int allocate_sdma_queue(struct device_queue_manager *dqm, 1133 struct queue *q, const uint32_t *restore_sdma_id) 1134 { 1135 int bit; 1136 1137 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { 1138 if (dqm->sdma_bitmap == 0) { 1139 pr_err("No more SDMA queue to allocate\n"); 1140 return -ENOMEM; 1141 } 1142 1143 if (restore_sdma_id) { 1144 /* Re-use existing sdma_id */ 1145 if (!(dqm->sdma_bitmap & (1ULL << *restore_sdma_id))) { 1146 pr_err("SDMA queue already in use\n"); 1147 return -EBUSY; 1148 } 1149 dqm->sdma_bitmap &= ~(1ULL << *restore_sdma_id); 1150 q->sdma_id = *restore_sdma_id; 1151 } else { 1152 /* Find first available sdma_id */ 1153 bit = __ffs64(dqm->sdma_bitmap); 1154 dqm->sdma_bitmap &= ~(1ULL << bit); 1155 q->sdma_id = bit; 1156 } 1157 1158 q->properties.sdma_engine_id = q->sdma_id % 
1159 kfd_get_num_sdma_engines(dqm->dev); 1160 q->properties.sdma_queue_id = q->sdma_id / 1161 kfd_get_num_sdma_engines(dqm->dev); 1162 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 1163 if (dqm->xgmi_sdma_bitmap == 0) { 1164 pr_err("No more XGMI SDMA queue to allocate\n"); 1165 return -ENOMEM; 1166 } 1167 if (restore_sdma_id) { 1168 /* Re-use existing sdma_id */ 1169 if (!(dqm->xgmi_sdma_bitmap & (1ULL << *restore_sdma_id))) { 1170 pr_err("SDMA queue already in use\n"); 1171 return -EBUSY; 1172 } 1173 dqm->xgmi_sdma_bitmap &= ~(1ULL << *restore_sdma_id); 1174 q->sdma_id = *restore_sdma_id; 1175 } else { 1176 bit = __ffs64(dqm->xgmi_sdma_bitmap); 1177 dqm->xgmi_sdma_bitmap &= ~(1ULL << bit); 1178 q->sdma_id = bit; 1179 } 1180 /* sdma_engine_id is sdma id including 1181 * both PCIe-optimized SDMAs and XGMI- 1182 * optimized SDMAs. The calculation below 1183 * assumes the first N engines are always 1184 * PCIe-optimized ones 1185 */ 1186 q->properties.sdma_engine_id = 1187 kfd_get_num_sdma_engines(dqm->dev) + 1188 q->sdma_id % kfd_get_num_xgmi_sdma_engines(dqm->dev); 1189 q->properties.sdma_queue_id = q->sdma_id / 1190 kfd_get_num_xgmi_sdma_engines(dqm->dev); 1191 } 1192 1193 pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); 1194 pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id); 1195 1196 return 0; 1197 } 1198 1199 static void deallocate_sdma_queue(struct device_queue_manager *dqm, 1200 struct queue *q) 1201 { 1202 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { 1203 if (q->sdma_id >= get_num_sdma_queues(dqm)) 1204 return; 1205 dqm->sdma_bitmap |= (1ULL << q->sdma_id); 1206 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 1207 if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm)) 1208 return; 1209 dqm->xgmi_sdma_bitmap |= (1ULL << q->sdma_id); 1210 } 1211 } 1212 1213 /* 1214 * Device Queue Manager implementation for cp scheduler 1215 */ 1216 1217 static int set_sched_resources(struct device_queue_manager *dqm) 1218 { 1219 int i, mec; 1220 struct scheduling_resources res; 1221 1222 res.vmid_mask = dqm->dev->shared_resources.compute_vmid_bitmap; 1223 1224 res.queue_mask = 0; 1225 for (i = 0; i < KGD_MAX_QUEUES; ++i) { 1226 mec = (i / dqm->dev->shared_resources.num_queue_per_pipe) 1227 / dqm->dev->shared_resources.num_pipe_per_mec; 1228 1229 if (!test_bit(i, dqm->dev->shared_resources.cp_queue_bitmap)) 1230 continue; 1231 1232 /* only acquire queues from the first MEC */ 1233 if (mec > 0) 1234 continue; 1235 1236 /* This situation may be hit in the future if a new HW 1237 * generation exposes more than 64 queues. 
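		 * (res.queue_mask is a 64-bit mask, one bit per queue as
		 * numbered by amdgpu, which is what the WARN_ON against
		 * sizeof(res.queue_mask) * 8 below enforces.)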
If so, the 1238 * definition of res.queue_mask needs updating 1239 */ 1240 if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) { 1241 pr_err("Invalid queue enabled by amdgpu: %d\n", i); 1242 break; 1243 } 1244 1245 res.queue_mask |= 1ull 1246 << amdgpu_queue_mask_bit_to_set_resource_bit( 1247 dqm->dev->adev, i); 1248 } 1249 res.gws_mask = ~0ull; 1250 res.oac_mask = res.gds_heap_base = res.gds_heap_size = 0; 1251 1252 pr_debug("Scheduling resources:\n" 1253 "vmid mask: 0x%8X\n" 1254 "queue mask: 0x%8llX\n", 1255 res.vmid_mask, res.queue_mask); 1256 1257 return pm_send_set_resources(&dqm->packet_mgr, &res); 1258 } 1259 1260 static int initialize_cpsch(struct device_queue_manager *dqm) 1261 { 1262 uint64_t num_sdma_queues; 1263 uint64_t num_xgmi_sdma_queues; 1264 1265 pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm)); 1266 1267 mutex_init(&dqm->lock_hidden); 1268 INIT_LIST_HEAD(&dqm->queues); 1269 dqm->active_queue_count = dqm->processes_count = 0; 1270 dqm->active_cp_queue_count = 0; 1271 dqm->gws_queue_count = 0; 1272 dqm->active_runlist = false; 1273 1274 num_sdma_queues = get_num_sdma_queues(dqm); 1275 if (num_sdma_queues >= BITS_PER_TYPE(dqm->sdma_bitmap)) 1276 dqm->sdma_bitmap = ULLONG_MAX; 1277 else 1278 dqm->sdma_bitmap = (BIT_ULL(num_sdma_queues) - 1); 1279 1280 num_xgmi_sdma_queues = get_num_xgmi_sdma_queues(dqm); 1281 if (num_xgmi_sdma_queues >= BITS_PER_TYPE(dqm->xgmi_sdma_bitmap)) 1282 dqm->xgmi_sdma_bitmap = ULLONG_MAX; 1283 else 1284 dqm->xgmi_sdma_bitmap = (BIT_ULL(num_xgmi_sdma_queues) - 1); 1285 1286 INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception); 1287 1288 return 0; 1289 } 1290 1291 static int start_cpsch(struct device_queue_manager *dqm) 1292 { 1293 int retval; 1294 1295 retval = 0; 1296 1297 dqm_lock(dqm); 1298 retval = pm_init(&dqm->packet_mgr, dqm); 1299 if (retval) 1300 goto fail_packet_manager_init; 1301 1302 retval = set_sched_resources(dqm); 1303 if (retval) 1304 goto fail_set_sched_resources; 1305 1306 pr_debug("Allocating fence memory\n"); 1307 1308 /* allocate fence memory on the gart */ 1309 retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr), 1310 &dqm->fence_mem); 1311 1312 if (retval) 1313 goto fail_allocate_vidmem; 1314 1315 dqm->fence_addr = (uint64_t *)dqm->fence_mem->cpu_ptr; 1316 dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr; 1317 1318 init_interrupts(dqm); 1319 1320 /* clear hang status when driver try to start the hw scheduler */ 1321 dqm->is_hws_hang = false; 1322 dqm->is_resetting = false; 1323 dqm->sched_running = true; 1324 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 1325 dqm_unlock(dqm); 1326 1327 return 0; 1328 fail_allocate_vidmem: 1329 fail_set_sched_resources: 1330 pm_uninit(&dqm->packet_mgr, false); 1331 fail_packet_manager_init: 1332 dqm_unlock(dqm); 1333 return retval; 1334 } 1335 1336 static int stop_cpsch(struct device_queue_manager *dqm) 1337 { 1338 bool hanging; 1339 1340 dqm_lock(dqm); 1341 if (!dqm->sched_running) { 1342 dqm_unlock(dqm); 1343 return 0; 1344 } 1345 1346 if (!dqm->is_hws_hang) 1347 unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, false); 1348 hanging = dqm->is_hws_hang || dqm->is_resetting; 1349 dqm->sched_running = false; 1350 1351 pm_release_ib(&dqm->packet_mgr); 1352 1353 kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); 1354 pm_uninit(&dqm->packet_mgr, hanging); 1355 dqm_unlock(dqm); 1356 1357 return 0; 1358 } 1359 1360 static int create_kernel_queue_cpsch(struct device_queue_manager *dqm, 1361 struct kernel_queue *kq, 1362 struct qcm_process_device *qpd) 
1363 { 1364 dqm_lock(dqm); 1365 if (dqm->total_queue_count >= max_num_of_queues_per_device) { 1366 pr_warn("Can't create new kernel queue because %d queues were already created\n", 1367 dqm->total_queue_count); 1368 dqm_unlock(dqm); 1369 return -EPERM; 1370 } 1371 1372 /* 1373 * Unconditionally increment this counter, regardless of the queue's 1374 * type or whether the queue is active. 1375 */ 1376 dqm->total_queue_count++; 1377 pr_debug("Total of %d queues are accountable so far\n", 1378 dqm->total_queue_count); 1379 1380 list_add(&kq->list, &qpd->priv_queue_list); 1381 increment_queue_count(dqm, kq->queue->properties.type); 1382 qpd->is_debug = true; 1383 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 1384 dqm_unlock(dqm); 1385 1386 return 0; 1387 } 1388 1389 static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm, 1390 struct kernel_queue *kq, 1391 struct qcm_process_device *qpd) 1392 { 1393 dqm_lock(dqm); 1394 list_del(&kq->list); 1395 decrement_queue_count(dqm, kq->queue->properties.type); 1396 qpd->is_debug = false; 1397 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); 1398 /* 1399 * Unconditionally decrement this counter, regardless of the queue's 1400 * type. 1401 */ 1402 dqm->total_queue_count--; 1403 pr_debug("Total of %d queues are accountable so far\n", 1404 dqm->total_queue_count); 1405 dqm_unlock(dqm); 1406 } 1407 1408 static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, 1409 struct qcm_process_device *qpd, 1410 const struct kfd_criu_queue_priv_data *qd, 1411 const void *restore_mqd, const void *restore_ctl_stack) 1412 { 1413 int retval; 1414 struct mqd_manager *mqd_mgr; 1415 1416 if (dqm->total_queue_count >= max_num_of_queues_per_device) { 1417 pr_warn("Can't create new usermode queue because %d queues were already created\n", 1418 dqm->total_queue_count); 1419 retval = -EPERM; 1420 goto out; 1421 } 1422 1423 if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 1424 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 1425 dqm_lock(dqm); 1426 retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL); 1427 dqm_unlock(dqm); 1428 if (retval) 1429 goto out; 1430 } 1431 1432 retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL); 1433 if (retval) 1434 goto out_deallocate_sdma_queue; 1435 1436 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 1437 q->properties.type)]; 1438 1439 if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 1440 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 1441 dqm->asic_ops.init_sdma_vm(dqm, q, qpd); 1442 q->properties.tba_addr = qpd->tba_addr; 1443 q->properties.tma_addr = qpd->tma_addr; 1444 q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties); 1445 if (!q->mqd_mem_obj) { 1446 retval = -ENOMEM; 1447 goto out_deallocate_doorbell; 1448 } 1449 1450 dqm_lock(dqm); 1451 /* 1452 * Eviction state logic: mark all queues as evicted, even ones 1453 * not currently active. Restoring inactive queues later only 1454 * updates the is_evicted flag but is a no-op otherwise. 
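	 * (qpd->evicted is a reference count of outstanding evictions, so
	 * any non-zero value means the process's queues are currently
	 * evicted.)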
1455 */ 1456 q->properties.is_evicted = !!qpd->evicted; 1457 1458 if (qd) 1459 mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr, 1460 &q->properties, restore_mqd, restore_ctl_stack, 1461 qd->ctl_stack_size); 1462 else 1463 mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, 1464 &q->gart_mqd_addr, &q->properties); 1465 1466 list_add(&q->list, &qpd->queues_list); 1467 qpd->queue_count++; 1468 1469 if (q->properties.is_active) { 1470 increment_queue_count(dqm, q->properties.type); 1471 1472 execute_queues_cpsch(dqm, 1473 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 1474 } 1475 1476 /* 1477 * Unconditionally increment this counter, regardless of the queue's 1478 * type or whether the queue is active. 1479 */ 1480 dqm->total_queue_count++; 1481 1482 pr_debug("Total of %d queues are accountable so far\n", 1483 dqm->total_queue_count); 1484 1485 dqm_unlock(dqm); 1486 return retval; 1487 1488 out_deallocate_doorbell: 1489 deallocate_doorbell(qpd, q); 1490 out_deallocate_sdma_queue: 1491 if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 1492 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 1493 dqm_lock(dqm); 1494 deallocate_sdma_queue(dqm, q); 1495 dqm_unlock(dqm); 1496 } 1497 out: 1498 return retval; 1499 } 1500 1501 int amdkfd_fence_wait_timeout(uint64_t *fence_addr, 1502 uint64_t fence_value, 1503 unsigned int timeout_ms) 1504 { 1505 unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies; 1506 1507 while (*fence_addr != fence_value) { 1508 if (time_after(jiffies, end_jiffies)) { 1509 pr_err("qcm fence wait loop timeout expired\n"); 1510 /* In HWS case, this is used to halt the driver thread 1511 * in order not to mess up CP states before doing 1512 * scandumps for FW debugging. 1513 */ 1514 while (halt_if_hws_hang) 1515 schedule(); 1516 1517 return -ETIME; 1518 } 1519 schedule(); 1520 } 1521 1522 return 0; 1523 } 1524 1525 /* dqm->lock mutex has to be locked before calling this function */ 1526 static int map_queues_cpsch(struct device_queue_manager *dqm) 1527 { 1528 int retval; 1529 1530 if (!dqm->sched_running) 1531 return 0; 1532 if (dqm->active_queue_count <= 0 || dqm->processes_count <= 0) 1533 return 0; 1534 if (dqm->active_runlist) 1535 return 0; 1536 1537 retval = pm_send_runlist(&dqm->packet_mgr, &dqm->queues); 1538 pr_debug("%s sent runlist\n", __func__); 1539 if (retval) { 1540 pr_err("failed to execute runlist\n"); 1541 return retval; 1542 } 1543 dqm->active_runlist = true; 1544 1545 return retval; 1546 } 1547 1548 /* dqm->lock mutex has to be locked before calling this function */ 1549 static int unmap_queues_cpsch(struct device_queue_manager *dqm, 1550 enum kfd_unmap_queues_filter filter, 1551 uint32_t filter_param, bool reset) 1552 { 1553 int retval = 0; 1554 struct mqd_manager *mqd_mgr; 1555 1556 if (!dqm->sched_running) 1557 return 0; 1558 if (dqm->is_hws_hang || dqm->is_resetting) 1559 return -EIO; 1560 if (!dqm->active_runlist) 1561 return retval; 1562 1563 retval = pm_send_unmap_queue(&dqm->packet_mgr, filter, filter_param, reset); 1564 if (retval) 1565 return retval; 1566 1567 *dqm->fence_addr = KFD_FENCE_INIT; 1568 pm_send_query_status(&dqm->packet_mgr, dqm->fence_gpu_addr, 1569 KFD_FENCE_COMPLETED); 1570 /* should be timed out */ 1571 retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED, 1572 queue_preemption_timeout_ms); 1573 if (retval) { 1574 pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n"); 1575 dqm->is_hws_hang = true; 1576 /* It's possible we're detecting a HWS hang 
in the
		 * middle of a GPU reset. No need to schedule another
		 * reset in this case.
		 */
		if (!dqm->is_resetting)
			schedule_work(&dqm->hw_exception_work);
		return retval;
	}

	/* In the current MEC firmware implementation, if a compute queue
	 * doesn't respond to the preemption request in time, HIQ will
	 * abandon the unmap request without returning any timeout error
	 * to the driver. Instead, MEC firmware will log the doorbell of the
	 * unresponsive compute queue to the HIQ.MQD.queue_doorbell_id fields.
	 * To make sure the queue unmap was successful, the driver needs to
	 * check those fields.
	 */
	mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];
	if (mqd_mgr->read_doorbell_id(dqm->packet_mgr.priv_queue->queue->mqd)) {
		pr_err("HIQ MQD's queue_doorbell_id0 is not 0, queue preemption timed out\n");
		while (halt_if_hws_hang)
			schedule();
		return -ETIME;
	}

	pm_release_ib(&dqm->packet_mgr);
	dqm->active_runlist = false;

	return retval;
}

/* only for compute queues */
static int reset_queues_cpsch(struct device_queue_manager *dqm,
			uint16_t pasid)
{
	int retval;

	dqm_lock(dqm);

	retval = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_BY_PASID,
			pasid, true);

	dqm_unlock(dqm);
	return retval;
}

/* dqm->lock mutex has to be locked before calling this function */
static int execute_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param)
{
	int retval;

	if (dqm->is_hws_hang)
		return -EIO;
	retval = unmap_queues_cpsch(dqm, filter, filter_param, false);
	if (retval)
		return retval;

	return map_queues_cpsch(dqm);
}

static int destroy_queue_cpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd_mgr;
	uint64_t sdma_val = 0;
	struct kfd_process_device *pdd = qpd_to_pdd(qpd);

	/* Get the SDMA queue stats */
	if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
	    (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
		retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
						 &sdma_val);
		if (retval)
			pr_err("Failed to read SDMA queue counter for queue: %d\n",
				q->properties.queue_id);
	}

	retval = 0;

	/* remove queue from list to prevent rescheduling after preemption */
	dqm_lock(dqm);

	if (qpd->is_debug) {
		/*
		 * error: we currently do not allow destroying a queue
		 * of a process that is being debugged
		 */
		retval = -EBUSY;
		goto failed_try_destroy_debugged_queue;

	}

	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
			q->properties.type)];

	deallocate_doorbell(qpd, q);

	if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
	    (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
		deallocate_sdma_queue(dqm, q);
		pdd->sdma_past_activity_counter += sdma_val;
	}

	list_del(&q->list);
	qpd->queue_count--;
	if (q->properties.is_active) {
		decrement_queue_count(dqm, q->properties.type);
		retval = execute_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
		if (retval == -ETIME)
			qpd->reset_wavefronts = true;
		if (q->properties.is_gws) {
			dqm->gws_queue_count--;
			qpd->mapped_gws_queue = false;
		}
1695 } 1696 1697 /* 1698 * Unconditionally decrement this counter, regardless of the queue's 1699 * type 1700 */ 1701 dqm->total_queue_count--; 1702 pr_debug("Total of %d queues are accountable so far\n", 1703 dqm->total_queue_count); 1704 1705 dqm_unlock(dqm); 1706 1707 /* Do free_mqd after dqm_unlock(dqm) to avoid circular locking */ 1708 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 1709 1710 return retval; 1711 1712 failed_try_destroy_debugged_queue: 1713 1714 dqm_unlock(dqm); 1715 return retval; 1716 } 1717 1718 /* 1719 * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to 1720 * stay in user mode. 1721 */ 1722 #define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL 1723 /* APE1 limit is inclusive and 64K aligned. */ 1724 #define APE1_LIMIT_ALIGNMENT 0xFFFF 1725 1726 static bool set_cache_memory_policy(struct device_queue_manager *dqm, 1727 struct qcm_process_device *qpd, 1728 enum cache_policy default_policy, 1729 enum cache_policy alternate_policy, 1730 void __user *alternate_aperture_base, 1731 uint64_t alternate_aperture_size) 1732 { 1733 bool retval = true; 1734 1735 if (!dqm->asic_ops.set_cache_memory_policy) 1736 return retval; 1737 1738 dqm_lock(dqm); 1739 1740 if (alternate_aperture_size == 0) { 1741 /* base > limit disables APE1 */ 1742 qpd->sh_mem_ape1_base = 1; 1743 qpd->sh_mem_ape1_limit = 0; 1744 } else { 1745 /* 1746 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]}, 1747 * SH_MEM_APE1_BASE[31:0], 0x0000 } 1748 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]}, 1749 * SH_MEM_APE1_LIMIT[31:0], 0xFFFF } 1750 * Verify that the base and size parameters can be 1751 * represented in this format and convert them. 1752 * Additionally restrict APE1 to user-mode addresses. 1753 */ 1754 1755 uint64_t base = (uintptr_t)alternate_aperture_base; 1756 uint64_t limit = base + alternate_aperture_size - 1; 1757 1758 if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 || 1759 (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) { 1760 retval = false; 1761 goto out; 1762 } 1763 1764 qpd->sh_mem_ape1_base = base >> 16; 1765 qpd->sh_mem_ape1_limit = limit >> 16; 1766 } 1767 1768 retval = dqm->asic_ops.set_cache_memory_policy( 1769 dqm, 1770 qpd, 1771 default_policy, 1772 alternate_policy, 1773 alternate_aperture_base, 1774 alternate_aperture_size); 1775 1776 if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0)) 1777 program_sh_mem_settings(dqm, qpd); 1778 1779 pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n", 1780 qpd->sh_mem_config, qpd->sh_mem_ape1_base, 1781 qpd->sh_mem_ape1_limit); 1782 1783 out: 1784 dqm_unlock(dqm); 1785 return retval; 1786 } 1787 1788 static int process_termination_nocpsch(struct device_queue_manager *dqm, 1789 struct qcm_process_device *qpd) 1790 { 1791 struct queue *q; 1792 struct device_process_node *cur, *next_dpn; 1793 int retval = 0; 1794 bool found = false; 1795 1796 dqm_lock(dqm); 1797 1798 /* Clear all user mode queues */ 1799 while (!list_empty(&qpd->queues_list)) { 1800 struct mqd_manager *mqd_mgr; 1801 int ret; 1802 1803 q = list_first_entry(&qpd->queues_list, struct queue, list); 1804 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 1805 q->properties.type)]; 1806 ret = destroy_queue_nocpsch_locked(dqm, qpd, q); 1807 if (ret) 1808 retval = ret; 1809 dqm_unlock(dqm); 1810 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 1811 dqm_lock(dqm); 1812 } 1813 1814 /* Unregister process */ 1815 list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) { 1816 if (qpd == cur->qpd) { 1817 
list_del(&cur->list); 1818 kfree(cur); 1819 dqm->processes_count--; 1820 found = true; 1821 break; 1822 } 1823 } 1824 1825 dqm_unlock(dqm); 1826 1827 /* Outside the DQM lock because under the DQM lock we can't do 1828 * reclaim or take other locks that others hold while reclaiming. 1829 */ 1830 if (found) 1831 kfd_dec_compute_active(dqm->dev); 1832 1833 return retval; 1834 } 1835 1836 static int get_wave_state(struct device_queue_manager *dqm, 1837 struct queue *q, 1838 void __user *ctl_stack, 1839 u32 *ctl_stack_used_size, 1840 u32 *save_area_used_size) 1841 { 1842 struct mqd_manager *mqd_mgr; 1843 1844 dqm_lock(dqm); 1845 1846 mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP]; 1847 1848 if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE || 1849 q->properties.is_active || !q->device->cwsr_enabled || 1850 !mqd_mgr->get_wave_state) { 1851 dqm_unlock(dqm); 1852 return -EINVAL; 1853 } 1854 1855 dqm_unlock(dqm); 1856 1857 /* 1858 * get_wave_state is outside the dqm lock to prevent circular locking 1859 * and the queue should be protected against destruction by the process 1860 * lock. 1861 */ 1862 return mqd_mgr->get_wave_state(mqd_mgr, q->mqd, ctl_stack, 1863 ctl_stack_used_size, save_area_used_size); 1864 } 1865 1866 static void get_queue_checkpoint_info(struct device_queue_manager *dqm, 1867 const struct queue *q, 1868 u32 *mqd_size, 1869 u32 *ctl_stack_size) 1870 { 1871 struct mqd_manager *mqd_mgr; 1872 enum KFD_MQD_TYPE mqd_type = 1873 get_mqd_type_from_queue_type(q->properties.type); 1874 1875 dqm_lock(dqm); 1876 mqd_mgr = dqm->mqd_mgrs[mqd_type]; 1877 *mqd_size = mqd_mgr->mqd_size; 1878 *ctl_stack_size = 0; 1879 1880 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE && mqd_mgr->get_checkpoint_info) 1881 mqd_mgr->get_checkpoint_info(mqd_mgr, q->mqd, ctl_stack_size); 1882 1883 dqm_unlock(dqm); 1884 } 1885 1886 static int checkpoint_mqd(struct device_queue_manager *dqm, 1887 const struct queue *q, 1888 void *mqd, 1889 void *ctl_stack) 1890 { 1891 struct mqd_manager *mqd_mgr; 1892 int r = 0; 1893 enum KFD_MQD_TYPE mqd_type = 1894 get_mqd_type_from_queue_type(q->properties.type); 1895 1896 dqm_lock(dqm); 1897 1898 if (q->properties.is_active || !q->device->cwsr_enabled) { 1899 r = -EINVAL; 1900 goto dqm_unlock; 1901 } 1902 1903 mqd_mgr = dqm->mqd_mgrs[mqd_type]; 1904 if (!mqd_mgr->checkpoint_mqd) { 1905 r = -EOPNOTSUPP; 1906 goto dqm_unlock; 1907 } 1908 1909 mqd_mgr->checkpoint_mqd(mqd_mgr, q->mqd, mqd, ctl_stack); 1910 1911 dqm_unlock: 1912 dqm_unlock(dqm); 1913 return r; 1914 } 1915 1916 static int process_termination_cpsch(struct device_queue_manager *dqm, 1917 struct qcm_process_device *qpd) 1918 { 1919 int retval; 1920 struct queue *q; 1921 struct kernel_queue *kq, *kq_next; 1922 struct mqd_manager *mqd_mgr; 1923 struct device_process_node *cur, *next_dpn; 1924 enum kfd_unmap_queues_filter filter = 1925 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES; 1926 bool found = false; 1927 1928 retval = 0; 1929 1930 dqm_lock(dqm); 1931 1932 /* Clean all kernel queues */ 1933 list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) { 1934 list_del(&kq->list); 1935 decrement_queue_count(dqm, kq->queue->properties.type); 1936 qpd->is_debug = false; 1937 dqm->total_queue_count--; 1938 filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES; 1939 } 1940 1941 /* Clear all user mode queues */ 1942 list_for_each_entry(q, &qpd->queues_list, list) { 1943 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) 1944 deallocate_sdma_queue(dqm, q); 1945 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 1946 deallocate_sdma_queue(dqm, 
static int process_termination_cpsch(struct device_queue_manager *dqm,
		struct qcm_process_device *qpd)
{
	int retval;
	struct queue *q;
	struct kernel_queue *kq, *kq_next;
	struct mqd_manager *mqd_mgr;
	struct device_process_node *cur, *next_dpn;
	enum kfd_unmap_queues_filter filter =
		KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;
	bool found = false;

	retval = 0;

	dqm_lock(dqm);

	/* Clean all kernel queues */
	list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
		list_del(&kq->list);
		decrement_queue_count(dqm, kq->queue->properties.type);
		qpd->is_debug = false;
		dqm->total_queue_count--;
		filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES;
	}

	/* Clear all user mode queues */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
			deallocate_sdma_queue(dqm, q);
		else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
			deallocate_sdma_queue(dqm, q);

		if (q->properties.is_active) {
			decrement_queue_count(dqm, q->properties.type);
			if (q->properties.is_gws) {
				dqm->gws_queue_count--;
				qpd->mapped_gws_queue = false;
			}
		}

		dqm->total_queue_count--;
	}

	/* Unregister process */
	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			found = true;
			break;
		}
	}

	retval = execute_queues_cpsch(dqm, filter, 0);
	if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) {
		pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
		dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
		qpd->reset_wavefronts = false;
	}

	/* Lastly, free mqd resources.
	 * Do free_mqd() after dqm_unlock to avoid circular locking.
	 */
	while (!list_empty(&qpd->queues_list)) {
		q = list_first_entry(&qpd->queues_list, struct queue, list);
		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
				q->properties.type)];
		list_del(&q->list);
		qpd->queue_count--;
		dqm_unlock(dqm);
		mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
		dqm_lock(dqm);
	}
	dqm_unlock(dqm);

	/* Outside the DQM lock because under the DQM lock we can't do
	 * reclaim or take other locks that others hold while reclaiming.
	 */
	if (found)
		kfd_dec_compute_active(dqm->dev);

	return retval;
}

static int init_mqd_managers(struct device_queue_manager *dqm)
{
	int i, j;
	struct mqd_manager *mqd_mgr;

	for (i = 0; i < KFD_MQD_TYPE_MAX; i++) {
		mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev);
		if (!mqd_mgr) {
			pr_err("mqd manager [%d] initialization failed\n", i);
			goto out_free;
		}
		dqm->mqd_mgrs[i] = mqd_mgr;
	}

	return 0;

out_free:
	for (j = 0; j < i; j++) {
		kfree(dqm->mqd_mgrs[j]);
		dqm->mqd_mgrs[j] = NULL;
	}

	return -ENOMEM;
}

/* Allocate one HIQ MQD (HWS) and all SDMA MQDs in one contiguous chunk */
static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)
{
	int retval;
	struct kfd_dev *dev = dqm->dev;
	struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd;
	uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
		get_num_all_sdma_engines(dqm) *
		dev->device_info.num_sdma_queues_per_engine +
		dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;

	retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, size,
		&(mem_obj->gtt_mem), &(mem_obj->gpu_addr),
		(void *)&(mem_obj->cpu_ptr), false);

	return retval;
}
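
/*
 * device_queue_manager_init() - Allocate and set up the DQM for a KFD device.
 *
 * Forces KFD_SCHED_POLICY_NO_HWS on ASICs that cannot use the HW scheduler,
 * fills dqm->ops with the cpsch or nocpsch implementations, selects the
 * per-ASIC asic_ops hooks, and then creates the MQD managers and the shared
 * HIQ/SDMA MQD buffer.  Returns the new DQM, or NULL on any failure.
 */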
struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
{
	struct device_queue_manager *dqm;

	pr_debug("Loading device queue manager\n");

	dqm = kzalloc(sizeof(*dqm), GFP_KERNEL);
	if (!dqm)
		return NULL;

	switch (dev->adev->asic_type) {
	/* HWS is not available on Hawaii. */
	case CHIP_HAWAII:
	/* HWS depends on CWSR for timely dequeue. CWSR is not
	 * available on Tonga.
	 *
	 * FIXME: This argument also applies to Kaveri.
	 */
	case CHIP_TONGA:
		dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS;
		break;
	default:
		dqm->sched_policy = sched_policy;
		break;
	}

	dqm->dev = dev;
	switch (dqm->sched_policy) {
	case KFD_SCHED_POLICY_HWS:
	case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
		/* initialize dqm for cp scheduling */
		dqm->ops.create_queue = create_queue_cpsch;
		dqm->ops.initialize = initialize_cpsch;
		dqm->ops.start = start_cpsch;
		dqm->ops.stop = stop_cpsch;
		dqm->ops.pre_reset = pre_reset;
		dqm->ops.destroy_queue = destroy_queue_cpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.register_process = register_process;
		dqm->ops.unregister_process = unregister_process;
		dqm->ops.uninitialize = uninitialize;
		dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
		dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		dqm->ops.process_termination = process_termination_cpsch;
		dqm->ops.evict_process_queues = evict_process_queues_cpsch;
		dqm->ops.restore_process_queues = restore_process_queues_cpsch;
		dqm->ops.get_wave_state = get_wave_state;
		dqm->ops.reset_queues = reset_queues_cpsch;
		dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info;
		dqm->ops.checkpoint_mqd = checkpoint_mqd;
		break;
	case KFD_SCHED_POLICY_NO_HWS:
		/* initialize dqm for no cp scheduling */
		dqm->ops.start = start_nocpsch;
		dqm->ops.stop = stop_nocpsch;
		dqm->ops.pre_reset = pre_reset;
		dqm->ops.create_queue = create_queue_nocpsch;
		dqm->ops.destroy_queue = destroy_queue_nocpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.register_process = register_process;
		dqm->ops.unregister_process = unregister_process;
		dqm->ops.initialize = initialize_nocpsch;
		dqm->ops.uninitialize = uninitialize;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		dqm->ops.process_termination = process_termination_nocpsch;
		dqm->ops.evict_process_queues = evict_process_queues_nocpsch;
		dqm->ops.restore_process_queues =
			restore_process_queues_nocpsch;
		dqm->ops.get_wave_state = get_wave_state;
		dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info;
		dqm->ops.checkpoint_mqd = checkpoint_mqd;
		break;
	default:
		pr_err("Invalid scheduling policy %d\n", dqm->sched_policy);
		goto out_free;
	}

	switch (dev->adev->asic_type) {
	case CHIP_CARRIZO:
		device_queue_manager_init_vi(&dqm->asic_ops);
		break;

	case CHIP_KAVERI:
		device_queue_manager_init_cik(&dqm->asic_ops);
		break;

	case CHIP_HAWAII:
		device_queue_manager_init_cik_hawaii(&dqm->asic_ops);
		break;

	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		device_queue_manager_init_vi_tonga(&dqm->asic_ops);
		break;

	default:
		if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1))
			device_queue_manager_init_v10_navi10(&dqm->asic_ops);
		else if (KFD_GC_VERSION(dev) >= IP_VERSION(9, 0, 1))
			device_queue_manager_init_v9(&dqm->asic_ops);
		else {
			WARN(1, "Unexpected ASIC family %u",
			     dev->adev->asic_type);
			goto out_free;
		}
	}

	if (init_mqd_managers(dqm))
		goto out_free;

	if (allocate_hiq_sdma_mqd(dqm)) {
		pr_err("Failed to allocate hiq sdma mqd trunk buffer\n");
		goto out_free;
	}

	if (!dqm->ops.initialize(dqm))
		return dqm;

out_free:
	kfree(dqm);
	return NULL;
}
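
/*
 * Teardown mirrors device_queue_manager_init(): device_queue_manager_uninit()
 * runs the policy-specific uninitialize hook, frees the HIQ/SDMA MQD buffer
 * allocated by allocate_hiq_sdma_mqd(), and finally frees the DQM itself.
 * Illustrative pairing from the caller's perspective (sketch only, error
 * handling elided):
 *
 *	dqm = device_queue_manager_init(dev);
 *	if (!dqm)
 *		return -ENOMEM;
 *	...
 *	device_queue_manager_uninit(dqm);
 */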
buffer\n"); 2161 goto out_free; 2162 } 2163 2164 if (!dqm->ops.initialize(dqm)) 2165 return dqm; 2166 2167 out_free: 2168 kfree(dqm); 2169 return NULL; 2170 } 2171 2172 static void deallocate_hiq_sdma_mqd(struct kfd_dev *dev, 2173 struct kfd_mem_obj *mqd) 2174 { 2175 WARN(!mqd, "No hiq sdma mqd trunk to free"); 2176 2177 amdgpu_amdkfd_free_gtt_mem(dev->adev, mqd->gtt_mem); 2178 } 2179 2180 void device_queue_manager_uninit(struct device_queue_manager *dqm) 2181 { 2182 dqm->ops.uninitialize(dqm); 2183 deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd); 2184 kfree(dqm); 2185 } 2186 2187 int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid) 2188 { 2189 struct kfd_process_device *pdd; 2190 struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); 2191 int ret = 0; 2192 2193 if (!p) 2194 return -EINVAL; 2195 WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid); 2196 pdd = kfd_get_process_device_data(dqm->dev, p); 2197 if (pdd) 2198 ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd); 2199 kfd_unref_process(p); 2200 2201 return ret; 2202 } 2203 2204 static void kfd_process_hw_exception(struct work_struct *work) 2205 { 2206 struct device_queue_manager *dqm = container_of(work, 2207 struct device_queue_manager, hw_exception_work); 2208 amdgpu_amdkfd_gpu_reset(dqm->dev->adev); 2209 } 2210 2211 #if defined(CONFIG_DEBUG_FS) 2212 2213 static void seq_reg_dump(struct seq_file *m, 2214 uint32_t (*dump)[2], uint32_t n_regs) 2215 { 2216 uint32_t i, count; 2217 2218 for (i = 0, count = 0; i < n_regs; i++) { 2219 if (count == 0 || 2220 dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) { 2221 seq_printf(m, "%s %08x: %08x", 2222 i ? "\n" : "", 2223 dump[i][0], dump[i][1]); 2224 count = 7; 2225 } else { 2226 seq_printf(m, " %08x", dump[i][1]); 2227 count--; 2228 } 2229 } 2230 2231 seq_puts(m, "\n"); 2232 } 2233 2234 int dqm_debugfs_hqds(struct seq_file *m, void *data) 2235 { 2236 struct device_queue_manager *dqm = data; 2237 uint32_t (*dump)[2], n_regs; 2238 int pipe, queue; 2239 int r = 0; 2240 2241 if (!dqm->sched_running) { 2242 seq_puts(m, " Device is stopped\n"); 2243 return 0; 2244 } 2245 2246 r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev, 2247 KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE, 2248 &dump, &n_regs); 2249 if (!r) { 2250 seq_printf(m, " HIQ on MEC %d Pipe %d Queue %d\n", 2251 KFD_CIK_HIQ_PIPE/get_pipes_per_mec(dqm)+1, 2252 KFD_CIK_HIQ_PIPE%get_pipes_per_mec(dqm), 2253 KFD_CIK_HIQ_QUEUE); 2254 seq_reg_dump(m, dump, n_regs); 2255 2256 kfree(dump); 2257 } 2258 2259 for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { 2260 int pipe_offset = pipe * get_queues_per_pipe(dqm); 2261 2262 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) { 2263 if (!test_bit(pipe_offset + queue, 2264 dqm->dev->shared_resources.cp_queue_bitmap)) 2265 continue; 2266 2267 r = dqm->dev->kfd2kgd->hqd_dump( 2268 dqm->dev->adev, pipe, queue, &dump, &n_regs); 2269 if (r) 2270 break; 2271 2272 seq_printf(m, " CP Pipe %d, Queue %d\n", 2273 pipe, queue); 2274 seq_reg_dump(m, dump, n_regs); 2275 2276 kfree(dump); 2277 } 2278 } 2279 2280 for (pipe = 0; pipe < get_num_all_sdma_engines(dqm); pipe++) { 2281 for (queue = 0; 2282 queue < dqm->dev->device_info.num_sdma_queues_per_engine; 2283 queue++) { 2284 r = dqm->dev->kfd2kgd->hqd_sdma_dump( 2285 dqm->dev->adev, pipe, queue, &dump, &n_regs); 2286 if (r) 2287 break; 2288 2289 seq_printf(m, " SDMA Engine %d, RLC %d\n", 2290 pipe, queue); 2291 seq_reg_dump(m, dump, n_regs); 2292 2293 kfree(dump); 2294 } 2295 } 2296 2297 return r; 2298 } 2299 2300 int 
int dqm_debugfs_hang_hws(struct device_queue_manager *dqm)
{
	int r = 0;

	dqm_lock(dqm);
	r = pm_debugfs_hang_hws(&dqm->packet_mgr);
	if (r) {
		dqm_unlock(dqm);
		return r;
	}
	dqm->active_runlist = true;
	r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
	dqm_unlock(dqm);

	return r;
}

#endif