1 /* 2 * Copyright 2014 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 */ 23 24 #include <linux/ratelimit.h> 25 #include <linux/printk.h> 26 #include <linux/slab.h> 27 #include <linux/list.h> 28 #include <linux/types.h> 29 #include <linux/bitops.h> 30 #include <linux/sched.h> 31 #include "kfd_priv.h" 32 #include "kfd_device_queue_manager.h" 33 #include "kfd_mqd_manager.h" 34 #include "cik_regs.h" 35 #include "kfd_kernel_queue.h" 36 #include "amdgpu_amdkfd.h" 37 38 /* Size of the per-pipe EOP queue */ 39 #define CIK_HPD_EOP_BYTES_LOG2 11 40 #define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2) 41 42 static int set_pasid_vmid_mapping(struct device_queue_manager *dqm, 43 u32 pasid, unsigned int vmid); 44 45 static int execute_queues_cpsch(struct device_queue_manager *dqm, 46 enum kfd_unmap_queues_filter filter, 47 uint32_t filter_param); 48 static int unmap_queues_cpsch(struct device_queue_manager *dqm, 49 enum kfd_unmap_queues_filter filter, 50 uint32_t filter_param, bool reset); 51 52 static int map_queues_cpsch(struct device_queue_manager *dqm); 53 54 static void deallocate_sdma_queue(struct device_queue_manager *dqm, 55 struct queue *q); 56 57 static inline void deallocate_hqd(struct device_queue_manager *dqm, 58 struct queue *q); 59 static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q); 60 static int allocate_sdma_queue(struct device_queue_manager *dqm, 61 struct queue *q); 62 static void kfd_process_hw_exception(struct work_struct *work); 63 64 static inline 65 enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type) 66 { 67 if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI) 68 return KFD_MQD_TYPE_SDMA; 69 return KFD_MQD_TYPE_CP; 70 } 71 72 static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe) 73 { 74 int i; 75 int pipe_offset = (mec * dqm->dev->shared_resources.num_pipe_per_mec 76 + pipe) * dqm->dev->shared_resources.num_queue_per_pipe; 77 78 /* queue is available for KFD usage if bit is 1 */ 79 for (i = 0; i < dqm->dev->shared_resources.num_queue_per_pipe; ++i) 80 if (test_bit(pipe_offset + i, 81 dqm->dev->shared_resources.cp_queue_bitmap)) 82 return true; 83 return false; 84 } 85 86 unsigned int get_cp_queues_num(struct device_queue_manager *dqm) 87 { 88 return bitmap_weight(dqm->dev->shared_resources.cp_queue_bitmap, 89 KGD_MAX_QUEUES); 90 } 91 92 unsigned int get_queues_per_pipe(struct device_queue_manager *dqm) 93 { 94 return dqm->dev->shared_resources.num_queue_per_pipe; 95 } 96 97 unsigned int get_pipes_per_mec(struct device_queue_manager *dqm) 98 { 99 return dqm->dev->shared_resources.num_pipe_per_mec; 100 } 101 102 static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm) 103 { 104 return kfd_get_num_sdma_engines(dqm->dev) + 105 kfd_get_num_xgmi_sdma_engines(dqm->dev); 106 } 107 108 unsigned int get_num_sdma_queues(struct device_queue_manager *dqm) 109 { 110 return kfd_get_num_sdma_engines(dqm->dev) * 111 dqm->dev->device_info.num_sdma_queues_per_engine; 112 } 113 114 unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm) 115 { 116 return kfd_get_num_xgmi_sdma_engines(dqm->dev) * 117 dqm->dev->device_info.num_sdma_queues_per_engine; 118 } 119 120 void program_sh_mem_settings(struct device_queue_manager *dqm, 121 struct qcm_process_device *qpd) 122 { 123 return dqm->dev->kfd2kgd->program_sh_mem_settings( 124 dqm->dev->adev, qpd->vmid, 125 qpd->sh_mem_config, 126 qpd->sh_mem_ape1_base, 127 qpd->sh_mem_ape1_limit, 128 qpd->sh_mem_bases); 129 } 130 131 static void increment_queue_count(struct device_queue_manager *dqm, 132 enum kfd_queue_type type) 133 { 134 dqm->active_queue_count++; 135 if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ) 136 dqm->active_cp_queue_count++; 137 } 138 139 static void decrement_queue_count(struct device_queue_manager *dqm, 140 enum kfd_queue_type type) 141 { 142 dqm->active_queue_count--; 143 if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ) 144 dqm->active_cp_queue_count--; 145 } 146 147 static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q) 148 { 149 struct kfd_dev *dev = qpd->dqm->dev; 150 151 if (!KFD_IS_SOC15(dev)) { 152 /* On pre-SOC15 chips we need to use the queue ID to 153 * preserve the user mode ABI. 154 */ 155 q->doorbell_id = q->properties.queue_id; 156 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 157 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 158 /* For SDMA queues on SOC15 with 8-byte doorbell, use static 159 * doorbell assignments based on the engine and queue id. 160 * The doobell index distance between RLC (2*i) and (2*i+1) 161 * for a SDMA engine is 512. 162 */ 163 uint32_t *idx_offset = 164 dev->shared_resources.sdma_doorbell_idx; 165 166 q->doorbell_id = idx_offset[q->properties.sdma_engine_id] 167 + (q->properties.sdma_queue_id & 1) 168 * KFD_QUEUE_DOORBELL_MIRROR_OFFSET 169 + (q->properties.sdma_queue_id >> 1); 170 } else { 171 /* For CP queues on SOC15 reserve a free doorbell ID */ 172 unsigned int found; 173 174 found = find_first_zero_bit(qpd->doorbell_bitmap, 175 KFD_MAX_NUM_OF_QUEUES_PER_PROCESS); 176 if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) { 177 pr_debug("No doorbells available"); 178 return -EBUSY; 179 } 180 set_bit(found, qpd->doorbell_bitmap); 181 q->doorbell_id = found; 182 } 183 184 q->properties.doorbell_off = 185 kfd_get_doorbell_dw_offset_in_bar(dev, qpd_to_pdd(qpd), 186 q->doorbell_id); 187 return 0; 188 } 189 190 static void deallocate_doorbell(struct qcm_process_device *qpd, 191 struct queue *q) 192 { 193 unsigned int old; 194 struct kfd_dev *dev = qpd->dqm->dev; 195 196 if (!KFD_IS_SOC15(dev) || 197 q->properties.type == KFD_QUEUE_TYPE_SDMA || 198 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 199 return; 200 201 old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap); 202 WARN_ON(!old); 203 } 204 205 static void program_trap_handler_settings(struct device_queue_manager *dqm, 206 struct qcm_process_device *qpd) 207 { 208 if (dqm->dev->kfd2kgd->program_trap_handler_settings) 209 dqm->dev->kfd2kgd->program_trap_handler_settings( 210 dqm->dev->adev, qpd->vmid, 211 qpd->tba_addr, qpd->tma_addr); 212 } 213 214 static int allocate_vmid(struct device_queue_manager *dqm, 215 struct qcm_process_device *qpd, 216 struct queue *q) 217 { 218 int allocated_vmid = -1, i; 219 220 for (i = dqm->dev->vm_info.first_vmid_kfd; 221 i <= dqm->dev->vm_info.last_vmid_kfd; i++) { 222 if (!dqm->vmid_pasid[i]) { 223 allocated_vmid = i; 224 break; 225 } 226 } 227 228 if (allocated_vmid < 0) { 229 pr_err("no more vmid to allocate\n"); 230 return -ENOSPC; 231 } 232 233 pr_debug("vmid allocated: %d\n", allocated_vmid); 234 235 dqm->vmid_pasid[allocated_vmid] = q->process->pasid; 236 237 set_pasid_vmid_mapping(dqm, q->process->pasid, allocated_vmid); 238 239 qpd->vmid = allocated_vmid; 240 q->properties.vmid = allocated_vmid; 241 242 program_sh_mem_settings(dqm, qpd); 243 244 if (KFD_IS_SOC15(dqm->dev) && dqm->dev->cwsr_enabled) 245 program_trap_handler_settings(dqm, qpd); 246 247 /* qpd->page_table_base is set earlier when register_process() 248 * is called, i.e. when the first queue is created. 249 */ 250 dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->adev, 251 qpd->vmid, 252 qpd->page_table_base); 253 /* invalidate the VM context after pasid and vmid mapping is set up */ 254 kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY); 255 256 if (dqm->dev->kfd2kgd->set_scratch_backing_va) 257 dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->adev, 258 qpd->sh_hidden_private_base, qpd->vmid); 259 260 return 0; 261 } 262 263 static int flush_texture_cache_nocpsch(struct kfd_dev *kdev, 264 struct qcm_process_device *qpd) 265 { 266 const struct packet_manager_funcs *pmf = qpd->dqm->packet_mgr.pmf; 267 int ret; 268 269 if (!qpd->ib_kaddr) 270 return -ENOMEM; 271 272 ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr); 273 if (ret) 274 return ret; 275 276 return amdgpu_amdkfd_submit_ib(kdev->adev, KGD_ENGINE_MEC1, qpd->vmid, 277 qpd->ib_base, (uint32_t *)qpd->ib_kaddr, 278 pmf->release_mem_size / sizeof(uint32_t)); 279 } 280 281 static void deallocate_vmid(struct device_queue_manager *dqm, 282 struct qcm_process_device *qpd, 283 struct queue *q) 284 { 285 /* On GFX v7, CP doesn't flush TC at dequeue */ 286 if (q->device->adev->asic_type == CHIP_HAWAII) 287 if (flush_texture_cache_nocpsch(q->device, qpd)) 288 pr_err("Failed to flush TC\n"); 289 290 kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY); 291 292 /* Release the vmid mapping */ 293 set_pasid_vmid_mapping(dqm, 0, qpd->vmid); 294 dqm->vmid_pasid[qpd->vmid] = 0; 295 296 qpd->vmid = 0; 297 q->properties.vmid = 0; 298 } 299 300 static int create_queue_nocpsch(struct device_queue_manager *dqm, 301 struct queue *q, 302 struct qcm_process_device *qpd) 303 { 304 struct mqd_manager *mqd_mgr; 305 int retval; 306 307 dqm_lock(dqm); 308 309 if (dqm->total_queue_count >= max_num_of_queues_per_device) { 310 pr_warn("Can't create new usermode queue because %d queues were already created\n", 311 dqm->total_queue_count); 312 retval = -EPERM; 313 goto out_unlock; 314 } 315 316 if (list_empty(&qpd->queues_list)) { 317 retval = allocate_vmid(dqm, qpd, q); 318 if (retval) 319 goto out_unlock; 320 } 321 q->properties.vmid = qpd->vmid; 322 /* 323 * Eviction state logic: mark all queues as evicted, even ones 324 * not currently active. Restoring inactive queues later only 325 * updates the is_evicted flag but is a no-op otherwise. 326 */ 327 q->properties.is_evicted = !!qpd->evicted; 328 329 q->properties.tba_addr = qpd->tba_addr; 330 q->properties.tma_addr = qpd->tma_addr; 331 332 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 333 q->properties.type)]; 334 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) { 335 retval = allocate_hqd(dqm, q); 336 if (retval) 337 goto deallocate_vmid; 338 pr_debug("Loading mqd to hqd on pipe %d, queue %d\n", 339 q->pipe, q->queue); 340 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 341 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 342 retval = allocate_sdma_queue(dqm, q); 343 if (retval) 344 goto deallocate_vmid; 345 dqm->asic_ops.init_sdma_vm(dqm, q, qpd); 346 } 347 348 retval = allocate_doorbell(qpd, q); 349 if (retval) 350 goto out_deallocate_hqd; 351 352 /* Temporarily release dqm lock to avoid a circular lock dependency */ 353 dqm_unlock(dqm); 354 q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties); 355 dqm_lock(dqm); 356 357 if (!q->mqd_mem_obj) { 358 retval = -ENOMEM; 359 goto out_deallocate_doorbell; 360 } 361 mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, 362 &q->gart_mqd_addr, &q->properties); 363 if (q->properties.is_active) { 364 if (!dqm->sched_running) { 365 WARN_ONCE(1, "Load non-HWS mqd while stopped\n"); 366 goto add_queue_to_list; 367 } 368 369 if (WARN(q->process->mm != current->mm, 370 "should only run in user thread")) 371 retval = -EFAULT; 372 else 373 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, 374 q->queue, &q->properties, current->mm); 375 if (retval) 376 goto out_free_mqd; 377 } 378 379 add_queue_to_list: 380 list_add(&q->list, &qpd->queues_list); 381 qpd->queue_count++; 382 if (q->properties.is_active) 383 increment_queue_count(dqm, q->properties.type); 384 385 /* 386 * Unconditionally increment this counter, regardless of the queue's 387 * type or whether the queue is active. 388 */ 389 dqm->total_queue_count++; 390 pr_debug("Total of %d queues are accountable so far\n", 391 dqm->total_queue_count); 392 goto out_unlock; 393 394 out_free_mqd: 395 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 396 out_deallocate_doorbell: 397 deallocate_doorbell(qpd, q); 398 out_deallocate_hqd: 399 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) 400 deallocate_hqd(dqm, q); 401 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 402 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 403 deallocate_sdma_queue(dqm, q); 404 deallocate_vmid: 405 if (list_empty(&qpd->queues_list)) 406 deallocate_vmid(dqm, qpd, q); 407 out_unlock: 408 dqm_unlock(dqm); 409 return retval; 410 } 411 412 static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q) 413 { 414 bool set; 415 int pipe, bit, i; 416 417 set = false; 418 419 for (pipe = dqm->next_pipe_to_allocate, i = 0; 420 i < get_pipes_per_mec(dqm); 421 pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) { 422 423 if (!is_pipe_enabled(dqm, 0, pipe)) 424 continue; 425 426 if (dqm->allocated_queues[pipe] != 0) { 427 bit = ffs(dqm->allocated_queues[pipe]) - 1; 428 dqm->allocated_queues[pipe] &= ~(1 << bit); 429 q->pipe = pipe; 430 q->queue = bit; 431 set = true; 432 break; 433 } 434 } 435 436 if (!set) 437 return -EBUSY; 438 439 pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue); 440 /* horizontal hqd allocation */ 441 dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm); 442 443 return 0; 444 } 445 446 static inline void deallocate_hqd(struct device_queue_manager *dqm, 447 struct queue *q) 448 { 449 dqm->allocated_queues[q->pipe] |= (1 << q->queue); 450 } 451 452 /* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked 453 * to avoid asynchronized access 454 */ 455 static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm, 456 struct qcm_process_device *qpd, 457 struct queue *q) 458 { 459 int retval; 460 struct mqd_manager *mqd_mgr; 461 462 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 463 q->properties.type)]; 464 465 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) 466 deallocate_hqd(dqm, q); 467 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) 468 deallocate_sdma_queue(dqm, q); 469 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 470 deallocate_sdma_queue(dqm, q); 471 else { 472 pr_debug("q->properties.type %d is invalid\n", 473 q->properties.type); 474 return -EINVAL; 475 } 476 dqm->total_queue_count--; 477 478 deallocate_doorbell(qpd, q); 479 480 if (!dqm->sched_running) { 481 WARN_ONCE(1, "Destroy non-HWS queue while stopped\n"); 482 return 0; 483 } 484 485 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, 486 KFD_PREEMPT_TYPE_WAVEFRONT_RESET, 487 KFD_UNMAP_LATENCY_MS, 488 q->pipe, q->queue); 489 if (retval == -ETIME) 490 qpd->reset_wavefronts = true; 491 492 list_del(&q->list); 493 if (list_empty(&qpd->queues_list)) { 494 if (qpd->reset_wavefronts) { 495 pr_warn("Resetting wave fronts (nocpsch) on dev %p\n", 496 dqm->dev); 497 /* dbgdev_wave_reset_wavefronts has to be called before 498 * deallocate_vmid(), i.e. when vmid is still in use. 499 */ 500 dbgdev_wave_reset_wavefronts(dqm->dev, 501 qpd->pqm->process); 502 qpd->reset_wavefronts = false; 503 } 504 505 deallocate_vmid(dqm, qpd, q); 506 } 507 qpd->queue_count--; 508 if (q->properties.is_active) { 509 decrement_queue_count(dqm, q->properties.type); 510 if (q->properties.is_gws) { 511 dqm->gws_queue_count--; 512 qpd->mapped_gws_queue = false; 513 } 514 } 515 516 return retval; 517 } 518 519 static int destroy_queue_nocpsch(struct device_queue_manager *dqm, 520 struct qcm_process_device *qpd, 521 struct queue *q) 522 { 523 int retval; 524 uint64_t sdma_val = 0; 525 struct kfd_process_device *pdd = qpd_to_pdd(qpd); 526 struct mqd_manager *mqd_mgr = 527 dqm->mqd_mgrs[get_mqd_type_from_queue_type(q->properties.type)]; 528 529 /* Get the SDMA queue stats */ 530 if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || 531 (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 532 retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr, 533 &sdma_val); 534 if (retval) 535 pr_err("Failed to read SDMA queue counter for queue: %d\n", 536 q->properties.queue_id); 537 } 538 539 dqm_lock(dqm); 540 retval = destroy_queue_nocpsch_locked(dqm, qpd, q); 541 if (!retval) 542 pdd->sdma_past_activity_counter += sdma_val; 543 dqm_unlock(dqm); 544 545 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 546 547 return retval; 548 } 549 550 static int update_queue(struct device_queue_manager *dqm, struct queue *q, 551 struct mqd_update_info *minfo) 552 { 553 int retval = 0; 554 struct mqd_manager *mqd_mgr; 555 struct kfd_process_device *pdd; 556 bool prev_active = false; 557 558 dqm_lock(dqm); 559 pdd = kfd_get_process_device_data(q->device, q->process); 560 if (!pdd) { 561 retval = -ENODEV; 562 goto out_unlock; 563 } 564 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 565 q->properties.type)]; 566 567 /* Save previous activity state for counters */ 568 prev_active = q->properties.is_active; 569 570 /* Make sure the queue is unmapped before updating the MQD */ 571 if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) { 572 retval = unmap_queues_cpsch(dqm, 573 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false); 574 if (retval) { 575 pr_err("unmap queue failed\n"); 576 goto out_unlock; 577 } 578 } else if (prev_active && 579 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || 580 q->properties.type == KFD_QUEUE_TYPE_SDMA || 581 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 582 583 if (!dqm->sched_running) { 584 WARN_ONCE(1, "Update non-HWS queue while stopped\n"); 585 goto out_unlock; 586 } 587 588 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, 589 (dqm->dev->cwsr_enabled? 590 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE: 591 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN), 592 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); 593 if (retval) { 594 pr_err("destroy mqd failed\n"); 595 goto out_unlock; 596 } 597 } 598 599 mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties, minfo); 600 601 /* 602 * check active state vs. the previous state and modify 603 * counter accordingly. map_queues_cpsch uses the 604 * dqm->active_queue_count to determine whether a new runlist must be 605 * uploaded. 606 */ 607 if (q->properties.is_active && !prev_active) 608 increment_queue_count(dqm, q->properties.type); 609 else if (!q->properties.is_active && prev_active) 610 decrement_queue_count(dqm, q->properties.type); 611 612 if (q->gws && !q->properties.is_gws) { 613 if (q->properties.is_active) { 614 dqm->gws_queue_count++; 615 pdd->qpd.mapped_gws_queue = true; 616 } 617 q->properties.is_gws = true; 618 } else if (!q->gws && q->properties.is_gws) { 619 if (q->properties.is_active) { 620 dqm->gws_queue_count--; 621 pdd->qpd.mapped_gws_queue = false; 622 } 623 q->properties.is_gws = false; 624 } 625 626 if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) 627 retval = map_queues_cpsch(dqm); 628 else if (q->properties.is_active && 629 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || 630 q->properties.type == KFD_QUEUE_TYPE_SDMA || 631 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 632 if (WARN(q->process->mm != current->mm, 633 "should only run in user thread")) 634 retval = -EFAULT; 635 else 636 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, 637 q->pipe, q->queue, 638 &q->properties, current->mm); 639 } 640 641 out_unlock: 642 dqm_unlock(dqm); 643 return retval; 644 } 645 646 static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, 647 struct qcm_process_device *qpd) 648 { 649 struct queue *q; 650 struct mqd_manager *mqd_mgr; 651 struct kfd_process_device *pdd; 652 int retval, ret = 0; 653 654 dqm_lock(dqm); 655 if (qpd->evicted++ > 0) /* already evicted, do nothing */ 656 goto out; 657 658 pdd = qpd_to_pdd(qpd); 659 pr_debug_ratelimited("Evicting PASID 0x%x queues\n", 660 pdd->process->pasid); 661 662 pdd->last_evict_timestamp = get_jiffies_64(); 663 /* Mark all queues as evicted. Deactivate all active queues on 664 * the qpd. 665 */ 666 list_for_each_entry(q, &qpd->queues_list, list) { 667 q->properties.is_evicted = true; 668 if (!q->properties.is_active) 669 continue; 670 671 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 672 q->properties.type)]; 673 q->properties.is_active = false; 674 decrement_queue_count(dqm, q->properties.type); 675 if (q->properties.is_gws) { 676 dqm->gws_queue_count--; 677 qpd->mapped_gws_queue = false; 678 } 679 680 if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n")) 681 continue; 682 683 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, 684 (dqm->dev->cwsr_enabled? 685 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE: 686 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN), 687 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); 688 if (retval && !ret) 689 /* Return the first error, but keep going to 690 * maintain a consistent eviction state 691 */ 692 ret = retval; 693 } 694 695 out: 696 dqm_unlock(dqm); 697 return ret; 698 } 699 700 static int evict_process_queues_cpsch(struct device_queue_manager *dqm, 701 struct qcm_process_device *qpd) 702 { 703 struct queue *q; 704 struct kfd_process_device *pdd; 705 int retval = 0; 706 707 dqm_lock(dqm); 708 if (qpd->evicted++ > 0) /* already evicted, do nothing */ 709 goto out; 710 711 pdd = qpd_to_pdd(qpd); 712 pr_debug_ratelimited("Evicting PASID 0x%x queues\n", 713 pdd->process->pasid); 714 715 /* Mark all queues as evicted. Deactivate all active queues on 716 * the qpd. 717 */ 718 list_for_each_entry(q, &qpd->queues_list, list) { 719 q->properties.is_evicted = true; 720 if (!q->properties.is_active) 721 continue; 722 723 q->properties.is_active = false; 724 decrement_queue_count(dqm, q->properties.type); 725 } 726 pdd->last_evict_timestamp = get_jiffies_64(); 727 retval = execute_queues_cpsch(dqm, 728 qpd->is_debug ? 729 KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES : 730 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 731 732 out: 733 dqm_unlock(dqm); 734 return retval; 735 } 736 737 static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, 738 struct qcm_process_device *qpd) 739 { 740 struct mm_struct *mm = NULL; 741 struct queue *q; 742 struct mqd_manager *mqd_mgr; 743 struct kfd_process_device *pdd; 744 uint64_t pd_base; 745 uint64_t eviction_duration; 746 int retval, ret = 0; 747 748 pdd = qpd_to_pdd(qpd); 749 /* Retrieve PD base */ 750 pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); 751 752 dqm_lock(dqm); 753 if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */ 754 goto out; 755 if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */ 756 qpd->evicted--; 757 goto out; 758 } 759 760 pr_debug_ratelimited("Restoring PASID 0x%x queues\n", 761 pdd->process->pasid); 762 763 /* Update PD Base in QPD */ 764 qpd->page_table_base = pd_base; 765 pr_debug("Updated PD address to 0x%llx\n", pd_base); 766 767 if (!list_empty(&qpd->queues_list)) { 768 dqm->dev->kfd2kgd->set_vm_context_page_table_base( 769 dqm->dev->adev, 770 qpd->vmid, 771 qpd->page_table_base); 772 kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY); 773 } 774 775 /* Take a safe reference to the mm_struct, which may otherwise 776 * disappear even while the kfd_process is still referenced. 777 */ 778 mm = get_task_mm(pdd->process->lead_thread); 779 if (!mm) { 780 ret = -EFAULT; 781 goto out; 782 } 783 784 /* Remove the eviction flags. Activate queues that are not 785 * inactive for other reasons. 786 */ 787 list_for_each_entry(q, &qpd->queues_list, list) { 788 q->properties.is_evicted = false; 789 if (!QUEUE_IS_ACTIVE(q->properties)) 790 continue; 791 792 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 793 q->properties.type)]; 794 q->properties.is_active = true; 795 increment_queue_count(dqm, q->properties.type); 796 if (q->properties.is_gws) { 797 dqm->gws_queue_count++; 798 qpd->mapped_gws_queue = true; 799 } 800 801 if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n")) 802 continue; 803 804 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, 805 q->queue, &q->properties, mm); 806 if (retval && !ret) 807 /* Return the first error, but keep going to 808 * maintain a consistent eviction state 809 */ 810 ret = retval; 811 } 812 qpd->evicted = 0; 813 eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp; 814 atomic64_add(eviction_duration, &pdd->evict_duration_counter); 815 out: 816 if (mm) 817 mmput(mm); 818 dqm_unlock(dqm); 819 return ret; 820 } 821 822 static int restore_process_queues_cpsch(struct device_queue_manager *dqm, 823 struct qcm_process_device *qpd) 824 { 825 struct queue *q; 826 struct kfd_process_device *pdd; 827 uint64_t pd_base; 828 uint64_t eviction_duration; 829 int retval = 0; 830 831 pdd = qpd_to_pdd(qpd); 832 /* Retrieve PD base */ 833 pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); 834 835 dqm_lock(dqm); 836 if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */ 837 goto out; 838 if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */ 839 qpd->evicted--; 840 goto out; 841 } 842 843 pr_debug_ratelimited("Restoring PASID 0x%x queues\n", 844 pdd->process->pasid); 845 846 /* Update PD Base in QPD */ 847 qpd->page_table_base = pd_base; 848 pr_debug("Updated PD address to 0x%llx\n", pd_base); 849 850 /* activate all active queues on the qpd */ 851 list_for_each_entry(q, &qpd->queues_list, list) { 852 q->properties.is_evicted = false; 853 if (!QUEUE_IS_ACTIVE(q->properties)) 854 continue; 855 856 q->properties.is_active = true; 857 increment_queue_count(dqm, q->properties.type); 858 } 859 retval = execute_queues_cpsch(dqm, 860 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 861 qpd->evicted = 0; 862 eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp; 863 atomic64_add(eviction_duration, &pdd->evict_duration_counter); 864 out: 865 dqm_unlock(dqm); 866 return retval; 867 } 868 869 static int register_process(struct device_queue_manager *dqm, 870 struct qcm_process_device *qpd) 871 { 872 struct device_process_node *n; 873 struct kfd_process_device *pdd; 874 uint64_t pd_base; 875 int retval; 876 877 n = kzalloc(sizeof(*n), GFP_KERNEL); 878 if (!n) 879 return -ENOMEM; 880 881 n->qpd = qpd; 882 883 pdd = qpd_to_pdd(qpd); 884 /* Retrieve PD base */ 885 pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); 886 887 dqm_lock(dqm); 888 list_add(&n->list, &dqm->queues); 889 890 /* Update PD Base in QPD */ 891 qpd->page_table_base = pd_base; 892 pr_debug("Updated PD address to 0x%llx\n", pd_base); 893 894 retval = dqm->asic_ops.update_qpd(dqm, qpd); 895 896 dqm->processes_count++; 897 898 dqm_unlock(dqm); 899 900 /* Outside the DQM lock because under the DQM lock we can't do 901 * reclaim or take other locks that others hold while reclaiming. 902 */ 903 kfd_inc_compute_active(dqm->dev); 904 905 return retval; 906 } 907 908 static int unregister_process(struct device_queue_manager *dqm, 909 struct qcm_process_device *qpd) 910 { 911 int retval; 912 struct device_process_node *cur, *next; 913 914 pr_debug("qpd->queues_list is %s\n", 915 list_empty(&qpd->queues_list) ? "empty" : "not empty"); 916 917 retval = 0; 918 dqm_lock(dqm); 919 920 list_for_each_entry_safe(cur, next, &dqm->queues, list) { 921 if (qpd == cur->qpd) { 922 list_del(&cur->list); 923 kfree(cur); 924 dqm->processes_count--; 925 goto out; 926 } 927 } 928 /* qpd not found in dqm list */ 929 retval = 1; 930 out: 931 dqm_unlock(dqm); 932 933 /* Outside the DQM lock because under the DQM lock we can't do 934 * reclaim or take other locks that others hold while reclaiming. 935 */ 936 if (!retval) 937 kfd_dec_compute_active(dqm->dev); 938 939 return retval; 940 } 941 942 static int 943 set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid, 944 unsigned int vmid) 945 { 946 return dqm->dev->kfd2kgd->set_pasid_vmid_mapping( 947 dqm->dev->adev, pasid, vmid); 948 } 949 950 static void init_interrupts(struct device_queue_manager *dqm) 951 { 952 unsigned int i; 953 954 for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++) 955 if (is_pipe_enabled(dqm, 0, i)) 956 dqm->dev->kfd2kgd->init_interrupts(dqm->dev->adev, i); 957 } 958 959 static int initialize_nocpsch(struct device_queue_manager *dqm) 960 { 961 int pipe, queue; 962 963 pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm)); 964 965 dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm), 966 sizeof(unsigned int), GFP_KERNEL); 967 if (!dqm->allocated_queues) 968 return -ENOMEM; 969 970 mutex_init(&dqm->lock_hidden); 971 INIT_LIST_HEAD(&dqm->queues); 972 dqm->active_queue_count = dqm->next_pipe_to_allocate = 0; 973 dqm->active_cp_queue_count = 0; 974 dqm->gws_queue_count = 0; 975 976 for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { 977 int pipe_offset = pipe * get_queues_per_pipe(dqm); 978 979 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) 980 if (test_bit(pipe_offset + queue, 981 dqm->dev->shared_resources.cp_queue_bitmap)) 982 dqm->allocated_queues[pipe] |= 1 << queue; 983 } 984 985 memset(dqm->vmid_pasid, 0, sizeof(dqm->vmid_pasid)); 986 987 dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm)); 988 dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm)); 989 990 return 0; 991 } 992 993 static void uninitialize(struct device_queue_manager *dqm) 994 { 995 int i; 996 997 WARN_ON(dqm->active_queue_count > 0 || dqm->processes_count > 0); 998 999 kfree(dqm->allocated_queues); 1000 for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++) 1001 kfree(dqm->mqd_mgrs[i]); 1002 mutex_destroy(&dqm->lock_hidden); 1003 } 1004 1005 static int start_nocpsch(struct device_queue_manager *dqm) 1006 { 1007 int r = 0; 1008 1009 pr_info("SW scheduler is used"); 1010 init_interrupts(dqm); 1011 1012 if (dqm->dev->adev->asic_type == CHIP_HAWAII) 1013 r = pm_init(&dqm->packet_mgr, dqm); 1014 if (!r) 1015 dqm->sched_running = true; 1016 1017 return r; 1018 } 1019 1020 static int stop_nocpsch(struct device_queue_manager *dqm) 1021 { 1022 if (dqm->dev->adev->asic_type == CHIP_HAWAII) 1023 pm_uninit(&dqm->packet_mgr, false); 1024 dqm->sched_running = false; 1025 1026 return 0; 1027 } 1028 1029 static void pre_reset(struct device_queue_manager *dqm) 1030 { 1031 dqm_lock(dqm); 1032 dqm->is_resetting = true; 1033 dqm_unlock(dqm); 1034 } 1035 1036 static int allocate_sdma_queue(struct device_queue_manager *dqm, 1037 struct queue *q) 1038 { 1039 int bit; 1040 1041 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { 1042 if (dqm->sdma_bitmap == 0) { 1043 pr_err("No more SDMA queue to allocate\n"); 1044 return -ENOMEM; 1045 } 1046 1047 bit = __ffs64(dqm->sdma_bitmap); 1048 dqm->sdma_bitmap &= ~(1ULL << bit); 1049 q->sdma_id = bit; 1050 q->properties.sdma_engine_id = q->sdma_id % 1051 kfd_get_num_sdma_engines(dqm->dev); 1052 q->properties.sdma_queue_id = q->sdma_id / 1053 kfd_get_num_sdma_engines(dqm->dev); 1054 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 1055 if (dqm->xgmi_sdma_bitmap == 0) { 1056 pr_err("No more XGMI SDMA queue to allocate\n"); 1057 return -ENOMEM; 1058 } 1059 bit = __ffs64(dqm->xgmi_sdma_bitmap); 1060 dqm->xgmi_sdma_bitmap &= ~(1ULL << bit); 1061 q->sdma_id = bit; 1062 /* sdma_engine_id is sdma id including 1063 * both PCIe-optimized SDMAs and XGMI- 1064 * optimized SDMAs. The calculation below 1065 * assumes the first N engines are always 1066 * PCIe-optimized ones 1067 */ 1068 q->properties.sdma_engine_id = 1069 kfd_get_num_sdma_engines(dqm->dev) + 1070 q->sdma_id % kfd_get_num_xgmi_sdma_engines(dqm->dev); 1071 q->properties.sdma_queue_id = q->sdma_id / 1072 kfd_get_num_xgmi_sdma_engines(dqm->dev); 1073 } 1074 1075 pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); 1076 pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id); 1077 1078 return 0; 1079 } 1080 1081 static void deallocate_sdma_queue(struct device_queue_manager *dqm, 1082 struct queue *q) 1083 { 1084 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { 1085 if (q->sdma_id >= get_num_sdma_queues(dqm)) 1086 return; 1087 dqm->sdma_bitmap |= (1ULL << q->sdma_id); 1088 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 1089 if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm)) 1090 return; 1091 dqm->xgmi_sdma_bitmap |= (1ULL << q->sdma_id); 1092 } 1093 } 1094 1095 /* 1096 * Device Queue Manager implementation for cp scheduler 1097 */ 1098 1099 static int set_sched_resources(struct device_queue_manager *dqm) 1100 { 1101 int i, mec; 1102 struct scheduling_resources res; 1103 1104 res.vmid_mask = dqm->dev->shared_resources.compute_vmid_bitmap; 1105 1106 res.queue_mask = 0; 1107 for (i = 0; i < KGD_MAX_QUEUES; ++i) { 1108 mec = (i / dqm->dev->shared_resources.num_queue_per_pipe) 1109 / dqm->dev->shared_resources.num_pipe_per_mec; 1110 1111 if (!test_bit(i, dqm->dev->shared_resources.cp_queue_bitmap)) 1112 continue; 1113 1114 /* only acquire queues from the first MEC */ 1115 if (mec > 0) 1116 continue; 1117 1118 /* This situation may be hit in the future if a new HW 1119 * generation exposes more than 64 queues. If so, the 1120 * definition of res.queue_mask needs updating 1121 */ 1122 if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) { 1123 pr_err("Invalid queue enabled by amdgpu: %d\n", i); 1124 break; 1125 } 1126 1127 res.queue_mask |= 1ull 1128 << amdgpu_queue_mask_bit_to_set_resource_bit( 1129 dqm->dev->adev, i); 1130 } 1131 res.gws_mask = ~0ull; 1132 res.oac_mask = res.gds_heap_base = res.gds_heap_size = 0; 1133 1134 pr_debug("Scheduling resources:\n" 1135 "vmid mask: 0x%8X\n" 1136 "queue mask: 0x%8llX\n", 1137 res.vmid_mask, res.queue_mask); 1138 1139 return pm_send_set_resources(&dqm->packet_mgr, &res); 1140 } 1141 1142 static int initialize_cpsch(struct device_queue_manager *dqm) 1143 { 1144 uint64_t num_sdma_queues; 1145 uint64_t num_xgmi_sdma_queues; 1146 1147 pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm)); 1148 1149 mutex_init(&dqm->lock_hidden); 1150 INIT_LIST_HEAD(&dqm->queues); 1151 dqm->active_queue_count = dqm->processes_count = 0; 1152 dqm->active_cp_queue_count = 0; 1153 dqm->gws_queue_count = 0; 1154 dqm->active_runlist = false; 1155 1156 num_sdma_queues = get_num_sdma_queues(dqm); 1157 if (num_sdma_queues >= BITS_PER_TYPE(dqm->sdma_bitmap)) 1158 dqm->sdma_bitmap = ULLONG_MAX; 1159 else 1160 dqm->sdma_bitmap = (BIT_ULL(num_sdma_queues) - 1); 1161 1162 num_xgmi_sdma_queues = get_num_xgmi_sdma_queues(dqm); 1163 if (num_xgmi_sdma_queues >= BITS_PER_TYPE(dqm->xgmi_sdma_bitmap)) 1164 dqm->xgmi_sdma_bitmap = ULLONG_MAX; 1165 else 1166 dqm->xgmi_sdma_bitmap = (BIT_ULL(num_xgmi_sdma_queues) - 1); 1167 1168 INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception); 1169 1170 return 0; 1171 } 1172 1173 static int start_cpsch(struct device_queue_manager *dqm) 1174 { 1175 int retval; 1176 1177 retval = 0; 1178 1179 dqm_lock(dqm); 1180 retval = pm_init(&dqm->packet_mgr, dqm); 1181 if (retval) 1182 goto fail_packet_manager_init; 1183 1184 retval = set_sched_resources(dqm); 1185 if (retval) 1186 goto fail_set_sched_resources; 1187 1188 pr_debug("Allocating fence memory\n"); 1189 1190 /* allocate fence memory on the gart */ 1191 retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr), 1192 &dqm->fence_mem); 1193 1194 if (retval) 1195 goto fail_allocate_vidmem; 1196 1197 dqm->fence_addr = (uint64_t *)dqm->fence_mem->cpu_ptr; 1198 dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr; 1199 1200 init_interrupts(dqm); 1201 1202 /* clear hang status when driver try to start the hw scheduler */ 1203 dqm->is_hws_hang = false; 1204 dqm->is_resetting = false; 1205 dqm->sched_running = true; 1206 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 1207 dqm_unlock(dqm); 1208 1209 return 0; 1210 fail_allocate_vidmem: 1211 fail_set_sched_resources: 1212 pm_uninit(&dqm->packet_mgr, false); 1213 fail_packet_manager_init: 1214 dqm_unlock(dqm); 1215 return retval; 1216 } 1217 1218 static int stop_cpsch(struct device_queue_manager *dqm) 1219 { 1220 bool hanging; 1221 1222 dqm_lock(dqm); 1223 if (!dqm->sched_running) { 1224 dqm_unlock(dqm); 1225 return 0; 1226 } 1227 1228 if (!dqm->is_hws_hang) 1229 unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, false); 1230 hanging = dqm->is_hws_hang || dqm->is_resetting; 1231 dqm->sched_running = false; 1232 1233 pm_release_ib(&dqm->packet_mgr); 1234 1235 kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); 1236 pm_uninit(&dqm->packet_mgr, hanging); 1237 dqm_unlock(dqm); 1238 1239 return 0; 1240 } 1241 1242 static int create_kernel_queue_cpsch(struct device_queue_manager *dqm, 1243 struct kernel_queue *kq, 1244 struct qcm_process_device *qpd) 1245 { 1246 dqm_lock(dqm); 1247 if (dqm->total_queue_count >= max_num_of_queues_per_device) { 1248 pr_warn("Can't create new kernel queue because %d queues were already created\n", 1249 dqm->total_queue_count); 1250 dqm_unlock(dqm); 1251 return -EPERM; 1252 } 1253 1254 /* 1255 * Unconditionally increment this counter, regardless of the queue's 1256 * type or whether the queue is active. 1257 */ 1258 dqm->total_queue_count++; 1259 pr_debug("Total of %d queues are accountable so far\n", 1260 dqm->total_queue_count); 1261 1262 list_add(&kq->list, &qpd->priv_queue_list); 1263 increment_queue_count(dqm, kq->queue->properties.type); 1264 qpd->is_debug = true; 1265 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 1266 dqm_unlock(dqm); 1267 1268 return 0; 1269 } 1270 1271 static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm, 1272 struct kernel_queue *kq, 1273 struct qcm_process_device *qpd) 1274 { 1275 dqm_lock(dqm); 1276 list_del(&kq->list); 1277 decrement_queue_count(dqm, kq->queue->properties.type); 1278 qpd->is_debug = false; 1279 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); 1280 /* 1281 * Unconditionally decrement this counter, regardless of the queue's 1282 * type. 1283 */ 1284 dqm->total_queue_count--; 1285 pr_debug("Total of %d queues are accountable so far\n", 1286 dqm->total_queue_count); 1287 dqm_unlock(dqm); 1288 } 1289 1290 static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, 1291 struct qcm_process_device *qpd) 1292 { 1293 int retval; 1294 struct mqd_manager *mqd_mgr; 1295 1296 if (dqm->total_queue_count >= max_num_of_queues_per_device) { 1297 pr_warn("Can't create new usermode queue because %d queues were already created\n", 1298 dqm->total_queue_count); 1299 retval = -EPERM; 1300 goto out; 1301 } 1302 1303 if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 1304 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 1305 dqm_lock(dqm); 1306 retval = allocate_sdma_queue(dqm, q); 1307 dqm_unlock(dqm); 1308 if (retval) 1309 goto out; 1310 } 1311 1312 retval = allocate_doorbell(qpd, q); 1313 if (retval) 1314 goto out_deallocate_sdma_queue; 1315 1316 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 1317 q->properties.type)]; 1318 1319 if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 1320 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 1321 dqm->asic_ops.init_sdma_vm(dqm, q, qpd); 1322 q->properties.tba_addr = qpd->tba_addr; 1323 q->properties.tma_addr = qpd->tma_addr; 1324 q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties); 1325 if (!q->mqd_mem_obj) { 1326 retval = -ENOMEM; 1327 goto out_deallocate_doorbell; 1328 } 1329 1330 dqm_lock(dqm); 1331 /* 1332 * Eviction state logic: mark all queues as evicted, even ones 1333 * not currently active. Restoring inactive queues later only 1334 * updates the is_evicted flag but is a no-op otherwise. 1335 */ 1336 q->properties.is_evicted = !!qpd->evicted; 1337 mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, 1338 &q->gart_mqd_addr, &q->properties); 1339 1340 list_add(&q->list, &qpd->queues_list); 1341 qpd->queue_count++; 1342 1343 if (q->properties.is_active) { 1344 increment_queue_count(dqm, q->properties.type); 1345 1346 execute_queues_cpsch(dqm, 1347 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 1348 } 1349 1350 /* 1351 * Unconditionally increment this counter, regardless of the queue's 1352 * type or whether the queue is active. 1353 */ 1354 dqm->total_queue_count++; 1355 1356 pr_debug("Total of %d queues are accountable so far\n", 1357 dqm->total_queue_count); 1358 1359 dqm_unlock(dqm); 1360 return retval; 1361 1362 out_deallocate_doorbell: 1363 deallocate_doorbell(qpd, q); 1364 out_deallocate_sdma_queue: 1365 if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 1366 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 1367 dqm_lock(dqm); 1368 deallocate_sdma_queue(dqm, q); 1369 dqm_unlock(dqm); 1370 } 1371 out: 1372 return retval; 1373 } 1374 1375 int amdkfd_fence_wait_timeout(uint64_t *fence_addr, 1376 uint64_t fence_value, 1377 unsigned int timeout_ms) 1378 { 1379 unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies; 1380 1381 while (*fence_addr != fence_value) { 1382 if (time_after(jiffies, end_jiffies)) { 1383 pr_err("qcm fence wait loop timeout expired\n"); 1384 /* In HWS case, this is used to halt the driver thread 1385 * in order not to mess up CP states before doing 1386 * scandumps for FW debugging. 1387 */ 1388 while (halt_if_hws_hang) 1389 schedule(); 1390 1391 return -ETIME; 1392 } 1393 schedule(); 1394 } 1395 1396 return 0; 1397 } 1398 1399 /* dqm->lock mutex has to be locked before calling this function */ 1400 static int map_queues_cpsch(struct device_queue_manager *dqm) 1401 { 1402 int retval; 1403 1404 if (!dqm->sched_running) 1405 return 0; 1406 if (dqm->active_queue_count <= 0 || dqm->processes_count <= 0) 1407 return 0; 1408 if (dqm->active_runlist) 1409 return 0; 1410 1411 retval = pm_send_runlist(&dqm->packet_mgr, &dqm->queues); 1412 pr_debug("%s sent runlist\n", __func__); 1413 if (retval) { 1414 pr_err("failed to execute runlist\n"); 1415 return retval; 1416 } 1417 dqm->active_runlist = true; 1418 1419 return retval; 1420 } 1421 1422 /* dqm->lock mutex has to be locked before calling this function */ 1423 static int unmap_queues_cpsch(struct device_queue_manager *dqm, 1424 enum kfd_unmap_queues_filter filter, 1425 uint32_t filter_param, bool reset) 1426 { 1427 int retval = 0; 1428 struct mqd_manager *mqd_mgr; 1429 1430 if (!dqm->sched_running) 1431 return 0; 1432 if (dqm->is_hws_hang || dqm->is_resetting) 1433 return -EIO; 1434 if (!dqm->active_runlist) 1435 return retval; 1436 1437 retval = pm_send_unmap_queue(&dqm->packet_mgr, KFD_QUEUE_TYPE_COMPUTE, 1438 filter, filter_param, reset, 0); 1439 if (retval) 1440 return retval; 1441 1442 *dqm->fence_addr = KFD_FENCE_INIT; 1443 pm_send_query_status(&dqm->packet_mgr, dqm->fence_gpu_addr, 1444 KFD_FENCE_COMPLETED); 1445 /* should be timed out */ 1446 retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED, 1447 queue_preemption_timeout_ms); 1448 if (retval) { 1449 pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n"); 1450 dqm->is_hws_hang = true; 1451 /* It's possible we're detecting a HWS hang in the 1452 * middle of a GPU reset. No need to schedule another 1453 * reset in this case. 1454 */ 1455 if (!dqm->is_resetting) 1456 schedule_work(&dqm->hw_exception_work); 1457 return retval; 1458 } 1459 1460 /* In the current MEC firmware implementation, if compute queue 1461 * doesn't response to the preemption request in time, HIQ will 1462 * abandon the unmap request without returning any timeout error 1463 * to driver. Instead, MEC firmware will log the doorbell of the 1464 * unresponding compute queue to HIQ.MQD.queue_doorbell_id fields. 1465 * To make sure the queue unmap was successful, driver need to 1466 * check those fields 1467 */ 1468 mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]; 1469 if (mqd_mgr->read_doorbell_id(dqm->packet_mgr.priv_queue->queue->mqd)) { 1470 pr_err("HIQ MQD's queue_doorbell_id0 is not 0, Queue preemption time out\n"); 1471 while (halt_if_hws_hang) 1472 schedule(); 1473 return -ETIME; 1474 } 1475 1476 pm_release_ib(&dqm->packet_mgr); 1477 dqm->active_runlist = false; 1478 1479 return retval; 1480 } 1481 1482 /* only for compute queue */ 1483 static int reset_queues_cpsch(struct device_queue_manager *dqm, 1484 uint16_t pasid) 1485 { 1486 int retval; 1487 1488 dqm_lock(dqm); 1489 1490 retval = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_BY_PASID, 1491 pasid, true); 1492 1493 dqm_unlock(dqm); 1494 return retval; 1495 } 1496 1497 /* dqm->lock mutex has to be locked before calling this function */ 1498 static int execute_queues_cpsch(struct device_queue_manager *dqm, 1499 enum kfd_unmap_queues_filter filter, 1500 uint32_t filter_param) 1501 { 1502 int retval; 1503 1504 if (dqm->is_hws_hang) 1505 return -EIO; 1506 retval = unmap_queues_cpsch(dqm, filter, filter_param, false); 1507 if (retval) 1508 return retval; 1509 1510 return map_queues_cpsch(dqm); 1511 } 1512 1513 static int destroy_queue_cpsch(struct device_queue_manager *dqm, 1514 struct qcm_process_device *qpd, 1515 struct queue *q) 1516 { 1517 int retval; 1518 struct mqd_manager *mqd_mgr; 1519 uint64_t sdma_val = 0; 1520 struct kfd_process_device *pdd = qpd_to_pdd(qpd); 1521 1522 /* Get the SDMA queue stats */ 1523 if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || 1524 (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 1525 retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr, 1526 &sdma_val); 1527 if (retval) 1528 pr_err("Failed to read SDMA queue counter for queue: %d\n", 1529 q->properties.queue_id); 1530 } 1531 1532 retval = 0; 1533 1534 /* remove queue from list to prevent rescheduling after preemption */ 1535 dqm_lock(dqm); 1536 1537 if (qpd->is_debug) { 1538 /* 1539 * error, currently we do not allow to destroy a queue 1540 * of a currently debugged process 1541 */ 1542 retval = -EBUSY; 1543 goto failed_try_destroy_debugged_queue; 1544 1545 } 1546 1547 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 1548 q->properties.type)]; 1549 1550 deallocate_doorbell(qpd, q); 1551 1552 if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || 1553 (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 1554 deallocate_sdma_queue(dqm, q); 1555 pdd->sdma_past_activity_counter += sdma_val; 1556 } 1557 1558 list_del(&q->list); 1559 qpd->queue_count--; 1560 if (q->properties.is_active) { 1561 decrement_queue_count(dqm, q->properties.type); 1562 retval = execute_queues_cpsch(dqm, 1563 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 1564 if (retval == -ETIME) 1565 qpd->reset_wavefronts = true; 1566 if (q->properties.is_gws) { 1567 dqm->gws_queue_count--; 1568 qpd->mapped_gws_queue = false; 1569 } 1570 } 1571 1572 /* 1573 * Unconditionally decrement this counter, regardless of the queue's 1574 * type 1575 */ 1576 dqm->total_queue_count--; 1577 pr_debug("Total of %d queues are accountable so far\n", 1578 dqm->total_queue_count); 1579 1580 dqm_unlock(dqm); 1581 1582 /* Do free_mqd after dqm_unlock(dqm) to avoid circular locking */ 1583 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 1584 1585 return retval; 1586 1587 failed_try_destroy_debugged_queue: 1588 1589 dqm_unlock(dqm); 1590 return retval; 1591 } 1592 1593 /* 1594 * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to 1595 * stay in user mode. 1596 */ 1597 #define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL 1598 /* APE1 limit is inclusive and 64K aligned. */ 1599 #define APE1_LIMIT_ALIGNMENT 0xFFFF 1600 1601 static bool set_cache_memory_policy(struct device_queue_manager *dqm, 1602 struct qcm_process_device *qpd, 1603 enum cache_policy default_policy, 1604 enum cache_policy alternate_policy, 1605 void __user *alternate_aperture_base, 1606 uint64_t alternate_aperture_size) 1607 { 1608 bool retval = true; 1609 1610 if (!dqm->asic_ops.set_cache_memory_policy) 1611 return retval; 1612 1613 dqm_lock(dqm); 1614 1615 if (alternate_aperture_size == 0) { 1616 /* base > limit disables APE1 */ 1617 qpd->sh_mem_ape1_base = 1; 1618 qpd->sh_mem_ape1_limit = 0; 1619 } else { 1620 /* 1621 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]}, 1622 * SH_MEM_APE1_BASE[31:0], 0x0000 } 1623 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]}, 1624 * SH_MEM_APE1_LIMIT[31:0], 0xFFFF } 1625 * Verify that the base and size parameters can be 1626 * represented in this format and convert them. 1627 * Additionally restrict APE1 to user-mode addresses. 1628 */ 1629 1630 uint64_t base = (uintptr_t)alternate_aperture_base; 1631 uint64_t limit = base + alternate_aperture_size - 1; 1632 1633 if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 || 1634 (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) { 1635 retval = false; 1636 goto out; 1637 } 1638 1639 qpd->sh_mem_ape1_base = base >> 16; 1640 qpd->sh_mem_ape1_limit = limit >> 16; 1641 } 1642 1643 retval = dqm->asic_ops.set_cache_memory_policy( 1644 dqm, 1645 qpd, 1646 default_policy, 1647 alternate_policy, 1648 alternate_aperture_base, 1649 alternate_aperture_size); 1650 1651 if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0)) 1652 program_sh_mem_settings(dqm, qpd); 1653 1654 pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n", 1655 qpd->sh_mem_config, qpd->sh_mem_ape1_base, 1656 qpd->sh_mem_ape1_limit); 1657 1658 out: 1659 dqm_unlock(dqm); 1660 return retval; 1661 } 1662 1663 static int process_termination_nocpsch(struct device_queue_manager *dqm, 1664 struct qcm_process_device *qpd) 1665 { 1666 struct queue *q; 1667 struct device_process_node *cur, *next_dpn; 1668 int retval = 0; 1669 bool found = false; 1670 1671 dqm_lock(dqm); 1672 1673 /* Clear all user mode queues */ 1674 while (!list_empty(&qpd->queues_list)) { 1675 struct mqd_manager *mqd_mgr; 1676 int ret; 1677 1678 q = list_first_entry(&qpd->queues_list, struct queue, list); 1679 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 1680 q->properties.type)]; 1681 ret = destroy_queue_nocpsch_locked(dqm, qpd, q); 1682 if (ret) 1683 retval = ret; 1684 dqm_unlock(dqm); 1685 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 1686 dqm_lock(dqm); 1687 } 1688 1689 /* Unregister process */ 1690 list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) { 1691 if (qpd == cur->qpd) { 1692 list_del(&cur->list); 1693 kfree(cur); 1694 dqm->processes_count--; 1695 found = true; 1696 break; 1697 } 1698 } 1699 1700 dqm_unlock(dqm); 1701 1702 /* Outside the DQM lock because under the DQM lock we can't do 1703 * reclaim or take other locks that others hold while reclaiming. 1704 */ 1705 if (found) 1706 kfd_dec_compute_active(dqm->dev); 1707 1708 return retval; 1709 } 1710 1711 static int get_wave_state(struct device_queue_manager *dqm, 1712 struct queue *q, 1713 void __user *ctl_stack, 1714 u32 *ctl_stack_used_size, 1715 u32 *save_area_used_size) 1716 { 1717 struct mqd_manager *mqd_mgr; 1718 1719 dqm_lock(dqm); 1720 1721 mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP]; 1722 1723 if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE || 1724 q->properties.is_active || !q->device->cwsr_enabled || 1725 !mqd_mgr->get_wave_state) { 1726 dqm_unlock(dqm); 1727 return -EINVAL; 1728 } 1729 1730 dqm_unlock(dqm); 1731 1732 /* 1733 * get_wave_state is outside the dqm lock to prevent circular locking 1734 * and the queue should be protected against destruction by the process 1735 * lock. 1736 */ 1737 return mqd_mgr->get_wave_state(mqd_mgr, q->mqd, ctl_stack, 1738 ctl_stack_used_size, save_area_used_size); 1739 } 1740 1741 static int process_termination_cpsch(struct device_queue_manager *dqm, 1742 struct qcm_process_device *qpd) 1743 { 1744 int retval; 1745 struct queue *q; 1746 struct kernel_queue *kq, *kq_next; 1747 struct mqd_manager *mqd_mgr; 1748 struct device_process_node *cur, *next_dpn; 1749 enum kfd_unmap_queues_filter filter = 1750 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES; 1751 bool found = false; 1752 1753 retval = 0; 1754 1755 dqm_lock(dqm); 1756 1757 /* Clean all kernel queues */ 1758 list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) { 1759 list_del(&kq->list); 1760 decrement_queue_count(dqm, kq->queue->properties.type); 1761 qpd->is_debug = false; 1762 dqm->total_queue_count--; 1763 filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES; 1764 } 1765 1766 /* Clear all user mode queues */ 1767 list_for_each_entry(q, &qpd->queues_list, list) { 1768 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) 1769 deallocate_sdma_queue(dqm, q); 1770 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 1771 deallocate_sdma_queue(dqm, q); 1772 1773 if (q->properties.is_active) { 1774 decrement_queue_count(dqm, q->properties.type); 1775 if (q->properties.is_gws) { 1776 dqm->gws_queue_count--; 1777 qpd->mapped_gws_queue = false; 1778 } 1779 } 1780 1781 dqm->total_queue_count--; 1782 } 1783 1784 /* Unregister process */ 1785 list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) { 1786 if (qpd == cur->qpd) { 1787 list_del(&cur->list); 1788 kfree(cur); 1789 dqm->processes_count--; 1790 found = true; 1791 break; 1792 } 1793 } 1794 1795 retval = execute_queues_cpsch(dqm, filter, 0); 1796 if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) { 1797 pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev); 1798 dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process); 1799 qpd->reset_wavefronts = false; 1800 } 1801 1802 /* Lastly, free mqd resources. 1803 * Do free_mqd() after dqm_unlock to avoid circular locking. 1804 */ 1805 while (!list_empty(&qpd->queues_list)) { 1806 q = list_first_entry(&qpd->queues_list, struct queue, list); 1807 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 1808 q->properties.type)]; 1809 list_del(&q->list); 1810 qpd->queue_count--; 1811 dqm_unlock(dqm); 1812 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 1813 dqm_lock(dqm); 1814 } 1815 dqm_unlock(dqm); 1816 1817 /* Outside the DQM lock because under the DQM lock we can't do 1818 * reclaim or take other locks that others hold while reclaiming. 1819 */ 1820 if (found) 1821 kfd_dec_compute_active(dqm->dev); 1822 1823 return retval; 1824 } 1825 1826 static int init_mqd_managers(struct device_queue_manager *dqm) 1827 { 1828 int i, j; 1829 struct mqd_manager *mqd_mgr; 1830 1831 for (i = 0; i < KFD_MQD_TYPE_MAX; i++) { 1832 mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev); 1833 if (!mqd_mgr) { 1834 pr_err("mqd manager [%d] initialization failed\n", i); 1835 goto out_free; 1836 } 1837 dqm->mqd_mgrs[i] = mqd_mgr; 1838 } 1839 1840 return 0; 1841 1842 out_free: 1843 for (j = 0; j < i; j++) { 1844 kfree(dqm->mqd_mgrs[j]); 1845 dqm->mqd_mgrs[j] = NULL; 1846 } 1847 1848 return -ENOMEM; 1849 } 1850 1851 /* Allocate one hiq mqd (HWS) and all SDMA mqd in a continuous trunk*/ 1852 static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm) 1853 { 1854 int retval; 1855 struct kfd_dev *dev = dqm->dev; 1856 struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd; 1857 uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size * 1858 get_num_all_sdma_engines(dqm) * 1859 dev->device_info.num_sdma_queues_per_engine + 1860 dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size; 1861 1862 retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, size, 1863 &(mem_obj->gtt_mem), &(mem_obj->gpu_addr), 1864 (void *)&(mem_obj->cpu_ptr), false); 1865 1866 return retval; 1867 } 1868 1869 struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) 1870 { 1871 struct device_queue_manager *dqm; 1872 1873 pr_debug("Loading device queue manager\n"); 1874 1875 dqm = kzalloc(sizeof(*dqm), GFP_KERNEL); 1876 if (!dqm) 1877 return NULL; 1878 1879 switch (dev->adev->asic_type) { 1880 /* HWS is not available on Hawaii. */ 1881 case CHIP_HAWAII: 1882 /* HWS depends on CWSR for timely dequeue. CWSR is not 1883 * available on Tonga. 1884 * 1885 * FIXME: This argument also applies to Kaveri. 1886 */ 1887 case CHIP_TONGA: 1888 dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS; 1889 break; 1890 default: 1891 dqm->sched_policy = sched_policy; 1892 break; 1893 } 1894 1895 dqm->dev = dev; 1896 switch (dqm->sched_policy) { 1897 case KFD_SCHED_POLICY_HWS: 1898 case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION: 1899 /* initialize dqm for cp scheduling */ 1900 dqm->ops.create_queue = create_queue_cpsch; 1901 dqm->ops.initialize = initialize_cpsch; 1902 dqm->ops.start = start_cpsch; 1903 dqm->ops.stop = stop_cpsch; 1904 dqm->ops.pre_reset = pre_reset; 1905 dqm->ops.destroy_queue = destroy_queue_cpsch; 1906 dqm->ops.update_queue = update_queue; 1907 dqm->ops.register_process = register_process; 1908 dqm->ops.unregister_process = unregister_process; 1909 dqm->ops.uninitialize = uninitialize; 1910 dqm->ops.create_kernel_queue = create_kernel_queue_cpsch; 1911 dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch; 1912 dqm->ops.set_cache_memory_policy = set_cache_memory_policy; 1913 dqm->ops.process_termination = process_termination_cpsch; 1914 dqm->ops.evict_process_queues = evict_process_queues_cpsch; 1915 dqm->ops.restore_process_queues = restore_process_queues_cpsch; 1916 dqm->ops.get_wave_state = get_wave_state; 1917 dqm->ops.reset_queues = reset_queues_cpsch; 1918 break; 1919 case KFD_SCHED_POLICY_NO_HWS: 1920 /* initialize dqm for no cp scheduling */ 1921 dqm->ops.start = start_nocpsch; 1922 dqm->ops.stop = stop_nocpsch; 1923 dqm->ops.pre_reset = pre_reset; 1924 dqm->ops.create_queue = create_queue_nocpsch; 1925 dqm->ops.destroy_queue = destroy_queue_nocpsch; 1926 dqm->ops.update_queue = update_queue; 1927 dqm->ops.register_process = register_process; 1928 dqm->ops.unregister_process = unregister_process; 1929 dqm->ops.initialize = initialize_nocpsch; 1930 dqm->ops.uninitialize = uninitialize; 1931 dqm->ops.set_cache_memory_policy = set_cache_memory_policy; 1932 dqm->ops.process_termination = process_termination_nocpsch; 1933 dqm->ops.evict_process_queues = evict_process_queues_nocpsch; 1934 dqm->ops.restore_process_queues = 1935 restore_process_queues_nocpsch; 1936 dqm->ops.get_wave_state = get_wave_state; 1937 break; 1938 default: 1939 pr_err("Invalid scheduling policy %d\n", dqm->sched_policy); 1940 goto out_free; 1941 } 1942 1943 switch (dev->adev->asic_type) { 1944 case CHIP_CARRIZO: 1945 device_queue_manager_init_vi(&dqm->asic_ops); 1946 break; 1947 1948 case CHIP_KAVERI: 1949 device_queue_manager_init_cik(&dqm->asic_ops); 1950 break; 1951 1952 case CHIP_HAWAII: 1953 device_queue_manager_init_cik_hawaii(&dqm->asic_ops); 1954 break; 1955 1956 case CHIP_TONGA: 1957 case CHIP_FIJI: 1958 case CHIP_POLARIS10: 1959 case CHIP_POLARIS11: 1960 case CHIP_POLARIS12: 1961 case CHIP_VEGAM: 1962 device_queue_manager_init_vi_tonga(&dqm->asic_ops); 1963 break; 1964 1965 default: 1966 if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1)) 1967 device_queue_manager_init_v10_navi10(&dqm->asic_ops); 1968 else if (KFD_GC_VERSION(dev) >= IP_VERSION(9, 0, 1)) 1969 device_queue_manager_init_v9(&dqm->asic_ops); 1970 else { 1971 WARN(1, "Unexpected ASIC family %u", 1972 dev->adev->asic_type); 1973 goto out_free; 1974 } 1975 } 1976 1977 if (init_mqd_managers(dqm)) 1978 goto out_free; 1979 1980 if (allocate_hiq_sdma_mqd(dqm)) { 1981 pr_err("Failed to allocate hiq sdma mqd trunk buffer\n"); 1982 goto out_free; 1983 } 1984 1985 if (!dqm->ops.initialize(dqm)) 1986 return dqm; 1987 1988 out_free: 1989 kfree(dqm); 1990 return NULL; 1991 } 1992 1993 static void deallocate_hiq_sdma_mqd(struct kfd_dev *dev, 1994 struct kfd_mem_obj *mqd) 1995 { 1996 WARN(!mqd, "No hiq sdma mqd trunk to free"); 1997 1998 amdgpu_amdkfd_free_gtt_mem(dev->adev, mqd->gtt_mem); 1999 } 2000 2001 void device_queue_manager_uninit(struct device_queue_manager *dqm) 2002 { 2003 dqm->ops.uninitialize(dqm); 2004 deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd); 2005 kfree(dqm); 2006 } 2007 2008 int kfd_process_vm_fault(struct device_queue_manager *dqm, u32 pasid) 2009 { 2010 struct kfd_process_device *pdd; 2011 struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); 2012 int ret = 0; 2013 2014 if (!p) 2015 return -EINVAL; 2016 WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid); 2017 pdd = kfd_get_process_device_data(dqm->dev, p); 2018 if (pdd) 2019 ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd); 2020 kfd_unref_process(p); 2021 2022 return ret; 2023 } 2024 2025 static void kfd_process_hw_exception(struct work_struct *work) 2026 { 2027 struct device_queue_manager *dqm = container_of(work, 2028 struct device_queue_manager, hw_exception_work); 2029 amdgpu_amdkfd_gpu_reset(dqm->dev->adev); 2030 } 2031 2032 #if defined(CONFIG_DEBUG_FS) 2033 2034 static void seq_reg_dump(struct seq_file *m, 2035 uint32_t (*dump)[2], uint32_t n_regs) 2036 { 2037 uint32_t i, count; 2038 2039 for (i = 0, count = 0; i < n_regs; i++) { 2040 if (count == 0 || 2041 dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) { 2042 seq_printf(m, "%s %08x: %08x", 2043 i ? "\n" : "", 2044 dump[i][0], dump[i][1]); 2045 count = 7; 2046 } else { 2047 seq_printf(m, " %08x", dump[i][1]); 2048 count--; 2049 } 2050 } 2051 2052 seq_puts(m, "\n"); 2053 } 2054 2055 int dqm_debugfs_hqds(struct seq_file *m, void *data) 2056 { 2057 struct device_queue_manager *dqm = data; 2058 uint32_t (*dump)[2], n_regs; 2059 int pipe, queue; 2060 int r = 0; 2061 2062 if (!dqm->sched_running) { 2063 seq_printf(m, " Device is stopped\n"); 2064 2065 return 0; 2066 } 2067 2068 r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev, 2069 KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE, 2070 &dump, &n_regs); 2071 if (!r) { 2072 seq_printf(m, " HIQ on MEC %d Pipe %d Queue %d\n", 2073 KFD_CIK_HIQ_PIPE/get_pipes_per_mec(dqm)+1, 2074 KFD_CIK_HIQ_PIPE%get_pipes_per_mec(dqm), 2075 KFD_CIK_HIQ_QUEUE); 2076 seq_reg_dump(m, dump, n_regs); 2077 2078 kfree(dump); 2079 } 2080 2081 for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { 2082 int pipe_offset = pipe * get_queues_per_pipe(dqm); 2083 2084 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) { 2085 if (!test_bit(pipe_offset + queue, 2086 dqm->dev->shared_resources.cp_queue_bitmap)) 2087 continue; 2088 2089 r = dqm->dev->kfd2kgd->hqd_dump( 2090 dqm->dev->adev, pipe, queue, &dump, &n_regs); 2091 if (r) 2092 break; 2093 2094 seq_printf(m, " CP Pipe %d, Queue %d\n", 2095 pipe, queue); 2096 seq_reg_dump(m, dump, n_regs); 2097 2098 kfree(dump); 2099 } 2100 } 2101 2102 for (pipe = 0; pipe < get_num_all_sdma_engines(dqm); pipe++) { 2103 for (queue = 0; 2104 queue < dqm->dev->device_info.num_sdma_queues_per_engine; 2105 queue++) { 2106 r = dqm->dev->kfd2kgd->hqd_sdma_dump( 2107 dqm->dev->adev, pipe, queue, &dump, &n_regs); 2108 if (r) 2109 break; 2110 2111 seq_printf(m, " SDMA Engine %d, RLC %d\n", 2112 pipe, queue); 2113 seq_reg_dump(m, dump, n_regs); 2114 2115 kfree(dump); 2116 } 2117 } 2118 2119 return r; 2120 } 2121 2122 int dqm_debugfs_hang_hws(struct device_queue_manager *dqm) 2123 { 2124 int r = 0; 2125 2126 dqm_lock(dqm); 2127 r = pm_debugfs_hang_hws(&dqm->packet_mgr); 2128 if (r) { 2129 dqm_unlock(dqm); 2130 return r; 2131 } 2132 dqm->active_runlist = true; 2133 r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); 2134 dqm_unlock(dqm); 2135 2136 return r; 2137 } 2138 2139 #endif 2140