/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/ratelimit.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/sched.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_mqd_manager.h"
#include "cik_regs.h"
#include "kfd_kernel_queue.h"

/* Size of the per-pipe EOP queue */
#define CIK_HPD_EOP_BYTES_LOG2 11
#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)

static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
				unsigned int pasid, unsigned int vmid);

static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd);

static int execute_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param);
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param);

static int map_queues_cpsch(struct device_queue_manager *dqm);

static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd);

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int sdma_queue_id);

static inline
enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
{
	if (type == KFD_QUEUE_TYPE_SDMA)
		return KFD_MQD_TYPE_SDMA;
	return KFD_MQD_TYPE_CP;
}

static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
{
	int i;
	int pipe_offset = mec * dqm->dev->shared_resources.num_pipe_per_mec
		+ pipe * dqm->dev->shared_resources.num_queue_per_pipe;

	/* queue is available for KFD usage if bit is 1 */
	for (i = 0; i < dqm->dev->shared_resources.num_queue_per_pipe; ++i)
		if (test_bit(pipe_offset + i,
			     dqm->dev->shared_resources.queue_bitmap))
			return true;
	return false;
}

unsigned int get_queues_num(struct device_queue_manager *dqm)
{
	return bitmap_weight(dqm->dev->shared_resources.queue_bitmap,
				KGD_MAX_QUEUES);
}

unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
{
	return dqm->dev->shared_resources.num_queue_per_pipe;
}

unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
{
	return dqm->dev->shared_resources.num_pipe_per_mec;
}

void program_sh_mem_settings(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	return dqm->dev->kfd2kgd->program_sh_mem_settings(
						dqm->dev->kgd, qpd->vmid,
						qpd->sh_mem_config,
						qpd->sh_mem_ape1_base,
						qpd->sh_mem_ape1_limit,
						qpd->sh_mem_bases);
}

static int allocate_vmid(struct device_queue_manager *dqm,
			struct qcm_process_device *qpd,
			struct queue *q)
{
	int bit, allocated_vmid;

	if (dqm->vmid_bitmap == 0)
		return -ENOMEM;

	bit = ffs(dqm->vmid_bitmap) - 1;
	dqm->vmid_bitmap &= ~(1 << bit);

	allocated_vmid = bit + dqm->dev->vm_info.first_vmid_kfd;
	pr_debug("vmid allocation %d\n", allocated_vmid);
	qpd->vmid = allocated_vmid;
	q->properties.vmid = allocated_vmid;

	set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
	program_sh_mem_settings(dqm, qpd);

	/* qpd->page_table_base is set earlier when register_process()
	 * is called, i.e. when the first queue is created.
	 */
	dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->kgd,
			qpd->vmid,
			qpd->page_table_base);
	/* invalidate the VM context after pasid and vmid mapping is set up */
	kfd_flush_tlb(qpd_to_pdd(qpd));

	return 0;
}

static int flush_texture_cache_nocpsch(struct kfd_dev *kdev,
				struct qcm_process_device *qpd)
{
	uint32_t len;

	if (!qpd->ib_kaddr)
		return -ENOMEM;

	len = pm_create_release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);

	return kdev->kfd2kgd->submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid,
				qpd->ib_base, (uint32_t *)qpd->ib_kaddr, len);
}

static void deallocate_vmid(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;

	/* On GFX v7, CP doesn't flush TC at dequeue */
	if (q->device->device_info->asic_family == CHIP_HAWAII)
		if (flush_texture_cache_nocpsch(q->device, qpd))
			pr_err("Failed to flush TC\n");

	kfd_flush_tlb(qpd_to_pdd(qpd));

	/* Release the vmid mapping */
	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);

	dqm->vmid_bitmap |= (1 << bit);
	qpd->vmid = 0;
	q->properties.vmid = 0;
}

static int create_queue_nocpsch(struct device_queue_manager *dqm,
				struct queue *q,
				struct qcm_process_device *qpd)
{
	int retval;

	print_queue(q);

	mutex_lock(&dqm->lock);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		retval = -EPERM;
		goto out_unlock;
	}

	if (list_empty(&qpd->queues_list)) {
		retval = allocate_vmid(dqm, qpd, q);
		if (retval)
			goto out_unlock;
	}
	q->properties.vmid = qpd->vmid;
	/*
	 * Eviction state logic: we only mark active queues as evicted
	 * to avoid the overhead of restoring inactive queues later
	 */
	if (qpd->evicted)
		q->properties.is_evicted = (q->properties.queue_size > 0 &&
					    q->properties.queue_percent > 0 &&
					    q->properties.queue_address != 0);

	q->properties.tba_addr = qpd->tba_addr;
	q->properties.tma_addr = qpd->tma_addr;

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
		retval = create_compute_queue_nocpsch(dqm, q, qpd);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		retval = create_sdma_queue_nocpsch(dqm, q, qpd);
	else
		retval = -EINVAL;

	if (retval) {
		if (list_empty(&qpd->queues_list))
			deallocate_vmid(dqm, qpd, q);
		goto out_unlock;
	}

	list_add(&q->list, &qpd->queues_list);
	qpd->queue_count++;
	if (q->properties.is_active)
		dqm->queue_count++;

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count++;

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

out_unlock:
	mutex_unlock(&dqm->lock);
	return retval;
}

static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
{
	bool set;
	int pipe, bit, i;

	set = false;

	for (pipe = dqm->next_pipe_to_allocate, i = 0;
			i < get_pipes_per_mec(dqm);
			pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {

		if (!is_pipe_enabled(dqm, 0, pipe))
			continue;

		if (dqm->allocated_queues[pipe] != 0) {
			bit = ffs(dqm->allocated_queues[pipe]) - 1;
			dqm->allocated_queues[pipe] &= ~(1 << bit);
			q->pipe = pipe;
			q->queue = bit;
			set = true;
			break;
		}
	}

	if (!set)
		return -EBUSY;

	pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue);
	/* horizontal hqd allocation */
	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);

	return 0;
}

static inline void deallocate_hqd(struct device_queue_manager *dqm,
				struct queue *q)
{
	dqm->allocated_queues[q->pipe] |= (1 << q->queue);
}

static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd)
{
	int retval;
	struct mqd_manager *mqd;

	mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
	if (!mqd)
		return -ENOMEM;

	retval = allocate_hqd(dqm, q);
	if (retval)
		return retval;

	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval)
		goto out_deallocate_hqd;

	pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
			q->pipe, q->queue);

	dqm->dev->kfd2kgd->set_scratch_backing_va(
			dqm->dev->kgd, qpd->sh_hidden_private_base, qpd->vmid);

	if (!q->properties.is_active)
		return 0;

	retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue, &q->properties,
			       q->process->mm);
	if (retval)
		goto out_uninit_mqd;

	return 0;

out_uninit_mqd:
	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
out_deallocate_hqd:
	deallocate_hqd(dqm, q);

	return retval;
}

/* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked
 * to guard against concurrent access
 */
static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;

	mqd = dqm->ops.get_mqd_manager(dqm,
		get_mqd_type_from_queue_type(q->properties.type));
	if (!mqd)
		return -ENOMEM;

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
		deallocate_hqd(dqm, q);
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		dqm->sdma_queue_count--;
		deallocate_sdma_queue(dqm, q->sdma_id);
	} else {
		pr_debug("q->properties.type %d is invalid\n",
				q->properties.type);
		return -EINVAL;
	}
	dqm->total_queue_count--;

	retval = mqd->destroy_mqd(mqd, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
				KFD_UNMAP_LATENCY_MS,
				q->pipe, q->queue);
	if (retval == -ETIME)
		qpd->reset_wavefronts = true;

	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);

	list_del(&q->list);
	if (list_empty(&qpd->queues_list)) {
		if (qpd->reset_wavefronts) {
			pr_warn("Resetting wave fronts (nocpsch) on dev %p\n",
					dqm->dev);
			/* dbgdev_wave_reset_wavefronts has to be called before
			 * deallocate_vmid(), i.e. when vmid is still in use.
			 */
			dbgdev_wave_reset_wavefronts(dqm->dev,
					qpd->pqm->process);
			qpd->reset_wavefronts = false;
		}

		deallocate_vmid(dqm, qpd, q);
	}
	qpd->queue_count--;
	if (q->properties.is_active)
		dqm->queue_count--;

	return retval;
}

static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;

	mutex_lock(&dqm->lock);
	retval = destroy_queue_nocpsch_locked(dqm, qpd, q);
	mutex_unlock(&dqm->lock);

	return retval;
}

static int update_queue(struct device_queue_manager *dqm, struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;
	struct kfd_process_device *pdd;
	bool prev_active = false;

	mutex_lock(&dqm->lock);
	pdd = kfd_get_process_device_data(q->device, q->process);
	if (!pdd) {
		retval = -ENODEV;
		goto out_unlock;
	}
	mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
	if (!mqd) {
		retval = -ENOMEM;
		goto out_unlock;
	}
	/*
	 * Eviction state logic: we only mark active queues as evicted
	 * to avoid the overhead of restoring inactive queues later
	 */
	if (pdd->qpd.evicted)
		q->properties.is_evicted = (q->properties.queue_size > 0 &&
					    q->properties.queue_percent > 0 &&
					    q->properties.queue_address != 0);

	/* Save previous activity state for counters */
	prev_active = q->properties.is_active;

	/* Make sure the queue is unmapped before updating the MQD */
	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
		retval = unmap_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
		if (retval) {
			pr_err("unmap queue failed\n");
			goto out_unlock;
		}
	} else if (prev_active &&
		   (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
		    q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
		retval = mqd->destroy_mqd(mqd, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
		if (retval) {
			pr_err("destroy mqd failed\n");
			goto out_unlock;
		}
	}

	retval = mqd->update_mqd(mqd, q->mqd, &q->properties);

	/*
	 * check active state vs. the previous state and modify
	 * counter accordingly. map_queues_cpsch uses the
	 * dqm->queue_count to determine whether a new runlist must be
	 * uploaded.
	 */
	if (q->properties.is_active && !prev_active)
		dqm->queue_count++;
	else if (!q->properties.is_active && prev_active)
		dqm->queue_count--;

	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
		retval = map_queues_cpsch(dqm);
	else if (q->properties.is_active &&
		 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
		  q->properties.type == KFD_QUEUE_TYPE_SDMA))
		retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue,
				       &q->properties, q->process->mm);

out_unlock:
	mutex_unlock(&dqm->lock);
	return retval;
}

static struct mqd_manager *get_mqd_manager(
		struct device_queue_manager *dqm, enum KFD_MQD_TYPE type)
{
	struct mqd_manager *mqd;

	if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
		return NULL;

	pr_debug("mqd type %d\n", type);

	mqd = dqm->mqds[type];
	if (!mqd) {
		mqd = mqd_manager_init(type, dqm->dev);
		if (!mqd)
			pr_err("mqd manager is NULL");
		dqm->mqds[type] = mqd;
	}

	return mqd;
}

static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct queue *q;
	struct mqd_manager *mqd;
	struct kfd_process_device *pdd;
	int retval = 0;

	mutex_lock(&dqm->lock);
	if (qpd->evicted++ > 0) /* already evicted, do nothing */
		goto out;

	pdd = qpd_to_pdd(qpd);
	pr_info_ratelimited("Evicting PASID %u queues\n",
			    pdd->process->pasid);

	/* Deactivate all active queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (!q->properties.is_active)
			continue;
		mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
		if (!mqd) { /* should not be here */
			pr_err("Cannot evict queue, mqd mgr is NULL\n");
			retval = -ENOMEM;
			goto out;
		}
		q->properties.is_evicted = true;
		q->properties.is_active = false;
		retval = mqd->destroy_mqd(mqd, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
		if (retval)
			goto out;
		dqm->queue_count--;
	}

out:
	mutex_unlock(&dqm->lock);
	return retval;
}

static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
				      struct qcm_process_device *qpd)
{
	struct queue *q;
	struct kfd_process_device *pdd;
	int retval = 0;

	mutex_lock(&dqm->lock);
	if (qpd->evicted++ > 0) /* already evicted, do nothing */
		goto out;

	pdd = qpd_to_pdd(qpd);
	pr_info_ratelimited("Evicting PASID %u queues\n",
			    pdd->process->pasid);

	/* Deactivate all active queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (!q->properties.is_active)
			continue;
		q->properties.is_evicted = true;
		q->properties.is_active = false;
		dqm->queue_count--;
	}
	retval = execute_queues_cpsch(dqm,
				qpd->is_debug ?
				KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);

out:
	mutex_unlock(&dqm->lock);
	return retval;
}

static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
					  struct qcm_process_device *qpd)
{
	struct queue *q;
	struct mqd_manager *mqd;
	struct kfd_process_device *pdd;
	uint32_t pd_base;
	int retval = 0;

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);

	mutex_lock(&dqm->lock);
	if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
		goto out;
	if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
		qpd->evicted--;
		goto out;
	}

	pr_info_ratelimited("Restoring PASID %u queues\n",
			    pdd->process->pasid);

	/* Update PD Base in QPD */
	qpd->page_table_base = pd_base;
	pr_debug("Updated PD address to 0x%08x\n", pd_base);

	if (!list_empty(&qpd->queues_list)) {
		dqm->dev->kfd2kgd->set_vm_context_page_table_base(
				dqm->dev->kgd,
				qpd->vmid,
				qpd->page_table_base);
		kfd_flush_tlb(pdd);
	}

	/* activate all active queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (!q->properties.is_evicted)
			continue;
		mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
		if (!mqd) { /* should not be here */
			pr_err("Cannot restore queue, mqd mgr is NULL\n");
			retval = -ENOMEM;
			goto out;
		}
		q->properties.is_evicted = false;
		q->properties.is_active = true;
		retval = mqd->load_mqd(mqd, q->mqd, q->pipe,
				       q->queue, &q->properties,
				       q->process->mm);
		if (retval)
			goto out;
		dqm->queue_count++;
	}
	qpd->evicted = 0;
out:
	mutex_unlock(&dqm->lock);
	return retval;
}

static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct queue *q;
	struct kfd_process_device *pdd;
	uint32_t pd_base;
	int retval = 0;

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);

	mutex_lock(&dqm->lock);
	if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
		goto out;
	if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
		qpd->evicted--;
		goto out;
	}

	pr_info_ratelimited("Restoring PASID %u queues\n",
			    pdd->process->pasid);

	/* Update PD Base in QPD */
	qpd->page_table_base = pd_base;
	pr_debug("Updated PD address to 0x%08x\n", pd_base);

	/* activate all active queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (!q->properties.is_evicted)
			continue;
		q->properties.is_evicted = false;
		q->properties.is_active = true;
		dqm->queue_count++;
	}
	retval = execute_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	if (!retval)
		qpd->evicted = 0;
out:
	mutex_unlock(&dqm->lock);
	return retval;
}

static int register_process(struct device_queue_manager *dqm,
			    struct qcm_process_device *qpd)
{
	struct device_process_node *n;
	struct kfd_process_device *pdd;
	uint32_t pd_base;
	int retval;

	n = kzalloc(sizeof(*n), GFP_KERNEL);
	if (!n)
		return -ENOMEM;

	n->qpd = qpd;

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);

	mutex_lock(&dqm->lock);
	list_add(&n->list, &dqm->queues);

	/* Update PD Base in QPD */
	qpd->page_table_base = pd_base;

	retval = dqm->asic_ops.update_qpd(dqm, qpd);

	dqm->processes_count++;

	mutex_unlock(&dqm->lock);

	return retval;
}

static int unregister_process(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd)
{
	int retval;
	struct device_process_node *cur, *next;

	pr_debug("qpd->queues_list is %s\n",
			list_empty(&qpd->queues_list) ? "empty" : "not empty");

	retval = 0;
	mutex_lock(&dqm->lock);

	list_for_each_entry_safe(cur, next, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			goto out;
		}
	}
	/* qpd not found in dqm list */
	retval = 1;
out:
	mutex_unlock(&dqm->lock);
	return retval;
}

static int
set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
			unsigned int vmid)
{
	uint32_t pasid_mapping;

	pasid_mapping = (pasid == 0) ? 0 :
		(uint32_t)pasid |
		ATC_VMID_PASID_MAPPING_VALID;

	return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
						dqm->dev->kgd, pasid_mapping,
						vmid);
}

static void init_interrupts(struct device_queue_manager *dqm)
{
	unsigned int i;

	for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++)
		if (is_pipe_enabled(dqm, 0, i))
			dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd, i);
}

static int initialize_nocpsch(struct device_queue_manager *dqm)
{
	int pipe, queue;

	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));

	dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm),
					sizeof(unsigned int), GFP_KERNEL);
	if (!dqm->allocated_queues)
		return -ENOMEM;

	mutex_init(&dqm->lock);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->next_pipe_to_allocate = 0;
	dqm->sdma_queue_count = 0;

	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
		int pipe_offset = pipe * get_queues_per_pipe(dqm);

		for (queue = 0; queue < get_queues_per_pipe(dqm); queue++)
			if (test_bit(pipe_offset + queue,
				     dqm->dev->shared_resources.queue_bitmap))
				dqm->allocated_queues[pipe] |= 1 << queue;
	}

	dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1;
	dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1;

	return 0;
}

static void uninitialize(struct device_queue_manager *dqm)
{
	int i;

	WARN_ON(dqm->queue_count > 0 || dqm->processes_count > 0);

	kfree(dqm->allocated_queues);
	for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
		kfree(dqm->mqds[i]);
	mutex_destroy(&dqm->lock);
	kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem);
}

static int start_nocpsch(struct device_queue_manager *dqm)
{
	init_interrupts(dqm);
	return pm_init(&dqm->packets, dqm);
}

static int stop_nocpsch(struct device_queue_manager *dqm)
{
	pm_uninit(&dqm->packets);
	return 0;
}

static int allocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int *sdma_queue_id)
{
	int bit;

	if (dqm->sdma_bitmap == 0)
		return -ENOMEM;

	bit = ffs(dqm->sdma_bitmap) - 1;
	dqm->sdma_bitmap &= ~(1 << bit);
	*sdma_queue_id = bit;

	return 0;
}

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int sdma_queue_id)
{
	if (sdma_queue_id >= CIK_SDMA_QUEUES)
		return;
	dqm->sdma_bitmap |= (1 << sdma_queue_id);
}

static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd)
{
	struct mqd_manager *mqd;
	int retval;

	mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
	if (!mqd)
		return -ENOMEM;

	retval = allocate_sdma_queue(dqm, &q->sdma_id);
	if (retval)
		return retval;

	q->properties.sdma_queue_id = q->sdma_id / CIK_SDMA_QUEUES_PER_ENGINE;
	q->properties.sdma_engine_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE;

	pr_debug("SDMA id is: %d\n", q->sdma_id);
	pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
	pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);

	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval)
		goto out_deallocate_sdma_queue;

	retval = mqd->load_mqd(mqd, q->mqd, 0, 0, &q->properties, NULL);
	if (retval)
		goto out_uninit_mqd;

	return 0;

out_uninit_mqd:
	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
out_deallocate_sdma_queue:
	deallocate_sdma_queue(dqm, q->sdma_id);

	return retval;
}

/*
 * Device Queue Manager implementation for cp scheduler
 */

static int set_sched_resources(struct device_queue_manager *dqm)
{
	int i, mec;
	struct scheduling_resources res;

	res.vmid_mask = dqm->dev->shared_resources.compute_vmid_bitmap;

	res.queue_mask = 0;
	for (i = 0; i < KGD_MAX_QUEUES; ++i) {
		mec = (i / dqm->dev->shared_resources.num_queue_per_pipe)
			/ dqm->dev->shared_resources.num_pipe_per_mec;

		if (!test_bit(i, dqm->dev->shared_resources.queue_bitmap))
			continue;

		/* only acquire queues from the first MEC */
		if (mec > 0)
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of res.queue_mask needs updating
		 */
		if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) {
			pr_err("Invalid queue enabled by amdgpu: %d\n", i);
			break;
		}

		res.queue_mask |= (1ull << i);
	}
	res.gws_mask = res.oac_mask = res.gds_heap_base =
		res.gds_heap_size = 0;

	pr_debug("Scheduling resources:\n"
			"vmid mask: 0x%8X\n"
			"queue mask: 0x%8llX\n",
			res.vmid_mask, res.queue_mask);

	return pm_send_set_resources(&dqm->packets, &res);
}

static int initialize_cpsch(struct device_queue_manager *dqm)
{
	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));

	mutex_init(&dqm->lock);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->processes_count = 0;
	dqm->sdma_queue_count = 0;
	dqm->active_runlist = false;
	dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1;

	return 0;
}

static int start_cpsch(struct device_queue_manager *dqm)
{
	int retval;

	retval = 0;

	retval = pm_init(&dqm->packets, dqm);
	if (retval)
		goto fail_packet_manager_init;

	retval = set_sched_resources(dqm);
	if (retval)
		goto fail_set_sched_resources;

	pr_debug("Allocating fence memory\n");

	/* allocate fence memory on the gart */
	retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
					&dqm->fence_mem);

	if (retval)
		goto fail_allocate_vidmem;

	dqm->fence_addr = dqm->fence_mem->cpu_ptr;
	dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;

	init_interrupts(dqm);

	mutex_lock(&dqm->lock);
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	mutex_unlock(&dqm->lock);

	return 0;
fail_allocate_vidmem:
fail_set_sched_resources:
	pm_uninit(&dqm->packets);
fail_packet_manager_init:
	return retval;
}

static int stop_cpsch(struct device_queue_manager *dqm)
{
	mutex_lock(&dqm->lock);
	unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
	mutex_unlock(&dqm->lock);

	kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
	pm_uninit(&dqm->packets);

	return 0;
}

static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	mutex_lock(&dqm->lock);
	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new kernel queue because %d queues were already created\n",
				dqm->total_queue_count);
		mutex_unlock(&dqm->lock);
		return -EPERM;
	}

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	list_add(&kq->list, &qpd->priv_queue_list);
	dqm->queue_count++;
	qpd->is_debug = true;
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	mutex_unlock(&dqm->lock);

	return 0;
}

static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	mutex_lock(&dqm->lock);
	list_del(&kq->list);
	dqm->queue_count--;
	qpd->is_debug = false;
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type.
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);
	mutex_unlock(&dqm->lock);
}

static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
			struct qcm_process_device *qpd)
{
	int retval;
	struct mqd_manager *mqd;

	retval = 0;

	mutex_lock(&dqm->lock);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		retval = -EPERM;
		goto out_unlock;
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		retval = allocate_sdma_queue(dqm, &q->sdma_id);
		if (retval)
			goto out_unlock;
		q->properties.sdma_queue_id =
			q->sdma_id / CIK_SDMA_QUEUES_PER_ENGINE;
		q->properties.sdma_engine_id =
			q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE;
	}
	mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));

	if (!mqd) {
		retval = -ENOMEM;
		goto out_deallocate_sdma_queue;
	}
	/*
	 * Eviction state logic: we only mark active queues as evicted
	 * to avoid the overhead of restoring inactive queues later
	 */
	if (qpd->evicted)
		q->properties.is_evicted = (q->properties.queue_size > 0 &&
					    q->properties.queue_percent > 0 &&
					    q->properties.queue_address != 0);

	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);

	q->properties.tba_addr = qpd->tba_addr;
	q->properties.tma_addr = qpd->tma_addr;
	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval)
		goto out_deallocate_sdma_queue;

	list_add(&q->list, &qpd->queues_list);
	qpd->queue_count++;
	if (q->properties.is_active) {
		dqm->queue_count++;
		retval = execute_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count++;
	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;

	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	mutex_unlock(&dqm->lock);
	return retval;

out_deallocate_sdma_queue:
	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		deallocate_sdma_queue(dqm, q->sdma_id);
out_unlock:
	mutex_unlock(&dqm->lock);
	return retval;
}

int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
				unsigned int fence_value,
				unsigned int timeout_ms)
{
	unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies;

	while (*fence_addr != fence_value) {
		if (time_after(jiffies, end_jiffies)) {
			pr_err("qcm fence wait loop timeout expired\n");
			return -ETIME;
		}
		schedule();
	}

	return 0;
}

static int unmap_sdma_queues(struct device_queue_manager *dqm,
				unsigned int sdma_engine)
{
	return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
			KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false,
			sdma_engine);
}

/* dqm->lock mutex has to be locked before calling this function */
static int map_queues_cpsch(struct device_queue_manager *dqm)
{
	int retval;

	if (dqm->queue_count <= 0 || dqm->processes_count <= 0)
		return 0;

	if (dqm->active_runlist)
		return 0;

	retval = pm_send_runlist(&dqm->packets, &dqm->queues);
	if (retval) {
		pr_err("failed to execute runlist\n");
		return retval;
	}
	dqm->active_runlist = true;

	return retval;
}

/* dqm->lock mutex has to be locked before calling this function */
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param)
{
	int retval = 0;

	if (!dqm->active_runlist)
		return retval;

	pr_debug("Before destroying queues, sdma queue count is : %u\n",
		dqm->sdma_queue_count);

	if (dqm->sdma_queue_count > 0) {
		unmap_sdma_queues(dqm, 0);
		unmap_sdma_queues(dqm, 1);
	}

	retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
			filter, filter_param, false, 0);
	if (retval)
		return retval;

	*dqm->fence_addr = KFD_FENCE_INIT;
	pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr,
				KFD_FENCE_COMPLETED);
	/* should be timed out */
	retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
	if (retval)
		return retval;

	pm_release_ib(&dqm->packets);
	dqm->active_runlist = false;

	return retval;
}

/* dqm->lock mutex has to be locked before calling this function */
static int execute_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param)
{
	int retval;

	retval = unmap_queues_cpsch(dqm, filter, filter_param);
	if (retval) {
		pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
		return retval;
	}

	return map_queues_cpsch(dqm);
}

static int destroy_queue_cpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;
	bool preempt_all_queues;

	preempt_all_queues = false;

	retval = 0;

	/* remove queue from list to prevent rescheduling after preemption */
	mutex_lock(&dqm->lock);

	if (qpd->is_debug) {
		/*
		 * error, currently we do not allow destroying a queue
		 * of a process that is currently being debugged
		 */
		retval = -EBUSY;
		goto failed_try_destroy_debugged_queue;

	}

	mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
	if (!mqd) {
		retval = -ENOMEM;
		goto failed;
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		dqm->sdma_queue_count--;
		deallocate_sdma_queue(dqm, q->sdma_id);
	}

	list_del(&q->list);
	qpd->queue_count--;
	if (q->properties.is_active) {
		dqm->queue_count--;
		retval = execute_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
		if (retval == -ETIME)
			qpd->reset_wavefronts = true;
	}

	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);

	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	mutex_unlock(&dqm->lock);

	return retval;

failed:
failed_try_destroy_debugged_queue:

	mutex_unlock(&dqm->lock);
	return retval;
}

/*
 * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
 * stay in user mode.
 */
#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
/* APE1 limit is inclusive and 64K aligned. */
#define APE1_LIMIT_ALIGNMENT 0xFFFF

static bool set_cache_memory_policy(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				enum cache_policy default_policy,
				enum cache_policy alternate_policy,
				void __user *alternate_aperture_base,
				uint64_t alternate_aperture_size)
{
	bool retval;

	mutex_lock(&dqm->lock);

	if (alternate_aperture_size == 0) {
		/* base > limit disables APE1 */
		qpd->sh_mem_ape1_base = 1;
		qpd->sh_mem_ape1_limit = 0;
	} else {
		/*
		 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
		 * SH_MEM_APE1_BASE[31:0], 0x0000 }
		 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
		 * SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
		 * Verify that the base and size parameters can be
		 * represented in this format and convert them.
		 * Additionally restrict APE1 to user-mode addresses.
		 */

		uint64_t base = (uintptr_t)alternate_aperture_base;
		uint64_t limit = base + alternate_aperture_size - 1;

		if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
		   (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
			retval = false;
			goto out;
		}

		qpd->sh_mem_ape1_base = base >> 16;
		qpd->sh_mem_ape1_limit = limit >> 16;
	}

	retval = dqm->asic_ops.set_cache_memory_policy(
			dqm,
			qpd,
			default_policy,
			alternate_policy,
			alternate_aperture_base,
			alternate_aperture_size);

	if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
		program_sh_mem_settings(dqm, qpd);

	pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
		qpd->sh_mem_config, qpd->sh_mem_ape1_base,
		qpd->sh_mem_ape1_limit);

out:
	mutex_unlock(&dqm->lock);
	return retval;
}

static int set_trap_handler(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				uint64_t tba_addr,
				uint64_t tma_addr)
{
	uint64_t *tma;

	if (dqm->dev->cwsr_enabled) {
		/* Jump from CWSR trap handler to user trap */
		tma = (uint64_t *)(qpd->cwsr_kaddr + KFD_CWSR_TMA_OFFSET);
		tma[0] = tba_addr;
		tma[1] = tma_addr;
	} else {
		qpd->tba_addr = tba_addr;
		qpd->tma_addr = tma_addr;
	}

	return 0;
}

static int process_termination_nocpsch(struct device_queue_manager *dqm,
		struct qcm_process_device *qpd)
{
	struct queue *q, *next;
	struct device_process_node *cur, *next_dpn;
	int retval = 0;

	mutex_lock(&dqm->lock);

	/* Clear all user mode queues */
	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
		int ret;

		ret = destroy_queue_nocpsch_locked(dqm, qpd, q);
		if (ret)
			retval = ret;
	}

	/* Unregister process */
	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			break;
		}
	}

	mutex_unlock(&dqm->lock);
	return retval;
}


static int process_termination_cpsch(struct device_queue_manager *dqm,
		struct qcm_process_device *qpd)
{
	int retval;
	struct queue *q, *next;
	struct kernel_queue *kq, *kq_next;
	struct mqd_manager *mqd;
	struct device_process_node *cur, *next_dpn;
	enum kfd_unmap_queues_filter filter =
		KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;

	retval = 0;

	mutex_lock(&dqm->lock);

	/* Clean all kernel queues */
	list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
		list_del(&kq->list);
		dqm->queue_count--;
		qpd->is_debug = false;
		dqm->total_queue_count--;
		filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES;
	}

	/* Clear all user mode queues */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
			dqm->sdma_queue_count--;
			deallocate_sdma_queue(dqm, q->sdma_id);
		}

		if (q->properties.is_active)
			dqm->queue_count--;

		dqm->total_queue_count--;
	}

	/* Unregister process */
	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			break;
		}
	}

	retval = execute_queues_cpsch(dqm, filter, 0);
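	/* If unmapping the queues failed, or a timed-out preemption flagged
	 * reset_wavefronts earlier, reset this process's wavefronts on the
	 * device before its MQDs are freed below.
	 */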
	if (retval || qpd->reset_wavefronts) {
		pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
		dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
		qpd->reset_wavefronts = false;
	}

	/* lastly, free mqd resources */
	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
		mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
		if (!mqd) {
			retval = -ENOMEM;
			goto out;
		}
		list_del(&q->list);
		qpd->queue_count--;
		mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
	}

out:
	mutex_unlock(&dqm->lock);
	return retval;
}

struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
{
	struct device_queue_manager *dqm;

	pr_debug("Loading device queue manager\n");

	dqm = kzalloc(sizeof(*dqm), GFP_KERNEL);
	if (!dqm)
		return NULL;

	switch (dev->device_info->asic_family) {
	/* HWS is not available on Hawaii. */
	case CHIP_HAWAII:
	/* HWS depends on CWSR for timely dequeue. CWSR is not
	 * available on Tonga.
	 *
	 * FIXME: This argument also applies to Kaveri.
	 */
	case CHIP_TONGA:
		dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS;
		break;
	default:
		dqm->sched_policy = sched_policy;
		break;
	}

	dqm->dev = dev;
	switch (dqm->sched_policy) {
	case KFD_SCHED_POLICY_HWS:
	case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
		/* initialize dqm for cp scheduling */
		dqm->ops.create_queue = create_queue_cpsch;
		dqm->ops.initialize = initialize_cpsch;
		dqm->ops.start = start_cpsch;
		dqm->ops.stop = stop_cpsch;
		dqm->ops.destroy_queue = destroy_queue_cpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.get_mqd_manager = get_mqd_manager;
		dqm->ops.register_process = register_process;
		dqm->ops.unregister_process = unregister_process;
		dqm->ops.uninitialize = uninitialize;
		dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
		dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		dqm->ops.set_trap_handler = set_trap_handler;
		dqm->ops.process_termination = process_termination_cpsch;
		dqm->ops.evict_process_queues = evict_process_queues_cpsch;
		dqm->ops.restore_process_queues = restore_process_queues_cpsch;
		break;
	case KFD_SCHED_POLICY_NO_HWS:
		/* initialize dqm for no cp scheduling */
		dqm->ops.start = start_nocpsch;
		dqm->ops.stop = stop_nocpsch;
		dqm->ops.create_queue = create_queue_nocpsch;
		dqm->ops.destroy_queue = destroy_queue_nocpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.get_mqd_manager = get_mqd_manager;
		dqm->ops.register_process = register_process;
		dqm->ops.unregister_process = unregister_process;
		dqm->ops.initialize = initialize_nocpsch;
		dqm->ops.uninitialize = uninitialize;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		dqm->ops.set_trap_handler = set_trap_handler;
		dqm->ops.process_termination = process_termination_nocpsch;
		dqm->ops.evict_process_queues = evict_process_queues_nocpsch;
		dqm->ops.restore_process_queues =
			restore_process_queues_nocpsch;
		break;
	default:
		pr_err("Invalid scheduling policy %d\n", dqm->sched_policy);
		goto out_free;
	}

	switch (dev->device_info->asic_family) {
	case CHIP_CARRIZO:
		device_queue_manager_init_vi(&dqm->asic_ops);
		break;

	case CHIP_KAVERI:
		device_queue_manager_init_cik(&dqm->asic_ops);
		break;

	case CHIP_HAWAII:
		device_queue_manager_init_cik_hawaii(&dqm->asic_ops);
		break;

	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
		device_queue_manager_init_vi_tonga(&dqm->asic_ops);
		break;
	default:
		WARN(1, "Unexpected ASIC family %u",
		     dev->device_info->asic_family);
		goto out_free;
	}

	if (!dqm->ops.initialize(dqm))
		return dqm;

out_free:
	kfree(dqm);
	return NULL;
}

void device_queue_manager_uninit(struct device_queue_manager *dqm)
{
	dqm->ops.uninitialize(dqm);
	kfree(dqm);
}

#if defined(CONFIG_DEBUG_FS)

static void seq_reg_dump(struct seq_file *m,
			 uint32_t (*dump)[2], uint32_t n_regs)
{
	uint32_t i, count;

	for (i = 0, count = 0; i < n_regs; i++) {
		if (count == 0 ||
		    dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) {
			seq_printf(m, "%s %08x: %08x",
				   i ? "\n" : "",
				   dump[i][0], dump[i][1]);
			count = 7;
		} else {
			seq_printf(m, " %08x", dump[i][1]);
			count--;
		}
	}

	seq_puts(m, "\n");
}

int dqm_debugfs_hqds(struct seq_file *m, void *data)
{
	struct device_queue_manager *dqm = data;
	uint32_t (*dump)[2], n_regs;
	int pipe, queue;
	int r = 0;

	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
		int pipe_offset = pipe * get_queues_per_pipe(dqm);

		for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) {
			if (!test_bit(pipe_offset + queue,
				      dqm->dev->shared_resources.queue_bitmap))
				continue;

			r = dqm->dev->kfd2kgd->hqd_dump(
				dqm->dev->kgd, pipe, queue, &dump, &n_regs);
			if (r)
				break;

			seq_printf(m, " CP Pipe %d, Queue %d\n",
				   pipe, queue);
			seq_reg_dump(m, dump, n_regs);

			kfree(dump);
		}
	}

	for (pipe = 0; pipe < CIK_SDMA_ENGINE_NUM; pipe++) {
		for (queue = 0; queue < CIK_SDMA_QUEUES_PER_ENGINE; queue++) {
			r = dqm->dev->kfd2kgd->hqd_sdma_dump(
				dqm->dev->kgd, pipe, queue, &dump, &n_regs);
			if (r)
				break;

			seq_printf(m, " SDMA Engine %d, RLC %d\n",
				   pipe, queue);
			seq_reg_dump(m, dump, n_regs);

			kfree(dump);
		}
	}

	return r;
}

#endif