/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/slab.h>
#include <linux/list.h>
#include <linux/types.h>
#include <linux/printk.h>
#include <linux/bitops.h>
#include <linux/sched.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_mqd_manager.h"
#include "cik_regs.h"
#include "kfd_kernel_queue.h"

/* Size of the per-pipe EOP queue */
#define CIK_HPD_EOP_BYTES_LOG2 11
#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)

static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
					unsigned int pasid, unsigned int vmid);

static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd);

static int execute_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param);
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param);

static int map_queues_cpsch(struct device_queue_manager *dqm);

static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd);

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int sdma_queue_id);

static inline
enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
{
	if (type == KFD_QUEUE_TYPE_SDMA)
		return KFD_MQD_TYPE_SDMA;
	return KFD_MQD_TYPE_CP;
}

static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
{
	int i;
	int pipe_offset = mec * dqm->dev->shared_resources.num_pipe_per_mec
		+ pipe * dqm->dev->shared_resources.num_queue_per_pipe;

	/* queue is available for KFD usage if bit is 1 */
	for (i = 0; i < dqm->dev->shared_resources.num_queue_per_pipe; ++i)
		if (test_bit(pipe_offset + i,
			     dqm->dev->shared_resources.queue_bitmap))
			return true;
	return false;
}

unsigned int get_queues_num(struct device_queue_manager *dqm)
{
	return bitmap_weight(dqm->dev->shared_resources.queue_bitmap,
				KGD_MAX_QUEUES);
}

unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
{
	return dqm->dev->shared_resources.num_queue_per_pipe;
}

unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
{
	return dqm->dev->shared_resources.num_pipe_per_mec;
}

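/*
 * Push the process's shared-memory aperture configuration (sh_mem_config,
 * APE1 base/limit and sh_mem_bases) to the hardware for the VMID owned by
 * this process, via the kfd2kgd interface.
 */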
void program_sh_mem_settings(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	return dqm->dev->kfd2kgd->program_sh_mem_settings(
						dqm->dev->kgd, qpd->vmid,
						qpd->sh_mem_config,
						qpd->sh_mem_ape1_base,
						qpd->sh_mem_ape1_limit,
						qpd->sh_mem_bases);
}

static int allocate_vmid(struct device_queue_manager *dqm,
			struct qcm_process_device *qpd,
			struct queue *q)
{
	int bit, allocated_vmid;

	if (dqm->vmid_bitmap == 0)
		return -ENOMEM;

	bit = find_first_bit((unsigned long *)&dqm->vmid_bitmap,
				dqm->dev->vm_info.vmid_num_kfd);
	clear_bit(bit, (unsigned long *)&dqm->vmid_bitmap);

	allocated_vmid = bit + dqm->dev->vm_info.first_vmid_kfd;
	pr_debug("vmid allocation %d\n", allocated_vmid);
	qpd->vmid = allocated_vmid;
	q->properties.vmid = allocated_vmid;

	set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
	program_sh_mem_settings(dqm, qpd);

	return 0;
}

static void deallocate_vmid(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;

	/* Release the vmid mapping */
	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);

	set_bit(bit, (unsigned long *)&dqm->vmid_bitmap);
	qpd->vmid = 0;
	q->properties.vmid = 0;
}

static int create_queue_nocpsch(struct device_queue_manager *dqm,
				struct queue *q,
				struct qcm_process_device *qpd,
				int *allocated_vmid)
{
	int retval;

	print_queue(q);

	mutex_lock(&dqm->lock);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		retval = -EPERM;
		goto out_unlock;
	}

	if (list_empty(&qpd->queues_list)) {
		retval = allocate_vmid(dqm, qpd, q);
		if (retval)
			goto out_unlock;
	}
	*allocated_vmid = qpd->vmid;
	q->properties.vmid = qpd->vmid;

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
		retval = create_compute_queue_nocpsch(dqm, q, qpd);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		retval = create_sdma_queue_nocpsch(dqm, q, qpd);
	else
		retval = -EINVAL;

	if (retval) {
		if (list_empty(&qpd->queues_list)) {
			deallocate_vmid(dqm, qpd, q);
			*allocated_vmid = 0;
		}
		goto out_unlock;
	}

	list_add(&q->list, &qpd->queues_list);
	qpd->queue_count++;
	if (q->properties.is_active)
		dqm->queue_count++;

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count++;

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

out_unlock:
	mutex_unlock(&dqm->lock);
	return retval;
}

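/*
 * Pick a free HQD slot for a no-HWS compute queue. Pipes are scanned
 * round-robin starting at dqm->next_pipe_to_allocate so queues are spread
 * across pipes; within a pipe, the first free queue bit in
 * dqm->allocated_queues[pipe] is claimed.
 */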
static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
{
	bool set;
	int pipe, bit, i;

	set = false;

	for (pipe = dqm->next_pipe_to_allocate, i = 0;
			i < get_pipes_per_mec(dqm);
			pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {

		if (!is_pipe_enabled(dqm, 0, pipe))
			continue;

		if (dqm->allocated_queues[pipe] != 0) {
			bit = find_first_bit(
				(unsigned long *)&dqm->allocated_queues[pipe],
				get_queues_per_pipe(dqm));

			clear_bit(bit,
				(unsigned long *)&dqm->allocated_queues[pipe]);
			q->pipe = pipe;
			q->queue = bit;
			set = true;
			break;
		}
	}

	if (!set)
		return -EBUSY;

	pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue);
	/* horizontal hqd allocation */
	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);

	return 0;
}

static inline void deallocate_hqd(struct device_queue_manager *dqm,
				struct queue *q)
{
	set_bit(q->queue, (unsigned long *)&dqm->allocated_queues[q->pipe]);
}

static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd)
{
	int retval;
	struct mqd_manager *mqd;

	mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
	if (!mqd)
		return -ENOMEM;

	retval = allocate_hqd(dqm, q);
	if (retval)
		return retval;

	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval)
		goto out_deallocate_hqd;

	pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
			q->pipe, q->queue);

	dqm->dev->kfd2kgd->set_scratch_backing_va(
			dqm->dev->kgd, qpd->sh_hidden_private_base, qpd->vmid);

	if (!q->properties.is_active)
		return 0;

	retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue, &q->properties,
			       q->process->mm);
	if (retval)
		goto out_uninit_mqd;

	return 0;

out_uninit_mqd:
	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
out_deallocate_hqd:
	deallocate_hqd(dqm, q);

	return retval;
}

/* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked
 * to avoid unsynchronized access
 */
static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;

	mqd = dqm->ops.get_mqd_manager(dqm,
		get_mqd_type_from_queue_type(q->properties.type));
	if (!mqd)
		return -ENOMEM;

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
		deallocate_hqd(dqm, q);
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		dqm->sdma_queue_count--;
		deallocate_sdma_queue(dqm, q->sdma_id);
	} else {
		pr_debug("q->properties.type %d is invalid\n",
				q->properties.type);
		return -EINVAL;
	}
	dqm->total_queue_count--;

	retval = mqd->destroy_mqd(mqd, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
				KFD_UNMAP_LATENCY_MS,
				q->pipe, q->queue);
	if (retval == -ETIME)
		qpd->reset_wavefronts = true;

	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);

	list_del(&q->list);
	if (list_empty(&qpd->queues_list)) {
		if (qpd->reset_wavefronts) {
			pr_warn("Resetting wave fronts (nocpsch) on dev %p\n",
					dqm->dev);
			/* dbgdev_wave_reset_wavefronts has to be called before
			 * deallocate_vmid(), i.e. when vmid is still in use.
			 */
			dbgdev_wave_reset_wavefronts(dqm->dev,
					qpd->pqm->process);
			qpd->reset_wavefronts = false;
		}

		deallocate_vmid(dqm, qpd, q);
	}
	qpd->queue_count--;
	if (q->properties.is_active)
		dqm->queue_count--;

	return retval;
}

static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;

	mutex_lock(&dqm->lock);
	retval = destroy_queue_nocpsch_locked(dqm, qpd, q);
	mutex_unlock(&dqm->lock);

	return retval;
}

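/*
 * Update a queue's MQD with new properties. With HWS enabled the queue is
 * first unmapped via the CP scheduler; without HWS an active queue's MQD is
 * destroyed directly and reloaded afterwards. dqm->queue_count is adjusted
 * when the queue's active state changes, since map_queues_cpsch uses it to
 * decide whether a new runlist must be uploaded.
 */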
static int update_queue(struct device_queue_manager *dqm, struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;
	bool prev_active = false;

	mutex_lock(&dqm->lock);
	mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
	if (!mqd) {
		retval = -ENOMEM;
		goto out_unlock;
	}

	/* Save previous activity state for counters */
	prev_active = q->properties.is_active;

	/* Make sure the queue is unmapped before updating the MQD */
	if (sched_policy != KFD_SCHED_POLICY_NO_HWS) {
		retval = unmap_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
		if (retval) {
			pr_err("unmap queue failed\n");
			goto out_unlock;
		}
	} else if (prev_active &&
		   (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
		    q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
		retval = mqd->destroy_mqd(mqd, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
		if (retval) {
			pr_err("destroy mqd failed\n");
			goto out_unlock;
		}
	}

	retval = mqd->update_mqd(mqd, q->mqd, &q->properties);

	/*
	 * check active state vs. the previous state and modify
	 * counter accordingly. map_queues_cpsch uses the
	 * dqm->queue_count to determine whether a new runlist must be
	 * uploaded.
	 */
	if (q->properties.is_active && !prev_active)
		dqm->queue_count++;
	else if (!q->properties.is_active && prev_active)
		dqm->queue_count--;

	if (sched_policy != KFD_SCHED_POLICY_NO_HWS)
		retval = map_queues_cpsch(dqm);
	else if (q->properties.is_active &&
		 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
		  q->properties.type == KFD_QUEUE_TYPE_SDMA))
		retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue,
				       &q->properties, q->process->mm);

out_unlock:
	mutex_unlock(&dqm->lock);
	return retval;
}

static struct mqd_manager *get_mqd_manager(
		struct device_queue_manager *dqm, enum KFD_MQD_TYPE type)
{
	struct mqd_manager *mqd;

	if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
		return NULL;

	pr_debug("mqd type %d\n", type);

	mqd = dqm->mqds[type];
	if (!mqd) {
		mqd = mqd_manager_init(type, dqm->dev);
		if (!mqd)
			pr_err("mqd manager is NULL\n");
		dqm->mqds[type] = mqd;
	}

	return mqd;
}

static int register_process(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct device_process_node *n;
	int retval;

	n = kzalloc(sizeof(*n), GFP_KERNEL);
	if (!n)
		return -ENOMEM;

	n->qpd = qpd;

	mutex_lock(&dqm->lock);
	list_add(&n->list, &dqm->queues);

	retval = dqm->asic_ops.update_qpd(dqm, qpd);

	dqm->processes_count++;

	mutex_unlock(&dqm->lock);

	return retval;
}

static int unregister_process(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	int retval;
	struct device_process_node *cur, *next;

	pr_debug("qpd->queues_list is %s\n",
			list_empty(&qpd->queues_list) ? "empty" : "not empty");

	retval = 0;
	mutex_lock(&dqm->lock);

	list_for_each_entry_safe(cur, next, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			goto out;
		}
	}
	/* qpd not found in dqm list */
	retval = 1;
out:
	mutex_unlock(&dqm->lock);
	return retval;
}

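/*
 * Program the PASID<->VMID mapping in the hardware. A pasid of 0 clears the
 * mapping; otherwise the PASID is written together with the "mapping valid"
 * bit for the given VMID.
 */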
static int
set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
			unsigned int vmid)
{
	uint32_t pasid_mapping;

	pasid_mapping = (pasid == 0) ? 0 :
		(uint32_t)pasid |
		ATC_VMID_PASID_MAPPING_VALID;

	return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
						dqm->dev->kgd, pasid_mapping,
						vmid);
}

static void init_interrupts(struct device_queue_manager *dqm)
{
	unsigned int i;

	for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++)
		if (is_pipe_enabled(dqm, 0, i))
			dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd, i);
}

static int initialize_nocpsch(struct device_queue_manager *dqm)
{
	int pipe, queue;

	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));

	dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm),
					sizeof(unsigned int), GFP_KERNEL);
	if (!dqm->allocated_queues)
		return -ENOMEM;

	mutex_init(&dqm->lock);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->next_pipe_to_allocate = 0;
	dqm->sdma_queue_count = 0;

	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
		int pipe_offset = pipe * get_queues_per_pipe(dqm);

		for (queue = 0; queue < get_queues_per_pipe(dqm); queue++)
			if (test_bit(pipe_offset + queue,
				     dqm->dev->shared_resources.queue_bitmap))
				dqm->allocated_queues[pipe] |= 1 << queue;
	}

	dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1;
	dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1;

	return 0;
}

static void uninitialize(struct device_queue_manager *dqm)
{
	int i;

	WARN_ON(dqm->queue_count > 0 || dqm->processes_count > 0);

	kfree(dqm->allocated_queues);
	for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
		kfree(dqm->mqds[i]);
	mutex_destroy(&dqm->lock);
	kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem);
}

static int start_nocpsch(struct device_queue_manager *dqm)
{
	init_interrupts(dqm);
	return 0;
}

static int stop_nocpsch(struct device_queue_manager *dqm)
{
	return 0;
}

static int allocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int *sdma_queue_id)
{
	int bit;

	if (dqm->sdma_bitmap == 0)
		return -ENOMEM;

	bit = find_first_bit((unsigned long *)&dqm->sdma_bitmap,
				CIK_SDMA_QUEUES);

	clear_bit(bit, (unsigned long *)&dqm->sdma_bitmap);
	*sdma_queue_id = bit;

	return 0;
}

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int sdma_queue_id)
{
	if (sdma_queue_id >= CIK_SDMA_QUEUES)
		return;
	set_bit(sdma_queue_id, (unsigned long *)&dqm->sdma_bitmap);
}

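/*
 * Set up an SDMA queue without HWS. A slot is taken from the global SDMA
 * bitmap and decomposed into an engine/queue pair
 * (sdma_id % CIK_SDMA_QUEUES_PER_ENGINE selects the engine,
 * sdma_id / CIK_SDMA_QUEUES_PER_ENGINE the queue on that engine)
 * before the MQD is initialized and loaded.
 */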
static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd)
{
	struct mqd_manager *mqd;
	int retval;

	mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
	if (!mqd)
		return -ENOMEM;

	retval = allocate_sdma_queue(dqm, &q->sdma_id);
	if (retval)
		return retval;

	q->properties.sdma_queue_id = q->sdma_id / CIK_SDMA_QUEUES_PER_ENGINE;
	q->properties.sdma_engine_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE;

	pr_debug("SDMA id is: %d\n", q->sdma_id);
	pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
	pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);

	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval)
		goto out_deallocate_sdma_queue;

	retval = mqd->load_mqd(mqd, q->mqd, 0, 0, &q->properties, NULL);
	if (retval)
		goto out_uninit_mqd;

	return 0;

out_uninit_mqd:
	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
out_deallocate_sdma_queue:
	deallocate_sdma_queue(dqm, q->sdma_id);

	return retval;
}

/*
 * Device Queue Manager implementation for cp scheduler
 */

static int set_sched_resources(struct device_queue_manager *dqm)
{
	int i, mec;
	struct scheduling_resources res;

	res.vmid_mask = dqm->dev->shared_resources.compute_vmid_bitmap;

	res.queue_mask = 0;
	for (i = 0; i < KGD_MAX_QUEUES; ++i) {
		mec = (i / dqm->dev->shared_resources.num_queue_per_pipe)
			/ dqm->dev->shared_resources.num_pipe_per_mec;

		if (!test_bit(i, dqm->dev->shared_resources.queue_bitmap))
			continue;

		/* only acquire queues from the first MEC */
		if (mec > 0)
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of res.queue_mask needs updating
		 */
		if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) {
			pr_err("Invalid queue enabled by amdgpu: %d\n", i);
			break;
		}

		res.queue_mask |= (1ull << i);
	}
	res.gws_mask = res.oac_mask = res.gds_heap_base =
			res.gds_heap_size = 0;

	pr_debug("Scheduling resources:\n"
			"vmid mask: 0x%8X\n"
			"queue mask: 0x%8llX\n",
			res.vmid_mask, res.queue_mask);

	return pm_send_set_resources(&dqm->packets, &res);
}

static int initialize_cpsch(struct device_queue_manager *dqm)
{
	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));

	mutex_init(&dqm->lock);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->processes_count = 0;
	dqm->sdma_queue_count = 0;
	dqm->active_runlist = false;
	dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1;

	return 0;
}

static int start_cpsch(struct device_queue_manager *dqm)
{
	int retval;

	retval = 0;

	retval = pm_init(&dqm->packets, dqm);
	if (retval)
		goto fail_packet_manager_init;

	retval = set_sched_resources(dqm);
	if (retval)
		goto fail_set_sched_resources;

	pr_debug("Allocating fence memory\n");

	/* allocate fence memory on the gart */
	retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
					&dqm->fence_mem);

	if (retval)
		goto fail_allocate_vidmem;

	dqm->fence_addr = dqm->fence_mem->cpu_ptr;
	dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;

	init_interrupts(dqm);

	mutex_lock(&dqm->lock);
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	mutex_unlock(&dqm->lock);

	return 0;
fail_allocate_vidmem:
fail_set_sched_resources:
	pm_uninit(&dqm->packets);
fail_packet_manager_init:
	return retval;
}

static int stop_cpsch(struct device_queue_manager *dqm)
{
	mutex_lock(&dqm->lock);
	unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
	mutex_unlock(&dqm->lock);

	kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
	pm_uninit(&dqm->packets);

	return 0;
}

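/*
 * Register a kernel queue with the CP scheduler: the queue is added to the
 * process's privileged queue list, the process is marked as being debugged
 * (is_debug), and the runlist is rebuilt so the new queue gets mapped.
 */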
static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	mutex_lock(&dqm->lock);
	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new kernel queue because %d queues were already created\n",
				dqm->total_queue_count);
		mutex_unlock(&dqm->lock);
		return -EPERM;
	}

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	list_add(&kq->list, &qpd->priv_queue_list);
	dqm->queue_count++;
	qpd->is_debug = true;
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	mutex_unlock(&dqm->lock);

	return 0;
}

static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	mutex_lock(&dqm->lock);
	list_del(&kq->list);
	dqm->queue_count--;
	qpd->is_debug = false;
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type.
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);
	mutex_unlock(&dqm->lock);
}

static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
			struct qcm_process_device *qpd, int *allocate_vmid)
{
	int retval;
	struct mqd_manager *mqd;

	retval = 0;

	if (allocate_vmid)
		*allocate_vmid = 0;

	mutex_lock(&dqm->lock);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		retval = -EPERM;
		goto out;
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		retval = allocate_sdma_queue(dqm, &q->sdma_id);
		if (retval)
			goto out;
		q->properties.sdma_queue_id =
			q->sdma_id / CIK_SDMA_QUEUES_PER_ENGINE;
		q->properties.sdma_engine_id =
			q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE;
	}
	mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));

	if (!mqd) {
		retval = -ENOMEM;
		goto out;
	}

	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval)
		goto out;

	list_add(&q->list, &qpd->queues_list);
	qpd->queue_count++;
	if (q->properties.is_active) {
		dqm->queue_count++;
		retval = execute_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count++;
	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;

	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

out:
	mutex_unlock(&dqm->lock);
	return retval;
}

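/*
 * Busy-wait (yielding with schedule()) until *fence_addr reaches fence_value,
 * or return -ETIME once timeout_ms has elapsed. unmap_queues_cpsch() uses
 * this to wait for the fence value written by the CP in response to a
 * query-status packet, i.e. for queue preemption to complete.
 */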
int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
				unsigned int fence_value,
				unsigned int timeout_ms)
{
	unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies;

	while (*fence_addr != fence_value) {
		if (time_after(jiffies, end_jiffies)) {
			pr_err("qcm fence wait loop timeout expired\n");
			return -ETIME;
		}
		schedule();
	}

	return 0;
}

static int unmap_sdma_queues(struct device_queue_manager *dqm,
				unsigned int sdma_engine)
{
	return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
			KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false,
			sdma_engine);
}

/* dqm->lock mutex has to be locked before calling this function */
static int map_queues_cpsch(struct device_queue_manager *dqm)
{
	int retval;

	if (dqm->queue_count <= 0 || dqm->processes_count <= 0)
		return 0;

	if (dqm->active_runlist)
		return 0;

	retval = pm_send_runlist(&dqm->packets, &dqm->queues);
	if (retval) {
		pr_err("failed to execute runlist\n");
		return retval;
	}
	dqm->active_runlist = true;

	return retval;
}

/* dqm->lock mutex has to be locked before calling this function */
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param)
{
	int retval = 0;

	if (!dqm->active_runlist)
		return retval;

	pr_debug("Before destroying queues, sdma queue count is : %u\n",
		dqm->sdma_queue_count);

	if (dqm->sdma_queue_count > 0) {
		unmap_sdma_queues(dqm, 0);
		unmap_sdma_queues(dqm, 1);
	}

	retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
			filter, filter_param, false, 0);
	if (retval)
		return retval;

	*dqm->fence_addr = KFD_FENCE_INIT;
	pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr,
				KFD_FENCE_COMPLETED);
	/* the fence wait below is bounded by a timeout */
	retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
	if (retval)
		return retval;

	pm_release_ib(&dqm->packets);
	dqm->active_runlist = false;

	return retval;
}

/* dqm->lock mutex has to be locked before calling this function */
static int execute_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param)
{
	int retval;

	retval = unmap_queues_cpsch(dqm, filter, filter_param);
	if (retval) {
		pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
		return retval;
	}

	return map_queues_cpsch(dqm);
}

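/*
 * Destroy a user-mode queue under the CP scheduler: the queue is removed
 * from the process's list, the runlist is re-executed without it, and its
 * MQD is released. Queues of a process that is currently being debugged
 * cannot be destroyed.
 */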
static int destroy_queue_cpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;
	bool preempt_all_queues;

	preempt_all_queues = false;

	retval = 0;

	/* remove queue from list to prevent rescheduling after preemption */
	mutex_lock(&dqm->lock);

	if (qpd->is_debug) {
		/*
		 * error, currently we do not allow to destroy a queue
		 * of a currently debugged process
		 */
		retval = -EBUSY;
		goto failed_try_destroy_debugged_queue;

	}

	mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
	if (!mqd) {
		retval = -ENOMEM;
		goto failed;
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		dqm->sdma_queue_count--;
		deallocate_sdma_queue(dqm, q->sdma_id);
	}

	list_del(&q->list);
	qpd->queue_count--;
	if (q->properties.is_active)
		dqm->queue_count--;

	retval = execute_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	if (retval == -ETIME)
		qpd->reset_wavefronts = true;

	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);

	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	mutex_unlock(&dqm->lock);

	return 0;

failed:
failed_try_destroy_debugged_queue:

	mutex_unlock(&dqm->lock);
	return retval;
}

/*
 * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
 * stay in user mode.
 */
#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
/* APE1 limit is inclusive and 64K aligned. */
#define APE1_LIMIT_ALIGNMENT 0xFFFF

static bool set_cache_memory_policy(struct device_queue_manager *dqm,
				   struct qcm_process_device *qpd,
				   enum cache_policy default_policy,
				   enum cache_policy alternate_policy,
				   void __user *alternate_aperture_base,
				   uint64_t alternate_aperture_size)
{
	bool retval;

	mutex_lock(&dqm->lock);

	if (alternate_aperture_size == 0) {
		/* base > limit disables APE1 */
		qpd->sh_mem_ape1_base = 1;
		qpd->sh_mem_ape1_limit = 0;
	} else {
		/*
		 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
		 *			SH_MEM_APE1_BASE[31:0], 0x0000 }
		 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
		 *			SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
		 * Verify that the base and size parameters can be
		 * represented in this format and convert them.
		 * Additionally restrict APE1 to user-mode addresses.
		 */

		uint64_t base = (uintptr_t)alternate_aperture_base;
		uint64_t limit = base + alternate_aperture_size - 1;

		if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
		   (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
			retval = false;
			goto out;
		}

		qpd->sh_mem_ape1_base = base >> 16;
		qpd->sh_mem_ape1_limit = limit >> 16;
	}

	retval = dqm->asic_ops.set_cache_memory_policy(
			dqm,
			qpd,
			default_policy,
			alternate_policy,
			alternate_aperture_base,
			alternate_aperture_size);

	if ((sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
		program_sh_mem_settings(dqm, qpd);

	pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
		qpd->sh_mem_config, qpd->sh_mem_ape1_base,
		qpd->sh_mem_ape1_limit);

out:
	mutex_unlock(&dqm->lock);
	return retval;
}

static int process_termination_nocpsch(struct device_queue_manager *dqm,
		struct qcm_process_device *qpd)
{
	struct queue *q, *next;
	struct device_process_node *cur, *next_dpn;
	int retval = 0;

	mutex_lock(&dqm->lock);

	/* Clear all user mode queues */
	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
		int ret;

		ret = destroy_queue_nocpsch_locked(dqm, qpd, q);
		if (ret)
			retval = ret;
	}

	/* Unregister process */
	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			break;
		}
	}

	mutex_unlock(&dqm->lock);
	return retval;
}


static int process_termination_cpsch(struct device_queue_manager *dqm,
		struct qcm_process_device *qpd)
{
	int retval;
	struct queue *q, *next;
	struct kernel_queue *kq, *kq_next;
	struct mqd_manager *mqd;
	struct device_process_node *cur, *next_dpn;
	enum kfd_unmap_queues_filter filter =
		KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;

	retval = 0;

	mutex_lock(&dqm->lock);

	/* Clean all kernel queues */
	list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
		list_del(&kq->list);
		dqm->queue_count--;
		qpd->is_debug = false;
		dqm->total_queue_count--;
		filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES;
	}

	/* Clear all user mode queues */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
			dqm->sdma_queue_count--;

		if (q->properties.is_active)
			dqm->queue_count--;

		dqm->total_queue_count--;
	}

	/* Unregister process */
	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			break;
		}
	}

	retval = execute_queues_cpsch(dqm, filter, 0);
	if (retval || qpd->reset_wavefronts) {
		pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
		dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
		qpd->reset_wavefronts = false;
	}

	/* lastly, free mqd resources */
	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
		mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
		if (!mqd) {
			retval = -ENOMEM;
			goto out;
		}
		list_del(&q->list);
		qpd->queue_count--;
		mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
	}

out:
	mutex_unlock(&dqm->lock);
	return retval;
}

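/*
 * Allocate and initialize a device queue manager for the given device.
 * The ops table is selected by the global sched_policy (the HWS variants use
 * the CP-scheduler *_cpsch implementations, KFD_SCHED_POLICY_NO_HWS the
 * *_nocpsch ones), and the per-ASIC callbacks are chosen by ASIC family.
 */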
struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
{
	struct device_queue_manager *dqm;

	pr_debug("Loading device queue manager\n");

	dqm = kzalloc(sizeof(*dqm), GFP_KERNEL);
	if (!dqm)
		return NULL;

	dqm->dev = dev;
	switch (sched_policy) {
	case KFD_SCHED_POLICY_HWS:
	case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
		/* initialize dqm for cp scheduling */
		dqm->ops.create_queue = create_queue_cpsch;
		dqm->ops.initialize = initialize_cpsch;
		dqm->ops.start = start_cpsch;
		dqm->ops.stop = stop_cpsch;
		dqm->ops.destroy_queue = destroy_queue_cpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.get_mqd_manager = get_mqd_manager;
		dqm->ops.register_process = register_process;
		dqm->ops.unregister_process = unregister_process;
		dqm->ops.uninitialize = uninitialize;
		dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
		dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		dqm->ops.process_termination = process_termination_cpsch;
		break;
	case KFD_SCHED_POLICY_NO_HWS:
		/* initialize dqm for no cp scheduling */
		dqm->ops.start = start_nocpsch;
		dqm->ops.stop = stop_nocpsch;
		dqm->ops.create_queue = create_queue_nocpsch;
		dqm->ops.destroy_queue = destroy_queue_nocpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.get_mqd_manager = get_mqd_manager;
		dqm->ops.register_process = register_process;
		dqm->ops.unregister_process = unregister_process;
		dqm->ops.initialize = initialize_nocpsch;
		dqm->ops.uninitialize = uninitialize;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		dqm->ops.process_termination = process_termination_nocpsch;
		break;
	default:
		pr_err("Invalid scheduling policy %d\n", sched_policy);
		goto out_free;
	}

	switch (dev->device_info->asic_family) {
	case CHIP_CARRIZO:
		device_queue_manager_init_vi(&dqm->asic_ops);
		break;

	case CHIP_KAVERI:
		device_queue_manager_init_cik(&dqm->asic_ops);
		break;
	default:
		WARN(1, "Unexpected ASIC family %u",
		     dev->device_info->asic_family);
		goto out_free;
	}

	if (!dqm->ops.initialize(dqm))
		return dqm;

out_free:
	kfree(dqm);
	return NULL;
}

void device_queue_manager_uninit(struct device_queue_manager *dqm)
{
	dqm->ops.uninitialize(dqm);
	kfree(dqm);
}