/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/slab.h>
#include <linux/list.h>
#include <linux/types.h>
#include <linux/printk.h>
#include <linux/bitops.h>
#include <linux/sched.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_mqd_manager.h"
#include "cik_regs.h"
#include "kfd_kernel_queue.h"

/* Size of the per-pipe EOP queue */
#define CIK_HPD_EOP_BYTES_LOG2 11
#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)

static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
					unsigned int pasid, unsigned int vmid);

static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd);

static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock);
static int destroy_queues_cpsch(struct device_queue_manager *dqm,
				bool preempt_static_queues, bool lock);

static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd);

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int sdma_queue_id);

static inline
enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
{
	if (type == KFD_QUEUE_TYPE_SDMA)
		return KFD_MQD_TYPE_SDMA;
	return KFD_MQD_TYPE_CP;
}

static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
{
	int i;
	/* Queues are laid out per pipe, pipes per MEC, so the first queue of
	 * this pipe is (mec * pipes_per_mec + pipe) * queues_per_pipe.
	 */
	int pipe_offset = (mec * dqm->dev->shared_resources.num_pipe_per_mec
			+ pipe) * dqm->dev->shared_resources.num_queue_per_pipe;

	/* queue is available for KFD usage if bit is 1 */
	for (i = 0; i < dqm->dev->shared_resources.num_queue_per_pipe; ++i)
		if (test_bit(pipe_offset + i,
				dqm->dev->shared_resources.queue_bitmap))
			return true;
	return false;
}

unsigned int get_queues_num(struct device_queue_manager *dqm)
{
	BUG_ON(!dqm || !dqm->dev);
	return bitmap_weight(dqm->dev->shared_resources.queue_bitmap,
				KGD_MAX_QUEUES);
}

unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
{
	BUG_ON(!dqm || !dqm->dev);
	return dqm->dev->shared_resources.num_queue_per_pipe;
}

unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
{
	BUG_ON(!dqm || !dqm->dev);
	return dqm->dev->shared_resources.num_pipe_per_mec;
}
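/*
 * program_sh_mem_settings() pushes the per-process aperture setup
 * (SH_MEM_CONFIG, the APE1 base/limit and the aperture base registers)
 * down to the hardware for the VMID currently assigned to the process,
 * so the shader memory view always matches what is cached in the qpd.
 * It is invoked when a VMID is allocated to a process and when the
 * process changes its cache policy.
 */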
void program_sh_mem_settings(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	return dqm->dev->kfd2kgd->program_sh_mem_settings(
						dqm->dev->kgd, qpd->vmid,
						qpd->sh_mem_config,
						qpd->sh_mem_ape1_base,
						qpd->sh_mem_ape1_limit,
						qpd->sh_mem_bases);
}

static int allocate_vmid(struct device_queue_manager *dqm,
			struct qcm_process_device *qpd,
			struct queue *q)
{
	int bit, allocated_vmid;

	if (dqm->vmid_bitmap == 0)
		return -ENOMEM;

	bit = find_first_bit((unsigned long *)&dqm->vmid_bitmap, CIK_VMID_NUM);
	clear_bit(bit, (unsigned long *)&dqm->vmid_bitmap);

	/* Kaveri KFD VMIDs start from VMID 8 */
	allocated_vmid = bit + KFD_VMID_START_OFFSET;
	pr_debug("kfd: vmid allocation %d\n", allocated_vmid);
	qpd->vmid = allocated_vmid;
	q->properties.vmid = allocated_vmid;

	set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
	program_sh_mem_settings(dqm, qpd);

	return 0;
}

static void deallocate_vmid(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int bit = qpd->vmid - KFD_VMID_START_OFFSET;

	/* Release the vmid mapping */
	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);

	set_bit(bit, (unsigned long *)&dqm->vmid_bitmap);
	qpd->vmid = 0;
	q->properties.vmid = 0;
}

static int create_queue_nocpsch(struct device_queue_manager *dqm,
				struct queue *q,
				struct qcm_process_device *qpd,
				int *allocated_vmid)
{
	int retval;

	BUG_ON(!dqm || !q || !qpd || !allocated_vmid);

	pr_debug("kfd: In func %s\n", __func__);
	print_queue(q);

	mutex_lock(&dqm->lock);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("amdkfd: Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		mutex_unlock(&dqm->lock);
		return -EPERM;
	}

	if (list_empty(&qpd->queues_list)) {
		retval = allocate_vmid(dqm, qpd, q);
		if (retval != 0) {
			mutex_unlock(&dqm->lock);
			return retval;
		}
	}
	*allocated_vmid = qpd->vmid;
	q->properties.vmid = qpd->vmid;

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
		retval = create_compute_queue_nocpsch(dqm, q, qpd);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		retval = create_sdma_queue_nocpsch(dqm, q, qpd);
	else
		retval = -EINVAL;

	if (retval != 0) {
		if (list_empty(&qpd->queues_list)) {
			deallocate_vmid(dqm, qpd, q);
			*allocated_vmid = 0;
		}
		mutex_unlock(&dqm->lock);
		return retval;
	}

	list_add(&q->list, &qpd->queues_list);
	if (q->properties.is_active)
		dqm->queue_count++;

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count++;

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	mutex_unlock(&dqm->lock);
	return 0;
}
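/*
 * allocate_hqd() picks a hardware queue descriptor slot for a compute
 * queue. Pipes of MEC 0 are scanned round-robin starting from
 * next_pipe_to_allocate, and in the first enabled pipe with a free slot
 * the lowest set bit of allocated_queues[pipe] is claimed. Advancing
 * next_pipe_to_allocate after each success spreads queues
 * "horizontally" over the pipes: with four pipes, successive
 * allocations land on pipes 0, 1, 2, 3, 0, ...
 */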
static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
{
	bool set;
	int pipe, bit, i;

	set = false;

	for (pipe = dqm->next_pipe_to_allocate, i = 0;
			i < get_pipes_per_mec(dqm);
			pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {

		if (!is_pipe_enabled(dqm, 0, pipe))
			continue;

		if (dqm->allocated_queues[pipe] != 0) {
			bit = find_first_bit(
				(unsigned long *)&dqm->allocated_queues[pipe],
				get_queues_per_pipe(dqm));

			clear_bit(bit,
				(unsigned long *)&dqm->allocated_queues[pipe]);
			q->pipe = pipe;
			q->queue = bit;
			set = true;
			break;
		}
	}

	if (!set)
		return -EBUSY;

	pr_debug("kfd: DQM %s hqd slot - pipe (%d) queue(%d)\n",
				__func__, q->pipe, q->queue);
	/* horizontal hqd allocation */
	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);

	return 0;
}

static inline void deallocate_hqd(struct device_queue_manager *dqm,
				struct queue *q)
{
	set_bit(q->queue, (unsigned long *)&dqm->allocated_queues[q->pipe]);
}

static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd)
{
	int retval;
	struct mqd_manager *mqd;

	BUG_ON(!dqm || !q || !qpd);

	mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
	if (mqd == NULL)
		return -ENOMEM;

	retval = allocate_hqd(dqm, q);
	if (retval != 0)
		return retval;

	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval != 0) {
		deallocate_hqd(dqm, q);
		return retval;
	}

	pr_debug("kfd: loading mqd to hqd on pipe (%d) queue (%d)\n",
			q->pipe, q->queue);

	retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue,
			(uint32_t __user *) q->properties.write_ptr);
	if (retval != 0) {
		deallocate_hqd(dqm, q);
		mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
		return retval;
	}

	return 0;
}

static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;

	BUG_ON(!dqm || !q || !q->mqd || !qpd);

	retval = 0;

	pr_debug("kfd: In func %s\n", __func__);

	mutex_lock(&dqm->lock);

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
		mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
		if (mqd == NULL) {
			retval = -ENOMEM;
			goto out;
		}
		deallocate_hqd(dqm, q);
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
		if (mqd == NULL) {
			retval = -ENOMEM;
			goto out;
		}
		dqm->sdma_queue_count--;
		deallocate_sdma_queue(dqm, q->sdma_id);
	} else {
		pr_debug("q->properties.type is invalid (%d)\n",
				q->properties.type);
		retval = -EINVAL;
		goto out;
	}

	retval = mqd->destroy_mqd(mqd, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS,
				q->pipe, q->queue);

	if (retval != 0)
		goto out;

	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);

	list_del(&q->list);
	if (list_empty(&qpd->queues_list))
		deallocate_vmid(dqm, qpd, q);
	if (q->properties.is_active)
		dqm->queue_count--;

	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

out:
	mutex_unlock(&dqm->lock);
	return retval;
}
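/*
 * update_queue() rewrites the MQD from the queue's current properties.
 * dqm->queue_count only counts *active* queues, so the counter is
 * adjusted whenever an update flips is_active in either direction.
 * Under a HW-scheduler policy the runlist is then re-executed so the
 * CP picks up the change.
 */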
static int update_queue(struct device_queue_manager *dqm, struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;
	bool prev_active = false;

	BUG_ON(!dqm || !q || !q->mqd);

	mutex_lock(&dqm->lock);
	mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
	if (mqd == NULL) {
		mutex_unlock(&dqm->lock);
		return -ENOMEM;
	}

	if (q->properties.is_active)
		prev_active = true;

	/*
	 * check active state vs. the previous state
	 * and modify counter accordingly
	 */
	retval = mqd->update_mqd(mqd, q->mqd, &q->properties);
	if ((q->properties.is_active) && (!prev_active))
		dqm->queue_count++;
	else if ((!q->properties.is_active) && (prev_active))
		dqm->queue_count--;

	if (sched_policy != KFD_SCHED_POLICY_NO_HWS)
		retval = execute_queues_cpsch(dqm, false);

	mutex_unlock(&dqm->lock);
	return retval;
}

static struct mqd_manager *get_mqd_manager_nocpsch(
		struct device_queue_manager *dqm, enum KFD_MQD_TYPE type)
{
	struct mqd_manager *mqd;

	BUG_ON(!dqm || type >= KFD_MQD_TYPE_MAX);

	pr_debug("kfd: In func %s mqd type %d\n", __func__, type);

	mqd = dqm->mqds[type];
	if (!mqd) {
		mqd = mqd_manager_init(type, dqm->dev);
		if (mqd == NULL)
			pr_err("kfd: mqd manager is NULL\n");
		dqm->mqds[type] = mqd;
	}

	return mqd;
}

static int register_process_nocpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct device_process_node *n;
	int retval;

	BUG_ON(!dqm || !qpd);

	pr_debug("kfd: In func %s\n", __func__);

	n = kzalloc(sizeof(struct device_process_node), GFP_KERNEL);
	if (!n)
		return -ENOMEM;

	n->qpd = qpd;

	mutex_lock(&dqm->lock);
	list_add(&n->list, &dqm->queues);

	retval = dqm->ops_asic_specific.register_process(dqm, qpd);

	dqm->processes_count++;

	mutex_unlock(&dqm->lock);

	return retval;
}

static int unregister_process_nocpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	int retval;
	struct device_process_node *cur, *next;

	BUG_ON(!dqm || !qpd);

	pr_debug("In func %s\n", __func__);

	pr_debug("qpd->queues_list is %s\n",
			list_empty(&qpd->queues_list) ? "empty" : "not empty");

	retval = 0;
	mutex_lock(&dqm->lock);

	list_for_each_entry_safe(cur, next, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			goto out;
		}
	}
	/* qpd not found in dqm list */
	retval = 1;
out:
	mutex_unlock(&dqm->lock);
	return retval;
}
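/*
 * set_pasid_vmid_mapping() binds a process address space ID (PASID) to
 * a VMID in the ATC mapping registers. Passing a PASID of 0 encodes an
 * invalid mapping (the VALID bit stays clear), which is how
 * deallocate_vmid() releases the binding.
 */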
static int
set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
			unsigned int vmid)
{
	uint32_t pasid_mapping;

	pasid_mapping = (pasid == 0) ? 0 :
		(uint32_t)pasid |
		ATC_VMID_PASID_MAPPING_VALID;

	return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
						dqm->dev->kgd, pasid_mapping,
						vmid);
}

static void init_interrupts(struct device_queue_manager *dqm)
{
	unsigned int i;

	BUG_ON(dqm == NULL);

	for (i = 0; i < get_pipes_per_mec(dqm); i++)
		if (is_pipe_enabled(dqm, 0, i))
			dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd, i);
}

static int init_scheduler(struct device_queue_manager *dqm)
{
	int retval = 0;

	BUG_ON(!dqm);

	pr_debug("kfd: In %s\n", __func__);

	return retval;
}

static int initialize_nocpsch(struct device_queue_manager *dqm)
{
	int i;

	BUG_ON(!dqm);

	pr_debug("kfd: In func %s num of pipes: %d\n",
			__func__, get_pipes_per_mec(dqm));

	mutex_init(&dqm->lock);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->next_pipe_to_allocate = 0;
	dqm->sdma_queue_count = 0;
	dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm),
					sizeof(unsigned int), GFP_KERNEL);
	if (!dqm->allocated_queues) {
		mutex_destroy(&dqm->lock);
		return -ENOMEM;
	}

	for (i = 0; i < get_pipes_per_mec(dqm); i++)
		dqm->allocated_queues[i] = (1 << get_queues_per_pipe(dqm)) - 1;

	dqm->vmid_bitmap = (1 << VMID_PER_DEVICE) - 1;
	dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1;

	init_scheduler(dqm);
	return 0;
}

static void uninitialize_nocpsch(struct device_queue_manager *dqm)
{
	int i;

	BUG_ON(!dqm);

	BUG_ON(dqm->queue_count > 0 || dqm->processes_count > 0);

	kfree(dqm->allocated_queues);
	for (i = 0; i < KFD_MQD_TYPE_MAX; i++)
		kfree(dqm->mqds[i]);
	mutex_destroy(&dqm->lock);
	kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem);
}

static int start_nocpsch(struct device_queue_manager *dqm)
{
	init_interrupts(dqm);
	return 0;
}

static int stop_nocpsch(struct device_queue_manager *dqm)
{
	return 0;
}

static int allocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int *sdma_queue_id)
{
	int bit;

	if (dqm->sdma_bitmap == 0)
		return -ENOMEM;

	bit = find_first_bit((unsigned long *)&dqm->sdma_bitmap,
				CIK_SDMA_QUEUES);

	clear_bit(bit, (unsigned long *)&dqm->sdma_bitmap);
	*sdma_queue_id = bit;

	return 0;
}

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int sdma_queue_id)
{
	if (sdma_queue_id >= CIK_SDMA_QUEUES)
		return;
	set_bit(sdma_queue_id, (unsigned long *)&dqm->sdma_bitmap);
}
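/*
 * An allocated SDMA queue id is decomposed into an (engine, queue)
 * pair below: with two engines of two queues each on CIK, sdma_id 3
 * becomes queue 3 % 2 = 1 on engine 3 / 2 = 1. The MQD is then loaded
 * with pipe/queue arguments of 0; the actual SDMA placement presumably
 * comes from the engine/queue ids programmed into the MQD itself
 * rather than from an HQD slot.
 */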
static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd)
{
	struct mqd_manager *mqd;
	int retval;

	mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
	if (!mqd)
		return -ENOMEM;

	retval = allocate_sdma_queue(dqm, &q->sdma_id);
	if (retval != 0)
		return retval;

	q->properties.sdma_queue_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE;
	q->properties.sdma_engine_id = q->sdma_id / CIK_SDMA_ENGINE_NUM;

	pr_debug("kfd: sdma id is:    %d\n", q->sdma_id);
	pr_debug("     sdma queue id: %d\n", q->properties.sdma_queue_id);
	pr_debug("     sdma engine id: %d\n", q->properties.sdma_engine_id);

	dqm->ops_asic_specific.init_sdma_vm(dqm, q, qpd);
	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval != 0) {
		deallocate_sdma_queue(dqm, q->sdma_id);
		return retval;
	}

	retval = mqd->load_mqd(mqd, q->mqd, 0, 0, NULL);
	if (retval != 0) {
		deallocate_sdma_queue(dqm, q->sdma_id);
		mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
		return retval;
	}

	return 0;
}

/*
 * Device Queue Manager implementation for cp scheduler
 */

static int set_sched_resources(struct device_queue_manager *dqm)
{
	int i, mec;
	struct scheduling_resources res;

	BUG_ON(!dqm);

	pr_debug("kfd: In func %s\n", __func__);

	res.vmid_mask = (1 << VMID_PER_DEVICE) - 1;
	res.vmid_mask <<= KFD_VMID_START_OFFSET;

	res.queue_mask = 0;
	for (i = 0; i < KGD_MAX_QUEUES; ++i) {
		mec = (i / dqm->dev->shared_resources.num_queue_per_pipe)
			/ dqm->dev->shared_resources.num_pipe_per_mec;

		if (!test_bit(i, dqm->dev->shared_resources.queue_bitmap))
			continue;

		/* only acquire queues from the first MEC */
		if (mec > 0)
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of res.queue_mask needs updating */
		if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) {
			pr_err("Invalid queue enabled by amdgpu: %d\n", i);
			break;
		}

		res.queue_mask |= (1ull << i);
	}
	res.gws_mask = res.oac_mask = res.gds_heap_base =
		res.gds_heap_size = 0;

	pr_debug("kfd: scheduling resources:\n"
			"      vmid mask: 0x%8X\n"
			"      queue mask: 0x%8llX\n",
			res.vmid_mask, res.queue_mask);

	return pm_send_set_resources(&dqm->packets, &res);
}

static int initialize_cpsch(struct device_queue_manager *dqm)
{
	int retval;

	BUG_ON(!dqm);

	pr_debug("kfd: In func %s num of pipes: %d\n",
			__func__, get_pipes_per_mec(dqm));

	mutex_init(&dqm->lock);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->processes_count = 0;
	dqm->sdma_queue_count = 0;
	dqm->active_runlist = false;
	retval = dqm->ops_asic_specific.initialize(dqm);
	if (retval != 0)
		goto fail_init_pipelines;

	return 0;

fail_init_pipelines:
	mutex_destroy(&dqm->lock);
	return retval;
}

static int start_cpsch(struct device_queue_manager *dqm)
{
	struct device_process_node *node;
	int retval;

	BUG_ON(!dqm);

	retval = 0;

	retval = pm_init(&dqm->packets, dqm);
	if (retval != 0)
		goto fail_packet_manager_init;

	retval = set_sched_resources(dqm);
	if (retval != 0)
		goto fail_set_sched_resources;

	pr_debug("kfd: allocating fence memory\n");

	/* allocate fence memory on the gart */
	retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
					&dqm->fence_mem);

	if (retval != 0)
		goto fail_allocate_vidmem;

	dqm->fence_addr = dqm->fence_mem->cpu_ptr;
	dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;

	init_interrupts(dqm);

	list_for_each_entry(node, &dqm->queues, list)
		if (node->qpd->pqm->process && dqm->dev)
			kfd_bind_process_to_device(dqm->dev,
						node->qpd->pqm->process);

	execute_queues_cpsch(dqm, true);

	return 0;
fail_allocate_vidmem:
fail_set_sched_resources:
	pm_uninit(&dqm->packets);
fail_packet_manager_init:
	return retval;
}
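/*
 * stop_cpsch() tears the HW scheduler down roughly in the reverse
 * order of start_cpsch(): preempt all queues (including static ones),
 * drop the process-device bindings so processes re-bind on the next
 * start, then release the fence memory and the packet manager.
 */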
static int stop_cpsch(struct device_queue_manager *dqm)
{
	struct device_process_node *node;
	struct kfd_process_device *pdd;

	BUG_ON(!dqm);

	destroy_queues_cpsch(dqm, true, true);

	list_for_each_entry(node, &dqm->queues, list) {
		pdd = qpd_to_pdd(node->qpd);
		pdd->bound = false;
	}
	kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
	pm_uninit(&dqm->packets);

	return 0;
}

static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	BUG_ON(!dqm || !kq || !qpd);

	pr_debug("kfd: In func %s\n", __func__);

	mutex_lock(&dqm->lock);
	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("amdkfd: Can't create new kernel queue because %d queues were already created\n",
				dqm->total_queue_count);
		mutex_unlock(&dqm->lock);
		return -EPERM;
	}

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	list_add(&kq->list, &qpd->priv_queue_list);
	dqm->queue_count++;
	qpd->is_debug = true;
	execute_queues_cpsch(dqm, false);
	mutex_unlock(&dqm->lock);

	return 0;
}

static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	BUG_ON(!dqm || !kq);

	pr_debug("kfd: In %s\n", __func__);

	mutex_lock(&dqm->lock);
	/* here we actually preempt the DIQ */
	destroy_queues_cpsch(dqm, true, false);
	list_del(&kq->list);
	dqm->queue_count--;
	qpd->is_debug = false;
	execute_queues_cpsch(dqm, false);
	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type.
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);
	mutex_unlock(&dqm->lock);
}

static void select_sdma_engine_id(struct queue *q)
{
	static int sdma_id;

	q->sdma_id = sdma_id;
	sdma_id = (sdma_id + 1) % 2;
}

static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
			struct qcm_process_device *qpd, int *allocate_vmid)
{
	int retval;
	struct mqd_manager *mqd;

	BUG_ON(!dqm || !q || !qpd);

	retval = 0;

	if (allocate_vmid)
		*allocate_vmid = 0;

	mutex_lock(&dqm->lock);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("amdkfd: Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		retval = -EPERM;
		goto out;
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		select_sdma_engine_id(q);

	mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));

	if (mqd == NULL) {
		mutex_unlock(&dqm->lock);
		return -ENOMEM;
	}

	dqm->ops_asic_specific.init_sdma_vm(dqm, q, qpd);
	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval != 0)
		goto out;

	list_add(&q->list, &qpd->queues_list);
	if (q->properties.is_active) {
		dqm->queue_count++;
		retval = execute_queues_cpsch(dqm, false);
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count++;
	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;

	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

out:
	mutex_unlock(&dqm->lock);
	return retval;
}
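/*
 * Preemption completion is signalled through a fence word in GTT
 * memory: destroy_queues_cpsch() resets *fence_addr to KFD_FENCE_INIT,
 * asks the CP to write KFD_FENCE_COMPLETED there once the queues are
 * unmapped, and amdkfd_fence_wait_timeout() below polls that word,
 * yielding with schedule() between reads, until the value appears or
 * the timeout expires.
 */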
int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
				unsigned int fence_value,
				unsigned long timeout)
{
	BUG_ON(!fence_addr);
	timeout += jiffies;

	while (*fence_addr != fence_value) {
		if (time_after(jiffies, timeout)) {
			pr_err("kfd: qcm fence wait loop timeout expired\n");
			return -ETIME;
		}
		schedule();
	}

	return 0;
}

static int destroy_sdma_queues(struct device_queue_manager *dqm,
				unsigned int sdma_engine)
{
	return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
			KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES, 0, false,
			sdma_engine);
}

static int destroy_queues_cpsch(struct device_queue_manager *dqm,
				bool preempt_static_queues, bool lock)
{
	int retval;
	enum kfd_preempt_type_filter preempt_type;
	struct kfd_process_device *pdd;

	BUG_ON(!dqm);

	retval = 0;

	if (lock)
		mutex_lock(&dqm->lock);
	if (!dqm->active_runlist)
		goto out;

	pr_debug("kfd: Before destroying queues, sdma queue count is : %u\n",
		dqm->sdma_queue_count);

	if (dqm->sdma_queue_count > 0) {
		destroy_sdma_queues(dqm, 0);
		destroy_sdma_queues(dqm, 1);
	}

	preempt_type = preempt_static_queues ?
			KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES :
			KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES;

	retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
			preempt_type, 0, false, 0);
	if (retval != 0)
		goto out;

	*dqm->fence_addr = KFD_FENCE_INIT;
	pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr,
				KFD_FENCE_COMPLETED);
	/* The wait below is bounded by the queue preemption timeout. */
	retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
	if (retval != 0) {
		pdd = kfd_get_process_device_data(dqm->dev,
				kfd_get_process(current));
		pdd->reset_wavefronts = true;
		goto out;
	}
	pm_release_ib(&dqm->packets);
	dqm->active_runlist = false;

out:
	if (lock)
		mutex_unlock(&dqm->lock);
	return retval;
}

static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock)
{
	int retval;

	BUG_ON(!dqm);

	if (lock)
		mutex_lock(&dqm->lock);

	retval = destroy_queues_cpsch(dqm, false, false);
	if (retval != 0) {
		pr_err("kfd: the cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
		goto out;
	}

	if (dqm->queue_count <= 0 || dqm->processes_count <= 0) {
		retval = 0;
		goto out;
	}

	if (dqm->active_runlist) {
		retval = 0;
		goto out;
	}

	retval = pm_send_runlist(&dqm->packets, &dqm->queues);
	if (retval != 0) {
		pr_err("kfd: failed to execute runlist\n");
		goto out;
	}
	dqm->active_runlist = true;

out:
	if (lock)
		mutex_unlock(&dqm->lock);
	return retval;
}
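/*
 * destroy_queue_cpsch() unlinks the queue from its process first, so
 * the runlist rebuilt by execute_queues_cpsch() no longer references
 * it; only after that preemption and rebuild is it safe to free the
 * MQD. Destroying a queue of a process that is being debugged is
 * refused while the DIQ is active.
 */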
static int destroy_queue_cpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;
	bool preempt_all_queues;

	BUG_ON(!dqm || !qpd || !q);

	preempt_all_queues = false;

	retval = 0;

	/* remove queue from list to prevent rescheduling after preemption */
	mutex_lock(&dqm->lock);

	if (qpd->is_debug) {
		/*
		 * error, currently we do not allow to destroy a queue
		 * of a currently debugged process
		 */
		retval = -EBUSY;
		goto failed_try_destroy_debugged_queue;

	}

	mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
	if (!mqd) {
		retval = -ENOMEM;
		goto failed;
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count--;

	list_del(&q->list);
	if (q->properties.is_active)
		dqm->queue_count--;

	execute_queues_cpsch(dqm, false);

	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);

	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	mutex_unlock(&dqm->lock);

	return 0;

failed:
failed_try_destroy_debugged_queue:

	mutex_unlock(&dqm->lock);
	return retval;
}

/*
 * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
 * stay in user mode.
 */
#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
/* APE1 limit is inclusive and 64K aligned. */
#define APE1_LIMIT_ALIGNMENT 0xFFFF
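/*
 * Worked example for the APE1 checks below: a base of
 * 0x0000700000000000 with a size that is a multiple of 64K gives a
 * limit (base + size - 1) whose low 16 bits are 0xFFFF. Then
 * base & APE1_FIXED_BITS_MASK == 0 and
 * limit & APE1_FIXED_BITS_MASK == APE1_LIMIT_ALIGNMENT both hold
 * (low 16 bits 0x0000/0xFFFF, high bits clear for user mode), and the
 * registers are programmed with the two values shifted right by 16.
 */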
static bool set_cache_memory_policy(struct device_queue_manager *dqm,
				   struct qcm_process_device *qpd,
				   enum cache_policy default_policy,
				   enum cache_policy alternate_policy,
				   void __user *alternate_aperture_base,
				   uint64_t alternate_aperture_size)
{
	bool retval;

	pr_debug("kfd: In func %s\n", __func__);

	mutex_lock(&dqm->lock);

	if (alternate_aperture_size == 0) {
		/* base > limit disables APE1 */
		qpd->sh_mem_ape1_base = 1;
		qpd->sh_mem_ape1_limit = 0;
	} else {
		/*
		 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
		 *			SH_MEM_APE1_BASE[31:0], 0x0000 }
		 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
		 *			SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
		 * Verify that the base and size parameters can be
		 * represented in this format and convert them.
		 * Additionally restrict APE1 to user-mode addresses.
		 */

		uint64_t base = (uintptr_t)alternate_aperture_base;
		uint64_t limit = base + alternate_aperture_size - 1;

		if (limit <= base)
			goto out;

		if ((base & APE1_FIXED_BITS_MASK) != 0)
			goto out;

		if ((limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT)
			goto out;

		qpd->sh_mem_ape1_base = base >> 16;
		qpd->sh_mem_ape1_limit = limit >> 16;
	}

	retval = dqm->ops_asic_specific.set_cache_memory_policy(
			dqm,
			qpd,
			default_policy,
			alternate_policy,
			alternate_aperture_base,
			alternate_aperture_size);

	if ((sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
		program_sh_mem_settings(dqm, qpd);

	pr_debug("kfd: sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
		qpd->sh_mem_config, qpd->sh_mem_ape1_base,
		qpd->sh_mem_ape1_limit);

	mutex_unlock(&dqm->lock);
	return retval;

out:
	mutex_unlock(&dqm->lock);
	return false;
}

struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
{
	struct device_queue_manager *dqm;

	BUG_ON(!dev);

	pr_debug("kfd: loading device queue manager\n");

	dqm = kzalloc(sizeof(struct device_queue_manager), GFP_KERNEL);
	if (!dqm)
		return NULL;

	dqm->dev = dev;
	switch (sched_policy) {
	case KFD_SCHED_POLICY_HWS:
	case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
		/* initialize dqm for cp scheduling */
		dqm->ops.create_queue = create_queue_cpsch;
		dqm->ops.initialize = initialize_cpsch;
		dqm->ops.start = start_cpsch;
		dqm->ops.stop = stop_cpsch;
		dqm->ops.destroy_queue = destroy_queue_cpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.get_mqd_manager = get_mqd_manager_nocpsch;
		dqm->ops.register_process = register_process_nocpsch;
		dqm->ops.unregister_process = unregister_process_nocpsch;
		dqm->ops.uninitialize = uninitialize_nocpsch;
		dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
		dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		break;
	case KFD_SCHED_POLICY_NO_HWS:
		/* initialize dqm for no cp scheduling */
		dqm->ops.start = start_nocpsch;
		dqm->ops.stop = stop_nocpsch;
		dqm->ops.create_queue = create_queue_nocpsch;
		dqm->ops.destroy_queue = destroy_queue_nocpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.get_mqd_manager = get_mqd_manager_nocpsch;
		dqm->ops.register_process = register_process_nocpsch;
		dqm->ops.unregister_process = unregister_process_nocpsch;
		dqm->ops.initialize = initialize_nocpsch;
		dqm->ops.uninitialize = uninitialize_nocpsch;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		break;
	default:
		BUG();
		break;
	}

	switch (dev->device_info->asic_family) {
	case CHIP_CARRIZO:
		device_queue_manager_init_vi(&dqm->ops_asic_specific);
		break;

	case CHIP_KAVERI:
		device_queue_manager_init_cik(&dqm->ops_asic_specific);
		break;
	}

	if (dqm->ops.initialize(dqm) != 0) {
		kfree(dqm);
		return NULL;
	}

	return dqm;
}

void device_queue_manager_uninit(struct device_queue_manager *dqm)
{
	BUG_ON(!dqm);

	dqm->ops.uninitialize(dqm);
	kfree(dqm);
}