// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright 2015-2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/pci.h>
#include <linux/acpi.h>
#include "kfd_crat.h"
#include "kfd_priv.h"
#include "kfd_topology.h"
#include "kfd_iommu.h"
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"

/* GPU Processor ID base for dGPUs for which VCRAT needs to be created.
 * GPU processor IDs are expressed with Bit[31]=1.
 * The base is set to 0x8000_0000 + 0x1000 to avoid collision with GPU IDs
 * used in the CRAT.
 */
static uint32_t gpu_processor_id_low = 0x80001000;

/* Return the next available gpu_processor_id and increment it for the
 * next GPU
 * @total_cu_count - Total CUs present in the GPU, including ones
 *		     masked off
 */
static inline unsigned int get_and_inc_gpu_processor_id(
				unsigned int total_cu_count)
{
	int current_id = gpu_processor_id_low;

	gpu_processor_id_low += total_cu_count;
	return current_id;
}

static struct kfd_gpu_cache_info kaveri_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache (in SQC module) per bank */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* Scalar L1 Data Cache (in SQC module) per bank */
		.cache_size = 8,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},

	/* TODO: Add L2 Cache information */
};

static struct kfd_gpu_cache_info carrizo_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache (in SQC module) per bank */
		.cache_size = 8,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 4,
	},
	{
		/* Scalar L1 Data Cache (in SQC module) per bank */
		.cache_size = 4,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 4,
	},

	/* TODO: Add L2 Cache information */
};

#define hawaii_cache_info kaveri_cache_info
#define tonga_cache_info carrizo_cache_info
#define fiji_cache_info carrizo_cache_info
#define polaris10_cache_info carrizo_cache_info
#define polaris11_cache_info carrizo_cache_info
#define polaris12_cache_info carrizo_cache_info
#define vegam_cache_info carrizo_cache_info

/* NOTE: L1 cache information has been updated and L2/L3
 * cache information has been added for Vega10 and
 * newer ASICs. The unit for cache_size is KiB.
 * Going forward, cache details need to be checked and
 * updated for every new ASIC.
 */

static struct kfd_gpu_cache_info vega10_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 3,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 3,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 4096,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 16,
	},
};

static struct kfd_gpu_cache_info raven_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 3,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 3,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 1024,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 11,
	},
};

static struct kfd_gpu_cache_info renoir_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 3,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 3,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 1024,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 8,
	},
};

static struct kfd_gpu_cache_info vega12_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 3,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 3,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 2048,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 5,
	},
};

static struct kfd_gpu_cache_info vega20_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 3,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 3,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 8192,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 16,
	},
};

static struct kfd_gpu_cache_info aldebaran_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 8192,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 14,
	},
};

static struct kfd_gpu_cache_info navi10_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* GL1 Data Cache per SA */
		.cache_size = 128,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 10,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 4096,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 10,
	},
};

static struct kfd_gpu_cache_info vangogh_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* GL1 Data Cache per SA */
		.cache_size = 128,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 8,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 1024,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 8,
	},
};

static struct kfd_gpu_cache_info navi14_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* GL1 Data Cache per SA */
		.cache_size = 128,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 12,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 2048,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 12,
	},
};

static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* GL1 Data Cache per SA */
		.cache_size = 128,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 10,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 4096,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 10,
	},
	{
		/* L3 Data Cache per GPU */
		.cache_size = 128*1024,
		.cache_level = 3,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 10,
	},
};

static struct kfd_gpu_cache_info navy_flounder_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* GL1 Data Cache per SA */
		.cache_size = 128,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 10,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 3072,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 10,
	},
	{
		/* L3 Data Cache per GPU */
		.cache_size = 96*1024,
		.cache_level = 3,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 10,
	},
};

static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* GL1 Data Cache per SA */
		.cache_size = 128,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 8,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 2048,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 8,
	},
	{
		/* L3 Data Cache per GPU */
		.cache_size = 32*1024,
		.cache_level = 3,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 8,
	},
};

static struct kfd_gpu_cache_info beige_goby_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* GL1 Data Cache per SA */
		.cache_size = 128,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 8,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 1024,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 8,
	},
	{
		/* L3 Data Cache per GPU */
		.cache_size = 16*1024,
		.cache_level = 3,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 8,
	},
};

static struct kfd_gpu_cache_info yellow_carp_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* GL1 Data Cache per SA */
		.cache_size = 128,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 6,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 2048,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 6,
	},
};

static struct kfd_gpu_cache_info gfx1037_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* GL1 Data Cache per SA */
		.cache_size = 128,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 256,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
};

static struct kfd_gpu_cache_info gc_10_3_6_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* GL1 Data Cache per SA */
		.cache_size = 128,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 256,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
};

static struct kfd_gpu_cache_info dummy_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* GL1 Data Cache per SA */
		.cache_size = 128,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 6,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 2048,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 6,
	},
};
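
/* Reading these tables (illustrative): in the vega10 table above, the L2
 * entry describes a 4096 KiB level-2 cache shared by groups of 16 CUs,
 * while the TCP L1 entry with num_cu_shared = 1 describes a 16 KiB cache
 * private to each CU. All sizes are in KiB, per the NOTE above.
 */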

static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev,
				      struct crat_subtype_computeunit *cu)
{
	dev->node_props.cpu_cores_count = cu->num_cpu_cores;
	dev->node_props.cpu_core_id_base = cu->processor_id_low;
	if (cu->hsa_capability & CRAT_CU_FLAGS_IOMMU_PRESENT)
		dev->node_props.capability |= HSA_CAP_ATS_PRESENT;

	pr_debug("CU CPU: cores=%d id_base=%d\n", cu->num_cpu_cores,
		 cu->processor_id_low);
}

static void kfd_populated_cu_info_gpu(struct kfd_topology_device *dev,
				      struct crat_subtype_computeunit *cu)
{
	dev->node_props.simd_id_base = cu->processor_id_low;
	dev->node_props.simd_count = cu->num_simd_cores;
	dev->node_props.lds_size_in_kb = cu->lds_size_in_kb;
	dev->node_props.max_waves_per_simd = cu->max_waves_simd;
	dev->node_props.wave_front_size = cu->wave_front_size;
	dev->node_props.array_count = cu->array_count;
	dev->node_props.cu_per_simd_array = cu->num_cu_per_array;
	dev->node_props.simd_per_cu = cu->num_simd_per_cu;
	dev->node_props.max_slots_scratch_cu = cu->max_slots_scatch_cu;
	if (cu->hsa_capability & CRAT_CU_FLAGS_HOT_PLUGGABLE)
		dev->node_props.capability |= HSA_CAP_HOT_PLUGGABLE;
	pr_debug("CU GPU: id_base=%d\n", cu->processor_id_low);
}

/* kfd_parse_subtype_cu - parse compute unit subtypes and attach them to the
 * correct topology device present in the device_list
 */
static int kfd_parse_subtype_cu(struct crat_subtype_computeunit *cu,
				struct list_head *device_list)
{
	struct kfd_topology_device *dev;

	pr_debug("Found CU entry in CRAT table with proximity_domain=%d caps=%x\n",
		 cu->proximity_domain, cu->hsa_capability);
	list_for_each_entry(dev, device_list, list) {
		if (cu->proximity_domain == dev->proximity_domain) {
			if (cu->flags & CRAT_CU_FLAGS_CPU_PRESENT)
				kfd_populated_cu_info_cpu(dev, cu);

			if (cu->flags & CRAT_CU_FLAGS_GPU_PRESENT)
				kfd_populated_cu_info_gpu(dev, cu);
			break;
		}
	}

	return 0;
}

static struct kfd_mem_properties *
find_subtype_mem(uint32_t heap_type, uint32_t flags, uint32_t width,
		 struct kfd_topology_device *dev)
{
	struct kfd_mem_properties *props;

	list_for_each_entry(props, &dev->mem_props, list) {
		if (props->heap_type == heap_type
				&& props->flags == flags
				&& props->width == width)
			return props;
	}

	return NULL;
}
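
/* Aggregation example (hypothetical sizes): if the CRAT reports two
 * FB_PRIVATE banks of 8 GiB each with identical flags and width,
 * kfd_parse_subtype_mem below folds them into a single 16 GiB
 * kfd_mem_properties entry, so user mode sees one heap of that type.
 */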

/* kfd_parse_subtype_mem - parse memory subtypes and attach them to the
 * correct topology device present in the device_list
 */
static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem,
				 struct list_head *device_list)
{
	struct kfd_mem_properties *props;
	struct kfd_topology_device *dev;
	uint32_t heap_type;
	uint64_t size_in_bytes;
	uint32_t flags = 0;
	uint32_t width;

	pr_debug("Found memory entry in CRAT table with proximity_domain=%d\n",
		 mem->proximity_domain);
	list_for_each_entry(dev, device_list, list) {
		if (mem->proximity_domain == dev->proximity_domain) {
			/* We're on a GPU node */
			if (dev->node_props.cpu_cores_count == 0) {
				/* APU */
				if (mem->visibility_type == 0)
					heap_type =
						HSA_MEM_HEAP_TYPE_FB_PRIVATE;
				/* dGPU */
				else
					heap_type = mem->visibility_type;
			} else
				heap_type = HSA_MEM_HEAP_TYPE_SYSTEM;

			if (mem->flags & CRAT_MEM_FLAGS_HOT_PLUGGABLE)
				flags |= HSA_MEM_FLAGS_HOT_PLUGGABLE;
			if (mem->flags & CRAT_MEM_FLAGS_NON_VOLATILE)
				flags |= HSA_MEM_FLAGS_NON_VOLATILE;

			size_in_bytes =
				((uint64_t)mem->length_high << 32) +
							mem->length_low;
			width = mem->width;

			/* Multiple banks of the same type are aggregated into
			 * one. User mode doesn't care about multiple physical
			 * memory segments. It's managed as a single virtual
			 * heap for user mode.
			 */
			props = find_subtype_mem(heap_type, flags, width, dev);
			if (props) {
				props->size_in_bytes += size_in_bytes;
				break;
			}

			props = kfd_alloc_struct(props);
			if (!props)
				return -ENOMEM;

			props->heap_type = heap_type;
			props->flags = flags;
			props->size_in_bytes = size_in_bytes;
			props->width = width;

			dev->node_props.mem_banks_count++;
			list_add_tail(&props->list, &dev->mem_props);

			break;
		}
	}

	return 0;
}

/* kfd_parse_subtype_cache - parse cache subtypes and attach them to the
 * correct topology device present in the device_list
 */
static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache,
				   struct list_head *device_list)
{
	struct kfd_cache_properties *props;
	struct kfd_topology_device *dev;
	uint32_t id;
	uint32_t total_num_of_cu;

	id = cache->processor_id_low;

	pr_debug("Found cache entry in CRAT table with processor_id=%d\n", id);
	list_for_each_entry(dev, device_list, list) {
		total_num_of_cu = (dev->node_props.array_count *
				dev->node_props.cu_per_simd_array);

		/* Cache information in CRAT doesn't carry proximity_domain
		 * information, as a cache is associated with a CPU core or a
		 * GPU Compute Unit. So map the cache using the CPU core ID
		 * or the SIMD (GPU) ID.
		 * TODO: This works because currently we can safely assume
		 * that Compute Units are parsed before caches are parsed.
		 * In the future, remove this dependency.
		 */
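		/* Worked example (hypothetical IDs): a dGPU assigned
		 * simd_id_base = 0x80001000 with 64 CUs in total covers the
		 * ID range [0x80001000, 0x80001040); a GPU cache entry with
		 * processor_id_low = 0x80001010 therefore attaches to this
		 * device.
		 */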
		if ((id >= dev->node_props.cpu_core_id_base &&
			id <= dev->node_props.cpu_core_id_base +
				dev->node_props.cpu_cores_count) ||
			(id >= dev->node_props.simd_id_base &&
			id < dev->node_props.simd_id_base +
				total_num_of_cu)) {
			props = kfd_alloc_struct(props);
			if (!props)
				return -ENOMEM;

			props->processor_id_low = id;
			props->cache_level = cache->cache_level;
			props->cache_size = cache->cache_size;
			props->cacheline_size = cache->cache_line_size;
			props->cachelines_per_tag = cache->lines_per_tag;
			props->cache_assoc = cache->associativity;
			props->cache_latency = cache->cache_latency;

			memcpy(props->sibling_map, cache->sibling_map,
					CRAT_SIBLINGMAP_SIZE);

			/* set the sibling_map_size as 32 for CRAT from ACPI */
			props->sibling_map_size = CRAT_SIBLINGMAP_SIZE;

			if (cache->flags & CRAT_CACHE_FLAGS_DATA_CACHE)
				props->cache_type |= HSA_CACHE_TYPE_DATA;
			if (cache->flags & CRAT_CACHE_FLAGS_INST_CACHE)
				props->cache_type |= HSA_CACHE_TYPE_INSTRUCTION;
			if (cache->flags & CRAT_CACHE_FLAGS_CPU_CACHE)
				props->cache_type |= HSA_CACHE_TYPE_CPU;
			if (cache->flags & CRAT_CACHE_FLAGS_SIMD_CACHE)
				props->cache_type |= HSA_CACHE_TYPE_HSACU;

			dev->node_props.caches_count++;
			list_add_tail(&props->list, &dev->cache_props);

			break;
		}
	}

	return 0;
}

/* kfd_parse_subtype_iolink - parse iolink subtypes and attach them to the
 * correct topology device present in the device_list
 */
static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
				    struct list_head *device_list)
{
	struct kfd_iolink_properties *props = NULL, *props2;
	struct kfd_topology_device *dev, *to_dev;
	uint32_t id_from;
	uint32_t id_to;

	id_from = iolink->proximity_domain_from;
	id_to = iolink->proximity_domain_to;

	pr_debug("Found IO link entry in CRAT table with id_from=%d, id_to %d\n",
		 id_from, id_to);
	list_for_each_entry(dev, device_list, list) {
		if (id_from == dev->proximity_domain) {
			props = kfd_alloc_struct(props);
			if (!props)
				return -ENOMEM;

			props->node_from = id_from;
			props->node_to = id_to;
			props->ver_maj = iolink->version_major;
			props->ver_min = iolink->version_minor;
			props->iolink_type = iolink->io_interface_type;

			if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS)
				props->weight = 20;
			else if (props->iolink_type == CRAT_IOLINK_TYPE_XGMI)
				props->weight = 15 * iolink->num_hops_xgmi;
			else
				props->weight = node_distance(id_from, id_to);

			props->min_latency = iolink->minimum_latency;
			props->max_latency = iolink->maximum_latency;
			props->min_bandwidth = iolink->minimum_bandwidth_mbs;
			props->max_bandwidth = iolink->maximum_bandwidth_mbs;
			props->rec_transfer_size =
					iolink->recommended_transfer_size;

			dev->node_props.io_links_count++;
			list_add_tail(&props->list, &dev->io_link_props);
			break;
		}
	}

	/* CPU topology is created before GPUs are detected, so CPU->GPU
	 * links are not built at that time. If a PCIe type is discovered, it
	 * means a GPU is detected and we are adding GPU->CPU to the topology.
	 * At this time, also add the corresponding CPU->GPU link if the GPU
	 * is large-BAR.
	 * For xGMI, we only added the link with one direction in the CRAT
	 * table; add the corresponding reversed-direction link now.
	 */
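	/* Example (illustrative): if the CRAT encodes only a 1->0 GPU->CPU
	 * link and the GPU is large-BAR, the block below duplicates the
	 * properties as a 0->1 link so that user mode sees both directions.
	 */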
	if (props && (iolink->flags & CRAT_IOLINK_FLAGS_BI_DIRECTIONAL)) {
		to_dev = kfd_topology_device_by_proximity_domain_no_lock(id_to);
		if (!to_dev)
			return -ENODEV;
		/* same everything but the other direction */
		props2 = kmemdup(props, sizeof(*props2), GFP_KERNEL);
		if (!props2)
			return -ENOMEM;

		props2->node_from = id_to;
		props2->node_to = id_from;
		props2->kobj = NULL;
		to_dev->node_props.io_links_count++;
		list_add_tail(&props2->list, &to_dev->io_link_props);
	}

	return 0;
}

/* kfd_parse_subtype - parse subtypes and attach them to the correct topology
 * device present in the device_list
 * @sub_type_hdr - subtype section of crat_image
 * @device_list - list of topology devices present in this crat_image
 */
static int kfd_parse_subtype(struct crat_subtype_generic *sub_type_hdr,
			     struct list_head *device_list)
{
	struct crat_subtype_computeunit *cu;
	struct crat_subtype_memory *mem;
	struct crat_subtype_cache *cache;
	struct crat_subtype_iolink *iolink;
	int ret = 0;

	switch (sub_type_hdr->type) {
	case CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY:
		cu = (struct crat_subtype_computeunit *)sub_type_hdr;
		ret = kfd_parse_subtype_cu(cu, device_list);
		break;
	case CRAT_SUBTYPE_MEMORY_AFFINITY:
		mem = (struct crat_subtype_memory *)sub_type_hdr;
		ret = kfd_parse_subtype_mem(mem, device_list);
		break;
	case CRAT_SUBTYPE_CACHE_AFFINITY:
		cache = (struct crat_subtype_cache *)sub_type_hdr;
		ret = kfd_parse_subtype_cache(cache, device_list);
		break;
	case CRAT_SUBTYPE_TLB_AFFINITY:
		/*
		 * For now, nothing to do here
		 */
		pr_debug("Found TLB entry in CRAT table (not processing)\n");
		break;
	case CRAT_SUBTYPE_CCOMPUTE_AFFINITY:
		/*
		 * For now, nothing to do here
		 */
		pr_debug("Found CCOMPUTE entry in CRAT table (not processing)\n");
		break;
	case CRAT_SUBTYPE_IOLINK_AFFINITY:
		iolink = (struct crat_subtype_iolink *)sub_type_hdr;
		ret = kfd_parse_subtype_iolink(iolink, device_list);
		break;
	default:
		pr_warn("Unknown subtype %d in CRAT\n",
			sub_type_hdr->type);
	}

	return ret;
}

/* kfd_parse_crat_table - parse the CRAT table. For each node present in the
 * CRAT, create a kfd_topology_device and add it to device_list. Also parse
 * the CRAT subtypes and attach them to the appropriate kfd_topology_device
 * @crat_image - input image containing CRAT
 * @device_list - [OUT] list of kfd_topology_device generated after
 *		  parsing crat_image
 * @proximity_domain - Proximity domain of the first device in the table
 *
 * Return - 0 if successful else -ve value
 */
int kfd_parse_crat_table(void *crat_image, struct list_head *device_list,
			 uint32_t proximity_domain)
{
	struct kfd_topology_device *top_dev = NULL;
	struct crat_subtype_generic *sub_type_hdr;
	uint16_t node_id;
	int ret = 0;
	struct crat_header *crat_table = (struct crat_header *)crat_image;
	uint16_t num_nodes;
	uint32_t image_len;

	if (!crat_image)
		return -EINVAL;

	if (!list_empty(device_list)) {
		pr_warn("Error: device list should be empty\n");
		return -EINVAL;
	}

	num_nodes = crat_table->num_domains;
	image_len = crat_table->length;

	pr_debug("Parsing CRAT table with %d nodes\n", num_nodes);

	for (node_id = 0; node_id < num_nodes; node_id++) {
		top_dev = kfd_create_topology_device(device_list);
		if (!top_dev)
			break;
		top_dev->proximity_domain = proximity_domain++;
	}

	if (!top_dev) {
		ret = -ENOMEM;
		goto err;
	}

	memcpy(top_dev->oem_id, crat_table->oem_id, CRAT_OEMID_LENGTH);
	memcpy(top_dev->oem_table_id, crat_table->oem_table_id,
	       CRAT_OEMTABLEID_LENGTH);
	top_dev->oem_revision = crat_table->oem_revision;

	sub_type_hdr = (struct crat_subtype_generic *)(crat_table+1);
	while ((char *)sub_type_hdr + sizeof(struct crat_subtype_generic) <
			((char *)crat_image) + image_len) {
		if (sub_type_hdr->flags & CRAT_SUBTYPE_FLAGS_ENABLED) {
			ret = kfd_parse_subtype(sub_type_hdr, device_list);
			if (ret)
				break;
		}

		sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
				sub_type_hdr->length);
	}

err:
	if (ret)
		kfd_release_topology_device_list(device_list);

	return ret;
}
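
/* kfd_fill_gpu_cache_info_from_gfx_config - Derive the cache table from the
 * GC configuration discovered at init time instead of a hard-coded per-ASIC
 * table. Entries follow the same layout as the static tables above.
 *
 * Return - number of entries filled into @pcache_info
 */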
static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
						   struct kfd_gpu_cache_info *pcache_info)
{
	struct amdgpu_device *adev = kdev->adev;
	int i = 0;

	/* TCP L1 Cache per CU */
	if (adev->gfx.config.gc_tcp_l1_size) {
		pcache_info[i].cache_size = adev->gfx.config.gc_tcp_l1_size;
		pcache_info[i].cache_level = 1;
		pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
					CRAT_CACHE_FLAGS_DATA_CACHE |
					CRAT_CACHE_FLAGS_SIMD_CACHE);
		pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2;
		i++;
	}
	/* Scalar L1 Instruction Cache per SQC */
	if (adev->gfx.config.gc_l1_instruction_cache_size_per_sqc) {
		pcache_info[i].cache_size =
			adev->gfx.config.gc_l1_instruction_cache_size_per_sqc;
		pcache_info[i].cache_level = 1;
		pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
					CRAT_CACHE_FLAGS_INST_CACHE |
					CRAT_CACHE_FLAGS_SIMD_CACHE);
		pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
		i++;
	}
	/* Scalar L1 Data Cache per SQC */
	if (adev->gfx.config.gc_l1_data_cache_size_per_sqc) {
		pcache_info[i].cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc;
		pcache_info[i].cache_level = 1;
		pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
					CRAT_CACHE_FLAGS_DATA_CACHE |
					CRAT_CACHE_FLAGS_SIMD_CACHE);
		pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
		i++;
	}
	/* GL1 Data Cache per SA */
	if (adev->gfx.config.gc_gl1c_per_sa &&
	    adev->gfx.config.gc_gl1c_size_per_instance) {
		pcache_info[i].cache_size = adev->gfx.config.gc_gl1c_per_sa *
			adev->gfx.config.gc_gl1c_size_per_instance;
		pcache_info[i].cache_level = 1;
		pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
					CRAT_CACHE_FLAGS_DATA_CACHE |
					CRAT_CACHE_FLAGS_SIMD_CACHE);
		pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
		i++;
	}
	/* L2 Data Cache per GPU (Total Tex Cache) */
	if (adev->gfx.config.gc_gl2c_per_gpu) {
		pcache_info[i].cache_size = adev->gfx.config.gc_gl2c_per_gpu;
		pcache_info[i].cache_level = 2;
		pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
					CRAT_CACHE_FLAGS_DATA_CACHE |
					CRAT_CACHE_FLAGS_SIMD_CACHE);
		pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
		i++;
	}
	/* L3 Data Cache per GPU */
	if (adev->gmc.mall_size) {
		pcache_info[i].cache_size = adev->gmc.mall_size / 1024;
		pcache_info[i].cache_level = 3;
		pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
					CRAT_CACHE_FLAGS_DATA_CACHE |
					CRAT_CACHE_FLAGS_SIMD_CACHE);
		pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
		i++;
	}
	return i;
}
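
/* kfd_get_gpu_cache_info - Resolve the cache table for @kdev.
 *
 * For ASICs with hard-coded tables, *pcache_info is simply pointed at the
 * matching static table. For GFX 11, the table is built at runtime into the
 * buffer *pcache_info already points to, so callers are expected to pass a
 * sufficiently large writable array rather than a NULL pointer.
 *
 * Return - number of valid entries in *pcache_info
 */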
int kfd_get_gpu_cache_info(struct kfd_dev *kdev, struct kfd_gpu_cache_info **pcache_info)
{
	int num_of_cache_types = 0;

	switch (kdev->adev->asic_type) {
	case CHIP_KAVERI:
		*pcache_info = kaveri_cache_info;
		num_of_cache_types = ARRAY_SIZE(kaveri_cache_info);
		break;
	case CHIP_HAWAII:
		*pcache_info = hawaii_cache_info;
		num_of_cache_types = ARRAY_SIZE(hawaii_cache_info);
		break;
	case CHIP_CARRIZO:
		*pcache_info = carrizo_cache_info;
		num_of_cache_types = ARRAY_SIZE(carrizo_cache_info);
		break;
	case CHIP_TONGA:
		*pcache_info = tonga_cache_info;
		num_of_cache_types = ARRAY_SIZE(tonga_cache_info);
		break;
	case CHIP_FIJI:
		*pcache_info = fiji_cache_info;
		num_of_cache_types = ARRAY_SIZE(fiji_cache_info);
		break;
	case CHIP_POLARIS10:
		*pcache_info = polaris10_cache_info;
		num_of_cache_types = ARRAY_SIZE(polaris10_cache_info);
		break;
	case CHIP_POLARIS11:
		*pcache_info = polaris11_cache_info;
		num_of_cache_types = ARRAY_SIZE(polaris11_cache_info);
		break;
	case CHIP_POLARIS12:
		*pcache_info = polaris12_cache_info;
		num_of_cache_types = ARRAY_SIZE(polaris12_cache_info);
		break;
	case CHIP_VEGAM:
		*pcache_info = vegam_cache_info;
		num_of_cache_types = ARRAY_SIZE(vegam_cache_info);
		break;
	default:
		switch (KFD_GC_VERSION(kdev)) {
		case IP_VERSION(9, 0, 1):
			*pcache_info = vega10_cache_info;
			num_of_cache_types = ARRAY_SIZE(vega10_cache_info);
			break;
		case IP_VERSION(9, 2, 1):
			*pcache_info = vega12_cache_info;
			num_of_cache_types = ARRAY_SIZE(vega12_cache_info);
			break;
		case IP_VERSION(9, 4, 0):
		case IP_VERSION(9, 4, 1):
			*pcache_info = vega20_cache_info;
			num_of_cache_types = ARRAY_SIZE(vega20_cache_info);
			break;
		case IP_VERSION(9, 4, 2):
			*pcache_info = aldebaran_cache_info;
			num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info);
			break;
		case IP_VERSION(9, 1, 0):
		case IP_VERSION(9, 2, 2):
			*pcache_info = raven_cache_info;
			num_of_cache_types = ARRAY_SIZE(raven_cache_info);
			break;
		case IP_VERSION(9, 3, 0):
			*pcache_info = renoir_cache_info;
			num_of_cache_types = ARRAY_SIZE(renoir_cache_info);
			break;
		case IP_VERSION(10, 1, 10):
		case IP_VERSION(10, 1, 2):
		case IP_VERSION(10, 1, 3):
		case IP_VERSION(10, 1, 4):
			*pcache_info = navi10_cache_info;
			num_of_cache_types = ARRAY_SIZE(navi10_cache_info);
			break;
		case IP_VERSION(10, 1, 1):
			*pcache_info = navi14_cache_info;
			num_of_cache_types = ARRAY_SIZE(navi14_cache_info);
			break;
		case IP_VERSION(10, 3, 0):
			*pcache_info = sienna_cichlid_cache_info;
			num_of_cache_types = ARRAY_SIZE(sienna_cichlid_cache_info);
			break;
		case IP_VERSION(10, 3, 2):
			*pcache_info = navy_flounder_cache_info;
			num_of_cache_types = ARRAY_SIZE(navy_flounder_cache_info);
			break;
		case IP_VERSION(10, 3, 4):
			*pcache_info = dimgrey_cavefish_cache_info;
			num_of_cache_types = ARRAY_SIZE(dimgrey_cavefish_cache_info);
			break;
		case IP_VERSION(10, 3, 1):
			*pcache_info = vangogh_cache_info;
			num_of_cache_types = ARRAY_SIZE(vangogh_cache_info);
			break;
		case IP_VERSION(10, 3, 5):
			*pcache_info = beige_goby_cache_info;
			num_of_cache_types = ARRAY_SIZE(beige_goby_cache_info);
			break;
		case IP_VERSION(10, 3, 3):
			*pcache_info = yellow_carp_cache_info;
			num_of_cache_types = ARRAY_SIZE(yellow_carp_cache_info);
			break;
		case IP_VERSION(10, 3, 6):
			*pcache_info = gc_10_3_6_cache_info;
			num_of_cache_types = ARRAY_SIZE(gc_10_3_6_cache_info);
			break;
		case IP_VERSION(10, 3, 7):
			*pcache_info = gfx1037_cache_info;
			num_of_cache_types = ARRAY_SIZE(gfx1037_cache_info);
			break;
		case IP_VERSION(11, 0, 0):
		case IP_VERSION(11, 0, 1):
		case IP_VERSION(11, 0, 2):
		case IP_VERSION(11, 0, 3):
		case IP_VERSION(11, 0, 4):
			num_of_cache_types =
				kfd_fill_gpu_cache_info_from_gfx_config(kdev, *pcache_info);
			break;
		default:
			*pcache_info = dummy_cache_info;
			num_of_cache_types = ARRAY_SIZE(dummy_cache_info);
			pr_warn("dummy cache info is used temporarily; real cache info needs to be updated later.\n");
			break;
		}
	}
	return num_of_cache_types;
}

static bool kfd_ignore_crat(void)
{
	bool ret;

	if (ignore_crat)
		return true;

#ifndef KFD_SUPPORT_IOMMU_V2
	ret = true;
#else
	ret = false;
#endif

	return ret;
}
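
/* Typical use of the ACPI path (sketch; error handling elided;
 * kfd_destroy_crat_image is the matching cleanup helper):
 *
 *	void *crat_image = NULL;
 *	size_t image_size = 0;
 *
 *	if (!kfd_create_crat_image_acpi(&crat_image, &image_size))
 *		kfd_parse_crat_table(crat_image, device_list, 0);
 *	kfd_destroy_crat_image(crat_image);
 */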

/*
 * kfd_create_crat_image_acpi - Allocates memory for CRAT image and
 * copies CRAT from ACPI (if available).
 * NOTE: Call kfd_destroy_crat_image to free CRAT image memory
 *
 * @crat_image: CRAT read from ACPI. If no CRAT is present in ACPI,
 *		crat_image will be NULL
 * @size: [OUT] size of crat_image
 *
 * Return 0 if successful else return error code
 */
int kfd_create_crat_image_acpi(void **crat_image, size_t *size)
{
	struct acpi_table_header *crat_table;
	acpi_status status;
	void *pcrat_image;
	int rc = 0;

	if (!crat_image)
		return -EINVAL;

	*crat_image = NULL;

	if (kfd_ignore_crat()) {
		pr_info("CRAT table disabled by module option\n");
		return -ENODATA;
	}

	/* Fetch the CRAT table from ACPI */
	status = acpi_get_table(CRAT_SIGNATURE, 0, &crat_table);
	if (status == AE_NOT_FOUND) {
		pr_info("CRAT table not found\n");
		return -ENODATA;
	} else if (ACPI_FAILURE(status)) {
		const char *err = acpi_format_exception(status);

		pr_err("CRAT table error: %s\n", err);
		return -EINVAL;
	}

	pcrat_image = kvmalloc(crat_table->length, GFP_KERNEL);
	if (!pcrat_image) {
		rc = -ENOMEM;
		goto out;
	}

	memcpy(pcrat_image, crat_table, crat_table->length);
	*crat_image = pcrat_image;
	*size = crat_table->length;
out:
	acpi_put_table(crat_table);
	return rc;
}

/* Memory required to create Virtual CRAT.
 * Since there is no easy way to predict the amount of memory required, the
 * following amount is allocated for the GPU Virtual CRAT. This is
 * expected to cover all known conditions. But to be safe, an additional
 * check is put in the code to ensure we don't overwrite.
 */
#define VCRAT_SIZE_FOR_GPU	(4 * PAGE_SIZE)

/* kfd_fill_cu_for_cpu - Fill in Compute info for the given CPU NUMA node
 *
 * @numa_node_id: CPU NUMA node id
 * @avail_size: Available size in the memory
 * @proximity_domain: CRAT proximity domain of this node
 * @sub_type_hdr: Memory into which compute info will be filled in
 *
 * Return 0 if successful else return -ve value
 */
static int kfd_fill_cu_for_cpu(int numa_node_id, int *avail_size,
			       int proximity_domain,
			       struct crat_subtype_computeunit *sub_type_hdr)
{
	const struct cpumask *cpumask;

	*avail_size -= sizeof(struct crat_subtype_computeunit);
	if (*avail_size < 0)
		return -ENOMEM;

	memset(sub_type_hdr, 0, sizeof(struct crat_subtype_computeunit));

	/* Fill in subtype header data */
	sub_type_hdr->type = CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY;
	sub_type_hdr->length = sizeof(struct crat_subtype_computeunit);
	sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;

	cpumask = cpumask_of_node(numa_node_id);

	/* Fill in CU data */
	sub_type_hdr->flags |= CRAT_CU_FLAGS_CPU_PRESENT;
	sub_type_hdr->proximity_domain = proximity_domain;
	sub_type_hdr->processor_id_low = kfd_numa_node_to_apic_id(numa_node_id);
	if (sub_type_hdr->processor_id_low == -1)
		return -EINVAL;

	sub_type_hdr->num_cpu_cores = cpumask_weight(cpumask);

	return 0;
}

/* kfd_fill_mem_info_for_cpu - Fill in Memory info for the given CPU NUMA node
 *
 * @numa_node_id: CPU NUMA node id
 * @avail_size: Available size in the memory
 * @proximity_domain: CRAT proximity domain of this node
 * @sub_type_hdr: Memory into which memory info will be filled in
 *
 * Return 0 if successful else return -ve value
 */
static int kfd_fill_mem_info_for_cpu(int numa_node_id, int *avail_size,
				     int proximity_domain,
				     struct crat_subtype_memory *sub_type_hdr)
{
	uint64_t mem_in_bytes = 0;
	pg_data_t *pgdat;
	int zone_type;

	*avail_size -= sizeof(struct crat_subtype_memory);
	if (*avail_size < 0)
		return -ENOMEM;

	memset(sub_type_hdr, 0, sizeof(struct crat_subtype_memory));

	/* Fill in subtype header data */
	sub_type_hdr->type = CRAT_SUBTYPE_MEMORY_AFFINITY;
	sub_type_hdr->length = sizeof(struct crat_subtype_memory);
	sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;

	/* Fill in Memory Subunit data */

	/* Unlike si_meminfo, si_meminfo_node is not exported. So
	 * the following lines are duplicated from the si_meminfo_node
	 * function.
	 */
	pgdat = NODE_DATA(numa_node_id);
	for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
		mem_in_bytes += zone_managed_pages(&pgdat->node_zones[zone_type]);
	mem_in_bytes <<= PAGE_SHIFT;

	sub_type_hdr->length_low = lower_32_bits(mem_in_bytes);
	sub_type_hdr->length_high = upper_32_bits(mem_in_bytes);
	sub_type_hdr->proximity_domain = proximity_domain;

	return 0;
}

#ifdef CONFIG_X86_64
static int kfd_fill_iolink_info_for_cpu(int numa_node_id, int *avail_size,
					uint32_t *num_entries,
					struct crat_subtype_iolink *sub_type_hdr)
{
	int nid;
	struct cpuinfo_x86 *c = &cpu_data(0);
	uint8_t link_type;

	if (c->x86_vendor == X86_VENDOR_AMD)
		link_type = CRAT_IOLINK_TYPE_HYPERTRANSPORT;
	else
		link_type = CRAT_IOLINK_TYPE_QPI_1_1;

	*num_entries = 0;

	/* Create IO links from this node to other CPU nodes */
	for_each_online_node(nid) {
		if (nid == numa_node_id) /* node itself */
			continue;

		*avail_size -= sizeof(struct crat_subtype_iolink);
		if (*avail_size < 0)
			return -ENOMEM;

		memset(sub_type_hdr, 0, sizeof(struct crat_subtype_iolink));

		/* Fill in subtype header data */
		sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY;
		sub_type_hdr->length = sizeof(struct crat_subtype_iolink);
		sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;

		/* Fill in IO link data */
		sub_type_hdr->proximity_domain_from = numa_node_id;
		sub_type_hdr->proximity_domain_to = nid;
		sub_type_hdr->io_interface_type = link_type;

		(*num_entries)++;
		sub_type_hdr++;
	}

	return 0;
}
#endif
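
/* Layout produced by kfd_create_vcrat_image_cpu below (illustrative): for
 * each online NUMA node with a valid APIC id, one compute-unit subtype, one
 * memory subtype and, on x86-64, one IO link subtype per peer node are
 * appended back to back. A two-node system thus yields two domains with
 * 2 CU + 2 memory + 2 IO link entries in total.
 */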

/* kfd_create_vcrat_image_cpu - Create Virtual CRAT for CPU
 *
 * @pcrat_image: Fill in VCRAT for CPU
 * @size:	[IN] allocated size of crat_image.
 *		[OUT] actual size of data filled in crat_image
 */
static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size)
{
	struct crat_header *crat_table = (struct crat_header *)pcrat_image;
	struct acpi_table_header *acpi_table;
	acpi_status status;
	struct crat_subtype_generic *sub_type_hdr;
	int avail_size = *size;
	int numa_node_id;
#ifdef CONFIG_X86_64
	uint32_t entries = 0;
#endif
	int ret = 0;

	if (!pcrat_image)
		return -EINVAL;

	/* Fill in CRAT Header.
	 * Modify length and total_entries as subunits are added.
	 */
	avail_size -= sizeof(struct crat_header);
	if (avail_size < 0)
		return -ENOMEM;

	memset(crat_table, 0, sizeof(struct crat_header));
	memcpy(&crat_table->signature, CRAT_SIGNATURE,
	       sizeof(crat_table->signature));
	crat_table->length = sizeof(struct crat_header);

	status = acpi_get_table("DSDT", 0, &acpi_table);
	if (status != AE_OK)
		pr_warn("DSDT table not found for OEM information\n");
	else {
		crat_table->oem_revision = acpi_table->revision;
		memcpy(crat_table->oem_id, acpi_table->oem_id,
		       CRAT_OEMID_LENGTH);
		memcpy(crat_table->oem_table_id, acpi_table->oem_table_id,
		       CRAT_OEMTABLEID_LENGTH);
		acpi_put_table(acpi_table);
	}
	crat_table->total_entries = 0;
	crat_table->num_domains = 0;

	sub_type_hdr = (struct crat_subtype_generic *)(crat_table+1);

	for_each_online_node(numa_node_id) {
		if (kfd_numa_node_to_apic_id(numa_node_id) == -1)
			continue;

		/* Fill in Subtype: Compute Unit */
		ret = kfd_fill_cu_for_cpu(numa_node_id, &avail_size,
			crat_table->num_domains,
			(struct crat_subtype_computeunit *)sub_type_hdr);
		if (ret < 0)
			return ret;
		crat_table->length += sub_type_hdr->length;
		crat_table->total_entries++;

		sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
				sub_type_hdr->length);

		/* Fill in Subtype: Memory */
		ret = kfd_fill_mem_info_for_cpu(numa_node_id, &avail_size,
			crat_table->num_domains,
			(struct crat_subtype_memory *)sub_type_hdr);
		if (ret < 0)
			return ret;
		crat_table->length += sub_type_hdr->length;
		crat_table->total_entries++;

		sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
				sub_type_hdr->length);

		/* Fill in Subtype: IO Link */
#ifdef CONFIG_X86_64
		ret = kfd_fill_iolink_info_for_cpu(numa_node_id, &avail_size,
				&entries,
				(struct crat_subtype_iolink *)sub_type_hdr);
		if (ret < 0)
			return ret;

		if (entries) {
			crat_table->length += (sub_type_hdr->length * entries);
			crat_table->total_entries += entries;

			sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
					sub_type_hdr->length * entries);
		}
#else
		pr_info("IO link not available for non x86 platforms\n");
#endif

		crat_table->num_domains++;
	}

	/* TODO: Add cache Subtype for CPU.
	 * Currently, CPU cache information is available in the function
	 * detect_cache_attributes(cpu) defined in the file
	 * ./arch/x86/kernel/cpu/intel_cacheinfo.c. This function is not
	 * exported, so to get the same information the code would need to
	 * be duplicated.
	 */

	*size = crat_table->length;
	pr_info("Virtual CRAT table created for CPU\n");

	return 0;
}

static int kfd_fill_gpu_memory_affinity(int *avail_size,
		struct kfd_dev *kdev, uint8_t type, uint64_t size,
		struct crat_subtype_memory *sub_type_hdr,
		uint32_t proximity_domain,
		const struct kfd_local_mem_info *local_mem_info)
{
	*avail_size -= sizeof(struct crat_subtype_memory);
	if (*avail_size < 0)
		return -ENOMEM;

	memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_memory));
	sub_type_hdr->type = CRAT_SUBTYPE_MEMORY_AFFINITY;
	sub_type_hdr->length = sizeof(struct crat_subtype_memory);
	sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED;

	sub_type_hdr->proximity_domain = proximity_domain;

	pr_debug("Fill gpu memory affinity - type 0x%x size 0x%llx\n",
			type, size);

	sub_type_hdr->length_low = lower_32_bits(size);
	sub_type_hdr->length_high = upper_32_bits(size);

	sub_type_hdr->width = local_mem_info->vram_width;
	sub_type_hdr->visibility_type = type;

	return 0;
}
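
/* The SRAT lookup below matches a GPU by its PCI identity, encoded as
 * domain << 16 | bus << 8 | devfn. E.g. a GPU at 0000:03:00.0 (illustrative
 * address) is looked up as pci_id 0x00000300.
 */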
#ifdef CONFIG_ACPI_NUMA
static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev)
{
	struct acpi_table_header *table_header = NULL;
	struct acpi_subtable_header *sub_header = NULL;
	unsigned long table_end, subtable_len;
	u32 pci_id = pci_domain_nr(kdev->adev->pdev->bus) << 16 |
			pci_dev_id(kdev->adev->pdev);
	u32 bdf;
	acpi_status status;
	struct acpi_srat_cpu_affinity *cpu;
	struct acpi_srat_generic_affinity *gpu;
	int pxm = 0, max_pxm = 0;
	int numa_node = NUMA_NO_NODE;
	bool found = false;

	/* Fetch the SRAT table from ACPI */
	status = acpi_get_table(ACPI_SIG_SRAT, 0, &table_header);
	if (status == AE_NOT_FOUND) {
		pr_warn("SRAT table not found\n");
		return;
	} else if (ACPI_FAILURE(status)) {
		const char *err = acpi_format_exception(status);

		pr_err("SRAT table error: %s\n", err);
		return;
	}

	table_end = (unsigned long)table_header + table_header->length;

	/* Parse all entries looking for a match. */
	sub_header = (struct acpi_subtable_header *)
			((unsigned long)table_header +
			sizeof(struct acpi_table_srat));
	subtable_len = sub_header->length;

	while (((unsigned long)sub_header) + subtable_len < table_end) {
		/*
		 * If length is 0, break from this loop to avoid
		 * an infinite loop.
		 */
		if (subtable_len == 0) {
			pr_err("SRAT invalid zero length\n");
			break;
		}

		switch (sub_header->type) {
		case ACPI_SRAT_TYPE_CPU_AFFINITY:
			cpu = (struct acpi_srat_cpu_affinity *)sub_header;
			pxm = *((u32 *)cpu->proximity_domain_hi) << 8 |
					cpu->proximity_domain_lo;
			if (pxm > max_pxm)
				max_pxm = pxm;
			break;
		case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
			gpu = (struct acpi_srat_generic_affinity *)sub_header;
			bdf = *((u16 *)(&gpu->device_handle[0])) << 16 |
					*((u16 *)(&gpu->device_handle[2]));
			if (bdf == pci_id) {
				found = true;
				numa_node = pxm_to_node(gpu->proximity_domain);
			}
			break;
		default:
			break;
		}

		if (found)
			break;

		sub_header = (struct acpi_subtable_header *)
				((unsigned long)sub_header + subtable_len);
		subtable_len = sub_header->length;
	}

	acpi_put_table(table_header);

	/* Workaround bad cpu-gpu binding case */
	if (found && (numa_node < 0 ||
			numa_node > pxm_to_node(max_pxm)))
		numa_node = 0;

	if (numa_node != NUMA_NO_NODE)
		set_dev_node(&kdev->adev->pdev->dev, numa_node);
}
#endif
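/* Note on the SRAT match in the function above: for a PCI device handle,
 * ACPI stores the segment number in device_handle[0..1] and the
 * bus/device/function number in device_handle[2..3], so the two 16-bit
 * loads reassemble the same domain << 16 | bus << 8 | devfn key that
 * pci_id is built from.
 */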

/* kfd_fill_gpu_direct_io_link_to_cpu - Fill in the direct io link from a GPU
 * to its NUMA node
 * @avail_size: available size in the CRAT image
 * @kdev: [IN] GPU device
 * @sub_type_hdr: memory into which the io link info will be filled
 * @proximity_domain: proximity domain of the GPU node
 *
 * Return 0 if successful else return -ve value
 */
static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
			struct kfd_dev *kdev,
			struct crat_subtype_iolink *sub_type_hdr,
			uint32_t proximity_domain)
{
	*avail_size -= sizeof(struct crat_subtype_iolink);
	if (*avail_size < 0)
		return -ENOMEM;

	memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_iolink));

	/* Fill in subtype header data */
	sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY;
	sub_type_hdr->length = sizeof(struct crat_subtype_iolink);
	sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED;
	if (kfd_dev_is_large_bar(kdev))
		sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;

	/* Fill in IOLINK subtype.
	 * TODO: Fill in the other fields of the iolink subtype.
	 */
	if (kdev->adev->gmc.xgmi.connected_to_cpu) {
		/*
		 * With a host-GPU xGMI link, the host can access GPU memory
		 * whether or not the PCIe BAR type is large, so always
		 * create a bidirectional io link.
		 */
		sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;
		sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
		sub_type_hdr->num_hops_xgmi = 1;
		if (KFD_GC_VERSION(kdev) == IP_VERSION(9, 4, 2)) {
			sub_type_hdr->minimum_bandwidth_mbs =
					amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(
							kdev->adev, NULL, true);
			sub_type_hdr->maximum_bandwidth_mbs =
					sub_type_hdr->minimum_bandwidth_mbs;
		}
	} else {
		sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS;
		sub_type_hdr->minimum_bandwidth_mbs =
				amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->adev, true);
		sub_type_hdr->maximum_bandwidth_mbs =
				amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->adev, false);
	}

	sub_type_hdr->proximity_domain_from = proximity_domain;

#ifdef CONFIG_ACPI_NUMA
	if (kdev->adev->pdev->dev.numa_node == NUMA_NO_NODE)
		kfd_find_numa_node_in_srat(kdev);
#endif
#ifdef CONFIG_NUMA
	if (kdev->adev->pdev->dev.numa_node == NUMA_NO_NODE)
		sub_type_hdr->proximity_domain_to = 0;
	else
		sub_type_hdr->proximity_domain_to = kdev->adev->pdev->dev.numa_node;
#else
	sub_type_hdr->proximity_domain_to = 0;
#endif
	return 0;
}

static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
			struct kfd_dev *kdev,
			struct kfd_dev *peer_kdev,
			struct crat_subtype_iolink *sub_type_hdr,
			uint32_t proximity_domain_from,
			uint32_t proximity_domain_to)
{
	*avail_size -= sizeof(struct crat_subtype_iolink);
	if (*avail_size < 0)
		return -ENOMEM;

	memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_iolink));

	sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY;
	sub_type_hdr->length = sizeof(struct crat_subtype_iolink);
	sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED |
			       CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;

	sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
	sub_type_hdr->proximity_domain_from = proximity_domain_from;
	sub_type_hdr->proximity_domain_to = proximity_domain_to;
	sub_type_hdr->num_hops_xgmi =
		amdgpu_amdkfd_get_xgmi_hops_count(kdev->adev, peer_kdev->adev);
	sub_type_hdr->maximum_bandwidth_mbs =
		amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, peer_kdev->adev, false);
	sub_type_hdr->minimum_bandwidth_mbs = sub_type_hdr->maximum_bandwidth_mbs ?
		amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, NULL, true) : 0;

	return 0;
}
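/* Note on the xGMI bandwidth fields above: maximum_bandwidth_mbs is queried
 * for the specific kdev/peer_kdev pair, while minimum_bandwidth_mbs
 * (queried with a NULL peer) is the per-link minimum, forced to 0 when the
 * maximum reports no connectivity. This reading of the helpers is inferred
 * from their use here.
 */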

/* kfd_create_vcrat_image_gpu - Create Virtual CRAT for GPU
 *
 * @pcrat_image: image to be filled in with the GPU VCRAT
 * @size: [IN] allocated size of crat_image.
 *	  [OUT] actual size of data filled in crat_image
 */
static int kfd_create_vcrat_image_gpu(void *pcrat_image,
				      size_t *size, struct kfd_dev *kdev,
				      uint32_t proximity_domain)
{
	struct crat_header *crat_table = (struct crat_header *)pcrat_image;
	struct crat_subtype_generic *sub_type_hdr;
	struct kfd_local_mem_info local_mem_info;
	struct kfd_topology_device *peer_dev;
	struct crat_subtype_computeunit *cu;
	struct kfd_cu_info cu_info;
	int avail_size = *size;
	uint32_t total_num_of_cu;
	uint32_t nid = 0;
	int ret = 0;

	if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_GPU)
		return -EINVAL;

	/* Fill in the CRAT header.
	 * Modify length and total_entries as subunits are added.
	 */
	avail_size -= sizeof(struct crat_header);
	if (avail_size < 0)
		return -ENOMEM;

	memset(crat_table, 0, sizeof(struct crat_header));

	memcpy(&crat_table->signature, CRAT_SIGNATURE,
			sizeof(crat_table->signature));
	/* Change length as we add more subtypes */
	crat_table->length = sizeof(struct crat_header);
	crat_table->num_domains = 1;
	crat_table->total_entries = 0;

	/* Fill in Subtype: Compute Unit
	 * First fill in the subtype header, then the subtype data.
	 */
	avail_size -= sizeof(struct crat_subtype_computeunit);
	if (avail_size < 0)
		return -ENOMEM;

	sub_type_hdr = (struct crat_subtype_generic *)(crat_table + 1);
	memset(sub_type_hdr, 0, sizeof(struct crat_subtype_computeunit));

	sub_type_hdr->type = CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY;
	sub_type_hdr->length = sizeof(struct crat_subtype_computeunit);
	sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;

	/* Fill in CU subtype data */
	cu = (struct crat_subtype_computeunit *)sub_type_hdr;
	cu->flags |= CRAT_CU_FLAGS_GPU_PRESENT;
	cu->proximity_domain = proximity_domain;

	amdgpu_amdkfd_get_cu_info(kdev->adev, &cu_info);
	cu->num_simd_per_cu = cu_info.simd_per_cu;
	cu->num_simd_cores = cu_info.simd_per_cu * cu_info.cu_active_number;
	cu->max_waves_simd = cu_info.max_waves_per_simd;

	cu->wave_front_size = cu_info.wave_front_size;
	cu->array_count = cu_info.num_shader_arrays_per_engine *
		cu_info.num_shader_engines;
	total_num_of_cu = (cu->array_count * cu_info.num_cu_per_sh);
	cu->processor_id_low = get_and_inc_gpu_processor_id(total_num_of_cu);
	cu->num_cu_per_array = cu_info.num_cu_per_sh;
	cu->max_slots_scatch_cu = cu_info.max_scratch_slots_per_cu;
	cu->num_banks = cu_info.num_shader_engines;
	cu->lds_size_in_kb = cu_info.lds_size;

	cu->hsa_capability = 0;

	/* Check if this node supports IOMMU. During parsing this flag will
	 * translate to HSA_CAP_ATS_PRESENT.
	 */
	if (!kfd_iommu_check_device(kdev))
		cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT;

	crat_table->length += sub_type_hdr->length;
	crat_table->total_entries++;

	/* Fill in Subtype: Memory. Only on systems with large BAR (no
	 * private FB), report memory as public. On other systems
	 * report the total FB size (public+private) as a single
	 * private heap.
	 */
	local_mem_info = kdev->local_mem_info;
	sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
			sub_type_hdr->length);

	if (debug_largebar)
		local_mem_info.local_mem_size_private = 0;

	if (local_mem_info.local_mem_size_private == 0)
		ret = kfd_fill_gpu_memory_affinity(&avail_size,
				kdev, HSA_MEM_HEAP_TYPE_FB_PUBLIC,
				local_mem_info.local_mem_size_public,
				(struct crat_subtype_memory *)sub_type_hdr,
				proximity_domain,
				&local_mem_info);
	else
		ret = kfd_fill_gpu_memory_affinity(&avail_size,
				kdev, HSA_MEM_HEAP_TYPE_FB_PRIVATE,
				local_mem_info.local_mem_size_public +
				local_mem_info.local_mem_size_private,
				(struct crat_subtype_memory *)sub_type_hdr,
				proximity_domain,
				&local_mem_info);
	if (ret < 0)
		return ret;

	crat_table->length += sizeof(struct crat_subtype_memory);
	crat_table->total_entries++;
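	/* Heap-choice example with hypothetical sizes: a 16 GiB card with a
	 * 256 MiB visible BAR has local_mem_size_private != 0, so the full
	 * 16 GiB is reported as one FB_PRIVATE heap; the same card with a
	 * BAR covering all of VRAM (or with debug_largebar set) reports
	 * 16 GiB as FB_PUBLIC instead.
	 */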

	/* Fill in Subtype: IO_LINKS
	 * Only direct links are added here, i.e. the link from the GPU to
	 * its NUMA node. Indirect links are added by userspace.
	 */
	sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
			sub_type_hdr->length);
	ret = kfd_fill_gpu_direct_io_link_to_cpu(&avail_size, kdev,
			(struct crat_subtype_iolink *)sub_type_hdr,
			proximity_domain);
	if (ret < 0)
		return ret;

	crat_table->length += sub_type_hdr->length;
	crat_table->total_entries++;

	/* Fill in Subtype: IO_LINKS
	 * Direct links from this GPU to other GPUs through xGMI.
	 * We loop over the GPUs that have already been processed (those
	 * with a lower proximity_domain value) and add a link for every GPU
	 * in the same hive (from this GPU to the other GPU). The reversed
	 * iolink (from the other GPU to this GPU) is added in
	 * kfd_parse_subtype_iolink.
	 */
	if (kdev->hive_id) {
		for (nid = 0; nid < proximity_domain; ++nid) {
			peer_dev = kfd_topology_device_by_proximity_domain_no_lock(nid);
			if (!peer_dev->gpu)
				continue;
			if (peer_dev->gpu->hive_id != kdev->hive_id)
				continue;
			sub_type_hdr = (typeof(sub_type_hdr))(
					(char *)sub_type_hdr +
					sizeof(struct crat_subtype_iolink));
			ret = kfd_fill_gpu_xgmi_link_to_gpu(
					&avail_size, kdev, peer_dev->gpu,
					(struct crat_subtype_iolink *)sub_type_hdr,
					proximity_domain, nid);
			if (ret < 0)
				return ret;
			crat_table->length += sub_type_hdr->length;
			crat_table->total_entries++;
		}
	}
	*size = crat_table->length;
	pr_info("Virtual CRAT table created for GPU\n");

	return ret;
}

/* kfd_create_crat_image_virtual - Allocates memory for a CRAT image and
 * creates a Virtual CRAT (VCRAT) image
 *
 * NOTE: Call kfd_destroy_crat_image to free CRAT image memory
 *
 * @crat_image: VCRAT image created because ACPI does not have a
 *		CRAT for this device
 * @size: [OUT] size of virtual crat_image
 * @flags:	COMPUTE_UNIT_CPU - Create VCRAT for CPU device
 *		COMPUTE_UNIT_GPU - Create VCRAT for GPU
 *		(COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU) - Create VCRAT for APU
 *			-- this option is not currently implemented.
 *			The assumption is that all AMD APUs will have CRAT
 * @kdev: valid kfd_dev, required if flags contain COMPUTE_UNIT_GPU
 * @proximity_domain: proximity domain to assign to the GPU node
 *		(used only with COMPUTE_UNIT_GPU)
 *
 * Return 0 if successful else return -ve value
 */
int kfd_create_crat_image_virtual(void **crat_image, size_t *size,
				  int flags, struct kfd_dev *kdev,
				  uint32_t proximity_domain)
{
	void *pcrat_image = NULL;
	int ret = 0, num_nodes;
	size_t dyn_size;

	if (!crat_image)
		return -EINVAL;

	*crat_image = NULL;

	/* Allocate the CPU Virtual CRAT size based on the number of online
	 * nodes. Allocate VCRAT_SIZE_FOR_GPU for the GPU virtual CRAT
	 * image. This should cover all the current conditions. A check is
	 * in place to avoid writing beyond the allocated size for GPUs.
	 */
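	/* Sizing example for the CPU case below: with num_nodes = 2, each
	 * node gets one compute-unit and one memory subtype plus
	 * (num_nodes - 1) = 1 io link, so dyn_size =
	 * sizeof(struct crat_header) +
	 * 2 * (sizeof(struct crat_subtype_computeunit) +
	 * sizeof(struct crat_subtype_memory) +
	 * 1 * sizeof(struct crat_subtype_iolink)).
	 */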
A check is put not 2277 * to overwrite beyond allocated size for GPUs 2278 */ 2279 switch (flags) { 2280 case COMPUTE_UNIT_CPU: 2281 num_nodes = num_online_nodes(); 2282 dyn_size = sizeof(struct crat_header) + 2283 num_nodes * (sizeof(struct crat_subtype_computeunit) + 2284 sizeof(struct crat_subtype_memory) + 2285 (num_nodes - 1) * sizeof(struct crat_subtype_iolink)); 2286 pcrat_image = kvmalloc(dyn_size, GFP_KERNEL); 2287 if (!pcrat_image) 2288 return -ENOMEM; 2289 *size = dyn_size; 2290 pr_debug("CRAT size is %ld", dyn_size); 2291 ret = kfd_create_vcrat_image_cpu(pcrat_image, size); 2292 break; 2293 case COMPUTE_UNIT_GPU: 2294 if (!kdev) 2295 return -EINVAL; 2296 pcrat_image = kvmalloc(VCRAT_SIZE_FOR_GPU, GFP_KERNEL); 2297 if (!pcrat_image) 2298 return -ENOMEM; 2299 *size = VCRAT_SIZE_FOR_GPU; 2300 ret = kfd_create_vcrat_image_gpu(pcrat_image, size, kdev, 2301 proximity_domain); 2302 break; 2303 case (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU): 2304 /* TODO: */ 2305 ret = -EINVAL; 2306 pr_err("VCRAT not implemented for APU\n"); 2307 break; 2308 default: 2309 ret = -EINVAL; 2310 } 2311 2312 if (!ret) 2313 *crat_image = pcrat_image; 2314 else 2315 kvfree(pcrat_image); 2316 2317 return ret; 2318 } 2319 2320 2321 /* kfd_destroy_crat_image 2322 * 2323 * @crat_image: [IN] - crat_image from kfd_create_crat_image_xxx(..) 2324 * 2325 */ 2326 void kfd_destroy_crat_image(void *crat_image) 2327 { 2328 kvfree(crat_image); 2329 } 2330