// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright 2015-2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/pci.h>
#include <linux/acpi.h>
#include "kfd_crat.h"
#include "kfd_priv.h"
#include "kfd_topology.h"
#include "kfd_iommu.h"
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"

/* Fixme: Fake 32GB for 1PNPS1 mode bringup */
#define DUMMY_VRAM_SIZE 31138512896

/* GPU Processor ID base for dGPUs for which VCRAT needs to be created.
 * GPU processor IDs are expressed with Bit[31]=1.
 * The base is set to 0x8000_0000 + 0x1000 to avoid collision with GPU IDs
 * used in the CRAT.
 */
static uint32_t gpu_processor_id_low = 0x80001000;

/* Return the next available gpu_processor_id and increment it for the next GPU
 *	@total_cu_count - Total CUs present in the GPU, including ones
 *			  masked off
 */
static inline unsigned int get_and_inc_gpu_processor_id(
				unsigned int total_cu_count)
{
	int current_id = gpu_processor_id_low;

	gpu_processor_id_low += total_cu_count;
	return current_id;
}
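
/*
 * For illustration only (hypothetical CU counts): if the first GPU reports
 * 64 total CUs, it is assigned processor IDs 0x80001000..0x8000103f and
 * the next GPU starts at 0x80001040:
 *
 *	first  = get_and_inc_gpu_processor_id(64);	// returns 0x80001000
 *	second = get_and_inc_gpu_processor_id(40);	// returns 0x80001040
 */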


static struct kfd_gpu_cache_info kaveri_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache (in SQC module) per bank */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* Scalar L1 Data Cache (in SQC module) per bank */
		.cache_size = 8,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},

	/* TODO: Add L2 Cache information */
};


static struct kfd_gpu_cache_info carrizo_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache (in SQC module) per bank */
		.cache_size = 8,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 4,
	},
	{
		/* Scalar L1 Data Cache (in SQC module) per bank. */
		.cache_size = 4,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 4,
	},

	/* TODO: Add L2 Cache information */
};

#define hawaii_cache_info kaveri_cache_info
#define tonga_cache_info carrizo_cache_info
#define fiji_cache_info  carrizo_cache_info
#define polaris10_cache_info carrizo_cache_info
#define polaris11_cache_info carrizo_cache_info
#define polaris12_cache_info carrizo_cache_info
#define vegam_cache_info carrizo_cache_info

/* NOTE: L1 cache information has been updated and L2/L3
 * cache information has been added for Vega10 and
 * newer ASICs. The unit for cache_size is KiB.
 * Going forward, the cache details must be checked and
 * updated for every new ASIC.
 */

static struct kfd_gpu_cache_info vega10_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 3,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 3,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 4096,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 16,
	},
};

static struct kfd_gpu_cache_info raven_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 3,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 3,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 1024,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 11,
	},
};

static struct kfd_gpu_cache_info renoir_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 3,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 3,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 1024,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 8,
	},
};

static struct kfd_gpu_cache_info vega12_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 3,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 3,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 2048,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 5,
	},
};

static struct kfd_gpu_cache_info vega20_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 3,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 3,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 8192,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 16,
	},
};

static struct kfd_gpu_cache_info aldebaran_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 8192,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 14,
	},
};

static struct kfd_gpu_cache_info navi10_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* GL1 Data Cache per SA */
		.cache_size = 128,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 10,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 4096,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 10,
	},
};

static struct kfd_gpu_cache_info vangogh_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* GL1 Data Cache per SA */
		.cache_size = 128,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 8,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 1024,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 8,
	},
};

static struct kfd_gpu_cache_info navi14_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* GL1 Data Cache per SA */
		.cache_size = 128,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 12,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 2048,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 12,
	},
};

static struct kfd_gpu_cache_info sienna_cichlid_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* GL1 Data Cache per SA */
		.cache_size = 128,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 10,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 4096,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 10,
	},
	{
		/* L3 Data Cache per GPU */
		.cache_size = 128*1024,
		.cache_level = 3,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 10,
	},
};

static struct kfd_gpu_cache_info navy_flounder_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* GL1 Data Cache per SA */
		.cache_size = 128,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 10,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 3072,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 10,
	},
	{
		/* L3 Data Cache per GPU */
		.cache_size = 96*1024,
		.cache_level = 3,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 10,
	},
};

static struct kfd_gpu_cache_info dimgrey_cavefish_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* GL1 Data Cache per SA */
		.cache_size = 128,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 8,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 2048,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 8,
	},
	{
		/* L3 Data Cache per GPU */
		.cache_size = 32*1024,
		.cache_level = 3,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 8,
	},
};

static struct kfd_gpu_cache_info beige_goby_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* GL1 Data Cache per SA */
		.cache_size = 128,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 8,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 1024,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 8,
	},
	{
		/* L3 Data Cache per GPU */
		.cache_size = 16*1024,
		.cache_level = 3,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 8,
	},
};

static struct kfd_gpu_cache_info yellow_carp_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* GL1 Data Cache per SA */
		.cache_size = 128,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 6,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 2048,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 6,
	},
};

static struct kfd_gpu_cache_info gfx1037_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* GL1 Data Cache per SA */
		.cache_size = 128,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 256,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
};

static struct kfd_gpu_cache_info gc_10_3_6_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
			  CRAT_CACHE_FLAGS_DATA_CACHE |
			  CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
			  CRAT_CACHE_FLAGS_INST_CACHE |
			  CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
			  CRAT_CACHE_FLAGS_DATA_CACHE |
			  CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* GL1 Data Cache per SA */
		.cache_size = 128,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
			  CRAT_CACHE_FLAGS_DATA_CACHE |
			  CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 256,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
			  CRAT_CACHE_FLAGS_DATA_CACHE |
			  CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
};

static struct kfd_gpu_cache_info dummy_cache_info[] = {
	{
		/* TCP L1 Cache per CU */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 1,
	},
	{
		/* Scalar L1 Instruction Cache per SQC */
		.cache_size = 32,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_INST_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* Scalar L1 Data Cache per SQC */
		.cache_size = 16,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 2,
	},
	{
		/* GL1 Data Cache per SA */
		.cache_size = 128,
		.cache_level = 1,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 6,
	},
	{
		/* L2 Data Cache per GPU (Total Tex Cache) */
		.cache_size = 2048,
		.cache_level = 2,
		.flags = (CRAT_CACHE_FLAGS_ENABLED |
				CRAT_CACHE_FLAGS_DATA_CACHE |
				CRAT_CACHE_FLAGS_SIMD_CACHE),
		.num_cu_shared = 6,
	},
};

static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev,
		struct crat_subtype_computeunit *cu)
{
	dev->node_props.cpu_cores_count = cu->num_cpu_cores;
	dev->node_props.cpu_core_id_base = cu->processor_id_low;
	if (cu->hsa_capability & CRAT_CU_FLAGS_IOMMU_PRESENT)
		dev->node_props.capability |= HSA_CAP_ATS_PRESENT;

	pr_debug("CU CPU: cores=%d id_base=%d\n", cu->num_cpu_cores,
			cu->processor_id_low);
}

static void kfd_populated_cu_info_gpu(struct kfd_topology_device *dev,
		struct crat_subtype_computeunit *cu)
{
	dev->node_props.simd_id_base = cu->processor_id_low;
	dev->node_props.simd_count = cu->num_simd_cores;
	dev->node_props.lds_size_in_kb = cu->lds_size_in_kb;
	dev->node_props.max_waves_per_simd = cu->max_waves_simd;
	dev->node_props.wave_front_size = cu->wave_front_size;
	dev->node_props.array_count = cu->array_count;
	dev->node_props.cu_per_simd_array = cu->num_cu_per_array;
	dev->node_props.simd_per_cu = cu->num_simd_per_cu;
	dev->node_props.max_slots_scratch_cu = cu->max_slots_scatch_cu;
	if (cu->hsa_capability & CRAT_CU_FLAGS_HOT_PLUGGABLE)
		dev->node_props.capability |= HSA_CAP_HOT_PLUGGABLE;
	pr_debug("CU GPU: id_base=%d\n", cu->processor_id_low);
}

/* kfd_parse_subtype_cu - parse compute unit subtypes and attach them to the
 * correct topology device present in the device_list
 */
static int kfd_parse_subtype_cu(struct crat_subtype_computeunit *cu,
				struct list_head *device_list)
{
	struct kfd_topology_device *dev;

	pr_debug("Found CU entry in CRAT table with proximity_domain=%d caps=%x\n",
			cu->proximity_domain, cu->hsa_capability);
	list_for_each_entry(dev, device_list, list) {
		if (cu->proximity_domain == dev->proximity_domain) {
			if (cu->flags & CRAT_CU_FLAGS_CPU_PRESENT)
				kfd_populated_cu_info_cpu(dev, cu);

			if (cu->flags & CRAT_CU_FLAGS_GPU_PRESENT)
				kfd_populated_cu_info_gpu(dev, cu);
			break;
		}
	}

	return 0;
}

static struct kfd_mem_properties *
find_subtype_mem(uint32_t heap_type, uint32_t flags, uint32_t width,
		struct kfd_topology_device *dev)
{
	struct kfd_mem_properties *props;

	list_for_each_entry(props, &dev->mem_props, list) {
		if (props->heap_type == heap_type
				&& props->flags == flags
				&& props->width == width)
			return props;
	}

	return NULL;
}

/* kfd_parse_subtype_mem - parse memory subtypes and attach them to the
 * correct topology device present in the device_list
 */
static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem,
				struct list_head *device_list)
{
	struct kfd_mem_properties *props;
	struct kfd_topology_device *dev;
	uint32_t heap_type;
	uint64_t size_in_bytes;
	uint32_t flags = 0;
	uint32_t width;

	pr_debug("Found memory entry in CRAT table with proximity_domain=%d\n",
			mem->proximity_domain);
	list_for_each_entry(dev, device_list, list) {
		if (mem->proximity_domain == dev->proximity_domain) {
			/* We're on GPU node */
			if (dev->node_props.cpu_cores_count == 0) {
				/* APU */
				if (mem->visibility_type == 0)
					heap_type =
						HSA_MEM_HEAP_TYPE_FB_PRIVATE;
				/* dGPU */
				else
					heap_type = mem->visibility_type;
			} else
				heap_type = HSA_MEM_HEAP_TYPE_SYSTEM;

			if (mem->flags & CRAT_MEM_FLAGS_HOT_PLUGGABLE)
				flags |= HSA_MEM_FLAGS_HOT_PLUGGABLE;
			if (mem->flags & CRAT_MEM_FLAGS_NON_VOLATILE)
				flags |= HSA_MEM_FLAGS_NON_VOLATILE;

			size_in_bytes =
				((uint64_t)mem->length_high << 32) +
							mem->length_low;
			width = mem->width;

			/* Multiple banks of the same type are aggregated into
			 * one. User mode doesn't care about multiple physical
			 * memory segments. It's managed as a single virtual
			 * heap for user mode.
			 */
			props = find_subtype_mem(heap_type, flags, width, dev);
			if (props) {
				props->size_in_bytes += size_in_bytes;
				break;
			}

			props = kfd_alloc_struct(props);
			if (!props)
				return -ENOMEM;

			props->heap_type = heap_type;
			props->flags = flags;
			if (size_in_bytes == 0)
				size_in_bytes = DUMMY_VRAM_SIZE; /* Fixme: TBD */
			props->size_in_bytes = size_in_bytes;
			props->width = width;

			dev->node_props.mem_banks_count++;
			list_add_tail(&props->list, &dev->mem_props);

			break;
		}
	}

	return 0;
}
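
/*
 * Aggregation example (hypothetical numbers): two CRAT memory entries for
 * the same proximity domain with identical heap_type/flags/width, e.g.
 * 2 x 8 GiB FB_PRIVATE banks, end up as a single kfd_mem_properties node
 * with size_in_bytes == 16 GiB, because find_subtype_mem() matches the
 * first bank when the second entry is parsed.
 */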

/* kfd_parse_subtype_cache - parse cache subtypes and attach them to the
 * correct topology device present in the device_list
 */
static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache,
			struct list_head *device_list)
{
	struct kfd_cache_properties *props;
	struct kfd_topology_device *dev;
	uint32_t id;
	uint32_t total_num_of_cu;

	id = cache->processor_id_low;

	pr_debug("Found cache entry in CRAT table with processor_id=%d\n", id);
	list_for_each_entry(dev, device_list, list) {
		total_num_of_cu = (dev->node_props.array_count *
					dev->node_props.cu_per_simd_array);

		/* Cache information in CRAT doesn't have proximity_domain
		 * information as it is associated with a CPU core or GPU
		 * Compute Unit. So map the cache using the CPU core ID or
		 * SIMD (GPU) ID.
		 * TODO: This works because currently we can safely assume that
		 *  Compute Units are parsed before caches are parsed. In
		 *  future, remove this dependency.
		 */
		if ((id >= dev->node_props.cpu_core_id_base &&
			id <= dev->node_props.cpu_core_id_base +
				dev->node_props.cpu_cores_count) ||
			(id >= dev->node_props.simd_id_base &&
			id < dev->node_props.simd_id_base +
				total_num_of_cu)) {
			props = kfd_alloc_struct(props);
			if (!props)
				return -ENOMEM;

			props->processor_id_low = id;
			props->cache_level = cache->cache_level;
			props->cache_size = cache->cache_size;
			props->cacheline_size = cache->cache_line_size;
			props->cachelines_per_tag = cache->lines_per_tag;
			props->cache_assoc = cache->associativity;
			props->cache_latency = cache->cache_latency;

			memcpy(props->sibling_map, cache->sibling_map,
					CRAT_SIBLINGMAP_SIZE);

			/* set the sibling_map_size as 32 for CRAT from ACPI */
			props->sibling_map_size = CRAT_SIBLINGMAP_SIZE;

			if (cache->flags & CRAT_CACHE_FLAGS_DATA_CACHE)
				props->cache_type |= HSA_CACHE_TYPE_DATA;
			if (cache->flags & CRAT_CACHE_FLAGS_INST_CACHE)
				props->cache_type |= HSA_CACHE_TYPE_INSTRUCTION;
			if (cache->flags & CRAT_CACHE_FLAGS_CPU_CACHE)
				props->cache_type |= HSA_CACHE_TYPE_CPU;
			if (cache->flags & CRAT_CACHE_FLAGS_SIMD_CACHE)
				props->cache_type |= HSA_CACHE_TYPE_HSACU;

			dev->node_props.caches_count++;
			list_add_tail(&props->list, &dev->cache_props);

			break;
		}
	}

	return 0;
}
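
/*
 * Mapping example (hypothetical IDs): with simd_id_base == 0x80001000 and
 * total_num_of_cu == 64, a cache entry whose processor_id_low is
 * 0x80001020 falls inside [simd_id_base, simd_id_base + total_num_of_cu)
 * and is therefore attached to that GPU's topology device.
 */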

/* kfd_parse_subtype_iolink - parse iolink subtypes and attach them to the
 * correct topology device present in the device_list
 */
static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
					struct list_head *device_list)
{
	struct kfd_iolink_properties *props = NULL, *props2;
	struct kfd_topology_device *dev, *to_dev;
	uint32_t id_from;
	uint32_t id_to;

	id_from = iolink->proximity_domain_from;
	id_to = iolink->proximity_domain_to;

	pr_debug("Found IO link entry in CRAT table with id_from=%d, id_to %d\n",
			id_from, id_to);
	list_for_each_entry(dev, device_list, list) {
		if (id_from == dev->proximity_domain) {
			props = kfd_alloc_struct(props);
			if (!props)
				return -ENOMEM;

			props->node_from = id_from;
			props->node_to = id_to;
			props->ver_maj = iolink->version_major;
			props->ver_min = iolink->version_minor;
			props->iolink_type = iolink->io_interface_type;

			if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS)
				props->weight = 20;
			else if (props->iolink_type == CRAT_IOLINK_TYPE_XGMI)
				props->weight = iolink->weight_xgmi;
			else
				props->weight = node_distance(id_from, id_to);

			props->min_latency = iolink->minimum_latency;
			props->max_latency = iolink->maximum_latency;
			props->min_bandwidth = iolink->minimum_bandwidth_mbs;
			props->max_bandwidth = iolink->maximum_bandwidth_mbs;
			props->rec_transfer_size =
					iolink->recommended_transfer_size;

			dev->node_props.io_links_count++;
			list_add_tail(&props->list, &dev->io_link_props);
			break;
		}
	}

	/* CPU topology is created before GPUs are detected, so CPU->GPU
	 * links are not built at that time. If a PCIe type is discovered, it
	 * means a GPU is detected and we are adding GPU->CPU to the topology.
	 * At this time, also add the corresponding CPU->GPU link if the GPU
	 * has a large BAR.
	 * For xGMI, only one direction of the link is present in the CRAT
	 * table; add the corresponding reverse-direction link now.
	 */
	if (props && (iolink->flags & CRAT_IOLINK_FLAGS_BI_DIRECTIONAL)) {
		to_dev = kfd_topology_device_by_proximity_domain_no_lock(id_to);
		if (!to_dev)
			return -ENODEV;
		/* same everything but the other direction */
		props2 = kmemdup(props, sizeof(*props2), GFP_KERNEL);
		if (!props2)
			return -ENOMEM;

		props2->node_from = id_to;
		props2->node_to = id_from;
		props2->kobj = NULL;
		to_dev->node_props.io_links_count++;
		list_add_tail(&props2->list, &to_dev->io_link_props);
	}

	return 0;
}
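
/*
 * Example: a CRAT PCIe link entry 1 -> 0 (GPU domain 1, CPU domain 0) with
 * CRAT_IOLINK_FLAGS_BI_DIRECTIONAL set yields two kfd_iolink_properties:
 * node_from=1/node_to=0 on domain 1's io_link_props list, plus the
 * kmemdup()'d reverse node_from=0/node_to=1 on domain 0's list.
 */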

/* kfd_parse_subtype - parse subtypes and attach them to the correct topology
 * device present in the device_list
 *	@sub_type_hdr - subtype section of crat_image
 *	@device_list - list of topology devices present in this crat_image
 */
static int kfd_parse_subtype(struct crat_subtype_generic *sub_type_hdr,
				struct list_head *device_list)
{
	struct crat_subtype_computeunit *cu;
	struct crat_subtype_memory *mem;
	struct crat_subtype_cache *cache;
	struct crat_subtype_iolink *iolink;
	int ret = 0;

	switch (sub_type_hdr->type) {
	case CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY:
		cu = (struct crat_subtype_computeunit *)sub_type_hdr;
		ret = kfd_parse_subtype_cu(cu, device_list);
		break;
	case CRAT_SUBTYPE_MEMORY_AFFINITY:
		mem = (struct crat_subtype_memory *)sub_type_hdr;
		ret = kfd_parse_subtype_mem(mem, device_list);
		break;
	case CRAT_SUBTYPE_CACHE_AFFINITY:
		cache = (struct crat_subtype_cache *)sub_type_hdr;
		ret = kfd_parse_subtype_cache(cache, device_list);
		break;
	case CRAT_SUBTYPE_TLB_AFFINITY:
		/*
		 * For now, nothing to do here
		 */
		pr_debug("Found TLB entry in CRAT table (not processing)\n");
		break;
	case CRAT_SUBTYPE_CCOMPUTE_AFFINITY:
		/*
		 * For now, nothing to do here
		 */
		pr_debug("Found CCOMPUTE entry in CRAT table (not processing)\n");
		break;
	case CRAT_SUBTYPE_IOLINK_AFFINITY:
		iolink = (struct crat_subtype_iolink *)sub_type_hdr;
		ret = kfd_parse_subtype_iolink(iolink, device_list);
		break;
	default:
		pr_warn("Unknown subtype %d in CRAT\n",
				sub_type_hdr->type);
	}

	return ret;
}

/* kfd_parse_crat_table - parse CRAT table. For each node present in CRAT
 * create a kfd_topology_device and add it to device_list. Also parse
 * CRAT subtypes and attach them to the appropriate kfd_topology_device
 *	@crat_image - input image containing CRAT
 *	@device_list - [OUT] list of kfd_topology_device generated after
 *		       parsing crat_image
 *	@proximity_domain - Proximity domain of the first device in the table
 *
 *	Return - 0 if successful else -ve value
 */
int kfd_parse_crat_table(void *crat_image, struct list_head *device_list,
			 uint32_t proximity_domain)
{
	struct kfd_topology_device *top_dev = NULL;
	struct crat_subtype_generic *sub_type_hdr;
	uint16_t node_id;
	int ret = 0;
	struct crat_header *crat_table = (struct crat_header *)crat_image;
	uint16_t num_nodes;
	uint32_t image_len;

	if (!crat_image)
		return -EINVAL;

	if (!list_empty(device_list)) {
		pr_warn("Error: device list should be empty\n");
		return -EINVAL;
	}

	num_nodes = crat_table->num_domains;
	image_len = crat_table->length;

	pr_debug("Parsing CRAT table with %d nodes\n", num_nodes);

	for (node_id = 0; node_id < num_nodes; node_id++) {
		top_dev = kfd_create_topology_device(device_list);
		if (!top_dev)
			break;
		top_dev->proximity_domain = proximity_domain++;
	}

	if (!top_dev) {
		ret = -ENOMEM;
		goto err;
	}

	memcpy(top_dev->oem_id, crat_table->oem_id, CRAT_OEMID_LENGTH);
	memcpy(top_dev->oem_table_id, crat_table->oem_table_id,
			CRAT_OEMTABLEID_LENGTH);
	top_dev->oem_revision = crat_table->oem_revision;

	sub_type_hdr = (struct crat_subtype_generic *)(crat_table+1);
	while ((char *)sub_type_hdr + sizeof(struct crat_subtype_generic) <
			((char *)crat_image) + image_len) {
		if (sub_type_hdr->flags & CRAT_SUBTYPE_FLAGS_ENABLED) {
			ret = kfd_parse_subtype(sub_type_hdr, device_list);
			if (ret)
				break;
		}

		sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
				sub_type_hdr->length);
	}

err:
	if (ret)
		kfd_release_topology_device_list(device_list);

	return ret;
}
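
/*
 * Typical caller flow (sketch only; see kfd_topology.c for the real
 * sequence):
 *
 *	struct list_head temp_list;
 *	void *crat_image = NULL;
 *	size_t image_size = 0;
 *
 *	INIT_LIST_HEAD(&temp_list);
 *	if (!kfd_create_crat_image_acpi(&crat_image, &image_size))
 *		kfd_parse_crat_table(crat_image, &temp_list, 0);
 *	...
 *	kfd_destroy_crat_image(crat_image);
 */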


static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
						   struct kfd_gpu_cache_info *pcache_info)
{
	struct amdgpu_device *adev = kdev->adev;
	int i = 0;

	/* TCP L1 Cache per CU */
	if (adev->gfx.config.gc_tcp_l1_size) {
		pcache_info[i].cache_size = adev->gfx.config.gc_tcp_l1_size;
		pcache_info[i].cache_level = 1;
		pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
					CRAT_CACHE_FLAGS_DATA_CACHE |
					CRAT_CACHE_FLAGS_SIMD_CACHE);
		pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2;
		i++;
	}
	/* Scalar L1 Instruction Cache per SQC */
	if (adev->gfx.config.gc_l1_instruction_cache_size_per_sqc) {
		pcache_info[i].cache_size =
			adev->gfx.config.gc_l1_instruction_cache_size_per_sqc;
		pcache_info[i].cache_level = 1;
		pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
					CRAT_CACHE_FLAGS_INST_CACHE |
					CRAT_CACHE_FLAGS_SIMD_CACHE);
		pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
		i++;
	}
	/* Scalar L1 Data Cache per SQC */
	if (adev->gfx.config.gc_l1_data_cache_size_per_sqc) {
		pcache_info[i].cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc;
		pcache_info[i].cache_level = 1;
		pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
					CRAT_CACHE_FLAGS_DATA_CACHE |
					CRAT_CACHE_FLAGS_SIMD_CACHE);
		pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
		i++;
	}
	/* GL1 Data Cache per SA */
	if (adev->gfx.config.gc_gl1c_per_sa &&
	    adev->gfx.config.gc_gl1c_size_per_instance) {
		pcache_info[i].cache_size = adev->gfx.config.gc_gl1c_per_sa *
			adev->gfx.config.gc_gl1c_size_per_instance;
		pcache_info[i].cache_level = 1;
		pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
					CRAT_CACHE_FLAGS_DATA_CACHE |
					CRAT_CACHE_FLAGS_SIMD_CACHE);
		pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
		i++;
	}
	/* L2 Data Cache per GPU (Total Tex Cache) */
	if (adev->gfx.config.gc_gl2c_per_gpu) {
		pcache_info[i].cache_size = adev->gfx.config.gc_gl2c_per_gpu;
		pcache_info[i].cache_level = 2;
		pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
					CRAT_CACHE_FLAGS_DATA_CACHE |
					CRAT_CACHE_FLAGS_SIMD_CACHE);
		pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
		i++;
	}
	/* L3 Data Cache per GPU */
	if (adev->gmc.mall_size) {
		pcache_info[i].cache_size = adev->gmc.mall_size / 1024;
		pcache_info[i].cache_level = 3;
		pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
					CRAT_CACHE_FLAGS_DATA_CACHE |
					CRAT_CACHE_FLAGS_SIMD_CACHE);
		pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
		i++;
	}
	return i;
}
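
/*
 * Worked example (hypothetical gfx config values): with gc_tcp_l1_size = 16
 * and gc_num_tcp_per_wpg = 4, the first entry becomes a 16 KiB L1 data
 * cache shared by 4 / 2 = 2 CUs. Each cache that is present in the config
 * appends one more entry, so the return value is the number of entries
 * actually populated.
 */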

int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pcache_info)
{
	int num_of_cache_types = 0;

	switch (kdev->adev->asic_type) {
	case CHIP_KAVERI:
		*pcache_info = kaveri_cache_info;
		num_of_cache_types = ARRAY_SIZE(kaveri_cache_info);
		break;
	case CHIP_HAWAII:
		*pcache_info = hawaii_cache_info;
		num_of_cache_types = ARRAY_SIZE(hawaii_cache_info);
		break;
	case CHIP_CARRIZO:
		*pcache_info = carrizo_cache_info;
		num_of_cache_types = ARRAY_SIZE(carrizo_cache_info);
		break;
	case CHIP_TONGA:
		*pcache_info = tonga_cache_info;
		num_of_cache_types = ARRAY_SIZE(tonga_cache_info);
		break;
	case CHIP_FIJI:
		*pcache_info = fiji_cache_info;
		num_of_cache_types = ARRAY_SIZE(fiji_cache_info);
		break;
	case CHIP_POLARIS10:
		*pcache_info = polaris10_cache_info;
		num_of_cache_types = ARRAY_SIZE(polaris10_cache_info);
		break;
	case CHIP_POLARIS11:
		*pcache_info = polaris11_cache_info;
		num_of_cache_types = ARRAY_SIZE(polaris11_cache_info);
		break;
	case CHIP_POLARIS12:
		*pcache_info = polaris12_cache_info;
		num_of_cache_types = ARRAY_SIZE(polaris12_cache_info);
		break;
	case CHIP_VEGAM:
		*pcache_info = vegam_cache_info;
		num_of_cache_types = ARRAY_SIZE(vegam_cache_info);
		break;
	default:
		switch (KFD_GC_VERSION(kdev)) {
		case IP_VERSION(9, 0, 1):
			*pcache_info = vega10_cache_info;
			num_of_cache_types = ARRAY_SIZE(vega10_cache_info);
			break;
		case IP_VERSION(9, 2, 1):
			*pcache_info = vega12_cache_info;
			num_of_cache_types = ARRAY_SIZE(vega12_cache_info);
			break;
		case IP_VERSION(9, 4, 0):
		case IP_VERSION(9, 4, 1):
			*pcache_info = vega20_cache_info;
			num_of_cache_types = ARRAY_SIZE(vega20_cache_info);
			break;
		case IP_VERSION(9, 4, 2):
		case IP_VERSION(9, 4, 3):
			*pcache_info = aldebaran_cache_info;
			num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info);
			break;
		case IP_VERSION(9, 1, 0):
		case IP_VERSION(9, 2, 2):
			*pcache_info = raven_cache_info;
			num_of_cache_types = ARRAY_SIZE(raven_cache_info);
			break;
		case IP_VERSION(9, 3, 0):
			*pcache_info = renoir_cache_info;
			num_of_cache_types = ARRAY_SIZE(renoir_cache_info);
			break;
		case IP_VERSION(10, 1, 10):
		case IP_VERSION(10, 1, 2):
		case IP_VERSION(10, 1, 3):
		case IP_VERSION(10, 1, 4):
			*pcache_info = navi10_cache_info;
			num_of_cache_types = ARRAY_SIZE(navi10_cache_info);
			break;
		case IP_VERSION(10, 1, 1):
			*pcache_info = navi14_cache_info;
			num_of_cache_types = ARRAY_SIZE(navi14_cache_info);
			break;
		case IP_VERSION(10, 3, 0):
			*pcache_info = sienna_cichlid_cache_info;
			num_of_cache_types = ARRAY_SIZE(sienna_cichlid_cache_info);
			break;
		case IP_VERSION(10, 3, 2):
			*pcache_info = navy_flounder_cache_info;
			num_of_cache_types = ARRAY_SIZE(navy_flounder_cache_info);
			break;
		case IP_VERSION(10, 3, 4):
			*pcache_info = dimgrey_cavefish_cache_info;
			num_of_cache_types = ARRAY_SIZE(dimgrey_cavefish_cache_info);
			break;
		case IP_VERSION(10, 3, 1):
			*pcache_info = vangogh_cache_info;
			num_of_cache_types = ARRAY_SIZE(vangogh_cache_info);
			break;
		case IP_VERSION(10, 3, 5):
			*pcache_info = beige_goby_cache_info;
			num_of_cache_types = ARRAY_SIZE(beige_goby_cache_info);
			break;
		case IP_VERSION(10, 3, 3):
			*pcache_info = yellow_carp_cache_info;
			num_of_cache_types = ARRAY_SIZE(yellow_carp_cache_info);
			break;
		case IP_VERSION(10, 3, 6):
			*pcache_info = gc_10_3_6_cache_info;
			num_of_cache_types = ARRAY_SIZE(gc_10_3_6_cache_info);
			break;
		case IP_VERSION(10, 3, 7):
			*pcache_info = gfx1037_cache_info;
			num_of_cache_types = ARRAY_SIZE(gfx1037_cache_info);
			break;
		case IP_VERSION(11, 0, 0):
		case IP_VERSION(11, 0, 1):
		case IP_VERSION(11, 0, 2):
		case IP_VERSION(11, 0, 3):
		case IP_VERSION(11, 0, 4):
			num_of_cache_types =
				kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd, *pcache_info);
			break;
		default:
			*pcache_info = dummy_cache_info;
			num_of_cache_types = ARRAY_SIZE(dummy_cache_info);
			pr_warn("dummy cache info is used temporarily; real cache info needs to be added later.\n");
			break;
		}
	}
	return num_of_cache_types;
}
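
/*
 * Usage sketch (assumed caller pattern): the topology code retrieves the
 * per-ASIC table and iterates it, e.g.
 *
 *	struct kfd_gpu_cache_info *pcache_info = NULL;
 *	int i, n = kfd_get_gpu_cache_info(kdev, &pcache_info);
 *
 *	for (i = 0; i < n; i++)
 *		pr_debug("L%d: %d KiB\n", pcache_info[i].cache_level,
 *			 pcache_info[i].cache_size);
 *
 * Note that for the GC 11.x cases above, *pcache_info must already point
 * at writable storage, since the info is filled in from the gfx config
 * rather than returned as a pointer to a static table.
 */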

static bool kfd_ignore_crat(void)
{
	bool ret;

	if (ignore_crat)
		return true;

#ifndef KFD_SUPPORT_IOMMU_V2
	ret = true;
#else
	ret = false;
#endif

	return ret;
}

/*
 * kfd_create_crat_image_acpi - Allocates memory for CRAT image and
 * copies CRAT from ACPI (if available).
 * NOTE: Call kfd_destroy_crat_image to free CRAT image memory
 *
 *	@crat_image: CRAT read from ACPI. If no CRAT in ACPI then
 *		     crat_image will be NULL
 *	@size: [OUT] size of crat_image
 *
 *	Return 0 if successful else return error code
 */
int kfd_create_crat_image_acpi(void **crat_image, size_t *size)
{
	struct acpi_table_header *crat_table;
	acpi_status status;
	void *pcrat_image;
	int rc = 0;

	if (!crat_image)
		return -EINVAL;

	*crat_image = NULL;

	if (kfd_ignore_crat()) {
		pr_info("CRAT table disabled by module option\n");
		return -ENODATA;
	}

	/* Fetch the CRAT table from ACPI */
	status = acpi_get_table(CRAT_SIGNATURE, 0, &crat_table);
	if (status == AE_NOT_FOUND) {
		pr_info("CRAT table not found\n");
		return -ENODATA;
	} else if (ACPI_FAILURE(status)) {
		const char *err = acpi_format_exception(status);

		pr_err("CRAT table error: %s\n", err);
		return -EINVAL;
	}

	pcrat_image = kvmalloc(crat_table->length, GFP_KERNEL);
	if (!pcrat_image) {
		rc = -ENOMEM;
		goto out;
	}

	memcpy(pcrat_image, crat_table, crat_table->length);
	*crat_image = pcrat_image;
	*size = crat_table->length;
out:
	acpi_put_table(crat_table);
	return rc;
}
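
/*
 * Error-handling sketch: callers can distinguish "no table" from "bad
 * table" (as the topology code does when deciding to build a virtual
 * CRAT instead):
 *
 *	ret = kfd_create_crat_image_acpi(&crat_image, &size);
 *	if (ret == -ENODATA)
 *		;	// no/ignored CRAT: fall back to a virtual CRAT
 *	else if (ret)
 *		;	// genuine failure (-EINVAL or -ENOMEM)
 */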

/* Memory required to create Virtual CRAT.
 * Since there is no easy way to predict the amount of memory required, the
 * following amount is allocated for the GPU Virtual CRAT. This is
 * expected to cover all known conditions. To be safe, an additional check
 * is put in the code to ensure we don't overwrite beyond it.
 */
#define VCRAT_SIZE_FOR_GPU	(4 * PAGE_SIZE)

/* kfd_fill_cu_for_cpu - Fill in Compute info for the given CPU NUMA node
 *
 *	@numa_node_id: CPU NUMA node id
 *	@avail_size: Available size in the memory
 *	@sub_type_hdr: Memory into which compute info will be filled in
 *
 *	Return 0 if successful else return -ve value
 */
static int kfd_fill_cu_for_cpu(int numa_node_id, int *avail_size,
				int proximity_domain,
				struct crat_subtype_computeunit *sub_type_hdr)
{
	const struct cpumask *cpumask;

	*avail_size -= sizeof(struct crat_subtype_computeunit);
	if (*avail_size < 0)
		return -ENOMEM;

	memset(sub_type_hdr, 0, sizeof(struct crat_subtype_computeunit));

	/* Fill in subtype header data */
	sub_type_hdr->type = CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY;
	sub_type_hdr->length = sizeof(struct crat_subtype_computeunit);
	sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;

	cpumask = cpumask_of_node(numa_node_id);

	/* Fill in CU data */
	sub_type_hdr->flags |= CRAT_CU_FLAGS_CPU_PRESENT;
	sub_type_hdr->proximity_domain = proximity_domain;
	sub_type_hdr->processor_id_low = kfd_numa_node_to_apic_id(numa_node_id);
	if (sub_type_hdr->processor_id_low == -1)
		return -EINVAL;

	sub_type_hdr->num_cpu_cores = cpumask_weight(cpumask);

	return 0;
}

/* kfd_fill_mem_info_for_cpu - Fill in Memory info for the given CPU NUMA node
 *
 *	@numa_node_id: CPU NUMA node id
 *	@avail_size: Available size in the memory
 *	@sub_type_hdr: Memory into which memory info will be filled in
 *
 *	Return 0 if successful else return -ve value
 */
static int kfd_fill_mem_info_for_cpu(int numa_node_id, int *avail_size,
			int proximity_domain,
			struct crat_subtype_memory *sub_type_hdr)
{
	uint64_t mem_in_bytes = 0;
	pg_data_t *pgdat;
	int zone_type;

	*avail_size -= sizeof(struct crat_subtype_memory);
	if (*avail_size < 0)
		return -ENOMEM;

	memset(sub_type_hdr, 0, sizeof(struct crat_subtype_memory));

	/* Fill in subtype header data */
	sub_type_hdr->type = CRAT_SUBTYPE_MEMORY_AFFINITY;
	sub_type_hdr->length = sizeof(struct crat_subtype_memory);
	sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;

	/* Fill in Memory Subunit data */

	/* Unlike si_meminfo, si_meminfo_node is not exported. So
	 * the following lines are duplicated from si_meminfo_node
	 * function
	 */
	pgdat = NODE_DATA(numa_node_id);
	for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
		mem_in_bytes += zone_managed_pages(&pgdat->node_zones[zone_type]);
	mem_in_bytes <<= PAGE_SHIFT;

	sub_type_hdr->length_low = lower_32_bits(mem_in_bytes);
	sub_type_hdr->length_high = upper_32_bits(mem_in_bytes);
	sub_type_hdr->proximity_domain = proximity_domain;

	return 0;
}
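
/*
 * Worked example (hypothetical node size): if the node's zones manage a
 * total of 0x200000 pages with a 4 KiB PAGE_SIZE, then
 * mem_in_bytes = 0x200000 << 12 = 8 GiB, reported to user mode as
 * length_low/length_high = the lower/upper 32 bits of that value.
 */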

#ifdef CONFIG_X86_64
static int kfd_fill_iolink_info_for_cpu(int numa_node_id, int *avail_size,
				uint32_t *num_entries,
				struct crat_subtype_iolink *sub_type_hdr)
{
	int nid;
	struct cpuinfo_x86 *c = &cpu_data(0);
	uint8_t link_type;

	if (c->x86_vendor == X86_VENDOR_AMD)
		link_type = CRAT_IOLINK_TYPE_HYPERTRANSPORT;
	else
		link_type = CRAT_IOLINK_TYPE_QPI_1_1;

	*num_entries = 0;

	/* Create IO links from this node to other CPU nodes */
	for_each_online_node(nid) {
		if (nid == numa_node_id) /* node itself */
			continue;

		*avail_size -= sizeof(struct crat_subtype_iolink);
		if (*avail_size < 0)
			return -ENOMEM;

		memset(sub_type_hdr, 0, sizeof(struct crat_subtype_iolink));

		/* Fill in subtype header data */
		sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY;
		sub_type_hdr->length = sizeof(struct crat_subtype_iolink);
		sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;

		/* Fill in IO link data */
		sub_type_hdr->proximity_domain_from = numa_node_id;
		sub_type_hdr->proximity_domain_to = nid;
		sub_type_hdr->io_interface_type = link_type;

		(*num_entries)++;
		sub_type_hdr++;
	}

	return 0;
}
#endif
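
/*
 * Example: with 4 online NUMA nodes on x86-64, each node emits 3 IO link
 * subtypes (one to every other node), so the CPU VCRAT gains
 * num_nodes * (num_nodes - 1) = 12 link entries in total.
 */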

/* kfd_create_vcrat_image_cpu - Create Virtual CRAT for CPU
 *
 *	@pcrat_image: Fill in VCRAT for CPU
 *	@size:	[IN] allocated size of crat_image.
 *		[OUT] actual size of data filled in crat_image
 */
static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size)
{
	struct crat_header *crat_table = (struct crat_header *)pcrat_image;
	struct acpi_table_header *acpi_table;
	acpi_status status;
	struct crat_subtype_generic *sub_type_hdr;
	int avail_size = *size;
	int numa_node_id;
#ifdef CONFIG_X86_64
	uint32_t entries = 0;
#endif
	int ret = 0;

	if (!pcrat_image)
		return -EINVAL;

	/* Fill in CRAT Header.
	 * Modify length and total_entries as subunits are added.
	 */
	avail_size -= sizeof(struct crat_header);
	if (avail_size < 0)
		return -ENOMEM;

	memset(crat_table, 0, sizeof(struct crat_header));
	memcpy(&crat_table->signature, CRAT_SIGNATURE,
			sizeof(crat_table->signature));
	crat_table->length = sizeof(struct crat_header);

	status = acpi_get_table("DSDT", 0, &acpi_table);
	if (status != AE_OK)
		pr_warn("DSDT table not found for OEM information\n");
	else {
		crat_table->oem_revision = acpi_table->revision;
		memcpy(crat_table->oem_id, acpi_table->oem_id,
				CRAT_OEMID_LENGTH);
		memcpy(crat_table->oem_table_id, acpi_table->oem_table_id,
				CRAT_OEMTABLEID_LENGTH);
		acpi_put_table(acpi_table);
	}
	crat_table->total_entries = 0;
	crat_table->num_domains = 0;

	sub_type_hdr = (struct crat_subtype_generic *)(crat_table+1);

	for_each_online_node(numa_node_id) {
		if (kfd_numa_node_to_apic_id(numa_node_id) == -1)
			continue;

		/* Fill in Subtype: Compute Unit */
		ret = kfd_fill_cu_for_cpu(numa_node_id, &avail_size,
			crat_table->num_domains,
			(struct crat_subtype_computeunit *)sub_type_hdr);
		if (ret < 0)
			return ret;
		crat_table->length += sub_type_hdr->length;
		crat_table->total_entries++;

		sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
			sub_type_hdr->length);

		/* Fill in Subtype: Memory */
		ret = kfd_fill_mem_info_for_cpu(numa_node_id, &avail_size,
			crat_table->num_domains,
			(struct crat_subtype_memory *)sub_type_hdr);
		if (ret < 0)
			return ret;
		crat_table->length += sub_type_hdr->length;
		crat_table->total_entries++;

		sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
			sub_type_hdr->length);

		/* Fill in Subtype: IO Link */
#ifdef CONFIG_X86_64
		ret = kfd_fill_iolink_info_for_cpu(numa_node_id, &avail_size,
				&entries,
				(struct crat_subtype_iolink *)sub_type_hdr);
		if (ret < 0)
			return ret;

		if (entries) {
			crat_table->length += (sub_type_hdr->length * entries);
			crat_table->total_entries += entries;

			sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
					sub_type_hdr->length * entries);
		}
#else
1845 		pr_info("IO link not available for non-x86_64 platforms\n");
1846 #endif
1847 
1848 		crat_table->num_domains++;
1849 	}
1850 
1851 	/* TODO: Add cache Subtype for CPU.
1852 	 * Currently, CPU cache information is available in the function
1853 	 * detect_cache_attributes(cpu) defined in
1854 	 * ./arch/x86/kernel/cpu/intel_cacheinfo.c. That function is not
1855 	 * exported, so obtaining the same information here would require
1856 	 * duplicating its code.
1857 	 */
1858 
1859 	*size = crat_table->length;
1860 	pr_info("Virtual CRAT table created for CPU\n");
1861 
1862 	return 0;
1863 }
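
/*
 * Illustrative sketch, not driver code: a consumer can walk the
 * finished table by stepping through the variable-length subtype
 * headers, mirroring what the builder above does:
 *
 *	struct crat_header *crat = pcrat_image;
 *	struct crat_subtype_generic *hdr =
 *			(struct crat_subtype_generic *)(crat + 1);
 *	uint32_t i;
 *
 *	for (i = 0; i < crat->total_entries; i++) {
 *		// hdr->type selects the concrete subtype struct
 *		hdr = (struct crat_subtype_generic *)
 *				((char *)hdr + hdr->length);
 *	}
 */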
1864 
1865 static int kfd_fill_gpu_memory_affinity(int *avail_size,
1866 		struct kfd_node *kdev, uint8_t type, uint64_t size,
1867 		struct crat_subtype_memory *sub_type_hdr,
1868 		uint32_t proximity_domain,
1869 		const struct kfd_local_mem_info *local_mem_info)
1870 {
1871 	*avail_size -= sizeof(struct crat_subtype_memory);
1872 	if (*avail_size < 0)
1873 		return -ENOMEM;
1874 
1875 	memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_memory));
1876 	sub_type_hdr->type = CRAT_SUBTYPE_MEMORY_AFFINITY;
1877 	sub_type_hdr->length = sizeof(struct crat_subtype_memory);
1878 	sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED;
1879 
1880 	sub_type_hdr->proximity_domain = proximity_domain;
1881 
1882 	pr_debug("Fill gpu memory affinity - type 0x%x size 0x%llx\n",
1883 			type, size);
1884 
1885 	sub_type_hdr->length_low = lower_32_bits(size);
1886 	sub_type_hdr->length_high = upper_32_bits(size);
1887 
1888 	sub_type_hdr->width = local_mem_info->vram_width;
1889 	sub_type_hdr->visibility_type = type;
1890 
1891 	return 0;
1892 }
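
/*
 * Usage sketch with hypothetical values: reporting an 8 GB public FB
 * heap for proximity domain 1 consumes one crat_subtype_memory entry:
 *
 *	ret = kfd_fill_gpu_memory_affinity(&avail_size, kdev,
 *			HSA_MEM_HEAP_TYPE_FB_PUBLIC, 8ULL << 30,
 *			(struct crat_subtype_memory *)sub_type_hdr,
 *			1, &local_mem_info);
 */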
1893 
1894 #ifdef CONFIG_ACPI_NUMA
1895 static void kfd_find_numa_node_in_srat(struct kfd_node *kdev)
1896 {
1897 	struct acpi_table_header *table_header = NULL;
1898 	struct acpi_subtable_header *sub_header = NULL;
1899 	unsigned long table_end, subtable_len;
1900 	u32 pci_id = pci_domain_nr(kdev->adev->pdev->bus) << 16 |
1901 			pci_dev_id(kdev->adev->pdev);
1902 	u32 bdf;
1903 	acpi_status status;
1904 	struct acpi_srat_cpu_affinity *cpu;
1905 	struct acpi_srat_generic_affinity *gpu;
1906 	int pxm = 0, max_pxm = 0;
1907 	int numa_node = NUMA_NO_NODE;
1908 	bool found = false;
1909 
1910 	/* Fetch the SRAT table from ACPI */
1911 	status = acpi_get_table(ACPI_SIG_SRAT, 0, &table_header);
1912 	if (status == AE_NOT_FOUND) {
1913 		pr_warn("SRAT table not found\n");
1914 		return;
1915 	} else if (ACPI_FAILURE(status)) {
1916 		const char *err = acpi_format_exception(status);
1917 		pr_err("SRAT table error: %s\n", err);
1918 		return;
1919 	}
1920 
1921 	table_end = (unsigned long)table_header + table_header->length;
1922 
1923 	/* Parse all entries looking for a match. */
1924 	sub_header = (struct acpi_subtable_header *)
1925 			((unsigned long)table_header +
1926 			sizeof(struct acpi_table_srat));
1927 	subtable_len = sub_header->length;
1928 
1929 	while (((unsigned long)sub_header) + subtable_len < table_end) {
1930 		/*
1931 		 * If length is 0, break from this loop to avoid
1932 		 * infinite loop.
1933 		 */
1934 		if (subtable_len == 0) {
1935 			pr_err("SRAT invalid zero length\n");
1936 			break;
1937 		}
1938 
1939 		switch (sub_header->type) {
1940 		case ACPI_SRAT_TYPE_CPU_AFFINITY:
1941 			cpu = (struct acpi_srat_cpu_affinity *)sub_header;
1942 			pxm = *((u32 *)cpu->proximity_domain_hi) << 8 |
1943 					cpu->proximity_domain_lo;
1944 			if (pxm > max_pxm)
1945 				max_pxm = pxm;
1946 			break;
1947 		case ACPI_SRAT_TYPE_GENERIC_AFFINITY:
1948 			gpu = (struct acpi_srat_generic_affinity *)sub_header;
1949 			bdf = *((u16 *)(&gpu->device_handle[0])) << 16 |
1950 					*((u16 *)(&gpu->device_handle[2]));
1951 			if (bdf == pci_id) {
1952 				found = true;
1953 				numa_node = pxm_to_node(gpu->proximity_domain);
1954 			}
1955 			break;
1956 		default:
1957 			break;
1958 		}
1959 
1960 		if (found)
1961 			break;
1962 
1963 		sub_header = (struct acpi_subtable_header *)
1964 				((unsigned long)sub_header + subtable_len);
1965 		subtable_len = sub_header->length;
1966 	}
1967 
1968 	acpi_put_table(table_header);
1969 
1970 	/* Workaround bad cpu-gpu binding case */
1971 	if (found && (numa_node < 0 ||
1972 			numa_node > pxm_to_node(max_pxm)))
1973 		numa_node = 0;
1974 
1975 	if (numa_node != NUMA_NO_NODE)
1976 		set_dev_node(&kdev->adev->pdev->dev, numa_node);
1977 }
1978 #endif
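
/*
 * Illustrative sketch of the match key above: for a hypothetical GPU at
 * 0000:03:00.0 the segment/BDF value compared against the SRAT
 * generic-affinity device_handle is
 *
 *	u32 pci_id = pci_domain_nr(bus) << 16 | pci_dev_id(pdev);
 *	// = 0x0000 << 16 | (0x03 << 8 | 0x00) = 0x00000300
 */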
1979 
1980 #define KFD_CRAT_INTRA_SOCKET_WEIGHT	13
1981 #define KFD_CRAT_XGMI_WEIGHT		15
1982 
1983 /* kfd_fill_gpu_direct_io_link_to_cpu - Fill in the direct IO link
1984  * from a GPU to its NUMA node
1985  *	@avail_size: Available size in the memory
1986  *	@kdev: [IN] GPU device
1987  *	@sub_type_hdr: Memory into which the IO link info will be filled in
1988  *	@proximity_domain: proximity domain of the GPU node
1989  *
1990  *	Return 0 if successful else return -ve value
1991  */
1992 static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
1993 			struct kfd_node *kdev,
1994 			struct crat_subtype_iolink *sub_type_hdr,
1995 			uint32_t proximity_domain)
1996 {
1997 	*avail_size -= sizeof(struct crat_subtype_iolink);
1998 	if (*avail_size < 0)
1999 		return -ENOMEM;
2000 
2001 	memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_iolink));
2002 
2003 	/* Fill in subtype header data */
2004 	sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY;
2005 	sub_type_hdr->length = sizeof(struct crat_subtype_iolink);
2006 	sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED;
2007 	if (kfd_dev_is_large_bar(kdev))
2008 		sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;
2009 
2010 	/* Fill in IOLINK subtype.
2011 	 * TODO: Fill-in other fields of iolink subtype
2012 	 */
2013 	if (kdev->adev->gmc.xgmi.connected_to_cpu ||
2014 	    (KFD_GC_VERSION(kdev) == IP_VERSION(9, 4, 3) &&
2015 	     kdev->adev->smuio.funcs->get_pkg_type(kdev->adev) ==
2016 	     AMDGPU_PKG_TYPE_APU)) {
2017 		bool ext_cpu = KFD_GC_VERSION(kdev) != IP_VERSION(9, 4, 3);
2018 		int mem_bw = 819200, weight = ext_cpu ? KFD_CRAT_XGMI_WEIGHT :
2019 							KFD_CRAT_INTRA_SOCKET_WEIGHT;
2020 		uint32_t bandwidth = ext_cpu ? amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(
2021 							kdev->adev, NULL, true) : mem_bw;
2022 
2023 		/*
2024 		 * With a host-GPU xGMI link, the host can access GPU memory
2025 		 * whether or not the PCIe BAR is large, so always create a
2026 		 * bidirectional IO link.
2027 		 */
2028 		sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;
2029 		sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
2030 		sub_type_hdr->weight_xgmi = weight;
2031 		sub_type_hdr->minimum_bandwidth_mbs = bandwidth;
2032 		sub_type_hdr->maximum_bandwidth_mbs = bandwidth;
2033 	} else {
2034 		sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS;
2035 		sub_type_hdr->minimum_bandwidth_mbs =
2036 				amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->adev, true);
2037 		sub_type_hdr->maximum_bandwidth_mbs =
2038 				amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->adev, false);
2039 	}
2040 
2041 	sub_type_hdr->proximity_domain_from = proximity_domain;
2042 
2043 #ifdef CONFIG_ACPI_NUMA
2044 	if (kdev->adev->pdev->dev.numa_node == NUMA_NO_NODE)
2045 		kfd_find_numa_node_in_srat(kdev);
2046 #endif
2047 #ifdef CONFIG_NUMA
2048 	if (kdev->adev->pdev->dev.numa_node == NUMA_NO_NODE)
2049 		sub_type_hdr->proximity_domain_to = 0;
2050 	else
2051 		sub_type_hdr->proximity_domain_to = kdev->adev->pdev->dev.numa_node;
2052 #else
2053 	sub_type_hdr->proximity_domain_to = 0;
2054 #endif
2055 	return 0;
2056 }
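
/*
 * Sketch of the directionality decision above (ignoring the GC 9.4.3
 * APU special case): a PCIe link is bidirectional only when the whole
 * VRAM is CPU-visible, while a CPU-xGMI link always is:
 *
 *	bool bidir = kdev->adev->gmc.xgmi.connected_to_cpu ||
 *		     kfd_dev_is_large_bar(kdev);
 */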
2057 
2058 static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
2059 			struct kfd_node *kdev,
2060 			struct kfd_node *peer_kdev,
2061 			struct crat_subtype_iolink *sub_type_hdr,
2062 			uint32_t proximity_domain_from,
2063 			uint32_t proximity_domain_to)
2064 {
2065 	bool use_ta_info = kdev->kfd->num_nodes == 1;
2066 
2067 	*avail_size -= sizeof(struct crat_subtype_iolink);
2068 	if (*avail_size < 0)
2069 		return -ENOMEM;
2070 
2071 	memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_iolink));
2072 
2073 	sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY;
2074 	sub_type_hdr->length = sizeof(struct crat_subtype_iolink);
2075 	sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED |
2076 			       CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;
2077 
2078 	sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
2079 	sub_type_hdr->proximity_domain_from = proximity_domain_from;
2080 	sub_type_hdr->proximity_domain_to = proximity_domain_to;
2081 
2082 	if (use_ta_info) {
2083 		sub_type_hdr->weight_xgmi = KFD_CRAT_XGMI_WEIGHT *
2084 			amdgpu_amdkfd_get_xgmi_hops_count(kdev->adev, peer_kdev->adev);
2085 		sub_type_hdr->maximum_bandwidth_mbs =
2086 			amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev,
2087 							peer_kdev->adev, false);
2088 		sub_type_hdr->minimum_bandwidth_mbs = sub_type_hdr->maximum_bandwidth_mbs ?
2089 			amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, NULL, true) : 0;
2090 	} else {
2091 		bool is_single_hop = kdev->kfd == peer_kdev->kfd;
2092 		int weight = is_single_hop ? KFD_CRAT_INTRA_SOCKET_WEIGHT :
2093 			(2 * KFD_CRAT_INTRA_SOCKET_WEIGHT) + KFD_CRAT_XGMI_WEIGHT;
2094 		int mem_bw = 819200;
2095 
2096 		sub_type_hdr->weight_xgmi = weight;
2097 		sub_type_hdr->maximum_bandwidth_mbs = is_single_hop ? mem_bw : 0;
2098 		sub_type_hdr->minimum_bandwidth_mbs = is_single_hop ? mem_bw : 0;
2099 	}
2100 
2101 	return 0;
2102 }
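
/*
 * Worked example of the multi-node weights above: partitions of the
 * same kfd (is_single_hop) get KFD_CRAT_INTRA_SOCKET_WEIGHT = 13, while
 * partitions on different sockets pay two intra-socket hops plus one
 * xGMI hop:
 *
 *	weight = 2 * KFD_CRAT_INTRA_SOCKET_WEIGHT + KFD_CRAT_XGMI_WEIGHT;
 *	// = 2 * 13 + 15 = 41
 */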
2103 
2104 /* kfd_create_vcrat_image_gpu - Create Virtual CRAT for GPU
2105  *
2106  *	@pcrat_image: Buffer to fill in with the GPU VCRAT
2107  *	@size:	[IN] allocated size of pcrat_image.
2108  *		[OUT] actual size of data filled in pcrat_image
2109  */
2110 static int kfd_create_vcrat_image_gpu(void *pcrat_image,
2111 				      size_t *size, struct kfd_node *kdev,
2112 				      uint32_t proximity_domain)
2113 {
2114 	struct crat_header *crat_table = (struct crat_header *)pcrat_image;
2115 	struct crat_subtype_generic *sub_type_hdr;
2116 	struct kfd_local_mem_info local_mem_info;
2117 	struct kfd_topology_device *peer_dev;
2118 	struct crat_subtype_computeunit *cu;
2119 	struct kfd_cu_info cu_info;
2120 	int avail_size = *size;
2121 	uint32_t total_num_of_cu;
2122 	uint32_t nid = 0;
2123 	int ret = 0;
2124 
2125 	if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_GPU)
2126 		return -EINVAL;
2127 
2128 	/* Fill the CRAT Header.
2129 	 * Modify length and total_entries as subunits are added.
2130 	 */
2131 	avail_size -= sizeof(struct crat_header);
2132 	if (avail_size < 0)
2133 		return -ENOMEM;
2134 
2135 	memset(crat_table, 0, sizeof(struct crat_header));
2136 
2137 	memcpy(&crat_table->signature, CRAT_SIGNATURE,
2138 			sizeof(crat_table->signature));
2139 	/* Change length as we add more subtypes */
2140 	crat_table->length = sizeof(struct crat_header);
2141 	crat_table->num_domains = 1;
2142 	crat_table->total_entries = 0;
2143 
2144 	/* Fill in Subtype: Compute Unit
2145 	 * First fill in the sub type header and then sub type data
2146 	 */
2147 	avail_size -= sizeof(struct crat_subtype_computeunit);
2148 	if (avail_size < 0)
2149 		return -ENOMEM;
2150 
2151 	sub_type_hdr = (struct crat_subtype_generic *)(crat_table + 1);
2152 	memset(sub_type_hdr, 0, sizeof(struct crat_subtype_computeunit));
2153 
2154 	sub_type_hdr->type = CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY;
2155 	sub_type_hdr->length = sizeof(struct crat_subtype_computeunit);
2156 	sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED;
2157 
2158 	/* Fill CU subtype data */
2159 	cu = (struct crat_subtype_computeunit *)sub_type_hdr;
2160 	cu->flags |= CRAT_CU_FLAGS_GPU_PRESENT;
2161 	cu->proximity_domain = proximity_domain;
2162 
2163 	amdgpu_amdkfd_get_cu_info(kdev->adev, &cu_info);
2164 	cu->num_simd_per_cu = cu_info.simd_per_cu;
2165 	cu->num_simd_cores = cu_info.simd_per_cu * cu_info.cu_active_number;
2166 	cu->max_waves_simd = cu_info.max_waves_per_simd;
2167 
2168 	cu->wave_front_size = cu_info.wave_front_size;
2169 	cu->array_count = cu_info.num_shader_arrays_per_engine *
2170 		cu_info.num_shader_engines;
2171 	total_num_of_cu = (cu->array_count * cu_info.num_cu_per_sh);
2172 	cu->processor_id_low = get_and_inc_gpu_processor_id(total_num_of_cu);
2173 	cu->num_cu_per_array = cu_info.num_cu_per_sh;
2174 	cu->max_slots_scatch_cu = cu_info.max_scratch_slots_per_cu;
2175 	cu->num_banks = cu_info.num_shader_engines;
2176 	cu->lds_size_in_kb = cu_info.lds_size;
2177 
2178 	cu->hsa_capability = 0;
2179 
2180 	/* Check if this node supports IOMMU. During parsing this flag will
2181 	 * translate to HSA_CAP_ATS_PRESENT
2182 	 */
2183 	if (!kfd_iommu_check_device(kdev->kfd))
2184 		cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT;
2185 
2186 	crat_table->length += sub_type_hdr->length;
2187 	crat_table->total_entries++;
2188 
2189 	/* Fill in Subtype: Memory. Only on systems with large BAR (no
2190 	 * private FB), report memory as public. On other systems
2191 	 * report the total FB size (public+private) as a single
2192 	 * private heap.
2193 	 */
2194 	local_mem_info = kdev->local_mem_info;
2195 	sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
2196 			sub_type_hdr->length);
2197 
2198 	if (debug_largebar)
2199 		local_mem_info.local_mem_size_private = 0;
2200 
2201 	if (local_mem_info.local_mem_size_private == 0)
2202 		ret = kfd_fill_gpu_memory_affinity(&avail_size,
2203 				kdev, HSA_MEM_HEAP_TYPE_FB_PUBLIC,
2204 				local_mem_info.local_mem_size_public,
2205 				(struct crat_subtype_memory *)sub_type_hdr,
2206 				proximity_domain,
2207 				&local_mem_info);
2208 	else
2209 		ret = kfd_fill_gpu_memory_affinity(&avail_size,
2210 				kdev, HSA_MEM_HEAP_TYPE_FB_PRIVATE,
2211 				local_mem_info.local_mem_size_public +
2212 				local_mem_info.local_mem_size_private,
2213 				(struct crat_subtype_memory *)sub_type_hdr,
2214 				proximity_domain,
2215 				&local_mem_info);
2216 	if (ret < 0)
2217 		return ret;
2218 
2219 	crat_table->length += sizeof(struct crat_subtype_memory);
2220 	crat_table->total_entries++;
2221 
2222 	/* Fill in Subtype: IO_LINKS
2223 	 *  Only direct links are added here which is Link from GPU to
2224 	 *  its NUMA node. Indirect links are added by userspace.
2225 	 */
2226 	sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
2227 		sub_type_hdr->length);
2228 	ret = kfd_fill_gpu_direct_io_link_to_cpu(&avail_size, kdev,
2229 		(struct crat_subtype_iolink *)sub_type_hdr, proximity_domain);
2230 
2231 	if (ret < 0)
2232 		return ret;
2233 
2234 	crat_table->length += sub_type_hdr->length;
2235 	crat_table->total_entries++;
2236 
2238 	/* Fill in Subtype: IO_LINKS
2239 	 * Direct links from this GPU to other GPUs through xGMI.
2240 	 * We loop over the GPUs that have already been processed (those
2241 	 * with a lower proximity_domain value) and add a link for each
2242 	 * GPU with the same hive id (from this GPU to the other GPU).
2243 	 * The reverse iolink (from the other GPU to this GPU) is added
2244 	 * in kfd_parse_subtype_iolink.
2245 	 */
2246 	if (kdev->kfd->hive_id) {
2247 		for (nid = 0; nid < proximity_domain; ++nid) {
2248 			peer_dev = kfd_topology_device_by_proximity_domain_no_lock(nid);
2249 			if (!peer_dev || !peer_dev->gpu)
2250 				continue;
2251 			if (peer_dev->gpu->kfd->hive_id != kdev->kfd->hive_id)
2252 				continue;
2253 			sub_type_hdr = (typeof(sub_type_hdr))(
2254 				(char *)sub_type_hdr +
2255 				sizeof(struct crat_subtype_iolink));
2256 			ret = kfd_fill_gpu_xgmi_link_to_gpu(
2257 				&avail_size, kdev, peer_dev->gpu,
2258 				(struct crat_subtype_iolink *)sub_type_hdr,
2259 				proximity_domain, nid);
2260 			if (ret < 0)
2261 				return ret;
2262 			crat_table->length += sub_type_hdr->length;
2263 			crat_table->total_entries++;
2264 		}
2265 	}
2266 	*size = crat_table->length;
2267 	pr_info("Virtual CRAT table created for GPU\n");
2268 
2269 	return ret;
2270 }
2271 
2272 /* kfd_create_crat_image_virtual - Allocates memory for CRAT image and
2273  *		creates a Virtual CRAT (VCRAT) image
2274  *
2275  * NOTE: Call kfd_destroy_crat_image to free CRAT image memory
2276  *
2277  *	@crat_image: VCRAT image created because ACPI does not have a
2278  *		     CRAT for this device
2279  *	@size: [OUT] size of virtual crat_image
2280  *	@flags:	COMPUTE_UNIT_CPU - Create VCRAT for CPU device
2281  *		COMPUTE_UNIT_GPU - Create VCRAT for GPU
2282  *		(COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU) - Create VCRAT for APU
2283  *			-- this option is not currently implemented.
2284  *			The assumption is that all AMD APUs will have a CRAT
2285  *	@kdev: Valid kfd_node required if flags contain COMPUTE_UNIT_GPU
2286  *
2287  *	Return 0 if successful else return -ve value
2288  */
2289 int kfd_create_crat_image_virtual(void **crat_image, size_t *size,
2290 				  int flags, struct kfd_node *kdev,
2291 				  uint32_t proximity_domain)
2292 {
2293 	void *pcrat_image = NULL;
2294 	int ret = 0, num_nodes;
2295 	size_t dyn_size;
2296 
2297 	if (!crat_image)
2298 		return -EINVAL;
2299 
2300 	*crat_image = NULL;
2301 
2302 	/* Size the CPU Virtual CRAT based on the number of online NUMA
2303 	 * nodes. Allocate VCRAT_SIZE_FOR_GPU for the GPU virtual CRAT
2304 	 * image. This should cover all current configurations; a check
2305 	 * prevents writing beyond the allocated size for GPUs.
2306 	 */
2307 	switch (flags) {
2308 	case COMPUTE_UNIT_CPU:
2309 		num_nodes = num_online_nodes();
2310 		dyn_size = sizeof(struct crat_header) +
2311 			num_nodes * (sizeof(struct crat_subtype_computeunit) +
2312 			sizeof(struct crat_subtype_memory) +
2313 			(num_nodes - 1) * sizeof(struct crat_subtype_iolink));
2314 		pcrat_image = kvmalloc(dyn_size, GFP_KERNEL);
2315 		if (!pcrat_image)
2316 			return -ENOMEM;
2317 		*size = dyn_size;
2318 		pr_debug("CRAT size is %zu\n", dyn_size);
2319 		ret = kfd_create_vcrat_image_cpu(pcrat_image, size);
2320 		break;
2321 	case COMPUTE_UNIT_GPU:
2322 		if (!kdev)
2323 			return -EINVAL;
2324 		pcrat_image = kvmalloc(VCRAT_SIZE_FOR_GPU, GFP_KERNEL);
2325 		if (!pcrat_image)
2326 			return -ENOMEM;
2327 		*size = VCRAT_SIZE_FOR_GPU;
2328 		ret = kfd_create_vcrat_image_gpu(pcrat_image, size, kdev,
2329 						 proximity_domain);
2330 		break;
2331 	case (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU):
2332 		/* TODO: */
2333 		ret = -EINVAL;
2334 		pr_err("VCRAT not implemented for APU\n");
2335 		break;
2336 	default:
2337 		ret = -EINVAL;
2338 	}
2339 
2340 	if (!ret)
2341 		*crat_image = pcrat_image;
2342 	else
2343 		kvfree(pcrat_image);
2344 
2345 	return ret;
2346 }
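
/*
 * Usage sketch (error handling trimmed; the real callers live in
 * kfd_topology.c): creation is always paired with
 * kfd_destroy_crat_image():
 *
 *	void *image = NULL;
 *	size_t size = 0;
 *
 *	if (!kfd_create_crat_image_virtual(&image, &size,
 *					   COMPUTE_UNIT_CPU, NULL, 0)) {
 *		// ... parse 'size' bytes of VCRAT at 'image' ...
 *		kfd_destroy_crat_image(image);
 *	}
 */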
2347 
2349 /* kfd_destroy_crat_image
2350  *
2351  *	@crat_image: [IN] crat_image returned by kfd_create_crat_image_xxx()
2352  */
2354 void kfd_destroy_crat_image(void *crat_image)
2355 {
2356 	kvfree(crat_image);
2357 }
2358