1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22 
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/pci.h>
26 #include <linux/errno.h>
27 #include <linux/acpi.h>
28 #include <linux/hash.h>
29 #include <linux/cpufreq.h>
30 #include <linux/log2.h>
31 #include <linux/dmi.h>
32 #include <linux/atomic.h>
33 
34 #include "kfd_priv.h"
35 #include "kfd_crat.h"
36 #include "kfd_topology.h"
37 #include "kfd_device_queue_manager.h"
38 
/* topology_device_list - Master list of all topology devices */
static struct list_head topology_device_list;
/* System-wide properties together with the sysfs objects that export them */
static struct kfd_system_properties sys_props;

/* Reader/writer semaphore taken around walks and updates of the list above */
static DECLARE_RWSEM(topology_lock);
/* Set by kfd_topology_init() to sys_props.num_devices - 1 */
static atomic_t topology_crat_proximity_domain;
45 
46 struct kfd_topology_device *kfd_topology_device_by_proximity_domain(
47 						uint32_t proximity_domain)
48 {
49 	struct kfd_topology_device *top_dev;
50 	struct kfd_topology_device *device = NULL;
51 
52 	down_read(&topology_lock);
53 
54 	list_for_each_entry(top_dev, &topology_device_list, list)
55 		if (top_dev->proximity_domain == proximity_domain) {
56 			device = top_dev;
57 			break;
58 		}
59 
60 	up_read(&topology_lock);
61 
62 	return device;
63 }
64 
65 struct kfd_dev *kfd_device_by_id(uint32_t gpu_id)
66 {
67 	struct kfd_topology_device *top_dev;
68 	struct kfd_dev *device = NULL;
69 
70 	down_read(&topology_lock);
71 
72 	list_for_each_entry(top_dev, &topology_device_list, list)
73 		if (top_dev->gpu_id == gpu_id) {
74 			device = top_dev->gpu;
75 			break;
76 		}
77 
78 	up_read(&topology_lock);
79 
80 	return device;
81 }
82 
83 struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev)
84 {
85 	struct kfd_topology_device *top_dev;
86 	struct kfd_dev *device = NULL;
87 
88 	down_read(&topology_lock);
89 
90 	list_for_each_entry(top_dev, &topology_device_list, list)
91 		if (top_dev->gpu->pdev == pdev) {
92 			device = top_dev->gpu;
93 			break;
94 		}
95 
96 	up_read(&topology_lock);
97 
98 	return device;
99 }
100 
101 /* Called with write topology_lock acquired */
102 static void kfd_release_topology_device(struct kfd_topology_device *dev)
103 {
104 	struct kfd_mem_properties *mem;
105 	struct kfd_cache_properties *cache;
106 	struct kfd_iolink_properties *iolink;
107 	struct kfd_perf_properties *perf;
108 
109 	list_del(&dev->list);
110 
111 	while (dev->mem_props.next != &dev->mem_props) {
112 		mem = container_of(dev->mem_props.next,
113 				struct kfd_mem_properties, list);
114 		list_del(&mem->list);
115 		kfree(mem);
116 	}
117 
118 	while (dev->cache_props.next != &dev->cache_props) {
119 		cache = container_of(dev->cache_props.next,
120 				struct kfd_cache_properties, list);
121 		list_del(&cache->list);
122 		kfree(cache);
123 	}
124 
125 	while (dev->io_link_props.next != &dev->io_link_props) {
126 		iolink = container_of(dev->io_link_props.next,
127 				struct kfd_iolink_properties, list);
128 		list_del(&iolink->list);
129 		kfree(iolink);
130 	}
131 
132 	while (dev->perf_props.next != &dev->perf_props) {
133 		perf = container_of(dev->perf_props.next,
134 				struct kfd_perf_properties, list);
135 		list_del(&perf->list);
136 		kfree(perf);
137 	}
138 
139 	kfree(dev);
140 }
141 
142 void kfd_release_topology_device_list(struct list_head *device_list)
143 {
144 	struct kfd_topology_device *dev;
145 
146 	while (!list_empty(device_list)) {
147 		dev = list_first_entry(device_list,
148 				       struct kfd_topology_device, list);
149 		kfd_release_topology_device(dev);
150 	}
151 }
152 
153 static void kfd_release_live_view(void)
154 {
155 	kfd_release_topology_device_list(&topology_device_list);
156 	memset(&sys_props, 0, sizeof(sys_props));
157 }
158 
159 struct kfd_topology_device *kfd_create_topology_device(
160 				struct list_head *device_list)
161 {
162 	struct kfd_topology_device *dev;
163 
164 	dev = kfd_alloc_struct(dev);
165 	if (!dev) {
166 		pr_err("No memory to allocate a topology device");
167 		return NULL;
168 	}
169 
170 	INIT_LIST_HEAD(&dev->mem_props);
171 	INIT_LIST_HEAD(&dev->cache_props);
172 	INIT_LIST_HEAD(&dev->io_link_props);
173 	INIT_LIST_HEAD(&dev->perf_props);
174 
175 	list_add_tail(&dev->list, device_list);
176 
177 	return dev;
178 }
179 
180 
/*
 * Helpers that append one formatted line to a sysfs page buffer.
 *
 * @buffer must hold a NUL-terminated string (callers start with
 * buffer[0] = 0). The new text is written at the current end of that string
 * and is bounded by PAGE_SIZE. The buffer is never passed to the formatter
 * as both destination and "%s" source, which would be overlapping-object
 * undefined behaviour.
 *
 * Each macro evaluates to the total number of characters now stored in
 * @buffer (excluding the terminating NUL), as an int.
 */
#define sysfs_show_gen_prop(buffer, fmt, ...)				\
({									\
	char *__sysfs_buf = (buffer);					\
	size_t __sysfs_len = strnlen(__sysfs_buf, PAGE_SIZE);		\
	(int)__sysfs_len + scnprintf(__sysfs_buf + __sysfs_len,	\
			PAGE_SIZE - __sysfs_len, fmt, __VA_ARGS__);	\
})
#define sysfs_show_32bit_prop(buffer, name, value) \
		sysfs_show_gen_prop(buffer, "%s %u\n", name, value)
#define sysfs_show_64bit_prop(buffer, name, value) \
		sysfs_show_gen_prop(buffer, "%s %llu\n", name, value)
#define sysfs_show_32bit_val(buffer, value) \
		sysfs_show_gen_prop(buffer, "%u\n", value)
#define sysfs_show_str_val(buffer, value) \
		sysfs_show_gen_prop(buffer, "%s\n", value)
191 
192 static ssize_t sysprops_show(struct kobject *kobj, struct attribute *attr,
193 		char *buffer)
194 {
195 	ssize_t ret;
196 
197 	/* Making sure that the buffer is an empty string */
198 	buffer[0] = 0;
199 
200 	if (attr == &sys_props.attr_genid) {
201 		ret = sysfs_show_32bit_val(buffer, sys_props.generation_count);
202 	} else if (attr == &sys_props.attr_props) {
203 		sysfs_show_64bit_prop(buffer, "platform_oem",
204 				sys_props.platform_oem);
205 		sysfs_show_64bit_prop(buffer, "platform_id",
206 				sys_props.platform_id);
207 		ret = sysfs_show_64bit_prop(buffer, "platform_rev",
208 				sys_props.platform_rev);
209 	} else {
210 		ret = -EINVAL;
211 	}
212 
213 	return ret;
214 }
215 
/*
 * Release callback shared by the kobj_type definitions in this file:
 * frees the memory of @kobj with kfree().
 */
static void kfd_topology_kobj_release(struct kobject *kobj)
{
	kfree(kobj);
}
220 
/* sysfs operations of the topology root; only .show is provided */
static const struct sysfs_ops sysprops_ops = {
	.show = sysprops_show,
};

/* kobject type used for the "topology" kobject */
static struct kobj_type sysprops_type = {
	.release = kfd_topology_kobj_release,
	.sysfs_ops = &sysprops_ops,
};
229 
230 static ssize_t iolink_show(struct kobject *kobj, struct attribute *attr,
231 		char *buffer)
232 {
233 	ssize_t ret;
234 	struct kfd_iolink_properties *iolink;
235 
236 	/* Making sure that the buffer is an empty string */
237 	buffer[0] = 0;
238 
239 	iolink = container_of(attr, struct kfd_iolink_properties, attr);
240 	sysfs_show_32bit_prop(buffer, "type", iolink->iolink_type);
241 	sysfs_show_32bit_prop(buffer, "version_major", iolink->ver_maj);
242 	sysfs_show_32bit_prop(buffer, "version_minor", iolink->ver_min);
243 	sysfs_show_32bit_prop(buffer, "node_from", iolink->node_from);
244 	sysfs_show_32bit_prop(buffer, "node_to", iolink->node_to);
245 	sysfs_show_32bit_prop(buffer, "weight", iolink->weight);
246 	sysfs_show_32bit_prop(buffer, "min_latency", iolink->min_latency);
247 	sysfs_show_32bit_prop(buffer, "max_latency", iolink->max_latency);
248 	sysfs_show_32bit_prop(buffer, "min_bandwidth", iolink->min_bandwidth);
249 	sysfs_show_32bit_prop(buffer, "max_bandwidth", iolink->max_bandwidth);
250 	sysfs_show_32bit_prop(buffer, "recommended_transfer_size",
251 			iolink->rec_transfer_size);
252 	ret = sysfs_show_32bit_prop(buffer, "flags", iolink->flags);
253 
254 	return ret;
255 }
256 
/* sysfs operations of an io link entry; only .show is provided */
static const struct sysfs_ops iolink_ops = {
	.show = iolink_show,
};

/* kobject type used for the per-io-link kobjects */
static struct kobj_type iolink_type = {
	.release = kfd_topology_kobj_release,
	.sysfs_ops = &iolink_ops,
};
265 
266 static ssize_t mem_show(struct kobject *kobj, struct attribute *attr,
267 		char *buffer)
268 {
269 	ssize_t ret;
270 	struct kfd_mem_properties *mem;
271 
272 	/* Making sure that the buffer is an empty string */
273 	buffer[0] = 0;
274 
275 	mem = container_of(attr, struct kfd_mem_properties, attr);
276 	sysfs_show_32bit_prop(buffer, "heap_type", mem->heap_type);
277 	sysfs_show_64bit_prop(buffer, "size_in_bytes", mem->size_in_bytes);
278 	sysfs_show_32bit_prop(buffer, "flags", mem->flags);
279 	sysfs_show_32bit_prop(buffer, "width", mem->width);
280 	ret = sysfs_show_32bit_prop(buffer, "mem_clk_max", mem->mem_clk_max);
281 
282 	return ret;
283 }
284 
/* sysfs operations of a memory bank entry; only .show is provided */
static const struct sysfs_ops mem_ops = {
	.show = mem_show,
};

/* kobject type used for the per-memory-bank kobjects */
static struct kobj_type mem_type = {
	.release = kfd_topology_kobj_release,
	.sysfs_ops = &mem_ops,
};
293 
294 static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr,
295 		char *buffer)
296 {
297 	ssize_t ret;
298 	uint32_t i, j;
299 	struct kfd_cache_properties *cache;
300 
301 	/* Making sure that the buffer is an empty string */
302 	buffer[0] = 0;
303 
304 	cache = container_of(attr, struct kfd_cache_properties, attr);
305 	sysfs_show_32bit_prop(buffer, "processor_id_low",
306 			cache->processor_id_low);
307 	sysfs_show_32bit_prop(buffer, "level", cache->cache_level);
308 	sysfs_show_32bit_prop(buffer, "size", cache->cache_size);
309 	sysfs_show_32bit_prop(buffer, "cache_line_size", cache->cacheline_size);
310 	sysfs_show_32bit_prop(buffer, "cache_lines_per_tag",
311 			cache->cachelines_per_tag);
312 	sysfs_show_32bit_prop(buffer, "association", cache->cache_assoc);
313 	sysfs_show_32bit_prop(buffer, "latency", cache->cache_latency);
314 	sysfs_show_32bit_prop(buffer, "type", cache->cache_type);
315 	snprintf(buffer, PAGE_SIZE, "%ssibling_map ", buffer);
316 	for (i = 0; i < CRAT_SIBLINGMAP_SIZE; i++)
317 		for (j = 0; j < sizeof(cache->sibling_map[0])*8; j++) {
318 			/* Check each bit */
319 			if (cache->sibling_map[i] & (1 << j))
320 				ret = snprintf(buffer, PAGE_SIZE,
321 					 "%s%d%s", buffer, 1, ",");
322 			else
323 				ret = snprintf(buffer, PAGE_SIZE,
324 					 "%s%d%s", buffer, 0, ",");
325 		}
326 	/* Replace the last "," with end of line */
327 	*(buffer + strlen(buffer) - 1) = 0xA;
328 	return ret;
329 }
330 
/* sysfs operations of a cache entry; only .show is provided */
static const struct sysfs_ops cache_ops = {
	.show = kfd_cache_show,
};

/* kobject type used for the per-cache kobjects */
static struct kobj_type cache_type = {
	.release = kfd_topology_kobj_release,
	.sysfs_ops = &cache_ops,
};
339 
/****** Sysfs of Performance Counters ******/

/*
 * One performance-counter attribute: the kobj_attribute handed to sysfs
 * plus the value that perf_show() prints for it.
 */
struct kfd_perf_attr {
	struct kobj_attribute attr;
	uint32_t data;	/* perf_show() prints nothing when this is 0 */
};
346 
347 static ssize_t perf_show(struct kobject *kobj, struct kobj_attribute *attrs,
348 			char *buf)
349 {
350 	struct kfd_perf_attr *attr;
351 
352 	buf[0] = 0;
353 	attr = container_of(attrs, struct kfd_perf_attr, attr);
354 	if (!attr->data) /* invalid data for PMC */
355 		return 0;
356 	else
357 		return sysfs_show_32bit_val(buf, attr->data);
358 }
359 
/*
 * Initialiser for one struct kfd_perf_attr: a 0444 attribute called _name
 * that is served by perf_show() and starts out with the value _data.
 */
#define KFD_PERF_DESC(_name, _data)			\
{							\
	.attr  = __ATTR(_name, 0444, perf_show, NULL),	\
	.data = _data,					\
}

/*
 * Attributes of the "iommu" perf block. Entry 0 (max_concurrent) is filled
 * in by kfd_build_sysfs_node_entry(); the other two keep the value 0.
 */
static struct kfd_perf_attr perf_attr_iommu[] = {
	KFD_PERF_DESC(max_concurrent, 0),
	KFD_PERF_DESC(num_counters, 0),
	KFD_PERF_DESC(counter_ids, 0),
};
371 /****************************************/
372 
373 static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
374 		char *buffer)
375 {
376 	struct kfd_topology_device *dev;
377 	char public_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE];
378 	uint32_t i;
379 	uint32_t log_max_watch_addr;
380 
381 	/* Making sure that the buffer is an empty string */
382 	buffer[0] = 0;
383 
384 	if (strcmp(attr->name, "gpu_id") == 0) {
385 		dev = container_of(attr, struct kfd_topology_device,
386 				attr_gpuid);
387 		return sysfs_show_32bit_val(buffer, dev->gpu_id);
388 	}
389 
390 	if (strcmp(attr->name, "name") == 0) {
391 		dev = container_of(attr, struct kfd_topology_device,
392 				attr_name);
393 		for (i = 0; i < KFD_TOPOLOGY_PUBLIC_NAME_SIZE; i++) {
394 			public_name[i] =
395 					(char)dev->node_props.marketing_name[i];
396 			if (dev->node_props.marketing_name[i] == 0)
397 				break;
398 		}
399 		public_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE-1] = 0x0;
400 		return sysfs_show_str_val(buffer, public_name);
401 	}
402 
403 	dev = container_of(attr, struct kfd_topology_device,
404 			attr_props);
405 	sysfs_show_32bit_prop(buffer, "cpu_cores_count",
406 			dev->node_props.cpu_cores_count);
407 	sysfs_show_32bit_prop(buffer, "simd_count",
408 			dev->node_props.simd_count);
409 	sysfs_show_32bit_prop(buffer, "mem_banks_count",
410 			dev->node_props.mem_banks_count);
411 	sysfs_show_32bit_prop(buffer, "caches_count",
412 			dev->node_props.caches_count);
413 	sysfs_show_32bit_prop(buffer, "io_links_count",
414 			dev->node_props.io_links_count);
415 	sysfs_show_32bit_prop(buffer, "cpu_core_id_base",
416 			dev->node_props.cpu_core_id_base);
417 	sysfs_show_32bit_prop(buffer, "simd_id_base",
418 			dev->node_props.simd_id_base);
419 	sysfs_show_32bit_prop(buffer, "max_waves_per_simd",
420 			dev->node_props.max_waves_per_simd);
421 	sysfs_show_32bit_prop(buffer, "lds_size_in_kb",
422 			dev->node_props.lds_size_in_kb);
423 	sysfs_show_32bit_prop(buffer, "gds_size_in_kb",
424 			dev->node_props.gds_size_in_kb);
425 	sysfs_show_32bit_prop(buffer, "wave_front_size",
426 			dev->node_props.wave_front_size);
427 	sysfs_show_32bit_prop(buffer, "array_count",
428 			dev->node_props.array_count);
429 	sysfs_show_32bit_prop(buffer, "simd_arrays_per_engine",
430 			dev->node_props.simd_arrays_per_engine);
431 	sysfs_show_32bit_prop(buffer, "cu_per_simd_array",
432 			dev->node_props.cu_per_simd_array);
433 	sysfs_show_32bit_prop(buffer, "simd_per_cu",
434 			dev->node_props.simd_per_cu);
435 	sysfs_show_32bit_prop(buffer, "max_slots_scratch_cu",
436 			dev->node_props.max_slots_scratch_cu);
437 	sysfs_show_32bit_prop(buffer, "vendor_id",
438 			dev->node_props.vendor_id);
439 	sysfs_show_32bit_prop(buffer, "device_id",
440 			dev->node_props.device_id);
441 	sysfs_show_32bit_prop(buffer, "location_id",
442 			dev->node_props.location_id);
443 
444 	if (dev->gpu) {
445 		log_max_watch_addr =
446 			__ilog2_u32(dev->gpu->device_info->num_of_watch_points);
447 
448 		if (log_max_watch_addr) {
449 			dev->node_props.capability |=
450 					HSA_CAP_WATCH_POINTS_SUPPORTED;
451 
452 			dev->node_props.capability |=
453 				((log_max_watch_addr <<
454 					HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT) &
455 				HSA_CAP_WATCH_POINTS_TOTALBITS_MASK);
456 		}
457 
458 		if (dev->gpu->device_info->asic_family == CHIP_TONGA)
459 			dev->node_props.capability |=
460 					HSA_CAP_AQL_QUEUE_DOUBLE_MAP;
461 
462 		sysfs_show_32bit_prop(buffer, "max_engine_clk_fcompute",
463 			dev->node_props.max_engine_clk_fcompute);
464 
465 		sysfs_show_64bit_prop(buffer, "local_mem_size",
466 				(unsigned long long int) 0);
467 
468 		sysfs_show_32bit_prop(buffer, "fw_version",
469 			dev->gpu->kfd2kgd->get_fw_version(
470 						dev->gpu->kgd,
471 						KGD_ENGINE_MEC1));
472 		sysfs_show_32bit_prop(buffer, "capability",
473 				dev->node_props.capability);
474 	}
475 
476 	return sysfs_show_32bit_prop(buffer, "max_engine_clk_ccompute",
477 					cpufreq_quick_get_max(0)/1000);
478 }
479 
/* sysfs operations of a topology node; only .show is provided */
static const struct sysfs_ops node_ops = {
	.show = node_show,
};

/* kobject type used for the per-node kobjects */
static struct kobj_type node_type = {
	.release = kfd_topology_kobj_release,
	.sysfs_ops = &node_ops,
};
488 
/*
 * kfd_remove_sysfs_file - remove a single-file kobject
 * @kobj: kobject that owns the file
 * @attr: attribute to remove from @kobj
 *
 * Removes the file, unlinks @kobj from sysfs and drops a reference on it.
 */
static void kfd_remove_sysfs_file(struct kobject *kobj, struct attribute *attr)
{
	/* File first, then the directory, then the reference */
	sysfs_remove_file(kobj, attr);
	kobject_del(kobj);
	kobject_put(kobj);
}
495 
496 static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev)
497 {
498 	struct kfd_iolink_properties *iolink;
499 	struct kfd_cache_properties *cache;
500 	struct kfd_mem_properties *mem;
501 	struct kfd_perf_properties *perf;
502 
503 	if (dev->kobj_iolink) {
504 		list_for_each_entry(iolink, &dev->io_link_props, list)
505 			if (iolink->kobj) {
506 				kfd_remove_sysfs_file(iolink->kobj,
507 							&iolink->attr);
508 				iolink->kobj = NULL;
509 			}
510 		kobject_del(dev->kobj_iolink);
511 		kobject_put(dev->kobj_iolink);
512 		dev->kobj_iolink = NULL;
513 	}
514 
515 	if (dev->kobj_cache) {
516 		list_for_each_entry(cache, &dev->cache_props, list)
517 			if (cache->kobj) {
518 				kfd_remove_sysfs_file(cache->kobj,
519 							&cache->attr);
520 				cache->kobj = NULL;
521 			}
522 		kobject_del(dev->kobj_cache);
523 		kobject_put(dev->kobj_cache);
524 		dev->kobj_cache = NULL;
525 	}
526 
527 	if (dev->kobj_mem) {
528 		list_for_each_entry(mem, &dev->mem_props, list)
529 			if (mem->kobj) {
530 				kfd_remove_sysfs_file(mem->kobj, &mem->attr);
531 				mem->kobj = NULL;
532 			}
533 		kobject_del(dev->kobj_mem);
534 		kobject_put(dev->kobj_mem);
535 		dev->kobj_mem = NULL;
536 	}
537 
538 	if (dev->kobj_perf) {
539 		list_for_each_entry(perf, &dev->perf_props, list) {
540 			kfree(perf->attr_group);
541 			perf->attr_group = NULL;
542 		}
543 		kobject_del(dev->kobj_perf);
544 		kobject_put(dev->kobj_perf);
545 		dev->kobj_perf = NULL;
546 	}
547 
548 	if (dev->kobj_node) {
549 		sysfs_remove_file(dev->kobj_node, &dev->attr_gpuid);
550 		sysfs_remove_file(dev->kobj_node, &dev->attr_name);
551 		sysfs_remove_file(dev->kobj_node, &dev->attr_props);
552 		kobject_del(dev->kobj_node);
553 		kobject_put(dev->kobj_node);
554 		dev->kobj_node = NULL;
555 	}
556 }
557 
558 static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
559 		uint32_t id)
560 {
561 	struct kfd_iolink_properties *iolink;
562 	struct kfd_cache_properties *cache;
563 	struct kfd_mem_properties *mem;
564 	struct kfd_perf_properties *perf;
565 	int ret;
566 	uint32_t i, num_attrs;
567 	struct attribute **attrs;
568 
569 	if (WARN_ON(dev->kobj_node))
570 		return -EEXIST;
571 
572 	/*
573 	 * Creating the sysfs folders
574 	 */
575 	dev->kobj_node = kfd_alloc_struct(dev->kobj_node);
576 	if (!dev->kobj_node)
577 		return -ENOMEM;
578 
579 	ret = kobject_init_and_add(dev->kobj_node, &node_type,
580 			sys_props.kobj_nodes, "%d", id);
581 	if (ret < 0)
582 		return ret;
583 
584 	dev->kobj_mem = kobject_create_and_add("mem_banks", dev->kobj_node);
585 	if (!dev->kobj_mem)
586 		return -ENOMEM;
587 
588 	dev->kobj_cache = kobject_create_and_add("caches", dev->kobj_node);
589 	if (!dev->kobj_cache)
590 		return -ENOMEM;
591 
592 	dev->kobj_iolink = kobject_create_and_add("io_links", dev->kobj_node);
593 	if (!dev->kobj_iolink)
594 		return -ENOMEM;
595 
596 	dev->kobj_perf = kobject_create_and_add("perf", dev->kobj_node);
597 	if (!dev->kobj_perf)
598 		return -ENOMEM;
599 
600 	/*
601 	 * Creating sysfs files for node properties
602 	 */
603 	dev->attr_gpuid.name = "gpu_id";
604 	dev->attr_gpuid.mode = KFD_SYSFS_FILE_MODE;
605 	sysfs_attr_init(&dev->attr_gpuid);
606 	dev->attr_name.name = "name";
607 	dev->attr_name.mode = KFD_SYSFS_FILE_MODE;
608 	sysfs_attr_init(&dev->attr_name);
609 	dev->attr_props.name = "properties";
610 	dev->attr_props.mode = KFD_SYSFS_FILE_MODE;
611 	sysfs_attr_init(&dev->attr_props);
612 	ret = sysfs_create_file(dev->kobj_node, &dev->attr_gpuid);
613 	if (ret < 0)
614 		return ret;
615 	ret = sysfs_create_file(dev->kobj_node, &dev->attr_name);
616 	if (ret < 0)
617 		return ret;
618 	ret = sysfs_create_file(dev->kobj_node, &dev->attr_props);
619 	if (ret < 0)
620 		return ret;
621 
622 	i = 0;
623 	list_for_each_entry(mem, &dev->mem_props, list) {
624 		mem->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
625 		if (!mem->kobj)
626 			return -ENOMEM;
627 		ret = kobject_init_and_add(mem->kobj, &mem_type,
628 				dev->kobj_mem, "%d", i);
629 		if (ret < 0)
630 			return ret;
631 
632 		mem->attr.name = "properties";
633 		mem->attr.mode = KFD_SYSFS_FILE_MODE;
634 		sysfs_attr_init(&mem->attr);
635 		ret = sysfs_create_file(mem->kobj, &mem->attr);
636 		if (ret < 0)
637 			return ret;
638 		i++;
639 	}
640 
641 	i = 0;
642 	list_for_each_entry(cache, &dev->cache_props, list) {
643 		cache->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
644 		if (!cache->kobj)
645 			return -ENOMEM;
646 		ret = kobject_init_and_add(cache->kobj, &cache_type,
647 				dev->kobj_cache, "%d", i);
648 		if (ret < 0)
649 			return ret;
650 
651 		cache->attr.name = "properties";
652 		cache->attr.mode = KFD_SYSFS_FILE_MODE;
653 		sysfs_attr_init(&cache->attr);
654 		ret = sysfs_create_file(cache->kobj, &cache->attr);
655 		if (ret < 0)
656 			return ret;
657 		i++;
658 	}
659 
660 	i = 0;
661 	list_for_each_entry(iolink, &dev->io_link_props, list) {
662 		iolink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
663 		if (!iolink->kobj)
664 			return -ENOMEM;
665 		ret = kobject_init_and_add(iolink->kobj, &iolink_type,
666 				dev->kobj_iolink, "%d", i);
667 		if (ret < 0)
668 			return ret;
669 
670 		iolink->attr.name = "properties";
671 		iolink->attr.mode = KFD_SYSFS_FILE_MODE;
672 		sysfs_attr_init(&iolink->attr);
673 		ret = sysfs_create_file(iolink->kobj, &iolink->attr);
674 		if (ret < 0)
675 			return ret;
676 		i++;
677 	}
678 
679 	/* All hardware blocks have the same number of attributes. */
680 	num_attrs = sizeof(perf_attr_iommu)/sizeof(struct kfd_perf_attr);
681 	list_for_each_entry(perf, &dev->perf_props, list) {
682 		perf->attr_group = kzalloc(sizeof(struct kfd_perf_attr)
683 			* num_attrs + sizeof(struct attribute_group),
684 			GFP_KERNEL);
685 		if (!perf->attr_group)
686 			return -ENOMEM;
687 
688 		attrs = (struct attribute **)(perf->attr_group + 1);
689 		if (!strcmp(perf->block_name, "iommu")) {
690 		/* Information of IOMMU's num_counters and counter_ids is shown
691 		 * under /sys/bus/event_source/devices/amd_iommu. We don't
692 		 * duplicate here.
693 		 */
694 			perf_attr_iommu[0].data = perf->max_concurrent;
695 			for (i = 0; i < num_attrs; i++)
696 				attrs[i] = &perf_attr_iommu[i].attr.attr;
697 		}
698 		perf->attr_group->name = perf->block_name;
699 		perf->attr_group->attrs = attrs;
700 		ret = sysfs_create_group(dev->kobj_perf, perf->attr_group);
701 		if (ret < 0)
702 			return ret;
703 	}
704 
705 	return 0;
706 }
707 
708 /* Called with write topology lock acquired */
709 static int kfd_build_sysfs_node_tree(void)
710 {
711 	struct kfd_topology_device *dev;
712 	int ret;
713 	uint32_t i = 0;
714 
715 	list_for_each_entry(dev, &topology_device_list, list) {
716 		ret = kfd_build_sysfs_node_entry(dev, i);
717 		if (ret < 0)
718 			return ret;
719 		i++;
720 	}
721 
722 	return 0;
723 }
724 
725 /* Called with write topology lock acquired */
726 static void kfd_remove_sysfs_node_tree(void)
727 {
728 	struct kfd_topology_device *dev;
729 
730 	list_for_each_entry(dev, &topology_device_list, list)
731 		kfd_remove_sysfs_node_entry(dev);
732 }
733 
734 static int kfd_topology_update_sysfs(void)
735 {
736 	int ret;
737 
738 	pr_info("Creating topology SYSFS entries\n");
739 	if (!sys_props.kobj_topology) {
740 		sys_props.kobj_topology =
741 				kfd_alloc_struct(sys_props.kobj_topology);
742 		if (!sys_props.kobj_topology)
743 			return -ENOMEM;
744 
745 		ret = kobject_init_and_add(sys_props.kobj_topology,
746 				&sysprops_type,  &kfd_device->kobj,
747 				"topology");
748 		if (ret < 0)
749 			return ret;
750 
751 		sys_props.kobj_nodes = kobject_create_and_add("nodes",
752 				sys_props.kobj_topology);
753 		if (!sys_props.kobj_nodes)
754 			return -ENOMEM;
755 
756 		sys_props.attr_genid.name = "generation_id";
757 		sys_props.attr_genid.mode = KFD_SYSFS_FILE_MODE;
758 		sysfs_attr_init(&sys_props.attr_genid);
759 		ret = sysfs_create_file(sys_props.kobj_topology,
760 				&sys_props.attr_genid);
761 		if (ret < 0)
762 			return ret;
763 
764 		sys_props.attr_props.name = "system_properties";
765 		sys_props.attr_props.mode = KFD_SYSFS_FILE_MODE;
766 		sysfs_attr_init(&sys_props.attr_props);
767 		ret = sysfs_create_file(sys_props.kobj_topology,
768 				&sys_props.attr_props);
769 		if (ret < 0)
770 			return ret;
771 	}
772 
773 	kfd_remove_sysfs_node_tree();
774 
775 	return kfd_build_sysfs_node_tree();
776 }
777 
778 static void kfd_topology_release_sysfs(void)
779 {
780 	kfd_remove_sysfs_node_tree();
781 	if (sys_props.kobj_topology) {
782 		sysfs_remove_file(sys_props.kobj_topology,
783 				&sys_props.attr_genid);
784 		sysfs_remove_file(sys_props.kobj_topology,
785 				&sys_props.attr_props);
786 		if (sys_props.kobj_nodes) {
787 			kobject_del(sys_props.kobj_nodes);
788 			kobject_put(sys_props.kobj_nodes);
789 			sys_props.kobj_nodes = NULL;
790 		}
791 		kobject_del(sys_props.kobj_topology);
792 		kobject_put(sys_props.kobj_topology);
793 		sys_props.kobj_topology = NULL;
794 	}
795 }
796 
797 /* Called with write topology_lock acquired */
798 static void kfd_topology_update_device_list(struct list_head *temp_list,
799 					struct list_head *master_list)
800 {
801 	while (!list_empty(temp_list)) {
802 		list_move_tail(temp_list->next, master_list);
803 		sys_props.num_devices++;
804 	}
805 }
806 
807 static void kfd_debug_print_topology(void)
808 {
809 	struct kfd_topology_device *dev;
810 
811 	down_read(&topology_lock);
812 
813 	dev = list_last_entry(&topology_device_list,
814 			struct kfd_topology_device, list);
815 	if (dev) {
816 		if (dev->node_props.cpu_cores_count &&
817 				dev->node_props.simd_count) {
818 			pr_info("Topology: Add APU node [0x%0x:0x%0x]\n",
819 				dev->node_props.device_id,
820 				dev->node_props.vendor_id);
821 		} else if (dev->node_props.cpu_cores_count)
822 			pr_info("Topology: Add CPU node\n");
823 		else if (dev->node_props.simd_count)
824 			pr_info("Topology: Add dGPU node [0x%0x:0x%0x]\n",
825 				dev->node_props.device_id,
826 				dev->node_props.vendor_id);
827 	}
828 	up_read(&topology_lock);
829 }
830 
831 /* Helper function for intializing platform_xx members of
832  * kfd_system_properties. Uses OEM info from the last CPU/APU node.
833  */
834 static void kfd_update_system_properties(void)
835 {
836 	struct kfd_topology_device *dev;
837 
838 	down_read(&topology_lock);
839 	dev = list_last_entry(&topology_device_list,
840 			struct kfd_topology_device, list);
841 	if (dev) {
842 		sys_props.platform_id =
843 			(*((uint64_t *)dev->oem_id)) & CRAT_OEMID_64BIT_MASK;
844 		sys_props.platform_oem = *((uint64_t *)dev->oem_table_id);
845 		sys_props.platform_rev = dev->oem_revision;
846 	}
847 	up_read(&topology_lock);
848 }
849 
850 static void find_system_memory(const struct dmi_header *dm,
851 	void *private)
852 {
853 	struct kfd_mem_properties *mem;
854 	u16 mem_width, mem_clock;
855 	struct kfd_topology_device *kdev =
856 		(struct kfd_topology_device *)private;
857 	const u8 *dmi_data = (const u8 *)(dm + 1);
858 
859 	if (dm->type == DMI_ENTRY_MEM_DEVICE && dm->length >= 0x15) {
860 		mem_width = (u16)(*(const u16 *)(dmi_data + 0x6));
861 		mem_clock = (u16)(*(const u16 *)(dmi_data + 0x11));
862 		list_for_each_entry(mem, &kdev->mem_props, list) {
863 			if (mem_width != 0xFFFF && mem_width != 0)
864 				mem->width = mem_width;
865 			if (mem_clock != 0)
866 				mem->mem_clk_max = mem_clock;
867 		}
868 	}
869 }
870 
871 /*
872  * Performance counters information is not part of CRAT but we would like to
873  * put them in the sysfs under topology directory for Thunk to get the data.
874  * This function is called before updating the sysfs.
875  */
876 static int kfd_add_perf_to_topology(struct kfd_topology_device *kdev)
877 {
878 	struct kfd_perf_properties *props;
879 
880 	if (amd_iommu_pc_supported()) {
881 		props = kfd_alloc_struct(props);
882 		if (!props)
883 			return -ENOMEM;
884 		strcpy(props->block_name, "iommu");
885 		props->max_concurrent = amd_iommu_pc_get_max_banks(0) *
886 			amd_iommu_pc_get_max_counters(0); /* assume one iommu */
887 		list_add_tail(&props->list, &kdev->perf_props);
888 	}
889 
890 	return 0;
891 }
892 
893 /* kfd_add_non_crat_information - Add information that is not currently
894  *	defined in CRAT but is necessary for KFD topology
895  * @dev - topology device to which addition info is added
896  */
897 static void kfd_add_non_crat_information(struct kfd_topology_device *kdev)
898 {
899 	/* Check if CPU only node. */
900 	if (!kdev->gpu) {
901 		/* Add system memory information */
902 		dmi_walk(find_system_memory, kdev);
903 	}
904 	/* TODO: For GPU node, rearrange code from kfd_topology_add_device */
905 }
906 
907 /* kfd_is_acpi_crat_invalid - CRAT from ACPI is valid only for AMD APU devices.
908  *	Ignore CRAT for all other devices. AMD APU is identified if both CPU
909  *	and GPU cores are present.
910  * @device_list - topology device list created by parsing ACPI CRAT table.
911  * @return - TRUE if invalid, FALSE is valid.
912  */
913 static bool kfd_is_acpi_crat_invalid(struct list_head *device_list)
914 {
915 	struct kfd_topology_device *dev;
916 
917 	list_for_each_entry(dev, device_list, list) {
918 		if (dev->node_props.cpu_cores_count &&
919 			dev->node_props.simd_count)
920 			return false;
921 	}
922 	pr_info("Ignoring ACPI CRAT on non-APU system\n");
923 	return true;
924 }
925 
926 int kfd_topology_init(void)
927 {
928 	void *crat_image = NULL;
929 	size_t image_size = 0;
930 	int ret;
931 	struct list_head temp_topology_device_list;
932 	int cpu_only_node = 0;
933 	struct kfd_topology_device *kdev;
934 	int proximity_domain;
935 
936 	/* topology_device_list - Master list of all topology devices
937 	 * temp_topology_device_list - temporary list created while parsing CRAT
938 	 * or VCRAT. Once parsing is complete the contents of list is moved to
939 	 * topology_device_list
940 	 */
941 
942 	/* Initialize the head for the both the lists */
943 	INIT_LIST_HEAD(&topology_device_list);
944 	INIT_LIST_HEAD(&temp_topology_device_list);
945 	init_rwsem(&topology_lock);
946 
947 	memset(&sys_props, 0, sizeof(sys_props));
948 
949 	/* Proximity domains in ACPI CRAT tables start counting at
950 	 * 0. The same should be true for virtual CRAT tables created
951 	 * at this stage. GPUs added later in kfd_topology_add_device
952 	 * use a counter.
953 	 */
954 	proximity_domain = 0;
955 
956 	/*
957 	 * Get the CRAT image from the ACPI. If ACPI doesn't have one
958 	 * or if ACPI CRAT is invalid create a virtual CRAT.
959 	 * NOTE: The current implementation expects all AMD APUs to have
960 	 *	CRAT. If no CRAT is available, it is assumed to be a CPU
961 	 */
962 	ret = kfd_create_crat_image_acpi(&crat_image, &image_size);
963 	if (!ret) {
964 		ret = kfd_parse_crat_table(crat_image,
965 					   &temp_topology_device_list,
966 					   proximity_domain);
967 		if (ret ||
968 		    kfd_is_acpi_crat_invalid(&temp_topology_device_list)) {
969 			kfd_release_topology_device_list(
970 				&temp_topology_device_list);
971 			kfd_destroy_crat_image(crat_image);
972 			crat_image = NULL;
973 		}
974 	}
975 
976 	if (!crat_image) {
977 		ret = kfd_create_crat_image_virtual(&crat_image, &image_size,
978 						    COMPUTE_UNIT_CPU, NULL,
979 						    proximity_domain);
980 		cpu_only_node = 1;
981 		if (ret) {
982 			pr_err("Error creating VCRAT table for CPU\n");
983 			return ret;
984 		}
985 
986 		ret = kfd_parse_crat_table(crat_image,
987 					   &temp_topology_device_list,
988 					   proximity_domain);
989 		if (ret) {
990 			pr_err("Error parsing VCRAT table for CPU\n");
991 			goto err;
992 		}
993 	}
994 
995 	kdev = list_first_entry(&temp_topology_device_list,
996 				struct kfd_topology_device, list);
997 	kfd_add_perf_to_topology(kdev);
998 
999 	down_write(&topology_lock);
1000 	kfd_topology_update_device_list(&temp_topology_device_list,
1001 					&topology_device_list);
1002 	atomic_set(&topology_crat_proximity_domain, sys_props.num_devices-1);
1003 	ret = kfd_topology_update_sysfs();
1004 	up_write(&topology_lock);
1005 
1006 	if (!ret) {
1007 		sys_props.generation_count++;
1008 		kfd_update_system_properties();
1009 		kfd_debug_print_topology();
1010 		pr_info("Finished initializing topology\n");
1011 	} else
1012 		pr_err("Failed to update topology in sysfs ret=%d\n", ret);
1013 
1014 	/* For nodes with GPU, this information gets added
1015 	 * when GPU is detected (kfd_topology_add_device).
1016 	 */
1017 	if (cpu_only_node) {
1018 		/* Add additional information to CPU only node created above */
1019 		down_write(&topology_lock);
1020 		kdev = list_first_entry(&topology_device_list,
1021 				struct kfd_topology_device, list);
1022 		up_write(&topology_lock);
1023 		kfd_add_non_crat_information(kdev);
1024 	}
1025 
1026 err:
1027 	kfd_destroy_crat_image(crat_image);
1028 	return ret;
1029 }
1030 
/* kfd_topology_shutdown - Tear down the KFD topology state.
 *	Calls kfd_topology_release_sysfs() followed by
 *	kfd_release_live_view(), both with topology_lock held for writing.
 */
void kfd_topology_shutdown(void)
{
	down_write(&topology_lock);
	kfd_topology_release_sysfs();
	kfd_release_live_view();
	up_write(&topology_lock);
}
1038 
1039 static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu)
1040 {
1041 	uint32_t hashout;
1042 	uint32_t buf[7];
1043 	uint64_t local_mem_size;
1044 	int i;
1045 	struct kfd_local_mem_info local_mem_info;
1046 
1047 	if (!gpu)
1048 		return 0;
1049 
1050 	gpu->kfd2kgd->get_local_mem_info(gpu->kgd, &local_mem_info);
1051 
1052 	local_mem_size = local_mem_info.local_mem_size_private +
1053 			local_mem_info.local_mem_size_public;
1054 
1055 	buf[0] = gpu->pdev->devfn;
1056 	buf[1] = gpu->pdev->subsystem_vendor;
1057 	buf[2] = gpu->pdev->subsystem_device;
1058 	buf[3] = gpu->pdev->device;
1059 	buf[4] = gpu->pdev->bus->number;
1060 	buf[5] = lower_32_bits(local_mem_size);
1061 	buf[6] = upper_32_bits(local_mem_size);
1062 
1063 	for (i = 0, hashout = 0; i < 7; i++)
1064 		hashout ^= hash_32(buf[i], KFD_GPU_ID_HASH_WIDTH);
1065 
1066 	return hashout;
1067 }
1068 /* kfd_assign_gpu - Attach @gpu to the correct kfd topology device. If
1069  *		the GPU device is not already present in the topology device
1070  *		list then return NULL. This means a new topology device has to
1071  *		be created for this GPU.
1072  * TODO: Rather than assiging @gpu to first topology device withtout
1073  *		gpu attached, it will better to have more stringent check.
1074  */
1075 static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)
1076 {
1077 	struct kfd_topology_device *dev;
1078 	struct kfd_topology_device *out_dev = NULL;
1079 
1080 	down_write(&topology_lock);
1081 	list_for_each_entry(dev, &topology_device_list, list)
1082 		if (!dev->gpu && (dev->node_props.simd_count > 0)) {
1083 			dev->gpu = gpu;
1084 			out_dev = dev;
1085 			break;
1086 		}
1087 	up_write(&topology_lock);
1088 	return out_dev;
1089 }
1090 
/* kfd_notify_gpu_change - Placeholder hook for GPU arrival/removal
 *		notification. The body is currently empty.
 *	@gpu_id: id of the GPU whose state changed
 *	@arrival: callers in this file pass 1 after adding a GPU and 0 after
 *		removing one
 */
static void kfd_notify_gpu_change(uint32_t gpu_id, int arrival)
{
	/*
	 * TODO: Generate an event for thunk about the arrival/removal
	 * of the GPU
	 */
}
1098 
1099 /* kfd_fill_mem_clk_max_info - Since CRAT doesn't have memory clock info,
1100  *		patch this after CRAT parsing.
1101  */
1102 static void kfd_fill_mem_clk_max_info(struct kfd_topology_device *dev)
1103 {
1104 	struct kfd_mem_properties *mem;
1105 	struct kfd_local_mem_info local_mem_info;
1106 
1107 	if (!dev)
1108 		return;
1109 
1110 	/* Currently, amdgpu driver (amdgpu_mc) deals only with GPUs with
1111 	 * single bank of VRAM local memory.
1112 	 * for dGPUs - VCRAT reports only one bank of Local Memory
1113 	 * for APUs - If CRAT from ACPI reports more than one bank, then
1114 	 *	all the banks will report the same mem_clk_max information
1115 	 */
1116 	dev->gpu->kfd2kgd->get_local_mem_info(dev->gpu->kgd,
1117 		&local_mem_info);
1118 
1119 	list_for_each_entry(mem, &dev->mem_props, list)
1120 		mem->mem_clk_max = local_mem_info.mem_clk_max;
1121 }
1122 
1123 static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev)
1124 {
1125 	struct kfd_iolink_properties *link;
1126 
1127 	if (!dev || !dev->gpu)
1128 		return;
1129 
1130 	/* GPU only creates direck links so apply flags setting to all */
1131 	if (dev->gpu->device_info->asic_family == CHIP_HAWAII)
1132 		list_for_each_entry(link, &dev->io_link_props, list)
1133 			link->flags = CRAT_IOLINK_FLAGS_ENABLED |
1134 				CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
1135 				CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
1136 }
1137 
1138 int kfd_topology_add_device(struct kfd_dev *gpu)
1139 {
1140 	uint32_t gpu_id;
1141 	struct kfd_topology_device *dev;
1142 	struct kfd_cu_info cu_info;
1143 	int res = 0;
1144 	struct list_head temp_topology_device_list;
1145 	void *crat_image = NULL;
1146 	size_t image_size = 0;
1147 	int proximity_domain;
1148 
1149 	INIT_LIST_HEAD(&temp_topology_device_list);
1150 
1151 	gpu_id = kfd_generate_gpu_id(gpu);
1152 
1153 	pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id);
1154 
1155 	proximity_domain = atomic_inc_return(&topology_crat_proximity_domain);
1156 
1157 	/* Check to see if this gpu device exists in the topology_device_list.
1158 	 * If so, assign the gpu to that device,
1159 	 * else create a Virtual CRAT for this gpu device and then parse that
1160 	 * CRAT to create a new topology device. Once created assign the gpu to
1161 	 * that topology device
1162 	 */
1163 	dev = kfd_assign_gpu(gpu);
1164 	if (!dev) {
1165 		res = kfd_create_crat_image_virtual(&crat_image, &image_size,
1166 						    COMPUTE_UNIT_GPU, gpu,
1167 						    proximity_domain);
1168 		if (res) {
1169 			pr_err("Error creating VCRAT for GPU (ID: 0x%x)\n",
1170 			       gpu_id);
1171 			return res;
1172 		}
1173 		res = kfd_parse_crat_table(crat_image,
1174 					   &temp_topology_device_list,
1175 					   proximity_domain);
1176 		if (res) {
1177 			pr_err("Error parsing VCRAT for GPU (ID: 0x%x)\n",
1178 			       gpu_id);
1179 			goto err;
1180 		}
1181 
1182 		down_write(&topology_lock);
1183 		kfd_topology_update_device_list(&temp_topology_device_list,
1184 			&topology_device_list);
1185 
1186 		/* Update the SYSFS tree, since we added another topology
1187 		 * device
1188 		 */
1189 		res = kfd_topology_update_sysfs();
1190 		up_write(&topology_lock);
1191 
1192 		if (!res)
1193 			sys_props.generation_count++;
1194 		else
1195 			pr_err("Failed to update GPU (ID: 0x%x) to sysfs topology. res=%d\n",
1196 						gpu_id, res);
1197 		dev = kfd_assign_gpu(gpu);
1198 		if (WARN_ON(!dev)) {
1199 			res = -ENODEV;
1200 			goto err;
1201 		}
1202 	}
1203 
1204 	dev->gpu_id = gpu_id;
1205 	gpu->id = gpu_id;
1206 
1207 	/* TODO: Move the following lines to function
1208 	 *	kfd_add_non_crat_information
1209 	 */
1210 
1211 	/* Fill-in additional information that is not available in CRAT but
1212 	 * needed for the topology
1213 	 */
1214 
1215 	dev->gpu->kfd2kgd->get_cu_info(dev->gpu->kgd, &cu_info);
1216 	dev->node_props.simd_arrays_per_engine =
1217 		cu_info.num_shader_arrays_per_engine;
1218 
1219 	dev->node_props.vendor_id = gpu->pdev->vendor;
1220 	dev->node_props.device_id = gpu->pdev->device;
1221 	dev->node_props.location_id = PCI_DEVID(gpu->pdev->bus->number,
1222 		gpu->pdev->devfn);
1223 	dev->node_props.max_engine_clk_fcompute =
1224 		dev->gpu->kfd2kgd->get_max_engine_clock_in_mhz(dev->gpu->kgd);
1225 	dev->node_props.max_engine_clk_ccompute =
1226 		cpufreq_quick_get_max(0) / 1000;
1227 
1228 	kfd_fill_mem_clk_max_info(dev);
1229 	kfd_fill_iolink_non_crat_info(dev);
1230 
1231 	switch (dev->gpu->device_info->asic_family) {
1232 	case CHIP_KAVERI:
1233 	case CHIP_HAWAII:
1234 	case CHIP_TONGA:
1235 		dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_PRE_1_0 <<
1236 			HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
1237 			HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
1238 		break;
1239 	case CHIP_CARRIZO:
1240 	case CHIP_FIJI:
1241 	case CHIP_POLARIS10:
1242 	case CHIP_POLARIS11:
1243 		pr_debug("Adding doorbell packet type capability\n");
1244 		dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_1_0 <<
1245 			HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
1246 			HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
1247 		break;
1248 	default:
1249 		WARN(1, "Unexpected ASIC family %u",
1250 		     dev->gpu->device_info->asic_family);
1251 	}
1252 
1253 	/* Fix errors in CZ CRAT.
1254 	 * simd_count: Carrizo CRAT reports wrong simd_count, probably
1255 	 *		because it doesn't consider masked out CUs
1256 	 * max_waves_per_simd: Carrizo reports wrong max_waves_per_simd
1257 	 * capability flag: Carrizo CRAT doesn't report IOMMU flags
1258 	 */
1259 	if (dev->gpu->device_info->asic_family == CHIP_CARRIZO) {
1260 		dev->node_props.simd_count =
1261 			cu_info.simd_per_cu * cu_info.cu_active_number;
1262 		dev->node_props.max_waves_per_simd = 10;
1263 		dev->node_props.capability |= HSA_CAP_ATS_PRESENT;
1264 	}
1265 
1266 	kfd_debug_print_topology();
1267 
1268 	if (!res)
1269 		kfd_notify_gpu_change(gpu_id, 1);
1270 err:
1271 	kfd_destroy_crat_image(crat_image);
1272 	return res;
1273 }
1274 
1275 int kfd_topology_remove_device(struct kfd_dev *gpu)
1276 {
1277 	struct kfd_topology_device *dev, *tmp;
1278 	uint32_t gpu_id;
1279 	int res = -ENODEV;
1280 
1281 	down_write(&topology_lock);
1282 
1283 	list_for_each_entry_safe(dev, tmp, &topology_device_list, list)
1284 		if (dev->gpu == gpu) {
1285 			gpu_id = dev->gpu_id;
1286 			kfd_remove_sysfs_node_entry(dev);
1287 			kfd_release_topology_device(dev);
1288 			sys_props.num_devices--;
1289 			res = 0;
1290 			if (kfd_topology_update_sysfs() < 0)
1291 				kfd_topology_release_sysfs();
1292 			break;
1293 		}
1294 
1295 	up_write(&topology_lock);
1296 
1297 	if (!res)
1298 		kfd_notify_gpu_change(gpu_id, 0);
1299 
1300 	return res;
1301 }
1302 
1303 /* kfd_topology_enum_kfd_devices - Enumerate through all devices in KFD
1304  *	topology. If GPU device is found @idx, then valid kfd_dev pointer is
1305  *	returned through @kdev
1306  * Return -	0: On success (@kdev will be NULL for non GPU nodes)
1307  *		-1: If end of list
1308  */
1309 int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev)
1310 {
1311 
1312 	struct kfd_topology_device *top_dev;
1313 	uint8_t device_idx = 0;
1314 
1315 	*kdev = NULL;
1316 	down_read(&topology_lock);
1317 
1318 	list_for_each_entry(top_dev, &topology_device_list, list) {
1319 		if (device_idx == idx) {
1320 			*kdev = top_dev->gpu;
1321 			up_read(&topology_lock);
1322 			return 0;
1323 		}
1324 
1325 		device_idx++;
1326 	}
1327 
1328 	up_read(&topology_lock);
1329 
1330 	return -1;
1331 
1332 }
1333 
1334 static int kfd_cpumask_to_apic_id(const struct cpumask *cpumask)
1335 {
1336 	const struct cpuinfo_x86 *cpuinfo;
1337 	int first_cpu_of_numa_node;
1338 
1339 	if (!cpumask || cpumask == cpu_none_mask)
1340 		return -1;
1341 	first_cpu_of_numa_node = cpumask_first(cpumask);
1342 	if (first_cpu_of_numa_node >= nr_cpu_ids)
1343 		return -1;
1344 	cpuinfo = &cpu_data(first_cpu_of_numa_node);
1345 
1346 	return cpuinfo->apicid;
1347 }
1348 
1349 /* kfd_numa_node_to_apic_id - Returns the APIC ID of the first logical processor
1350  *	of the given NUMA node (numa_node_id)
1351  * Return -1 on failure
1352  */
1353 int kfd_numa_node_to_apic_id(int numa_node_id)
1354 {
1355 	if (numa_node_id == -1) {
1356 		pr_warn("Invalid NUMA Node. Use online CPU mask\n");
1357 		return kfd_cpumask_to_apic_id(cpu_online_mask);
1358 	}
1359 	return kfd_cpumask_to_apic_id(cpumask_of_node(numa_node_id));
1360 }
1361 
1362 #if defined(CONFIG_DEBUG_FS)
1363 
1364 int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data)
1365 {
1366 	struct kfd_topology_device *dev;
1367 	unsigned int i = 0;
1368 	int r = 0;
1369 
1370 	down_read(&topology_lock);
1371 
1372 	list_for_each_entry(dev, &topology_device_list, list) {
1373 		if (!dev->gpu) {
1374 			i++;
1375 			continue;
1376 		}
1377 
1378 		seq_printf(m, "Node %u, gpu_id %x:\n", i++, dev->gpu->id);
1379 		r = dqm_debugfs_hqds(m, dev->gpu->dqm);
1380 		if (r)
1381 			break;
1382 	}
1383 
1384 	up_read(&topology_lock);
1385 
1386 	return r;
1387 }
1388 
1389 int kfd_debugfs_rls_by_device(struct seq_file *m, void *data)
1390 {
1391 	struct kfd_topology_device *dev;
1392 	unsigned int i = 0;
1393 	int r = 0;
1394 
1395 	down_read(&topology_lock);
1396 
1397 	list_for_each_entry(dev, &topology_device_list, list) {
1398 		if (!dev->gpu) {
1399 			i++;
1400 			continue;
1401 		}
1402 
1403 		seq_printf(m, "Node %u, gpu_id %x:\n", i++, dev->gpu->id);
1404 		r = pm_debugfs_runlist(m, &dev->gpu->dqm->packets);
1405 		if (r)
1406 			break;
1407 	}
1408 
1409 	up_read(&topology_lock);
1410 
1411 	return r;
1412 }
1413 
1414 #endif
1415