1 /*
2  * Copyright 2018 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include "amdgpu.h"
25 #include "amdgpu_discovery.h"
26 #include "soc15_hw_ip.h"
27 #include "discovery.h"
28 
/* Register offset whose value, shifted left by 20, is used as the VRAM size. */
#define mmRCC_CONFIG_MEMSIZE	0xde3
/*
 * MM_INDEX / MM_INDEX_HI / MM_DATA register offsets.
 * NOTE(review): not referenced by the code visible in this file - confirm
 * whether they are still needed.
 */
#define mmMM_INDEX		0x0
#define mmMM_INDEX_HI		0x6
#define mmMM_DATA		0x1
/* Number of slots in hw_id_names[], the hardware-ID to name table. */
#define HW_ID_MAX		300
34 
35 static const char *hw_id_names[HW_ID_MAX] = {
36 	[MP1_HWID]		= "MP1",
37 	[MP2_HWID]		= "MP2",
38 	[THM_HWID]		= "THM",
39 	[SMUIO_HWID]		= "SMUIO",
40 	[FUSE_HWID]		= "FUSE",
41 	[CLKA_HWID]		= "CLKA",
42 	[PWR_HWID]		= "PWR",
43 	[GC_HWID]		= "GC",
44 	[UVD_HWID]		= "UVD",
45 	[AUDIO_AZ_HWID]		= "AUDIO_AZ",
46 	[ACP_HWID]		= "ACP",
47 	[DCI_HWID]		= "DCI",
48 	[DMU_HWID]		= "DMU",
49 	[DCO_HWID]		= "DCO",
50 	[DIO_HWID]		= "DIO",
51 	[XDMA_HWID]		= "XDMA",
52 	[DCEAZ_HWID]		= "DCEAZ",
53 	[DAZ_HWID]		= "DAZ",
54 	[SDPMUX_HWID]		= "SDPMUX",
55 	[NTB_HWID]		= "NTB",
56 	[IOHC_HWID]		= "IOHC",
57 	[L2IMU_HWID]		= "L2IMU",
58 	[VCE_HWID]		= "VCE",
59 	[MMHUB_HWID]		= "MMHUB",
60 	[ATHUB_HWID]		= "ATHUB",
61 	[DBGU_NBIO_HWID]	= "DBGU_NBIO",
62 	[DFX_HWID]		= "DFX",
63 	[DBGU0_HWID]		= "DBGU0",
64 	[DBGU1_HWID]		= "DBGU1",
65 	[OSSSYS_HWID]		= "OSSSYS",
66 	[HDP_HWID]		= "HDP",
67 	[SDMA0_HWID]		= "SDMA0",
68 	[SDMA1_HWID]		= "SDMA1",
69 	[ISP_HWID]		= "ISP",
70 	[DBGU_IO_HWID]		= "DBGU_IO",
71 	[DF_HWID]		= "DF",
72 	[CLKB_HWID]		= "CLKB",
73 	[FCH_HWID]		= "FCH",
74 	[DFX_DAP_HWID]		= "DFX_DAP",
75 	[L1IMU_PCIE_HWID]	= "L1IMU_PCIE",
76 	[L1IMU_NBIF_HWID]	= "L1IMU_NBIF",
77 	[L1IMU_IOAGR_HWID]	= "L1IMU_IOAGR",
78 	[L1IMU3_HWID]		= "L1IMU3",
79 	[L1IMU4_HWID]		= "L1IMU4",
80 	[L1IMU5_HWID]		= "L1IMU5",
81 	[L1IMU6_HWID]		= "L1IMU6",
82 	[L1IMU7_HWID]		= "L1IMU7",
83 	[L1IMU8_HWID]		= "L1IMU8",
84 	[L1IMU9_HWID]		= "L1IMU9",
85 	[L1IMU10_HWID]		= "L1IMU10",
86 	[L1IMU11_HWID]		= "L1IMU11",
87 	[L1IMU12_HWID]		= "L1IMU12",
88 	[L1IMU13_HWID]		= "L1IMU13",
89 	[L1IMU14_HWID]		= "L1IMU14",
90 	[L1IMU15_HWID]		= "L1IMU15",
91 	[WAFLC_HWID]		= "WAFLC",
92 	[FCH_USB_PD_HWID]	= "FCH_USB_PD",
93 	[PCIE_HWID]		= "PCIE",
94 	[PCS_HWID]		= "PCS",
95 	[DDCL_HWID]		= "DDCL",
96 	[SST_HWID]		= "SST",
97 	[IOAGR_HWID]		= "IOAGR",
98 	[NBIF_HWID]		= "NBIF",
99 	[IOAPIC_HWID]		= "IOAPIC",
100 	[SYSTEMHUB_HWID]	= "SYSTEMHUB",
101 	[NTBCCP_HWID]		= "NTBCCP",
102 	[UMC_HWID]		= "UMC",
103 	[SATA_HWID]		= "SATA",
104 	[USB_HWID]		= "USB",
105 	[CCXSEC_HWID]		= "CCXSEC",
106 	[XGMI_HWID]		= "XGMI",
107 	[XGBE_HWID]		= "XGBE",
108 	[MP0_HWID]		= "MP0",
109 };
110 
111 static int hw_id_map[MAX_HWIP] = {
112 	[GC_HWIP]	= GC_HWID,
113 	[HDP_HWIP]	= HDP_HWID,
114 	[SDMA0_HWIP]	= SDMA0_HWID,
115 	[SDMA1_HWIP]	= SDMA1_HWID,
116 	[MMHUB_HWIP]	= MMHUB_HWID,
117 	[ATHUB_HWIP]	= ATHUB_HWID,
118 	[NBIO_HWIP]	= NBIF_HWID,
119 	[MP0_HWIP]	= MP0_HWID,
120 	[MP1_HWIP]	= MP1_HWID,
121 	[UVD_HWIP]	= UVD_HWID,
122 	[VCE_HWIP]	= VCE_HWID,
123 	[DF_HWIP]	= DF_HWID,
124 	[DCE_HWIP]	= DMU_HWID,
125 	[OSSSYS_HWIP]	= OSSSYS_HWID,
126 	[SMUIO_HWIP]	= SMUIO_HWID,
127 	[PWR_HWIP]	= PWR_HWID,
128 	[NBIF_HWIP]	= NBIF_HWID,
129 	[THM_HWIP]	= THM_HWID,
130 	[CLK_HWIP]	= CLKA_HWID,
131 	[UMC_HWIP]	= UMC_HWID,
132 };
133 
134 static int amdgpu_discovery_read_binary(struct amdgpu_device *adev, uint8_t *binary)
135 {
136 	uint64_t vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20;
137 	uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET;
138 
139 	amdgpu_device_vram_access(adev, pos, (uint32_t *)binary,
140 				  adev->mman.discovery_tmr_size, false);
141 	return 0;
142 }
143 
/**
 * amdgpu_discovery_calculate_checksum - byte-wise sum of a buffer
 * @data: buffer to sum (not modified)
 * @size: number of bytes in @data
 *
 * Returns the sum of all bytes truncated to 16 bits (i.e. modulo 65536).
 * An empty buffer yields 0.
 */
static uint16_t amdgpu_discovery_calculate_checksum(const uint8_t *data, uint32_t size)
{
	uint16_t checksum = 0;
	uint32_t i;	/* same type as @size: no signed/unsigned comparison */

	for (i = 0; i < size; i++)
		checksum += data[i];

	return checksum;
}
154 
/*
 * Return true when the byte-sum checksum of the first @size bytes of @data
 * equals @expected, false otherwise.
 */
static inline bool amdgpu_discovery_verify_checksum(uint8_t *data, uint32_t size,
						    uint16_t expected)
{
	uint16_t actual = amdgpu_discovery_calculate_checksum(data, size);

	return actual == expected;
}
160 
161 static int amdgpu_discovery_init(struct amdgpu_device *adev)
162 {
163 	struct table_info *info;
164 	struct binary_header *bhdr;
165 	struct ip_discovery_header *ihdr;
166 	struct gpu_info_header *ghdr;
167 	uint16_t offset;
168 	uint16_t size;
169 	uint16_t checksum;
170 	int r;
171 
172 	adev->mman.discovery_tmr_size = DISCOVERY_TMR_SIZE;
173 	adev->mman.discovery_bin = kzalloc(adev->mman.discovery_tmr_size, GFP_KERNEL);
174 	if (!adev->mman.discovery_bin)
175 		return -ENOMEM;
176 
177 	r = amdgpu_discovery_read_binary(adev, adev->mman.discovery_bin);
178 	if (r) {
179 		DRM_ERROR("failed to read ip discovery binary\n");
180 		goto out;
181 	}
182 
183 	bhdr = (struct binary_header *)adev->mman.discovery_bin;
184 
185 	if (le32_to_cpu(bhdr->binary_signature) != BINARY_SIGNATURE) {
186 		DRM_ERROR("invalid ip discovery binary signature\n");
187 		r = -EINVAL;
188 		goto out;
189 	}
190 
191 	offset = offsetof(struct binary_header, binary_checksum) +
192 		sizeof(bhdr->binary_checksum);
193 	size = bhdr->binary_size - offset;
194 	checksum = bhdr->binary_checksum;
195 
196 	if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
197 					      size, checksum)) {
198 		DRM_ERROR("invalid ip discovery binary checksum\n");
199 		r = -EINVAL;
200 		goto out;
201 	}
202 
203 	info = &bhdr->table_list[IP_DISCOVERY];
204 	offset = le16_to_cpu(info->offset);
205 	checksum = le16_to_cpu(info->checksum);
206 	ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin + offset);
207 
208 	if (le32_to_cpu(ihdr->signature) != DISCOVERY_TABLE_SIGNATURE) {
209 		DRM_ERROR("invalid ip discovery data table signature\n");
210 		r = -EINVAL;
211 		goto out;
212 	}
213 
214 	if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
215 					      ihdr->size, checksum)) {
216 		DRM_ERROR("invalid ip discovery data table checksum\n");
217 		r = -EINVAL;
218 		goto out;
219 	}
220 
221 	info = &bhdr->table_list[GC];
222 	offset = le16_to_cpu(info->offset);
223 	checksum = le16_to_cpu(info->checksum);
224 	ghdr = (struct gpu_info_header *)(adev->mman.discovery_bin + offset);
225 
226 	if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
227 				              ghdr->size, checksum)) {
228 		DRM_ERROR("invalid gc data table checksum\n");
229 		r = -EINVAL;
230 		goto out;
231 	}
232 
233 	return 0;
234 
235 out:
236 	kfree(adev->mman.discovery_bin);
237 	adev->mman.discovery_bin = NULL;
238 
239 	return r;
240 }
241 
242 void amdgpu_discovery_fini(struct amdgpu_device *adev)
243 {
244 	kfree(adev->mman.discovery_bin);
245 	adev->mman.discovery_bin = NULL;
246 }
247 
248 int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
249 {
250 	struct binary_header *bhdr;
251 	struct ip_discovery_header *ihdr;
252 	struct die_header *dhdr;
253 	struct ip *ip;
254 	uint16_t die_offset;
255 	uint16_t ip_offset;
256 	uint16_t num_dies;
257 	uint16_t num_ips;
258 	uint8_t num_base_address;
259 	int hw_ip;
260 	int i, j, k;
261 	int r;
262 
263 	r = amdgpu_discovery_init(adev);
264 	if (r) {
265 		DRM_ERROR("amdgpu_discovery_init failed\n");
266 		return r;
267 	}
268 
269 	bhdr = (struct binary_header *)adev->mman.discovery_bin;
270 	ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin +
271 			le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
272 	num_dies = le16_to_cpu(ihdr->num_dies);
273 
274 	DRM_DEBUG("number of dies: %d\n", num_dies);
275 
276 	for (i = 0; i < num_dies; i++) {
277 		die_offset = le16_to_cpu(ihdr->die_info[i].die_offset);
278 		dhdr = (struct die_header *)(adev->mman.discovery_bin + die_offset);
279 		num_ips = le16_to_cpu(dhdr->num_ips);
280 		ip_offset = die_offset + sizeof(*dhdr);
281 
282 		if (le16_to_cpu(dhdr->die_id) != i) {
283 			DRM_ERROR("invalid die id %d, expected %d\n",
284 					le16_to_cpu(dhdr->die_id), i);
285 			return -EINVAL;
286 		}
287 
288 		DRM_DEBUG("number of hardware IPs on die%d: %d\n",
289 				le16_to_cpu(dhdr->die_id), num_ips);
290 
291 		for (j = 0; j < num_ips; j++) {
292 			ip = (struct ip *)(adev->mman.discovery_bin + ip_offset);
293 			num_base_address = ip->num_base_address;
294 
295 			DRM_DEBUG("%s(%d) #%d v%d.%d.%d:\n",
296 				  hw_id_names[le16_to_cpu(ip->hw_id)],
297 				  le16_to_cpu(ip->hw_id),
298 				  ip->number_instance,
299 				  ip->major, ip->minor,
300 				  ip->revision);
301 
302 			for (k = 0; k < num_base_address; k++) {
303 				/*
304 				 * convert the endianness of base addresses in place,
305 				 * so that we don't need to convert them when accessing adev->reg_offset.
306 				 */
307 				ip->base_address[k] = le32_to_cpu(ip->base_address[k]);
308 				DRM_DEBUG("\t0x%08x\n", ip->base_address[k]);
309 			}
310 
311 			for (hw_ip = 0; hw_ip < MAX_HWIP; hw_ip++) {
312 				if (hw_id_map[hw_ip] == le16_to_cpu(ip->hw_id)) {
313 					DRM_DEBUG("set register base offset for %s\n",
314 							hw_id_names[le16_to_cpu(ip->hw_id)]);
315 					adev->reg_offset[hw_ip][ip->number_instance] =
316 						ip->base_address;
317 				}
318 
319 			}
320 
321 			ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1);
322 		}
323 	}
324 
325 	return 0;
326 }
327 
328 int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id, int number_instance,
329 				    int *major, int *minor, int *revision)
330 {
331 	struct binary_header *bhdr;
332 	struct ip_discovery_header *ihdr;
333 	struct die_header *dhdr;
334 	struct ip *ip;
335 	uint16_t die_offset;
336 	uint16_t ip_offset;
337 	uint16_t num_dies;
338 	uint16_t num_ips;
339 	int i, j;
340 
341 	if (!adev->mman.discovery_bin) {
342 		DRM_ERROR("ip discovery uninitialized\n");
343 		return -EINVAL;
344 	}
345 
346 	bhdr = (struct binary_header *)adev->mman.discovery_bin;
347 	ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin +
348 			le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
349 	num_dies = le16_to_cpu(ihdr->num_dies);
350 
351 	for (i = 0; i < num_dies; i++) {
352 		die_offset = le16_to_cpu(ihdr->die_info[i].die_offset);
353 		dhdr = (struct die_header *)(adev->mman.discovery_bin + die_offset);
354 		num_ips = le16_to_cpu(dhdr->num_ips);
355 		ip_offset = die_offset + sizeof(*dhdr);
356 
357 		for (j = 0; j < num_ips; j++) {
358 			ip = (struct ip *)(adev->mman.discovery_bin + ip_offset);
359 
360 			if ((le16_to_cpu(ip->hw_id) == hw_id) && (ip->number_instance == number_instance)) {
361 				if (major)
362 					*major = ip->major;
363 				if (minor)
364 					*minor = ip->minor;
365 				if (revision)
366 					*revision = ip->revision;
367 				return 0;
368 			}
369 			ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1);
370 		}
371 	}
372 
373 	return -EINVAL;
374 }
375 
376 
377 int amdgpu_discovery_get_vcn_version(struct amdgpu_device *adev, int vcn_instance,
378 				     int *major, int *minor, int *revision)
379 {
380 	return amdgpu_discovery_get_ip_version(adev, VCN_HWID,
381 					       vcn_instance, major, minor, revision);
382 }
383 
384 void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev)
385 {
386 	struct binary_header *bhdr;
387 	struct harvest_table *harvest_info;
388 	int i;
389 
390 	bhdr = (struct binary_header *)adev->mman.discovery_bin;
391 	harvest_info = (struct harvest_table *)(adev->mman.discovery_bin +
392 			le16_to_cpu(bhdr->table_list[HARVEST_INFO].offset));
393 
394 	for (i = 0; i < 32; i++) {
395 		if (le32_to_cpu(harvest_info->list[i].hw_id) == 0)
396 			break;
397 
398 		switch (le32_to_cpu(harvest_info->list[i].hw_id)) {
399 		case VCN_HWID:
400 			adev->harvest_ip_mask |= AMD_HARVEST_IP_VCN_MASK;
401 			adev->harvest_ip_mask |= AMD_HARVEST_IP_JPEG_MASK;
402 			break;
403 		case DMU_HWID:
404 			adev->harvest_ip_mask |= AMD_HARVEST_IP_DMU_MASK;
405 			break;
406 		default:
407 			break;
408 		}
409 	}
410 }
411 
412 int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
413 {
414 	struct binary_header *bhdr;
415 	struct gc_info_v1_0 *gc_info;
416 
417 	if (!adev->mman.discovery_bin) {
418 		DRM_ERROR("ip discovery uninitialized\n");
419 		return -EINVAL;
420 	}
421 
422 	bhdr = (struct binary_header *)adev->mman.discovery_bin;
423 	gc_info = (struct gc_info_v1_0 *)(adev->mman.discovery_bin +
424 			le16_to_cpu(bhdr->table_list[GC].offset));
425 
426 	adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->gc_num_se);
427 	adev->gfx.config.max_cu_per_sh = 2 * (le32_to_cpu(gc_info->gc_num_wgp0_per_sa) +
428 					      le32_to_cpu(gc_info->gc_num_wgp1_per_sa));
429 	adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->gc_num_sa_per_se);
430 	adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->gc_num_rb_per_se);
431 	adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->gc_num_gl2c);
432 	adev->gfx.config.max_gprs = le32_to_cpu(gc_info->gc_num_gprs);
433 	adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->gc_num_max_gs_thds);
434 	adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->gc_gs_table_depth);
435 	adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->gc_gsprim_buff_depth);
436 	adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->gc_double_offchip_lds_buffer);
437 	adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->gc_wave_size);
438 	adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->gc_max_waves_per_simd);
439 	adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->gc_max_scratch_slots_per_cu);
440 	adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->gc_lds_size);
441 	adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->gc_num_sc_per_se) /
442 					 le32_to_cpu(gc_info->gc_num_sa_per_se);
443 	adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->gc_num_packer_per_sc);
444 
445 	return 0;
446 }
447