/*
 * Copyright 2019 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include "amdgpu_ras_eeprom.h"
#include "amdgpu.h"
#include "amdgpu_ras.h"
#include <linux/bits.h>
#include "atom.h"
#include "amdgpu_eeprom.h"
#include "amdgpu_atomfirmware.h"
#include <linux/debugfs.h>
#include <linux/uaccess.h>

#include "amdgpu_reset.h"

#define EEPROM_I2C_MADDR_VEGA20         0x0
#define EEPROM_I2C_MADDR_ARCTURUS       0x40000
#define EEPROM_I2C_MADDR_ARCTURUS_D342  0x0
#define EEPROM_I2C_MADDR_SIENNA_CICHLID 0x0
#define EEPROM_I2C_MADDR_ALDEBARAN      0x0
#define EEPROM_I2C_MADDR_54H            (0x54UL << 16)

/*
 * The 2 macros below represent the actual size in bytes that
 * those entities occupy in the EEPROM memory.
 * RAS_TABLE_RECORD_SIZE differs from sizeof(eeprom_table_record), which
 * uses a uint64 to store 6-byte fields such as retired_page.
 */
#define RAS_TABLE_HEADER_SIZE   20
#define RAS_TABLE_RECORD_SIZE   24

/* Table hdr is 'AMDR' */
#define RAS_TABLE_HDR_VAL       0x414d4452
#define RAS_TABLE_VER           0x00010000

/* Bad GPU tag 'BADG' */
#define RAS_TABLE_HDR_BAD       0x42414447

/* Assume 2-Mbit size EEPROM and take up the whole space. */
#define RAS_TBL_SIZE_BYTES      (256 * 1024)
#define RAS_TABLE_START         0
#define RAS_HDR_START           RAS_TABLE_START
#define RAS_RECORD_START        (RAS_HDR_START + RAS_TABLE_HEADER_SIZE)
#define RAS_MAX_RECORD_COUNT    ((RAS_TBL_SIZE_BYTES - RAS_TABLE_HEADER_SIZE) \
				 / RAS_TABLE_RECORD_SIZE)

/* Given a zero-based index of an EEPROM RAS record, yields the EEPROM
 * offset off of RAS_TABLE_START.  That is, this is something you can
 * add to control->i2c_address, and then tell the I2C layer to read
 * from/write to there. _N is the so-called absolute index,
 * because it starts right after the table header.
 */
#define RAS_INDEX_TO_OFFSET(_C, _N) ((_C)->ras_record_offset + \
				     (_N) * RAS_TABLE_RECORD_SIZE)

#define RAS_OFFSET_TO_INDEX(_C, _O) (((_O) - \
				      (_C)->ras_record_offset) / RAS_TABLE_RECORD_SIZE)
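
/* For example, with ras_record_offset == RAS_RECORD_START (20) and
 * RAS_TABLE_RECORD_SIZE == 24, absolute record index 2 maps to EEPROM
 * offset 20 + 2 * 24 == 68, and offset 68 maps back to index 2.
 */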

/* Given a 0-based relative record index, 0, 1, 2, ..., etc., off
 * of "fri", return the absolute record index off of the end of
 * the table header.
 */
#define RAS_RI_TO_AI(_C, _I) (((_I) + (_C)->ras_fri) % \
			      (_C)->ras_max_record_count)
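
/* For example, if ras_max_record_count == 10921 (RAS_MAX_RECORD_COUNT for
 * a 2-Mbit EEPROM) and ras_fri == 10920, relative index 0 is absolute
 * index 10920, and relative index 1 wraps around to absolute index 0.
 */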

#define RAS_NUM_RECS(_tbl_hdr)  (((_tbl_hdr)->tbl_size - \
				  RAS_TABLE_HEADER_SIZE) / RAS_TABLE_RECORD_SIZE)

#define to_amdgpu_device(x) (container_of(x, struct amdgpu_ras, eeprom_control))->adev

static bool __is_ras_eeprom_supported(struct amdgpu_device *adev)
{
	if (adev->asic_type == CHIP_IP_DISCOVERY) {
		switch (adev->ip_versions[MP1_HWIP][0]) {
		case IP_VERSION(13, 0, 0):
		case IP_VERSION(13, 0, 10):
			return true;
		default:
			return false;
		}
	}

	return  adev->asic_type == CHIP_VEGA20 ||
		adev->asic_type == CHIP_ARCTURUS ||
		adev->asic_type == CHIP_SIENNA_CICHLID ||
		adev->asic_type == CHIP_ALDEBARAN;
}

static bool __get_eeprom_i2c_addr_arct(struct amdgpu_device *adev,
				       struct amdgpu_ras_eeprom_control *control)
{
	struct atom_context *atom_ctx = adev->mode_info.atom_context;

	if (!control || !atom_ctx)
		return false;

	if (strnstr(atom_ctx->vbios_version,
		    "D342",
		    sizeof(atom_ctx->vbios_version)))
		control->i2c_address = EEPROM_I2C_MADDR_ARCTURUS_D342;
	else
		control->i2c_address = EEPROM_I2C_MADDR_ARCTURUS;

	return true;
}

static bool __get_eeprom_i2c_addr_ip_discovery(struct amdgpu_device *adev,
				       struct amdgpu_ras_eeprom_control *control)
{
	switch (adev->ip_versions[MP1_HWIP][0]) {
	case IP_VERSION(13, 0, 0):
	case IP_VERSION(13, 0, 10):
		control->i2c_address = EEPROM_I2C_MADDR_54H;
		return true;
	default:
		return false;
	}
}

static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev,
				  struct amdgpu_ras_eeprom_control *control)
{
	u8 i2c_addr;

	if (!control)
		return false;

	if (amdgpu_atomfirmware_ras_rom_addr(adev, &i2c_addr)) {
		/* The address given by VBIOS is an 8-bit, wire-format
		 * address, i.e. the most significant byte.
		 *
		 * Normalize it to a 19-bit EEPROM address. Remove the
		 * device type identifier and make it a 7-bit address;
		 * then make it a 19-bit EEPROM address. See top of
		 * amdgpu_eeprom.c.
		 */
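		/* For example (hypothetical VBIOS value): a wire-format
		 * address of 0xA8 becomes (0xA8 & 0x0F) >> 1 == 0x4, which,
		 * shifted left by 16, gives the 19-bit EEPROM address
		 * 0x40000; 0xA0 maps to 0x0.
		 */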
		i2c_addr = (i2c_addr & 0x0F) >> 1;
		control->i2c_address = ((u32) i2c_addr) << 16;

		return true;
	}

	switch (adev->asic_type) {
	case CHIP_VEGA20:
		control->i2c_address = EEPROM_I2C_MADDR_VEGA20;
		break;

	case CHIP_ARCTURUS:
		return __get_eeprom_i2c_addr_arct(adev, control);

	case CHIP_SIENNA_CICHLID:
		control->i2c_address = EEPROM_I2C_MADDR_SIENNA_CICHLID;
		break;

	case CHIP_ALDEBARAN:
		control->i2c_address = EEPROM_I2C_MADDR_ALDEBARAN;
		break;

	case CHIP_IP_DISCOVERY:
		return __get_eeprom_i2c_addr_ip_discovery(adev, control);

	default:
		return false;
	}

	switch (adev->ip_versions[MP1_HWIP][0]) {
	case IP_VERSION(13, 0, 0):
		control->i2c_address = EEPROM_I2C_MADDR_54H;
		break;

	default:
		break;
	}

	return true;
}

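/* The RAS table header is stored in EEPROM as five consecutive
 * little-endian 32-bit words: signature, version, first record offset,
 * table size and checksum, i.e. RAS_TABLE_HEADER_SIZE (20) bytes in total.
 */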
static void
__encode_table_header_to_buf(struct amdgpu_ras_eeprom_table_header *hdr,
			     unsigned char *buf)
{
	u32 *pp = (uint32_t *)buf;

	pp[0] = cpu_to_le32(hdr->header);
	pp[1] = cpu_to_le32(hdr->version);
	pp[2] = cpu_to_le32(hdr->first_rec_offset);
	pp[3] = cpu_to_le32(hdr->tbl_size);
	pp[4] = cpu_to_le32(hdr->checksum);
}

static void
__decode_table_header_from_buf(struct amdgpu_ras_eeprom_table_header *hdr,
			       unsigned char *buf)
{
	u32 *pp = (uint32_t *)buf;

	hdr->header	      = le32_to_cpu(pp[0]);
	hdr->version	      = le32_to_cpu(pp[1]);
	hdr->first_rec_offset = le32_to_cpu(pp[2]);
	hdr->tbl_size	      = le32_to_cpu(pp[3]);
	hdr->checksum	      = le32_to_cpu(pp[4]);
}

static int __write_table_header(struct amdgpu_ras_eeprom_control *control)
{
	u8 buf[RAS_TABLE_HEADER_SIZE];
	struct amdgpu_device *adev = to_amdgpu_device(control);
	int res;

	memset(buf, 0, sizeof(buf));
	__encode_table_header_to_buf(&control->tbl_hdr, buf);

	/* i2c may be unstable in gpu reset */
	down_read(&adev->reset_domain->sem);
	res = amdgpu_eeprom_write(adev->pm.ras_eeprom_i2c_bus,
				  control->i2c_address +
				  control->ras_header_offset,
				  buf, RAS_TABLE_HEADER_SIZE);
	up_read(&adev->reset_domain->sem);

	if (res < 0) {
		DRM_ERROR("Failed to write EEPROM table header:%d", res);
	} else if (res < RAS_TABLE_HEADER_SIZE) {
		DRM_ERROR("Short write:%d out of %d\n",
			  res, RAS_TABLE_HEADER_SIZE);
		res = -EIO;
	} else {
		res = 0;
	}

	return res;
}

static u8 __calc_hdr_byte_sum(const struct amdgpu_ras_eeprom_control *control)
{
	int ii;
	u8  *pp, csum;
	size_t sz;

	/* Header checksum, skip checksum field in the calculation */
	sz = sizeof(control->tbl_hdr) - sizeof(control->tbl_hdr.checksum);
	pp = (u8 *) &control->tbl_hdr;
	csum = 0;
	for (ii = 0; ii < sz; ii++, pp++)
		csum += *pp;

	return csum;
}

static int amdgpu_ras_eeprom_correct_header_tag(
	struct amdgpu_ras_eeprom_control *control,
	uint32_t header)
{
	struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
	u8 *hh;
	int res;
	u8 csum;

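	/* The table checksum is chosen so that all bytes of the header and
	 * records sum to zero (mod 256). Only the four header-tag bytes
	 * change here, so rather than re-reading every record, start from
	 * the negated old checksum, swap the old tag bytes for the new
	 * ones, and negate again to get the new checksum.
	 */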
	csum = -hdr->checksum;

	hh = (void *) &hdr->header;
	csum -= (hh[0] + hh[1] + hh[2] + hh[3]);
	hh = (void *) &header;
	csum += hh[0] + hh[1] + hh[2] + hh[3];
	csum = -csum;
	mutex_lock(&control->ras_tbl_mutex);
	hdr->header = header;
	hdr->checksum = csum;
	res = __write_table_header(control);
	mutex_unlock(&control->ras_tbl_mutex);

	return res;
}

/**
 * amdgpu_ras_eeprom_reset_table -- Reset the RAS EEPROM table
 * @control: pointer to control structure
 *
 * Reset the contents of the header of the RAS EEPROM table.
 * Return 0 on success, -errno on error.
 */
int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control)
{
	struct amdgpu_device *adev = to_amdgpu_device(control);
	struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
	u8 csum;
	int res;

	mutex_lock(&control->ras_tbl_mutex);

	hdr->header = RAS_TABLE_HDR_VAL;
	hdr->version = RAS_TABLE_VER;
	hdr->first_rec_offset = RAS_RECORD_START;
	hdr->tbl_size = RAS_TABLE_HEADER_SIZE;

	csum = __calc_hdr_byte_sum(control);
	csum = -csum;
	hdr->checksum = csum;
	res = __write_table_header(control);

	control->ras_num_recs = 0;
	control->ras_fri = 0;

	amdgpu_dpm_send_hbm_bad_pages_num(adev, control->ras_num_recs);

	control->bad_channel_bitmap = 0;
	amdgpu_dpm_send_hbm_bad_channel_flag(adev, control->bad_channel_bitmap);
	con->update_channel_flag = false;

	amdgpu_ras_debugfs_set_ret_size(control);

	mutex_unlock(&control->ras_tbl_mutex);

	return res;
}

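/* Each record occupies RAS_TABLE_RECORD_SIZE (24) bytes in EEPROM, laid
 * out as: err_type (1 byte), bank (1), timestamp (8, LE), offset (6, LE),
 * mem_channel (1), mcumc_id (1), retired_page (6, LE).
 */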
static void
__encode_table_record_to_buf(struct amdgpu_ras_eeprom_control *control,
			     struct eeprom_table_record *record,
			     unsigned char *buf)
{
	__le64 tmp = 0;
	int i = 0;

	/* Next are all record fields according to EEPROM page spec in LE format */
	buf[i++] = record->err_type;

	buf[i++] = record->bank;

	tmp = cpu_to_le64(record->ts);
	memcpy(buf + i, &tmp, 8);
	i += 8;

	tmp = cpu_to_le64((record->offset & 0xffffffffffff));
	memcpy(buf + i, &tmp, 6);
	i += 6;

	buf[i++] = record->mem_channel;
	buf[i++] = record->mcumc_id;

	tmp = cpu_to_le64((record->retired_page & 0xffffffffffff));
	memcpy(buf + i, &tmp, 6);
}

static void
__decode_table_record_from_buf(struct amdgpu_ras_eeprom_control *control,
			       struct eeprom_table_record *record,
			       unsigned char *buf)
{
	__le64 tmp = 0;
	int i = 0;

	/* Next are all record fields according to EEPROM page spec in LE format */
	record->err_type = buf[i++];

	record->bank = buf[i++];

	memcpy(&tmp, buf + i, 8);
	record->ts = le64_to_cpu(tmp);
	i += 8;

	memcpy(&tmp, buf + i, 6);
	record->offset = (le64_to_cpu(tmp) & 0xffffffffffff);
	i += 6;

	record->mem_channel = buf[i++];
	record->mcumc_id = buf[i++];

	memcpy(&tmp, buf + i, 6);
	record->retired_page = (le64_to_cpu(tmp) & 0xffffffffffff);
}

bool amdgpu_ras_eeprom_check_err_threshold(struct amdgpu_device *adev)
{
	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);

	if (!__is_ras_eeprom_supported(adev))
		return false;

	/* skip check eeprom table for VEGA20 Gaming */
	if (!con)
		return false;
	else
		if (!(con->features & BIT(AMDGPU_RAS_BLOCK__UMC)))
			return false;

	if (con->eeprom_control.tbl_hdr.header == RAS_TABLE_HDR_BAD) {
		dev_warn(adev->dev, "This GPU is in BAD status.");
		dev_warn(adev->dev, "Please retire it or set a larger "
			 "threshold value when reloading driver.\n");
		return true;
	}

	return false;
}

/**
 * __amdgpu_ras_eeprom_write -- write indexed from buffer to EEPROM
 * @control: pointer to control structure
 * @buf: pointer to buffer containing data to write
 * @fri: start writing at this index
 * @num: number of records to write
 *
 * The caller must hold the table mutex in @control.
 * Return 0 on success, -errno otherwise.
 */
static int __amdgpu_ras_eeprom_write(struct amdgpu_ras_eeprom_control *control,
				     u8 *buf, const u32 fri, const u32 num)
{
	struct amdgpu_device *adev = to_amdgpu_device(control);
	u32 buf_size;
	int res;

	/* i2c may be unstable in gpu reset */
	down_read(&adev->reset_domain->sem);
	buf_size = num * RAS_TABLE_RECORD_SIZE;
	res = amdgpu_eeprom_write(adev->pm.ras_eeprom_i2c_bus,
				  control->i2c_address +
				  RAS_INDEX_TO_OFFSET(control, fri),
				  buf, buf_size);
	up_read(&adev->reset_domain->sem);
	if (res < 0) {
		DRM_ERROR("Writing %d EEPROM table records error:%d",
			  num, res);
	} else if (res < buf_size) {
		/* Short write, return error.
		 */
		DRM_ERROR("Wrote %d records out of %d",
			  res / RAS_TABLE_RECORD_SIZE, num);
		res = -EIO;
	} else {
		res = 0;
	}

	return res;
}

static int
amdgpu_ras_eeprom_append_table(struct amdgpu_ras_eeprom_control *control,
			       struct eeprom_table_record *record,
			       const u32 num)
{
	struct amdgpu_ras *con = amdgpu_ras_get_context(to_amdgpu_device(control));
	u32 a, b, i;
	u8 *buf, *pp;
	int res;

	buf = kcalloc(num, RAS_TABLE_RECORD_SIZE, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	/* Encode all of them in one go.
	 */
	pp = buf;
	for (i = 0; i < num; i++, pp += RAS_TABLE_RECORD_SIZE) {
		__encode_table_record_to_buf(control, &record[i], pp);

		/* update bad channel bitmap */
		if (!(control->bad_channel_bitmap & (1 << record[i].mem_channel))) {
			control->bad_channel_bitmap |= 1 << record[i].mem_channel;
			con->update_channel_flag = true;
		}
	}

	/* a, first record index to write into.
	 * b, last record index to write into.
	 * a = first index to read (fri) + number of records in the table,
	 * b = a + @num - 1.
	 * Let N = control->ras_max_record_count, then we have,
	 * case 0: 0 <= a <= b < N,
	 *   just append @num records starting at a;
	 * case 1: 0 <= a < N <= b,
	 *   append (N - a) records starting at a, and
	 *   append the remainder, b % N + 1, starting at 0.
	 * case 2: 0 <= fri < N <= a <= b, then modulo N we get two subcases,
	 * case 2a: 0 <= a <= b < N
	 *   append num records starting at a; and fix fri if b overwrote it,
	 *   and since a <= b, if b overwrote it then a must've also,
	 *   and if b didn't overwrite it, then a didn't also.
	 * case 2b: 0 <= b < a < N
	 *   write num records starting at a, which wraps around 0=N
	 *   and overwrite fri unconditionally. Now from case 2a,
	 *   this means that b eclipsed fri to overwrite it and wrap
	 *   around 0 again, i.e. b = 2N+r pre modulo N, so we unconditionally
	 *   set fri = b + 1 (mod N).
	 * Now, since fri is updated in every case except the trivial case 0,
	 * the number of records present in the table after writing is
	 * num_recs - 1 = b - fri (mod N), and we take the positive value
	 * by adding an arbitrary multiple of N before taking the modulo N,
	 * as shown below.
	 */
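	/* Hypothetical example with N = 8 for brevity (the real
	 * ras_max_record_count is much larger): fri = 2, ras_num_recs = 6,
	 * num = 3. Then a = 8 and b = 10, so case 2 applies; a mod N = 0,
	 * b mod N = 2, records 0..2 are written, b overwrote fri, so
	 * fri = 3 and ras_num_recs = 1 + (8 + 2 - 3) % 8 = 8, a full table.
	 */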
	a = control->ras_fri + control->ras_num_recs;
	b = a + num - 1;
	if (b < control->ras_max_record_count) {
		res = __amdgpu_ras_eeprom_write(control, buf, a, num);
	} else if (a < control->ras_max_record_count) {
		u32 g0, g1;

		g0 = control->ras_max_record_count - a;
		g1 = b % control->ras_max_record_count + 1;
		res = __amdgpu_ras_eeprom_write(control, buf, a, g0);
		if (res)
			goto Out;
		res = __amdgpu_ras_eeprom_write(control,
						buf + g0 * RAS_TABLE_RECORD_SIZE,
						0, g1);
		if (res)
			goto Out;
		if (g1 > control->ras_fri)
			control->ras_fri = g1 % control->ras_max_record_count;
	} else {
		a %= control->ras_max_record_count;
		b %= control->ras_max_record_count;

		if (a <= b) {
			/* Note that b - a + 1 = num. */
			res = __amdgpu_ras_eeprom_write(control, buf, a, num);
			if (res)
				goto Out;
			if (b >= control->ras_fri)
				control->ras_fri = (b + 1) % control->ras_max_record_count;
		} else {
			u32 g0, g1;

			/* b < a, which means, we write from
			 * a to the end of the table, and from
			 * the start of the table to b.
			 */
			g0 = control->ras_max_record_count - a;
			g1 = b + 1;
			res = __amdgpu_ras_eeprom_write(control, buf, a, g0);
			if (res)
				goto Out;
			res = __amdgpu_ras_eeprom_write(control,
							buf + g0 * RAS_TABLE_RECORD_SIZE,
							0, g1);
			if (res)
				goto Out;
			control->ras_fri = g1 % control->ras_max_record_count;
		}
	}
	control->ras_num_recs = 1 + (control->ras_max_record_count + b
				     - control->ras_fri)
		% control->ras_max_record_count;
Out:
	kfree(buf);
	return res;
}

static int
amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control)
{
	struct amdgpu_device *adev = to_amdgpu_device(control);
	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
	u8 *buf, *pp, csum;
	u32 buf_size;
	int res;

	/* Mark the table header bad if the number of saved bad pages
	 * has reached the threshold.
	 */
	if (amdgpu_bad_page_threshold != 0 &&
	    control->ras_num_recs >= ras->bad_page_cnt_threshold) {
		dev_warn(adev->dev,
			"Saved bad pages %d reaches threshold value %d\n",
			control->ras_num_recs, ras->bad_page_cnt_threshold);
		control->tbl_hdr.header = RAS_TABLE_HDR_BAD;
	}

	control->tbl_hdr.version = RAS_TABLE_VER;
	control->tbl_hdr.first_rec_offset = RAS_INDEX_TO_OFFSET(control, control->ras_fri);
	control->tbl_hdr.tbl_size = RAS_TABLE_HEADER_SIZE + control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
	control->tbl_hdr.checksum = 0;

	buf_size = control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
	buf = kcalloc(control->ras_num_recs, RAS_TABLE_RECORD_SIZE, GFP_KERNEL);
	if (!buf) {
		DRM_ERROR("allocating memory for table of size %d bytes failed\n",
			  control->tbl_hdr.tbl_size);
		res = -ENOMEM;
		goto Out;
	}

	down_read(&adev->reset_domain->sem);
	res = amdgpu_eeprom_read(adev->pm.ras_eeprom_i2c_bus,
				 control->i2c_address +
				 control->ras_record_offset,
				 buf, buf_size);
	up_read(&adev->reset_domain->sem);
	if (res < 0) {
		DRM_ERROR("EEPROM failed reading records:%d\n",
			  res);
		goto Out;
	} else if (res < buf_size) {
		DRM_ERROR("EEPROM read %d out of %d bytes\n",
			  res, buf_size);
		res = -EIO;
		goto Out;
	}

	/* Recalculate the checksum.
	 */
	csum = 0;
	for (pp = buf; pp < buf + buf_size; pp++)
		csum += *pp;

	csum += __calc_hdr_byte_sum(control);
	/* avoid sign extension when assigning to "checksum" */
	csum = -csum;
	control->tbl_hdr.checksum = csum;
	res = __write_table_header(control);
Out:
	kfree(buf);
	return res;
}

/**
 * amdgpu_ras_eeprom_append -- append records to the EEPROM RAS table
 * @control: pointer to control structure
 * @record: array of records to append
 * @num: number of records in @record array
 *
 * Append @num records to the table, calculate the checksum and write
 * the table back to EEPROM. The number of records appended in a single
 * call must be between 1 and control->ras_max_record_count,
 * regardless of how many records are already stored in the table.
 *
 * Return 0 on success or if EEPROM is not supported, -errno on error.
 */
int amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control *control,
			     struct eeprom_table_record *record,
			     const u32 num)
{
	struct amdgpu_device *adev = to_amdgpu_device(control);
	int res;

	if (!__is_ras_eeprom_supported(adev))
		return 0;

	if (num == 0) {
		DRM_ERROR("will not append 0 records\n");
		return -EINVAL;
	} else if (num > control->ras_max_record_count) {
		DRM_ERROR("cannot append %d records, which exceeds the table size %d\n",
			  num, control->ras_max_record_count);
		return -EINVAL;
	}

	mutex_lock(&control->ras_tbl_mutex);

	res = amdgpu_ras_eeprom_append_table(control, record, num);
	if (!res)
		res = amdgpu_ras_eeprom_update_header(control);
	if (!res)
		amdgpu_ras_debugfs_set_ret_size(control);

	mutex_unlock(&control->ras_tbl_mutex);
	return res;
}

/**
 * __amdgpu_ras_eeprom_read -- read indexed from EEPROM into buffer
 * @control: pointer to control structure
 * @buf: pointer to buffer to read into
 * @fri: first record index, start reading at this index, absolute index
 * @num: number of records to read
 *
 * The caller must hold the table mutex in @control.
 * Return 0 on success, -errno otherwise.
 */
static int __amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control,
				    u8 *buf, const u32 fri, const u32 num)
{
	struct amdgpu_device *adev = to_amdgpu_device(control);
	u32 buf_size;
	int res;

	/* i2c may be unstable in gpu reset */
	down_read(&adev->reset_domain->sem);
	buf_size = num * RAS_TABLE_RECORD_SIZE;
	res = amdgpu_eeprom_read(adev->pm.ras_eeprom_i2c_bus,
				 control->i2c_address +
				 RAS_INDEX_TO_OFFSET(control, fri),
				 buf, buf_size);
	up_read(&adev->reset_domain->sem);
	if (res < 0) {
		DRM_ERROR("Reading %d EEPROM table records error:%d",
			  num, res);
	} else if (res < buf_size) {
		/* Short read, return error.
		 */
		DRM_ERROR("Read %d records out of %d",
			  res / RAS_TABLE_RECORD_SIZE, num);
		res = -EIO;
	} else {
		res = 0;
	}

	return res;
}

/**
 * amdgpu_ras_eeprom_read -- read EEPROM
 * @control: pointer to control structure
 * @record: array of records to read into
 * @num: number of records in @record
 *
 * Reads @num records from the RAS table in EEPROM and
 * writes the data into the @record array.
 *
 * Returns 0 on success, -errno on error.
 */
int amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control,
			   struct eeprom_table_record *record,
			   const u32 num)
{
	struct amdgpu_device *adev = to_amdgpu_device(control);
	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
	int i, res;
	u8 *buf, *pp;
	u32 g0, g1;

	if (!__is_ras_eeprom_supported(adev))
		return 0;

	if (num == 0) {
		DRM_ERROR("will not read 0 records\n");
		return -EINVAL;
	} else if (num > control->ras_num_recs) {
		DRM_ERROR("too many records to read:%d available:%d\n",
			  num, control->ras_num_recs);
		return -EINVAL;
	}

	buf = kcalloc(num, RAS_TABLE_RECORD_SIZE, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	/* Determine how many records to read, from the first record
	 * index, fri, to the end of the table, and from the beginning
	 * of the table, such that the total number of records is
	 * @num, and we handle wrap around when fri > 0 and
	 * fri + num > RAS_MAX_RECORD_COUNT.
	 *
	 * First we compute the index of the last element
	 * which would be fetched from each region,
	 * g0 is in [fri, fri + num - 1], and
	 * g1 is in [0, RAS_MAX_RECORD_COUNT - 1].
	 * Then, if g0, the index of the last element to fetch,
	 * is less than RAS_MAX_RECORD_COUNT, we set g0 to _the number_
	 * of elements to fetch, @num, since we know that the last
	 * index to be fetched does not exceed the table.
	 *
	 * If, however, g0 >= RAS_MAX_RECORD_COUNT, then
	 * we set g0 to the number of elements to read
	 * until the end of the table, and g1 to the number of
	 * elements to read from the beginning of the table.
	 */
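	/* For example, with ras_max_record_count == 10921, ras_fri == 10919
	 * and num == 5: the last index is 10923 >= 10921, so we read
	 * g0 == 2 records starting at index 10919 and g1 == 3 records
	 * starting at index 0.
	 */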
	g0 = control->ras_fri + num - 1;
	g1 = g0 % control->ras_max_record_count;
	if (g0 < control->ras_max_record_count) {
		g0 = num;
		g1 = 0;
	} else {
		g0 = control->ras_max_record_count - control->ras_fri;
		g1 += 1;
	}

	mutex_lock(&control->ras_tbl_mutex);
	res = __amdgpu_ras_eeprom_read(control, buf, control->ras_fri, g0);
	if (res)
		goto Out;
	if (g1) {
		res = __amdgpu_ras_eeprom_read(control,
					       buf + g0 * RAS_TABLE_RECORD_SIZE,
					       0, g1);
		if (res)
			goto Out;
	}

	res = 0;

	/* Everything was read in; now decode the records.
	 */
	pp = buf;
	for (i = 0; i < num; i++, pp += RAS_TABLE_RECORD_SIZE) {
		__decode_table_record_from_buf(control, &record[i], pp);

		/* update bad channel bitmap */
		if (!(control->bad_channel_bitmap & (1 << record[i].mem_channel))) {
			control->bad_channel_bitmap |= 1 << record[i].mem_channel;
			con->update_channel_flag = true;
		}
	}
Out:
	kfree(buf);
	mutex_unlock(&control->ras_tbl_mutex);

	return res;
}

uint32_t amdgpu_ras_eeprom_max_record_count(void)
{
	return RAS_MAX_RECORD_COUNT;
}

static ssize_t
amdgpu_ras_debugfs_eeprom_size_read(struct file *f, char __user *buf,
				    size_t size, loff_t *pos)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
	struct amdgpu_ras_eeprom_control *control = ras ? &ras->eeprom_control : NULL;
	u8 data[50];
	int res;

	if (!size)
		return size;

	if (!ras || !control) {
		res = snprintf(data, sizeof(data), "Not supported\n");
	} else {
		res = snprintf(data, sizeof(data), "%d bytes or %d records\n",
			       RAS_TBL_SIZE_BYTES, control->ras_max_record_count);
	}

	if (*pos >= res)
		return 0;

	res -= *pos;
	res = min_t(size_t, res, size);

	if (copy_to_user(buf, &data[*pos], res))
		return -EFAULT;

	*pos += res;

	return res;
}

const struct file_operations amdgpu_ras_debugfs_eeprom_size_ops = {
	.owner = THIS_MODULE,
	.read = amdgpu_ras_debugfs_eeprom_size_read,
	.write = NULL,
	.llseek = default_llseek,
};

static const char *tbl_hdr_str = " Signature    Version  FirstOffs       Size   Checksum\n";
static const char *tbl_hdr_fmt = "0x%08X 0x%08X 0x%08X 0x%08X 0x%08X\n";
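/* tbl_hdr_fmt produces five "0x%08X" fields (10 characters each), four
 * separating spaces and a trailing newline: 55 bytes.
 */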
#define tbl_hdr_fmt_size (5 * (2+8) + 4 + 1)
static const char *rec_hdr_str = "Index  Offset ErrType Bank/CU          TimeStamp      Offs/Addr MemChl MCUMCID    RetiredPage\n";
static const char *rec_hdr_fmt = "%5d 0x%05X %7s    0x%02X 0x%016llX 0x%012llX   0x%02X    0x%02X 0x%012llX\n";
#define rec_hdr_fmt_size (5 + 1 + 7 + 1 + 7 + 1 + 7 + 1 + 18 + 1 + 14 + 1 + 6 + 1 + 7 + 1 + 14 + 1)

static const char *record_err_type_str[AMDGPU_RAS_EEPROM_ERR_COUNT] = {
	"ignore",
	"re",
	"ue",
};

static loff_t amdgpu_ras_debugfs_table_size(struct amdgpu_ras_eeprom_control *control)
{
	return strlen(tbl_hdr_str) + tbl_hdr_fmt_size +
		strlen(rec_hdr_str) + rec_hdr_fmt_size * control->ras_num_recs;
}

void amdgpu_ras_debugfs_set_ret_size(struct amdgpu_ras_eeprom_control *control)
{
	struct amdgpu_ras *ras = container_of(control, struct amdgpu_ras,
					      eeprom_control);
	struct dentry *de = ras->de_ras_eeprom_table;

	if (de)
		d_inode(de)->i_size = amdgpu_ras_debugfs_table_size(control);
}

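/* The debugfs table file is laid out as: the table-header legend
 * (tbl_hdr_str), one tbl_hdr_fmt line with the decoded header, the record
 * legend (rec_hdr_str), and then one rec_hdr_fmt line per record, each
 * record being read individually from EEPROM as it is formatted.
 */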
static ssize_t amdgpu_ras_debugfs_table_read(struct file *f, char __user *buf,
					     size_t size, loff_t *pos)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
	struct amdgpu_ras_eeprom_control *control = &ras->eeprom_control;
	const size_t orig_size = size;
	int res = -EFAULT;
	size_t data_len;

	mutex_lock(&control->ras_tbl_mutex);

	/* A chunk still has bytes to be printed while *pos is less
	 * than data_len, the cumulative end of that chunk.
	 */
	data_len = strlen(tbl_hdr_str);
	if (*pos < data_len) {
		data_len -= *pos;
		data_len = min_t(size_t, data_len, size);
		if (copy_to_user(buf, &tbl_hdr_str[*pos], data_len))
			goto Out;
		buf += data_len;
		size -= data_len;
		*pos += data_len;
	}

	data_len = strlen(tbl_hdr_str) + tbl_hdr_fmt_size;
	if (*pos < data_len && size > 0) {
		u8 data[tbl_hdr_fmt_size + 1];
		loff_t lpos;

		snprintf(data, sizeof(data), tbl_hdr_fmt,
			 control->tbl_hdr.header,
			 control->tbl_hdr.version,
			 control->tbl_hdr.first_rec_offset,
			 control->tbl_hdr.tbl_size,
			 control->tbl_hdr.checksum);

		data_len -= *pos;
		data_len = min_t(size_t, data_len, size);
		lpos = *pos - strlen(tbl_hdr_str);
		if (copy_to_user(buf, &data[lpos], data_len))
			goto Out;
		buf += data_len;
		size -= data_len;
		*pos += data_len;
	}

	data_len = strlen(tbl_hdr_str) + tbl_hdr_fmt_size + strlen(rec_hdr_str);
	if (*pos < data_len && size > 0) {
		loff_t lpos;

		data_len -= *pos;
		data_len = min_t(size_t, data_len, size);
		lpos = *pos - strlen(tbl_hdr_str) - tbl_hdr_fmt_size;
		if (copy_to_user(buf, &rec_hdr_str[lpos], data_len))
			goto Out;
		buf += data_len;
		size -= data_len;
		*pos += data_len;
	}

	data_len = amdgpu_ras_debugfs_table_size(control);
	if (*pos < data_len && size > 0) {
		u8 dare[RAS_TABLE_RECORD_SIZE];
		u8 data[rec_hdr_fmt_size + 1];
		struct eeprom_table_record record;
		int s, r;

		/* Find the starting record index
		 */
		s = *pos - strlen(tbl_hdr_str) - tbl_hdr_fmt_size -
			strlen(rec_hdr_str);
		s = s / rec_hdr_fmt_size;
		r = *pos - strlen(tbl_hdr_str) - tbl_hdr_fmt_size -
			strlen(rec_hdr_str);
		r = r % rec_hdr_fmt_size;

		for ( ; size > 0 && s < control->ras_num_recs; s++) {
			u32 ai = RAS_RI_TO_AI(control, s);
			/* Read a single record
			 */
			res = __amdgpu_ras_eeprom_read(control, dare, ai, 1);
			if (res)
				goto Out;
			__decode_table_record_from_buf(control, &record, dare);
			snprintf(data, sizeof(data), rec_hdr_fmt,
				 s,
				 RAS_INDEX_TO_OFFSET(control, ai),
				 record_err_type_str[record.err_type],
				 record.bank,
				 record.ts,
				 record.offset,
				 record.mem_channel,
				 record.mcumc_id,
				 record.retired_page);

			data_len = min_t(size_t, rec_hdr_fmt_size - r, size);
			if (copy_to_user(buf, &data[r], data_len)) {
				res = -EFAULT;
				goto Out;
			}
			buf += data_len;
			size -= data_len;
			*pos += data_len;
			r = 0;
		}
	}
	res = 0;
Out:
	mutex_unlock(&control->ras_tbl_mutex);
	return res < 0 ? res : orig_size - size;
}

static ssize_t
amdgpu_ras_debugfs_eeprom_table_read(struct file *f, char __user *buf,
				     size_t size, loff_t *pos)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
	struct amdgpu_ras_eeprom_control *control = ras ? &ras->eeprom_control : NULL;
	u8 data[81];
	int res;

	if (!size)
		return size;

	if (!ras || !control) {
		res = snprintf(data, sizeof(data), "Not supported\n");
		if (*pos >= res)
			return 0;

		res -= *pos;
		res = min_t(size_t, res, size);

		if (copy_to_user(buf, &data[*pos], res))
			return -EFAULT;

		*pos += res;

		return res;
	} else {
		return amdgpu_ras_debugfs_table_read(f, buf, size, pos);
	}
}

const struct file_operations amdgpu_ras_debugfs_eeprom_table_ops = {
	.owner = THIS_MODULE,
	.read = amdgpu_ras_debugfs_eeprom_table_read,
	.write = NULL,
	.llseek = default_llseek,
};

/**
 * __verify_ras_table_checksum -- verify the RAS EEPROM table checksum
 * @control: pointer to control structure
 *
 * Check the checksum of the RAS table stored in EEPROM.
 *
 * Return 0 if the checksum is correct,
 * positive if it is not correct, and
 * -errno on I/O error.
 */
static int __verify_ras_table_checksum(struct amdgpu_ras_eeprom_control *control)
{
	struct amdgpu_device *adev = to_amdgpu_device(control);
	int buf_size, res;
	u8  csum, *buf, *pp;

	buf_size = RAS_TABLE_HEADER_SIZE +
		control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
	buf = kzalloc(buf_size, GFP_KERNEL);
	if (!buf) {
		DRM_ERROR("Out of memory checking RAS table checksum.\n");
		return -ENOMEM;
	}

	res = amdgpu_eeprom_read(adev->pm.ras_eeprom_i2c_bus,
				 control->i2c_address +
				 control->ras_header_offset,
				 buf, buf_size);
	if (res < buf_size) {
		DRM_ERROR("Partial read for checksum, res:%d\n", res);
		/* On partial reads, return -EIO.
		 */
		if (res >= 0)
			res = -EIO;
		goto Out;
	}

	csum = 0;
	for (pp = buf; pp < buf + buf_size; pp++)
		csum += *pp;
Out:
	kfree(buf);
	return res < 0 ? res : csum;
}

int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control,
			   bool *exceed_err_limit)
{
	struct amdgpu_device *adev = to_amdgpu_device(control);
	unsigned char buf[RAS_TABLE_HEADER_SIZE] = { 0 };
	struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
	int res;

	*exceed_err_limit = false;

	if (!__is_ras_eeprom_supported(adev))
		return 0;

	/* Verify i2c adapter is initialized */
	if (!adev->pm.ras_eeprom_i2c_bus || !adev->pm.ras_eeprom_i2c_bus->algo)
		return -ENOENT;

	if (!__get_eeprom_i2c_addr(adev, control))
		return -EINVAL;

	control->ras_header_offset = RAS_HDR_START;
	control->ras_record_offset = RAS_RECORD_START;
	control->ras_max_record_count = RAS_MAX_RECORD_COUNT;
	mutex_init(&control->ras_tbl_mutex);

	/* Read the table header from EEPROM address */
	res = amdgpu_eeprom_read(adev->pm.ras_eeprom_i2c_bus,
				 control->i2c_address + control->ras_header_offset,
				 buf, RAS_TABLE_HEADER_SIZE);
	if (res < RAS_TABLE_HEADER_SIZE) {
		DRM_ERROR("Failed to read EEPROM table header, res:%d", res);
		return res >= 0 ? -EIO : res;
	}

	__decode_table_header_from_buf(hdr, buf);

	control->ras_num_recs = RAS_NUM_RECS(hdr);
	control->ras_fri = RAS_OFFSET_TO_INDEX(control, hdr->first_rec_offset);

	if (hdr->header == RAS_TABLE_HDR_VAL) {
		DRM_DEBUG_DRIVER("Found existing EEPROM table with %d records",
				 control->ras_num_recs);
		res = __verify_ras_table_checksum(control);
		if (res)
			DRM_ERROR("RAS table incorrect checksum or error:%d\n",
				  res);

		/* Warn if we are at 90% of the threshold or above
		 */
		if (10 * control->ras_num_recs >= 9 * ras->bad_page_cnt_threshold)
			dev_warn(adev->dev, "RAS records:%u exceeds 90%% of threshold:%d",
					control->ras_num_recs,
					ras->bad_page_cnt_threshold);
	} else if (hdr->header == RAS_TABLE_HDR_BAD &&
		   amdgpu_bad_page_threshold != 0) {
		res = __verify_ras_table_checksum(control);
		if (res)
			DRM_ERROR("RAS Table incorrect checksum or error:%d\n",
				  res);
		if (ras->bad_page_cnt_threshold > control->ras_num_recs) {
			/* This means that the threshold was increased since
			 * the last time the system was booted, and now,
			 * ras->bad_page_cnt_threshold - control->ras_num_recs > 0,
			 * so that at least one more record can be saved
			 * before the page count threshold is reached.
			 */
			dev_info(adev->dev,
				 "records:%d threshold:%d, resetting "
				 "RAS table header signature",
				 control->ras_num_recs,
				 ras->bad_page_cnt_threshold);
			res = amdgpu_ras_eeprom_correct_header_tag(control,
								   RAS_TABLE_HDR_VAL);
		} else {
			dev_err(adev->dev, "RAS records:%d exceed threshold:%d",
				control->ras_num_recs, ras->bad_page_cnt_threshold);
			if (amdgpu_bad_page_threshold == -2) {
				dev_warn(adev->dev, "GPU will be initialized due to bad_page_threshold = -2.");
				res = 0;
			} else {
				*exceed_err_limit = true;
				dev_err(adev->dev,
					"RAS records:%d exceed threshold:%d, "
					"GPU will not be initialized. Replace this GPU or increase the threshold",
					control->ras_num_recs, ras->bad_page_cnt_threshold);
			}
		}
	} else {
		DRM_INFO("Creating a new EEPROM table");

		res = amdgpu_ras_eeprom_reset_table(control);
	}

	return res < 0 ? res : 0;
}