xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c (revision b97d6790d03b763eca08847a9a5869a4291b9f9a)
164f55e62SAndrey Grodzovsky /*
264f55e62SAndrey Grodzovsky  * Copyright 2019 Advanced Micro Devices, Inc.
364f55e62SAndrey Grodzovsky  *
464f55e62SAndrey Grodzovsky  * Permission is hereby granted, free of charge, to any person obtaining a
564f55e62SAndrey Grodzovsky  * copy of this software and associated documentation files (the "Software"),
664f55e62SAndrey Grodzovsky  * to deal in the Software without restriction, including without limitation
764f55e62SAndrey Grodzovsky  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
864f55e62SAndrey Grodzovsky  * and/or sell copies of the Software, and to permit persons to whom the
964f55e62SAndrey Grodzovsky  * Software is furnished to do so, subject to the following conditions:
1064f55e62SAndrey Grodzovsky  *
1164f55e62SAndrey Grodzovsky  * The above copyright notice and this permission notice shall be included in
1264f55e62SAndrey Grodzovsky  * all copies or substantial portions of the Software.
1364f55e62SAndrey Grodzovsky  *
1464f55e62SAndrey Grodzovsky  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1564f55e62SAndrey Grodzovsky  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1664f55e62SAndrey Grodzovsky  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
1764f55e62SAndrey Grodzovsky  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
1864f55e62SAndrey Grodzovsky  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
1964f55e62SAndrey Grodzovsky  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
2064f55e62SAndrey Grodzovsky  * OTHER DEALINGS IN THE SOFTWARE.
2164f55e62SAndrey Grodzovsky  *
2264f55e62SAndrey Grodzovsky  */
2364f55e62SAndrey Grodzovsky 
2464f55e62SAndrey Grodzovsky #include "amdgpu_ras_eeprom.h"
2564f55e62SAndrey Grodzovsky #include "amdgpu.h"
2664f55e62SAndrey Grodzovsky #include "amdgpu_ras.h"
2764f55e62SAndrey Grodzovsky #include <linux/bits.h>
28ef1caf48SJohn Clements #include "atom.h"
2924f55c05SAlex Deucher #include "amdgpu_eeprom.h"
3014fb496aSJohn Clements #include "amdgpu_atomfirmware.h"
31c65b0805SLuben Tuikov #include <linux/debugfs.h>
32c65b0805SLuben Tuikov #include <linux/uaccess.h>
3364f55e62SAndrey Grodzovsky 
34d0fb18b5SAndrey Grodzovsky #include "amdgpu_reset.h"
35d0fb18b5SAndrey Grodzovsky 
36da858deaSLuben Tuikov /* These are memory addresses as would be seen by one or more EEPROM
37da858deaSLuben Tuikov  * chips strung on the I2C bus, usually by manipulating pins 1-3 of a
38da858deaSLuben Tuikov  * set of EEPROM devices. They form a continuous memory space.
39da858deaSLuben Tuikov  *
40da858deaSLuben Tuikov  * The I2C device address includes the device type identifier, 1010b,
41da858deaSLuben Tuikov  * which is a reserved value and indicates that this is an I2C EEPROM
42da858deaSLuben Tuikov  * device. It also includes the top 3 bits of the 19 bit EEPROM memory
43da858deaSLuben Tuikov  * address, namely bits 18, 17, and 16. This makes up the 7 bit
44da858deaSLuben Tuikov  * address sent on the I2C bus with bit 0 being the direction bit,
45da858deaSLuben Tuikov  * which is not represented here, and sent by the hardware directly.
46da858deaSLuben Tuikov  *
47da858deaSLuben Tuikov  * For instance,
48da858deaSLuben Tuikov  *   50h = 1010000b => device type identifier 1010b, bits 18:16 = 000b, address 0.
49da858deaSLuben Tuikov  *   54h = 1010100b => --"--, bits 18:16 = 100b, address 40000h.
50da858deaSLuben Tuikov  *   56h = 1010110b => --"--, bits 18:16 = 110b, address 60000h.
51da858deaSLuben Tuikov  * Depending on the size of the I2C EEPROM device(s), bits 18:16 may
52da858deaSLuben Tuikov  * address memory in a device or a device on the I2C bus, depending on
53da858deaSLuben Tuikov  * the status of pins 1-3. See top of amdgpu_eeprom.c.
543b8164f8SLuben Tuikov  *
553b8164f8SLuben Tuikov  * The RAS table lives either at address 0 or address 40000h of EEPROM.
56da858deaSLuben Tuikov  */
573b8164f8SLuben Tuikov #define EEPROM_I2C_MADDR_0      0x0
583b8164f8SLuben Tuikov #define EEPROM_I2C_MADDR_4      0x40000
5964f55e62SAndrey Grodzovsky 
/*
 * The 2 macros below represent the actual size in bytes that
 * those entities occupy in the EEPROM memory.
 * RAS_TABLE_RECORD_SIZE is different from sizeof(eeprom_table_record), which
 * uses uint64 to store 6-byte fields such as retired_page.
 */
66c28aa44dSLuben Tuikov #define RAS_TABLE_HEADER_SIZE   20
67c28aa44dSLuben Tuikov #define RAS_TABLE_RECORD_SIZE   24
6864f55e62SAndrey Grodzovsky 
6964f55e62SAndrey Grodzovsky /* Table hdr is 'AMDR' */
70c28aa44dSLuben Tuikov #define RAS_TABLE_HDR_VAL       0x414d4452
7164f55e62SAndrey Grodzovsky 
721d6a9d12SGuchun Chen /* Bad GPU tag ‘BADG’ */
73c28aa44dSLuben Tuikov #define RAS_TABLE_HDR_BAD       0x42414447
741d6a9d12SGuchun Chen 
75e06da817SSrinivasan Shanmugam /*
767f599fedSStanley.Yang  * EEPROM Table structure v1
777f599fedSStanley.Yang  * ---------------------------------
787f599fedSStanley.Yang  * |                               |
797f599fedSStanley.Yang  * |     EEPROM TABLE HEADER       |
807f599fedSStanley.Yang  * |      ( size 20 Bytes )        |
817f599fedSStanley.Yang  * |                               |
827f599fedSStanley.Yang  * ---------------------------------
837f599fedSStanley.Yang  * |                               |
847f599fedSStanley.Yang  * |    BAD PAGE RECORD AREA       |
857f599fedSStanley.Yang  * |                               |
867f599fedSStanley.Yang  * ---------------------------------
877f599fedSStanley.Yang  */
887f599fedSStanley.Yang 
89c28aa44dSLuben Tuikov /* Assume 2-Mbit size EEPROM and take up the whole space. */
90c28aa44dSLuben Tuikov #define RAS_TBL_SIZE_BYTES      (256 * 1024)
9163d4c081SLuben Tuikov #define RAS_TABLE_START         0
9263d4c081SLuben Tuikov #define RAS_HDR_START           RAS_TABLE_START
93c28aa44dSLuben Tuikov #define RAS_RECORD_START        (RAS_HDR_START + RAS_TABLE_HEADER_SIZE)
94e4e6a589SLuben Tuikov #define RAS_MAX_RECORD_COUNT    ((RAS_TBL_SIZE_BYTES - RAS_TABLE_HEADER_SIZE) \
95c28aa44dSLuben Tuikov 				 / RAS_TABLE_RECORD_SIZE)
9664f55e62SAndrey Grodzovsky 
/*
 * EEPROM Table structure v2.1
 * ---------------------------------
 * |                               |
 * |     EEPROM TABLE HEADER       |
 * |      ( size 20 Bytes )        |
 * |                               |
 * ---------------------------------
 * |                               |
 * |     EEPROM TABLE RAS INFO     |
 * | (available info size 4 Bytes) |
 * |  ( reserved size 252 Bytes )  |
 * |                               |
 * ---------------------------------
 * |                               |
 * |     BAD PAGE RECORD AREA      |
 * |                               |
 * ---------------------------------
 */
1167f599fedSStanley.Yang 
11765183faeSStanley.Yang /* EEPROM Table V2_1 */
11865183faeSStanley.Yang #define RAS_TABLE_V2_1_INFO_SIZE       256
11965183faeSStanley.Yang #define RAS_TABLE_V2_1_INFO_START      RAS_TABLE_HEADER_SIZE
12065183faeSStanley.Yang #define RAS_RECORD_START_V2_1          (RAS_HDR_START + RAS_TABLE_HEADER_SIZE + \
12165183faeSStanley.Yang 					RAS_TABLE_V2_1_INFO_SIZE)
12265183faeSStanley.Yang #define RAS_MAX_RECORD_COUNT_V2_1      ((RAS_TBL_SIZE_BYTES - RAS_TABLE_HEADER_SIZE - \
12365183faeSStanley.Yang 					RAS_TABLE_V2_1_INFO_SIZE) \
12465183faeSStanley.Yang 					/ RAS_TABLE_RECORD_SIZE)
12565183faeSStanley.Yang 
12663d4c081SLuben Tuikov /* Given a zero-based index of an EEPROM RAS record, yields the EEPROM
12763d4c081SLuben Tuikov  * offset off of RAS_TABLE_START.  That is, this is something you can
12863d4c081SLuben Tuikov  * add to control->i2c_address, and then tell I2C layer to read
12963d4c081SLuben Tuikov  * from/write to there. _N is the so called absolute index,
13063d4c081SLuben Tuikov  * because it starts right after the table header.
13163d4c081SLuben Tuikov  */
13263d4c081SLuben Tuikov #define RAS_INDEX_TO_OFFSET(_C, _N) ((_C)->ras_record_offset + \
13363d4c081SLuben Tuikov 				     (_N) * RAS_TABLE_RECORD_SIZE)
13463d4c081SLuben Tuikov 
13563d4c081SLuben Tuikov #define RAS_OFFSET_TO_INDEX(_C, _O) (((_O) - \
13663d4c081SLuben Tuikov 				      (_C)->ras_record_offset) / RAS_TABLE_RECORD_SIZE)
13763d4c081SLuben Tuikov 
138c65b0805SLuben Tuikov /* Given a 0-based relative record index, 0, 1, 2, ..., etc., off
139c65b0805SLuben Tuikov  * of "fri", return the absolute record index off of the end of
140c65b0805SLuben Tuikov  * the table header.
141c65b0805SLuben Tuikov  */
142c65b0805SLuben Tuikov #define RAS_RI_TO_AI(_C, _I) (((_I) + (_C)->ras_fri) % \
143c65b0805SLuben Tuikov 			      (_C)->ras_max_record_count)
144c65b0805SLuben Tuikov 
14563d4c081SLuben Tuikov #define RAS_NUM_RECS(_tbl_hdr)  (((_tbl_hdr)->tbl_size - \
14663d4c081SLuben Tuikov 				  RAS_TABLE_HEADER_SIZE) / RAS_TABLE_RECORD_SIZE)
14763d4c081SLuben Tuikov 
14865183faeSStanley.Yang #define RAS_NUM_RECS_V2_1(_tbl_hdr)  (((_tbl_hdr)->tbl_size - \
14965183faeSStanley.Yang 				       RAS_TABLE_HEADER_SIZE - \
15065183faeSStanley.Yang 				       RAS_TABLE_V2_1_INFO_SIZE) / RAS_TABLE_RECORD_SIZE)
15165183faeSStanley.Yang 
15264f55e62SAndrey Grodzovsky #define to_amdgpu_device(x) (container_of(x, struct amdgpu_ras, eeprom_control))->adev
15364f55e62SAndrey Grodzovsky 
__is_ras_eeprom_supported(struct amdgpu_device * adev)1544bfb7428SJohn Clements static bool __is_ras_eeprom_supported(struct amdgpu_device *adev)
1554bfb7428SJohn Clements {
156bc22f8ecSCandice Li 	switch (adev->ip_versions[MP1_HWIP][0]) {
1576246059aSAlex Deucher 	case IP_VERSION(11, 0, 2): /* VEGA20 and ARCTURUS */
1586246059aSAlex Deucher 	case IP_VERSION(11, 0, 7): /* Sienna cichlid */
159bc22f8ecSCandice Li 	case IP_VERSION(13, 0, 0):
1606246059aSAlex Deucher 	case IP_VERSION(13, 0, 2): /* Aldebaran */
161bc22f8ecSCandice Li 	case IP_VERSION(13, 0, 10):
162bc22f8ecSCandice Li 		return true;
1634b721ed8SCandice Li 	case IP_VERSION(13, 0, 6):
1644b721ed8SCandice Li 		return (adev->gmc.is_app_apu) ? false : true;
165bc22f8ecSCandice Li 	default:
166bc22f8ecSCandice Li 		return false;
167bc22f8ecSCandice Li 	}
168bc22f8ecSCandice Li }
169bc22f8ecSCandice Li 
__get_eeprom_i2c_addr(struct amdgpu_device * adev,struct amdgpu_ras_eeprom_control * control)170ef1caf48SJohn Clements static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev,
171ccdfbfecSLuben Tuikov 				  struct amdgpu_ras_eeprom_control *control)
172ef1caf48SJohn Clements {
17364a3dbb0SLuben Tuikov 	struct atom_context *atom_ctx = adev->mode_info.atom_context;
174cc947bf9SLuben Tuikov 	u8 i2c_addr;
175cc947bf9SLuben Tuikov 
176ccdfbfecSLuben Tuikov 	if (!control)
177ef1caf48SJohn Clements 		return false;
178ef1caf48SJohn Clements 
179cc947bf9SLuben Tuikov 	if (amdgpu_atomfirmware_ras_rom_addr(adev, &i2c_addr)) {
180cc947bf9SLuben Tuikov 		/* The address given by VBIOS is an 8-bit, wire-format
181cc947bf9SLuben Tuikov 		 * address, i.e. the most significant byte.
182cc947bf9SLuben Tuikov 		 *
183cc947bf9SLuben Tuikov 		 * Normalize it to a 19-bit EEPROM address. Remove the
184cc947bf9SLuben Tuikov 		 * device type identifier and make it a 7-bit address;
185cc947bf9SLuben Tuikov 		 * then make it a 19-bit EEPROM address. See top of
186cc947bf9SLuben Tuikov 		 * amdgpu_eeprom.c.
187cc947bf9SLuben Tuikov 		 */
188cc947bf9SLuben Tuikov 		i2c_addr = (i2c_addr & 0x0F) >> 1;
189cc947bf9SLuben Tuikov 		control->i2c_address = ((u32) i2c_addr) << 16;
19014fb496aSJohn Clements 
19114fb496aSJohn Clements 		return true;
19214fb496aSJohn Clements 	}
19314fb496aSJohn Clements 
1946246059aSAlex Deucher 	switch (adev->ip_versions[MP1_HWIP][0]) {
1956246059aSAlex Deucher 	case IP_VERSION(11, 0, 2):
1966246059aSAlex Deucher 		/* VEGA20 and ARCTURUS */
1976246059aSAlex Deucher 		if (adev->asic_type == CHIP_VEGA20)
1986246059aSAlex Deucher 			control->i2c_address = EEPROM_I2C_MADDR_0;
199adf64e21SMario Limonciello 		else if (strnstr(atom_ctx->vbios_pn,
2006246059aSAlex Deucher 				 "D342",
201adf64e21SMario Limonciello 				 sizeof(atom_ctx->vbios_pn)))
2026246059aSAlex Deucher 			control->i2c_address = EEPROM_I2C_MADDR_0;
2036246059aSAlex Deucher 		else
2046246059aSAlex Deucher 			control->i2c_address = EEPROM_I2C_MADDR_4;
2056246059aSAlex Deucher 		return true;
2066246059aSAlex Deucher 	case IP_VERSION(11, 0, 7):
2073b8164f8SLuben Tuikov 		control->i2c_address = EEPROM_I2C_MADDR_0;
2088782007bSLuben Tuikov 		return true;
2096246059aSAlex Deucher 	case IP_VERSION(13, 0, 2):
210adf64e21SMario Limonciello 		if (strnstr(atom_ctx->vbios_pn, "D673",
211adf64e21SMario Limonciello 			    sizeof(atom_ctx->vbios_pn)))
21264a3dbb0SLuben Tuikov 			control->i2c_address = EEPROM_I2C_MADDR_4;
21364a3dbb0SLuben Tuikov 		else
2143b8164f8SLuben Tuikov 			control->i2c_address = EEPROM_I2C_MADDR_0;
2158782007bSLuben Tuikov 		return true;
2166246059aSAlex Deucher 	case IP_VERSION(13, 0, 0):
217*c4307207SCandice Li 		if (strnstr(atom_ctx->vbios_pn, "D707",
218*c4307207SCandice Li 			    sizeof(atom_ctx->vbios_pn)))
219*c4307207SCandice Li 			control->i2c_address = EEPROM_I2C_MADDR_0;
220*c4307207SCandice Li 		else
221*c4307207SCandice Li 			control->i2c_address = EEPROM_I2C_MADDR_4;
222*c4307207SCandice Li 		return true;
223b81fde0dSCandice Li 	case IP_VERSION(13, 0, 6):
2246246059aSAlex Deucher 	case IP_VERSION(13, 0, 10):
2256246059aSAlex Deucher 		control->i2c_address = EEPROM_I2C_MADDR_4;
2266246059aSAlex Deucher 		return true;
227ef1caf48SJohn Clements 	default:
228ef1caf48SJohn Clements 		return false;
229ef1caf48SJohn Clements 	}
230ef1caf48SJohn Clements }
231ef1caf48SJohn Clements 
23263d4c081SLuben Tuikov static void
__encode_table_header_to_buf(struct amdgpu_ras_eeprom_table_header * hdr,unsigned char * buf)23363d4c081SLuben Tuikov __encode_table_header_to_buf(struct amdgpu_ras_eeprom_table_header *hdr,
234d7edde3dSLuben Tuikov 			     unsigned char *buf)
23564f55e62SAndrey Grodzovsky {
23663d4c081SLuben Tuikov 	u32 *pp = (uint32_t *)buf;
23764f55e62SAndrey Grodzovsky 
23864f55e62SAndrey Grodzovsky 	pp[0] = cpu_to_le32(hdr->header);
23964f55e62SAndrey Grodzovsky 	pp[1] = cpu_to_le32(hdr->version);
24064f55e62SAndrey Grodzovsky 	pp[2] = cpu_to_le32(hdr->first_rec_offset);
24164f55e62SAndrey Grodzovsky 	pp[3] = cpu_to_le32(hdr->tbl_size);
24264f55e62SAndrey Grodzovsky 	pp[4] = cpu_to_le32(hdr->checksum);
24364f55e62SAndrey Grodzovsky }
24464f55e62SAndrey Grodzovsky 
24563d4c081SLuben Tuikov static void
__decode_table_header_from_buf(struct amdgpu_ras_eeprom_table_header * hdr,unsigned char * buf)24663d4c081SLuben Tuikov __decode_table_header_from_buf(struct amdgpu_ras_eeprom_table_header *hdr,
247d7edde3dSLuben Tuikov 			       unsigned char *buf)
24864f55e62SAndrey Grodzovsky {
249d7edde3dSLuben Tuikov 	u32 *pp = (uint32_t *)buf;
25064f55e62SAndrey Grodzovsky 
25164f55e62SAndrey Grodzovsky 	hdr->header	      = le32_to_cpu(pp[0]);
25264f55e62SAndrey Grodzovsky 	hdr->version	      = le32_to_cpu(pp[1]);
25364f55e62SAndrey Grodzovsky 	hdr->first_rec_offset = le32_to_cpu(pp[2]);
25464f55e62SAndrey Grodzovsky 	hdr->tbl_size	      = le32_to_cpu(pp[3]);
25564f55e62SAndrey Grodzovsky 	hdr->checksum	      = le32_to_cpu(pp[4]);
25664f55e62SAndrey Grodzovsky }
25764f55e62SAndrey Grodzovsky 
__write_table_header(struct amdgpu_ras_eeprom_control * control)25863d4c081SLuben Tuikov static int __write_table_header(struct amdgpu_ras_eeprom_control *control)
25964f55e62SAndrey Grodzovsky {
26063d4c081SLuben Tuikov 	u8 buf[RAS_TABLE_HEADER_SIZE];
2619015d60cSAndrey Grodzovsky 	struct amdgpu_device *adev = to_amdgpu_device(control);
26263d4c081SLuben Tuikov 	int res;
26364f55e62SAndrey Grodzovsky 
26463d4c081SLuben Tuikov 	memset(buf, 0, sizeof(buf));
265d7edde3dSLuben Tuikov 	__encode_table_header_to_buf(&control->tbl_hdr, buf);
2665985ebbeSJohn Clements 
26740e7ed97SDennis Li 	/* i2c may be unstable in gpu reset */
268d0fb18b5SAndrey Grodzovsky 	down_read(&adev->reset_domain->sem);
2692f60dd50SLuben Tuikov 	res = amdgpu_eeprom_write(adev->pm.ras_eeprom_i2c_bus,
27063d4c081SLuben Tuikov 				  control->i2c_address +
27163d4c081SLuben Tuikov 				  control->ras_header_offset,
272d7edde3dSLuben Tuikov 				  buf, RAS_TABLE_HEADER_SIZE);
273d0fb18b5SAndrey Grodzovsky 	up_read(&adev->reset_domain->sem);
27440e7ed97SDennis Li 
27563d4c081SLuben Tuikov 	if (res < 0) {
27663d4c081SLuben Tuikov 		DRM_ERROR("Failed to write EEPROM table header:%d", res);
27763d4c081SLuben Tuikov 	} else if (res < RAS_TABLE_HEADER_SIZE) {
27863d4c081SLuben Tuikov 		DRM_ERROR("Short write:%d out of %d\n",
27963d4c081SLuben Tuikov 			  res, RAS_TABLE_HEADER_SIZE);
28063d4c081SLuben Tuikov 		res = -EIO;
28163d4c081SLuben Tuikov 	} else {
28263d4c081SLuben Tuikov 		res = 0;
28363d4c081SLuben Tuikov 	}
28464f55e62SAndrey Grodzovsky 
28563d4c081SLuben Tuikov 	return res;
28664f55e62SAndrey Grodzovsky }
28764f55e62SAndrey Grodzovsky 
2887f599fedSStanley.Yang static void
__encode_table_ras_info_to_buf(struct amdgpu_ras_eeprom_table_ras_info * rai,unsigned char * buf)2897f599fedSStanley.Yang __encode_table_ras_info_to_buf(struct amdgpu_ras_eeprom_table_ras_info *rai,
2907f599fedSStanley.Yang 			       unsigned char *buf)
2917f599fedSStanley.Yang {
2927f599fedSStanley.Yang 	u32 *pp = (uint32_t *)buf;
2937f599fedSStanley.Yang 	u32 tmp;
2947f599fedSStanley.Yang 
2957f599fedSStanley.Yang 	tmp = ((uint32_t)(rai->rma_status) & 0xFF) |
2967f599fedSStanley.Yang 	      (((uint32_t)(rai->health_percent) << 8) & 0xFF00) |
2977f599fedSStanley.Yang 	      (((uint32_t)(rai->ecc_page_threshold) << 16) & 0xFFFF0000);
2987f599fedSStanley.Yang 	pp[0] = cpu_to_le32(tmp);
2997f599fedSStanley.Yang }
3007f599fedSStanley.Yang 
3017f599fedSStanley.Yang static void
__decode_table_ras_info_from_buf(struct amdgpu_ras_eeprom_table_ras_info * rai,unsigned char * buf)3027f599fedSStanley.Yang __decode_table_ras_info_from_buf(struct amdgpu_ras_eeprom_table_ras_info *rai,
3037f599fedSStanley.Yang 				 unsigned char *buf)
3047f599fedSStanley.Yang {
3057f599fedSStanley.Yang 	u32 *pp = (uint32_t *)buf;
3067f599fedSStanley.Yang 	u32 tmp;
3077f599fedSStanley.Yang 
3087f599fedSStanley.Yang 	tmp = le32_to_cpu(pp[0]);
3097f599fedSStanley.Yang 	rai->rma_status = tmp & 0xFF;
3107f599fedSStanley.Yang 	rai->health_percent = (tmp >> 8) & 0xFF;
3117f599fedSStanley.Yang 	rai->ecc_page_threshold = (tmp >> 16) & 0xFFFF;
3127f599fedSStanley.Yang }
3137f599fedSStanley.Yang 
__write_table_ras_info(struct amdgpu_ras_eeprom_control * control)3147f599fedSStanley.Yang static int __write_table_ras_info(struct amdgpu_ras_eeprom_control *control)
3157f599fedSStanley.Yang {
3167f599fedSStanley.Yang 	struct amdgpu_device *adev = to_amdgpu_device(control);
3177f599fedSStanley.Yang 	u8 *buf;
3187f599fedSStanley.Yang 	int res;
3197f599fedSStanley.Yang 
3207f599fedSStanley.Yang 	buf = kzalloc(RAS_TABLE_V2_1_INFO_SIZE, GFP_KERNEL);
3217f599fedSStanley.Yang 	if (!buf) {
3227f599fedSStanley.Yang 		DRM_ERROR("Failed to alloc buf to write table ras info\n");
3237f599fedSStanley.Yang 		return -ENOMEM;
3247f599fedSStanley.Yang 	}
3257f599fedSStanley.Yang 
3267f599fedSStanley.Yang 	__encode_table_ras_info_to_buf(&control->tbl_rai, buf);
3277f599fedSStanley.Yang 
3287f599fedSStanley.Yang 	/* i2c may be unstable in gpu reset */
3297f599fedSStanley.Yang 	down_read(&adev->reset_domain->sem);
3307f599fedSStanley.Yang 	res = amdgpu_eeprom_write(adev->pm.ras_eeprom_i2c_bus,
3317f599fedSStanley.Yang 				  control->i2c_address +
3327f599fedSStanley.Yang 				  control->ras_info_offset,
3337f599fedSStanley.Yang 				  buf, RAS_TABLE_V2_1_INFO_SIZE);
3347f599fedSStanley.Yang 	up_read(&adev->reset_domain->sem);
3357f599fedSStanley.Yang 
3367f599fedSStanley.Yang 	if (res < 0) {
3377f599fedSStanley.Yang 		DRM_ERROR("Failed to write EEPROM table ras info:%d", res);
3387f599fedSStanley.Yang 	} else if (res < RAS_TABLE_V2_1_INFO_SIZE) {
3397f599fedSStanley.Yang 		DRM_ERROR("Short write:%d out of %d\n",
3407f599fedSStanley.Yang 			  res, RAS_TABLE_V2_1_INFO_SIZE);
3417f599fedSStanley.Yang 		res = -EIO;
3427f599fedSStanley.Yang 	} else {
3437f599fedSStanley.Yang 		res = 0;
3447f599fedSStanley.Yang 	}
3457f599fedSStanley.Yang 
3467f599fedSStanley.Yang 	kfree(buf);
3477f599fedSStanley.Yang 
3487f599fedSStanley.Yang 	return res;
3497f599fedSStanley.Yang }
3507f599fedSStanley.Yang 
__calc_hdr_byte_sum(const struct amdgpu_ras_eeprom_control * control)351803c6ebdSLuben Tuikov static u8 __calc_hdr_byte_sum(const struct amdgpu_ras_eeprom_control *control)
352db338e16SAndrey Grodzovsky {
35363d4c081SLuben Tuikov 	int ii;
35463d4c081SLuben Tuikov 	u8  *pp, csum;
355803c6ebdSLuben Tuikov 	size_t sz;
356db338e16SAndrey Grodzovsky 
357db338e16SAndrey Grodzovsky 	/* Header checksum, skip checksum field in the calculation */
358803c6ebdSLuben Tuikov 	sz = sizeof(control->tbl_hdr) - sizeof(control->tbl_hdr.checksum);
35963d4c081SLuben Tuikov 	pp = (u8 *) &control->tbl_hdr;
36063d4c081SLuben Tuikov 	csum = 0;
36163d4c081SLuben Tuikov 	for (ii = 0; ii < sz; ii++, pp++)
36263d4c081SLuben Tuikov 		csum += *pp;
363db338e16SAndrey Grodzovsky 
36463d4c081SLuben Tuikov 	return csum;
365db338e16SAndrey Grodzovsky }
36664f55e62SAndrey Grodzovsky 
__calc_ras_info_byte_sum(const struct amdgpu_ras_eeprom_control * control)3677c2551faSStanley.Yang static u8 __calc_ras_info_byte_sum(const struct amdgpu_ras_eeprom_control *control)
3687c2551faSStanley.Yang {
3697c2551faSStanley.Yang 	int ii;
3707c2551faSStanley.Yang 	u8  *pp, csum;
3717c2551faSStanley.Yang 	size_t sz;
3727c2551faSStanley.Yang 
3737c2551faSStanley.Yang 	sz = sizeof(control->tbl_rai);
3747c2551faSStanley.Yang 	pp = (u8 *) &control->tbl_rai;
3757c2551faSStanley.Yang 	csum = 0;
3767c2551faSStanley.Yang 	for (ii = 0; ii < sz; ii++, pp++)
3777c2551faSStanley.Yang 		csum += *pp;
3787c2551faSStanley.Yang 
3797c2551faSStanley.Yang 	return csum;
3807c2551faSStanley.Yang }
3817c2551faSStanley.Yang 
/*
 * Replace the tag in the cached table header with @header, adjust the
 * cached checksum to match, and write the header to the EEPROM.
 *
 * The checksum is updated incrementally rather than recomputed: the stored
 * checksum is the negated byte sum, so negating it recovers the sum, from
 * which the bytes of the old tag are subtracted and the bytes of the new
 * tag are added before negating again.
 *
 * The whole read-modify-write of the cached header runs under
 * ras_tbl_mutex, so the checksum is always derived from the header contents
 * that are actually being replaced.
 *
 * Returns the result of __write_table_header(): 0 on success, -errno on
 * error.
 */
static int amdgpu_ras_eeprom_correct_header_tag(
	struct amdgpu_ras_eeprom_control *control,
	uint32_t header)
{
	struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
	u8 *hh;
	int res;
	u8 csum;

	mutex_lock(&control->ras_tbl_mutex);

	/* Recover the current byte sum from the stored checksum. */
	csum = -hdr->checksum;

	/* Swap the contribution of the old tag for that of the new one. */
	hh = (void *) &hdr->header;
	csum -= (hh[0] + hh[1] + hh[2] + hh[3]);
	hh = (void *) &header;
	csum += hh[0] + hh[1] + hh[2] + hh[3];
	csum = -csum;

	hdr->header = header;
	hdr->checksum = csum;
	res = __write_table_header(control);

	mutex_unlock(&control->ras_tbl_mutex);

	return res;
}
4069b856defSGuchun Chen 
40763d4c081SLuben Tuikov /**
40863d4c081SLuben Tuikov  * amdgpu_ras_eeprom_reset_table -- Reset the RAS EEPROM table
40963d4c081SLuben Tuikov  * @control: pointer to control structure
41063d4c081SLuben Tuikov  *
41163d4c081SLuben Tuikov  * Reset the contents of the header of the RAS EEPROM table.
41263d4c081SLuben Tuikov  * Return 0 on success, -errno on error.
41363d4c081SLuben Tuikov  */
amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control * control)414d01b400bSAndrey Grodzovsky int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control)
415d01b400bSAndrey Grodzovsky {
4168bbd4d83SStanley.Yang 	struct amdgpu_device *adev = to_amdgpu_device(control);
417d01b400bSAndrey Grodzovsky 	struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
4180bc3137bSStanley.Yang 	struct amdgpu_ras_eeprom_table_ras_info *rai = &control->tbl_rai;
41969691c82SStanley.Yang 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
42063d4c081SLuben Tuikov 	u8 csum;
42163d4c081SLuben Tuikov 	int res;
422db338e16SAndrey Grodzovsky 
4230686627bSLuben Tuikov 	mutex_lock(&control->ras_tbl_mutex);
424d01b400bSAndrey Grodzovsky 
425c28aa44dSLuben Tuikov 	hdr->header = RAS_TABLE_HDR_VAL;
4267f599fedSStanley.Yang 	if (adev->umc.ras &&
4277f599fedSStanley.Yang 	    adev->umc.ras->set_eeprom_table_version)
4287f599fedSStanley.Yang 		adev->umc.ras->set_eeprom_table_version(hdr);
4297f599fedSStanley.Yang 	else
43071c79a19SStanley.Yang 		hdr->version = RAS_TABLE_VER_V1;
4317f599fedSStanley.Yang 
4327f599fedSStanley.Yang 	if (hdr->version == RAS_TABLE_VER_V2_1) {
4337f599fedSStanley.Yang 		hdr->first_rec_offset = RAS_RECORD_START_V2_1;
4347f599fedSStanley.Yang 		hdr->tbl_size = RAS_TABLE_HEADER_SIZE +
4357f599fedSStanley.Yang 				RAS_TABLE_V2_1_INFO_SIZE;
4360bc3137bSStanley.Yang 		rai->rma_status = GPU_HEALTH_USABLE;
4370bc3137bSStanley.Yang 		/**
4380bc3137bSStanley.Yang 		 * GPU health represented as a percentage.
4390bc3137bSStanley.Yang 		 * 0 means worst health, 100 means fully health.
4400bc3137bSStanley.Yang 		 */
4410bc3137bSStanley.Yang 		rai->health_percent = 100;
4420bc3137bSStanley.Yang 		/* ecc_page_threshold = 0 means disable bad page retirement */
4430bc3137bSStanley.Yang 		rai->ecc_page_threshold = con->bad_page_cnt_threshold;
4447f599fedSStanley.Yang 	} else {
445c28aa44dSLuben Tuikov 		hdr->first_rec_offset = RAS_RECORD_START;
446c28aa44dSLuben Tuikov 		hdr->tbl_size = RAS_TABLE_HEADER_SIZE;
4477f599fedSStanley.Yang 	}
448d01b400bSAndrey Grodzovsky 
44963d4c081SLuben Tuikov 	csum = __calc_hdr_byte_sum(control);
4507c2551faSStanley.Yang 	if (hdr->version == RAS_TABLE_VER_V2_1)
4517c2551faSStanley.Yang 		csum += __calc_ras_info_byte_sum(control);
45263d4c081SLuben Tuikov 	csum = -csum;
45363d4c081SLuben Tuikov 	hdr->checksum = csum;
45463d4c081SLuben Tuikov 	res = __write_table_header(control);
4557f599fedSStanley.Yang 	if (!res && hdr->version > RAS_TABLE_VER_V1)
4567f599fedSStanley.Yang 		res = __write_table_ras_info(control);
45763d4c081SLuben Tuikov 
45863d4c081SLuben Tuikov 	control->ras_num_recs = 0;
45963d4c081SLuben Tuikov 	control->ras_fri = 0;
460db338e16SAndrey Grodzovsky 
4618bbd4d83SStanley.Yang 	amdgpu_dpm_send_hbm_bad_pages_num(adev, control->ras_num_recs);
4628bbd4d83SStanley.Yang 
46369691c82SStanley.Yang 	control->bad_channel_bitmap = 0;
46469691c82SStanley.Yang 	amdgpu_dpm_send_hbm_bad_channel_flag(adev, control->bad_channel_bitmap);
46569691c82SStanley.Yang 	con->update_channel_flag = false;
46669691c82SStanley.Yang 
467c65b0805SLuben Tuikov 	amdgpu_ras_debugfs_set_ret_size(control);
468c65b0805SLuben Tuikov 
4690686627bSLuben Tuikov 	mutex_unlock(&control->ras_tbl_mutex);
470db338e16SAndrey Grodzovsky 
47163d4c081SLuben Tuikov 	return res;
472d01b400bSAndrey Grodzovsky }
473d01b400bSAndrey Grodzovsky 
47463d4c081SLuben Tuikov static void
__encode_table_record_to_buf(struct amdgpu_ras_eeprom_control * control,struct eeprom_table_record * record,unsigned char * buf)47563d4c081SLuben Tuikov __encode_table_record_to_buf(struct amdgpu_ras_eeprom_control *control,
47664f55e62SAndrey Grodzovsky 			     struct eeprom_table_record *record,
477d7edde3dSLuben Tuikov 			     unsigned char *buf)
47864f55e62SAndrey Grodzovsky {
47964f55e62SAndrey Grodzovsky 	__le64 tmp = 0;
48064f55e62SAndrey Grodzovsky 	int i = 0;
48164f55e62SAndrey Grodzovsky 
48264f55e62SAndrey Grodzovsky 	/* Next are all record fields according to EEPROM page spec in LE foramt */
483d7edde3dSLuben Tuikov 	buf[i++] = record->err_type;
48464f55e62SAndrey Grodzovsky 
485d7edde3dSLuben Tuikov 	buf[i++] = record->bank;
48664f55e62SAndrey Grodzovsky 
48764f55e62SAndrey Grodzovsky 	tmp = cpu_to_le64(record->ts);
488d7edde3dSLuben Tuikov 	memcpy(buf + i, &tmp, 8);
48964f55e62SAndrey Grodzovsky 	i += 8;
49064f55e62SAndrey Grodzovsky 
49164f55e62SAndrey Grodzovsky 	tmp = cpu_to_le64((record->offset & 0xffffffffffff));
492d7edde3dSLuben Tuikov 	memcpy(buf + i, &tmp, 6);
49364f55e62SAndrey Grodzovsky 	i += 6;
49464f55e62SAndrey Grodzovsky 
495d7edde3dSLuben Tuikov 	buf[i++] = record->mem_channel;
496d7edde3dSLuben Tuikov 	buf[i++] = record->mcumc_id;
49764f55e62SAndrey Grodzovsky 
49864f55e62SAndrey Grodzovsky 	tmp = cpu_to_le64((record->retired_page & 0xffffffffffff));
499d7edde3dSLuben Tuikov 	memcpy(buf + i, &tmp, 6);
50064f55e62SAndrey Grodzovsky }
50164f55e62SAndrey Grodzovsky 
50263d4c081SLuben Tuikov static void
__decode_table_record_from_buf(struct amdgpu_ras_eeprom_control * control,struct eeprom_table_record * record,unsigned char * buf)50363d4c081SLuben Tuikov __decode_table_record_from_buf(struct amdgpu_ras_eeprom_control *control,
50464f55e62SAndrey Grodzovsky 			       struct eeprom_table_record *record,
505d7edde3dSLuben Tuikov 			       unsigned char *buf)
50664f55e62SAndrey Grodzovsky {
50764f55e62SAndrey Grodzovsky 	__le64 tmp = 0;
50864f55e62SAndrey Grodzovsky 	int i =  0;
50964f55e62SAndrey Grodzovsky 
51064f55e62SAndrey Grodzovsky 	/* Next are all record fields according to EEPROM page spec in LE foramt */
511d7edde3dSLuben Tuikov 	record->err_type = buf[i++];
51264f55e62SAndrey Grodzovsky 
513d7edde3dSLuben Tuikov 	record->bank = buf[i++];
51464f55e62SAndrey Grodzovsky 
515d7edde3dSLuben Tuikov 	memcpy(&tmp, buf + i, 8);
51664f55e62SAndrey Grodzovsky 	record->ts = le64_to_cpu(tmp);
51764f55e62SAndrey Grodzovsky 	i += 8;
51864f55e62SAndrey Grodzovsky 
519d7edde3dSLuben Tuikov 	memcpy(&tmp, buf + i, 6);
52064f55e62SAndrey Grodzovsky 	record->offset = (le64_to_cpu(tmp) & 0xffffffffffff);
52164f55e62SAndrey Grodzovsky 	i += 6;
52264f55e62SAndrey Grodzovsky 
523d7edde3dSLuben Tuikov 	record->mem_channel = buf[i++];
524d7edde3dSLuben Tuikov 	record->mcumc_id = buf[i++];
52564f55e62SAndrey Grodzovsky 
526d7edde3dSLuben Tuikov 	memcpy(&tmp, buf + i,  6);
52764f55e62SAndrey Grodzovsky 	record->retired_page = (le64_to_cpu(tmp) & 0xffffffffffff);
52864f55e62SAndrey Grodzovsky }
52964f55e62SAndrey Grodzovsky 
amdgpu_ras_eeprom_check_err_threshold(struct amdgpu_device * adev)53011003c68SDennis Li bool amdgpu_ras_eeprom_check_err_threshold(struct amdgpu_device *adev)
531e8fbaf03SGuchun Chen {
53211003c68SDennis Li 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
533e8fbaf03SGuchun Chen 
53422106ed0STao Zhou 	if (!__is_ras_eeprom_supported(adev) ||
53522106ed0STao Zhou 	    !amdgpu_bad_page_threshold)
53611003c68SDennis Li 		return false;
5374bfb7428SJohn Clements 
538970fd197SStanley.Yang 	/* skip check eeprom table for VEGA20 Gaming */
539970fd197SStanley.Yang 	if (!con)
540970fd197SStanley.Yang 		return false;
541970fd197SStanley.Yang 	else
542970fd197SStanley.Yang 		if (!(con->features & BIT(AMDGPU_RAS_BLOCK__UMC)))
543970fd197SStanley.Yang 			return false;
544970fd197SStanley.Yang 
545c28aa44dSLuben Tuikov 	if (con->eeprom_control.tbl_hdr.header == RAS_TABLE_HDR_BAD) {
54622106ed0STao Zhou 		if (amdgpu_bad_page_threshold == -1) {
54722106ed0STao Zhou 			dev_warn(adev->dev, "RAS records:%d exceed threshold:%d",
54822106ed0STao Zhou 				con->eeprom_control.ras_num_recs, con->bad_page_cnt_threshold);
54922106ed0STao Zhou 			dev_warn(adev->dev,
55022106ed0STao Zhou 				"But GPU can be operated due to bad_page_threshold = -1.\n");
55122106ed0STao Zhou 			return false;
55222106ed0STao Zhou 		} else {
553e8fbaf03SGuchun Chen 			dev_warn(adev->dev, "This GPU is in BAD status.");
55463d4c081SLuben Tuikov 			dev_warn(adev->dev, "Please retire it or set a larger "
555e8fbaf03SGuchun Chen 				 "threshold value when reloading driver.\n");
55611003c68SDennis Li 			return true;
557e8fbaf03SGuchun Chen 		}
55822106ed0STao Zhou 	}
559e8fbaf03SGuchun Chen 
56011003c68SDennis Li 	return false;
561e8fbaf03SGuchun Chen }
562e8fbaf03SGuchun Chen 
56363d4c081SLuben Tuikov /**
56463d4c081SLuben Tuikov  * __amdgpu_ras_eeprom_write -- write indexed from buffer to EEPROM
56563d4c081SLuben Tuikov  * @control: pointer to control structure
56663d4c081SLuben Tuikov  * @buf: pointer to buffer containing data to write
56763d4c081SLuben Tuikov  * @fri: start writing at this index
56863d4c081SLuben Tuikov  * @num: number of records to write
56963d4c081SLuben Tuikov  *
57063d4c081SLuben Tuikov  * The caller must hold the table mutex in @control.
57163d4c081SLuben Tuikov  * Return 0 on success, -errno otherwise.
57263d4c081SLuben Tuikov  */
__amdgpu_ras_eeprom_write(struct amdgpu_ras_eeprom_control * control,u8 * buf,const u32 fri,const u32 num)57363d4c081SLuben Tuikov static int __amdgpu_ras_eeprom_write(struct amdgpu_ras_eeprom_control *control,
57463d4c081SLuben Tuikov 				     u8 *buf, const u32 fri, const u32 num)
57564f55e62SAndrey Grodzovsky {
57663d4c081SLuben Tuikov 	struct amdgpu_device *adev = to_amdgpu_device(control);
57763d4c081SLuben Tuikov 	u32 buf_size;
57863d4c081SLuben Tuikov 	int res;
57963d4c081SLuben Tuikov 
58063d4c081SLuben Tuikov 	/* i2c may be unstable in gpu reset */
581d0fb18b5SAndrey Grodzovsky 	down_read(&adev->reset_domain->sem);
58263d4c081SLuben Tuikov 	buf_size = num * RAS_TABLE_RECORD_SIZE;
5832f60dd50SLuben Tuikov 	res = amdgpu_eeprom_write(adev->pm.ras_eeprom_i2c_bus,
58463d4c081SLuben Tuikov 				  control->i2c_address +
58563d4c081SLuben Tuikov 				  RAS_INDEX_TO_OFFSET(control, fri),
58663d4c081SLuben Tuikov 				  buf, buf_size);
587d0fb18b5SAndrey Grodzovsky 	up_read(&adev->reset_domain->sem);
58863d4c081SLuben Tuikov 	if (res < 0) {
58963d4c081SLuben Tuikov 		DRM_ERROR("Writing %d EEPROM table records error:%d",
59063d4c081SLuben Tuikov 			  num, res);
59163d4c081SLuben Tuikov 	} else if (res < buf_size) {
59263d4c081SLuben Tuikov 		/* Short write, return error.
59363d4c081SLuben Tuikov 		 */
59463d4c081SLuben Tuikov 		DRM_ERROR("Wrote %d records out of %d",
59563d4c081SLuben Tuikov 			  res / RAS_TABLE_RECORD_SIZE, num);
59663d4c081SLuben Tuikov 		res = -EIO;
59763d4c081SLuben Tuikov 	} else {
59863d4c081SLuben Tuikov 		res = 0;
59963d4c081SLuben Tuikov 	}
60063d4c081SLuben Tuikov 
60163d4c081SLuben Tuikov 	return res;
60263d4c081SLuben Tuikov }
60363d4c081SLuben Tuikov 
60463d4c081SLuben Tuikov static int
amdgpu_ras_eeprom_append_table(struct amdgpu_ras_eeprom_control * control,struct eeprom_table_record * record,const u32 num)60563d4c081SLuben Tuikov amdgpu_ras_eeprom_append_table(struct amdgpu_ras_eeprom_control *control,
60663d4c081SLuben Tuikov 			       struct eeprom_table_record *record,
60763d4c081SLuben Tuikov 			       const u32 num)
60863d4c081SLuben Tuikov {
60969691c82SStanley.Yang 	struct amdgpu_ras *con = amdgpu_ras_get_context(to_amdgpu_device(control));
61063d4c081SLuben Tuikov 	u32 a, b, i;
61163d4c081SLuben Tuikov 	u8 *buf, *pp;
61263d4c081SLuben Tuikov 	int res;
61363d4c081SLuben Tuikov 
61463d4c081SLuben Tuikov 	buf = kcalloc(num, RAS_TABLE_RECORD_SIZE, GFP_KERNEL);
61563d4c081SLuben Tuikov 	if (!buf)
61663d4c081SLuben Tuikov 		return -ENOMEM;
61763d4c081SLuben Tuikov 
61863d4c081SLuben Tuikov 	/* Encode all of them in one go.
61963d4c081SLuben Tuikov 	 */
62063d4c081SLuben Tuikov 	pp = buf;
62169691c82SStanley.Yang 	for (i = 0; i < num; i++, pp += RAS_TABLE_RECORD_SIZE) {
62263d4c081SLuben Tuikov 		__encode_table_record_to_buf(control, &record[i], pp);
62363d4c081SLuben Tuikov 
62469691c82SStanley.Yang 		/* update bad channel bitmap */
62569691c82SStanley.Yang 		if (!(control->bad_channel_bitmap & (1 << record[i].mem_channel))) {
62669691c82SStanley.Yang 			control->bad_channel_bitmap |= 1 << record[i].mem_channel;
62769691c82SStanley.Yang 			con->update_channel_flag = true;
62869691c82SStanley.Yang 		}
62969691c82SStanley.Yang 	}
63069691c82SStanley.Yang 
63163d4c081SLuben Tuikov 	/* a, first record index to write into.
63263d4c081SLuben Tuikov 	 * b, last record index to write into.
63363d4c081SLuben Tuikov 	 * a = first index to read (fri) + number of records in the table,
63463d4c081SLuben Tuikov 	 * b = a + @num - 1.
63563d4c081SLuben Tuikov 	 * Let N = control->ras_max_num_record_count, then we have,
63663d4c081SLuben Tuikov 	 * case 0: 0 <= a <= b < N,
63763d4c081SLuben Tuikov 	 *   just append @num records starting at a;
63863d4c081SLuben Tuikov 	 * case 1: 0 <= a < N <= b,
63963d4c081SLuben Tuikov 	 *   append (N - a) records starting at a, and
64063d4c081SLuben Tuikov 	 *   append the remainder,  b % N + 1, starting at 0.
64163d4c081SLuben Tuikov 	 * case 2: 0 <= fri < N <= a <= b, then modulo N we get two subcases,
64263d4c081SLuben Tuikov 	 * case 2a: 0 <= a <= b < N
64363d4c081SLuben Tuikov 	 *   append num records starting at a; and fix fri if b overwrote it,
64463d4c081SLuben Tuikov 	 *   and since a <= b, if b overwrote it then a must've also,
64563d4c081SLuben Tuikov 	 *   and if b didn't overwrite it, then a didn't also.
64663d4c081SLuben Tuikov 	 * case 2b: 0 <= b < a < N
64763d4c081SLuben Tuikov 	 *   write num records starting at a, which wraps around 0=N
64863d4c081SLuben Tuikov 	 *   and overwrite fri unconditionally. Now from case 2a,
64963d4c081SLuben Tuikov 	 *   this means that b eclipsed fri to overwrite it and wrap
65063d4c081SLuben Tuikov 	 *   around 0 again, i.e. b = 2N+r pre modulo N, so we unconditionally
65163d4c081SLuben Tuikov 	 *   set fri = b + 1 (mod N).
65263d4c081SLuben Tuikov 	 * Now, since fri is updated in every case, except the trivial case 0,
65363d4c081SLuben Tuikov 	 * the number of records present in the table after writing, is,
65463d4c081SLuben Tuikov 	 * num_recs - 1 = b - fri (mod N), and we take the positive value,
65563d4c081SLuben Tuikov 	 * by adding an arbitrary multiple of N before taking the modulo N
65663d4c081SLuben Tuikov 	 * as shown below.
65763d4c081SLuben Tuikov 	 */
65863d4c081SLuben Tuikov 	a = control->ras_fri + control->ras_num_recs;
65963d4c081SLuben Tuikov 	b = a + num  - 1;
66063d4c081SLuben Tuikov 	if (b < control->ras_max_record_count) {
66163d4c081SLuben Tuikov 		res = __amdgpu_ras_eeprom_write(control, buf, a, num);
66263d4c081SLuben Tuikov 	} else if (a < control->ras_max_record_count) {
66363d4c081SLuben Tuikov 		u32 g0, g1;
66463d4c081SLuben Tuikov 
66563d4c081SLuben Tuikov 		g0 = control->ras_max_record_count - a;
66663d4c081SLuben Tuikov 		g1 = b % control->ras_max_record_count + 1;
66763d4c081SLuben Tuikov 		res = __amdgpu_ras_eeprom_write(control, buf, a, g0);
66863d4c081SLuben Tuikov 		if (res)
66963d4c081SLuben Tuikov 			goto Out;
67063d4c081SLuben Tuikov 		res = __amdgpu_ras_eeprom_write(control,
67163d4c081SLuben Tuikov 						buf + g0 * RAS_TABLE_RECORD_SIZE,
67263d4c081SLuben Tuikov 						0, g1);
67363d4c081SLuben Tuikov 		if (res)
67463d4c081SLuben Tuikov 			goto Out;
67563d4c081SLuben Tuikov 		if (g1 > control->ras_fri)
67663d4c081SLuben Tuikov 			control->ras_fri = g1 % control->ras_max_record_count;
67763d4c081SLuben Tuikov 	} else {
67863d4c081SLuben Tuikov 		a %= control->ras_max_record_count;
67963d4c081SLuben Tuikov 		b %= control->ras_max_record_count;
68063d4c081SLuben Tuikov 
68163d4c081SLuben Tuikov 		if (a <= b) {
68263d4c081SLuben Tuikov 			/* Note that, b - a + 1 = num. */
68363d4c081SLuben Tuikov 			res = __amdgpu_ras_eeprom_write(control, buf, a, num);
68463d4c081SLuben Tuikov 			if (res)
68563d4c081SLuben Tuikov 				goto Out;
68663d4c081SLuben Tuikov 			if (b >= control->ras_fri)
68763d4c081SLuben Tuikov 				control->ras_fri = (b + 1) % control->ras_max_record_count;
68863d4c081SLuben Tuikov 		} else {
68963d4c081SLuben Tuikov 			u32 g0, g1;
69063d4c081SLuben Tuikov 
69163d4c081SLuben Tuikov 			/* b < a, which means, we write from
69263d4c081SLuben Tuikov 			 * a to the end of the table, and from
69363d4c081SLuben Tuikov 			 * the start of the table to b.
69463d4c081SLuben Tuikov 			 */
69563d4c081SLuben Tuikov 			g0 = control->ras_max_record_count - a;
69663d4c081SLuben Tuikov 			g1 = b + 1;
69763d4c081SLuben Tuikov 			res = __amdgpu_ras_eeprom_write(control, buf, a, g0);
69863d4c081SLuben Tuikov 			if (res)
69963d4c081SLuben Tuikov 				goto Out;
70063d4c081SLuben Tuikov 			res = __amdgpu_ras_eeprom_write(control,
70163d4c081SLuben Tuikov 							buf + g0 * RAS_TABLE_RECORD_SIZE,
70263d4c081SLuben Tuikov 							0, g1);
70363d4c081SLuben Tuikov 			if (res)
70463d4c081SLuben Tuikov 				goto Out;
70563d4c081SLuben Tuikov 			control->ras_fri = g1 % control->ras_max_record_count;
70663d4c081SLuben Tuikov 		}
70763d4c081SLuben Tuikov 	}
70863d4c081SLuben Tuikov 	control->ras_num_recs = 1 + (control->ras_max_record_count + b
70963d4c081SLuben Tuikov 				     - control->ras_fri)
71063d4c081SLuben Tuikov 		% control->ras_max_record_count;
71163d4c081SLuben Tuikov Out:
71263d4c081SLuben Tuikov 	kfree(buf);
71363d4c081SLuben Tuikov 	return res;
71463d4c081SLuben Tuikov }
71563d4c081SLuben Tuikov 
71663d4c081SLuben Tuikov static int
amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control * control)71763d4c081SLuben Tuikov amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control)
71863d4c081SLuben Tuikov {
71964f55e62SAndrey Grodzovsky 	struct amdgpu_device *adev = to_amdgpu_device(control);
7209c06f91fSGuchun Chen 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
72163d4c081SLuben Tuikov 	u8 *buf, *pp, csum;
72263d4c081SLuben Tuikov 	u32 buf_size;
72363d4c081SLuben Tuikov 	int res;
72463d4c081SLuben Tuikov 
72563d4c081SLuben Tuikov 	/* Modify the header if it exceeds.
72663d4c081SLuben Tuikov 	 */
72763d4c081SLuben Tuikov 	if (amdgpu_bad_page_threshold != 0 &&
72863d4c081SLuben Tuikov 	    control->ras_num_recs >= ras->bad_page_cnt_threshold) {
72963d4c081SLuben Tuikov 		dev_warn(adev->dev,
73063d4c081SLuben Tuikov 			"Saved bad pages %d reaches threshold value %d\n",
73163d4c081SLuben Tuikov 			control->ras_num_recs, ras->bad_page_cnt_threshold);
73263d4c081SLuben Tuikov 		control->tbl_hdr.header = RAS_TABLE_HDR_BAD;
7330bc3137bSStanley.Yang 		if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1) {
7340bc3137bSStanley.Yang 			control->tbl_rai.rma_status = GPU_RETIRED__ECC_REACH_THRESHOLD;
7350bc3137bSStanley.Yang 			control->tbl_rai.health_percent = 0;
7360bc3137bSStanley.Yang 		}
73763d4c081SLuben Tuikov 	}
73863d4c081SLuben Tuikov 
7397f599fedSStanley.Yang 	if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1)
7407f599fedSStanley.Yang 		control->tbl_hdr.tbl_size = RAS_TABLE_HEADER_SIZE +
7417f599fedSStanley.Yang 					    RAS_TABLE_V2_1_INFO_SIZE +
7427f599fedSStanley.Yang 					    control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
7437f599fedSStanley.Yang 	else
7447f599fedSStanley.Yang 		control->tbl_hdr.tbl_size = RAS_TABLE_HEADER_SIZE +
7457f599fedSStanley.Yang 					    control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
74663d4c081SLuben Tuikov 	control->tbl_hdr.checksum = 0;
74763d4c081SLuben Tuikov 
74863d4c081SLuben Tuikov 	buf_size = control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
74963d4c081SLuben Tuikov 	buf = kcalloc(control->ras_num_recs, RAS_TABLE_RECORD_SIZE, GFP_KERNEL);
75063d4c081SLuben Tuikov 	if (!buf) {
75163d4c081SLuben Tuikov 		DRM_ERROR("allocating memory for table of size %d bytes failed\n",
75263d4c081SLuben Tuikov 			  control->tbl_hdr.tbl_size);
75363d4c081SLuben Tuikov 		res = -ENOMEM;
75463d4c081SLuben Tuikov 		goto Out;
75563d4c081SLuben Tuikov 	}
75663d4c081SLuben Tuikov 
757d0fb18b5SAndrey Grodzovsky 	down_read(&adev->reset_domain->sem);
7582f60dd50SLuben Tuikov 	res = amdgpu_eeprom_read(adev->pm.ras_eeprom_i2c_bus,
75963d4c081SLuben Tuikov 				 control->i2c_address +
76063d4c081SLuben Tuikov 				 control->ras_record_offset,
76163d4c081SLuben Tuikov 				 buf, buf_size);
762d0fb18b5SAndrey Grodzovsky 	up_read(&adev->reset_domain->sem);
76363d4c081SLuben Tuikov 	if (res < 0) {
76463d4c081SLuben Tuikov 		DRM_ERROR("EEPROM failed reading records:%d\n",
76563d4c081SLuben Tuikov 			  res);
76663d4c081SLuben Tuikov 		goto Out;
76763d4c081SLuben Tuikov 	} else if (res < buf_size) {
76863d4c081SLuben Tuikov 		DRM_ERROR("EEPROM read %d out of %d bytes\n",
76963d4c081SLuben Tuikov 			  res, buf_size);
77063d4c081SLuben Tuikov 		res = -EIO;
77163d4c081SLuben Tuikov 		goto Out;
77263d4c081SLuben Tuikov 	}
77363d4c081SLuben Tuikov 
7740bc3137bSStanley.Yang 	/**
7750bc3137bSStanley.Yang 	 * bad page records have been stored in eeprom,
7760bc3137bSStanley.Yang 	 * now calculate gpu health percent
7770bc3137bSStanley.Yang 	 */
7780bc3137bSStanley.Yang 	if (amdgpu_bad_page_threshold != 0 &&
7790bc3137bSStanley.Yang 	    control->tbl_hdr.version == RAS_TABLE_VER_V2_1 &&
7800bc3137bSStanley.Yang 	    control->ras_num_recs < ras->bad_page_cnt_threshold)
7810bc3137bSStanley.Yang 		control->tbl_rai.health_percent = ((ras->bad_page_cnt_threshold -
7820bc3137bSStanley.Yang 						   control->ras_num_recs) * 100) /
7830bc3137bSStanley.Yang 						   ras->bad_page_cnt_threshold;
7840bc3137bSStanley.Yang 
78563d4c081SLuben Tuikov 	/* Recalc the checksum.
78663d4c081SLuben Tuikov 	 */
78763d4c081SLuben Tuikov 	csum = 0;
78863d4c081SLuben Tuikov 	for (pp = buf; pp < buf + buf_size; pp++)
78963d4c081SLuben Tuikov 		csum += *pp;
79063d4c081SLuben Tuikov 
79163d4c081SLuben Tuikov 	csum += __calc_hdr_byte_sum(control);
7927c2551faSStanley.Yang 	if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1)
7937c2551faSStanley.Yang 		csum += __calc_ras_info_byte_sum(control);
79463d4c081SLuben Tuikov 	/* avoid sign extension when assigning to "checksum" */
79563d4c081SLuben Tuikov 	csum = -csum;
79663d4c081SLuben Tuikov 	control->tbl_hdr.checksum = csum;
79763d4c081SLuben Tuikov 	res = __write_table_header(control);
7987f599fedSStanley.Yang 	if (!res && control->tbl_hdr.version > RAS_TABLE_VER_V1)
7997f599fedSStanley.Yang 		res = __write_table_ras_info(control);
80063d4c081SLuben Tuikov Out:
80163d4c081SLuben Tuikov 	kfree(buf);
80263d4c081SLuben Tuikov 	return res;
80363d4c081SLuben Tuikov }
80463d4c081SLuben Tuikov 
80563d4c081SLuben Tuikov /**
80663d4c081SLuben Tuikov  * amdgpu_ras_eeprom_append -- append records to the EEPROM RAS table
80763d4c081SLuben Tuikov  * @control: pointer to control structure
80863d4c081SLuben Tuikov  * @record: array of records to append
80963d4c081SLuben Tuikov  * @num: number of records in @record array
81063d4c081SLuben Tuikov  *
81163d4c081SLuben Tuikov  * Append @num records to the table, calculate the checksum and write
81263d4c081SLuben Tuikov  * the table back to EEPROM. The maximum number of records that
81363d4c081SLuben Tuikov  * can be appended is between 1 and control->ras_max_record_count,
81463d4c081SLuben Tuikov  * regardless of how many records are already stored in the table.
81563d4c081SLuben Tuikov  *
81663d4c081SLuben Tuikov  * Return 0 on success or if EEPROM is not supported, -errno on error.
81763d4c081SLuben Tuikov  */
amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control * control,struct eeprom_table_record * record,const u32 num)81863d4c081SLuben Tuikov int amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control *control,
81963d4c081SLuben Tuikov 			     struct eeprom_table_record *record,
82063d4c081SLuben Tuikov 			     const u32 num)
82163d4c081SLuben Tuikov {
82263d4c081SLuben Tuikov 	struct amdgpu_device *adev = to_amdgpu_device(control);
82363d4c081SLuben Tuikov 	int res;
82464f55e62SAndrey Grodzovsky 
8254bfb7428SJohn Clements 	if (!__is_ras_eeprom_supported(adev))
82664f55e62SAndrey Grodzovsky 		return 0;
82764f55e62SAndrey Grodzovsky 
82863d4c081SLuben Tuikov 	if (num == 0) {
82963d4c081SLuben Tuikov 		DRM_ERROR("will not append 0 records\n");
83063d4c081SLuben Tuikov 		return -EINVAL;
83163d4c081SLuben Tuikov 	} else if (num > control->ras_max_record_count) {
83263d4c081SLuben Tuikov 		DRM_ERROR("cannot append %d records than the size of table %d\n",
83363d4c081SLuben Tuikov 			  num, control->ras_max_record_count);
83463d4c081SLuben Tuikov 		return -EINVAL;
83563d4c081SLuben Tuikov 	}
83664f55e62SAndrey Grodzovsky 
8370686627bSLuben Tuikov 	mutex_lock(&control->ras_tbl_mutex);
83864f55e62SAndrey Grodzovsky 
83963d4c081SLuben Tuikov 	res = amdgpu_ras_eeprom_append_table(control, record, num);
84063d4c081SLuben Tuikov 	if (!res)
84163d4c081SLuben Tuikov 		res = amdgpu_ras_eeprom_update_header(control);
842c65b0805SLuben Tuikov 	if (!res)
843c65b0805SLuben Tuikov 		amdgpu_ras_debugfs_set_ret_size(control);
84463d4c081SLuben Tuikov 
84563d4c081SLuben Tuikov 	mutex_unlock(&control->ras_tbl_mutex);
84663d4c081SLuben Tuikov 	return res;
8479c06f91fSGuchun Chen }
8489c06f91fSGuchun Chen 
84963d4c081SLuben Tuikov /**
85063d4c081SLuben Tuikov  * __amdgpu_ras_eeprom_read -- read indexed from EEPROM into buffer
85163d4c081SLuben Tuikov  * @control: pointer to control structure
85263d4c081SLuben Tuikov  * @buf: pointer to buffer to read into
85363d4c081SLuben Tuikov  * @fri: first record index, start reading at this index, absolute index
85463d4c081SLuben Tuikov  * @num: number of records to read
85563d4c081SLuben Tuikov  *
85663d4c081SLuben Tuikov  * The caller must hold the table mutex in @control.
85763d4c081SLuben Tuikov  * Return 0 on success, -errno otherwise.
85864f55e62SAndrey Grodzovsky  */
__amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control * control,u8 * buf,const u32 fri,const u32 num)85963d4c081SLuben Tuikov static int __amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control,
86063d4c081SLuben Tuikov 				    u8 *buf, const u32 fri, const u32 num)
86163d4c081SLuben Tuikov {
86263d4c081SLuben Tuikov 	struct amdgpu_device *adev = to_amdgpu_device(control);
86363d4c081SLuben Tuikov 	u32 buf_size;
86463d4c081SLuben Tuikov 	int res;
86564f55e62SAndrey Grodzovsky 
86624f55c05SAlex Deucher 	/* i2c may be unstable in gpu reset */
867d0fb18b5SAndrey Grodzovsky 	down_read(&adev->reset_domain->sem);
86863d4c081SLuben Tuikov 	buf_size = num * RAS_TABLE_RECORD_SIZE;
8692f60dd50SLuben Tuikov 	res = amdgpu_eeprom_read(adev->pm.ras_eeprom_i2c_bus,
87063d4c081SLuben Tuikov 				 control->i2c_address +
87163d4c081SLuben Tuikov 				 RAS_INDEX_TO_OFFSET(control, fri),
87263d4c081SLuben Tuikov 				 buf, buf_size);
873d0fb18b5SAndrey Grodzovsky 	up_read(&adev->reset_domain->sem);
87463d4c081SLuben Tuikov 	if (res < 0) {
87563d4c081SLuben Tuikov 		DRM_ERROR("Reading %d EEPROM table records error:%d",
87663d4c081SLuben Tuikov 			  num, res);
87763d4c081SLuben Tuikov 	} else if (res < buf_size) {
87863d4c081SLuben Tuikov 		/* Short read, return error.
87964f55e62SAndrey Grodzovsky 		 */
88063d4c081SLuben Tuikov 		DRM_ERROR("Read %d records out of %d",
88163d4c081SLuben Tuikov 			  res / RAS_TABLE_RECORD_SIZE, num);
88263d4c081SLuben Tuikov 		res = -EIO;
88363d4c081SLuben Tuikov 	} else {
88463d4c081SLuben Tuikov 		res = 0;
88564f55e62SAndrey Grodzovsky 	}
88664f55e62SAndrey Grodzovsky 
88763d4c081SLuben Tuikov 	return res;
88864f55e62SAndrey Grodzovsky }
88964f55e62SAndrey Grodzovsky 
89063d4c081SLuben Tuikov /**
89163d4c081SLuben Tuikov  * amdgpu_ras_eeprom_read -- read EEPROM
89263d4c081SLuben Tuikov  * @control: pointer to control structure
89363d4c081SLuben Tuikov  * @record: array of records to read into
89463d4c081SLuben Tuikov  * @num: number of records in @record
89564f55e62SAndrey Grodzovsky  *
89663d4c081SLuben Tuikov  * Reads num records from the RAS table in EEPROM and
89763d4c081SLuben Tuikov  * writes the data into @record array.
89863d4c081SLuben Tuikov  *
89963d4c081SLuben Tuikov  * Returns 0 on success, -errno on error.
90064f55e62SAndrey Grodzovsky  */
amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control * control,struct eeprom_table_record * record,const u32 num)90163d4c081SLuben Tuikov int amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control,
90263d4c081SLuben Tuikov 			   struct eeprom_table_record *record,
90363d4c081SLuben Tuikov 			   const u32 num)
90463d4c081SLuben Tuikov {
90563d4c081SLuben Tuikov 	struct amdgpu_device *adev = to_amdgpu_device(control);
90669691c82SStanley.Yang 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
90763d4c081SLuben Tuikov 	int i, res;
90863d4c081SLuben Tuikov 	u8 *buf, *pp;
90963d4c081SLuben Tuikov 	u32 g0, g1;
91064f55e62SAndrey Grodzovsky 
91163d4c081SLuben Tuikov 	if (!__is_ras_eeprom_supported(adev))
91263d4c081SLuben Tuikov 		return 0;
91363d4c081SLuben Tuikov 
91463d4c081SLuben Tuikov 	if (num == 0) {
91563d4c081SLuben Tuikov 		DRM_ERROR("will not read 0 records\n");
91663d4c081SLuben Tuikov 		return -EINVAL;
91763d4c081SLuben Tuikov 	} else if (num > control->ras_num_recs) {
91863d4c081SLuben Tuikov 		DRM_ERROR("too many records to read:%d available:%d\n",
91963d4c081SLuben Tuikov 			  num, control->ras_num_recs);
92063d4c081SLuben Tuikov 		return -EINVAL;
92164f55e62SAndrey Grodzovsky 	}
92264f55e62SAndrey Grodzovsky 
92363d4c081SLuben Tuikov 	buf = kcalloc(num, RAS_TABLE_RECORD_SIZE, GFP_KERNEL);
92463d4c081SLuben Tuikov 	if (!buf)
92563d4c081SLuben Tuikov 		return -ENOMEM;
92664f55e62SAndrey Grodzovsky 
92763d4c081SLuben Tuikov 	/* Determine how many records to read, from the first record
92863d4c081SLuben Tuikov 	 * index, fri, to the end of the table, and from the beginning
92963d4c081SLuben Tuikov 	 * of the table, such that the total number of records is
93063d4c081SLuben Tuikov 	 * @num, and we handle wrap around when fri > 0 and
93163d4c081SLuben Tuikov 	 * fri + num > RAS_MAX_RECORD_COUNT.
93263d4c081SLuben Tuikov 	 *
93363d4c081SLuben Tuikov 	 * First we compute the index of the last element
93463d4c081SLuben Tuikov 	 * which would be fetched from each region,
93563d4c081SLuben Tuikov 	 * g0 is in [fri, fri + num - 1], and
93663d4c081SLuben Tuikov 	 * g1 is in [0, RAS_MAX_RECORD_COUNT - 1].
93763d4c081SLuben Tuikov 	 * Then, if g0 < RAS_MAX_RECORD_COUNT, the index of
93863d4c081SLuben Tuikov 	 * the last element to fetch, we set g0 to _the number_
93963d4c081SLuben Tuikov 	 * of elements to fetch, @num, since we know that the last
94063d4c081SLuben Tuikov 	 * indexed to be fetched does not exceed the table.
94163d4c081SLuben Tuikov 	 *
94263d4c081SLuben Tuikov 	 * If, however, g0 >= RAS_MAX_RECORD_COUNT, then
94363d4c081SLuben Tuikov 	 * we set g0 to the number of elements to read
94463d4c081SLuben Tuikov 	 * until the end of the table, and g1 to the number of
94563d4c081SLuben Tuikov 	 * elements to read from the beginning of the table.
94663d4c081SLuben Tuikov 	 */
94763d4c081SLuben Tuikov 	g0 = control->ras_fri + num - 1;
94863d4c081SLuben Tuikov 	g1 = g0 % control->ras_max_record_count;
94963d4c081SLuben Tuikov 	if (g0 < control->ras_max_record_count) {
95063d4c081SLuben Tuikov 		g0 = num;
95163d4c081SLuben Tuikov 		g1 = 0;
95263d4c081SLuben Tuikov 	} else {
95363d4c081SLuben Tuikov 		g0 = control->ras_max_record_count - control->ras_fri;
95463d4c081SLuben Tuikov 		g1 += 1;
95563d4c081SLuben Tuikov 	}
95663d4c081SLuben Tuikov 
95763d4c081SLuben Tuikov 	mutex_lock(&control->ras_tbl_mutex);
95863d4c081SLuben Tuikov 	res = __amdgpu_ras_eeprom_read(control, buf, control->ras_fri, g0);
95963d4c081SLuben Tuikov 	if (res)
96063d4c081SLuben Tuikov 		goto Out;
96163d4c081SLuben Tuikov 	if (g1) {
96263d4c081SLuben Tuikov 		res = __amdgpu_ras_eeprom_read(control,
96363d4c081SLuben Tuikov 					       buf + g0 * RAS_TABLE_RECORD_SIZE,
96463d4c081SLuben Tuikov 					       0, g1);
96563d4c081SLuben Tuikov 		if (res)
96663d4c081SLuben Tuikov 			goto Out;
96763d4c081SLuben Tuikov 	}
96863d4c081SLuben Tuikov 
96963d4c081SLuben Tuikov 	res = 0;
97063d4c081SLuben Tuikov 
97163d4c081SLuben Tuikov 	/* Read up everything? Then transform.
97263d4c081SLuben Tuikov 	 */
97363d4c081SLuben Tuikov 	pp = buf;
97469691c82SStanley.Yang 	for (i = 0; i < num; i++, pp += RAS_TABLE_RECORD_SIZE) {
97563d4c081SLuben Tuikov 		__decode_table_record_from_buf(control, &record[i], pp);
97669691c82SStanley.Yang 
97769691c82SStanley.Yang 		/* update bad channel bitmap */
97869691c82SStanley.Yang 		if (!(control->bad_channel_bitmap & (1 << record[i].mem_channel))) {
97969691c82SStanley.Yang 			control->bad_channel_bitmap |= 1 << record[i].mem_channel;
98069691c82SStanley.Yang 			con->update_channel_flag = true;
98169691c82SStanley.Yang 		}
98269691c82SStanley.Yang 	}
98363d4c081SLuben Tuikov Out:
98463d4c081SLuben Tuikov 	kfree(buf);
9850686627bSLuben Tuikov 	mutex_unlock(&control->ras_tbl_mutex);
98664f55e62SAndrey Grodzovsky 
98763d4c081SLuben Tuikov 	return res;
9881fab841fSLuben Tuikov }
9891fab841fSLuben Tuikov 
amdgpu_ras_eeprom_max_record_count(struct amdgpu_ras_eeprom_control * control)9907f599fedSStanley.Yang uint32_t amdgpu_ras_eeprom_max_record_count(struct amdgpu_ras_eeprom_control *control)
991c84d4670SGuchun Chen {
9927f599fedSStanley.Yang 	if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1)
9937f599fedSStanley.Yang 		return RAS_MAX_RECORD_COUNT_V2_1;
9947f599fedSStanley.Yang 	else
995e4e6a589SLuben Tuikov 		return RAS_MAX_RECORD_COUNT;
996c84d4670SGuchun Chen }
99763d4c081SLuben Tuikov 
998c65b0805SLuben Tuikov static ssize_t
amdgpu_ras_debugfs_eeprom_size_read(struct file * f,char __user * buf,size_t size,loff_t * pos)999c65b0805SLuben Tuikov amdgpu_ras_debugfs_eeprom_size_read(struct file *f, char __user *buf,
1000c65b0805SLuben Tuikov 				    size_t size, loff_t *pos)
1001c65b0805SLuben Tuikov {
1002c65b0805SLuben Tuikov 	struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
1003c65b0805SLuben Tuikov 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
1004c65b0805SLuben Tuikov 	struct amdgpu_ras_eeprom_control *control = ras ? &ras->eeprom_control : NULL;
1005c65b0805SLuben Tuikov 	u8 data[50];
1006c65b0805SLuben Tuikov 	int res;
1007c65b0805SLuben Tuikov 
1008c65b0805SLuben Tuikov 	if (!size)
1009c65b0805SLuben Tuikov 		return size;
1010c65b0805SLuben Tuikov 
1011c65b0805SLuben Tuikov 	if (!ras || !control) {
1012c65b0805SLuben Tuikov 		res = snprintf(data, sizeof(data), "Not supported\n");
1013c65b0805SLuben Tuikov 	} else {
1014c65b0805SLuben Tuikov 		res = snprintf(data, sizeof(data), "%d bytes or %d records\n",
1015c65b0805SLuben Tuikov 			       RAS_TBL_SIZE_BYTES, control->ras_max_record_count);
1016c65b0805SLuben Tuikov 	}
1017c65b0805SLuben Tuikov 
1018c65b0805SLuben Tuikov 	if (*pos >= res)
1019c65b0805SLuben Tuikov 		return 0;
1020c65b0805SLuben Tuikov 
1021c65b0805SLuben Tuikov 	res -= *pos;
1022c65b0805SLuben Tuikov 	res = min_t(size_t, res, size);
1023c65b0805SLuben Tuikov 
1024c65b0805SLuben Tuikov 	if (copy_to_user(buf, &data[*pos], res))
102564598e23SDan Carpenter 		return -EFAULT;
1026c65b0805SLuben Tuikov 
1027c65b0805SLuben Tuikov 	*pos += res;
1028c65b0805SLuben Tuikov 
1029c65b0805SLuben Tuikov 	return res;
1030c65b0805SLuben Tuikov }
1031c65b0805SLuben Tuikov 
1032c65b0805SLuben Tuikov const struct file_operations amdgpu_ras_debugfs_eeprom_size_ops = {
1033c65b0805SLuben Tuikov 	.owner = THIS_MODULE,
1034c65b0805SLuben Tuikov 	.read = amdgpu_ras_debugfs_eeprom_size_read,
1035c65b0805SLuben Tuikov 	.write = NULL,
1036c65b0805SLuben Tuikov 	.llseek = default_llseek,
1037c65b0805SLuben Tuikov };
1038c65b0805SLuben Tuikov 
/*
 * Text layout of the debugfs table dump.
 *
 * Each *_fmt_size macro is the exact number of characters one line printed
 * with the matching *_fmt string occupies, including the trailing newline.
 */

/* Table header: column titles and the format of the single value line. */
static const char *tbl_hdr_str = " Signature    Version  FirstOffs       Size   Checksum\n";
static const char *tbl_hdr_fmt = "0x%08X 0x%08X 0x%08X 0x%08X 0x%08X\n";
/* 5 fields of "0x" + 8 hex digits, 4 separating spaces, 1 newline */
#define tbl_hdr_fmt_size (5 * (2 + 8) + 4 + 1)

/* Records: column titles and the format of one line per record. */
static const char *rec_hdr_str = "Index  Offset ErrType Bank/CU          TimeStamp      Offs/Addr MemChl MCUMCID    RetiredPage\n";
static const char *rec_hdr_fmt = "%5d 0x%05X %7s    0x%02X 0x%016llX 0x%012llX   0x%02X    0x%02X 0x%012llX\n";
/* column widths, each followed by a 1 character separator or the newline */
#define rec_hdr_fmt_size (5 + 1 + 7 + 1 + 7 + 1 + 7 + 1 + 18 + 1 + 14 + 1 + 6 + 1 + 7 + 1 + 14 + 1)
1045c65b0805SLuben Tuikov 
1046c65b0805SLuben Tuikov static const char *record_err_type_str[AMDGPU_RAS_EEPROM_ERR_COUNT] = {
1047c65b0805SLuben Tuikov 	"ignore",
1048c65b0805SLuben Tuikov 	"re",
1049c65b0805SLuben Tuikov 	"ue",
1050c65b0805SLuben Tuikov };
1051c65b0805SLuben Tuikov 
amdgpu_ras_debugfs_table_size(struct amdgpu_ras_eeprom_control * control)1052c65b0805SLuben Tuikov static loff_t amdgpu_ras_debugfs_table_size(struct amdgpu_ras_eeprom_control *control)
1053c65b0805SLuben Tuikov {
1054c65b0805SLuben Tuikov 	return strlen(tbl_hdr_str) + tbl_hdr_fmt_size +
1055c65b0805SLuben Tuikov 		strlen(rec_hdr_str) + rec_hdr_fmt_size * control->ras_num_recs;
1056c65b0805SLuben Tuikov }
1057c65b0805SLuben Tuikov 
amdgpu_ras_debugfs_set_ret_size(struct amdgpu_ras_eeprom_control * control)1058c65b0805SLuben Tuikov void amdgpu_ras_debugfs_set_ret_size(struct amdgpu_ras_eeprom_control *control)
1059c65b0805SLuben Tuikov {
1060c65b0805SLuben Tuikov 	struct amdgpu_ras *ras = container_of(control, struct amdgpu_ras,
1061c65b0805SLuben Tuikov 					      eeprom_control);
1062c65b0805SLuben Tuikov 	struct dentry *de = ras->de_ras_eeprom_table;
1063c65b0805SLuben Tuikov 
1064c65b0805SLuben Tuikov 	if (de)
1065c65b0805SLuben Tuikov 		d_inode(de)->i_size = amdgpu_ras_debugfs_table_size(control);
1066c65b0805SLuben Tuikov }
1067c65b0805SLuben Tuikov 
amdgpu_ras_debugfs_table_read(struct file * f,char __user * buf,size_t size,loff_t * pos)1068c65b0805SLuben Tuikov static ssize_t amdgpu_ras_debugfs_table_read(struct file *f, char __user *buf,
1069c65b0805SLuben Tuikov 					     size_t size, loff_t *pos)
1070c65b0805SLuben Tuikov {
1071c65b0805SLuben Tuikov 	struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
1072c65b0805SLuben Tuikov 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
1073c65b0805SLuben Tuikov 	struct amdgpu_ras_eeprom_control *control = &ras->eeprom_control;
1074c65b0805SLuben Tuikov 	const size_t orig_size = size;
1075b8badd50SDan Carpenter 	int res = -EFAULT;
1076c65b0805SLuben Tuikov 	size_t data_len;
1077c65b0805SLuben Tuikov 
1078c65b0805SLuben Tuikov 	mutex_lock(&control->ras_tbl_mutex);
1079c65b0805SLuben Tuikov 
1080c65b0805SLuben Tuikov 	/* We want *pos - data_len > 0, which means there's
1081c65b0805SLuben Tuikov 	 * bytes to be printed from data.
1082c65b0805SLuben Tuikov 	 */
1083c65b0805SLuben Tuikov 	data_len = strlen(tbl_hdr_str);
1084c65b0805SLuben Tuikov 	if (*pos < data_len) {
1085c65b0805SLuben Tuikov 		data_len -= *pos;
1086c65b0805SLuben Tuikov 		data_len = min_t(size_t, data_len, size);
1087c65b0805SLuben Tuikov 		if (copy_to_user(buf, &tbl_hdr_str[*pos], data_len))
1088c65b0805SLuben Tuikov 			goto Out;
1089c65b0805SLuben Tuikov 		buf += data_len;
1090c65b0805SLuben Tuikov 		size -= data_len;
1091c65b0805SLuben Tuikov 		*pos += data_len;
1092c65b0805SLuben Tuikov 	}
1093c65b0805SLuben Tuikov 
1094c65b0805SLuben Tuikov 	data_len = strlen(tbl_hdr_str) + tbl_hdr_fmt_size;
1095c65b0805SLuben Tuikov 	if (*pos < data_len && size > 0) {
1096c65b0805SLuben Tuikov 		u8 data[tbl_hdr_fmt_size + 1];
1097c65b0805SLuben Tuikov 		loff_t lpos;
1098c65b0805SLuben Tuikov 
1099c65b0805SLuben Tuikov 		snprintf(data, sizeof(data), tbl_hdr_fmt,
1100c65b0805SLuben Tuikov 			 control->tbl_hdr.header,
1101c65b0805SLuben Tuikov 			 control->tbl_hdr.version,
1102c65b0805SLuben Tuikov 			 control->tbl_hdr.first_rec_offset,
1103c65b0805SLuben Tuikov 			 control->tbl_hdr.tbl_size,
1104c65b0805SLuben Tuikov 			 control->tbl_hdr.checksum);
1105c65b0805SLuben Tuikov 
1106c65b0805SLuben Tuikov 		data_len -= *pos;
1107c65b0805SLuben Tuikov 		data_len = min_t(size_t, data_len, size);
1108c65b0805SLuben Tuikov 		lpos = *pos - strlen(tbl_hdr_str);
1109c65b0805SLuben Tuikov 		if (copy_to_user(buf, &data[lpos], data_len))
1110c65b0805SLuben Tuikov 			goto Out;
1111c65b0805SLuben Tuikov 		buf += data_len;
1112c65b0805SLuben Tuikov 		size -= data_len;
1113c65b0805SLuben Tuikov 		*pos += data_len;
1114c65b0805SLuben Tuikov 	}
1115c65b0805SLuben Tuikov 
1116c65b0805SLuben Tuikov 	data_len = strlen(tbl_hdr_str) + tbl_hdr_fmt_size + strlen(rec_hdr_str);
1117c65b0805SLuben Tuikov 	if (*pos < data_len && size > 0) {
1118c65b0805SLuben Tuikov 		loff_t lpos;
1119c65b0805SLuben Tuikov 
1120c65b0805SLuben Tuikov 		data_len -= *pos;
1121c65b0805SLuben Tuikov 		data_len = min_t(size_t, data_len, size);
1122c65b0805SLuben Tuikov 		lpos = *pos - strlen(tbl_hdr_str) - tbl_hdr_fmt_size;
1123c65b0805SLuben Tuikov 		if (copy_to_user(buf, &rec_hdr_str[lpos], data_len))
1124c65b0805SLuben Tuikov 			goto Out;
1125c65b0805SLuben Tuikov 		buf += data_len;
1126c65b0805SLuben Tuikov 		size -= data_len;
1127c65b0805SLuben Tuikov 		*pos += data_len;
1128c65b0805SLuben Tuikov 	}
1129c65b0805SLuben Tuikov 
1130c65b0805SLuben Tuikov 	data_len = amdgpu_ras_debugfs_table_size(control);
1131c65b0805SLuben Tuikov 	if (*pos < data_len && size > 0) {
1132c65b0805SLuben Tuikov 		u8 dare[RAS_TABLE_RECORD_SIZE];
1133c65b0805SLuben Tuikov 		u8 data[rec_hdr_fmt_size + 1];
1134d456f387SAlex Deucher 		struct eeprom_table_record record;
1135d456f387SAlex Deucher 		int s, r;
1136d456f387SAlex Deucher 
1137c65b0805SLuben Tuikov 		/* Find the starting record index
1138c65b0805SLuben Tuikov 		 */
1139d456f387SAlex Deucher 		s = *pos - strlen(tbl_hdr_str) - tbl_hdr_fmt_size -
1140d456f387SAlex Deucher 			strlen(rec_hdr_str);
1141d456f387SAlex Deucher 		s = s / rec_hdr_fmt_size;
1142d456f387SAlex Deucher 		r = *pos - strlen(tbl_hdr_str) - tbl_hdr_fmt_size -
1143d456f387SAlex Deucher 			strlen(rec_hdr_str);
1144d456f387SAlex Deucher 		r = r % rec_hdr_fmt_size;
1145c65b0805SLuben Tuikov 
1146c65b0805SLuben Tuikov 		for ( ; size > 0 && s < control->ras_num_recs; s++) {
1147c65b0805SLuben Tuikov 			u32 ai = RAS_RI_TO_AI(control, s);
1148c65b0805SLuben Tuikov 			/* Read a single record
1149c65b0805SLuben Tuikov 			 */
1150c65b0805SLuben Tuikov 			res = __amdgpu_ras_eeprom_read(control, dare, ai, 1);
1151c65b0805SLuben Tuikov 			if (res)
1152c65b0805SLuben Tuikov 				goto Out;
1153c65b0805SLuben Tuikov 			__decode_table_record_from_buf(control, &record, dare);
1154c65b0805SLuben Tuikov 			snprintf(data, sizeof(data), rec_hdr_fmt,
1155c65b0805SLuben Tuikov 				 s,
1156c65b0805SLuben Tuikov 				 RAS_INDEX_TO_OFFSET(control, ai),
1157c65b0805SLuben Tuikov 				 record_err_type_str[record.err_type],
1158c65b0805SLuben Tuikov 				 record.bank,
1159c65b0805SLuben Tuikov 				 record.ts,
1160c65b0805SLuben Tuikov 				 record.offset,
1161c65b0805SLuben Tuikov 				 record.mem_channel,
1162c65b0805SLuben Tuikov 				 record.mcumc_id,
1163c65b0805SLuben Tuikov 				 record.retired_page);
1164c65b0805SLuben Tuikov 
1165c65b0805SLuben Tuikov 			data_len = min_t(size_t, rec_hdr_fmt_size - r, size);
1166b8badd50SDan Carpenter 			if (copy_to_user(buf, &data[r], data_len)) {
1167b8badd50SDan Carpenter 				res = -EFAULT;
1168b8badd50SDan Carpenter 				goto Out;
1169b8badd50SDan Carpenter 			}
1170c65b0805SLuben Tuikov 			buf += data_len;
1171c65b0805SLuben Tuikov 			size -= data_len;
1172c65b0805SLuben Tuikov 			*pos += data_len;
1173c65b0805SLuben Tuikov 			r = 0;
1174c65b0805SLuben Tuikov 		}
1175c65b0805SLuben Tuikov 	}
1176c65b0805SLuben Tuikov 	res = 0;
1177c65b0805SLuben Tuikov Out:
1178c65b0805SLuben Tuikov 	mutex_unlock(&control->ras_tbl_mutex);
1179c65b0805SLuben Tuikov 	return res < 0 ? res : orig_size - size;
1180c65b0805SLuben Tuikov }
1181c65b0805SLuben Tuikov 
1182c65b0805SLuben Tuikov static ssize_t
amdgpu_ras_debugfs_eeprom_table_read(struct file * f,char __user * buf,size_t size,loff_t * pos)1183c65b0805SLuben Tuikov amdgpu_ras_debugfs_eeprom_table_read(struct file *f, char __user *buf,
1184c65b0805SLuben Tuikov 				     size_t size, loff_t *pos)
1185c65b0805SLuben Tuikov {
1186c65b0805SLuben Tuikov 	struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
1187c65b0805SLuben Tuikov 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
1188c65b0805SLuben Tuikov 	struct amdgpu_ras_eeprom_control *control = ras ? &ras->eeprom_control : NULL;
1189c65b0805SLuben Tuikov 	u8 data[81];
1190c65b0805SLuben Tuikov 	int res;
1191c65b0805SLuben Tuikov 
1192c65b0805SLuben Tuikov 	if (!size)
1193c65b0805SLuben Tuikov 		return size;
1194c65b0805SLuben Tuikov 
1195c65b0805SLuben Tuikov 	if (!ras || !control) {
1196c65b0805SLuben Tuikov 		res = snprintf(data, sizeof(data), "Not supported\n");
1197c65b0805SLuben Tuikov 		if (*pos >= res)
1198c65b0805SLuben Tuikov 			return 0;
1199c65b0805SLuben Tuikov 
1200c65b0805SLuben Tuikov 		res -= *pos;
1201c65b0805SLuben Tuikov 		res = min_t(size_t, res, size);
1202c65b0805SLuben Tuikov 
1203c65b0805SLuben Tuikov 		if (copy_to_user(buf, &data[*pos], res))
120464598e23SDan Carpenter 			return -EFAULT;
1205c65b0805SLuben Tuikov 
1206c65b0805SLuben Tuikov 		*pos += res;
1207c65b0805SLuben Tuikov 
1208c65b0805SLuben Tuikov 		return res;
1209c65b0805SLuben Tuikov 	} else {
1210c65b0805SLuben Tuikov 		return amdgpu_ras_debugfs_table_read(f, buf, size, pos);
1211c65b0805SLuben Tuikov 	}
1212c65b0805SLuben Tuikov }
1213c65b0805SLuben Tuikov 
1214c65b0805SLuben Tuikov const struct file_operations amdgpu_ras_debugfs_eeprom_table_ops = {
1215c65b0805SLuben Tuikov 	.owner = THIS_MODULE,
1216c65b0805SLuben Tuikov 	.read = amdgpu_ras_debugfs_eeprom_table_read,
1217c65b0805SLuben Tuikov 	.write = NULL,
1218c65b0805SLuben Tuikov 	.llseek = default_llseek,
1219c65b0805SLuben Tuikov };
1220c65b0805SLuben Tuikov 
122163d4c081SLuben Tuikov /**
122263d4c081SLuben Tuikov  * __verify_ras_table_checksum -- verify the RAS EEPROM table checksum
122363d4c081SLuben Tuikov  * @control: pointer to control structure
122463d4c081SLuben Tuikov  *
122563d4c081SLuben Tuikov  * Check the checksum of the stored in EEPROM RAS table.
122663d4c081SLuben Tuikov  *
122763d4c081SLuben Tuikov  * Return 0 if the checksum is correct,
122863d4c081SLuben Tuikov  * positive if it is not correct, and
122963d4c081SLuben Tuikov  * -errno on I/O error.
123063d4c081SLuben Tuikov  */
__verify_ras_table_checksum(struct amdgpu_ras_eeprom_control * control)123163d4c081SLuben Tuikov static int __verify_ras_table_checksum(struct amdgpu_ras_eeprom_control *control)
123263d4c081SLuben Tuikov {
123363d4c081SLuben Tuikov 	struct amdgpu_device *adev = to_amdgpu_device(control);
12343006c924SDan Carpenter 	int buf_size, res;
123563d4c081SLuben Tuikov 	u8  csum, *buf, *pp;
123663d4c081SLuben Tuikov 
12377f599fedSStanley.Yang 	if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1)
12387f599fedSStanley.Yang 		buf_size = RAS_TABLE_HEADER_SIZE +
12397f599fedSStanley.Yang 			   RAS_TABLE_V2_1_INFO_SIZE +
12407f599fedSStanley.Yang 			   control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
12417f599fedSStanley.Yang 	else
124263d4c081SLuben Tuikov 		buf_size = RAS_TABLE_HEADER_SIZE +
124363d4c081SLuben Tuikov 			   control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
12447f599fedSStanley.Yang 
124563d4c081SLuben Tuikov 	buf = kzalloc(buf_size, GFP_KERNEL);
124663d4c081SLuben Tuikov 	if (!buf) {
124763d4c081SLuben Tuikov 		DRM_ERROR("Out of memory checking RAS table checksum.\n");
124863d4c081SLuben Tuikov 		return -ENOMEM;
124963d4c081SLuben Tuikov 	}
125063d4c081SLuben Tuikov 
12512f60dd50SLuben Tuikov 	res = amdgpu_eeprom_read(adev->pm.ras_eeprom_i2c_bus,
125263d4c081SLuben Tuikov 				 control->i2c_address +
125363d4c081SLuben Tuikov 				 control->ras_header_offset,
125463d4c081SLuben Tuikov 				 buf, buf_size);
125563d4c081SLuben Tuikov 	if (res < buf_size) {
125663d4c081SLuben Tuikov 		DRM_ERROR("Partial read for checksum, res:%d\n", res);
125763d4c081SLuben Tuikov 		/* On partial reads, return -EIO.
125863d4c081SLuben Tuikov 		 */
125963d4c081SLuben Tuikov 		if (res >= 0)
126063d4c081SLuben Tuikov 			res = -EIO;
126163d4c081SLuben Tuikov 		goto Out;
126263d4c081SLuben Tuikov 	}
126363d4c081SLuben Tuikov 
126463d4c081SLuben Tuikov 	csum = 0;
126563d4c081SLuben Tuikov 	for (pp = buf; pp < buf + buf_size; pp++)
126663d4c081SLuben Tuikov 		csum += *pp;
126763d4c081SLuben Tuikov Out:
126863d4c081SLuben Tuikov 	kfree(buf);
126963d4c081SLuben Tuikov 	return res < 0 ? res : csum;
127063d4c081SLuben Tuikov }
127163d4c081SLuben Tuikov 
__read_table_ras_info(struct amdgpu_ras_eeprom_control * control)12727f599fedSStanley.Yang static int __read_table_ras_info(struct amdgpu_ras_eeprom_control *control)
12737f599fedSStanley.Yang {
12747f599fedSStanley.Yang 	struct amdgpu_ras_eeprom_table_ras_info *rai = &control->tbl_rai;
12757f599fedSStanley.Yang 	struct amdgpu_device *adev = to_amdgpu_device(control);
12767f599fedSStanley.Yang 	unsigned char *buf;
12777f599fedSStanley.Yang 	int res;
12787f599fedSStanley.Yang 
12797f599fedSStanley.Yang 	buf = kzalloc(RAS_TABLE_V2_1_INFO_SIZE, GFP_KERNEL);
12807f599fedSStanley.Yang 	if (!buf) {
12817f599fedSStanley.Yang 		DRM_ERROR("Failed to alloc buf to read EEPROM table ras info\n");
12827f599fedSStanley.Yang 		return -ENOMEM;
12837f599fedSStanley.Yang 	}
12847f599fedSStanley.Yang 
12857f599fedSStanley.Yang 	/**
12867f599fedSStanley.Yang 	 * EEPROM table V2_1 supports ras info,
12877f599fedSStanley.Yang 	 * read EEPROM table ras info
12887f599fedSStanley.Yang 	 */
12897f599fedSStanley.Yang 	res = amdgpu_eeprom_read(adev->pm.ras_eeprom_i2c_bus,
12907f599fedSStanley.Yang 				 control->i2c_address + control->ras_info_offset,
12917f599fedSStanley.Yang 				 buf, RAS_TABLE_V2_1_INFO_SIZE);
12927f599fedSStanley.Yang 	if (res < RAS_TABLE_V2_1_INFO_SIZE) {
12937f599fedSStanley.Yang 		DRM_ERROR("Failed to read EEPROM table ras info, res:%d", res);
12947f599fedSStanley.Yang 		res = res >= 0 ? -EIO : res;
12957f599fedSStanley.Yang 		goto Out;
12967f599fedSStanley.Yang 	}
12977f599fedSStanley.Yang 
12987f599fedSStanley.Yang 	__decode_table_ras_info_from_buf(rai, buf);
12997f599fedSStanley.Yang 
13007f599fedSStanley.Yang Out:
13017f599fedSStanley.Yang 	kfree(buf);
13027f599fedSStanley.Yang 	return res == RAS_TABLE_V2_1_INFO_SIZE ? 0 : res;
13037f599fedSStanley.Yang }
13047f599fedSStanley.Yang 
/**
 * amdgpu_ras_eeprom_init -- read and validate the RAS EEPROM table
 * @control: pointer to control structure
 * @exceed_err_limit: output; always cleared first, then set to true only
 *	when the table header is tagged RAS_TABLE_HDR_BAD, the stored
 *	record count is not below ras->bad_page_cnt_threshold and
 *	amdgpu_bad_page_threshold is neither 0 nor -1
 *
 * Read the table header from the EEPROM and derive the record count,
 * record offset, maximum record count and first record index from it.
 * Depending on the header signature:
 *
 * - RAS_TABLE_HDR_VAL: read the RAS info (V2.1 tables only), verify the
 *   checksum and warn when the record count is at 90% of the threshold
 *   or above.
 * - RAS_TABLE_HDR_BAD with amdgpu_bad_page_threshold != 0: read the RAS
 *   info (V2.1 only), verify the checksum, then either restore the valid
 *   header tag (threshold now above the record count) or report that the
 *   threshold is exceeded.
 * - anything else: create a new table via amdgpu_ras_eeprom_reset_table().
 *
 * A checksum mismatch is only logged; it does not fail initialization.
 *
 * Return 0 on success, or immediately when RAS EEPROM is not supported;
 * -ENOENT if the I2C adapter is not initialized; -EINVAL if
 * __get_eeprom_i2c_addr() fails; -EIO on a short header read; otherwise
 * the negative error of the failing helper.
 */
int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control,
			   bool *exceed_err_limit)
{
	struct amdgpu_device *adev = to_amdgpu_device(control);
	unsigned char buf[RAS_TABLE_HEADER_SIZE] = { 0 };
	struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
	int res;

	*exceed_err_limit = false;

	if (!__is_ras_eeprom_supported(adev))
		return 0;

	/* Verify i2c adapter is initialized */
	if (!adev->pm.ras_eeprom_i2c_bus || !adev->pm.ras_eeprom_i2c_bus->algo)
		return -ENOENT;

	if (!__get_eeprom_i2c_addr(adev, control))
		return -EINVAL;

	control->ras_header_offset = RAS_HDR_START;
	control->ras_info_offset = RAS_TABLE_V2_1_INFO_START;
	mutex_init(&control->ras_tbl_mutex);

	/* Read the table header from EEPROM address */
	res = amdgpu_eeprom_read(adev->pm.ras_eeprom_i2c_bus,
				 control->i2c_address + control->ras_header_offset,
				 buf, RAS_TABLE_HEADER_SIZE);
	if (res < RAS_TABLE_HEADER_SIZE) {
		DRM_ERROR("Failed to read EEPROM table header, res:%d\n", res);
		return res >= 0 ? -EIO : res;
	}

	__decode_table_header_from_buf(hdr, buf);

	/* The table layout (record count, record start, capacity) depends
	 * on the table version found in the header.
	 */
	if (hdr->version == RAS_TABLE_VER_V2_1) {
		control->ras_num_recs = RAS_NUM_RECS_V2_1(hdr);
		control->ras_record_offset = RAS_RECORD_START_V2_1;
		control->ras_max_record_count = RAS_MAX_RECORD_COUNT_V2_1;
	} else {
		control->ras_num_recs = RAS_NUM_RECS(hdr);
		control->ras_record_offset = RAS_RECORD_START;
		control->ras_max_record_count = RAS_MAX_RECORD_COUNT;
	}
	control->ras_fri = RAS_OFFSET_TO_INDEX(control, hdr->first_rec_offset);

	if (hdr->header == RAS_TABLE_HDR_VAL) {
		DRM_DEBUG_DRIVER("Found existing EEPROM table with %d records\n",
				 control->ras_num_recs);

		if (hdr->version == RAS_TABLE_VER_V2_1) {
			res = __read_table_ras_info(control);
			if (res)
				return res;
		}

		/* A positive result is a checksum mismatch: log it and go on.
		 */
		res = __verify_ras_table_checksum(control);
		if (res)
			DRM_ERROR("RAS table incorrect checksum or error:%d\n",
				  res);

		/* Warn if we are at 90% of the threshold or above
		 */
		if (10 * control->ras_num_recs >= 9 * ras->bad_page_cnt_threshold)
			dev_warn(adev->dev, "RAS records:%u exceeds 90%% of threshold:%d\n",
					control->ras_num_recs,
					ras->bad_page_cnt_threshold);
	} else if (hdr->header == RAS_TABLE_HDR_BAD &&
		   amdgpu_bad_page_threshold != 0) {
		if (hdr->version == RAS_TABLE_VER_V2_1) {
			res = __read_table_ras_info(control);
			if (res)
				return res;
		}

		res = __verify_ras_table_checksum(control);
		if (res)
			DRM_ERROR("RAS Table incorrect checksum or error:%d\n",
				  res);
		if (ras->bad_page_cnt_threshold > control->ras_num_recs) {
			/* This means that, the threshold was increased since
			 * the last time the system was booted, and now,
			 * ras->bad_page_cnt_threshold - control->ras_num_recs > 0,
			 * so that at least one more record can be saved,
			 * before the page count threshold is reached.
			 */
			dev_info(adev->dev,
				 "records:%d threshold:%d, resetting "
				 "RAS table header signature\n",
				 control->ras_num_recs,
				 ras->bad_page_cnt_threshold);
			res = amdgpu_ras_eeprom_correct_header_tag(control,
								   RAS_TABLE_HDR_VAL);
		} else {
			dev_err(adev->dev, "RAS records:%d exceed threshold:%d\n",
				control->ras_num_recs, ras->bad_page_cnt_threshold);
			if (amdgpu_bad_page_threshold == -1) {
				/* -1 lets the GPU come up despite the exceeded
				 * threshold; drop any earlier checksum result.
				 */
				dev_warn(adev->dev, "GPU will be initialized due to bad_page_threshold = -1.\n");
				res = 0;
			} else {
				*exceed_err_limit = true;
				dev_err(adev->dev,
					"RAS records:%d exceed threshold:%d, "
					"GPU will not be initialized. Replace this GPU or increase the threshold\n",
					control->ras_num_recs, ras->bad_page_cnt_threshold);
			}
		}
	} else {
		DRM_INFO("Creating a new EEPROM table\n");

		res = amdgpu_ras_eeprom_reset_table(control);
	}

	/* Positive values (byte counts, checksum mismatches) are not errors.
	 */
	return res < 0 ? res : 0;
}
1421