xref: /openbmc/linux/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h (revision 2612e3bbc0386368a850140a6c9b990cd496a5ec)
/*
 * Copyright 2019 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
2364f55e62SAndrey Grodzovsky 
2464f55e62SAndrey Grodzovsky #ifndef _AMDGPU_RAS_EEPROM_H
2564f55e62SAndrey Grodzovsky #define _AMDGPU_RAS_EEPROM_H
2664f55e62SAndrey Grodzovsky 
2764f55e62SAndrey Grodzovsky #include <linux/i2c.h>
2864f55e62SAndrey Grodzovsky 
/*
 * RAS EEPROM table version codes.
 * NOTE(review): presumably compared against the `version` field of
 * struct amdgpu_ras_eeprom_table_header -- confirm in the .c file.
 */
#define RAS_TABLE_VER_V1           0x00010000
#define RAS_TABLE_VER_V2_1         0x00021000

/* Forward declaration: this header only passes pointers to the device. */
struct amdgpu_device;
3364f55e62SAndrey Grodzovsky 
/*
 * GPU health status codes.
 *
 * The numeric values are assigned explicitly and are not contiguous
 * (1 is unused here), so they must not be renumbered.
 * NOTE(review): presumably the value kept in the `rma_status` byte of
 * the RAS info block -- confirm against the users of this enum.
 */
enum amdgpu_ras_gpu_health_status {
	GPU_HEALTH_USABLE = 0,
	GPU_RETIRED__ECC_REACH_THRESHOLD = 2,
};
38*0bc3137bSStanley.Yang 
/*
 * Error classification carried by a table record
 * (see the `err_type` member of struct eeprom_table_record).
 *
 * Values are implicit and start at 0; AMDGPU_RAS_EEPROM_ERR_COUNT is the
 * number of real error types and must remain the last enumerator.
 */
enum amdgpu_ras_eeprom_err_type {
	AMDGPU_RAS_EEPROM_ERR_NA,
	AMDGPU_RAS_EEPROM_ERR_RECOVERABLE,
	AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE,
	AMDGPU_RAS_EEPROM_ERR_COUNT,
};
4564f55e62SAndrey Grodzovsky 
/*
 * Header of the RAS EEPROM table: five 32-bit words, packed, 20 bytes
 * in total. Field order defines the layout and must not change.
 */
struct amdgpu_ras_eeprom_table_header {
	uint32_t header;		/* NOTE(review): likely a signature word -- confirm */
	uint32_t version;		/* table version code */
	uint32_t first_rec_offset;	/* offset of the first record */
	uint32_t tbl_size;		/* table size */
	uint32_t checksum;		/* table checksum */
} __packed;
5364f55e62SAndrey Grodzovsky 
547f599fedSStanley.Yang struct amdgpu_ras_eeprom_table_ras_info {
557f599fedSStanley.Yang 	u8  rma_status;
567f599fedSStanley.Yang 	u8  health_percent;
577f599fedSStanley.Yang 	u16 ecc_page_threshold;
587f599fedSStanley.Yang 	u32 padding[64 - 1];
597f599fedSStanley.Yang } __packed;
607f599fedSStanley.Yang 
6164f55e62SAndrey Grodzovsky struct amdgpu_ras_eeprom_control {
6264f55e62SAndrey Grodzovsky 	struct amdgpu_ras_eeprom_table_header tbl_hdr;
630686627bSLuben Tuikov 
647f599fedSStanley.Yang 	struct amdgpu_ras_eeprom_table_ras_info tbl_rai;
657f599fedSStanley.Yang 
660686627bSLuben Tuikov 	/* Base I2C EEPPROM 19-bit memory address,
670686627bSLuben Tuikov 	 * where the table is located. For more information,
680686627bSLuben Tuikov 	 * see top of amdgpu_eeprom.c.
690686627bSLuben Tuikov 	 */
700686627bSLuben Tuikov 	u32 i2c_address;
710686627bSLuben Tuikov 
7263d4c081SLuben Tuikov 	/* The byte offset off of @i2c_address
7363d4c081SLuben Tuikov 	 * where the table header is found,
7463d4c081SLuben Tuikov 	 * and where the records start--always
7563d4c081SLuben Tuikov 	 * right after the header.
7663d4c081SLuben Tuikov 	 */
7763d4c081SLuben Tuikov 	u32 ras_header_offset;
787f599fedSStanley.Yang 	u32 ras_info_offset;
7963d4c081SLuben Tuikov 	u32 ras_record_offset;
800686627bSLuben Tuikov 
810686627bSLuben Tuikov 	/* Number of records in the table.
820686627bSLuben Tuikov 	 */
8363d4c081SLuben Tuikov 	u32 ras_num_recs;
8463d4c081SLuben Tuikov 
8563d4c081SLuben Tuikov 	/* First record index to read, 0-based.
8663d4c081SLuben Tuikov 	 * Range is [0, num_recs-1]. This is
8763d4c081SLuben Tuikov 	 * an absolute index, starting right after
8863d4c081SLuben Tuikov 	 * the table header.
8963d4c081SLuben Tuikov 	 */
9063d4c081SLuben Tuikov 	u32 ras_fri;
9163d4c081SLuben Tuikov 
9263d4c081SLuben Tuikov 	/* Maximum possible number of records
9363d4c081SLuben Tuikov 	 * we could store, i.e. the maximum capacity
9463d4c081SLuben Tuikov 	 * of the table.
9563d4c081SLuben Tuikov 	 */
9663d4c081SLuben Tuikov 	u32 ras_max_record_count;
970686627bSLuben Tuikov 
980686627bSLuben Tuikov 	/* Protect table access via this mutex.
990686627bSLuben Tuikov 	 */
1000686627bSLuben Tuikov 	struct mutex ras_tbl_mutex;
10169691c82SStanley.Yang 
10269691c82SStanley.Yang 	/* Record channel info which occurred bad pages
10369691c82SStanley.Yang 	 */
10469691c82SStanley.Yang 	u32 bad_channel_bitmap;
10564f55e62SAndrey Grodzovsky };
10664f55e62SAndrey Grodzovsky 
10764f55e62SAndrey Grodzovsky /*
10864f55e62SAndrey Grodzovsky  * Represents single table record. Packed to be easily serialized into byte
10964f55e62SAndrey Grodzovsky  * stream.
11064f55e62SAndrey Grodzovsky  */
11164f55e62SAndrey Grodzovsky struct eeprom_table_record {
11264f55e62SAndrey Grodzovsky 
11364f55e62SAndrey Grodzovsky 	union {
11464f55e62SAndrey Grodzovsky 		uint64_t address;
11564f55e62SAndrey Grodzovsky 		uint64_t offset;
11664f55e62SAndrey Grodzovsky 	};
11764f55e62SAndrey Grodzovsky 
11864f55e62SAndrey Grodzovsky 	uint64_t retired_page;
11964f55e62SAndrey Grodzovsky 	uint64_t ts;
12064f55e62SAndrey Grodzovsky 
12164f55e62SAndrey Grodzovsky 	enum amdgpu_ras_eeprom_err_type err_type;
12264f55e62SAndrey Grodzovsky 
12364f55e62SAndrey Grodzovsky 	union {
12464f55e62SAndrey Grodzovsky 		unsigned char bank;
12564f55e62SAndrey Grodzovsky 		unsigned char cu;
12664f55e62SAndrey Grodzovsky 	};
12764f55e62SAndrey Grodzovsky 
12864f55e62SAndrey Grodzovsky 	unsigned char mem_channel;
12964f55e62SAndrey Grodzovsky 	unsigned char mcumc_id;
1300686627bSLuben Tuikov } __packed;
13164f55e62SAndrey Grodzovsky 
132b82e65a9SGuchun Chen int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control,
133b82e65a9SGuchun Chen 			   bool *exceed_err_limit);
13463d4c081SLuben Tuikov 
135d01b400bSAndrey Grodzovsky int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control);
13664f55e62SAndrey Grodzovsky 
13711003c68SDennis Li bool amdgpu_ras_eeprom_check_err_threshold(struct amdgpu_device *adev);
138e8fbaf03SGuchun Chen 
1391fab841fSLuben Tuikov int amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control,
1401fab841fSLuben Tuikov 			   struct eeprom_table_record *records, const u32 num);
1411fab841fSLuben Tuikov 
14263d4c081SLuben Tuikov int amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control *control,
1431fab841fSLuben Tuikov 			     struct eeprom_table_record *records, const u32 num);
14464f55e62SAndrey Grodzovsky 
1457f599fedSStanley.Yang uint32_t amdgpu_ras_eeprom_max_record_count(struct amdgpu_ras_eeprom_control *control);
146c84d4670SGuchun Chen 
/*
 * debugfs support for the RAS EEPROM table.
 * NOTE(review): behaviour is defined in the .c file, which is not visible
 * here; the names suggest one file reporting the table size and one
 * dumping the table -- confirm.
 */
void amdgpu_ras_debugfs_set_ret_size(struct amdgpu_ras_eeprom_control *control);

extern const struct file_operations amdgpu_ras_debugfs_eeprom_size_ops;
extern const struct file_operations amdgpu_ras_debugfs_eeprom_table_ops;
151c65b0805SLuben Tuikov 
15264f55e62SAndrey Grodzovsky #endif // _AMDGPU_RAS_EEPROM_H
153