1d87f36a0SRajneesh Bhardwaj // SPDX-License-Identifier: GPL-2.0 OR MIT 2ca750681SFelix Kuehling /* 3d87f36a0SRajneesh Bhardwaj * Copyright 2016-2022 Advanced Micro Devices, Inc. 4ca750681SFelix Kuehling * 5ca750681SFelix Kuehling * Permission is hereby granted, free of charge, to any person obtaining a 6ca750681SFelix Kuehling * copy of this software and associated documentation files (the "Software"), 7ca750681SFelix Kuehling * to deal in the Software without restriction, including without limitation 8ca750681SFelix Kuehling * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9ca750681SFelix Kuehling * and/or sell copies of the Software, and to permit persons to whom the 10ca750681SFelix Kuehling * Software is furnished to do so, subject to the following conditions: 11ca750681SFelix Kuehling * 12ca750681SFelix Kuehling * The above copyright notice and this permission notice shall be included in 13ca750681SFelix Kuehling * all copies or substantial portions of the Software. 14ca750681SFelix Kuehling * 15ca750681SFelix Kuehling * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16ca750681SFelix Kuehling * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17ca750681SFelix Kuehling * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18ca750681SFelix Kuehling * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 19ca750681SFelix Kuehling * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20ca750681SFelix Kuehling * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21ca750681SFelix Kuehling * OTHER DEALINGS IN THE SOFTWARE. 22ca750681SFelix Kuehling */ 23ca750681SFelix Kuehling 24ca750681SFelix Kuehling #include "kfd_priv.h" 25ca750681SFelix Kuehling #include "kfd_events.h" 26ca750681SFelix Kuehling #include "soc15_int.h" 27a53a11a8SYong Zhao #include "kfd_device_queue_manager.h" 28938a0650SAmber Lin #include "kfd_smi_events.h" 2920161e51SDennis Li 3020161e51SDennis Li enum SQ_INTERRUPT_WORD_ENCODING { 3120161e51SDennis Li SQ_INTERRUPT_WORD_ENCODING_AUTO = 0x0, 3220161e51SDennis Li SQ_INTERRUPT_WORD_ENCODING_INST, 3320161e51SDennis Li SQ_INTERRUPT_WORD_ENCODING_ERROR, 3420161e51SDennis Li }; 3520161e51SDennis Li 3620161e51SDennis Li enum SQ_INTERRUPT_ERROR_TYPE { 3720161e51SDennis Li SQ_INTERRUPT_ERROR_TYPE_EDC_FUE = 0x0, 3820161e51SDennis Li SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST, 3920161e51SDennis Li SQ_INTERRUPT_ERROR_TYPE_MEMVIOL, 4020161e51SDennis Li SQ_INTERRUPT_ERROR_TYPE_EDC_FED, 4120161e51SDennis Li }; 4220161e51SDennis Li 4320161e51SDennis Li /* SQ_INTERRUPT_WORD_AUTO_CTXID */ 4420161e51SDennis Li #define SQ_INTERRUPT_WORD_AUTO_CTXID__THREAD_TRACE__SHIFT 0 4520161e51SDennis Li #define SQ_INTERRUPT_WORD_AUTO_CTXID__WLT__SHIFT 1 4620161e51SDennis Li #define SQ_INTERRUPT_WORD_AUTO_CTXID__THREAD_TRACE_BUF_FULL__SHIFT 2 4720161e51SDennis Li #define SQ_INTERRUPT_WORD_AUTO_CTXID__REG_TIMESTAMP__SHIFT 3 4820161e51SDennis Li #define SQ_INTERRUPT_WORD_AUTO_CTXID__CMD_TIMESTAMP__SHIFT 4 4920161e51SDennis Li #define SQ_INTERRUPT_WORD_AUTO_CTXID__HOST_CMD_OVERFLOW__SHIFT 5 5020161e51SDennis Li #define SQ_INTERRUPT_WORD_AUTO_CTXID__HOST_REG_OVERFLOW__SHIFT 6 5120161e51SDennis Li #define SQ_INTERRUPT_WORD_AUTO_CTXID__IMMED_OVERFLOW__SHIFT 7 5220161e51SDennis Li #define SQ_INTERRUPT_WORD_AUTO_CTXID__THREAD_TRACE_UTC_ERROR__SHIFT 8 5320161e51SDennis Li #define SQ_INTERRUPT_WORD_AUTO_CTXID__SE_ID__SHIFT 24 5420161e51SDennis Li #define SQ_INTERRUPT_WORD_AUTO_CTXID__ENCODING__SHIFT 26 5520161e51SDennis Li 5620161e51SDennis Li #define SQ_INTERRUPT_WORD_AUTO_CTXID__THREAD_TRACE_MASK 0x00000001 5720161e51SDennis Li #define SQ_INTERRUPT_WORD_AUTO_CTXID__WLT_MASK 0x00000002 5820161e51SDennis Li #define SQ_INTERRUPT_WORD_AUTO_CTXID__THREAD_TRACE_BUF_FULL_MASK 0x00000004 5920161e51SDennis Li #define SQ_INTERRUPT_WORD_AUTO_CTXID__REG_TIMESTAMP_MASK 0x00000008 6020161e51SDennis Li #define SQ_INTERRUPT_WORD_AUTO_CTXID__CMD_TIMESTAMP_MASK 0x00000010 6120161e51SDennis Li #define SQ_INTERRUPT_WORD_AUTO_CTXID__HOST_CMD_OVERFLOW_MASK 0x00000020 6220161e51SDennis Li #define SQ_INTERRUPT_WORD_AUTO_CTXID__HOST_REG_OVERFLOW_MASK 0x00000040 6320161e51SDennis Li #define SQ_INTERRUPT_WORD_AUTO_CTXID__IMMED_OVERFLOW_MASK 0x00000080 6420161e51SDennis Li #define SQ_INTERRUPT_WORD_AUTO_CTXID__THREAD_TRACE_UTC_ERROR_MASK 0x00000100 6520161e51SDennis Li #define SQ_INTERRUPT_WORD_AUTO_CTXID__SE_ID_MASK 0x03000000 6620161e51SDennis Li #define SQ_INTERRUPT_WORD_AUTO_CTXID__ENCODING_MASK 0x0c000000 6720161e51SDennis Li 6820161e51SDennis Li /* SQ_INTERRUPT_WORD_WAVE_CTXID */ 6920161e51SDennis Li #define SQ_INTERRUPT_WORD_WAVE_CTXID__DATA__SHIFT 0 7020161e51SDennis Li #define SQ_INTERRUPT_WORD_WAVE_CTXID__SH_ID__SHIFT 12 7120161e51SDennis Li #define SQ_INTERRUPT_WORD_WAVE_CTXID__PRIV__SHIFT 13 7220161e51SDennis Li #define SQ_INTERRUPT_WORD_WAVE_CTXID__WAVE_ID__SHIFT 14 7320161e51SDennis Li #define SQ_INTERRUPT_WORD_WAVE_CTXID__SIMD_ID__SHIFT 18 7420161e51SDennis Li #define SQ_INTERRUPT_WORD_WAVE_CTXID__CU_ID__SHIFT 20 7520161e51SDennis Li #define SQ_INTERRUPT_WORD_WAVE_CTXID__SE_ID__SHIFT 24 7620161e51SDennis Li #define SQ_INTERRUPT_WORD_WAVE_CTXID__ENCODING__SHIFT 26 7720161e51SDennis Li 7820161e51SDennis Li #define SQ_INTERRUPT_WORD_WAVE_CTXID__DATA_MASK 0x00000fff 7920161e51SDennis Li #define SQ_INTERRUPT_WORD_WAVE_CTXID__SH_ID_MASK 0x00001000 8020161e51SDennis Li #define SQ_INTERRUPT_WORD_WAVE_CTXID__PRIV_MASK 0x00002000 8120161e51SDennis Li #define SQ_INTERRUPT_WORD_WAVE_CTXID__WAVE_ID_MASK 0x0003c000 8220161e51SDennis Li #define SQ_INTERRUPT_WORD_WAVE_CTXID__SIMD_ID_MASK 0x000c0000 8320161e51SDennis Li #define SQ_INTERRUPT_WORD_WAVE_CTXID__CU_ID_MASK 0x00f00000 8420161e51SDennis Li #define SQ_INTERRUPT_WORD_WAVE_CTXID__SE_ID_MASK 0x03000000 8520161e51SDennis Li #define SQ_INTERRUPT_WORD_WAVE_CTXID__ENCODING_MASK 0x0c000000 8620161e51SDennis Li 8720161e51SDennis Li #define KFD_CONTEXT_ID_GET_SQ_INT_DATA(ctx0, ctx1) \ 8820161e51SDennis Li ((ctx0 & 0xfff) | ((ctx0 >> 16) & 0xf000) | ((ctx1 << 16) & 0xff0000)) 8920161e51SDennis Li 9020161e51SDennis Li #define KFD_SQ_INT_DATA__ERR_TYPE_MASK 0xF00000 9120161e51SDennis Li #define KFD_SQ_INT_DATA__ERR_TYPE__SHIFT 20 92ca750681SFelix Kuehling 93b6485bedSTao Zhou static void event_interrupt_poison_consumption(struct kfd_dev *dev, 949d8a8d78STao Zhou uint16_t pasid, uint16_t client_id) 95b6485bedSTao Zhou { 96eed41975STao Zhou int old_poison, ret = -EINVAL; 97b6485bedSTao Zhou struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); 98b6485bedSTao Zhou 99b6485bedSTao Zhou if (!p) 100b6485bedSTao Zhou return; 101b6485bedSTao Zhou 102b6485bedSTao Zhou /* all queues of a process will be unmapped in one time */ 103eed41975STao Zhou old_poison = atomic_cmpxchg(&p->poison, 0, 1); 104b6485bedSTao Zhou kfd_unref_process(p); 105eed41975STao Zhou if (old_poison) 106b6485bedSTao Zhou return; 107b6485bedSTao Zhou 1089d8a8d78STao Zhou switch (client_id) { 1099d8a8d78STao Zhou case SOC15_IH_CLIENTID_SE0SH: 1109d8a8d78STao Zhou case SOC15_IH_CLIENTID_SE1SH: 1119d8a8d78STao Zhou case SOC15_IH_CLIENTID_SE2SH: 1129d8a8d78STao Zhou case SOC15_IH_CLIENTID_SE3SH: 1139d8a8d78STao Zhou case SOC15_IH_CLIENTID_UTCL2: 11429b440d2STao Zhou ret = kfd_dqm_evict_pasid(dev->dqm, pasid); 115b6485bedSTao Zhou break; 1169d8a8d78STao Zhou case SOC15_IH_CLIENTID_SDMA0: 1179d8a8d78STao Zhou case SOC15_IH_CLIENTID_SDMA1: 1189d8a8d78STao Zhou case SOC15_IH_CLIENTID_SDMA2: 1199d8a8d78STao Zhou case SOC15_IH_CLIENTID_SDMA3: 1209d8a8d78STao Zhou case SOC15_IH_CLIENTID_SDMA4: 1219d8a8d78STao Zhou break; 122b6485bedSTao Zhou default: 123b6485bedSTao Zhou break; 124b6485bedSTao Zhou } 125b6485bedSTao Zhou 126b6485bedSTao Zhou kfd_signal_poison_consumed_event(dev, pasid); 127b6485bedSTao Zhou 128b6485bedSTao Zhou /* resetting queue passes, do page retirement without gpu reset 1292243f493SRajneesh Bhardwaj * resetting queue fails, fallback to gpu reset solution 1302243f493SRajneesh Bhardwaj */ 131ed94aca6STao Zhou if (!ret) { 132ed94aca6STao Zhou dev_warn(dev->adev->dev, 133ed94aca6STao Zhou "RAS poison consumption, unmap queue flow succeeded: client id %d\n", 134ed94aca6STao Zhou client_id); 135b6485bedSTao Zhou amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, false); 136ed94aca6STao Zhou } else { 137ed94aca6STao Zhou dev_warn(dev->adev->dev, 138ed94aca6STao Zhou "RAS poison consumption, fall back to gpu reset flow: client id %d\n", 139ed94aca6STao Zhou client_id); 140b6485bedSTao Zhou amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, true); 141b6485bedSTao Zhou } 142ed94aca6STao Zhou } 143b6485bedSTao Zhou 144*c3eb12dfSFelix Kuehling static bool context_id_expected(struct kfd_dev *dev) 145*c3eb12dfSFelix Kuehling { 146*c3eb12dfSFelix Kuehling switch (KFD_GC_VERSION(dev)) { 147*c3eb12dfSFelix Kuehling case IP_VERSION(9, 0, 1): 148*c3eb12dfSFelix Kuehling return dev->mec_fw_version >= 0x817a; 149*c3eb12dfSFelix Kuehling case IP_VERSION(9, 1, 0): 150*c3eb12dfSFelix Kuehling case IP_VERSION(9, 2, 1): 151*c3eb12dfSFelix Kuehling case IP_VERSION(9, 2, 2): 152*c3eb12dfSFelix Kuehling case IP_VERSION(9, 3, 0): 153*c3eb12dfSFelix Kuehling case IP_VERSION(9, 4, 0): 154*c3eb12dfSFelix Kuehling return dev->mec_fw_version >= 0x17a; 155*c3eb12dfSFelix Kuehling default: 156*c3eb12dfSFelix Kuehling /* Other GFXv9 and later GPUs always sent valid context IDs 157*c3eb12dfSFelix Kuehling * on legitimate events 158*c3eb12dfSFelix Kuehling */ 159*c3eb12dfSFelix Kuehling return KFD_GC_VERSION(dev) >= IP_VERSION(9, 4, 1); 160*c3eb12dfSFelix Kuehling } 161*c3eb12dfSFelix Kuehling } 162*c3eb12dfSFelix Kuehling 163ca750681SFelix Kuehling static bool event_interrupt_isr_v9(struct kfd_dev *dev, 16458e69886SLan Xiao const uint32_t *ih_ring_entry, 16558e69886SLan Xiao uint32_t *patched_ihre, 16658e69886SLan Xiao bool *patched_flag) 167ca750681SFelix Kuehling { 168ca750681SFelix Kuehling uint16_t source_id, client_id, pasid, vmid; 169c129db12SFelix Kuehling const uint32_t *data = ih_ring_entry; 170c129db12SFelix Kuehling 171c129db12SFelix Kuehling /* Only handle interrupts from KFD VMIDs */ 172c129db12SFelix Kuehling vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry); 173c129db12SFelix Kuehling if (vmid < dev->vm_info.first_vmid_kfd || 174c129db12SFelix Kuehling vmid > dev->vm_info.last_vmid_kfd) 1758c8e1f69SAishwarya Ramakrishnan return false; 176c129db12SFelix Kuehling 177ca750681SFelix Kuehling source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry); 178ca750681SFelix Kuehling client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry); 17900557f41SYong Zhao pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry); 180ca750681SFelix Kuehling 181ae279f69SAlex Deucher /* Only handle clients we care about */ 182ae279f69SAlex Deucher if (client_id != SOC15_IH_CLIENTID_GRBM_CP && 183ae279f69SAlex Deucher client_id != SOC15_IH_CLIENTID_SDMA0 && 184ae279f69SAlex Deucher client_id != SOC15_IH_CLIENTID_SDMA1 && 185ae279f69SAlex Deucher client_id != SOC15_IH_CLIENTID_SDMA2 && 186ae279f69SAlex Deucher client_id != SOC15_IH_CLIENTID_SDMA3 && 187ae279f69SAlex Deucher client_id != SOC15_IH_CLIENTID_SDMA4 && 188ae279f69SAlex Deucher client_id != SOC15_IH_CLIENTID_SDMA5 && 189ae279f69SAlex Deucher client_id != SOC15_IH_CLIENTID_SDMA6 && 190ae279f69SAlex Deucher client_id != SOC15_IH_CLIENTID_SDMA7 && 191ae279f69SAlex Deucher client_id != SOC15_IH_CLIENTID_VMC && 192ae279f69SAlex Deucher client_id != SOC15_IH_CLIENTID_VMC1 && 1937af103eaSTao Zhou client_id != SOC15_IH_CLIENTID_UTCL2 && 1947af103eaSTao Zhou client_id != SOC15_IH_CLIENTID_SE0SH && 1957af103eaSTao Zhou client_id != SOC15_IH_CLIENTID_SE1SH && 1967af103eaSTao Zhou client_id != SOC15_IH_CLIENTID_SE2SH && 1977af103eaSTao Zhou client_id != SOC15_IH_CLIENTID_SE3SH) 198ae279f69SAlex Deucher return false; 199ae279f69SAlex Deucher 200a53a11a8SYong Zhao /* This is a known issue for gfx9. Under non HWS, pasid is not set 201a53a11a8SYong Zhao * in the interrupt payload, so we need to find out the pasid on our 202a53a11a8SYong Zhao * own. 203a53a11a8SYong Zhao */ 204a53a11a8SYong Zhao if (!pasid && dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { 205a53a11a8SYong Zhao const uint32_t pasid_mask = 0xffff; 206a53a11a8SYong Zhao 207a53a11a8SYong Zhao *patched_flag = true; 208a53a11a8SYong Zhao memcpy(patched_ihre, ih_ring_entry, 209f0dc99a6SGraham Sider dev->device_info.ih_ring_entry_size); 210a53a11a8SYong Zhao 2113fe023d4SYong Zhao pasid = dev->dqm->vmid_pasid[vmid]; 212a53a11a8SYong Zhao 213a53a11a8SYong Zhao /* Patch the pasid field */ 214a53a11a8SYong Zhao patched_ihre[3] = cpu_to_le32((le32_to_cpu(patched_ihre[3]) 215a53a11a8SYong Zhao & ~pasid_mask) | pasid); 216a53a11a8SYong Zhao } 217a53a11a8SYong Zhao 21800557f41SYong Zhao pr_debug("client id 0x%x, source id %d, vmid %d, pasid 0x%x. raw data:\n", 21900557f41SYong Zhao client_id, source_id, vmid, pasid); 220ca750681SFelix Kuehling pr_debug("%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n", 221ca750681SFelix Kuehling data[0], data[1], data[2], data[3], 222ca750681SFelix Kuehling data[4], data[5], data[6], data[7]); 223ca750681SFelix Kuehling 224a53a11a8SYong Zhao /* If there is no valid PASID, it's likely a bug */ 225a53a11a8SYong Zhao if (WARN_ONCE(pasid == 0, "Bug: No PASID in KFD interrupt")) 2268c8e1f69SAishwarya Ramakrishnan return false; 22700557f41SYong Zhao 228*c3eb12dfSFelix Kuehling /* Workaround CP firmware sending bogus signals with 0 context_id. 229*c3eb12dfSFelix Kuehling * Those can be safely ignored on hardware and firmware versions that 230*c3eb12dfSFelix Kuehling * include a valid context_id on legitimate signals. This avoids the 231*c3eb12dfSFelix Kuehling * slow path in kfd_signal_event_interrupt that scans all event slots 232*c3eb12dfSFelix Kuehling * for signaled events. 233*c3eb12dfSFelix Kuehling */ 234*c3eb12dfSFelix Kuehling if (source_id == SOC15_INTSRC_CP_END_OF_PIPE) { 235*c3eb12dfSFelix Kuehling uint32_t context_id = 236*c3eb12dfSFelix Kuehling SOC15_CONTEXT_ID0_FROM_IH_ENTRY(ih_ring_entry); 237*c3eb12dfSFelix Kuehling 238*c3eb12dfSFelix Kuehling if (context_id == 0 && context_id_expected(dev)) 239*c3eb12dfSFelix Kuehling return false; 240*c3eb12dfSFelix Kuehling } 241*c3eb12dfSFelix Kuehling 242c129db12SFelix Kuehling /* Interrupt types we care about: various signals and faults. 243c129db12SFelix Kuehling * They will be forwarded to a work queue (see below). 244c129db12SFelix Kuehling */ 245c129db12SFelix Kuehling return source_id == SOC15_INTSRC_CP_END_OF_PIPE || 246ca750681SFelix Kuehling source_id == SOC15_INTSRC_SDMA_TRAP || 2475b0ce2d4Syipechai source_id == SOC15_INTSRC_SDMA_ECC || 248ca750681SFelix Kuehling source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG || 2492640c3faSshaoyunl source_id == SOC15_INTSRC_CP_BAD_OPCODE || 2506d909c5dSOak Zeng ((client_id == SOC15_IH_CLIENTID_VMC || 2510ad8c5e2SYong Zhao client_id == SOC15_IH_CLIENTID_VMC1 || 2526d909c5dSOak Zeng client_id == SOC15_IH_CLIENTID_UTCL2) && 2536d909c5dSOak Zeng !amdgpu_no_queue_eviction_on_vm_fault); 254ca750681SFelix Kuehling } 255ca750681SFelix Kuehling 256ca750681SFelix Kuehling static void event_interrupt_wq_v9(struct kfd_dev *dev, 257ca750681SFelix Kuehling const uint32_t *ih_ring_entry) 258ca750681SFelix Kuehling { 259ca750681SFelix Kuehling uint16_t source_id, client_id, pasid, vmid; 26020161e51SDennis Li uint32_t context_id0, context_id1; 26120161e51SDennis Li uint32_t sq_intr_err, sq_int_data, encoding; 262ca750681SFelix Kuehling 263ca750681SFelix Kuehling source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry); 264ca750681SFelix Kuehling client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry); 265ca750681SFelix Kuehling pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry); 266ca750681SFelix Kuehling vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry); 26720161e51SDennis Li context_id0 = SOC15_CONTEXT_ID0_FROM_IH_ENTRY(ih_ring_entry); 26820161e51SDennis Li context_id1 = SOC15_CONTEXT_ID1_FROM_IH_ENTRY(ih_ring_entry); 269ca750681SFelix Kuehling 2707af103eaSTao Zhou if (client_id == SOC15_IH_CLIENTID_GRBM_CP || 2717af103eaSTao Zhou client_id == SOC15_IH_CLIENTID_SE0SH || 2727af103eaSTao Zhou client_id == SOC15_IH_CLIENTID_SE1SH || 2737af103eaSTao Zhou client_id == SOC15_IH_CLIENTID_SE2SH || 2747af103eaSTao Zhou client_id == SOC15_IH_CLIENTID_SE3SH) { 275ca750681SFelix Kuehling if (source_id == SOC15_INTSRC_CP_END_OF_PIPE) 27620161e51SDennis Li kfd_signal_event_interrupt(pasid, context_id0, 32); 27720161e51SDennis Li else if (source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG) { 27820161e51SDennis Li sq_int_data = KFD_CONTEXT_ID_GET_SQ_INT_DATA(context_id0, context_id1); 27920161e51SDennis Li encoding = REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, ENCODING); 28020161e51SDennis Li switch (encoding) { 28120161e51SDennis Li case SQ_INTERRUPT_WORD_ENCODING_AUTO: 28220161e51SDennis Li pr_debug( 28320161e51SDennis Li "sq_intr: auto, se %d, ttrace %d, wlt %d, ttrac_buf_full %d, reg_tms %d, cmd_tms %d, host_cmd_ovf %d, host_reg_ovf %d, immed_ovf %d, ttrace_utc_err %d\n", 28420161e51SDennis Li REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, SE_ID), 28520161e51SDennis Li REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, THREAD_TRACE), 28620161e51SDennis Li REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, WLT), 28720161e51SDennis Li REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, THREAD_TRACE_BUF_FULL), 28820161e51SDennis Li REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, REG_TIMESTAMP), 28920161e51SDennis Li REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, CMD_TIMESTAMP), 29020161e51SDennis Li REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, HOST_CMD_OVERFLOW), 29120161e51SDennis Li REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, HOST_REG_OVERFLOW), 29220161e51SDennis Li REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, IMMED_OVERFLOW), 29320161e51SDennis Li REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_AUTO_CTXID, THREAD_TRACE_UTC_ERROR)); 29420161e51SDennis Li break; 29520161e51SDennis Li case SQ_INTERRUPT_WORD_ENCODING_INST: 29620161e51SDennis Li pr_debug("sq_intr: inst, se %d, data 0x%x, sh %d, priv %d, wave_id %d, simd_id %d, cu_id %d, intr_data 0x%x\n", 29720161e51SDennis Li REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SE_ID), 29820161e51SDennis Li REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, DATA), 29920161e51SDennis Li REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SH_ID), 30020161e51SDennis Li REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, PRIV), 30120161e51SDennis Li REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, WAVE_ID), 30220161e51SDennis Li REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SIMD_ID), 30320161e51SDennis Li REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, CU_ID), 30420161e51SDennis Li sq_int_data); 30520161e51SDennis Li break; 30620161e51SDennis Li case SQ_INTERRUPT_WORD_ENCODING_ERROR: 30720161e51SDennis Li sq_intr_err = REG_GET_FIELD(sq_int_data, KFD_SQ_INT_DATA, ERR_TYPE); 30820161e51SDennis Li pr_warn("sq_intr: error, se %d, data 0x%x, sh %d, priv %d, wave_id %d, simd_id %d, cu_id %d, err_type %d\n", 30920161e51SDennis Li REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SE_ID), 31020161e51SDennis Li REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, DATA), 31120161e51SDennis Li REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SH_ID), 31220161e51SDennis Li REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, PRIV), 31320161e51SDennis Li REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, WAVE_ID), 31420161e51SDennis Li REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, SIMD_ID), 31520161e51SDennis Li REG_GET_FIELD(context_id0, SQ_INTERRUPT_WORD_WAVE_CTXID, CU_ID), 31620161e51SDennis Li sq_intr_err); 31720161e51SDennis Li if (sq_intr_err != SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST && 31820161e51SDennis Li sq_intr_err != SQ_INTERRUPT_ERROR_TYPE_MEMVIOL) { 3199d8a8d78STao Zhou event_interrupt_poison_consumption(dev, pasid, client_id); 32020161e51SDennis Li return; 32120161e51SDennis Li } 32220161e51SDennis Li break; 32320161e51SDennis Li default: 32420161e51SDennis Li break; 32520161e51SDennis Li } 32620161e51SDennis Li kfd_signal_event_interrupt(pasid, context_id0 & 0xffffff, 24); 32720161e51SDennis Li } else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) 328ca750681SFelix Kuehling kfd_signal_hw_exception_event(pasid); 329ae279f69SAlex Deucher } else if (client_id == SOC15_IH_CLIENTID_SDMA0 || 330ae279f69SAlex Deucher client_id == SOC15_IH_CLIENTID_SDMA1 || 331ae279f69SAlex Deucher client_id == SOC15_IH_CLIENTID_SDMA2 || 332ae279f69SAlex Deucher client_id == SOC15_IH_CLIENTID_SDMA3 || 333ae279f69SAlex Deucher client_id == SOC15_IH_CLIENTID_SDMA4 || 334ae279f69SAlex Deucher client_id == SOC15_IH_CLIENTID_SDMA5 || 335ae279f69SAlex Deucher client_id == SOC15_IH_CLIENTID_SDMA6 || 336ae279f69SAlex Deucher client_id == SOC15_IH_CLIENTID_SDMA7) { 3374a1d4b6dSHawking Zhang if (source_id == SOC15_INTSRC_SDMA_TRAP) { 33820161e51SDennis Li kfd_signal_event_interrupt(pasid, context_id0 & 0xfffffff, 28); 3394a1d4b6dSHawking Zhang } else if (source_id == SOC15_INTSRC_SDMA_ECC) { 3409d8a8d78STao Zhou event_interrupt_poison_consumption(dev, pasid, client_id); 3414a1d4b6dSHawking Zhang return; 3424a1d4b6dSHawking Zhang } 343ae279f69SAlex Deucher } else if (client_id == SOC15_IH_CLIENTID_VMC || 3440ad8c5e2SYong Zhao client_id == SOC15_IH_CLIENTID_VMC1 || 345ca750681SFelix Kuehling client_id == SOC15_IH_CLIENTID_UTCL2) { 3462640c3faSshaoyunl struct kfd_vm_fault_info info = {0}; 3472640c3faSshaoyunl uint16_t ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry); 3482640c3faSshaoyunl 3491990e29bSTao Zhou if (client_id == SOC15_IH_CLIENTID_UTCL2 && 3501990e29bSTao Zhou amdgpu_amdkfd_ras_query_utcl2_poison_status(dev->adev)) { 3511990e29bSTao Zhou event_interrupt_poison_consumption(dev, pasid, client_id); 3521990e29bSTao Zhou return; 3531990e29bSTao Zhou } 3541990e29bSTao Zhou 3552640c3faSshaoyunl info.vmid = vmid; 3562640c3faSshaoyunl info.mc_id = client_id; 3572640c3faSshaoyunl info.page_addr = ih_ring_entry[4] | 3582640c3faSshaoyunl (uint64_t)(ih_ring_entry[5] & 0xf) << 32; 3592640c3faSshaoyunl info.prot_valid = ring_id & 0x08; 3602640c3faSshaoyunl info.prot_read = ring_id & 0x10; 3612640c3faSshaoyunl info.prot_write = ring_id & 0x20; 3622640c3faSshaoyunl 363938a0650SAmber Lin kfd_smi_event_update_vmfault(dev, pasid); 36403e5b167STao Zhou kfd_dqm_evict_pasid(dev->dqm, pasid); 3652640c3faSshaoyunl kfd_signal_vm_fault_event(dev, pasid, &info); 366ca750681SFelix Kuehling } 367ca750681SFelix Kuehling } 368ca750681SFelix Kuehling 369ca750681SFelix Kuehling const struct kfd_event_interrupt_class event_interrupt_class_v9 = { 370ca750681SFelix Kuehling .interrupt_isr = event_interrupt_isr_v9, 371ca750681SFelix Kuehling .interrupt_wq = event_interrupt_wq_v9, 372ca750681SFelix Kuehling }; 373