// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright 2020-2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/poll.h>
#include <linux/wait.h>
#include <linux/anon_inodes.h>
#include <uapi/linux/kfd_ioctl.h>
#include "amdgpu.h"
#include "amdgpu_vm.h"
#include "kfd_priv.h"
#include "kfd_smi_events.h"

struct kfd_smi_client {
	struct list_head list;
	struct kfifo fifo;
	wait_queue_head_t wait_queue;
	/* events enabled */
	uint64_t events;
	struct kfd_dev *dev;
	spinlock_t lock;
};

#define MAX_KFIFO_SIZE	1024

static __poll_t kfd_smi_ev_poll(struct file *, struct poll_table_struct *);
static ssize_t kfd_smi_ev_read(struct file *, char __user *, size_t, loff_t *);
static ssize_t kfd_smi_ev_write(struct file *, const char __user *, size_t,
				loff_t *);
static int kfd_smi_ev_release(struct inode *, struct file *);

static const char kfd_smi_name[] = "kfd_smi_ev";

static const struct file_operations kfd_smi_ev_fops = {
	.owner = THIS_MODULE,
	.poll = kfd_smi_ev_poll,
	.read = kfd_smi_ev_read,
	.write = kfd_smi_ev_write,
	.release = kfd_smi_ev_release
};

static __poll_t kfd_smi_ev_poll(struct file *filep,
				struct poll_table_struct *wait)
{
	struct kfd_smi_client *client = filep->private_data;
	__poll_t mask = 0;

	poll_wait(filep, &client->wait_queue, wait);

	spin_lock(&client->lock);
	if (!kfifo_is_empty(&client->fifo))
		mask = EPOLLIN | EPOLLRDNORM;
	spin_unlock(&client->lock);

	return mask;
}
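
/*
 * Illustrative only (not compiled): a minimal userspace sketch of the
 * write/poll contract implemented by the fops above, assuming an SMI event
 * fd has already been obtained (see kfd_smi_event_open() at the bottom of
 * this file). A client enables events by writing a 64-bit mask, then waits
 * for queued data with poll(). KFD_SMI_EVENT_MASK_FROM_INDEX() and
 * KFD_SMI_EVENT_VMFAULT come from the uapi header linux/kfd_ioctl.h.
 *
 *	#include <poll.h>
 *	#include <stdint.h>
 *	#include <unistd.h>
 *	#include <linux/kfd_ioctl.h>
 *
 *	static int wait_for_vmfault(int smi_fd)
 *	{
 *		uint64_t mask =
 *			KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_VMFAULT);
 *		struct pollfd pfd = { .fd = smi_fd, .events = POLLIN };
 *
 *		if (write(smi_fd, &mask, sizeof(mask)) != sizeof(mask))
 *			return -1;
 *		return poll(&pfd, 1, -1); // blocks until an event is queued
 *	}
 */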

static ssize_t kfd_smi_ev_read(struct file *filep, char __user *user,
			       size_t size, loff_t *offset)
{
	int ret;
	size_t to_copy;
	struct kfd_smi_client *client = filep->private_data;
	unsigned char *buf;

	buf = kmalloc_array(MAX_KFIFO_SIZE, sizeof(*buf), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	/* kfifo_to_user() can sleep, so we can't call it while holding the
	 * spinlock. Instead, kfifo_out() into a kernel buffer under the
	 * lock, then copy that buffer to userspace.
	 */
	spin_lock(&client->lock);
	to_copy = kfifo_len(&client->fifo);
	if (!to_copy) {
		spin_unlock(&client->lock);
		ret = -EAGAIN;
		goto ret_err;
	}
	/* Cap at the staging buffer size; sizeof(buf) would only be the
	 * size of the pointer.
	 */
	to_copy = min3(size, (size_t)MAX_KFIFO_SIZE, to_copy);
	ret = kfifo_out(&client->fifo, buf, to_copy);
	spin_unlock(&client->lock);
	if (ret <= 0) {
		ret = -EAGAIN;
		goto ret_err;
	}

	ret = copy_to_user(user, buf, to_copy);
	if (ret) {
		ret = -EFAULT;
		goto ret_err;
	}

	kfree(buf);
	return to_copy;

ret_err:
	kfree(buf);
	return ret;
}

static ssize_t kfd_smi_ev_write(struct file *filep, const char __user *user,
				size_t size, loff_t *offset)
{
	struct kfd_smi_client *client = filep->private_data;
	uint64_t events;

	if (!access_ok(user, size) || size < sizeof(events))
		return -EFAULT;
	if (copy_from_user(&events, user, sizeof(events)))
		return -EFAULT;

	/* The written 64-bit mask selects which events this client gets */
	WRITE_ONCE(client->events, events);

	return sizeof(events);
}

static int kfd_smi_ev_release(struct inode *inode, struct file *filep)
{
	struct kfd_smi_client *client = filep->private_data;
	struct kfd_dev *dev = client->dev;

	spin_lock(&dev->smi_lock);
	list_del_rcu(&client->list);
	spin_unlock(&dev->smi_lock);

	/* Wait for concurrent RCU readers in add_event_to_kfifo() to drop
	 * their reference to the client before freeing it.
	 */
	synchronize_rcu();
	kfifo_free(&client->fifo);
	kfree(client);

	return 0;
}

/* Broadcast a formatted event record to every client that enabled it */
static void add_event_to_kfifo(struct kfd_dev *dev, unsigned int smi_event,
			       char *event_msg, int len)
{
	struct kfd_smi_client *client;

	rcu_read_lock();

	list_for_each_entry_rcu(client, &dev->smi_clients, list) {
		if (!(READ_ONCE(client->events) &
				KFD_SMI_EVENT_MASK_FROM_INDEX(smi_event)))
			continue;
		spin_lock(&client->lock);
		if (kfifo_avail(&client->fifo) >= len) {
			kfifo_in(&client->fifo, event_msg, len);
			wake_up_all(&client->wait_queue);
		} else {
			pr_debug("smi_event(EventID: %u): no space left\n",
				 smi_event);
		}
		spin_unlock(&client->lock);
	}

	rcu_read_unlock();
}
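
/*
 * A hypothetical sketch of how a new producer would post an event through
 * add_event_to_kfifo(), mirroring the kfd_smi_event_update_*() helpers
 * below: format a small "event_id payload\n" text record, then fan it out
 * to all interested clients. KFD_SMI_EVENT_FOO and the foo_data payload
 * are invented here for illustration.
 *
 *	void kfd_smi_event_update_foo(struct kfd_dev *dev, uint64_t foo_data)
 *	{
 *		// 1 byte event + 1 byte space + 16 byte payload +
 *		// 1 byte \n + 1 byte \0 = 20
 *		char fifo_in[20];
 *		int len;
 *
 *		if (list_empty(&dev->smi_clients))
 *			return;
 *
 *		len = snprintf(fifo_in, sizeof(fifo_in), "%x %llx\n",
 *			       KFD_SMI_EVENT_FOO, foo_data);
 *		add_event_to_kfifo(dev, KFD_SMI_EVENT_FOO, fifo_in, len);
 *	}
 */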

void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset)
{
	/*
	 * GpuReset msg = Reset seq number (incremented for
	 * every reset message sent before GPU reset).
	 * 1 byte event + 1 byte space + 8 bytes seq num +
	 * 1 byte \n + 1 byte \0 = 12
	 */
	char fifo_in[12];
	int len;
	unsigned int event;

	if (list_empty(&dev->smi_clients))
		return;

	memset(fifo_in, 0x0, sizeof(fifo_in));

	if (post_reset) {
		event = KFD_SMI_EVENT_GPU_POST_RESET;
	} else {
		event = KFD_SMI_EVENT_GPU_PRE_RESET;
		++(dev->reset_seq_num);
	}

	len = snprintf(fifo_in, sizeof(fifo_in), "%x %x\n", event,
		       dev->reset_seq_num);

	add_event_to_kfifo(dev, event, fifo_in, len);
}

void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
					     uint64_t throttle_bitmask)
{
	/*
	 * ThermalThrottle msg = throttle_bitmask(8):
	 * thermal_interrupt_count(16):
	 * 1 byte event + 1 byte space + 16 byte throttle_bitmask +
	 * 1 byte : + 16 byte thermal_interrupt_counter + 1 byte \n +
	 * 1 byte \0 = 37
	 */
	char fifo_in[37];
	int len;

	if (list_empty(&dev->smi_clients))
		return;

	len = snprintf(fifo_in, sizeof(fifo_in), "%x %llx:%llx\n",
		       KFD_SMI_EVENT_THERMAL_THROTTLE, throttle_bitmask,
		       amdgpu_dpm_get_thermal_throttling_counter(dev->adev));

	add_event_to_kfifo(dev, KFD_SMI_EVENT_THERMAL_THROTTLE, fifo_in, len);
}

void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid)
{
	struct amdgpu_task_info task_info;
	/* VmFault msg = (hex)uint32_pid(8) + :(1) + task name(16) = 25 */
	/* 1 byte event + 1 byte space + 25 bytes msg + 1 byte \n +
	 * 1 byte \0 = 29
	 */
	char fifo_in[29];
	int len;

	if (list_empty(&dev->smi_clients))
		return;

	memset(&task_info, 0, sizeof(struct amdgpu_task_info));
	amdgpu_vm_get_task_info(dev->adev, pasid, &task_info);
	/* Report VM faults from user applications, not retries from kernel */
	if (!task_info.pid)
		return;

	len = snprintf(fifo_in, sizeof(fifo_in), "%x %x:%s\n", KFD_SMI_EVENT_VMFAULT,
		       task_info.pid, task_info.task_name);

	add_event_to_kfifo(dev, KFD_SMI_EVENT_VMFAULT, fifo_in, len);
}

int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd)
{
	struct kfd_smi_client *client;
	int ret;

	client = kzalloc(sizeof(struct kfd_smi_client), GFP_KERNEL);
	if (!client)
		return -ENOMEM;
	INIT_LIST_HEAD(&client->list);

	ret = kfifo_alloc(&client->fifo, MAX_KFIFO_SIZE, GFP_KERNEL);
	if (ret) {
		kfree(client);
		return ret;
	}

	ret = anon_inode_getfd(kfd_smi_name, &kfd_smi_ev_fops, (void *)client,
			       O_RDWR);
	if (ret < 0) {
		kfifo_free(&client->fifo);
		kfree(client);
		return ret;
	}
	*fd = ret;

	init_waitqueue_head(&client->wait_queue);
	spin_lock_init(&client->lock);
	client->events = 0;
	client->dev = dev;

	spin_lock(&dev->smi_lock);
	list_add_rcu(&client->list, &dev->smi_clients);
	spin_unlock(&dev->smi_lock);

	return 0;
}
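
/*
 * Illustrative only (not compiled): a userspace sketch of the full client
 * flow against this interface. The anon fd comes from the
 * AMDKFD_IOC_SMI_EVENTS ioctl on /dev/kfd, which lands in
 * kfd_smi_event_open() above; each read() then drains one or more
 * "event_id payload\n" text records. Error handling is mostly elided.
 *
 *	#include <fcntl.h>
 *	#include <stdint.h>
 *	#include <stdio.h>
 *	#include <sys/ioctl.h>
 *	#include <unistd.h>
 *	#include <linux/kfd_ioctl.h>
 *
 *	static int open_smi_fd(uint32_t gpuid)
 *	{
 *		struct kfd_ioctl_smi_events_args args = { .gpuid = gpuid };
 *		int kfd = open("/dev/kfd", O_RDWR | O_CLOEXEC);
 *
 *		if (kfd < 0 || ioctl(kfd, AMDKFD_IOC_SMI_EVENTS, &args) < 0)
 *			return -1;
 *		return args.anon_fd;
 *	}
 *
 *	static void drain_events(int smi_fd)
 *	{
 *		char buf[1024]; // MAX_KFIFO_SIZE above
 *		ssize_t n = read(smi_fd, buf, sizeof(buf) - 1);
 *
 *		if (n > 0) {
 *			buf[n] = '\0';
 *			fputs(buf, stdout); // e.g. "1 5b3:myapp\n" for a VM fault
 *		}
 *	}
 */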