// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright 2020-2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/poll.h>
#include <linux/wait.h>
#include <linux/anon_inodes.h>
#include <uapi/linux/kfd_ioctl.h>
#include "amdgpu.h"
#include "amdgpu_vm.h"
#include "kfd_priv.h"
#include "kfd_smi_events.h"

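/*
 * Per-client state for one open SMI event file descriptor. Events are
 * queued into @fifo under @lock and drained by read(2); @events is the
 * bitmask of event types the client has enabled via write(2).
 */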
struct kfd_smi_client {
	struct list_head list;
	struct kfifo fifo;
	wait_queue_head_t wait_queue;
	/* events enabled */
	uint64_t events;
	struct kfd_dev *dev;
	spinlock_t lock;
};

#define MAX_KFIFO_SIZE	1024

static __poll_t kfd_smi_ev_poll(struct file *, struct poll_table_struct *);
static ssize_t kfd_smi_ev_read(struct file *, char __user *, size_t, loff_t *);
static ssize_t kfd_smi_ev_write(struct file *, const char __user *, size_t,
				loff_t *);
static int kfd_smi_ev_release(struct inode *, struct file *);

static const char kfd_smi_name[] = "kfd_smi_ev";

static const struct file_operations kfd_smi_ev_fops = {
	.owner = THIS_MODULE,
	.poll = kfd_smi_ev_poll,
	.read = kfd_smi_ev_read,
	.write = kfd_smi_ev_write,
	.release = kfd_smi_ev_release
};

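/*
 * poll(2): report the fd as readable when the client's kfifo holds at
 * least one queued event message.
 */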
static __poll_t kfd_smi_ev_poll(struct file *filep,
				struct poll_table_struct *wait)
{
	struct kfd_smi_client *client = filep->private_data;
	__poll_t mask = 0;

	poll_wait(filep, &client->wait_queue, wait);

	spin_lock(&client->lock);
	if (!kfifo_is_empty(&client->fifo))
		mask = EPOLLIN | EPOLLRDNORM;
	spin_unlock(&client->lock);

	return mask;
}

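/*
 * read(2): drain queued event messages from the client's kfifo into a
 * temporary kernel buffer under the spinlock, then copy them to user
 * space. Returns -EAGAIN when no events are queued.
 */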
static ssize_t kfd_smi_ev_read(struct file *filep, char __user *user,
			       size_t size, loff_t *offset)
{
	int ret;
	size_t to_copy;
	struct kfd_smi_client *client = filep->private_data;
	unsigned char *buf;

	buf = kmalloc_array(MAX_KFIFO_SIZE, sizeof(*buf), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	/* kfifo_to_user() can sleep, so it cannot be called under the
	 * spinlock. Instead, kfifo_out() into the temporary buffer while
	 * holding the spinlock, then copy the data to user space.
	 */
	spin_lock(&client->lock);
	to_copy = kfifo_len(&client->fifo);
	if (!to_copy) {
		spin_unlock(&client->lock);
		ret = -EAGAIN;
		goto ret_err;
	}
	/* buf holds MAX_KFIFO_SIZE bytes; sizeof(buf) would only be the
	 * pointer size and would wrongly cap the read at a few bytes.
	 */
	to_copy = min3(size, (size_t)MAX_KFIFO_SIZE, to_copy);
	ret = kfifo_out(&client->fifo, buf, to_copy);
	spin_unlock(&client->lock);
	if (ret <= 0) {
		ret = -EAGAIN;
		goto ret_err;
	}

	ret = copy_to_user(user, buf, to_copy);
	if (ret) {
		ret = -EFAULT;
		goto ret_err;
	}

	kfree(buf);
	return to_copy;

ret_err:
	kfree(buf);
	return ret;
}

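/*
 * write(2): update the mask of enabled event types. The client is
 * expected to write an 8-byte event mask, e.g. built from
 * KFD_SMI_EVENT_MASK_FROM_INDEX() values; shorter writes fail with
 * -EFAULT.
 */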
static ssize_t kfd_smi_ev_write(struct file *filep, const char __user *user,
				size_t size, loff_t *offset)
{
	struct kfd_smi_client *client = filep->private_data;
	uint64_t events;

	if (!access_ok(user, size) || size < sizeof(events))
		return -EFAULT;
	if (copy_from_user(&events, user, sizeof(events)))
		return -EFAULT;

	WRITE_ONCE(client->events, events);

	return sizeof(events);
}

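/*
 * release(2): unlink the client from the device's SMI client list and
 * wait for concurrent RCU readers (add_event_to_kfifo) to finish before
 * freeing the kfifo and the client itself.
 */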
static int kfd_smi_ev_release(struct inode *inode, struct file *filep)
{
	struct kfd_smi_client *client = filep->private_data;
	struct kfd_dev *dev = client->dev;

	spin_lock(&dev->smi_lock);
	list_del_rcu(&client->list);
	spin_unlock(&dev->smi_lock);

	synchronize_rcu();
	kfifo_free(&client->fifo);
	kfree(client);

	return 0;
}

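/*
 * Queue an event message for every client on the device that has enabled
 * this event type. The client list is walked under RCU, each kfifo is
 * filled under its client lock, and the message is dropped (with a debug
 * print) when a client's fifo is full.
 */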
static void add_event_to_kfifo(struct kfd_dev *dev, unsigned int smi_event,
			      char *event_msg, int len)
{
	struct kfd_smi_client *client;

	rcu_read_lock();

	list_for_each_entry_rcu(client, &dev->smi_clients, list) {
		if (!(READ_ONCE(client->events) &
				KFD_SMI_EVENT_MASK_FROM_INDEX(smi_event)))
			continue;
		spin_lock(&client->lock);
		if (kfifo_avail(&client->fifo) >= len) {
			kfifo_in(&client->fifo, event_msg, len);
			wake_up_all(&client->wait_queue);
		} else {
			pr_debug("smi_event(EventID: %u): no space left\n",
					smi_event);
		}
		spin_unlock(&client->lock);
	}

	rcu_read_unlock();
}

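/*
 * Notify SMI clients of a GPU reset. The pre-reset event bumps the
 * device's reset sequence number; the post-reset event reports the same
 * number once the reset has completed.
 */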
void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset)
{
	/*
	 * GpuReset msg = Reset seq number (incremented for
	 * every reset message sent before GPU reset).
	 * 1 byte event + 1 byte space + 8 bytes seq num +
	 * 1 byte \n + 1 byte \0 = 12
	 */
	char fifo_in[12];
	int len;
	unsigned int event;

	if (list_empty(&dev->smi_clients))
		return;

	memset(fifo_in, 0x0, sizeof(fifo_in));

	if (post_reset) {
		event = KFD_SMI_EVENT_GPU_POST_RESET;
	} else {
		event = KFD_SMI_EVENT_GPU_PRE_RESET;
		++(dev->reset_seq_num);
	}

	len = snprintf(fifo_in, sizeof(fifo_in), "%x %x\n", event,
						dev->reset_seq_num);

	add_event_to_kfifo(dev, event, fifo_in, len);
}

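/*
 * Report a thermal throttling event, pairing the throttle bitmask with
 * the accumulated counter returned by
 * amdgpu_dpm_get_thermal_throttling_counter().
 */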
void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
					     uint64_t throttle_bitmask)
{
	/*
	 * ThermalThrottle msg = throttle_bitmask(8):
	 * 			 thermal_interrupt_count(16):
	 * 1 byte event + 1 byte space + 16 byte throttle_bitmask +
	 * 1 byte : + 16 byte thermal_interrupt_counter + 1 byte \n +
	 * 1 byte \0 = 37
	 */
	char fifo_in[37];
	int len;

	if (list_empty(&dev->smi_clients))
		return;

	len = snprintf(fifo_in, sizeof(fifo_in), "%x %llx:%llx\n",
		       KFD_SMI_EVENT_THERMAL_THROTTLE, throttle_bitmask,
		       amdgpu_dpm_get_thermal_throttling_counter(dev->adev));

	add_event_to_kfifo(dev, KFD_SMI_EVENT_THERMAL_THROTTLE, fifo_in, len);
}

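/*
 * Report a GPU VM fault for the given PASID, identifying the offending
 * process by pid and task name. Faults without an associated user task
 * (e.g. kernel retry faults) are not reported.
 */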
void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid)
{
	struct amdgpu_task_info task_info;
	/* VmFault msg = (hex)uint32_pid(8) + :(1) + task name(16) = 25 */
	/* 1 byte event + 1 byte space + 25 bytes msg + 1 byte \n +
	 * 1 byte \0 = 29
	 */
	char fifo_in[29];
	int len;

	if (list_empty(&dev->smi_clients))
		return;

	memset(&task_info, 0, sizeof(struct amdgpu_task_info));
	amdgpu_vm_get_task_info(dev->adev, pasid, &task_info);
	/* Report VM faults from user applications, not retry from kernel */
	if (!task_info.pid)
		return;

	len = snprintf(fifo_in, sizeof(fifo_in), "%x %x:%s\n", KFD_SMI_EVENT_VMFAULT,
		task_info.pid, task_info.task_name);

	add_event_to_kfifo(dev, KFD_SMI_EVENT_VMFAULT, fifo_in, len);
}

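/*
 * Create a new SMI client for @dev and return an anonymous-inode file
 * descriptor through @fd. The client starts with no events enabled and
 * is added to the device's RCU-protected client list.
 */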
int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd)
{
	struct kfd_smi_client *client;
	int ret;

	client = kzalloc(sizeof(struct kfd_smi_client), GFP_KERNEL);
	if (!client)
		return -ENOMEM;
	INIT_LIST_HEAD(&client->list);

	ret = kfifo_alloc(&client->fifo, MAX_KFIFO_SIZE, GFP_KERNEL);
	if (ret) {
		kfree(client);
		return ret;
	}

	ret = anon_inode_getfd(kfd_smi_name, &kfd_smi_ev_fops, (void *)client,
			       O_RDWR);
	if (ret < 0) {
		kfifo_free(&client->fifo);
		kfree(client);
		return ret;
	}
	*fd = ret;

	init_waitqueue_head(&client->wait_queue);
	spin_lock_init(&client->lock);
	client->events = 0;
	client->dev = dev;

	spin_lock(&dev->smi_lock);
	list_add_rcu(&client->list, &dev->smi_clients);
	spin_unlock(&dev->smi_lock);

	return 0;
}
