1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22 
23 #include <linux/device.h>
24 #include <linux/export.h>
25 #include <linux/err.h>
26 #include <linux/fs.h>
27 #include <linux/file.h>
28 #include <linux/sched.h>
29 #include <linux/slab.h>
30 #include <linux/uaccess.h>
31 #include <linux/compat.h>
32 #include <uapi/linux/kfd_ioctl.h>
33 #include <linux/time.h>
34 #include <linux/mm.h>
35 #include <linux/mman.h>
36 #include <asm/processor.h>
37 #include "kfd_priv.h"
38 #include "kfd_device_queue_manager.h"
39 #include "kfd_dbgmgr.h"
40 
41 static long kfd_ioctl(struct file *, unsigned int, unsigned long);
42 static int kfd_open(struct inode *, struct file *);
43 static int kfd_mmap(struct file *, struct vm_area_struct *);
44 
45 static const char kfd_dev_name[] = "kfd";
46 
47 static const struct file_operations kfd_fops = {
48 	.owner = THIS_MODULE,
49 	.unlocked_ioctl = kfd_ioctl,
50 	.compat_ioctl = kfd_ioctl,
51 	.open = kfd_open,
52 	.mmap = kfd_mmap,
53 };
54 
55 static int kfd_char_dev_major = -1;
56 static struct class *kfd_class;
57 struct device *kfd_device;
58 
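/*
 * Register the KFD character device: allocate a dynamic major number,
 * create the "kfd" class and the single /dev/kfd device node (minor 0).
 */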
59 int kfd_chardev_init(void)
60 {
61 	int err = 0;
62 
63 	kfd_char_dev_major = register_chrdev(0, kfd_dev_name, &kfd_fops);
64 	err = kfd_char_dev_major;
65 	if (err < 0)
66 		goto err_register_chrdev;
67 
68 	kfd_class = class_create(THIS_MODULE, kfd_dev_name);
69 	err = PTR_ERR(kfd_class);
70 	if (IS_ERR(kfd_class))
71 		goto err_class_create;
72 
73 	kfd_device = device_create(kfd_class, NULL,
74 					MKDEV(kfd_char_dev_major, 0),
75 					NULL, kfd_dev_name);
76 	err = PTR_ERR(kfd_device);
77 	if (IS_ERR(kfd_device))
78 		goto err_device_create;
79 
80 	return 0;
81 
82 err_device_create:
83 	class_destroy(kfd_class);
84 err_class_create:
85 	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
86 err_register_chrdev:
87 	return err;
88 }
89 
90 void kfd_chardev_exit(void)
91 {
92 	device_destroy(kfd_class, MKDEV(kfd_char_dev_major, 0));
93 	class_destroy(kfd_class);
94 	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
95 }
96 
97 struct device *kfd_chardev(void)
98 {
99 	return kfd_device;
100 }
101 
102 
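/*
 * open(2) handler for /dev/kfd: only minor 0 exists, 32-bit callers are
 * rejected, and a per-process KFD context is set up for the calling task.
 */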
103 static int kfd_open(struct inode *inode, struct file *filep)
104 {
105 	struct kfd_process *process;
106 	bool is_32bit_user_mode;
107 
108 	if (iminor(inode) != 0)
109 		return -ENODEV;
110 
111 	is_32bit_user_mode = in_compat_syscall();
112 
113 	if (is_32bit_user_mode) {
114 		dev_warn(kfd_device,
115 			"Process %d (32-bit) failed to open /dev/kfd\n"
116 			"32-bit processes are not supported by amdkfd\n",
117 			current->pid);
118 		return -EPERM;
119 	}
120 
121 	process = kfd_create_process(filep);
122 	if (IS_ERR(process))
123 		return PTR_ERR(process);
124 
125 	dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
126 		process->pasid, process->is_32bit_user_mode);
127 
128 	return 0;
129 }
130 
131 static int kfd_ioctl_get_version(struct file *filep, struct kfd_process *p,
132 					void *data)
133 {
134 	struct kfd_ioctl_get_version_args *args = data;
135 
136 	args->major_version = KFD_IOCTL_MAJOR_VERSION;
137 	args->minor_version = KFD_IOCTL_MINOR_VERSION;
138 
139 	return 0;
140 }
141 
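/*
 * Validate the user-supplied queue-creation arguments and translate them
 * into a kernel struct queue_properties. User pointers are only checked
 * with access_ok() at this point; nothing is copied from user space yet.
 */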
142 static int set_queue_properties_from_user(struct queue_properties *q_properties,
143 				struct kfd_ioctl_create_queue_args *args)
144 {
145 	if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
		pr_err("Queue percentage must be between 0 and KFD_MAX_QUEUE_PERCENTAGE\n");
147 		return -EINVAL;
148 	}
149 
150 	if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
		pr_err("Queue priority must be between 0 and KFD_MAX_QUEUE_PRIORITY\n");
152 		return -EINVAL;
153 	}
154 
155 	if ((args->ring_base_address) &&
156 		(!access_ok(VERIFY_WRITE,
157 			(const void __user *) args->ring_base_address,
158 			sizeof(uint64_t)))) {
159 		pr_err("Can't access ring base address\n");
160 		return -EFAULT;
161 	}
162 
163 	if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
164 		pr_err("Ring size must be a power of 2 or 0\n");
165 		return -EINVAL;
166 	}
167 
168 	if (!access_ok(VERIFY_WRITE,
169 			(const void __user *) args->read_pointer_address,
170 			sizeof(uint32_t))) {
171 		pr_err("Can't access read pointer\n");
172 		return -EFAULT;
173 	}
174 
175 	if (!access_ok(VERIFY_WRITE,
176 			(const void __user *) args->write_pointer_address,
177 			sizeof(uint32_t))) {
178 		pr_err("Can't access write pointer\n");
179 		return -EFAULT;
180 	}
181 
182 	if (args->eop_buffer_address &&
183 		!access_ok(VERIFY_WRITE,
184 			(const void __user *) args->eop_buffer_address,
185 			sizeof(uint32_t))) {
		pr_debug("Can't access eop buffer\n");
187 		return -EFAULT;
188 	}
189 
190 	if (args->ctx_save_restore_address &&
191 		!access_ok(VERIFY_WRITE,
192 			(const void __user *) args->ctx_save_restore_address,
193 			sizeof(uint32_t))) {
		pr_debug("Can't access ctx save restore buffer\n");
195 		return -EFAULT;
196 	}
197 
198 	q_properties->is_interop = false;
199 	q_properties->queue_percent = args->queue_percentage;
200 	q_properties->priority = args->queue_priority;
201 	q_properties->queue_address = args->ring_base_address;
202 	q_properties->queue_size = args->ring_size;
203 	q_properties->read_ptr = (uint32_t *) args->read_pointer_address;
204 	q_properties->write_ptr = (uint32_t *) args->write_pointer_address;
205 	q_properties->eop_ring_buffer_address = args->eop_buffer_address;
206 	q_properties->eop_ring_buffer_size = args->eop_buffer_size;
207 	q_properties->ctx_save_restore_area_address =
208 			args->ctx_save_restore_address;
209 	q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size;
210 	q_properties->ctl_stack_size = args->ctl_stack_size;
211 	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
212 		args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
213 		q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
214 	else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)
215 		q_properties->type = KFD_QUEUE_TYPE_SDMA;
216 	else
217 		return -ENOTSUPP;
218 
219 	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
220 		q_properties->format = KFD_QUEUE_FORMAT_AQL;
221 	else
222 		q_properties->format = KFD_QUEUE_FORMAT_PM4;
223 
224 	pr_debug("Queue Percentage: %d, %d\n",
225 			q_properties->queue_percent, args->queue_percentage);
226 
227 	pr_debug("Queue Priority: %d, %d\n",
228 			q_properties->priority, args->queue_priority);
229 
230 	pr_debug("Queue Address: 0x%llX, 0x%llX\n",
231 			q_properties->queue_address, args->ring_base_address);
232 
233 	pr_debug("Queue Size: 0x%llX, %u\n",
234 			q_properties->queue_size, args->ring_size);
235 
236 	pr_debug("Queue r/w Pointers: %px, %px\n",
237 			q_properties->read_ptr,
238 			q_properties->write_ptr);
239 
240 	pr_debug("Queue Format: %d\n", q_properties->format);
241 
242 	pr_debug("Queue EOP: 0x%llX\n", q_properties->eop_ring_buffer_address);
243 
244 	pr_debug("Queue CTX save area: 0x%llX\n",
245 			q_properties->ctx_save_restore_area_address);
246 
247 	return 0;
248 }
249 
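/*
 * AMDKFD_IOC_CREATE_QUEUE handler: validates the arguments, binds the
 * process to the target device and creates the queue through the process
 * queue manager. The doorbell mmap offset is returned in the args struct.
 */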
250 static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
251 					void *data)
252 {
253 	struct kfd_ioctl_create_queue_args *args = data;
254 	struct kfd_dev *dev;
255 	int err = 0;
256 	unsigned int queue_id;
257 	struct kfd_process_device *pdd;
258 	struct queue_properties q_properties;
259 
260 	memset(&q_properties, 0, sizeof(struct queue_properties));
261 
262 	pr_debug("Creating queue ioctl\n");
263 
264 	err = set_queue_properties_from_user(&q_properties, args);
265 	if (err)
266 		return err;
267 
268 	pr_debug("Looking for gpu id 0x%x\n", args->gpu_id);
269 	dev = kfd_device_by_id(args->gpu_id);
270 	if (!dev) {
271 		pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
272 		return -EINVAL;
273 	}
274 
275 	mutex_lock(&p->mutex);
276 
277 	pdd = kfd_bind_process_to_device(dev, p);
278 	if (IS_ERR(pdd)) {
279 		err = -ESRCH;
280 		goto err_bind_process;
281 	}
282 
283 	pr_debug("Creating queue for PASID %d on gpu 0x%x\n",
284 			p->pasid,
285 			dev->id);
286 
287 	err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id);
288 	if (err != 0)
289 		goto err_create_queue;
290 
291 	args->queue_id = queue_id;

	/* Encode mmap type and gpu_id into the doorbell offset for mmap */
295 	args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
296 	args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
297 	args->doorbell_offset <<= PAGE_SHIFT;
298 	if (KFD_IS_SOC15(dev->device_info->asic_family))
299 		/* On SOC15 ASICs, doorbell allocation must be
300 		 * per-device, and independent from the per-process
301 		 * queue_id. Return the doorbell offset within the
302 		 * doorbell aperture to user mode.
303 		 */
304 		args->doorbell_offset |= q_properties.doorbell_off;
305 
306 	mutex_unlock(&p->mutex);
307 
308 	pr_debug("Queue id %d was created successfully\n", args->queue_id);
309 
310 	pr_debug("Ring buffer address == 0x%016llX\n",
311 			args->ring_base_address);
312 
313 	pr_debug("Read ptr address    == 0x%016llX\n",
314 			args->read_pointer_address);
315 
316 	pr_debug("Write ptr address   == 0x%016llX\n",
317 			args->write_pointer_address);
318 
319 	return 0;
320 
321 err_create_queue:
322 err_bind_process:
323 	mutex_unlock(&p->mutex);
324 	return err;
325 }
326 
327 static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
328 					void *data)
329 {
330 	int retval;
331 	struct kfd_ioctl_destroy_queue_args *args = data;
332 
333 	pr_debug("Destroying queue id %d for pasid %d\n",
334 				args->queue_id,
335 				p->pasid);
336 
337 	mutex_lock(&p->mutex);
338 
339 	retval = pqm_destroy_queue(&p->pqm, args->queue_id);
340 
341 	mutex_unlock(&p->mutex);
342 	return retval;
343 }
344 
345 static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
346 					void *data)
347 {
348 	int retval;
349 	struct kfd_ioctl_update_queue_args *args = data;
350 	struct queue_properties properties;
351 
352 	if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
		pr_err("Queue percentage must be between 0 and KFD_MAX_QUEUE_PERCENTAGE\n");
354 		return -EINVAL;
355 	}
356 
357 	if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
		pr_err("Queue priority must be between 0 and KFD_MAX_QUEUE_PRIORITY\n");
359 		return -EINVAL;
360 	}
361 
362 	if ((args->ring_base_address) &&
363 		(!access_ok(VERIFY_WRITE,
364 			(const void __user *) args->ring_base_address,
365 			sizeof(uint64_t)))) {
366 		pr_err("Can't access ring base address\n");
367 		return -EFAULT;
368 	}
369 
370 	if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
371 		pr_err("Ring size must be a power of 2 or 0\n");
372 		return -EINVAL;
373 	}
374 
375 	properties.queue_address = args->ring_base_address;
376 	properties.queue_size = args->ring_size;
377 	properties.queue_percent = args->queue_percentage;
378 	properties.priority = args->queue_priority;
379 
380 	pr_debug("Updating queue id %d for pasid %d\n",
381 			args->queue_id, p->pasid);
382 
383 	mutex_lock(&p->mutex);
384 
385 	retval = pqm_update_queue(&p->pqm, args->queue_id, &properties);
386 
387 	mutex_unlock(&p->mutex);
388 
389 	return retval;
390 }
391 
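/*
 * Set the default and alternate cache policies (coherent or noncoherent)
 * and the alternate aperture of the calling process on the given device.
 */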
392 static int kfd_ioctl_set_memory_policy(struct file *filep,
393 					struct kfd_process *p, void *data)
394 {
395 	struct kfd_ioctl_set_memory_policy_args *args = data;
396 	struct kfd_dev *dev;
397 	int err = 0;
398 	struct kfd_process_device *pdd;
399 	enum cache_policy default_policy, alternate_policy;
400 
401 	if (args->default_policy != KFD_IOC_CACHE_POLICY_COHERENT
402 	    && args->default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
403 		return -EINVAL;
404 	}
405 
406 	if (args->alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT
407 	    && args->alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
408 		return -EINVAL;
409 	}
410 
411 	dev = kfd_device_by_id(args->gpu_id);
412 	if (!dev)
413 		return -EINVAL;
414 
415 	mutex_lock(&p->mutex);
416 
417 	pdd = kfd_bind_process_to_device(dev, p);
418 	if (IS_ERR(pdd)) {
419 		err = -ESRCH;
420 		goto out;
421 	}
422 
423 	default_policy = (args->default_policy == KFD_IOC_CACHE_POLICY_COHERENT)
424 			 ? cache_policy_coherent : cache_policy_noncoherent;
425 
426 	alternate_policy =
427 		(args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)
428 		   ? cache_policy_coherent : cache_policy_noncoherent;
429 
430 	if (!dev->dqm->ops.set_cache_memory_policy(dev->dqm,
431 				&pdd->qpd,
432 				default_policy,
433 				alternate_policy,
434 				(void __user *)args->alternate_aperture_base,
435 				args->alternate_aperture_size))
436 		err = -EINVAL;
437 
438 out:
439 	mutex_unlock(&p->mutex);
440 
441 	return err;
442 }
443 
444 static int kfd_ioctl_set_trap_handler(struct file *filep,
445 					struct kfd_process *p, void *data)
446 {
447 	struct kfd_ioctl_set_trap_handler_args *args = data;
448 	struct kfd_dev *dev;
449 	int err = 0;
450 	struct kfd_process_device *pdd;
451 
452 	dev = kfd_device_by_id(args->gpu_id);
453 	if (dev == NULL)
454 		return -EINVAL;
455 
456 	mutex_lock(&p->mutex);
457 
458 	pdd = kfd_bind_process_to_device(dev, p);
459 	if (IS_ERR(pdd)) {
460 		err = -ESRCH;
461 		goto out;
462 	}
463 
464 	if (dev->dqm->ops.set_trap_handler(dev->dqm,
465 					&pdd->qpd,
466 					args->tba_addr,
467 					args->tma_addr))
468 		err = -EINVAL;
469 
470 out:
471 	mutex_unlock(&p->mutex);
472 
473 	return err;
474 }
475 
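/*
 * Register the calling process with the debug manager of the given device.
 * Only one debugger may be registered per device; Carrizo is not supported.
 */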
476 static int kfd_ioctl_dbg_register(struct file *filep,
477 				struct kfd_process *p, void *data)
478 {
479 	struct kfd_ioctl_dbg_register_args *args = data;
480 	struct kfd_dev *dev;
481 	struct kfd_dbgmgr *dbgmgr_ptr;
482 	struct kfd_process_device *pdd;
483 	bool create_ok;
484 	long status = 0;
485 
486 	dev = kfd_device_by_id(args->gpu_id);
487 	if (!dev)
488 		return -EINVAL;
489 
490 	if (dev->device_info->asic_family == CHIP_CARRIZO) {
491 		pr_debug("kfd_ioctl_dbg_register not supported on CZ\n");
492 		return -EINVAL;
493 	}
494 
495 	mutex_lock(&p->mutex);
496 	mutex_lock(kfd_get_dbgmgr_mutex());
497 
	/*
	 * Make sure that we have a pdd, in case this is the first queue
	 * created for this process.
	 */
502 	pdd = kfd_bind_process_to_device(dev, p);
503 	if (IS_ERR(pdd)) {
504 		status = PTR_ERR(pdd);
505 		goto out;
506 	}
507 
508 	if (!dev->dbgmgr) {
509 		/* In case of a legal call, we have no dbgmgr yet */
510 		create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev);
511 		if (create_ok) {
512 			status = kfd_dbgmgr_register(dbgmgr_ptr, p);
513 			if (status != 0)
514 				kfd_dbgmgr_destroy(dbgmgr_ptr);
515 			else
516 				dev->dbgmgr = dbgmgr_ptr;
517 		}
518 	} else {
519 		pr_debug("debugger already registered\n");
520 		status = -EINVAL;
521 	}
522 
523 out:
524 	mutex_unlock(kfd_get_dbgmgr_mutex());
525 	mutex_unlock(&p->mutex);
526 
527 	return status;
528 }
529 
530 static int kfd_ioctl_dbg_unregister(struct file *filep,
531 				struct kfd_process *p, void *data)
532 {
533 	struct kfd_ioctl_dbg_unregister_args *args = data;
534 	struct kfd_dev *dev;
535 	long status;
536 
537 	dev = kfd_device_by_id(args->gpu_id);
538 	if (!dev || !dev->dbgmgr)
539 		return -EINVAL;
540 
541 	if (dev->device_info->asic_family == CHIP_CARRIZO) {
542 		pr_debug("kfd_ioctl_dbg_unregister not supported on CZ\n");
543 		return -EINVAL;
544 	}
545 
546 	mutex_lock(kfd_get_dbgmgr_mutex());
547 
548 	status = kfd_dbgmgr_unregister(dev->dbgmgr, p);
549 	if (!status) {
550 		kfd_dbgmgr_destroy(dev->dbgmgr);
551 		dev->dbgmgr = NULL;
552 	}
553 
554 	mutex_unlock(kfd_get_dbgmgr_mutex());
555 
556 	return status;
557 }
558 
/*
 * Parse and generate a variable size data structure for address watch.
 * The total size of the buffer and the number of watch points are limited
 * in order to prevent kernel abuse. (This has no bearing on the much smaller
 * HW limitation, which is enforced by the dbgdev module.)
 * Please also note that the watch addresses themselves are not copied from
 * user space, since they are programmed into the HW as user mode values.
 */
568 static int kfd_ioctl_dbg_address_watch(struct file *filep,
569 					struct kfd_process *p, void *data)
570 {
571 	struct kfd_ioctl_dbg_address_watch_args *args = data;
572 	struct kfd_dev *dev;
573 	struct dbg_address_watch_info aw_info;
574 	unsigned char *args_buff;
575 	long status;
576 	void __user *cmd_from_user;
577 	uint64_t watch_mask_value = 0;
578 	unsigned int args_idx = 0;
579 
580 	memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info));
581 
582 	dev = kfd_device_by_id(args->gpu_id);
583 	if (!dev)
584 		return -EINVAL;
585 
586 	if (dev->device_info->asic_family == CHIP_CARRIZO) {
587 		pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
588 		return -EINVAL;
589 	}
590 
591 	cmd_from_user = (void __user *) args->content_ptr;
592 
593 	/* Validate arguments */
594 
595 	if ((args->buf_size_in_bytes > MAX_ALLOWED_AW_BUFF_SIZE) ||
596 		(args->buf_size_in_bytes <= sizeof(*args) + sizeof(int) * 2) ||
597 		(cmd_from_user == NULL))
598 		return -EINVAL;
599 
600 	/* this is the actual buffer to work with */
601 	args_buff = memdup_user(cmd_from_user,
602 				args->buf_size_in_bytes - sizeof(*args));
603 	if (IS_ERR(args_buff))
604 		return PTR_ERR(args_buff);
605 
606 	aw_info.process = p;
607 
608 	aw_info.num_watch_points = *((uint32_t *)(&args_buff[args_idx]));
609 	args_idx += sizeof(aw_info.num_watch_points);
610 
611 	aw_info.watch_mode = (enum HSA_DBG_WATCH_MODE *) &args_buff[args_idx];
612 	args_idx += sizeof(enum HSA_DBG_WATCH_MODE) * aw_info.num_watch_points;
613 
614 	/*
615 	 * set watch address base pointer to point on the array base
616 	 * within args_buff
617 	 */
618 	aw_info.watch_address = (uint64_t *) &args_buff[args_idx];
619 
620 	/* skip over the addresses buffer */
621 	args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points;
622 
623 	if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
624 		status = -EINVAL;
625 		goto out;
626 	}
627 
628 	watch_mask_value = (uint64_t) args_buff[args_idx];
629 
630 	if (watch_mask_value > 0) {
631 		/*
632 		 * There is an array of masks.
633 		 * set watch mask base pointer to point on the array base
634 		 * within args_buff
635 		 */
636 		aw_info.watch_mask = (uint64_t *) &args_buff[args_idx];
637 
638 		/* skip over the masks buffer */
639 		args_idx += sizeof(aw_info.watch_mask) *
640 				aw_info.num_watch_points;
641 	} else {
642 		/* just the NULL mask, set to NULL and skip over it */
643 		aw_info.watch_mask = NULL;
644 		args_idx += sizeof(aw_info.watch_mask);
645 	}
646 
	if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
648 		status = -EINVAL;
649 		goto out;
650 	}
651 
652 	/* Currently HSA Event is not supported for DBG */
653 	aw_info.watch_event = NULL;
654 
655 	mutex_lock(kfd_get_dbgmgr_mutex());
656 
657 	status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info);
658 
659 	mutex_unlock(kfd_get_dbgmgr_mutex());
660 
661 out:
662 	kfree(args_buff);
663 
664 	return status;
665 }
666 
667 /* Parse and generate fixed size data structure for wave control */
668 static int kfd_ioctl_dbg_wave_control(struct file *filep,
669 					struct kfd_process *p, void *data)
670 {
671 	struct kfd_ioctl_dbg_wave_control_args *args = data;
672 	struct kfd_dev *dev;
673 	struct dbg_wave_control_info wac_info;
674 	unsigned char *args_buff;
675 	uint32_t computed_buff_size;
676 	long status;
677 	void __user *cmd_from_user;
678 	unsigned int args_idx = 0;
679 
680 	memset((void *) &wac_info, 0, sizeof(struct dbg_wave_control_info));
681 
682 	/* we use compact form, independent of the packing attribute value */
683 	computed_buff_size = sizeof(*args) +
684 				sizeof(wac_info.mode) +
685 				sizeof(wac_info.operand) +
686 				sizeof(wac_info.dbgWave_msg.DbgWaveMsg) +
687 				sizeof(wac_info.dbgWave_msg.MemoryVA) +
688 				sizeof(wac_info.trapId);
689 
690 	dev = kfd_device_by_id(args->gpu_id);
691 	if (!dev)
692 		return -EINVAL;
693 
694 	if (dev->device_info->asic_family == CHIP_CARRIZO) {
695 		pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
696 		return -EINVAL;
697 	}
698 
699 	/* input size must match the computed "compact" size */
700 	if (args->buf_size_in_bytes != computed_buff_size) {
701 		pr_debug("size mismatch, computed : actual %u : %u\n",
702 				args->buf_size_in_bytes, computed_buff_size);
703 		return -EINVAL;
704 	}
705 
706 	cmd_from_user = (void __user *) args->content_ptr;
707 
708 	if (cmd_from_user == NULL)
709 		return -EINVAL;
710 
711 	/* copy the entire buffer from user */
712 
713 	args_buff = memdup_user(cmd_from_user,
714 				args->buf_size_in_bytes - sizeof(*args));
715 	if (IS_ERR(args_buff))
716 		return PTR_ERR(args_buff);
717 
	/* move ptr to the start of the "payload" area */
719 	wac_info.process = p;
720 
721 	wac_info.operand = *((enum HSA_DBG_WAVEOP *)(&args_buff[args_idx]));
722 	args_idx += sizeof(wac_info.operand);
723 
724 	wac_info.mode = *((enum HSA_DBG_WAVEMODE *)(&args_buff[args_idx]));
725 	args_idx += sizeof(wac_info.mode);
726 
727 	wac_info.trapId = *((uint32_t *)(&args_buff[args_idx]));
728 	args_idx += sizeof(wac_info.trapId);
729 
730 	wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value =
731 					*((uint32_t *)(&args_buff[args_idx]));
732 	wac_info.dbgWave_msg.MemoryVA = NULL;
733 
734 	mutex_lock(kfd_get_dbgmgr_mutex());
735 
736 	pr_debug("Calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n",
737 			wac_info.process, wac_info.operand,
738 			wac_info.mode, wac_info.trapId,
739 			wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
740 
741 	status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info);
742 
743 	pr_debug("Returned status of dbg manager is %ld\n", status);
744 
745 	mutex_unlock(kfd_get_dbgmgr_mutex());
746 
747 	kfree(args_buff);
748 
749 	return status;
750 }
751 
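/*
 * Return a snapshot of the GPU clock counter (if a GPU is present) together
 * with CPU raw-monotonic and boot-based timestamps in nanoseconds.
 */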
752 static int kfd_ioctl_get_clock_counters(struct file *filep,
753 				struct kfd_process *p, void *data)
754 {
755 	struct kfd_ioctl_get_clock_counters_args *args = data;
756 	struct kfd_dev *dev;
757 	struct timespec64 time;
758 
759 	dev = kfd_device_by_id(args->gpu_id);
760 	if (dev)
761 		/* Reading GPU clock counter from KGD */
762 		args->gpu_clock_counter =
763 			dev->kfd2kgd->get_gpu_clock_counter(dev->kgd);
764 	else
765 		/* Node without GPU resource */
766 		args->gpu_clock_counter = 0;
767 
768 	/* No access to rdtsc. Using raw monotonic time */
769 	getrawmonotonic64(&time);
770 	args->cpu_clock_counter = (uint64_t)timespec64_to_ns(&time);
771 
772 	get_monotonic_boottime64(&time);
773 	args->system_clock_counter = (uint64_t)timespec64_to_ns(&time);
774 
775 	/* Since the counter is in nano-seconds we use 1GHz frequency */
776 	args->system_clock_freq = 1000000000;
777 
778 	return 0;
779 }
780 
781 
782 static int kfd_ioctl_get_process_apertures(struct file *filp,
783 				struct kfd_process *p, void *data)
784 {
785 	struct kfd_ioctl_get_process_apertures_args *args = data;
786 	struct kfd_process_device_apertures *pAperture;
787 	struct kfd_process_device *pdd;
788 
	dev_dbg(kfd_device, "get apertures for PASID %d\n", p->pasid);
790 
791 	args->num_of_nodes = 0;
792 
793 	mutex_lock(&p->mutex);
794 
	/* if the process-device list isn't empty */
796 	if (kfd_has_process_device_data(p)) {
797 		/* Run over all pdd of the process */
798 		pdd = kfd_get_first_process_device_data(p);
799 		do {
800 			pAperture =
801 				&args->process_apertures[args->num_of_nodes];
802 			pAperture->gpu_id = pdd->dev->id;
803 			pAperture->lds_base = pdd->lds_base;
804 			pAperture->lds_limit = pdd->lds_limit;
805 			pAperture->gpuvm_base = pdd->gpuvm_base;
806 			pAperture->gpuvm_limit = pdd->gpuvm_limit;
807 			pAperture->scratch_base = pdd->scratch_base;
808 			pAperture->scratch_limit = pdd->scratch_limit;
809 
810 			dev_dbg(kfd_device,
811 				"node id %u\n", args->num_of_nodes);
812 			dev_dbg(kfd_device,
813 				"gpu id %u\n", pdd->dev->id);
814 			dev_dbg(kfd_device,
815 				"lds_base %llX\n", pdd->lds_base);
816 			dev_dbg(kfd_device,
817 				"lds_limit %llX\n", pdd->lds_limit);
818 			dev_dbg(kfd_device,
819 				"gpuvm_base %llX\n", pdd->gpuvm_base);
820 			dev_dbg(kfd_device,
821 				"gpuvm_limit %llX\n", pdd->gpuvm_limit);
822 			dev_dbg(kfd_device,
823 				"scratch_base %llX\n", pdd->scratch_base);
824 			dev_dbg(kfd_device,
825 				"scratch_limit %llX\n", pdd->scratch_limit);
826 
827 			args->num_of_nodes++;
828 
829 			pdd = kfd_get_next_process_device_data(p, pdd);
830 		} while (pdd && (args->num_of_nodes < NUM_OF_SUPPORTED_GPUS));
831 	}
832 
833 	mutex_unlock(&p->mutex);
834 
835 	return 0;
836 }
837 
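/*
 * Newer variant of GET_PROCESS_APERTURES that is not limited to
 * NUM_OF_SUPPORTED_GPUS: a call with num_of_nodes == 0 only reports how
 * many nodes exist, so user space can allocate a buffer and call again.
 */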
838 static int kfd_ioctl_get_process_apertures_new(struct file *filp,
839 				struct kfd_process *p, void *data)
840 {
841 	struct kfd_ioctl_get_process_apertures_new_args *args = data;
842 	struct kfd_process_device_apertures *pa;
843 	struct kfd_process_device *pdd;
844 	uint32_t nodes = 0;
845 	int ret;
846 
	dev_dbg(kfd_device, "get apertures for PASID %d\n", p->pasid);
848 
849 	if (args->num_of_nodes == 0) {
		/* Return the number of nodes, so that user space can allocate
		 * sufficient memory
		 */
853 		mutex_lock(&p->mutex);
854 
855 		if (!kfd_has_process_device_data(p))
856 			goto out_unlock;
857 
858 		/* Run over all pdd of the process */
859 		pdd = kfd_get_first_process_device_data(p);
860 		do {
861 			args->num_of_nodes++;
862 			pdd = kfd_get_next_process_device_data(p, pdd);
863 		} while (pdd);
864 
865 		goto out_unlock;
866 	}
867 
868 	/* Fill in process-aperture information for all available
869 	 * nodes, but not more than args->num_of_nodes as that is
870 	 * the amount of memory allocated by user
871 	 */
872 	pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
873 				args->num_of_nodes), GFP_KERNEL);
874 	if (!pa)
875 		return -ENOMEM;
876 
877 	mutex_lock(&p->mutex);
878 
879 	if (!kfd_has_process_device_data(p)) {
880 		args->num_of_nodes = 0;
881 		kfree(pa);
882 		goto out_unlock;
883 	}
884 
885 	/* Run over all pdd of the process */
886 	pdd = kfd_get_first_process_device_data(p);
887 	do {
888 		pa[nodes].gpu_id = pdd->dev->id;
889 		pa[nodes].lds_base = pdd->lds_base;
890 		pa[nodes].lds_limit = pdd->lds_limit;
891 		pa[nodes].gpuvm_base = pdd->gpuvm_base;
892 		pa[nodes].gpuvm_limit = pdd->gpuvm_limit;
893 		pa[nodes].scratch_base = pdd->scratch_base;
894 		pa[nodes].scratch_limit = pdd->scratch_limit;
895 
896 		dev_dbg(kfd_device,
897 			"gpu id %u\n", pdd->dev->id);
898 		dev_dbg(kfd_device,
899 			"lds_base %llX\n", pdd->lds_base);
900 		dev_dbg(kfd_device,
901 			"lds_limit %llX\n", pdd->lds_limit);
902 		dev_dbg(kfd_device,
903 			"gpuvm_base %llX\n", pdd->gpuvm_base);
904 		dev_dbg(kfd_device,
905 			"gpuvm_limit %llX\n", pdd->gpuvm_limit);
906 		dev_dbg(kfd_device,
907 			"scratch_base %llX\n", pdd->scratch_base);
908 		dev_dbg(kfd_device,
909 			"scratch_limit %llX\n", pdd->scratch_limit);
910 		nodes++;
911 
912 		pdd = kfd_get_next_process_device_data(p, pdd);
913 	} while (pdd && (nodes < args->num_of_nodes));
914 	mutex_unlock(&p->mutex);
915 
916 	args->num_of_nodes = nodes;
917 	ret = copy_to_user(
918 			(void __user *)args->kfd_process_device_apertures_ptr,
919 			pa,
920 			(nodes * sizeof(struct kfd_process_device_apertures)));
921 	kfree(pa);
922 	return ret ? -EFAULT : 0;
923 
924 out_unlock:
925 	mutex_unlock(&p->mutex);
926 	return 0;
927 }
928 
929 static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
930 					void *data)
931 {
932 	struct kfd_ioctl_create_event_args *args = data;
933 	int err;
934 
935 	/* For dGPUs the event page is allocated in user mode. The
936 	 * handle is passed to KFD with the first call to this IOCTL
937 	 * through the event_page_offset field.
938 	 */
939 	if (args->event_page_offset) {
940 		struct kfd_dev *kfd;
941 		struct kfd_process_device *pdd;
942 		void *mem, *kern_addr;
943 		uint64_t size;
944 
945 		if (p->signal_page) {
946 			pr_err("Event page is already set\n");
947 			return -EINVAL;
948 		}
949 
950 		kfd = kfd_device_by_id(GET_GPU_ID(args->event_page_offset));
951 		if (!kfd) {
952 			pr_err("Getting device by id failed in %s\n", __func__);
953 			return -EINVAL;
954 		}
955 
956 		mutex_lock(&p->mutex);
957 		pdd = kfd_bind_process_to_device(kfd, p);
958 		if (IS_ERR(pdd)) {
959 			err = PTR_ERR(pdd);
960 			goto out_unlock;
961 		}
962 
963 		mem = kfd_process_device_translate_handle(pdd,
964 				GET_IDR_HANDLE(args->event_page_offset));
965 		if (!mem) {
966 			pr_err("Can't find BO, offset is 0x%llx\n",
967 			       args->event_page_offset);
968 			err = -EINVAL;
969 			goto out_unlock;
970 		}
971 		mutex_unlock(&p->mutex);
972 
973 		err = kfd->kfd2kgd->map_gtt_bo_to_kernel(kfd->kgd,
974 						mem, &kern_addr, &size);
975 		if (err) {
976 			pr_err("Failed to map event page to kernel\n");
977 			return err;
978 		}
979 
980 		err = kfd_event_page_set(p, kern_addr, size);
981 		if (err) {
982 			pr_err("Failed to set event page\n");
983 			return err;
984 		}
985 	}
986 
987 	err = kfd_event_create(filp, p, args->event_type,
988 				args->auto_reset != 0, args->node_id,
989 				&args->event_id, &args->event_trigger_data,
990 				&args->event_page_offset,
991 				&args->event_slot_index);
992 
993 	return err;
994 
995 out_unlock:
996 	mutex_unlock(&p->mutex);
997 	return err;
998 }
999 
1000 static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p,
1001 					void *data)
1002 {
1003 	struct kfd_ioctl_destroy_event_args *args = data;
1004 
1005 	return kfd_event_destroy(p, args->event_id);
1006 }
1007 
1008 static int kfd_ioctl_set_event(struct file *filp, struct kfd_process *p,
1009 				void *data)
1010 {
1011 	struct kfd_ioctl_set_event_args *args = data;
1012 
1013 	return kfd_set_event(p, args->event_id);
1014 }
1015 
1016 static int kfd_ioctl_reset_event(struct file *filp, struct kfd_process *p,
1017 				void *data)
1018 {
1019 	struct kfd_ioctl_reset_event_args *args = data;
1020 
1021 	return kfd_reset_event(p, args->event_id);
1022 }
1023 
1024 static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
1025 				void *data)
1026 {
1027 	struct kfd_ioctl_wait_events_args *args = data;
1028 	int err;
1029 
1030 	err = kfd_wait_on_events(p, args->num_events,
1031 			(void __user *)args->events_ptr,
1032 			(args->wait_for_all != 0),
1033 			args->timeout, &args->wait_result);
1034 
1035 	return err;
1036 }

static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
1038 					struct kfd_process *p, void *data)
1039 {
1040 	struct kfd_ioctl_set_scratch_backing_va_args *args = data;
1041 	struct kfd_process_device *pdd;
1042 	struct kfd_dev *dev;
1043 	long err;
1044 
1045 	dev = kfd_device_by_id(args->gpu_id);
1046 	if (!dev)
1047 		return -EINVAL;
1048 
1049 	mutex_lock(&p->mutex);
1050 
1051 	pdd = kfd_bind_process_to_device(dev, p);
1052 	if (IS_ERR(pdd)) {
1053 		err = PTR_ERR(pdd);
1054 		goto bind_process_to_device_fail;
1055 	}
1056 
1057 	pdd->qpd.sh_hidden_private_base = args->va_addr;
1058 
1059 	mutex_unlock(&p->mutex);
1060 
1061 	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&
1062 	    pdd->qpd.vmid != 0)
1063 		dev->kfd2kgd->set_scratch_backing_va(
1064 			dev->kgd, args->va_addr, pdd->qpd.vmid);
1065 
1066 	return 0;
1067 
1068 bind_process_to_device_fail:
1069 	mutex_unlock(&p->mutex);
1070 	return err;
1071 }
1072 
1073 static int kfd_ioctl_get_tile_config(struct file *filep,
1074 		struct kfd_process *p, void *data)
1075 {
1076 	struct kfd_ioctl_get_tile_config_args *args = data;
1077 	struct kfd_dev *dev;
1078 	struct tile_config config;
1079 	int err = 0;
1080 
1081 	dev = kfd_device_by_id(args->gpu_id);
1082 	if (!dev)
1083 		return -EINVAL;
1084 
1085 	dev->kfd2kgd->get_tile_config(dev->kgd, &config);
1086 
1087 	args->gb_addr_config = config.gb_addr_config;
1088 	args->num_banks = config.num_banks;
1089 	args->num_ranks = config.num_ranks;
1090 
1091 	if (args->num_tile_configs > config.num_tile_configs)
1092 		args->num_tile_configs = config.num_tile_configs;
1093 	err = copy_to_user((void __user *)args->tile_config_ptr,
1094 			config.tile_config_ptr,
1095 			args->num_tile_configs * sizeof(uint32_t));
1096 	if (err) {
1097 		args->num_tile_configs = 0;
1098 		return -EFAULT;
1099 	}
1100 
1101 	if (args->num_macro_tile_configs > config.num_macro_tile_configs)
1102 		args->num_macro_tile_configs =
1103 				config.num_macro_tile_configs;
1104 	err = copy_to_user((void __user *)args->macro_tile_config_ptr,
1105 			config.macro_tile_config_ptr,
1106 			args->num_macro_tile_configs * sizeof(uint32_t));
1107 	if (err) {
1108 		args->num_macro_tile_configs = 0;
1109 		return -EFAULT;
1110 	}
1111 
1112 	return 0;
1113 }
1114 
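/*
 * Acquire the GPUVM of a DRM render node for this process on the given
 * device. On success the pdd keeps the drm_file reference.
 */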
1115 static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,
1116 				void *data)
1117 {
1118 	struct kfd_ioctl_acquire_vm_args *args = data;
1119 	struct kfd_process_device *pdd;
1120 	struct kfd_dev *dev;
1121 	struct file *drm_file;
1122 	int ret;
1123 
1124 	dev = kfd_device_by_id(args->gpu_id);
1125 	if (!dev)
1126 		return -EINVAL;
1127 
1128 	drm_file = fget(args->drm_fd);
1129 	if (!drm_file)
1130 		return -EINVAL;
1131 
1132 	mutex_lock(&p->mutex);
1133 
1134 	pdd = kfd_get_process_device_data(dev, p);
1135 	if (!pdd) {
1136 		ret = -EINVAL;
1137 		goto err_unlock;
1138 	}
1139 
1140 	if (pdd->drm_file) {
1141 		ret = pdd->drm_file == drm_file ? 0 : -EBUSY;
1142 		goto err_unlock;
1143 	}
1144 
1145 	ret = kfd_process_device_init_vm(pdd, drm_file);
1146 	if (ret)
1147 		goto err_unlock;
1148 	/* On success, the PDD keeps the drm_file reference */
1149 	mutex_unlock(&p->mutex);
1150 
1151 	return 0;
1152 
1153 err_unlock:
1154 	mutex_unlock(&p->mutex);
1155 	fput(drm_file);
1156 	return ret;
1157 }
1158 
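/*
 * A device is treated as "large BAR" when all of its local memory is
 * host-accessible (public), or when the debug_largebar override is set.
 */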
1159 static bool kfd_dev_is_large_bar(struct kfd_dev *dev)
1160 {
1161 	struct kfd_local_mem_info mem_info;
1162 
1163 	if (debug_largebar) {
1164 		pr_debug("Simulate large-bar allocation on non large-bar machine\n");
1165 		return true;
1166 	}
1167 
1168 	if (dev->device_info->needs_iommu_device)
1169 		return false;
1170 
1171 	dev->kfd2kgd->get_local_mem_info(dev->kgd, &mem_info);
1172 	if (mem_info.local_mem_size_private == 0 &&
1173 			mem_info.local_mem_size_public > 0)
1174 		return true;
1175 	return false;
1176 }
1177 
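/*
 * Allocate a buffer object on the given device for this process and return
 * a handle (gpu_id plus IDR index) and the mmap offset for CPU access.
 */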
1178 static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
1179 					struct kfd_process *p, void *data)
1180 {
1181 	struct kfd_ioctl_alloc_memory_of_gpu_args *args = data;
1182 	struct kfd_process_device *pdd;
1183 	void *mem;
1184 	struct kfd_dev *dev;
1185 	int idr_handle;
1186 	long err;
1187 	uint64_t offset = args->mmap_offset;
1188 	uint32_t flags = args->flags;
1189 
1190 	if (args->size == 0)
1191 		return -EINVAL;
1192 
1193 	dev = kfd_device_by_id(args->gpu_id);
1194 	if (!dev)
1195 		return -EINVAL;
1196 
1197 	if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) &&
1198 		(flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) &&
1199 		!kfd_dev_is_large_bar(dev)) {
1200 		pr_err("Alloc host visible vram on small bar is not allowed\n");
1201 		return -EINVAL;
1202 	}
1203 
1204 	mutex_lock(&p->mutex);
1205 
1206 	pdd = kfd_bind_process_to_device(dev, p);
1207 	if (IS_ERR(pdd)) {
1208 		err = PTR_ERR(pdd);
1209 		goto err_unlock;
1210 	}
1211 
1212 	err = dev->kfd2kgd->alloc_memory_of_gpu(
1213 		dev->kgd, args->va_addr, args->size,
1214 		pdd->vm, (struct kgd_mem **) &mem, &offset,
1215 		flags);
1216 
1217 	if (err)
1218 		goto err_unlock;
1219 
1220 	idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
1221 	if (idr_handle < 0) {
1222 		err = -EFAULT;
1223 		goto err_free;
1224 	}
1225 
1226 	mutex_unlock(&p->mutex);
1227 
1228 	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
1229 	args->mmap_offset = offset;
1230 
1231 	return 0;
1232 
1233 err_free:
1234 	dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
1235 err_unlock:
1236 	mutex_unlock(&p->mutex);
1237 	return err;
1238 }
1239 
1240 static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
1241 					struct kfd_process *p, void *data)
1242 {
1243 	struct kfd_ioctl_free_memory_of_gpu_args *args = data;
1244 	struct kfd_process_device *pdd;
1245 	void *mem;
1246 	struct kfd_dev *dev;
1247 	int ret;
1248 
1249 	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
1250 	if (!dev)
1251 		return -EINVAL;
1252 
1253 	mutex_lock(&p->mutex);
1254 
1255 	pdd = kfd_get_process_device_data(dev, p);
1256 	if (!pdd) {
1257 		pr_err("Process device data doesn't exist\n");
1258 		ret = -EINVAL;
1259 		goto err_unlock;
1260 	}
1261 
1262 	mem = kfd_process_device_translate_handle(
1263 		pdd, GET_IDR_HANDLE(args->handle));
1264 	if (!mem) {
1265 		ret = -EINVAL;
1266 		goto err_unlock;
1267 	}
1268 
1269 	ret = dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
1270 
1271 	/* If freeing the buffer failed, leave the handle in place for
1272 	 * clean-up during process tear-down.
1273 	 */
1274 	if (!ret)
1275 		kfd_process_device_remove_obj_handle(
1276 			pdd, GET_IDR_HANDLE(args->handle));
1277 
1278 err_unlock:
1279 	mutex_unlock(&p->mutex);
1280 	return ret;
1281 }
1282 
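/*
 * Map an allocated buffer into the GPUVM of every device in the user
 * supplied device list. args->n_success is advanced as mappings complete,
 * so an interrupted call can be retried without remapping.
 */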
1283 static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
1284 					struct kfd_process *p, void *data)
1285 {
1286 	struct kfd_ioctl_map_memory_to_gpu_args *args = data;
1287 	struct kfd_process_device *pdd, *peer_pdd;
1288 	void *mem;
1289 	struct kfd_dev *dev, *peer;
1290 	long err = 0;
1291 	int i;
1292 	uint32_t *devices_arr = NULL;
1293 
1294 	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
1295 	if (!dev)
1296 		return -EINVAL;
1297 
1298 	if (!args->n_devices) {
1299 		pr_debug("Device IDs array empty\n");
1300 		return -EINVAL;
1301 	}
1302 	if (args->n_success > args->n_devices) {
1303 		pr_debug("n_success exceeds n_devices\n");
1304 		return -EINVAL;
1305 	}
1306 
1307 	devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
1308 				    GFP_KERNEL);
1309 	if (!devices_arr)
1310 		return -ENOMEM;
1311 
1312 	err = copy_from_user(devices_arr,
1313 			     (void __user *)args->device_ids_array_ptr,
1314 			     args->n_devices * sizeof(*devices_arr));
1315 	if (err != 0) {
1316 		err = -EFAULT;
1317 		goto copy_from_user_failed;
1318 	}
1319 
1320 	mutex_lock(&p->mutex);
1321 
1322 	pdd = kfd_bind_process_to_device(dev, p);
1323 	if (IS_ERR(pdd)) {
1324 		err = PTR_ERR(pdd);
1325 		goto bind_process_to_device_failed;
1326 	}
1327 
1328 	mem = kfd_process_device_translate_handle(pdd,
1329 						GET_IDR_HANDLE(args->handle));
1330 	if (!mem) {
1331 		err = -ENOMEM;
1332 		goto get_mem_obj_from_handle_failed;
1333 	}
1334 
1335 	for (i = args->n_success; i < args->n_devices; i++) {
1336 		peer = kfd_device_by_id(devices_arr[i]);
1337 		if (!peer) {
1338 			pr_debug("Getting device by id failed for 0x%x\n",
1339 				 devices_arr[i]);
1340 			err = -EINVAL;
1341 			goto get_mem_obj_from_handle_failed;
1342 		}
1343 
1344 		peer_pdd = kfd_bind_process_to_device(peer, p);
1345 		if (IS_ERR(peer_pdd)) {
1346 			err = PTR_ERR(peer_pdd);
1347 			goto get_mem_obj_from_handle_failed;
1348 		}
1349 		err = peer->kfd2kgd->map_memory_to_gpu(
1350 			peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
1351 		if (err) {
1352 			pr_err("Failed to map to gpu %d/%d\n",
1353 			       i, args->n_devices);
1354 			goto map_memory_to_gpu_failed;
1355 		}
1356 		args->n_success = i+1;
1357 	}
1358 
1359 	mutex_unlock(&p->mutex);
1360 
1361 	err = dev->kfd2kgd->sync_memory(dev->kgd, (struct kgd_mem *) mem, true);
1362 	if (err) {
1363 		pr_debug("Sync memory failed, wait interrupted by user signal\n");
1364 		goto sync_memory_failed;
1365 	}
1366 
1367 	/* Flush TLBs after waiting for the page table updates to complete */
1368 	for (i = 0; i < args->n_devices; i++) {
1369 		peer = kfd_device_by_id(devices_arr[i]);
1370 		if (WARN_ON_ONCE(!peer))
1371 			continue;
1372 		peer_pdd = kfd_get_process_device_data(peer, p);
1373 		if (WARN_ON_ONCE(!peer_pdd))
1374 			continue;
1375 		kfd_flush_tlb(peer_pdd);
1376 	}
1377 
1378 	kfree(devices_arr);
1379 
1380 	return err;
1381 
1382 bind_process_to_device_failed:
1383 get_mem_obj_from_handle_failed:
1384 map_memory_to_gpu_failed:
1385 	mutex_unlock(&p->mutex);
1386 copy_from_user_failed:
1387 sync_memory_failed:
1388 	kfree(devices_arr);
1389 
1390 	return err;
1391 }
1392 
1393 static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
1394 					struct kfd_process *p, void *data)
1395 {
1396 	struct kfd_ioctl_unmap_memory_from_gpu_args *args = data;
1397 	struct kfd_process_device *pdd, *peer_pdd;
1398 	void *mem;
1399 	struct kfd_dev *dev, *peer;
1400 	long err = 0;
1401 	uint32_t *devices_arr = NULL, i;
1402 
1403 	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
1404 	if (!dev)
1405 		return -EINVAL;
1406 
1407 	if (!args->n_devices) {
1408 		pr_debug("Device IDs array empty\n");
1409 		return -EINVAL;
1410 	}
1411 	if (args->n_success > args->n_devices) {
1412 		pr_debug("n_success exceeds n_devices\n");
1413 		return -EINVAL;
1414 	}
1415 
1416 	devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
1417 				    GFP_KERNEL);
1418 	if (!devices_arr)
1419 		return -ENOMEM;
1420 
1421 	err = copy_from_user(devices_arr,
1422 			     (void __user *)args->device_ids_array_ptr,
1423 			     args->n_devices * sizeof(*devices_arr));
1424 	if (err != 0) {
1425 		err = -EFAULT;
1426 		goto copy_from_user_failed;
1427 	}
1428 
1429 	mutex_lock(&p->mutex);
1430 
1431 	pdd = kfd_get_process_device_data(dev, p);
1432 	if (!pdd) {
1433 		err = -EINVAL;
1434 		goto bind_process_to_device_failed;
1435 	}
1436 
1437 	mem = kfd_process_device_translate_handle(pdd,
1438 						GET_IDR_HANDLE(args->handle));
1439 	if (!mem) {
1440 		err = -ENOMEM;
1441 		goto get_mem_obj_from_handle_failed;
1442 	}
1443 
1444 	for (i = args->n_success; i < args->n_devices; i++) {
1445 		peer = kfd_device_by_id(devices_arr[i]);
1446 		if (!peer) {
1447 			err = -EINVAL;
1448 			goto get_mem_obj_from_handle_failed;
1449 		}
1450 
1451 		peer_pdd = kfd_get_process_device_data(peer, p);
1452 		if (!peer_pdd) {
1453 			err = -ENODEV;
1454 			goto get_mem_obj_from_handle_failed;
1455 		}
		err = peer->kfd2kgd->unmap_memory_to_gpu(
			peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
1458 		if (err) {
1459 			pr_err("Failed to unmap from gpu %d/%d\n",
1460 			       i, args->n_devices);
1461 			goto unmap_memory_from_gpu_failed;
1462 		}
1463 		args->n_success = i+1;
1464 	}
1465 	kfree(devices_arr);
1466 
1467 	mutex_unlock(&p->mutex);
1468 
1469 	return 0;
1470 
1471 bind_process_to_device_failed:
1472 get_mem_obj_from_handle_failed:
1473 unmap_memory_from_gpu_failed:
1474 	mutex_unlock(&p->mutex);
1475 copy_from_user_failed:
1476 	kfree(devices_arr);
1477 	return err;
1478 }
1479 
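/*
 * Build one entry of the ioctl descriptor table, indexed by the ioctl's
 * _IOC_NR so that kfd_ioctl() can look up handlers directly by number.
 */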
1480 #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
1481 	[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
1482 			    .cmd_drv = 0, .name = #ioctl}
1483 
/* Ioctl table */
1485 static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
1486 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_VERSION,
1487 			kfd_ioctl_get_version, 0),
1488 
1489 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_QUEUE,
1490 			kfd_ioctl_create_queue, 0),
1491 
1492 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_QUEUE,
1493 			kfd_ioctl_destroy_queue, 0),
1494 
1495 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_MEMORY_POLICY,
1496 			kfd_ioctl_set_memory_policy, 0),
1497 
1498 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_CLOCK_COUNTERS,
1499 			kfd_ioctl_get_clock_counters, 0),
1500 
1501 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES,
1502 			kfd_ioctl_get_process_apertures, 0),
1503 
1504 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UPDATE_QUEUE,
1505 			kfd_ioctl_update_queue, 0),
1506 
1507 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_EVENT,
1508 			kfd_ioctl_create_event, 0),
1509 
1510 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_EVENT,
1511 			kfd_ioctl_destroy_event, 0),
1512 
1513 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_EVENT,
1514 			kfd_ioctl_set_event, 0),
1515 
1516 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_RESET_EVENT,
1517 			kfd_ioctl_reset_event, 0),
1518 
1519 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS,
1520 			kfd_ioctl_wait_events, 0),
1521 
1522 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER,
1523 			kfd_ioctl_dbg_register, 0),
1524 
1525 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER,
1526 			kfd_ioctl_dbg_unregister, 0),
1527 
1528 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH,
1529 			kfd_ioctl_dbg_address_watch, 0),
1530 
1531 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL,
1532 			kfd_ioctl_dbg_wave_control, 0),
1533 
1534 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA,
1535 			kfd_ioctl_set_scratch_backing_va, 0),
1536 
1537 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG,
1538 			kfd_ioctl_get_tile_config, 0),
1539 
1540 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER,
1541 			kfd_ioctl_set_trap_handler, 0),
1542 
1543 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW,
1544 			kfd_ioctl_get_process_apertures_new, 0),
1545 
1546 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ACQUIRE_VM,
1547 			kfd_ioctl_acquire_vm, 0),
1548 
1549 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU,
1550 			kfd_ioctl_alloc_memory_of_gpu, 0),
1551 
1552 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_FREE_MEMORY_OF_GPU,
1553 			kfd_ioctl_free_memory_of_gpu, 0),
1554 
1555 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_MAP_MEMORY_TO_GPU,
1556 			kfd_ioctl_map_memory_to_gpu, 0),
1557 
1558 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU,
1559 			kfd_ioctl_unmap_memory_from_gpu, 0),
1560 
1561 };
1562 
1563 #define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)
1564 
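/*
 * Common ioctl dispatcher: looks up the handler in amdkfd_ioctls[] by the
 * ioctl number, stages the argument struct in a kernel buffer (on the stack
 * for small payloads) and copies results back to user space on output.
 */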
1565 static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
1566 {
1567 	struct kfd_process *process;
1568 	amdkfd_ioctl_t *func;
1569 	const struct amdkfd_ioctl_desc *ioctl = NULL;
1570 	unsigned int nr = _IOC_NR(cmd);
1571 	char stack_kdata[128];
1572 	char *kdata = NULL;
1573 	unsigned int usize, asize;
1574 	int retcode = -EINVAL;
1575 
1576 	if (nr >= AMDKFD_CORE_IOCTL_COUNT)
1577 		goto err_i1;
1578 
1579 	if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) {
1580 		u32 amdkfd_size;
1581 
1582 		ioctl = &amdkfd_ioctls[nr];
1583 
1584 		amdkfd_size = _IOC_SIZE(ioctl->cmd);
1585 		usize = asize = _IOC_SIZE(cmd);
1586 		if (amdkfd_size > asize)
1587 			asize = amdkfd_size;
1588 
1589 		cmd = ioctl->cmd;
	} else {
		goto err_i1;
	}
1592 
1593 	dev_dbg(kfd_device, "ioctl cmd 0x%x (#%d), arg 0x%lx\n", cmd, nr, arg);
1594 
1595 	process = kfd_get_process(current);
1596 	if (IS_ERR(process)) {
1597 		dev_dbg(kfd_device, "no process\n");
1598 		goto err_i1;
1599 	}
1600 
1601 	/* Do not trust userspace, use our own definition */
1602 	func = ioctl->func;
1603 
1604 	if (unlikely(!func)) {
1605 		dev_dbg(kfd_device, "no function\n");
1606 		retcode = -EINVAL;
1607 		goto err_i1;
1608 	}
1609 
1610 	if (cmd & (IOC_IN | IOC_OUT)) {
1611 		if (asize <= sizeof(stack_kdata)) {
1612 			kdata = stack_kdata;
1613 		} else {
1614 			kdata = kmalloc(asize, GFP_KERNEL);
1615 			if (!kdata) {
1616 				retcode = -ENOMEM;
1617 				goto err_i1;
1618 			}
1619 		}
1620 		if (asize > usize)
1621 			memset(kdata + usize, 0, asize - usize);
1622 	}
1623 
1624 	if (cmd & IOC_IN) {
1625 		if (copy_from_user(kdata, (void __user *)arg, usize) != 0) {
1626 			retcode = -EFAULT;
1627 			goto err_i1;
1628 		}
1629 	} else if (cmd & IOC_OUT) {
1630 		memset(kdata, 0, usize);
1631 	}
1632 
1633 	retcode = func(filep, process, kdata);
1634 
1635 	if (cmd & IOC_OUT)
1636 		if (copy_to_user((void __user *)arg, kdata, usize) != 0)
1637 			retcode = -EFAULT;
1638 
1639 err_i1:
1640 	if (!ioctl)
1641 		dev_dbg(kfd_device, "invalid ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n",
1642 			  task_pid_nr(current), cmd, nr);
1643 
1644 	if (kdata != stack_kdata)
1645 		kfree(kdata);
1646 
1647 	if (retcode)
1648 		dev_dbg(kfd_device, "ret = %d\n", retcode);
1649 
1650 	return retcode;
1651 }
1652 
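/*
 * mmap handler for /dev/kfd: the mmap type and gpu_id are encoded in the
 * high bits of the offset and select the doorbell, event page or reserved
 * memory mapping path.
 */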
1653 static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
1654 {
1655 	struct kfd_process *process;
1656 	struct kfd_dev *dev = NULL;
1657 	unsigned long vm_pgoff;
1658 	unsigned int gpu_id;
1659 
1660 	process = kfd_get_process(current);
1661 	if (IS_ERR(process))
1662 		return PTR_ERR(process);
1663 
1664 	vm_pgoff = vma->vm_pgoff;
1665 	vma->vm_pgoff = KFD_MMAP_OFFSET_VALUE_GET(vm_pgoff);
1666 	gpu_id = KFD_MMAP_GPU_ID_GET(vm_pgoff);
1667 	if (gpu_id)
1668 		dev = kfd_device_by_id(gpu_id);
1669 
1670 	switch (vm_pgoff & KFD_MMAP_TYPE_MASK) {
1671 	case KFD_MMAP_TYPE_DOORBELL:
1672 		if (!dev)
1673 			return -ENODEV;
1674 		return kfd_doorbell_mmap(dev, process, vma);
1675 
1676 	case KFD_MMAP_TYPE_EVENTS:
1677 		return kfd_event_mmap(process, vma);
1678 
1679 	case KFD_MMAP_TYPE_RESERVED_MEM:
1680 		if (!dev)
1681 			return -ENODEV;
1682 		return kfd_reserved_mem_mmap(dev, process, vma);
1683 	}
1684 
1685 	return -EFAULT;
1686 }
1687