1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22 
23 #include <linux/device.h>
24 #include <linux/export.h>
25 #include <linux/err.h>
26 #include <linux/fs.h>
27 #include <linux/file.h>
28 #include <linux/sched.h>
29 #include <linux/slab.h>
30 #include <linux/uaccess.h>
31 #include <linux/compat.h>
32 #include <uapi/linux/kfd_ioctl.h>
33 #include <linux/time.h>
34 #include <linux/mm.h>
35 #include <linux/mman.h>
36 #include <linux/dma-buf.h>
37 #include <asm/processor.h>
38 #include "kfd_priv.h"
39 #include "kfd_device_queue_manager.h"
40 #include "kfd_dbgmgr.h"
41 #include "amdgpu_amdkfd.h"
42 #include "kfd_smi_events.h"
43 
44 static long kfd_ioctl(struct file *, unsigned int, unsigned long);
45 static int kfd_open(struct inode *, struct file *);
46 static int kfd_release(struct inode *, struct file *);
47 static int kfd_mmap(struct file *, struct vm_area_struct *);
48 
49 static const char kfd_dev_name[] = "kfd";
50 
51 static const struct file_operations kfd_fops = {
52 	.owner = THIS_MODULE,
53 	.unlocked_ioctl = kfd_ioctl,
54 	.compat_ioctl = compat_ptr_ioctl,
55 	.open = kfd_open,
56 	.release = kfd_release,
57 	.mmap = kfd_mmap,
58 };
59 
60 static int kfd_char_dev_major = -1;
61 static struct class *kfd_class;
62 struct device *kfd_device;
63 
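/*
 * Register the /dev/kfd character device: a dynamically allocated char
 * major, a "kfd" device class and a single device node with minor 0.
 */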
64 int kfd_chardev_init(void)
65 {
66 	int err = 0;
67 
68 	kfd_char_dev_major = register_chrdev(0, kfd_dev_name, &kfd_fops);
69 	err = kfd_char_dev_major;
70 	if (err < 0)
71 		goto err_register_chrdev;
72 
73 	kfd_class = class_create(THIS_MODULE, kfd_dev_name);
74 	err = PTR_ERR(kfd_class);
75 	if (IS_ERR(kfd_class))
76 		goto err_class_create;
77 
78 	kfd_device = device_create(kfd_class, NULL,
79 					MKDEV(kfd_char_dev_major, 0),
80 					NULL, kfd_dev_name);
81 	err = PTR_ERR(kfd_device);
82 	if (IS_ERR(kfd_device))
83 		goto err_device_create;
84 
85 	return 0;
86 
87 err_device_create:
88 	class_destroy(kfd_class);
89 err_class_create:
90 	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
91 err_register_chrdev:
92 	return err;
93 }
94 
95 void kfd_chardev_exit(void)
96 {
97 	device_destroy(kfd_class, MKDEV(kfd_char_dev_major, 0));
98 	class_destroy(kfd_class);
99 	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
100 	kfd_device = NULL;
101 }
102 
103 struct device *kfd_chardev(void)
104 {
105 	return kfd_device;
106 }
107 
108 
109 static int kfd_open(struct inode *inode, struct file *filep)
110 {
111 	struct kfd_process *process;
112 	bool is_32bit_user_mode;
113 
114 	if (iminor(inode) != 0)
115 		return -ENODEV;
116 
117 	is_32bit_user_mode = in_compat_syscall();
118 
119 	if (is_32bit_user_mode) {
120 		dev_warn(kfd_device,
121 			"Process %d (32-bit) failed to open /dev/kfd\n"
122 			"32-bit processes are not supported by amdkfd\n",
123 			current->pid);
124 		return -EPERM;
125 	}
126 
127 	process = kfd_create_process(filep);
128 	if (IS_ERR(process))
129 		return PTR_ERR(process);
130 
131 	if (kfd_is_locked()) {
132 		dev_dbg(kfd_device, "kfd is locked!\n"
133 				"process %d unreferenced\n", process->pasid);
134 		kfd_unref_process(process);
135 		return -EAGAIN;
136 	}
137 
138 	/* filep now owns the reference returned by kfd_create_process */
139 	filep->private_data = process;
140 
141 	dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
142 		process->pasid, process->is_32bit_user_mode);
143 
144 	return 0;
145 }
146 
147 static int kfd_release(struct inode *inode, struct file *filep)
148 {
149 	struct kfd_process *process = filep->private_data;
150 
151 	if (process)
152 		kfd_unref_process(process);
153 
154 	return 0;
155 }
156 
157 static int kfd_ioctl_get_version(struct file *filep, struct kfd_process *p,
158 					void *data)
159 {
160 	struct kfd_ioctl_get_version_args *args = data;
161 
162 	args->major_version = KFD_IOCTL_MAJOR_VERSION;
163 	args->minor_version = KFD_IOCTL_MINOR_VERSION;
164 
165 	return 0;
166 }
167 
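/*
 * Validate the user-supplied queue creation arguments and translate them
 * into a struct queue_properties. User pointers are only checked with
 * access_ok() here; they are dereferenced later by the queue machinery.
 */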
168 static int set_queue_properties_from_user(struct queue_properties *q_properties,
169 				struct kfd_ioctl_create_queue_args *args)
170 {
171 	if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
172 		pr_err("Queue percentage must be between 0 and KFD_MAX_QUEUE_PERCENTAGE\n");
173 		return -EINVAL;
174 	}
175 
176 	if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
177 		pr_err("Queue priority must be between 0 and KFD_MAX_QUEUE_PRIORITY\n");
178 		return -EINVAL;
179 	}
180 
181 	if ((args->ring_base_address) &&
182 		(!access_ok((const void __user *) args->ring_base_address,
183 			sizeof(uint64_t)))) {
184 		pr_err("Can't access ring base address\n");
185 		return -EFAULT;
186 	}
187 
188 	if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
189 		pr_err("Ring size must be a power of 2 or 0\n");
190 		return -EINVAL;
191 	}
192 
193 	if (!access_ok((const void __user *) args->read_pointer_address,
194 			sizeof(uint32_t))) {
195 		pr_err("Can't access read pointer\n");
196 		return -EFAULT;
197 	}
198 
199 	if (!access_ok((const void __user *) args->write_pointer_address,
200 			sizeof(uint32_t))) {
201 		pr_err("Can't access write pointer\n");
202 		return -EFAULT;
203 	}
204 
205 	if (args->eop_buffer_address &&
206 		!access_ok((const void __user *) args->eop_buffer_address,
207 			sizeof(uint32_t))) {
208 		pr_debug("Can't access eop buffer");
209 		return -EFAULT;
210 	}
211 
212 	if (args->ctx_save_restore_address &&
213 		!access_ok((const void __user *) args->ctx_save_restore_address,
214 			sizeof(uint32_t))) {
215 		pr_debug("Can't access ctx save restore buffer");
216 		return -EFAULT;
217 	}
218 
219 	q_properties->is_interop = false;
220 	q_properties->is_gws = false;
221 	q_properties->queue_percent = args->queue_percentage;
222 	q_properties->priority = args->queue_priority;
223 	q_properties->queue_address = args->ring_base_address;
224 	q_properties->queue_size = args->ring_size;
225 	q_properties->read_ptr = (uint32_t *) args->read_pointer_address;
226 	q_properties->write_ptr = (uint32_t *) args->write_pointer_address;
227 	q_properties->eop_ring_buffer_address = args->eop_buffer_address;
228 	q_properties->eop_ring_buffer_size = args->eop_buffer_size;
229 	q_properties->ctx_save_restore_area_address =
230 			args->ctx_save_restore_address;
231 	q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size;
232 	q_properties->ctl_stack_size = args->ctl_stack_size;
233 	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
234 		args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
235 		q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
236 	else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)
237 		q_properties->type = KFD_QUEUE_TYPE_SDMA;
238 	else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_XGMI)
239 		q_properties->type = KFD_QUEUE_TYPE_SDMA_XGMI;
240 	else
241 		return -ENOTSUPP;
242 
243 	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
244 		q_properties->format = KFD_QUEUE_FORMAT_AQL;
245 	else
246 		q_properties->format = KFD_QUEUE_FORMAT_PM4;
247 
248 	pr_debug("Queue Percentage: %d, %d\n",
249 			q_properties->queue_percent, args->queue_percentage);
250 
251 	pr_debug("Queue Priority: %d, %d\n",
252 			q_properties->priority, args->queue_priority);
253 
254 	pr_debug("Queue Address: 0x%llX, 0x%llX\n",
255 			q_properties->queue_address, args->ring_base_address);
256 
257 	pr_debug("Queue Size: 0x%llX, %u\n",
258 			q_properties->queue_size, args->ring_size);
259 
260 	pr_debug("Queue r/w Pointers: %px, %px\n",
261 			q_properties->read_ptr,
262 			q_properties->write_ptr);
263 
264 	pr_debug("Queue Format: %d\n", q_properties->format);
265 
266 	pr_debug("Queue EOP: 0x%llX\n", q_properties->eop_ring_buffer_address);
267 
268 	pr_debug("Queue CTX save area: 0x%llX\n",
269 			q_properties->ctx_save_restore_area_address);
270 
271 	return 0;
272 }
273 
274 static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
275 					void *data)
276 {
277 	struct kfd_ioctl_create_queue_args *args = data;
278 	struct kfd_dev *dev;
279 	int err = 0;
280 	unsigned int queue_id;
281 	struct kfd_process_device *pdd;
282 	struct queue_properties q_properties;
283 	uint32_t doorbell_offset_in_process = 0;
284 
285 	memset(&q_properties, 0, sizeof(struct queue_properties));
286 
287 	pr_debug("Creating queue ioctl\n");
288 
289 	err = set_queue_properties_from_user(&q_properties, args);
290 	if (err)
291 		return err;
292 
293 	pr_debug("Looking for gpu id 0x%x\n", args->gpu_id);
294 	dev = kfd_device_by_id(args->gpu_id);
295 	if (!dev) {
296 		pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
297 		return -EINVAL;
298 	}
299 
300 	mutex_lock(&p->mutex);
301 
302 	pdd = kfd_bind_process_to_device(dev, p);
303 	if (IS_ERR(pdd)) {
304 		err = -ESRCH;
305 		goto err_bind_process;
306 	}
307 
308 	pr_debug("Creating queue for PASID 0x%x on gpu 0x%x\n",
309 			p->pasid,
310 			dev->id);
311 
312 	err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id,
313 			&doorbell_offset_in_process);
314 	if (err != 0)
315 		goto err_create_queue;
316 
317 	args->queue_id = queue_id;
318 
319 
320 	/* Return a doorbell mmap offset that encodes the mmap type and gpu_id */
321 	args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
322 	args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
323 	if (KFD_IS_SOC15(dev->device_info->asic_family))
324 		/* On SOC15 ASICs, include the doorbell offset within the
325 		 * process doorbell frame, which is 2 pages.
326 		 */
327 		args->doorbell_offset |= doorbell_offset_in_process;
328 
329 	mutex_unlock(&p->mutex);
330 
331 	pr_debug("Queue id %d was created successfully\n", args->queue_id);
332 
333 	pr_debug("Ring buffer address == 0x%016llX\n",
334 			args->ring_base_address);
335 
336 	pr_debug("Read ptr address    == 0x%016llX\n",
337 			args->read_pointer_address);
338 
339 	pr_debug("Write ptr address   == 0x%016llX\n",
340 			args->write_pointer_address);
341 
342 	return 0;
343 
344 err_create_queue:
345 err_bind_process:
346 	mutex_unlock(&p->mutex);
347 	return err;
348 }
349 
350 static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
351 					void *data)
352 {
353 	int retval;
354 	struct kfd_ioctl_destroy_queue_args *args = data;
355 
356 	pr_debug("Destroying queue id %d for pasid 0x%x\n",
357 				args->queue_id,
358 				p->pasid);
359 
360 	mutex_lock(&p->mutex);
361 
362 	retval = pqm_destroy_queue(&p->pqm, args->queue_id);
363 
364 	mutex_unlock(&p->mutex);
365 	return retval;
366 }
367 
368 static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
369 					void *data)
370 {
371 	int retval;
372 	struct kfd_ioctl_update_queue_args *args = data;
373 	struct queue_properties properties;
374 
375 	if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
376 		pr_err("Queue percentage must be between 0 and KFD_MAX_QUEUE_PERCENTAGE\n");
377 		return -EINVAL;
378 	}
379 
380 	if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
381 		pr_err("Queue priority must be between 0 and KFD_MAX_QUEUE_PRIORITY\n");
382 		return -EINVAL;
383 	}
384 
385 	if ((args->ring_base_address) &&
386 		(!access_ok((const void __user *) args->ring_base_address,
387 			sizeof(uint64_t)))) {
388 		pr_err("Can't access ring base address\n");
389 		return -EFAULT;
390 	}
391 
392 	if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
393 		pr_err("Ring size must be a power of 2 or 0\n");
394 		return -EINVAL;
395 	}
396 
397 	properties.queue_address = args->ring_base_address;
398 	properties.queue_size = args->ring_size;
399 	properties.queue_percent = args->queue_percentage;
400 	properties.priority = args->queue_priority;
401 
402 	pr_debug("Updating queue id %d for pasid 0x%x\n",
403 			args->queue_id, p->pasid);
404 
405 	mutex_lock(&p->mutex);
406 
407 	retval = pqm_update_queue(&p->pqm, args->queue_id, &properties);
408 
409 	mutex_unlock(&p->mutex);
410 
411 	return retval;
412 }
413 
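/*
 * Copy a per-queue CU mask from user space (a multiple of 32 bits, capped
 * at max_num_cus bits) and apply it through the process queue manager.
 */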
414 static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p,
415 					void *data)
416 {
417 	int retval;
418 	const int max_num_cus = 1024;
419 	struct kfd_ioctl_set_cu_mask_args *args = data;
420 	struct queue_properties properties;
421 	uint32_t __user *cu_mask_ptr = (uint32_t __user *)args->cu_mask_ptr;
422 	size_t cu_mask_size = sizeof(uint32_t) * (args->num_cu_mask / 32);
423 
424 	if ((args->num_cu_mask % 32) != 0) {
425 		pr_debug("num_cu_mask 0x%x must be a multiple of 32",
426 				args->num_cu_mask);
427 		return -EINVAL;
428 	}
429 
430 	properties.cu_mask_count = args->num_cu_mask;
431 	if (properties.cu_mask_count == 0) {
432 		pr_debug("CU mask cannot be 0");
433 		return -EINVAL;
434 	}
435 
436 	/* To prevent an unreasonably large CU mask size, set an arbitrary
437 	 * limit of max_num_cus bits. Any CU mask bits past max_num_cus are
438 	 * dropped and only the first max_num_cus bits are used.
439 	 */
440 	if (properties.cu_mask_count > max_num_cus) {
441 		pr_debug("CU mask cannot be greater than 1024 bits");
442 		properties.cu_mask_count = max_num_cus;
443 		cu_mask_size = sizeof(uint32_t) * (max_num_cus/32);
444 	}
445 
446 	properties.cu_mask = kzalloc(cu_mask_size, GFP_KERNEL);
447 	if (!properties.cu_mask)
448 		return -ENOMEM;
449 
450 	retval = copy_from_user(properties.cu_mask, cu_mask_ptr, cu_mask_size);
451 	if (retval) {
452 		pr_debug("Could not copy CU mask from userspace");
453 		kfree(properties.cu_mask);
454 		return -EFAULT;
455 	}
456 
457 	mutex_lock(&p->mutex);
458 
459 	retval = pqm_set_cu_mask(&p->pqm, args->queue_id, &properties);
460 
461 	mutex_unlock(&p->mutex);
462 
463 	if (retval)
464 		kfree(properties.cu_mask);
465 
466 	return retval;
467 }
468 
469 static int kfd_ioctl_get_queue_wave_state(struct file *filep,
470 					  struct kfd_process *p, void *data)
471 {
472 	struct kfd_ioctl_get_queue_wave_state_args *args = data;
473 	int r;
474 
475 	mutex_lock(&p->mutex);
476 
477 	r = pqm_get_wave_state(&p->pqm, args->queue_id,
478 			       (void __user *)args->ctl_stack_address,
479 			       &args->ctl_stack_used_size,
480 			       &args->save_area_used_size);
481 
482 	mutex_unlock(&p->mutex);
483 
484 	return r;
485 }
486 
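/*
 * Set the default and alternate cache policies for this process's queues
 * on the given device; the alternate aperture base and size are passed
 * through to the device queue manager.
 */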
487 static int kfd_ioctl_set_memory_policy(struct file *filep,
488 					struct kfd_process *p, void *data)
489 {
490 	struct kfd_ioctl_set_memory_policy_args *args = data;
491 	struct kfd_dev *dev;
492 	int err = 0;
493 	struct kfd_process_device *pdd;
494 	enum cache_policy default_policy, alternate_policy;
495 
496 	if (args->default_policy != KFD_IOC_CACHE_POLICY_COHERENT
497 	    && args->default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
498 		return -EINVAL;
499 	}
500 
501 	if (args->alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT
502 	    && args->alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
503 		return -EINVAL;
504 	}
505 
506 	dev = kfd_device_by_id(args->gpu_id);
507 	if (!dev)
508 		return -EINVAL;
509 
510 	mutex_lock(&p->mutex);
511 
512 	pdd = kfd_bind_process_to_device(dev, p);
513 	if (IS_ERR(pdd)) {
514 		err = -ESRCH;
515 		goto out;
516 	}
517 
518 	default_policy = (args->default_policy == KFD_IOC_CACHE_POLICY_COHERENT)
519 			 ? cache_policy_coherent : cache_policy_noncoherent;
520 
521 	alternate_policy =
522 		(args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)
523 		   ? cache_policy_coherent : cache_policy_noncoherent;
524 
525 	if (!dev->dqm->ops.set_cache_memory_policy(dev->dqm,
526 				&pdd->qpd,
527 				default_policy,
528 				alternate_policy,
529 				(void __user *)args->alternate_aperture_base,
530 				args->alternate_aperture_size))
531 		err = -EINVAL;
532 
533 out:
534 	mutex_unlock(&p->mutex);
535 
536 	return err;
537 }
538 
539 static int kfd_ioctl_set_trap_handler(struct file *filep,
540 					struct kfd_process *p, void *data)
541 {
542 	struct kfd_ioctl_set_trap_handler_args *args = data;
543 	struct kfd_dev *dev;
544 	int err = 0;
545 	struct kfd_process_device *pdd;
546 
547 	dev = kfd_device_by_id(args->gpu_id);
548 	if (!dev)
549 		return -EINVAL;
550 
551 	mutex_lock(&p->mutex);
552 
553 	pdd = kfd_bind_process_to_device(dev, p);
554 	if (IS_ERR(pdd)) {
555 		err = -ESRCH;
556 		goto out;
557 	}
558 
559 	kfd_process_set_trap_handler(&pdd->qpd, args->tba_addr, args->tma_addr);
560 
561 out:
562 	mutex_unlock(&p->mutex);
563 
564 	return err;
565 }
566 
567 static int kfd_ioctl_dbg_register(struct file *filep,
568 				struct kfd_process *p, void *data)
569 {
570 	struct kfd_ioctl_dbg_register_args *args = data;
571 	struct kfd_dev *dev;
572 	struct kfd_dbgmgr *dbgmgr_ptr;
573 	struct kfd_process_device *pdd;
574 	bool create_ok;
575 	long status = 0;
576 
577 	dev = kfd_device_by_id(args->gpu_id);
578 	if (!dev)
579 		return -EINVAL;
580 
581 	if (dev->device_info->asic_family == CHIP_CARRIZO) {
582 		pr_debug("kfd_ioctl_dbg_register not supported on CZ\n");
583 		return -EINVAL;
584 	}
585 
586 	mutex_lock(&p->mutex);
587 	mutex_lock(kfd_get_dbgmgr_mutex());
588 
589 	/*
590 	 * make sure that we have a pdd if this is the first queue created
591 	 * for this process
592 	 */
593 	pdd = kfd_bind_process_to_device(dev, p);
594 	if (IS_ERR(pdd)) {
595 		status = PTR_ERR(pdd);
596 		goto out;
597 	}
598 
599 	if (!dev->dbgmgr) {
600 		/* In case of a legal call, we have no dbgmgr yet */
601 		create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev);
602 		if (create_ok) {
603 			status = kfd_dbgmgr_register(dbgmgr_ptr, p);
604 			if (status != 0)
605 				kfd_dbgmgr_destroy(dbgmgr_ptr);
606 			else
607 				dev->dbgmgr = dbgmgr_ptr;
608 		}
609 	} else {
610 		pr_debug("debugger already registered\n");
611 		status = -EINVAL;
612 	}
613 
614 out:
615 	mutex_unlock(kfd_get_dbgmgr_mutex());
616 	mutex_unlock(&p->mutex);
617 
618 	return status;
619 }
620 
621 static int kfd_ioctl_dbg_unregister(struct file *filep,
622 				struct kfd_process *p, void *data)
623 {
624 	struct kfd_ioctl_dbg_unregister_args *args = data;
625 	struct kfd_dev *dev;
626 	long status;
627 
628 	dev = kfd_device_by_id(args->gpu_id);
629 	if (!dev || !dev->dbgmgr)
630 		return -EINVAL;
631 
632 	if (dev->device_info->asic_family == CHIP_CARRIZO) {
633 		pr_debug("kfd_ioctl_dbg_unregister not supported on CZ\n");
634 		return -EINVAL;
635 	}
636 
637 	mutex_lock(kfd_get_dbgmgr_mutex());
638 
639 	status = kfd_dbgmgr_unregister(dev->dbgmgr, p);
640 	if (!status) {
641 		kfd_dbgmgr_destroy(dev->dbgmgr);
642 		dev->dbgmgr = NULL;
643 	}
644 
645 	mutex_unlock(kfd_get_dbgmgr_mutex());
646 
647 	return status;
648 }
649 
650 /*
651  * Parse and generate a variable-size data structure for address watch.
652  * The total buffer size and number of watch points are limited in order
653  * to prevent kernel abuse. (This has no bearing on the much smaller HW
654  * limitation, which is enforced by the dbgdev module.)
655  * Note also that the watch addresses themselves are not copied from
656  * user space, since they are written into the HW as user-mode values.
657  */
659 static int kfd_ioctl_dbg_address_watch(struct file *filep,
660 					struct kfd_process *p, void *data)
661 {
662 	struct kfd_ioctl_dbg_address_watch_args *args = data;
663 	struct kfd_dev *dev;
664 	struct dbg_address_watch_info aw_info;
665 	unsigned char *args_buff;
666 	long status;
667 	void __user *cmd_from_user;
668 	uint64_t watch_mask_value = 0;
669 	unsigned int args_idx = 0;
670 
671 	memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info));
672 
673 	dev = kfd_device_by_id(args->gpu_id);
674 	if (!dev)
675 		return -EINVAL;
676 
677 	if (dev->device_info->asic_family == CHIP_CARRIZO) {
678 		pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
679 		return -EINVAL;
680 	}
681 
682 	cmd_from_user = (void __user *) args->content_ptr;
683 
684 	/* Validate arguments */
685 
686 	if ((args->buf_size_in_bytes > MAX_ALLOWED_AW_BUFF_SIZE) ||
687 		(args->buf_size_in_bytes <= sizeof(*args) + sizeof(int) * 2) ||
688 		(cmd_from_user == NULL))
689 		return -EINVAL;
690 
691 	/* this is the actual buffer to work with */
692 	args_buff = memdup_user(cmd_from_user,
693 				args->buf_size_in_bytes - sizeof(*args));
694 	if (IS_ERR(args_buff))
695 		return PTR_ERR(args_buff);
696 
697 	aw_info.process = p;
698 
699 	aw_info.num_watch_points = *((uint32_t *)(&args_buff[args_idx]));
700 	args_idx += sizeof(aw_info.num_watch_points);
701 
702 	aw_info.watch_mode = (enum HSA_DBG_WATCH_MODE *) &args_buff[args_idx];
703 	args_idx += sizeof(enum HSA_DBG_WATCH_MODE) * aw_info.num_watch_points;
704 
705 	/*
706 	 * set watch address base pointer to point at the array base
707 	 * within args_buff
708 	 */
709 	aw_info.watch_address = (uint64_t *) &args_buff[args_idx];
710 
711 	/* skip over the addresses buffer */
712 	args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points;
713 
714 	if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
715 		status = -EINVAL;
716 		goto out;
717 	}
718 
719 	watch_mask_value = (uint64_t) args_buff[args_idx];
720 
721 	if (watch_mask_value > 0) {
722 		/*
723 		 * There is an array of masks.
724 		 * set watch mask base pointer to point at the array base
725 		 * within args_buff
726 		 */
727 		aw_info.watch_mask = (uint64_t *) &args_buff[args_idx];
728 
729 		/* skip over the masks buffer */
730 		args_idx += sizeof(aw_info.watch_mask) *
731 				aw_info.num_watch_points;
732 	} else {
733 		/* just the NULL mask, set to NULL and skip over it */
734 		aw_info.watch_mask = NULL;
735 		args_idx += sizeof(aw_info.watch_mask);
736 	}
737 
738 	if (args_idx >= args->buf_size_in_bytes - sizeof(args)) {
739 		status = -EINVAL;
740 		goto out;
741 	}
742 
743 	/* Currently HSA Event is not supported for DBG */
744 	aw_info.watch_event = NULL;
745 
746 	mutex_lock(kfd_get_dbgmgr_mutex());
747 
748 	status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info);
749 
750 	mutex_unlock(kfd_get_dbgmgr_mutex());
751 
752 out:
753 	kfree(args_buff);
754 
755 	return status;
756 }
757 
758 /* Parse and generate fixed size data structure for wave control */
759 static int kfd_ioctl_dbg_wave_control(struct file *filep,
760 					struct kfd_process *p, void *data)
761 {
762 	struct kfd_ioctl_dbg_wave_control_args *args = data;
763 	struct kfd_dev *dev;
764 	struct dbg_wave_control_info wac_info;
765 	unsigned char *args_buff;
766 	uint32_t computed_buff_size;
767 	long status;
768 	void __user *cmd_from_user;
769 	unsigned int args_idx = 0;
770 
771 	memset((void *) &wac_info, 0, sizeof(struct dbg_wave_control_info));
772 
773 	/* we use compact form, independent of the packing attribute value */
774 	computed_buff_size = sizeof(*args) +
775 				sizeof(wac_info.mode) +
776 				sizeof(wac_info.operand) +
777 				sizeof(wac_info.dbgWave_msg.DbgWaveMsg) +
778 				sizeof(wac_info.dbgWave_msg.MemoryVA) +
779 				sizeof(wac_info.trapId);
780 
781 	dev = kfd_device_by_id(args->gpu_id);
782 	if (!dev)
783 		return -EINVAL;
784 
785 	if (dev->device_info->asic_family == CHIP_CARRIZO) {
786 		pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
787 		return -EINVAL;
788 	}
789 
790 	/* input size must match the computed "compact" size */
791 	if (args->buf_size_in_bytes != computed_buff_size) {
792 		pr_debug("size mismatch, computed : actual %u : %u\n",
793 				args->buf_size_in_bytes, computed_buff_size);
794 		return -EINVAL;
795 	}
796 
797 	cmd_from_user = (void __user *) args->content_ptr;
798 
799 	if (cmd_from_user == NULL)
800 		return -EINVAL;
801 
802 	/* copy the entire buffer from user */
803 
804 	args_buff = memdup_user(cmd_from_user,
805 				args->buf_size_in_bytes - sizeof(*args));
806 	if (IS_ERR(args_buff))
807 		return PTR_ERR(args_buff);
808 
809 	/* move ptr to the start of the "payload" area */
810 	wac_info.process = p;
811 
812 	wac_info.operand = *((enum HSA_DBG_WAVEOP *)(&args_buff[args_idx]));
813 	args_idx += sizeof(wac_info.operand);
814 
815 	wac_info.mode = *((enum HSA_DBG_WAVEMODE *)(&args_buff[args_idx]));
816 	args_idx += sizeof(wac_info.mode);
817 
818 	wac_info.trapId = *((uint32_t *)(&args_buff[args_idx]));
819 	args_idx += sizeof(wac_info.trapId);
820 
821 	wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value =
822 					*((uint32_t *)(&args_buff[args_idx]));
823 	wac_info.dbgWave_msg.MemoryVA = NULL;
824 
825 	mutex_lock(kfd_get_dbgmgr_mutex());
826 
827 	pr_debug("Calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n",
828 			wac_info.process, wac_info.operand,
829 			wac_info.mode, wac_info.trapId,
830 			wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
831 
832 	status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info);
833 
834 	pr_debug("Returned status of dbg manager is %ld\n", status);
835 
836 	mutex_unlock(kfd_get_dbgmgr_mutex());
837 
838 	kfree(args_buff);
839 
840 	return status;
841 }
842 
843 static int kfd_ioctl_get_clock_counters(struct file *filep,
844 				struct kfd_process *p, void *data)
845 {
846 	struct kfd_ioctl_get_clock_counters_args *args = data;
847 	struct kfd_dev *dev;
848 
849 	dev = kfd_device_by_id(args->gpu_id);
850 	if (dev)
851 		/* Reading GPU clock counter from KGD */
852 		args->gpu_clock_counter = amdgpu_amdkfd_get_gpu_clock_counter(dev->kgd);
853 	else
854 		/* Node without GPU resource */
855 		args->gpu_clock_counter = 0;
856 
857 	/* No access to rdtsc. Using raw monotonic time */
858 	args->cpu_clock_counter = ktime_get_raw_ns();
859 	args->system_clock_counter = ktime_get_boottime_ns();
860 
861 	/* Since the counter is in nanoseconds, we use a 1GHz frequency */
862 	args->system_clock_freq = 1000000000;
863 
864 	return 0;
865 }
866 
867 
868 static int kfd_ioctl_get_process_apertures(struct file *filp,
869 				struct kfd_process *p, void *data)
870 {
871 	struct kfd_ioctl_get_process_apertures_args *args = data;
872 	struct kfd_process_device_apertures *pAperture;
873 	struct kfd_process_device *pdd;
874 
875 	dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);
876 
877 	args->num_of_nodes = 0;
878 
879 	mutex_lock(&p->mutex);
880 
881 	/* if the process-device list isn't empty */
882 	if (kfd_has_process_device_data(p)) {
883 		/* Run over all pdd of the process */
884 		pdd = kfd_get_first_process_device_data(p);
885 		do {
886 			pAperture =
887 				&args->process_apertures[args->num_of_nodes];
888 			pAperture->gpu_id = pdd->dev->id;
889 			pAperture->lds_base = pdd->lds_base;
890 			pAperture->lds_limit = pdd->lds_limit;
891 			pAperture->gpuvm_base = pdd->gpuvm_base;
892 			pAperture->gpuvm_limit = pdd->gpuvm_limit;
893 			pAperture->scratch_base = pdd->scratch_base;
894 			pAperture->scratch_limit = pdd->scratch_limit;
895 
896 			dev_dbg(kfd_device,
897 				"node id %u\n", args->num_of_nodes);
898 			dev_dbg(kfd_device,
899 				"gpu id %u\n", pdd->dev->id);
900 			dev_dbg(kfd_device,
901 				"lds_base %llX\n", pdd->lds_base);
902 			dev_dbg(kfd_device,
903 				"lds_limit %llX\n", pdd->lds_limit);
904 			dev_dbg(kfd_device,
905 				"gpuvm_base %llX\n", pdd->gpuvm_base);
906 			dev_dbg(kfd_device,
907 				"gpuvm_limit %llX\n", pdd->gpuvm_limit);
908 			dev_dbg(kfd_device,
909 				"scratch_base %llX\n", pdd->scratch_base);
910 			dev_dbg(kfd_device,
911 				"scratch_limit %llX\n", pdd->scratch_limit);
912 
913 			args->num_of_nodes++;
914 
915 			pdd = kfd_get_next_process_device_data(p, pdd);
916 		} while (pdd && (args->num_of_nodes < NUM_OF_SUPPORTED_GPUS));
917 	}
918 
919 	mutex_unlock(&p->mutex);
920 
921 	return 0;
922 }
923 
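/*
 * Two-step protocol: when args->num_of_nodes is 0, only the number of
 * nodes with a process-device entry is returned so that user space can
 * size its buffer; otherwise the apertures of up to num_of_nodes devices
 * are copied to args->kfd_process_device_apertures_ptr.
 */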
924 static int kfd_ioctl_get_process_apertures_new(struct file *filp,
925 				struct kfd_process *p, void *data)
926 {
927 	struct kfd_ioctl_get_process_apertures_new_args *args = data;
928 	struct kfd_process_device_apertures *pa;
929 	struct kfd_process_device *pdd;
930 	uint32_t nodes = 0;
931 	int ret;
932 
933 	dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);
934 
935 	if (args->num_of_nodes == 0) {
936 		/* Return the number of nodes, so that user space can allocate
937 		 * sufficient memory
938 		 */
939 		mutex_lock(&p->mutex);
940 
941 		if (!kfd_has_process_device_data(p))
942 			goto out_unlock;
943 
944 		/* Run over all pdd of the process */
945 		pdd = kfd_get_first_process_device_data(p);
946 		do {
947 			args->num_of_nodes++;
948 			pdd = kfd_get_next_process_device_data(p, pdd);
949 		} while (pdd);
950 
951 		goto out_unlock;
952 	}
953 
954 	/* Fill in process-aperture information for all available
955 	 * nodes, but not more than args->num_of_nodes as that is
956 	 * the amount of memory allocated by user
957 	 */
958 	pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
959 				args->num_of_nodes), GFP_KERNEL);
960 	if (!pa)
961 		return -ENOMEM;
962 
963 	mutex_lock(&p->mutex);
964 
965 	if (!kfd_has_process_device_data(p)) {
966 		args->num_of_nodes = 0;
967 		kfree(pa);
968 		goto out_unlock;
969 	}
970 
971 	/* Run over all pdd of the process */
972 	pdd = kfd_get_first_process_device_data(p);
973 	do {
974 		pa[nodes].gpu_id = pdd->dev->id;
975 		pa[nodes].lds_base = pdd->lds_base;
976 		pa[nodes].lds_limit = pdd->lds_limit;
977 		pa[nodes].gpuvm_base = pdd->gpuvm_base;
978 		pa[nodes].gpuvm_limit = pdd->gpuvm_limit;
979 		pa[nodes].scratch_base = pdd->scratch_base;
980 		pa[nodes].scratch_limit = pdd->scratch_limit;
981 
982 		dev_dbg(kfd_device,
983 			"gpu id %u\n", pdd->dev->id);
984 		dev_dbg(kfd_device,
985 			"lds_base %llX\n", pdd->lds_base);
986 		dev_dbg(kfd_device,
987 			"lds_limit %llX\n", pdd->lds_limit);
988 		dev_dbg(kfd_device,
989 			"gpuvm_base %llX\n", pdd->gpuvm_base);
990 		dev_dbg(kfd_device,
991 			"gpuvm_limit %llX\n", pdd->gpuvm_limit);
992 		dev_dbg(kfd_device,
993 			"scratch_base %llX\n", pdd->scratch_base);
994 		dev_dbg(kfd_device,
995 			"scratch_limit %llX\n", pdd->scratch_limit);
996 		nodes++;
997 
998 		pdd = kfd_get_next_process_device_data(p, pdd);
999 	} while (pdd && (nodes < args->num_of_nodes));
1000 	mutex_unlock(&p->mutex);
1001 
1002 	args->num_of_nodes = nodes;
1003 	ret = copy_to_user(
1004 			(void __user *)args->kfd_process_device_apertures_ptr,
1005 			pa,
1006 			(nodes * sizeof(struct kfd_process_device_apertures)));
1007 	kfree(pa);
1008 	return ret ? -EFAULT : 0;
1009 
1010 out_unlock:
1011 	mutex_unlock(&p->mutex);
1012 	return 0;
1013 }
1014 
1015 static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
1016 					void *data)
1017 {
1018 	struct kfd_ioctl_create_event_args *args = data;
1019 	int err;
1020 
1021 	/* For dGPUs the event page is allocated in user mode. The
1022 	 * handle is passed to KFD with the first call to this IOCTL
1023 	 * through the event_page_offset field.
1024 	 */
1025 	if (args->event_page_offset) {
1026 		struct kfd_dev *kfd;
1027 		struct kfd_process_device *pdd;
1028 		void *mem, *kern_addr;
1029 		uint64_t size;
1030 
1031 		if (p->signal_page) {
1032 			pr_err("Event page is already set\n");
1033 			return -EINVAL;
1034 		}
1035 
1036 		kfd = kfd_device_by_id(GET_GPU_ID(args->event_page_offset));
1037 		if (!kfd) {
1038 			pr_err("Getting device by id failed in %s\n", __func__);
1039 			return -EINVAL;
1040 		}
1041 
1042 		mutex_lock(&p->mutex);
1043 		pdd = kfd_bind_process_to_device(kfd, p);
1044 		if (IS_ERR(pdd)) {
1045 			err = PTR_ERR(pdd);
1046 			goto out_unlock;
1047 		}
1048 
1049 		mem = kfd_process_device_translate_handle(pdd,
1050 				GET_IDR_HANDLE(args->event_page_offset));
1051 		if (!mem) {
1052 			pr_err("Can't find BO, offset is 0x%llx\n",
1053 			       args->event_page_offset);
1054 			err = -EINVAL;
1055 			goto out_unlock;
1056 		}
1057 		mutex_unlock(&p->mutex);
1058 
1059 		err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kfd->kgd,
1060 						mem, &kern_addr, &size);
1061 		if (err) {
1062 			pr_err("Failed to map event page to kernel\n");
1063 			return err;
1064 		}
1065 
1066 		err = kfd_event_page_set(p, kern_addr, size);
1067 		if (err) {
1068 			pr_err("Failed to set event page\n");
1069 			return err;
1070 		}
1071 	}
1072 
1073 	err = kfd_event_create(filp, p, args->event_type,
1074 				args->auto_reset != 0, args->node_id,
1075 				&args->event_id, &args->event_trigger_data,
1076 				&args->event_page_offset,
1077 				&args->event_slot_index);
1078 
1079 	return err;
1080 
1081 out_unlock:
1082 	mutex_unlock(&p->mutex);
1083 	return err;
1084 }
1085 
1086 static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p,
1087 					void *data)
1088 {
1089 	struct kfd_ioctl_destroy_event_args *args = data;
1090 
1091 	return kfd_event_destroy(p, args->event_id);
1092 }
1093 
1094 static int kfd_ioctl_set_event(struct file *filp, struct kfd_process *p,
1095 				void *data)
1096 {
1097 	struct kfd_ioctl_set_event_args *args = data;
1098 
1099 	return kfd_set_event(p, args->event_id);
1100 }
1101 
1102 static int kfd_ioctl_reset_event(struct file *filp, struct kfd_process *p,
1103 				void *data)
1104 {
1105 	struct kfd_ioctl_reset_event_args *args = data;
1106 
1107 	return kfd_reset_event(p, args->event_id);
1108 }
1109 
1110 static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
1111 				void *data)
1112 {
1113 	struct kfd_ioctl_wait_events_args *args = data;
1114 	int err;
1115 
1116 	err = kfd_wait_on_events(p, args->num_events,
1117 			(void __user *)args->events_ptr,
1118 			(args->wait_for_all != 0),
1119 			args->timeout, &args->wait_result);
1120 
1121 	return err;
1122 }

1123 static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
1124 					struct kfd_process *p, void *data)
1125 {
1126 	struct kfd_ioctl_set_scratch_backing_va_args *args = data;
1127 	struct kfd_process_device *pdd;
1128 	struct kfd_dev *dev;
1129 	long err;
1130 
1131 	dev = kfd_device_by_id(args->gpu_id);
1132 	if (!dev)
1133 		return -EINVAL;
1134 
1135 	mutex_lock(&p->mutex);
1136 
1137 	pdd = kfd_bind_process_to_device(dev, p);
1138 	if (IS_ERR(pdd)) {
1139 		err = PTR_ERR(pdd);
1140 		goto bind_process_to_device_fail;
1141 	}
1142 
1143 	pdd->qpd.sh_hidden_private_base = args->va_addr;
1144 
1145 	mutex_unlock(&p->mutex);
1146 
1147 	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&
1148 	    pdd->qpd.vmid != 0 && dev->kfd2kgd->set_scratch_backing_va)
1149 		dev->kfd2kgd->set_scratch_backing_va(
1150 			dev->kgd, args->va_addr, pdd->qpd.vmid);
1151 
1152 	return 0;
1153 
1154 bind_process_to_device_fail:
1155 	mutex_unlock(&p->mutex);
1156 	return err;
1157 }
1158 
1159 static int kfd_ioctl_get_tile_config(struct file *filep,
1160 		struct kfd_process *p, void *data)
1161 {
1162 	struct kfd_ioctl_get_tile_config_args *args = data;
1163 	struct kfd_dev *dev;
1164 	struct tile_config config;
1165 	int err = 0;
1166 
1167 	dev = kfd_device_by_id(args->gpu_id);
1168 	if (!dev)
1169 		return -EINVAL;
1170 
1171 	amdgpu_amdkfd_get_tile_config(dev->kgd, &config);
1172 
1173 	args->gb_addr_config = config.gb_addr_config;
1174 	args->num_banks = config.num_banks;
1175 	args->num_ranks = config.num_ranks;
1176 
1177 	if (args->num_tile_configs > config.num_tile_configs)
1178 		args->num_tile_configs = config.num_tile_configs;
1179 	err = copy_to_user((void __user *)args->tile_config_ptr,
1180 			config.tile_config_ptr,
1181 			args->num_tile_configs * sizeof(uint32_t));
1182 	if (err) {
1183 		args->num_tile_configs = 0;
1184 		return -EFAULT;
1185 	}
1186 
1187 	if (args->num_macro_tile_configs > config.num_macro_tile_configs)
1188 		args->num_macro_tile_configs =
1189 				config.num_macro_tile_configs;
1190 	err = copy_to_user((void __user *)args->macro_tile_config_ptr,
1191 			config.macro_tile_config_ptr,
1192 			args->num_macro_tile_configs * sizeof(uint32_t));
1193 	if (err) {
1194 		args->num_macro_tile_configs = 0;
1195 		return -EFAULT;
1196 	}
1197 
1198 	return 0;
1199 }
1200 
1201 static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,
1202 				void *data)
1203 {
1204 	struct kfd_ioctl_acquire_vm_args *args = data;
1205 	struct kfd_process_device *pdd;
1206 	struct kfd_dev *dev;
1207 	struct file *drm_file;
1208 	int ret;
1209 
1210 	dev = kfd_device_by_id(args->gpu_id);
1211 	if (!dev)
1212 		return -EINVAL;
1213 
1214 	drm_file = fget(args->drm_fd);
1215 	if (!drm_file)
1216 		return -EINVAL;
1217 
1218 	mutex_lock(&p->mutex);
1219 
1220 	pdd = kfd_get_process_device_data(dev, p);
1221 	if (!pdd) {
1222 		ret = -EINVAL;
1223 		goto err_unlock;
1224 	}
1225 
1226 	if (pdd->drm_file) {
1227 		ret = pdd->drm_file == drm_file ? 0 : -EBUSY;
1228 		goto err_unlock;
1229 	}
1230 
1231 	ret = kfd_process_device_init_vm(pdd, drm_file);
1232 	if (ret)
1233 		goto err_unlock;
1234 	/* On success, the PDD keeps the drm_file reference */
1235 	mutex_unlock(&p->mutex);
1236 
1237 	return 0;
1238 
1239 err_unlock:
1240 	mutex_unlock(&p->mutex);
1241 	fput(drm_file);
1242 	return ret;
1243 }
1244 
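/*
 * A device is treated as large-BAR when all of its local memory is
 * CPU-accessible, i.e. it has no private VRAM. The debug_largebar module
 * option forces this on for testing, and IOMMUv2 (APU) devices are never
 * reported as large-BAR.
 */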
1245 bool kfd_dev_is_large_bar(struct kfd_dev *dev)
1246 {
1247 	struct kfd_local_mem_info mem_info;
1248 
1249 	if (debug_largebar) {
1250 		pr_debug("Simulate large-bar allocation on non large-bar machine\n");
1251 		return true;
1252 	}
1253 
1254 	if (dev->use_iommu_v2)
1255 		return false;
1256 
1257 	amdgpu_amdkfd_get_local_mem_info(dev->kgd, &mem_info);
1258 	if (mem_info.local_mem_size_private == 0 &&
1259 			mem_info.local_mem_size_public > 0)
1260 		return true;
1261 	return false;
1262 }
1263 
1264 static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
1265 					struct kfd_process *p, void *data)
1266 {
1267 	struct kfd_ioctl_alloc_memory_of_gpu_args *args = data;
1268 	struct kfd_process_device *pdd;
1269 	void *mem;
1270 	struct kfd_dev *dev;
1271 	int idr_handle;
1272 	long err;
1273 	uint64_t offset = args->mmap_offset;
1274 	uint32_t flags = args->flags;
1275 
1276 	if (args->size == 0)
1277 		return -EINVAL;
1278 
1279 	dev = kfd_device_by_id(args->gpu_id);
1280 	if (!dev)
1281 		return -EINVAL;
1282 
1283 	if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) &&
1284 		(flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) &&
1285 		!kfd_dev_is_large_bar(dev)) {
1286 		pr_err("Allocating host-visible VRAM on a small-BAR device is not allowed\n");
1287 		return -EINVAL;
1288 	}
1289 
1290 	mutex_lock(&p->mutex);
1291 
1292 	pdd = kfd_bind_process_to_device(dev, p);
1293 	if (IS_ERR(pdd)) {
1294 		err = PTR_ERR(pdd);
1295 		goto err_unlock;
1296 	}
1297 
1298 	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
1299 		if (args->size != kfd_doorbell_process_slice(dev)) {
1300 			err = -EINVAL;
1301 			goto err_unlock;
1302 		}
1303 		offset = kfd_get_process_doorbells(pdd);
1304 	} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
1305 		if (args->size != PAGE_SIZE) {
1306 			err = -EINVAL;
1307 			goto err_unlock;
1308 		}
1309 		offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
1310 		if (!offset) {
1311 			err = -ENOMEM;
1312 			goto err_unlock;
1313 		}
1314 	}
1315 
1316 	err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
1317 		dev->kgd, args->va_addr, args->size,
1318 		pdd->vm, (struct kgd_mem **) &mem, &offset,
1319 		flags);
1320 
1321 	if (err)
1322 		goto err_unlock;
1323 
1324 	idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
1325 	if (idr_handle < 0) {
1326 		err = -EFAULT;
1327 		goto err_free;
1328 	}
1329 
1330 	/* Update the VRAM usage count */
1331 	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
1332 		WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + args->size);
1333 
1334 	mutex_unlock(&p->mutex);
1335 
1336 	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
1337 	args->mmap_offset = offset;
1338 
1339 	/* MMIO is mapped through the kfd device.
1340 	 * Generate a kfd mmap offset instead.
1341 	 */
1342 	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)
1343 		args->mmap_offset = KFD_MMAP_TYPE_MMIO
1344 					| KFD_MMAP_GPU_ID(args->gpu_id);
1345 
1346 	return 0;
1347 
1348 err_free:
1349 	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, NULL);
1350 err_unlock:
1351 	mutex_unlock(&p->mutex);
1352 	return err;
1353 }
1354 
1355 static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
1356 					struct kfd_process *p, void *data)
1357 {
1358 	struct kfd_ioctl_free_memory_of_gpu_args *args = data;
1359 	struct kfd_process_device *pdd;
1360 	void *mem;
1361 	struct kfd_dev *dev;
1362 	int ret;
1363 	uint64_t size = 0;
1364 
1365 	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
1366 	if (!dev)
1367 		return -EINVAL;
1368 
1369 	mutex_lock(&p->mutex);
1370 
1371 	pdd = kfd_get_process_device_data(dev, p);
1372 	if (!pdd) {
1373 		pr_err("Process device data doesn't exist\n");
1374 		ret = -EINVAL;
1375 		goto err_unlock;
1376 	}
1377 
1378 	mem = kfd_process_device_translate_handle(
1379 		pdd, GET_IDR_HANDLE(args->handle));
1380 	if (!mem) {
1381 		ret = -EINVAL;
1382 		goto err_unlock;
1383 	}
1384 
1385 	ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd,
1386 						(struct kgd_mem *)mem, &size);
1387 
1388 	/* If freeing the buffer failed, leave the handle in place for
1389 	 * clean-up during process tear-down.
1390 	 */
1391 	if (!ret)
1392 		kfd_process_device_remove_obj_handle(
1393 			pdd, GET_IDR_HANDLE(args->handle));
1394 
1395 	WRITE_ONCE(pdd->vram_usage, pdd->vram_usage - size);
1396 
1397 err_unlock:
1398 	mutex_unlock(&p->mutex);
1399 	return ret;
1400 }
1401 
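/*
 * Map a previously allocated buffer into the GPUVM of every device listed
 * in args->device_ids_array_ptr, resuming after the first n_success
 * entries, then wait for the page-table updates to complete and flush the
 * TLBs of all target devices.
 */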
1402 static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
1403 					struct kfd_process *p, void *data)
1404 {
1405 	struct kfd_ioctl_map_memory_to_gpu_args *args = data;
1406 	struct kfd_process_device *pdd, *peer_pdd;
1407 	void *mem;
1408 	struct kfd_dev *dev, *peer;
1409 	long err = 0;
1410 	int i;
1411 	uint32_t *devices_arr = NULL;
1412 
1413 	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
1414 	if (!dev)
1415 		return -EINVAL;
1416 
1417 	if (!args->n_devices) {
1418 		pr_debug("Device IDs array empty\n");
1419 		return -EINVAL;
1420 	}
1421 	if (args->n_success > args->n_devices) {
1422 		pr_debug("n_success exceeds n_devices\n");
1423 		return -EINVAL;
1424 	}
1425 
1426 	devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
1427 				    GFP_KERNEL);
1428 	if (!devices_arr)
1429 		return -ENOMEM;
1430 
1431 	err = copy_from_user(devices_arr,
1432 			     (void __user *)args->device_ids_array_ptr,
1433 			     args->n_devices * sizeof(*devices_arr));
1434 	if (err != 0) {
1435 		err = -EFAULT;
1436 		goto copy_from_user_failed;
1437 	}
1438 
1439 	mutex_lock(&p->mutex);
1440 
1441 	pdd = kfd_bind_process_to_device(dev, p);
1442 	if (IS_ERR(pdd)) {
1443 		err = PTR_ERR(pdd);
1444 		goto bind_process_to_device_failed;
1445 	}
1446 
1447 	mem = kfd_process_device_translate_handle(pdd,
1448 						GET_IDR_HANDLE(args->handle));
1449 	if (!mem) {
1450 		err = -ENOMEM;
1451 		goto get_mem_obj_from_handle_failed;
1452 	}
1453 
1454 	for (i = args->n_success; i < args->n_devices; i++) {
1455 		peer = kfd_device_by_id(devices_arr[i]);
1456 		if (!peer) {
1457 			pr_debug("Getting device by id failed for 0x%x\n",
1458 				 devices_arr[i]);
1459 			err = -EINVAL;
1460 			goto get_mem_obj_from_handle_failed;
1461 		}
1462 
1463 		peer_pdd = kfd_bind_process_to_device(peer, p);
1464 		if (IS_ERR(peer_pdd)) {
1465 			err = PTR_ERR(peer_pdd);
1466 			goto get_mem_obj_from_handle_failed;
1467 		}
1468 		err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
1469 			peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
1470 		if (err) {
1471 			pr_err("Failed to map to gpu %d/%d\n",
1472 			       i, args->n_devices);
1473 			goto map_memory_to_gpu_failed;
1474 		}
1475 		args->n_success = i+1;
1476 	}
1477 
1478 	mutex_unlock(&p->mutex);
1479 
1480 	err = amdgpu_amdkfd_gpuvm_sync_memory(dev->kgd, (struct kgd_mem *) mem, true);
1481 	if (err) {
1482 		pr_debug("Sync memory failed, wait interrupted by user signal\n");
1483 		goto sync_memory_failed;
1484 	}
1485 
1486 	/* Flush TLBs after waiting for the page table updates to complete */
1487 	for (i = 0; i < args->n_devices; i++) {
1488 		peer = kfd_device_by_id(devices_arr[i]);
1489 		if (WARN_ON_ONCE(!peer))
1490 			continue;
1491 		peer_pdd = kfd_get_process_device_data(peer, p);
1492 		if (WARN_ON_ONCE(!peer_pdd))
1493 			continue;
1494 		kfd_flush_tlb(peer_pdd);
1495 	}
1496 
1497 	kfree(devices_arr);
1498 
1499 	return err;
1500 
1501 bind_process_to_device_failed:
1502 get_mem_obj_from_handle_failed:
1503 map_memory_to_gpu_failed:
1504 	mutex_unlock(&p->mutex);
1505 copy_from_user_failed:
1506 sync_memory_failed:
1507 	kfree(devices_arr);
1508 
1509 	return err;
1510 }
1511 
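/*
 * Reverse of kfd_ioctl_map_memory_to_gpu: unmap the buffer from the GPUVM
 * of every device listed in args->device_ids_array_ptr, resuming after
 * the first n_success entries.
 */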
1512 static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
1513 					struct kfd_process *p, void *data)
1514 {
1515 	struct kfd_ioctl_unmap_memory_from_gpu_args *args = data;
1516 	struct kfd_process_device *pdd, *peer_pdd;
1517 	void *mem;
1518 	struct kfd_dev *dev, *peer;
1519 	long err = 0;
1520 	uint32_t *devices_arr = NULL, i;
1521 
1522 	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
1523 	if (!dev)
1524 		return -EINVAL;
1525 
1526 	if (!args->n_devices) {
1527 		pr_debug("Device IDs array empty\n");
1528 		return -EINVAL;
1529 	}
1530 	if (args->n_success > args->n_devices) {
1531 		pr_debug("n_success exceeds n_devices\n");
1532 		return -EINVAL;
1533 	}
1534 
1535 	devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
1536 				    GFP_KERNEL);
1537 	if (!devices_arr)
1538 		return -ENOMEM;
1539 
1540 	err = copy_from_user(devices_arr,
1541 			     (void __user *)args->device_ids_array_ptr,
1542 			     args->n_devices * sizeof(*devices_arr));
1543 	if (err != 0) {
1544 		err = -EFAULT;
1545 		goto copy_from_user_failed;
1546 	}
1547 
1548 	mutex_lock(&p->mutex);
1549 
1550 	pdd = kfd_get_process_device_data(dev, p);
1551 	if (!pdd) {
1552 		err = -EINVAL;
1553 		goto bind_process_to_device_failed;
1554 	}
1555 
1556 	mem = kfd_process_device_translate_handle(pdd,
1557 						GET_IDR_HANDLE(args->handle));
1558 	if (!mem) {
1559 		err = -ENOMEM;
1560 		goto get_mem_obj_from_handle_failed;
1561 	}
1562 
1563 	for (i = args->n_success; i < args->n_devices; i++) {
1564 		peer = kfd_device_by_id(devices_arr[i]);
1565 		if (!peer) {
1566 			err = -EINVAL;
1567 			goto get_mem_obj_from_handle_failed;
1568 		}
1569 
1570 		peer_pdd = kfd_get_process_device_data(peer, p);
1571 		if (!peer_pdd) {
1572 			err = -ENODEV;
1573 			goto get_mem_obj_from_handle_failed;
1574 		}
1575 		err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
1576 			peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
1577 		if (err) {
1578 			pr_err("Failed to unmap from gpu %d/%d\n",
1579 			       i, args->n_devices);
1580 			goto unmap_memory_from_gpu_failed;
1581 		}
1582 		args->n_success = i+1;
1583 	}
1584 	kfree(devices_arr);
1585 
1586 	mutex_unlock(&p->mutex);
1587 
1588 	return 0;
1589 
1590 bind_process_to_device_failed:
1591 get_mem_obj_from_handle_failed:
1592 unmap_memory_from_gpu_failed:
1593 	mutex_unlock(&p->mutex);
1594 copy_from_user_failed:
1595 	kfree(devices_arr);
1596 	return err;
1597 }
1598 
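/*
 * Bind the device's global wave sync (GWS) resource to the given user
 * queue, or unbind it when args->num_gws is 0. Only supported on devices
 * that expose GWS and only with the HW scheduler (not with NO_HWS).
 */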
1599 static int kfd_ioctl_alloc_queue_gws(struct file *filep,
1600 		struct kfd_process *p, void *data)
1601 {
1602 	int retval;
1603 	struct kfd_ioctl_alloc_queue_gws_args *args = data;
1604 	struct queue *q;
1605 	struct kfd_dev *dev;
1606 
1607 	mutex_lock(&p->mutex);
1608 	q = pqm_get_user_queue(&p->pqm, args->queue_id);
1609 
1610 	if (q) {
1611 		dev = q->device;
1612 	} else {
1613 		retval = -EINVAL;
1614 		goto out_unlock;
1615 	}
1616 
1617 	if (!dev->gws) {
1618 		retval = -ENODEV;
1619 		goto out_unlock;
1620 	}
1621 
1622 	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
1623 		retval = -ENODEV;
1624 		goto out_unlock;
1625 	}
1626 
1627 	retval = pqm_set_gws(&p->pqm, args->queue_id, args->num_gws ? dev->gws : NULL);
1628 	mutex_unlock(&p->mutex);
1629 
1630 	args->first_gws = 0;
1631 	return retval;
1632 
1633 out_unlock:
1634 	mutex_unlock(&p->mutex);
1635 	return retval;
1636 }
1637 
1638 static int kfd_ioctl_get_dmabuf_info(struct file *filep,
1639 		struct kfd_process *p, void *data)
1640 {
1641 	struct kfd_ioctl_get_dmabuf_info_args *args = data;
1642 	struct kfd_dev *dev = NULL;
1643 	struct kgd_dev *dma_buf_kgd;
1644 	void *metadata_buffer = NULL;
1645 	uint32_t flags;
1646 	unsigned int i;
1647 	int r;
1648 
1649 	/* Find a KFD GPU device that supports the get_dmabuf_info query */
1650 	for (i = 0; kfd_topology_enum_kfd_devices(i, &dev) == 0; i++)
1651 		if (dev)
1652 			break;
1653 	if (!dev)
1654 		return -EINVAL;
1655 
1656 	if (args->metadata_ptr) {
1657 		metadata_buffer = kzalloc(args->metadata_size, GFP_KERNEL);
1658 		if (!metadata_buffer)
1659 			return -ENOMEM;
1660 	}
1661 
1662 	/* Get dmabuf info from KGD */
1663 	r = amdgpu_amdkfd_get_dmabuf_info(dev->kgd, args->dmabuf_fd,
1664 					  &dma_buf_kgd, &args->size,
1665 					  metadata_buffer, args->metadata_size,
1666 					  &args->metadata_size, &flags);
1667 	if (r)
1668 		goto exit;
1669 
1670 	/* Reverse-lookup gpu_id from kgd pointer */
1671 	dev = kfd_device_by_kgd(dma_buf_kgd);
1672 	if (!dev) {
1673 		r = -EINVAL;
1674 		goto exit;
1675 	}
1676 	args->gpu_id = dev->id;
1677 	args->flags = flags;
1678 
1679 	/* Copy metadata buffer to user mode */
1680 	if (metadata_buffer) {
1681 		r = copy_to_user((void __user *)args->metadata_ptr,
1682 				 metadata_buffer, args->metadata_size);
1683 		if (r != 0)
1684 			r = -EFAULT;
1685 	}
1686 
1687 exit:
1688 	kfree(metadata_buffer);
1689 
1690 	return r;
1691 }
1692 
1693 static int kfd_ioctl_import_dmabuf(struct file *filep,
1694 				   struct kfd_process *p, void *data)
1695 {
1696 	struct kfd_ioctl_import_dmabuf_args *args = data;
1697 	struct kfd_process_device *pdd;
1698 	struct dma_buf *dmabuf;
1699 	struct kfd_dev *dev;
1700 	int idr_handle;
1701 	uint64_t size;
1702 	void *mem;
1703 	int r;
1704 
1705 	dev = kfd_device_by_id(args->gpu_id);
1706 	if (!dev)
1707 		return -EINVAL;
1708 
1709 	dmabuf = dma_buf_get(args->dmabuf_fd);
1710 	if (IS_ERR(dmabuf))
1711 		return PTR_ERR(dmabuf);
1712 
1713 	mutex_lock(&p->mutex);
1714 
1715 	pdd = kfd_bind_process_to_device(dev, p);
1716 	if (IS_ERR(pdd)) {
1717 		r = PTR_ERR(pdd);
1718 		goto err_unlock;
1719 	}
1720 
1721 	r = amdgpu_amdkfd_gpuvm_import_dmabuf(dev->kgd, dmabuf,
1722 					      args->va_addr, pdd->vm,
1723 					      (struct kgd_mem **)&mem, &size,
1724 					      NULL);
1725 	if (r)
1726 		goto err_unlock;
1727 
1728 	idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
1729 	if (idr_handle < 0) {
1730 		r = -EFAULT;
1731 		goto err_free;
1732 	}
1733 
1734 	mutex_unlock(&p->mutex);
1735 	dma_buf_put(dmabuf);
1736 
1737 	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
1738 
1739 	return 0;
1740 
1741 err_free:
1742 	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, NULL);
1743 err_unlock:
1744 	mutex_unlock(&p->mutex);
1745 	dma_buf_put(dmabuf);
1746 	return r;
1747 }
1748 
1749 /* Handle requests for watching SMI events */
1750 static int kfd_ioctl_smi_events(struct file *filep,
1751 				struct kfd_process *p, void *data)
1752 {
1753 	struct kfd_ioctl_smi_events_args *args = data;
1754 	struct kfd_dev *dev;
1755 
1756 	dev = kfd_device_by_id(args->gpuid);
1757 	if (!dev)
1758 		return -EINVAL;
1759 
1760 	return kfd_smi_event_open(dev, &args->anon_fd);
1761 }
1762 
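/*
 * Build one amdkfd_ioctl_desc entry, indexed by the ioctl number so that
 * kfd_ioctl() can look up the handler directly by _IOC_NR(cmd).
 */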
1763 #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
1764 	[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
1765 			    .cmd_drv = 0, .name = #ioctl}
1766 
1767 /* Ioctl table */
1768 static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
1769 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_VERSION,
1770 			kfd_ioctl_get_version, 0),
1771 
1772 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_QUEUE,
1773 			kfd_ioctl_create_queue, 0),
1774 
1775 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_QUEUE,
1776 			kfd_ioctl_destroy_queue, 0),
1777 
1778 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_MEMORY_POLICY,
1779 			kfd_ioctl_set_memory_policy, 0),
1780 
1781 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_CLOCK_COUNTERS,
1782 			kfd_ioctl_get_clock_counters, 0),
1783 
1784 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES,
1785 			kfd_ioctl_get_process_apertures, 0),
1786 
1787 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UPDATE_QUEUE,
1788 			kfd_ioctl_update_queue, 0),
1789 
1790 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_EVENT,
1791 			kfd_ioctl_create_event, 0),
1792 
1793 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_EVENT,
1794 			kfd_ioctl_destroy_event, 0),
1795 
1796 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_EVENT,
1797 			kfd_ioctl_set_event, 0),
1798 
1799 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_RESET_EVENT,
1800 			kfd_ioctl_reset_event, 0),
1801 
1802 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS,
1803 			kfd_ioctl_wait_events, 0),
1804 
1805 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER,
1806 			kfd_ioctl_dbg_register, 0),
1807 
1808 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER,
1809 			kfd_ioctl_dbg_unregister, 0),
1810 
1811 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH,
1812 			kfd_ioctl_dbg_address_watch, 0),
1813 
1814 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL,
1815 			kfd_ioctl_dbg_wave_control, 0),
1816 
1817 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA,
1818 			kfd_ioctl_set_scratch_backing_va, 0),
1819 
1820 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG,
1821 			kfd_ioctl_get_tile_config, 0),
1822 
1823 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER,
1824 			kfd_ioctl_set_trap_handler, 0),
1825 
1826 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW,
1827 			kfd_ioctl_get_process_apertures_new, 0),
1828 
1829 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ACQUIRE_VM,
1830 			kfd_ioctl_acquire_vm, 0),
1831 
1832 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU,
1833 			kfd_ioctl_alloc_memory_of_gpu, 0),
1834 
1835 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_FREE_MEMORY_OF_GPU,
1836 			kfd_ioctl_free_memory_of_gpu, 0),
1837 
1838 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_MAP_MEMORY_TO_GPU,
1839 			kfd_ioctl_map_memory_to_gpu, 0),
1840 
1841 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU,
1842 			kfd_ioctl_unmap_memory_from_gpu, 0),
1843 
1844 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_CU_MASK,
1845 			kfd_ioctl_set_cu_mask, 0),
1846 
1847 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_QUEUE_WAVE_STATE,
1848 			kfd_ioctl_get_queue_wave_state, 0),
1849 
1850 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_DMABUF_INFO,
1851 				kfd_ioctl_get_dmabuf_info, 0),
1852 
1853 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF,
1854 				kfd_ioctl_import_dmabuf, 0),
1855 
1856 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS,
1857 			kfd_ioctl_alloc_queue_gws, 0),
1858 
1859 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SMI_EVENTS,
1860 			kfd_ioctl_smi_events, 0),
1861 };
1862 
1863 #define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)
1864 
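/*
 * Common ioctl dispatcher: look up the handler in amdkfd_ioctls by ioctl
 * number, copy the argument block in and out through a kernel buffer
 * sized to the larger of the user-supplied and kernel-defined argument
 * sizes, and reject calls from any process other than the one that
 * opened the file descriptor.
 */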
1865 static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
1866 {
1867 	struct kfd_process *process;
1868 	amdkfd_ioctl_t *func;
1869 	const struct amdkfd_ioctl_desc *ioctl = NULL;
1870 	unsigned int nr = _IOC_NR(cmd);
1871 	char stack_kdata[128];
1872 	char *kdata = NULL;
1873 	unsigned int usize, asize;
1874 	int retcode = -EINVAL;
1875 
1876 	if (nr >= AMDKFD_CORE_IOCTL_COUNT)
1877 		goto err_i1;
1878 
1879 	if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) {
1880 		u32 amdkfd_size;
1881 
1882 		ioctl = &amdkfd_ioctls[nr];
1883 
1884 		amdkfd_size = _IOC_SIZE(ioctl->cmd);
1885 		usize = asize = _IOC_SIZE(cmd);
1886 		if (amdkfd_size > asize)
1887 			asize = amdkfd_size;
1888 
1889 		cmd = ioctl->cmd;
1890 	} else {
1891 		goto err_i1;
	}
1892 
1893 	dev_dbg(kfd_device, "ioctl cmd 0x%x (#0x%x), arg 0x%lx\n", cmd, nr, arg);
1894 
1895 	/* Get the process struct from the filep. Only the process
1896 	 * that opened /dev/kfd can use the file descriptor. Child
1897 	 * processes need to create their own KFD device context.
1898 	 */
1899 	process = filep->private_data;
1900 	if (process->lead_thread != current->group_leader) {
1901 		dev_dbg(kfd_device, "Using KFD FD in wrong process\n");
1902 		retcode = -EBADF;
1903 		goto err_i1;
1904 	}
1905 
1906 	/* Do not trust userspace, use our own definition */
1907 	func = ioctl->func;
1908 
1909 	if (unlikely(!func)) {
1910 		dev_dbg(kfd_device, "no function\n");
1911 		retcode = -EINVAL;
1912 		goto err_i1;
1913 	}
1914 
1915 	if (cmd & (IOC_IN | IOC_OUT)) {
1916 		if (asize <= sizeof(stack_kdata)) {
1917 			kdata = stack_kdata;
1918 		} else {
1919 			kdata = kmalloc(asize, GFP_KERNEL);
1920 			if (!kdata) {
1921 				retcode = -ENOMEM;
1922 				goto err_i1;
1923 			}
1924 		}
1925 		if (asize > usize)
1926 			memset(kdata + usize, 0, asize - usize);
1927 	}
1928 
1929 	if (cmd & IOC_IN) {
1930 		if (copy_from_user(kdata, (void __user *)arg, usize) != 0) {
1931 			retcode = -EFAULT;
1932 			goto err_i1;
1933 		}
1934 	} else if (cmd & IOC_OUT) {
1935 		memset(kdata, 0, usize);
1936 	}
1937 
1938 	retcode = func(filep, process, kdata);
1939 
1940 	if (cmd & IOC_OUT)
1941 		if (copy_to_user((void __user *)arg, kdata, usize) != 0)
1942 			retcode = -EFAULT;
1943 
1944 err_i1:
1945 	if (!ioctl)
1946 		dev_dbg(kfd_device, "invalid ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n",
1947 			  task_pid_nr(current), cmd, nr);
1948 
1949 	if (kdata != stack_kdata)
1950 		kfree(kdata);
1951 
1952 	if (retcode)
1953 		dev_dbg(kfd_device, "ioctl cmd (#0x%x), arg 0x%lx, ret = %d\n",
1954 				nr, arg, retcode);
1955 
1956 	return retcode;
1957 }
1958 
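/*
 * Map the device's single MMIO remap page into the process address space
 * as non-cached I/O memory. Exactly one page must be requested.
 */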
1959 static int kfd_mmio_mmap(struct kfd_dev *dev, struct kfd_process *process,
1960 		      struct vm_area_struct *vma)
1961 {
1962 	phys_addr_t address;
1963 	int ret;
1964 
1965 	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
1966 		return -EINVAL;
1967 
1968 	address = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
1969 
1970 	vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
1971 				VM_DONTDUMP | VM_PFNMAP;
1972 
1973 	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
1974 
1975 	pr_debug("pasid 0x%x mapping mmio page\n"
1976 		 "     target user address == 0x%08llX\n"
1977 		 "     physical address    == 0x%08llX\n"
1978 		 "     vm_flags            == 0x%04lX\n"
1979 		 "     size                == 0x%04lX\n",
1980 		 process->pasid, (unsigned long long) vma->vm_start,
1981 		 address, vma->vm_flags, PAGE_SIZE);
1982 
1983 	ret = io_remap_pfn_range(vma,
1984 				vma->vm_start,
1985 				address >> PAGE_SHIFT,
1986 				PAGE_SIZE,
1987 				vma->vm_page_prot);
1988 	return ret;
1989 }
1990 
1991 
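/*
 * The mmap offset encodes a mapping type and, where needed, a gpu_id (see
 * KFD_MMAP_TYPE_* and KFD_MMAP_GPU_ID); route the request to the doorbell,
 * event page, reserved memory or MMIO handler accordingly.
 */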
1992 static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
1993 {
1994 	struct kfd_process *process;
1995 	struct kfd_dev *dev = NULL;
1996 	unsigned long mmap_offset;
1997 	unsigned int gpu_id;
1998 
1999 	process = kfd_get_process(current);
2000 	if (IS_ERR(process))
2001 		return PTR_ERR(process);
2002 
2003 	mmap_offset = vma->vm_pgoff << PAGE_SHIFT;
2004 	gpu_id = KFD_MMAP_GET_GPU_ID(mmap_offset);
2005 	if (gpu_id)
2006 		dev = kfd_device_by_id(gpu_id);
2007 
2008 	switch (mmap_offset & KFD_MMAP_TYPE_MASK) {
2009 	case KFD_MMAP_TYPE_DOORBELL:
2010 		if (!dev)
2011 			return -ENODEV;
2012 		return kfd_doorbell_mmap(dev, process, vma);
2013 
2014 	case KFD_MMAP_TYPE_EVENTS:
2015 		return kfd_event_mmap(process, vma);
2016 
2017 	case KFD_MMAP_TYPE_RESERVED_MEM:
2018 		if (!dev)
2019 			return -ENODEV;
2020 		return kfd_reserved_mem_mmap(dev, process, vma);
2021 	case KFD_MMAP_TYPE_MMIO:
2022 		if (!dev)
2023 			return -ENODEV;
2024 		return kfd_mmio_mmap(dev, process, vma);
2025 	}
2026 
2027 	return -EFAULT;
2028 }
2029