1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22 
23 #include <linux/device.h>
24 #include <linux/export.h>
25 #include <linux/err.h>
26 #include <linux/fs.h>
27 #include <linux/file.h>
28 #include <linux/sched.h>
29 #include <linux/slab.h>
30 #include <linux/uaccess.h>
31 #include <linux/compat.h>
32 #include <uapi/linux/kfd_ioctl.h>
33 #include <linux/time.h>
34 #include <linux/mm.h>
35 #include <linux/mman.h>
36 #include <linux/dma-buf.h>
37 #include <asm/processor.h>
38 #include "kfd_priv.h"
39 #include "kfd_device_queue_manager.h"
40 #include "kfd_dbgmgr.h"
41 #include "amdgpu_amdkfd.h"
42 #include "kfd_smi_events.h"
43 
44 static long kfd_ioctl(struct file *, unsigned int, unsigned long);
45 static int kfd_open(struct inode *, struct file *);
46 static int kfd_release(struct inode *, struct file *);
47 static int kfd_mmap(struct file *, struct vm_area_struct *);
48 
49 static const char kfd_dev_name[] = "kfd";
50 
51 static const struct file_operations kfd_fops = {
52 	.owner = THIS_MODULE,
53 	.unlocked_ioctl = kfd_ioctl,
54 	.compat_ioctl = compat_ptr_ioctl,
55 	.open = kfd_open,
56 	.release = kfd_release,
57 	.mmap = kfd_mmap,
58 };
59 
60 static int kfd_char_dev_major = -1;
61 static struct class *kfd_class;
62 struct device *kfd_device;
63 
64 int kfd_chardev_init(void)
65 {
66 	int err = 0;
67 
68 	kfd_char_dev_major = register_chrdev(0, kfd_dev_name, &kfd_fops);
69 	err = kfd_char_dev_major;
70 	if (err < 0)
71 		goto err_register_chrdev;
72 
73 	kfd_class = class_create(THIS_MODULE, kfd_dev_name);
74 	err = PTR_ERR(kfd_class);
75 	if (IS_ERR(kfd_class))
76 		goto err_class_create;
77 
78 	kfd_device = device_create(kfd_class, NULL,
79 					MKDEV(kfd_char_dev_major, 0),
80 					NULL, kfd_dev_name);
81 	err = PTR_ERR(kfd_device);
82 	if (IS_ERR(kfd_device))
83 		goto err_device_create;
84 
85 	return 0;
86 
87 err_device_create:
88 	class_destroy(kfd_class);
89 err_class_create:
90 	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
91 err_register_chrdev:
92 	return err;
93 }
94 
95 void kfd_chardev_exit(void)
96 {
97 	device_destroy(kfd_class, MKDEV(kfd_char_dev_major, 0));
98 	class_destroy(kfd_class);
99 	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
100 }
101 
102 struct device *kfd_chardev(void)
103 {
104 	return kfd_device;
105 }
106 
107 
108 static int kfd_open(struct inode *inode, struct file *filep)
109 {
110 	struct kfd_process *process;
111 	bool is_32bit_user_mode;
112 
113 	if (iminor(inode) != 0)
114 		return -ENODEV;
115 
116 	is_32bit_user_mode = in_compat_syscall();
117 
118 	if (is_32bit_user_mode) {
119 		dev_warn(kfd_device,
120 			"Process %d (32-bit) failed to open /dev/kfd\n"
121 			"32-bit processes are not supported by amdkfd\n",
122 			current->pid);
123 		return -EPERM;
124 	}
125 
126 	process = kfd_create_process(filep);
127 	if (IS_ERR(process))
128 		return PTR_ERR(process);
129 
130 	if (kfd_is_locked()) {
131 		dev_dbg(kfd_device, "kfd is locked!\n"
132 				"process %d unreferenced", process->pasid);
133 		kfd_unref_process(process);
134 		return -EAGAIN;
135 	}
136 
137 	/* filep now owns the reference returned by kfd_create_process */
138 	filep->private_data = process;
139 
140 	dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
141 		process->pasid, process->is_32bit_user_mode);
142 
143 	return 0;
144 }
145 
146 static int kfd_release(struct inode *inode, struct file *filep)
147 {
148 	struct kfd_process *process = filep->private_data;
149 
150 	if (process)
151 		kfd_unref_process(process);
152 
153 	return 0;
154 }
155 
156 static int kfd_ioctl_get_version(struct file *filep, struct kfd_process *p,
157 					void *data)
158 {
159 	struct kfd_ioctl_get_version_args *args = data;
160 
161 	args->major_version = KFD_IOCTL_MAJOR_VERSION;
162 	args->minor_version = KFD_IOCTL_MINOR_VERSION;
163 
164 	return 0;
165 }
166 
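/*
 * Validate the user-supplied queue-creation arguments and translate them
 * into a queue_properties structure for the process queue manager.
 */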
167 static int set_queue_properties_from_user(struct queue_properties *q_properties,
168 				struct kfd_ioctl_create_queue_args *args)
169 {
170 	if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
		pr_err("Queue percentage must be between 0 and KFD_MAX_QUEUE_PERCENTAGE\n");
172 		return -EINVAL;
173 	}
174 
175 	if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
		pr_err("Queue priority must be between 0 and KFD_MAX_QUEUE_PRIORITY\n");
177 		return -EINVAL;
178 	}
179 
180 	if ((args->ring_base_address) &&
181 		(!access_ok((const void __user *) args->ring_base_address,
182 			sizeof(uint64_t)))) {
183 		pr_err("Can't access ring base address\n");
184 		return -EFAULT;
185 	}
186 
187 	if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
188 		pr_err("Ring size must be a power of 2 or 0\n");
189 		return -EINVAL;
190 	}
191 
192 	if (!access_ok((const void __user *) args->read_pointer_address,
193 			sizeof(uint32_t))) {
194 		pr_err("Can't access read pointer\n");
195 		return -EFAULT;
196 	}
197 
198 	if (!access_ok((const void __user *) args->write_pointer_address,
199 			sizeof(uint32_t))) {
200 		pr_err("Can't access write pointer\n");
201 		return -EFAULT;
202 	}
203 
204 	if (args->eop_buffer_address &&
205 		!access_ok((const void __user *) args->eop_buffer_address,
206 			sizeof(uint32_t))) {
207 		pr_debug("Can't access eop buffer");
208 		return -EFAULT;
209 	}
210 
211 	if (args->ctx_save_restore_address &&
212 		!access_ok((const void __user *) args->ctx_save_restore_address,
213 			sizeof(uint32_t))) {
214 		pr_debug("Can't access ctx save restore buffer");
215 		return -EFAULT;
216 	}
217 
218 	q_properties->is_interop = false;
219 	q_properties->is_gws = false;
220 	q_properties->queue_percent = args->queue_percentage;
221 	q_properties->priority = args->queue_priority;
222 	q_properties->queue_address = args->ring_base_address;
223 	q_properties->queue_size = args->ring_size;
224 	q_properties->read_ptr = (uint32_t *) args->read_pointer_address;
225 	q_properties->write_ptr = (uint32_t *) args->write_pointer_address;
226 	q_properties->eop_ring_buffer_address = args->eop_buffer_address;
227 	q_properties->eop_ring_buffer_size = args->eop_buffer_size;
228 	q_properties->ctx_save_restore_area_address =
229 			args->ctx_save_restore_address;
230 	q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size;
231 	q_properties->ctl_stack_size = args->ctl_stack_size;
232 	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
233 		args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
234 		q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
235 	else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)
236 		q_properties->type = KFD_QUEUE_TYPE_SDMA;
237 	else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_XGMI)
238 		q_properties->type = KFD_QUEUE_TYPE_SDMA_XGMI;
239 	else
240 		return -ENOTSUPP;
241 
242 	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
243 		q_properties->format = KFD_QUEUE_FORMAT_AQL;
244 	else
245 		q_properties->format = KFD_QUEUE_FORMAT_PM4;
246 
247 	pr_debug("Queue Percentage: %d, %d\n",
248 			q_properties->queue_percent, args->queue_percentage);
249 
250 	pr_debug("Queue Priority: %d, %d\n",
251 			q_properties->priority, args->queue_priority);
252 
253 	pr_debug("Queue Address: 0x%llX, 0x%llX\n",
254 			q_properties->queue_address, args->ring_base_address);
255 
256 	pr_debug("Queue Size: 0x%llX, %u\n",
257 			q_properties->queue_size, args->ring_size);
258 
259 	pr_debug("Queue r/w Pointers: %px, %px\n",
260 			q_properties->read_ptr,
261 			q_properties->write_ptr);
262 
263 	pr_debug("Queue Format: %d\n", q_properties->format);
264 
265 	pr_debug("Queue EOP: 0x%llX\n", q_properties->eop_ring_buffer_address);
266 
267 	pr_debug("Queue CTX save area: 0x%llX\n",
268 			q_properties->ctx_save_restore_area_address);
269 
270 	return 0;
271 }
272 
273 static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
274 					void *data)
275 {
276 	struct kfd_ioctl_create_queue_args *args = data;
277 	struct kfd_dev *dev;
278 	int err = 0;
279 	unsigned int queue_id;
280 	struct kfd_process_device *pdd;
281 	struct queue_properties q_properties;
282 	uint32_t doorbell_offset_in_process = 0;
283 
284 	memset(&q_properties, 0, sizeof(struct queue_properties));
285 
286 	pr_debug("Creating queue ioctl\n");
287 
288 	err = set_queue_properties_from_user(&q_properties, args);
289 	if (err)
290 		return err;
291 
292 	pr_debug("Looking for gpu id 0x%x\n", args->gpu_id);
293 	dev = kfd_device_by_id(args->gpu_id);
294 	if (!dev) {
295 		pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
296 		return -EINVAL;
297 	}
298 
299 	mutex_lock(&p->mutex);
300 
301 	pdd = kfd_bind_process_to_device(dev, p);
302 	if (IS_ERR(pdd)) {
303 		err = -ESRCH;
304 		goto err_bind_process;
305 	}
306 
307 	pr_debug("Creating queue for PASID 0x%x on gpu 0x%x\n",
308 			p->pasid,
309 			dev->id);
310 
311 	err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id,
312 			&doorbell_offset_in_process);
313 	if (err != 0)
314 		goto err_create_queue;
315 
316 	args->queue_id = queue_id;
317 
318 
	/* Encode the mmap type and gpu_id into the doorbell offset returned for mmap */
320 	args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
321 	args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
322 	if (KFD_IS_SOC15(dev->device_info->asic_family))
323 		/* On SOC15 ASICs, include the doorbell offset within the
324 		 * process doorbell frame, which is 2 pages.
325 		 */
326 		args->doorbell_offset |= doorbell_offset_in_process;
327 
328 	mutex_unlock(&p->mutex);
329 
330 	pr_debug("Queue id %d was created successfully\n", args->queue_id);
331 
332 	pr_debug("Ring buffer address == 0x%016llX\n",
333 			args->ring_base_address);
334 
335 	pr_debug("Read ptr address    == 0x%016llX\n",
336 			args->read_pointer_address);
337 
338 	pr_debug("Write ptr address   == 0x%016llX\n",
339 			args->write_pointer_address);
340 
341 	return 0;
342 
343 err_create_queue:
344 err_bind_process:
345 	mutex_unlock(&p->mutex);
346 	return err;
347 }
348 
349 static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
350 					void *data)
351 {
352 	int retval;
353 	struct kfd_ioctl_destroy_queue_args *args = data;
354 
355 	pr_debug("Destroying queue id %d for pasid 0x%x\n",
356 				args->queue_id,
357 				p->pasid);
358 
359 	mutex_lock(&p->mutex);
360 
361 	retval = pqm_destroy_queue(&p->pqm, args->queue_id);
362 
363 	mutex_unlock(&p->mutex);
364 	return retval;
365 }
366 
367 static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
368 					void *data)
369 {
370 	int retval;
371 	struct kfd_ioctl_update_queue_args *args = data;
372 	struct queue_properties properties;
373 
374 	if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
		pr_err("Queue percentage must be between 0 and KFD_MAX_QUEUE_PERCENTAGE\n");
376 		return -EINVAL;
377 	}
378 
379 	if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
		pr_err("Queue priority must be between 0 and KFD_MAX_QUEUE_PRIORITY\n");
381 		return -EINVAL;
382 	}
383 
384 	if ((args->ring_base_address) &&
385 		(!access_ok((const void __user *) args->ring_base_address,
386 			sizeof(uint64_t)))) {
387 		pr_err("Can't access ring base address\n");
388 		return -EFAULT;
389 	}
390 
391 	if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
392 		pr_err("Ring size must be a power of 2 or 0\n");
393 		return -EINVAL;
394 	}
395 
396 	properties.queue_address = args->ring_base_address;
397 	properties.queue_size = args->ring_size;
398 	properties.queue_percent = args->queue_percentage;
399 	properties.priority = args->queue_priority;
400 
401 	pr_debug("Updating queue id %d for pasid 0x%x\n",
402 			args->queue_id, p->pasid);
403 
404 	mutex_lock(&p->mutex);
405 
406 	retval = pqm_update_queue(&p->pqm, args->queue_id, &properties);
407 
408 	mutex_unlock(&p->mutex);
409 
410 	return retval;
411 }
412 
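/*
 * Copy a user-provided CU mask (a multiple of 32 bits, capped at
 * max_num_cus bits) into kernel memory and apply it to the given queue.
 */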
413 static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p,
414 					void *data)
415 {
416 	int retval;
417 	const int max_num_cus = 1024;
418 	struct kfd_ioctl_set_cu_mask_args *args = data;
419 	struct queue_properties properties;
420 	uint32_t __user *cu_mask_ptr = (uint32_t __user *)args->cu_mask_ptr;
421 	size_t cu_mask_size = sizeof(uint32_t) * (args->num_cu_mask / 32);
422 
423 	if ((args->num_cu_mask % 32) != 0) {
424 		pr_debug("num_cu_mask 0x%x must be a multiple of 32",
425 				args->num_cu_mask);
426 		return -EINVAL;
427 	}
428 
429 	properties.cu_mask_count = args->num_cu_mask;
430 	if (properties.cu_mask_count == 0) {
431 		pr_debug("CU mask cannot be 0");
432 		return -EINVAL;
433 	}
434 
	/* To prevent an unreasonably large CU mask size, set an arbitrary
	 * limit of max_num_cus bits.  Any CU mask bits beyond max_num_cus are
	 * dropped and only the first max_num_cus bits are used.
	 */
439 	if (properties.cu_mask_count > max_num_cus) {
440 		pr_debug("CU mask cannot be greater than 1024 bits");
441 		properties.cu_mask_count = max_num_cus;
442 		cu_mask_size = sizeof(uint32_t) * (max_num_cus/32);
443 	}
444 
445 	properties.cu_mask = kzalloc(cu_mask_size, GFP_KERNEL);
446 	if (!properties.cu_mask)
447 		return -ENOMEM;
448 
449 	retval = copy_from_user(properties.cu_mask, cu_mask_ptr, cu_mask_size);
450 	if (retval) {
451 		pr_debug("Could not copy CU mask from userspace");
452 		kfree(properties.cu_mask);
453 		return -EFAULT;
454 	}
455 
456 	mutex_lock(&p->mutex);
457 
458 	retval = pqm_set_cu_mask(&p->pqm, args->queue_id, &properties);
459 
460 	mutex_unlock(&p->mutex);
461 
462 	if (retval)
463 		kfree(properties.cu_mask);
464 
465 	return retval;
466 }
467 
468 static int kfd_ioctl_get_queue_wave_state(struct file *filep,
469 					  struct kfd_process *p, void *data)
470 {
471 	struct kfd_ioctl_get_queue_wave_state_args *args = data;
472 	int r;
473 
474 	mutex_lock(&p->mutex);
475 
476 	r = pqm_get_wave_state(&p->pqm, args->queue_id,
477 			       (void __user *)args->ctl_stack_address,
478 			       &args->ctl_stack_used_size,
479 			       &args->save_area_used_size);
480 
481 	mutex_unlock(&p->mutex);
482 
483 	return r;
484 }
485 
486 static int kfd_ioctl_set_memory_policy(struct file *filep,
487 					struct kfd_process *p, void *data)
488 {
489 	struct kfd_ioctl_set_memory_policy_args *args = data;
490 	struct kfd_dev *dev;
491 	int err = 0;
492 	struct kfd_process_device *pdd;
493 	enum cache_policy default_policy, alternate_policy;
494 
495 	if (args->default_policy != KFD_IOC_CACHE_POLICY_COHERENT
496 	    && args->default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
497 		return -EINVAL;
498 	}
499 
500 	if (args->alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT
501 	    && args->alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
502 		return -EINVAL;
503 	}
504 
505 	dev = kfd_device_by_id(args->gpu_id);
506 	if (!dev)
507 		return -EINVAL;
508 
509 	mutex_lock(&p->mutex);
510 
511 	pdd = kfd_bind_process_to_device(dev, p);
512 	if (IS_ERR(pdd)) {
513 		err = -ESRCH;
514 		goto out;
515 	}
516 
517 	default_policy = (args->default_policy == KFD_IOC_CACHE_POLICY_COHERENT)
518 			 ? cache_policy_coherent : cache_policy_noncoherent;
519 
520 	alternate_policy =
521 		(args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)
522 		   ? cache_policy_coherent : cache_policy_noncoherent;
523 
524 	if (!dev->dqm->ops.set_cache_memory_policy(dev->dqm,
525 				&pdd->qpd,
526 				default_policy,
527 				alternate_policy,
528 				(void __user *)args->alternate_aperture_base,
529 				args->alternate_aperture_size))
530 		err = -EINVAL;
531 
532 out:
533 	mutex_unlock(&p->mutex);
534 
535 	return err;
536 }
537 
538 static int kfd_ioctl_set_trap_handler(struct file *filep,
539 					struct kfd_process *p, void *data)
540 {
541 	struct kfd_ioctl_set_trap_handler_args *args = data;
542 	struct kfd_dev *dev;
543 	int err = 0;
544 	struct kfd_process_device *pdd;
545 
546 	dev = kfd_device_by_id(args->gpu_id);
547 	if (!dev)
548 		return -EINVAL;
549 
550 	mutex_lock(&p->mutex);
551 
552 	pdd = kfd_bind_process_to_device(dev, p);
553 	if (IS_ERR(pdd)) {
554 		err = -ESRCH;
555 		goto out;
556 	}
557 
558 	if (dev->dqm->ops.set_trap_handler(dev->dqm,
559 					&pdd->qpd,
560 					args->tba_addr,
561 					args->tma_addr))
562 		err = -EINVAL;
563 
564 out:
565 	mutex_unlock(&p->mutex);
566 
567 	return err;
568 }
569 
570 static int kfd_ioctl_dbg_register(struct file *filep,
571 				struct kfd_process *p, void *data)
572 {
573 	struct kfd_ioctl_dbg_register_args *args = data;
574 	struct kfd_dev *dev;
575 	struct kfd_dbgmgr *dbgmgr_ptr;
576 	struct kfd_process_device *pdd;
577 	bool create_ok;
578 	long status = 0;
579 
580 	dev = kfd_device_by_id(args->gpu_id);
581 	if (!dev)
582 		return -EINVAL;
583 
584 	if (dev->device_info->asic_family == CHIP_CARRIZO) {
585 		pr_debug("kfd_ioctl_dbg_register not supported on CZ\n");
586 		return -EINVAL;
587 	}
588 
589 	mutex_lock(&p->mutex);
590 	mutex_lock(kfd_get_dbgmgr_mutex());
591 
	/*
	 * make sure that we have a pdd, in case this is the first queue
	 * created for this process
	 */
596 	pdd = kfd_bind_process_to_device(dev, p);
597 	if (IS_ERR(pdd)) {
598 		status = PTR_ERR(pdd);
599 		goto out;
600 	}
601 
602 	if (!dev->dbgmgr) {
603 		/* In case of a legal call, we have no dbgmgr yet */
604 		create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev);
605 		if (create_ok) {
606 			status = kfd_dbgmgr_register(dbgmgr_ptr, p);
607 			if (status != 0)
608 				kfd_dbgmgr_destroy(dbgmgr_ptr);
609 			else
610 				dev->dbgmgr = dbgmgr_ptr;
611 		}
612 	} else {
613 		pr_debug("debugger already registered\n");
614 		status = -EINVAL;
615 	}
616 
617 out:
618 	mutex_unlock(kfd_get_dbgmgr_mutex());
619 	mutex_unlock(&p->mutex);
620 
621 	return status;
622 }
623 
624 static int kfd_ioctl_dbg_unregister(struct file *filep,
625 				struct kfd_process *p, void *data)
626 {
627 	struct kfd_ioctl_dbg_unregister_args *args = data;
628 	struct kfd_dev *dev;
629 	long status;
630 
631 	dev = kfd_device_by_id(args->gpu_id);
632 	if (!dev || !dev->dbgmgr)
633 		return -EINVAL;
634 
635 	if (dev->device_info->asic_family == CHIP_CARRIZO) {
636 		pr_debug("kfd_ioctl_dbg_unregister not supported on CZ\n");
637 		return -EINVAL;
638 	}
639 
640 	mutex_lock(kfd_get_dbgmgr_mutex());
641 
642 	status = kfd_dbgmgr_unregister(dev->dbgmgr, p);
643 	if (!status) {
644 		kfd_dbgmgr_destroy(dev->dbgmgr);
645 		dev->dbgmgr = NULL;
646 	}
647 
648 	mutex_unlock(kfd_get_dbgmgr_mutex());
649 
650 	return status;
651 }
652 
/*
 * Parse and generate a variable-size data structure for address watch.
 * The total buffer size and number of watch points are limited in order
 * to prevent kernel abuse (this has no bearing on the much smaller HW
 * limitation, which is enforced by the dbgdev module).
 *
 * Also note that the watch addresses themselves are not copied from user,
 * since they are written into the HW as user-mode values.
 */
662 static int kfd_ioctl_dbg_address_watch(struct file *filep,
663 					struct kfd_process *p, void *data)
664 {
665 	struct kfd_ioctl_dbg_address_watch_args *args = data;
666 	struct kfd_dev *dev;
667 	struct dbg_address_watch_info aw_info;
668 	unsigned char *args_buff;
669 	long status;
670 	void __user *cmd_from_user;
671 	uint64_t watch_mask_value = 0;
672 	unsigned int args_idx = 0;
673 
674 	memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info));
675 
676 	dev = kfd_device_by_id(args->gpu_id);
677 	if (!dev)
678 		return -EINVAL;
679 
680 	if (dev->device_info->asic_family == CHIP_CARRIZO) {
681 		pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
682 		return -EINVAL;
683 	}
684 
685 	cmd_from_user = (void __user *) args->content_ptr;
686 
687 	/* Validate arguments */
688 
689 	if ((args->buf_size_in_bytes > MAX_ALLOWED_AW_BUFF_SIZE) ||
690 		(args->buf_size_in_bytes <= sizeof(*args) + sizeof(int) * 2) ||
691 		(cmd_from_user == NULL))
692 		return -EINVAL;
693 
694 	/* this is the actual buffer to work with */
695 	args_buff = memdup_user(cmd_from_user,
696 				args->buf_size_in_bytes - sizeof(*args));
697 	if (IS_ERR(args_buff))
698 		return PTR_ERR(args_buff);
699 
700 	aw_info.process = p;
701 
702 	aw_info.num_watch_points = *((uint32_t *)(&args_buff[args_idx]));
703 	args_idx += sizeof(aw_info.num_watch_points);
704 
705 	aw_info.watch_mode = (enum HSA_DBG_WATCH_MODE *) &args_buff[args_idx];
706 	args_idx += sizeof(enum HSA_DBG_WATCH_MODE) * aw_info.num_watch_points;
707 
	/*
	 * set watch address base pointer to point to the array base
	 * within args_buff
	 */
712 	aw_info.watch_address = (uint64_t *) &args_buff[args_idx];
713 
714 	/* skip over the addresses buffer */
715 	args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points;
716 
717 	if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
718 		status = -EINVAL;
719 		goto out;
720 	}
721 
722 	watch_mask_value = (uint64_t) args_buff[args_idx];
723 
724 	if (watch_mask_value > 0) {
		/*
		 * There is an array of masks.
		 * set watch mask base pointer to point to the array base
		 * within args_buff
		 */
730 		aw_info.watch_mask = (uint64_t *) &args_buff[args_idx];
731 
732 		/* skip over the masks buffer */
733 		args_idx += sizeof(aw_info.watch_mask) *
734 				aw_info.num_watch_points;
735 	} else {
736 		/* just the NULL mask, set to NULL and skip over it */
737 		aw_info.watch_mask = NULL;
738 		args_idx += sizeof(aw_info.watch_mask);
739 	}
740 
	if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
742 		status = -EINVAL;
743 		goto out;
744 	}
745 
746 	/* Currently HSA Event is not supported for DBG */
747 	aw_info.watch_event = NULL;
748 
749 	mutex_lock(kfd_get_dbgmgr_mutex());
750 
751 	status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info);
752 
753 	mutex_unlock(kfd_get_dbgmgr_mutex());
754 
755 out:
756 	kfree(args_buff);
757 
758 	return status;
759 }
760 
761 /* Parse and generate fixed size data structure for wave control */
762 static int kfd_ioctl_dbg_wave_control(struct file *filep,
763 					struct kfd_process *p, void *data)
764 {
765 	struct kfd_ioctl_dbg_wave_control_args *args = data;
766 	struct kfd_dev *dev;
767 	struct dbg_wave_control_info wac_info;
768 	unsigned char *args_buff;
769 	uint32_t computed_buff_size;
770 	long status;
771 	void __user *cmd_from_user;
772 	unsigned int args_idx = 0;
773 
774 	memset((void *) &wac_info, 0, sizeof(struct dbg_wave_control_info));
775 
776 	/* we use compact form, independent of the packing attribute value */
777 	computed_buff_size = sizeof(*args) +
778 				sizeof(wac_info.mode) +
779 				sizeof(wac_info.operand) +
780 				sizeof(wac_info.dbgWave_msg.DbgWaveMsg) +
781 				sizeof(wac_info.dbgWave_msg.MemoryVA) +
782 				sizeof(wac_info.trapId);
783 
784 	dev = kfd_device_by_id(args->gpu_id);
785 	if (!dev)
786 		return -EINVAL;
787 
788 	if (dev->device_info->asic_family == CHIP_CARRIZO) {
789 		pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
790 		return -EINVAL;
791 	}
792 
793 	/* input size must match the computed "compact" size */
794 	if (args->buf_size_in_bytes != computed_buff_size) {
		pr_debug("size mismatch, computed : actual %u : %u\n",
				computed_buff_size, args->buf_size_in_bytes);
797 		return -EINVAL;
798 	}
799 
800 	cmd_from_user = (void __user *) args->content_ptr;
801 
802 	if (cmd_from_user == NULL)
803 		return -EINVAL;
804 
805 	/* copy the entire buffer from user */
806 
807 	args_buff = memdup_user(cmd_from_user,
808 				args->buf_size_in_bytes - sizeof(*args));
809 	if (IS_ERR(args_buff))
810 		return PTR_ERR(args_buff);
811 
	/* move ptr to the start of the "payload" area */
813 	wac_info.process = p;
814 
815 	wac_info.operand = *((enum HSA_DBG_WAVEOP *)(&args_buff[args_idx]));
816 	args_idx += sizeof(wac_info.operand);
817 
818 	wac_info.mode = *((enum HSA_DBG_WAVEMODE *)(&args_buff[args_idx]));
819 	args_idx += sizeof(wac_info.mode);
820 
821 	wac_info.trapId = *((uint32_t *)(&args_buff[args_idx]));
822 	args_idx += sizeof(wac_info.trapId);
823 
824 	wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value =
825 					*((uint32_t *)(&args_buff[args_idx]));
826 	wac_info.dbgWave_msg.MemoryVA = NULL;
827 
828 	mutex_lock(kfd_get_dbgmgr_mutex());
829 
830 	pr_debug("Calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n",
831 			wac_info.process, wac_info.operand,
832 			wac_info.mode, wac_info.trapId,
833 			wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
834 
835 	status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info);
836 
837 	pr_debug("Returned status of dbg manager is %ld\n", status);
838 
839 	mutex_unlock(kfd_get_dbgmgr_mutex());
840 
841 	kfree(args_buff);
842 
843 	return status;
844 }
845 
846 static int kfd_ioctl_get_clock_counters(struct file *filep,
847 				struct kfd_process *p, void *data)
848 {
849 	struct kfd_ioctl_get_clock_counters_args *args = data;
850 	struct kfd_dev *dev;
851 
852 	dev = kfd_device_by_id(args->gpu_id);
853 	if (dev)
854 		/* Reading GPU clock counter from KGD */
855 		args->gpu_clock_counter = amdgpu_amdkfd_get_gpu_clock_counter(dev->kgd);
856 	else
857 		/* Node without GPU resource */
858 		args->gpu_clock_counter = 0;
859 
860 	/* No access to rdtsc. Using raw monotonic time */
861 	args->cpu_clock_counter = ktime_get_raw_ns();
862 	args->system_clock_counter = ktime_get_boottime_ns();
863 
	/* Since the counter is in nanoseconds, we use 1GHz frequency */
865 	args->system_clock_freq = 1000000000;
866 
867 	return 0;
868 }
869 
870 
871 static int kfd_ioctl_get_process_apertures(struct file *filp,
872 				struct kfd_process *p, void *data)
873 {
874 	struct kfd_ioctl_get_process_apertures_args *args = data;
875 	struct kfd_process_device_apertures *pAperture;
876 	struct kfd_process_device *pdd;
877 
878 	dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);
879 
880 	args->num_of_nodes = 0;
881 
882 	mutex_lock(&p->mutex);
883 
	/* If the process-device list isn't empty */
885 	if (kfd_has_process_device_data(p)) {
886 		/* Run over all pdd of the process */
887 		pdd = kfd_get_first_process_device_data(p);
888 		do {
889 			pAperture =
890 				&args->process_apertures[args->num_of_nodes];
891 			pAperture->gpu_id = pdd->dev->id;
892 			pAperture->lds_base = pdd->lds_base;
893 			pAperture->lds_limit = pdd->lds_limit;
894 			pAperture->gpuvm_base = pdd->gpuvm_base;
895 			pAperture->gpuvm_limit = pdd->gpuvm_limit;
896 			pAperture->scratch_base = pdd->scratch_base;
897 			pAperture->scratch_limit = pdd->scratch_limit;
898 
899 			dev_dbg(kfd_device,
900 				"node id %u\n", args->num_of_nodes);
901 			dev_dbg(kfd_device,
902 				"gpu id %u\n", pdd->dev->id);
903 			dev_dbg(kfd_device,
904 				"lds_base %llX\n", pdd->lds_base);
905 			dev_dbg(kfd_device,
906 				"lds_limit %llX\n", pdd->lds_limit);
907 			dev_dbg(kfd_device,
908 				"gpuvm_base %llX\n", pdd->gpuvm_base);
909 			dev_dbg(kfd_device,
910 				"gpuvm_limit %llX\n", pdd->gpuvm_limit);
911 			dev_dbg(kfd_device,
912 				"scratch_base %llX\n", pdd->scratch_base);
913 			dev_dbg(kfd_device,
914 				"scratch_limit %llX\n", pdd->scratch_limit);
915 
916 			args->num_of_nodes++;
917 
918 			pdd = kfd_get_next_process_device_data(p, pdd);
919 		} while (pdd && (args->num_of_nodes < NUM_OF_SUPPORTED_GPUS));
920 	}
921 
922 	mutex_unlock(&p->mutex);
923 
924 	return 0;
925 }
926 
927 static int kfd_ioctl_get_process_apertures_new(struct file *filp,
928 				struct kfd_process *p, void *data)
929 {
930 	struct kfd_ioctl_get_process_apertures_new_args *args = data;
931 	struct kfd_process_device_apertures *pa;
932 	struct kfd_process_device *pdd;
933 	uint32_t nodes = 0;
934 	int ret;
935 
936 	dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);
937 
938 	if (args->num_of_nodes == 0) {
		/* Return number of nodes, so that user space can allocate
		 * sufficient memory
		 */
942 		mutex_lock(&p->mutex);
943 
944 		if (!kfd_has_process_device_data(p))
945 			goto out_unlock;
946 
947 		/* Run over all pdd of the process */
948 		pdd = kfd_get_first_process_device_data(p);
949 		do {
950 			args->num_of_nodes++;
951 			pdd = kfd_get_next_process_device_data(p, pdd);
952 		} while (pdd);
953 
954 		goto out_unlock;
955 	}
956 
957 	/* Fill in process-aperture information for all available
958 	 * nodes, but not more than args->num_of_nodes as that is
959 	 * the amount of memory allocated by user
960 	 */
961 	pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
962 				args->num_of_nodes), GFP_KERNEL);
963 	if (!pa)
964 		return -ENOMEM;
965 
966 	mutex_lock(&p->mutex);
967 
968 	if (!kfd_has_process_device_data(p)) {
969 		args->num_of_nodes = 0;
970 		kfree(pa);
971 		goto out_unlock;
972 	}
973 
974 	/* Run over all pdd of the process */
975 	pdd = kfd_get_first_process_device_data(p);
976 	do {
977 		pa[nodes].gpu_id = pdd->dev->id;
978 		pa[nodes].lds_base = pdd->lds_base;
979 		pa[nodes].lds_limit = pdd->lds_limit;
980 		pa[nodes].gpuvm_base = pdd->gpuvm_base;
981 		pa[nodes].gpuvm_limit = pdd->gpuvm_limit;
982 		pa[nodes].scratch_base = pdd->scratch_base;
983 		pa[nodes].scratch_limit = pdd->scratch_limit;
984 
985 		dev_dbg(kfd_device,
986 			"gpu id %u\n", pdd->dev->id);
987 		dev_dbg(kfd_device,
988 			"lds_base %llX\n", pdd->lds_base);
989 		dev_dbg(kfd_device,
990 			"lds_limit %llX\n", pdd->lds_limit);
991 		dev_dbg(kfd_device,
992 			"gpuvm_base %llX\n", pdd->gpuvm_base);
993 		dev_dbg(kfd_device,
994 			"gpuvm_limit %llX\n", pdd->gpuvm_limit);
995 		dev_dbg(kfd_device,
996 			"scratch_base %llX\n", pdd->scratch_base);
997 		dev_dbg(kfd_device,
998 			"scratch_limit %llX\n", pdd->scratch_limit);
999 		nodes++;
1000 
1001 		pdd = kfd_get_next_process_device_data(p, pdd);
1002 	} while (pdd && (nodes < args->num_of_nodes));
1003 	mutex_unlock(&p->mutex);
1004 
1005 	args->num_of_nodes = nodes;
1006 	ret = copy_to_user(
1007 			(void __user *)args->kfd_process_device_apertures_ptr,
1008 			pa,
1009 			(nodes * sizeof(struct kfd_process_device_apertures)));
1010 	kfree(pa);
1011 	return ret ? -EFAULT : 0;
1012 
1013 out_unlock:
1014 	mutex_unlock(&p->mutex);
1015 	return 0;
1016 }
1017 
1018 static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
1019 					void *data)
1020 {
1021 	struct kfd_ioctl_create_event_args *args = data;
1022 	int err;
1023 
1024 	/* For dGPUs the event page is allocated in user mode. The
1025 	 * handle is passed to KFD with the first call to this IOCTL
1026 	 * through the event_page_offset field.
1027 	 */
1028 	if (args->event_page_offset) {
1029 		struct kfd_dev *kfd;
1030 		struct kfd_process_device *pdd;
1031 		void *mem, *kern_addr;
1032 		uint64_t size;
1033 
1034 		if (p->signal_page) {
1035 			pr_err("Event page is already set\n");
1036 			return -EINVAL;
1037 		}
1038 
1039 		kfd = kfd_device_by_id(GET_GPU_ID(args->event_page_offset));
1040 		if (!kfd) {
1041 			pr_err("Getting device by id failed in %s\n", __func__);
1042 			return -EINVAL;
1043 		}
1044 
1045 		mutex_lock(&p->mutex);
1046 		pdd = kfd_bind_process_to_device(kfd, p);
1047 		if (IS_ERR(pdd)) {
1048 			err = PTR_ERR(pdd);
1049 			goto out_unlock;
1050 		}
1051 
1052 		mem = kfd_process_device_translate_handle(pdd,
1053 				GET_IDR_HANDLE(args->event_page_offset));
1054 		if (!mem) {
1055 			pr_err("Can't find BO, offset is 0x%llx\n",
1056 			       args->event_page_offset);
1057 			err = -EINVAL;
1058 			goto out_unlock;
1059 		}
1060 		mutex_unlock(&p->mutex);
1061 
1062 		err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kfd->kgd,
1063 						mem, &kern_addr, &size);
1064 		if (err) {
1065 			pr_err("Failed to map event page to kernel\n");
1066 			return err;
1067 		}
1068 
1069 		err = kfd_event_page_set(p, kern_addr, size);
1070 		if (err) {
1071 			pr_err("Failed to set event page\n");
1072 			return err;
1073 		}
1074 	}
1075 
1076 	err = kfd_event_create(filp, p, args->event_type,
1077 				args->auto_reset != 0, args->node_id,
1078 				&args->event_id, &args->event_trigger_data,
1079 				&args->event_page_offset,
1080 				&args->event_slot_index);
1081 
1082 	return err;
1083 
1084 out_unlock:
1085 	mutex_unlock(&p->mutex);
1086 	return err;
1087 }
1088 
1089 static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p,
1090 					void *data)
1091 {
1092 	struct kfd_ioctl_destroy_event_args *args = data;
1093 
1094 	return kfd_event_destroy(p, args->event_id);
1095 }
1096 
1097 static int kfd_ioctl_set_event(struct file *filp, struct kfd_process *p,
1098 				void *data)
1099 {
1100 	struct kfd_ioctl_set_event_args *args = data;
1101 
1102 	return kfd_set_event(p, args->event_id);
1103 }
1104 
1105 static int kfd_ioctl_reset_event(struct file *filp, struct kfd_process *p,
1106 				void *data)
1107 {
1108 	struct kfd_ioctl_reset_event_args *args = data;
1109 
1110 	return kfd_reset_event(p, args->event_id);
1111 }
1112 
1113 static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
1114 				void *data)
1115 {
1116 	struct kfd_ioctl_wait_events_args *args = data;
1117 	int err;
1118 
1119 	err = kfd_wait_on_events(p, args->num_events,
1120 			(void __user *)args->events_ptr,
1121 			(args->wait_for_all != 0),
1122 			args->timeout, &args->wait_result);
1123 
1124 	return err;
1125 }
1126 static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
1127 					struct kfd_process *p, void *data)
1128 {
1129 	struct kfd_ioctl_set_scratch_backing_va_args *args = data;
1130 	struct kfd_process_device *pdd;
1131 	struct kfd_dev *dev;
1132 	long err;
1133 
1134 	dev = kfd_device_by_id(args->gpu_id);
1135 	if (!dev)
1136 		return -EINVAL;
1137 
1138 	mutex_lock(&p->mutex);
1139 
1140 	pdd = kfd_bind_process_to_device(dev, p);
1141 	if (IS_ERR(pdd)) {
1142 		err = PTR_ERR(pdd);
1143 		goto bind_process_to_device_fail;
1144 	}
1145 
1146 	pdd->qpd.sh_hidden_private_base = args->va_addr;
1147 
1148 	mutex_unlock(&p->mutex);
1149 
1150 	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&
1151 	    pdd->qpd.vmid != 0 && dev->kfd2kgd->set_scratch_backing_va)
1152 		dev->kfd2kgd->set_scratch_backing_va(
1153 			dev->kgd, args->va_addr, pdd->qpd.vmid);
1154 
1155 	return 0;
1156 
1157 bind_process_to_device_fail:
1158 	mutex_unlock(&p->mutex);
1159 	return err;
1160 }
1161 
1162 static int kfd_ioctl_get_tile_config(struct file *filep,
1163 		struct kfd_process *p, void *data)
1164 {
1165 	struct kfd_ioctl_get_tile_config_args *args = data;
1166 	struct kfd_dev *dev;
1167 	struct tile_config config;
1168 	int err = 0;
1169 
1170 	dev = kfd_device_by_id(args->gpu_id);
1171 	if (!dev)
1172 		return -EINVAL;
1173 
1174 	amdgpu_amdkfd_get_tile_config(dev->kgd, &config);
1175 
1176 	args->gb_addr_config = config.gb_addr_config;
1177 	args->num_banks = config.num_banks;
1178 	args->num_ranks = config.num_ranks;
1179 
1180 	if (args->num_tile_configs > config.num_tile_configs)
1181 		args->num_tile_configs = config.num_tile_configs;
1182 	err = copy_to_user((void __user *)args->tile_config_ptr,
1183 			config.tile_config_ptr,
1184 			args->num_tile_configs * sizeof(uint32_t));
1185 	if (err) {
1186 		args->num_tile_configs = 0;
1187 		return -EFAULT;
1188 	}
1189 
1190 	if (args->num_macro_tile_configs > config.num_macro_tile_configs)
1191 		args->num_macro_tile_configs =
1192 				config.num_macro_tile_configs;
1193 	err = copy_to_user((void __user *)args->macro_tile_config_ptr,
1194 			config.macro_tile_config_ptr,
1195 			args->num_macro_tile_configs * sizeof(uint32_t));
1196 	if (err) {
1197 		args->num_macro_tile_configs = 0;
1198 		return -EFAULT;
1199 	}
1200 
1201 	return 0;
1202 }
1203 
1204 static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,
1205 				void *data)
1206 {
1207 	struct kfd_ioctl_acquire_vm_args *args = data;
1208 	struct kfd_process_device *pdd;
1209 	struct kfd_dev *dev;
1210 	struct file *drm_file;
1211 	int ret;
1212 
1213 	dev = kfd_device_by_id(args->gpu_id);
1214 	if (!dev)
1215 		return -EINVAL;
1216 
1217 	drm_file = fget(args->drm_fd);
1218 	if (!drm_file)
1219 		return -EINVAL;
1220 
1221 	mutex_lock(&p->mutex);
1222 
1223 	pdd = kfd_get_process_device_data(dev, p);
1224 	if (!pdd) {
1225 		ret = -EINVAL;
1226 		goto err_unlock;
1227 	}
1228 
1229 	if (pdd->drm_file) {
1230 		ret = pdd->drm_file == drm_file ? 0 : -EBUSY;
1231 		goto err_unlock;
1232 	}
1233 
1234 	ret = kfd_process_device_init_vm(pdd, drm_file);
1235 	if (ret)
1236 		goto err_unlock;
1237 	/* On success, the PDD keeps the drm_file reference */
1238 	mutex_unlock(&p->mutex);
1239 
1240 	return 0;
1241 
1242 err_unlock:
1243 	mutex_unlock(&p->mutex);
1244 	fput(drm_file);
1245 	return ret;
1246 }
1247 
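/*
 * A device is treated as large-BAR when all of its local memory is
 * host-accessible (no private VRAM segment); the debug_largebar option
 * can force this behaviour for testing.
 */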
1248 bool kfd_dev_is_large_bar(struct kfd_dev *dev)
1249 {
1250 	struct kfd_local_mem_info mem_info;
1251 
1252 	if (debug_largebar) {
1253 		pr_debug("Simulate large-bar allocation on non large-bar machine\n");
1254 		return true;
1255 	}
1256 
1257 	if (dev->device_info->needs_iommu_device)
1258 		return false;
1259 
1260 	amdgpu_amdkfd_get_local_mem_info(dev->kgd, &mem_info);
1261 	if (mem_info.local_mem_size_private == 0 &&
1262 			mem_info.local_mem_size_public > 0)
1263 		return true;
1264 	return false;
1265 }
1266 
1267 static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
1268 					struct kfd_process *p, void *data)
1269 {
1270 	struct kfd_ioctl_alloc_memory_of_gpu_args *args = data;
1271 	struct kfd_process_device *pdd;
1272 	void *mem;
1273 	struct kfd_dev *dev;
1274 	int idr_handle;
1275 	long err;
1276 	uint64_t offset = args->mmap_offset;
1277 	uint32_t flags = args->flags;
1278 
1279 	if (args->size == 0)
1280 		return -EINVAL;
1281 
1282 	dev = kfd_device_by_id(args->gpu_id);
1283 	if (!dev)
1284 		return -EINVAL;
1285 
1286 	if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) &&
1287 		(flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) &&
1288 		!kfd_dev_is_large_bar(dev)) {
1289 		pr_err("Alloc host visible vram on small bar is not allowed\n");
1290 		return -EINVAL;
1291 	}
1292 
1293 	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
1294 		if (args->size != kfd_doorbell_process_slice(dev))
1295 			return -EINVAL;
1296 		offset = kfd_get_process_doorbells(dev, p);
1297 	} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
1298 		if (args->size != PAGE_SIZE)
1299 			return -EINVAL;
1300 		offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
1301 		if (!offset)
1302 			return -ENOMEM;
1303 	}
1304 
1305 	mutex_lock(&p->mutex);
1306 
1307 	pdd = kfd_bind_process_to_device(dev, p);
1308 	if (IS_ERR(pdd)) {
1309 		err = PTR_ERR(pdd);
1310 		goto err_unlock;
1311 	}
1312 
1313 	err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
1314 		dev->kgd, args->va_addr, args->size,
1315 		pdd->vm, (struct kgd_mem **) &mem, &offset,
1316 		flags);
1317 
1318 	if (err)
1319 		goto err_unlock;
1320 
1321 	idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
1322 	if (idr_handle < 0) {
1323 		err = -EFAULT;
1324 		goto err_free;
1325 	}
1326 
1327 	/* Update the VRAM usage count */
1328 	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
1329 		WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + args->size);
1330 
1331 	mutex_unlock(&p->mutex);
1332 
1333 	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
1334 	args->mmap_offset = offset;
1335 
	/* MMIO is mapped through the kfd device.
	 * Generate a kfd mmap offset for it.
	 */
1339 	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)
1340 		args->mmap_offset = KFD_MMAP_TYPE_MMIO
1341 					| KFD_MMAP_GPU_ID(args->gpu_id);
1342 
1343 	return 0;
1344 
1345 err_free:
1346 	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, NULL);
1347 err_unlock:
1348 	mutex_unlock(&p->mutex);
1349 	return err;
1350 }
1351 
1352 static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
1353 					struct kfd_process *p, void *data)
1354 {
1355 	struct kfd_ioctl_free_memory_of_gpu_args *args = data;
1356 	struct kfd_process_device *pdd;
1357 	void *mem;
1358 	struct kfd_dev *dev;
1359 	int ret;
1360 	uint64_t size = 0;
1361 
1362 	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
1363 	if (!dev)
1364 		return -EINVAL;
1365 
1366 	mutex_lock(&p->mutex);
1367 
1368 	pdd = kfd_get_process_device_data(dev, p);
1369 	if (!pdd) {
1370 		pr_err("Process device data doesn't exist\n");
1371 		ret = -EINVAL;
1372 		goto err_unlock;
1373 	}
1374 
1375 	mem = kfd_process_device_translate_handle(
1376 		pdd, GET_IDR_HANDLE(args->handle));
1377 	if (!mem) {
1378 		ret = -EINVAL;
1379 		goto err_unlock;
1380 	}
1381 
1382 	ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd,
1383 						(struct kgd_mem *)mem, &size);
1384 
1385 	/* If freeing the buffer failed, leave the handle in place for
1386 	 * clean-up during process tear-down.
1387 	 */
1388 	if (!ret)
1389 		kfd_process_device_remove_obj_handle(
1390 			pdd, GET_IDR_HANDLE(args->handle));
1391 
1392 	WRITE_ONCE(pdd->vram_usage, pdd->vram_usage - size);
1393 
1394 err_unlock:
1395 	mutex_unlock(&p->mutex);
1396 	return ret;
1397 }
1398 
1399 static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
1400 					struct kfd_process *p, void *data)
1401 {
1402 	struct kfd_ioctl_map_memory_to_gpu_args *args = data;
1403 	struct kfd_process_device *pdd, *peer_pdd;
1404 	void *mem;
1405 	struct kfd_dev *dev, *peer;
1406 	long err = 0;
1407 	int i;
1408 	uint32_t *devices_arr = NULL;
1409 
1410 	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
1411 	if (!dev)
1412 		return -EINVAL;
1413 
1414 	if (!args->n_devices) {
1415 		pr_debug("Device IDs array empty\n");
1416 		return -EINVAL;
1417 	}
1418 	if (args->n_success > args->n_devices) {
1419 		pr_debug("n_success exceeds n_devices\n");
1420 		return -EINVAL;
1421 	}
1422 
1423 	devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
1424 				    GFP_KERNEL);
1425 	if (!devices_arr)
1426 		return -ENOMEM;
1427 
1428 	err = copy_from_user(devices_arr,
1429 			     (void __user *)args->device_ids_array_ptr,
1430 			     args->n_devices * sizeof(*devices_arr));
1431 	if (err != 0) {
1432 		err = -EFAULT;
1433 		goto copy_from_user_failed;
1434 	}
1435 
1436 	mutex_lock(&p->mutex);
1437 
1438 	pdd = kfd_bind_process_to_device(dev, p);
1439 	if (IS_ERR(pdd)) {
1440 		err = PTR_ERR(pdd);
1441 		goto bind_process_to_device_failed;
1442 	}
1443 
1444 	mem = kfd_process_device_translate_handle(pdd,
1445 						GET_IDR_HANDLE(args->handle));
1446 	if (!mem) {
1447 		err = -ENOMEM;
1448 		goto get_mem_obj_from_handle_failed;
1449 	}
1450 
1451 	for (i = args->n_success; i < args->n_devices; i++) {
1452 		peer = kfd_device_by_id(devices_arr[i]);
1453 		if (!peer) {
1454 			pr_debug("Getting device by id failed for 0x%x\n",
1455 				 devices_arr[i]);
1456 			err = -EINVAL;
1457 			goto get_mem_obj_from_handle_failed;
1458 		}
1459 
1460 		peer_pdd = kfd_bind_process_to_device(peer, p);
1461 		if (IS_ERR(peer_pdd)) {
1462 			err = PTR_ERR(peer_pdd);
1463 			goto get_mem_obj_from_handle_failed;
1464 		}
1465 		err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
1466 			peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
1467 		if (err) {
1468 			pr_err("Failed to map to gpu %d/%d\n",
1469 			       i, args->n_devices);
1470 			goto map_memory_to_gpu_failed;
1471 		}
1472 		args->n_success = i+1;
1473 	}
1474 
1475 	mutex_unlock(&p->mutex);
1476 
1477 	err = amdgpu_amdkfd_gpuvm_sync_memory(dev->kgd, (struct kgd_mem *) mem, true);
1478 	if (err) {
1479 		pr_debug("Sync memory failed, wait interrupted by user signal\n");
1480 		goto sync_memory_failed;
1481 	}
1482 
1483 	/* Flush TLBs after waiting for the page table updates to complete */
1484 	for (i = 0; i < args->n_devices; i++) {
1485 		peer = kfd_device_by_id(devices_arr[i]);
1486 		if (WARN_ON_ONCE(!peer))
1487 			continue;
1488 		peer_pdd = kfd_get_process_device_data(peer, p);
1489 		if (WARN_ON_ONCE(!peer_pdd))
1490 			continue;
1491 		kfd_flush_tlb(peer_pdd);
1492 	}
1493 
1494 	kfree(devices_arr);
1495 
1496 	return err;
1497 
1498 bind_process_to_device_failed:
1499 get_mem_obj_from_handle_failed:
1500 map_memory_to_gpu_failed:
1501 	mutex_unlock(&p->mutex);
1502 copy_from_user_failed:
1503 sync_memory_failed:
1504 	kfree(devices_arr);
1505 
1506 	return err;
1507 }
1508 
1509 static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
1510 					struct kfd_process *p, void *data)
1511 {
1512 	struct kfd_ioctl_unmap_memory_from_gpu_args *args = data;
1513 	struct kfd_process_device *pdd, *peer_pdd;
1514 	void *mem;
1515 	struct kfd_dev *dev, *peer;
1516 	long err = 0;
1517 	uint32_t *devices_arr = NULL, i;
1518 
1519 	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
1520 	if (!dev)
1521 		return -EINVAL;
1522 
1523 	if (!args->n_devices) {
1524 		pr_debug("Device IDs array empty\n");
1525 		return -EINVAL;
1526 	}
1527 	if (args->n_success > args->n_devices) {
1528 		pr_debug("n_success exceeds n_devices\n");
1529 		return -EINVAL;
1530 	}
1531 
1532 	devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
1533 				    GFP_KERNEL);
1534 	if (!devices_arr)
1535 		return -ENOMEM;
1536 
1537 	err = copy_from_user(devices_arr,
1538 			     (void __user *)args->device_ids_array_ptr,
1539 			     args->n_devices * sizeof(*devices_arr));
1540 	if (err != 0) {
1541 		err = -EFAULT;
1542 		goto copy_from_user_failed;
1543 	}
1544 
1545 	mutex_lock(&p->mutex);
1546 
1547 	pdd = kfd_get_process_device_data(dev, p);
1548 	if (!pdd) {
1549 		err = -EINVAL;
1550 		goto bind_process_to_device_failed;
1551 	}
1552 
1553 	mem = kfd_process_device_translate_handle(pdd,
1554 						GET_IDR_HANDLE(args->handle));
1555 	if (!mem) {
1556 		err = -ENOMEM;
1557 		goto get_mem_obj_from_handle_failed;
1558 	}
1559 
1560 	for (i = args->n_success; i < args->n_devices; i++) {
1561 		peer = kfd_device_by_id(devices_arr[i]);
1562 		if (!peer) {
1563 			err = -EINVAL;
1564 			goto get_mem_obj_from_handle_failed;
1565 		}
1566 
1567 		peer_pdd = kfd_get_process_device_data(peer, p);
1568 		if (!peer_pdd) {
1569 			err = -ENODEV;
1570 			goto get_mem_obj_from_handle_failed;
1571 		}
1572 		err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
1573 			peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
1574 		if (err) {
1575 			pr_err("Failed to unmap from gpu %d/%d\n",
1576 			       i, args->n_devices);
1577 			goto unmap_memory_from_gpu_failed;
1578 		}
1579 		args->n_success = i+1;
1580 	}
1581 	kfree(devices_arr);
1582 
1583 	mutex_unlock(&p->mutex);
1584 
1585 	return 0;
1586 
1587 bind_process_to_device_failed:
1588 get_mem_obj_from_handle_failed:
1589 unmap_memory_from_gpu_failed:
1590 	mutex_unlock(&p->mutex);
1591 copy_from_user_failed:
1592 	kfree(devices_arr);
1593 	return err;
1594 }
1595 
1596 static int kfd_ioctl_alloc_queue_gws(struct file *filep,
1597 		struct kfd_process *p, void *data)
1598 {
1599 	int retval;
1600 	struct kfd_ioctl_alloc_queue_gws_args *args = data;
1601 	struct queue *q;
1602 	struct kfd_dev *dev;
1603 
1604 	mutex_lock(&p->mutex);
1605 	q = pqm_get_user_queue(&p->pqm, args->queue_id);
1606 
1607 	if (q) {
1608 		dev = q->device;
1609 	} else {
1610 		retval = -EINVAL;
1611 		goto out_unlock;
1612 	}
1613 
1614 	if (!dev->gws) {
1615 		retval = -ENODEV;
1616 		goto out_unlock;
1617 	}
1618 
1619 	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
1620 		retval = -ENODEV;
1621 		goto out_unlock;
1622 	}
1623 
1624 	retval = pqm_set_gws(&p->pqm, args->queue_id, args->num_gws ? dev->gws : NULL);
1625 	mutex_unlock(&p->mutex);
1626 
1627 	args->first_gws = 0;
1628 	return retval;
1629 
1630 out_unlock:
1631 	mutex_unlock(&p->mutex);
1632 	return retval;
1633 }
1634 
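/*
 * Query the size, owning GPU, flags and optional metadata of a DMA-buf
 * through the KGD interface.
 */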
1635 static int kfd_ioctl_get_dmabuf_info(struct file *filep,
1636 		struct kfd_process *p, void *data)
1637 {
1638 	struct kfd_ioctl_get_dmabuf_info_args *args = data;
1639 	struct kfd_dev *dev = NULL;
1640 	struct kgd_dev *dma_buf_kgd;
1641 	void *metadata_buffer = NULL;
1642 	uint32_t flags;
1643 	unsigned int i;
1644 	int r;
1645 
1646 	/* Find a KFD GPU device that supports the get_dmabuf_info query */
1647 	for (i = 0; kfd_topology_enum_kfd_devices(i, &dev) == 0; i++)
1648 		if (dev)
1649 			break;
1650 	if (!dev)
1651 		return -EINVAL;
1652 
1653 	if (args->metadata_ptr) {
1654 		metadata_buffer = kzalloc(args->metadata_size, GFP_KERNEL);
1655 		if (!metadata_buffer)
1656 			return -ENOMEM;
1657 	}
1658 
1659 	/* Get dmabuf info from KGD */
1660 	r = amdgpu_amdkfd_get_dmabuf_info(dev->kgd, args->dmabuf_fd,
1661 					  &dma_buf_kgd, &args->size,
1662 					  metadata_buffer, args->metadata_size,
1663 					  &args->metadata_size, &flags);
1664 	if (r)
1665 		goto exit;
1666 
1667 	/* Reverse-lookup gpu_id from kgd pointer */
1668 	dev = kfd_device_by_kgd(dma_buf_kgd);
1669 	if (!dev) {
1670 		r = -EINVAL;
1671 		goto exit;
1672 	}
1673 	args->gpu_id = dev->id;
1674 	args->flags = flags;
1675 
1676 	/* Copy metadata buffer to user mode */
1677 	if (metadata_buffer) {
1678 		r = copy_to_user((void __user *)args->metadata_ptr,
1679 				 metadata_buffer, args->metadata_size);
1680 		if (r != 0)
1681 			r = -EFAULT;
1682 	}
1683 
1684 exit:
1685 	kfree(metadata_buffer);
1686 
1687 	return r;
1688 }
1689 
1690 static int kfd_ioctl_import_dmabuf(struct file *filep,
1691 				   struct kfd_process *p, void *data)
1692 {
1693 	struct kfd_ioctl_import_dmabuf_args *args = data;
1694 	struct kfd_process_device *pdd;
1695 	struct dma_buf *dmabuf;
1696 	struct kfd_dev *dev;
1697 	int idr_handle;
1698 	uint64_t size;
1699 	void *mem;
1700 	int r;
1701 
1702 	dev = kfd_device_by_id(args->gpu_id);
1703 	if (!dev)
1704 		return -EINVAL;
1705 
1706 	dmabuf = dma_buf_get(args->dmabuf_fd);
1707 	if (IS_ERR(dmabuf))
1708 		return PTR_ERR(dmabuf);
1709 
1710 	mutex_lock(&p->mutex);
1711 
1712 	pdd = kfd_bind_process_to_device(dev, p);
1713 	if (IS_ERR(pdd)) {
1714 		r = PTR_ERR(pdd);
1715 		goto err_unlock;
1716 	}
1717 
1718 	r = amdgpu_amdkfd_gpuvm_import_dmabuf(dev->kgd, dmabuf,
1719 					      args->va_addr, pdd->vm,
1720 					      (struct kgd_mem **)&mem, &size,
1721 					      NULL);
1722 	if (r)
1723 		goto err_unlock;
1724 
1725 	idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
1726 	if (idr_handle < 0) {
1727 		r = -EFAULT;
1728 		goto err_free;
1729 	}
1730 
1731 	mutex_unlock(&p->mutex);
1732 
1733 	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
1734 
1735 	return 0;
1736 
1737 err_free:
1738 	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, NULL);
1739 err_unlock:
1740 	mutex_unlock(&p->mutex);
1741 	return r;
1742 }
1743 
1744 /* Handle requests for watching SMI events */
1745 static int kfd_ioctl_smi_events(struct file *filep,
1746 				struct kfd_process *p, void *data)
1747 {
1748 	struct kfd_ioctl_smi_events_args *args = data;
1749 	struct kfd_dev *dev;
1750 
1751 	dev = kfd_device_by_id(args->gpuid);
1752 	if (!dev)
1753 		return -EINVAL;
1754 
1755 	return kfd_smi_event_open(dev, &args->anon_fd);
1756 }
1757 
1758 #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
1759 	[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
1760 			    .cmd_drv = 0, .name = #ioctl}
1761 
1762 /** Ioctl table */
1763 static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
1764 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_VERSION,
1765 			kfd_ioctl_get_version, 0),
1766 
1767 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_QUEUE,
1768 			kfd_ioctl_create_queue, 0),
1769 
1770 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_QUEUE,
1771 			kfd_ioctl_destroy_queue, 0),
1772 
1773 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_MEMORY_POLICY,
1774 			kfd_ioctl_set_memory_policy, 0),
1775 
1776 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_CLOCK_COUNTERS,
1777 			kfd_ioctl_get_clock_counters, 0),
1778 
1779 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES,
1780 			kfd_ioctl_get_process_apertures, 0),
1781 
1782 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UPDATE_QUEUE,
1783 			kfd_ioctl_update_queue, 0),
1784 
1785 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_EVENT,
1786 			kfd_ioctl_create_event, 0),
1787 
1788 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_EVENT,
1789 			kfd_ioctl_destroy_event, 0),
1790 
1791 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_EVENT,
1792 			kfd_ioctl_set_event, 0),
1793 
1794 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_RESET_EVENT,
1795 			kfd_ioctl_reset_event, 0),
1796 
1797 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS,
1798 			kfd_ioctl_wait_events, 0),
1799 
1800 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER,
1801 			kfd_ioctl_dbg_register, 0),
1802 
1803 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER,
1804 			kfd_ioctl_dbg_unregister, 0),
1805 
1806 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH,
1807 			kfd_ioctl_dbg_address_watch, 0),
1808 
1809 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL,
1810 			kfd_ioctl_dbg_wave_control, 0),
1811 
1812 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA,
1813 			kfd_ioctl_set_scratch_backing_va, 0),
1814 
1815 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG,
1816 			kfd_ioctl_get_tile_config, 0),
1817 
1818 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER,
1819 			kfd_ioctl_set_trap_handler, 0),
1820 
1821 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW,
1822 			kfd_ioctl_get_process_apertures_new, 0),
1823 
1824 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ACQUIRE_VM,
1825 			kfd_ioctl_acquire_vm, 0),
1826 
1827 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU,
1828 			kfd_ioctl_alloc_memory_of_gpu, 0),
1829 
1830 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_FREE_MEMORY_OF_GPU,
1831 			kfd_ioctl_free_memory_of_gpu, 0),
1832 
1833 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_MAP_MEMORY_TO_GPU,
1834 			kfd_ioctl_map_memory_to_gpu, 0),
1835 
1836 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU,
1837 			kfd_ioctl_unmap_memory_from_gpu, 0),
1838 
1839 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_CU_MASK,
1840 			kfd_ioctl_set_cu_mask, 0),
1841 
1842 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_QUEUE_WAVE_STATE,
1843 			kfd_ioctl_get_queue_wave_state, 0),
1844 
1845 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_DMABUF_INFO,
1846 				kfd_ioctl_get_dmabuf_info, 0),
1847 
1848 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF,
1849 				kfd_ioctl_import_dmabuf, 0),
1850 
1851 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS,
1852 			kfd_ioctl_alloc_queue_gws, 0),
1853 
1854 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SMI_EVENTS,
1855 			kfd_ioctl_smi_events, 0),
1856 };
1857 
1858 #define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)
1859 
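/*
 * Central ioctl dispatcher: validates the ioctl number, copies the argument
 * payload into a kernel buffer, calls the handler from the ioctl table and
 * copies any output back to user space.
 */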
1860 static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
1861 {
1862 	struct kfd_process *process;
1863 	amdkfd_ioctl_t *func;
1864 	const struct amdkfd_ioctl_desc *ioctl = NULL;
1865 	unsigned int nr = _IOC_NR(cmd);
1866 	char stack_kdata[128];
1867 	char *kdata = NULL;
1868 	unsigned int usize, asize;
1869 	int retcode = -EINVAL;
1870 
1871 	if (nr >= AMDKFD_CORE_IOCTL_COUNT)
1872 		goto err_i1;
1873 
1874 	if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) {
1875 		u32 amdkfd_size;
1876 
1877 		ioctl = &amdkfd_ioctls[nr];
1878 
1879 		amdkfd_size = _IOC_SIZE(ioctl->cmd);
1880 		usize = asize = _IOC_SIZE(cmd);
1881 		if (amdkfd_size > asize)
1882 			asize = amdkfd_size;
1883 
1884 		cmd = ioctl->cmd;
1885 	} else
1886 		goto err_i1;
1887 
1888 	dev_dbg(kfd_device, "ioctl cmd 0x%x (#0x%x), arg 0x%lx\n", cmd, nr, arg);
1889 
1890 	/* Get the process struct from the filep. Only the process
1891 	 * that opened /dev/kfd can use the file descriptor. Child
1892 	 * processes need to create their own KFD device context.
1893 	 */
1894 	process = filep->private_data;
1895 	if (process->lead_thread != current->group_leader) {
1896 		dev_dbg(kfd_device, "Using KFD FD in wrong process\n");
1897 		retcode = -EBADF;
1898 		goto err_i1;
1899 	}
1900 
1901 	/* Do not trust userspace, use our own definition */
1902 	func = ioctl->func;
1903 
1904 	if (unlikely(!func)) {
1905 		dev_dbg(kfd_device, "no function\n");
1906 		retcode = -EINVAL;
1907 		goto err_i1;
1908 	}
1909 
1910 	if (cmd & (IOC_IN | IOC_OUT)) {
1911 		if (asize <= sizeof(stack_kdata)) {
1912 			kdata = stack_kdata;
1913 		} else {
1914 			kdata = kmalloc(asize, GFP_KERNEL);
1915 			if (!kdata) {
1916 				retcode = -ENOMEM;
1917 				goto err_i1;
1918 			}
1919 		}
1920 		if (asize > usize)
1921 			memset(kdata + usize, 0, asize - usize);
1922 	}
1923 
1924 	if (cmd & IOC_IN) {
1925 		if (copy_from_user(kdata, (void __user *)arg, usize) != 0) {
1926 			retcode = -EFAULT;
1927 			goto err_i1;
1928 		}
1929 	} else if (cmd & IOC_OUT) {
1930 		memset(kdata, 0, usize);
1931 	}
1932 
1933 	retcode = func(filep, process, kdata);
1934 
1935 	if (cmd & IOC_OUT)
1936 		if (copy_to_user((void __user *)arg, kdata, usize) != 0)
1937 			retcode = -EFAULT;
1938 
1939 err_i1:
1940 	if (!ioctl)
1941 		dev_dbg(kfd_device, "invalid ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n",
1942 			  task_pid_nr(current), cmd, nr);
1943 
1944 	if (kdata != stack_kdata)
1945 		kfree(kdata);
1946 
1947 	if (retcode)
1948 		dev_dbg(kfd_device, "ioctl cmd (#0x%x), arg 0x%lx, ret = %d\n",
1949 				nr, arg, retcode);
1950 
1951 	return retcode;
1952 }
1953 
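/* Map the device's single-page MMIO remap region into user space, uncached */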
1954 static int kfd_mmio_mmap(struct kfd_dev *dev, struct kfd_process *process,
1955 		      struct vm_area_struct *vma)
1956 {
1957 	phys_addr_t address;
1958 	int ret;
1959 
1960 	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
1961 		return -EINVAL;
1962 
1963 	address = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
1964 
1965 	vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
1966 				VM_DONTDUMP | VM_PFNMAP;
1967 
1968 	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
1969 
1970 	pr_debug("pasid 0x%x mapping mmio page\n"
1971 		 "     target user address == 0x%08llX\n"
1972 		 "     physical address    == 0x%08llX\n"
1973 		 "     vm_flags            == 0x%04lX\n"
1974 		 "     size                == 0x%04lX\n",
1975 		 process->pasid, (unsigned long long) vma->vm_start,
1976 		 address, vma->vm_flags, PAGE_SIZE);
1977 
1978 	ret = io_remap_pfn_range(vma,
1979 				vma->vm_start,
1980 				address >> PAGE_SHIFT,
1981 				PAGE_SIZE,
1982 				vma->vm_page_prot);
1983 	return ret;
1984 }
1985 
1986 
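/*
 * Dispatch mmap requests according to the type and GPU ID encoded in the
 * mmap offset (doorbells, events, reserved memory or remapped MMIO).
 */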
1987 static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
1988 {
1989 	struct kfd_process *process;
1990 	struct kfd_dev *dev = NULL;
1991 	unsigned long mmap_offset;
1992 	unsigned int gpu_id;
1993 
1994 	process = kfd_get_process(current);
1995 	if (IS_ERR(process))
1996 		return PTR_ERR(process);
1997 
1998 	mmap_offset = vma->vm_pgoff << PAGE_SHIFT;
1999 	gpu_id = KFD_MMAP_GET_GPU_ID(mmap_offset);
2000 	if (gpu_id)
2001 		dev = kfd_device_by_id(gpu_id);
2002 
2003 	switch (mmap_offset & KFD_MMAP_TYPE_MASK) {
2004 	case KFD_MMAP_TYPE_DOORBELL:
2005 		if (!dev)
2006 			return -ENODEV;
2007 		return kfd_doorbell_mmap(dev, process, vma);
2008 
2009 	case KFD_MMAP_TYPE_EVENTS:
2010 		return kfd_event_mmap(process, vma);
2011 
2012 	case KFD_MMAP_TYPE_RESERVED_MEM:
2013 		if (!dev)
2014 			return -ENODEV;
2015 		return kfd_reserved_mem_mmap(dev, process, vma);
2016 	case KFD_MMAP_TYPE_MMIO:
2017 		if (!dev)
2018 			return -ENODEV;
2019 		return kfd_mmio_mmap(dev, process, vma);
2020 	}
2021 
2022 	return -EFAULT;
2023 }
2024