1 // SPDX-License-Identifier: GPL-2.0 OR MIT
2 /*
3  * Copyright 2014-2022 Advanced Micro Devices, Inc.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21  * OTHER DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include <linux/device.h>
25 #include <linux/export.h>
26 #include <linux/err.h>
27 #include <linux/fs.h>
28 #include <linux/file.h>
29 #include <linux/sched.h>
30 #include <linux/slab.h>
31 #include <linux/uaccess.h>
32 #include <linux/compat.h>
33 #include <uapi/linux/kfd_ioctl.h>
34 #include <linux/time.h>
35 #include <linux/mm.h>
36 #include <linux/mman.h>
37 #include <linux/ptrace.h>
38 #include <linux/dma-buf.h>
39 #include <linux/fdtable.h>
40 #include <linux/processor.h>
41 #include "kfd_priv.h"
42 #include "kfd_device_queue_manager.h"
43 #include "kfd_svm.h"
44 #include "amdgpu_amdkfd.h"
45 #include "kfd_smi_events.h"
46 #include "amdgpu_dma_buf.h"
47 
48 static long kfd_ioctl(struct file *, unsigned int, unsigned long);
49 static int kfd_open(struct inode *, struct file *);
50 static int kfd_release(struct inode *, struct file *);
51 static int kfd_mmap(struct file *, struct vm_area_struct *);
52 
53 static const char kfd_dev_name[] = "kfd";
54 
55 static const struct file_operations kfd_fops = {
56 	.owner = THIS_MODULE,
57 	.unlocked_ioctl = kfd_ioctl,
58 	.compat_ioctl = compat_ptr_ioctl,
59 	.open = kfd_open,
60 	.release = kfd_release,
61 	.mmap = kfd_mmap,
62 };
63 
64 static int kfd_char_dev_major = -1;
65 static struct class *kfd_class;
66 struct device *kfd_device;
67 
68 static inline struct kfd_process_device *kfd_lock_pdd_by_id(struct kfd_process *p, __u32 gpu_id)
69 {
70 	struct kfd_process_device *pdd;
71 
72 	mutex_lock(&p->mutex);
73 	pdd = kfd_process_device_data_by_id(p, gpu_id);
74 
75 	if (pdd)
76 		return pdd;
77 
78 	mutex_unlock(&p->mutex);
79 	return NULL;
80 }
81 
82 static inline void kfd_unlock_pdd(struct kfd_process_device *pdd)
83 {
84 	mutex_unlock(&pdd->process->mutex);
85 }
86 
87 int kfd_chardev_init(void)
88 {
89 	int err = 0;
90 
91 	kfd_char_dev_major = register_chrdev(0, kfd_dev_name, &kfd_fops);
92 	err = kfd_char_dev_major;
93 	if (err < 0)
94 		goto err_register_chrdev;
95 
96 	kfd_class = class_create(THIS_MODULE, kfd_dev_name);
97 	err = PTR_ERR(kfd_class);
98 	if (IS_ERR(kfd_class))
99 		goto err_class_create;
100 
101 	kfd_device = device_create(kfd_class, NULL,
102 					MKDEV(kfd_char_dev_major, 0),
103 					NULL, kfd_dev_name);
104 	err = PTR_ERR(kfd_device);
105 	if (IS_ERR(kfd_device))
106 		goto err_device_create;
107 
108 	return 0;
109 
110 err_device_create:
111 	class_destroy(kfd_class);
112 err_class_create:
113 	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
114 err_register_chrdev:
115 	return err;
116 }
117 
118 void kfd_chardev_exit(void)
119 {
120 	device_destroy(kfd_class, MKDEV(kfd_char_dev_major, 0));
121 	class_destroy(kfd_class);
122 	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
123 	kfd_device = NULL;
124 }
125 
126 
127 static int kfd_open(struct inode *inode, struct file *filep)
128 {
129 	struct kfd_process *process;
130 	bool is_32bit_user_mode;
131 
132 	if (iminor(inode) != 0)
133 		return -ENODEV;
134 
135 	is_32bit_user_mode = in_compat_syscall();
136 
137 	if (is_32bit_user_mode) {
138 		dev_warn(kfd_device,
139 			"Process %d (32-bit) failed to open /dev/kfd\n"
140 			"32-bit processes are not supported by amdkfd\n",
141 			current->pid);
142 		return -EPERM;
143 	}
144 
145 	process = kfd_create_process(filep);
146 	if (IS_ERR(process))
147 		return PTR_ERR(process);
148 
149 	/* filep now owns the reference returned by kfd_create_process */
150 	filep->private_data = process;
151 
152 	dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
153 		process->pasid, process->is_32bit_user_mode);
154 
155 	return 0;
156 }
157 
158 static int kfd_release(struct inode *inode, struct file *filep)
159 {
160 	struct kfd_process *process = filep->private_data;
161 
162 	if (process)
163 		kfd_unref_process(process);
164 
165 	return 0;
166 }
167 
168 static int kfd_ioctl_get_version(struct file *filep, struct kfd_process *p,
169 					void *data)
170 {
171 	struct kfd_ioctl_get_version_args *args = data;
172 
173 	args->major_version = KFD_IOCTL_MAJOR_VERSION;
174 	args->minor_version = KFD_IOCTL_MINOR_VERSION;
175 
176 	return 0;
177 }
178 
179 static int set_queue_properties_from_user(struct queue_properties *q_properties,
180 				struct kfd_ioctl_create_queue_args *args)
181 {
182 	/*
183 	 * Repurpose queue percentage to accommodate new features:
184 	 * bit 0-7: queue percentage
185 	 * bit 8-15: pm4_target_xcc
186 	 */
187 	if ((args->queue_percentage & 0xFF) > KFD_MAX_QUEUE_PERCENTAGE) {
188 		pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n");
189 		return -EINVAL;
190 	}
191 
192 	if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
193 		pr_err("Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n");
194 		return -EINVAL;
195 	}
196 
197 	if ((args->ring_base_address) &&
198 		(!access_ok((const void __user *) args->ring_base_address,
199 			sizeof(uint64_t)))) {
200 		pr_err("Can't access ring base address\n");
201 		return -EFAULT;
202 	}
203 
204 	if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
205 		pr_err("Ring size must be a power of 2 or 0\n");
206 		return -EINVAL;
207 	}
208 
209 	if (!access_ok((const void __user *) args->read_pointer_address,
210 			sizeof(uint32_t))) {
211 		pr_err("Can't access read pointer\n");
212 		return -EFAULT;
213 	}
214 
215 	if (!access_ok((const void __user *) args->write_pointer_address,
216 			sizeof(uint32_t))) {
217 		pr_err("Can't access write pointer\n");
218 		return -EFAULT;
219 	}
220 
221 	if (args->eop_buffer_address &&
222 		!access_ok((const void __user *) args->eop_buffer_address,
223 			sizeof(uint32_t))) {
224 		pr_debug("Can't access eop buffer");
225 		return -EFAULT;
226 	}
227 
228 	if (args->ctx_save_restore_address &&
229 		!access_ok((const void __user *) args->ctx_save_restore_address,
230 			sizeof(uint32_t))) {
231 		pr_debug("Can't access ctx save restore buffer");
232 		return -EFAULT;
233 	}
234 
235 	q_properties->is_interop = false;
236 	q_properties->is_gws = false;
237 	q_properties->queue_percent = args->queue_percentage & 0xFF;
238 	/* bit 8-15 are repurposed to be PM4 target XCC */
239 	q_properties->pm4_target_xcc = (args->queue_percentage >> 8) & 0xFF;
240 	q_properties->priority = args->queue_priority;
241 	q_properties->queue_address = args->ring_base_address;
242 	q_properties->queue_size = args->ring_size;
243 	q_properties->read_ptr = (uint32_t *) args->read_pointer_address;
244 	q_properties->write_ptr = (uint32_t *) args->write_pointer_address;
245 	q_properties->eop_ring_buffer_address = args->eop_buffer_address;
246 	q_properties->eop_ring_buffer_size = args->eop_buffer_size;
247 	q_properties->ctx_save_restore_area_address =
248 			args->ctx_save_restore_address;
249 	q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size;
250 	q_properties->ctl_stack_size = args->ctl_stack_size;
251 	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
252 		args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
253 		q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
254 	else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)
255 		q_properties->type = KFD_QUEUE_TYPE_SDMA;
256 	else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_XGMI)
257 		q_properties->type = KFD_QUEUE_TYPE_SDMA_XGMI;
258 	else
259 		return -ENOTSUPP;
260 
261 	if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
262 		q_properties->format = KFD_QUEUE_FORMAT_AQL;
263 	else
264 		q_properties->format = KFD_QUEUE_FORMAT_PM4;
265 
266 	pr_debug("Queue Percentage: %d, %d\n",
267 			q_properties->queue_percent, args->queue_percentage);
268 
269 	pr_debug("Queue Priority: %d, %d\n",
270 			q_properties->priority, args->queue_priority);
271 
272 	pr_debug("Queue Address: 0x%llX, 0x%llX\n",
273 			q_properties->queue_address, args->ring_base_address);
274 
275 	pr_debug("Queue Size: 0x%llX, %u\n",
276 			q_properties->queue_size, args->ring_size);
277 
278 	pr_debug("Queue r/w Pointers: %px, %px\n",
279 			q_properties->read_ptr,
280 			q_properties->write_ptr);
281 
282 	pr_debug("Queue Format: %d\n", q_properties->format);
283 
284 	pr_debug("Queue EOP: 0x%llX\n", q_properties->eop_ring_buffer_address);
285 
286 	pr_debug("Queue CTX save area: 0x%llX\n",
287 			q_properties->ctx_save_restore_area_address);
288 
289 	return 0;
290 }
291 
292 static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
293 					void *data)
294 {
295 	struct kfd_ioctl_create_queue_args *args = data;
296 	struct kfd_node *dev;
297 	int err = 0;
298 	unsigned int queue_id;
299 	struct kfd_process_device *pdd;
300 	struct queue_properties q_properties;
301 	uint32_t doorbell_offset_in_process = 0;
302 	struct amdgpu_bo *wptr_bo = NULL;
303 
304 	memset(&q_properties, 0, sizeof(struct queue_properties));
305 
306 	pr_debug("Creating queue ioctl\n");
307 
308 	err = set_queue_properties_from_user(&q_properties, args);
309 	if (err)
310 		return err;
311 
312 	pr_debug("Looking for gpu id 0x%x\n", args->gpu_id);
313 
314 	mutex_lock(&p->mutex);
315 
316 	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
317 	if (!pdd) {
318 		pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
319 		err = -EINVAL;
320 		goto err_pdd;
321 	}
322 	dev = pdd->dev;
323 
324 	pdd = kfd_bind_process_to_device(dev, p);
325 	if (IS_ERR(pdd)) {
326 		err = -ESRCH;
327 		goto err_bind_process;
328 	}
329 
330 	if (!pdd->doorbell_index &&
331 	    kfd_alloc_process_doorbells(dev->kfd, &pdd->doorbell_index) < 0) {
332 		err = -ENOMEM;
333 		goto err_alloc_doorbells;
334 	}
335 
336 	/* Starting with GFX11, wptr BOs must be mapped to GART for MES to determine work
337 	 * on unmapped queues for usermode queue oversubscription (no aggregated doorbell)
338 	 */
339 	if (dev->kfd->shared_resources.enable_mes &&
340 			((dev->adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK)
341 			>> AMDGPU_MES_API_VERSION_SHIFT) >= 2) {
342 		struct amdgpu_bo_va_mapping *wptr_mapping;
343 		struct amdgpu_vm *wptr_vm;
344 
345 		wptr_vm = drm_priv_to_vm(pdd->drm_priv);
346 		err = amdgpu_bo_reserve(wptr_vm->root.bo, false);
347 		if (err)
348 			goto err_wptr_map_gart;
349 
350 		wptr_mapping = amdgpu_vm_bo_lookup_mapping(
351 				wptr_vm, args->write_pointer_address >> PAGE_SHIFT);
352 		amdgpu_bo_unreserve(wptr_vm->root.bo);
353 		if (!wptr_mapping) {
354 			pr_err("Failed to lookup wptr bo\n");
355 			err = -EINVAL;
356 			goto err_wptr_map_gart;
357 		}
358 
359 		wptr_bo = wptr_mapping->bo_va->base.bo;
360 		if (wptr_bo->tbo.base.size > PAGE_SIZE) {
361 			pr_err("Requested GART mapping for wptr bo larger than one page\n");
362 			err = -EINVAL;
363 			goto err_wptr_map_gart;
364 		}
365 
366 		err = amdgpu_amdkfd_map_gtt_bo_to_gart(dev->adev, wptr_bo);
367 		if (err) {
368 			pr_err("Failed to map wptr bo to GART\n");
369 			goto err_wptr_map_gart;
370 		}
371 	}
372 
373 	pr_debug("Creating queue for PASID 0x%x on gpu 0x%x\n",
374 			p->pasid,
375 			dev->id);
376 
377 	err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id, wptr_bo,
378 			NULL, NULL, NULL, &doorbell_offset_in_process);
379 	if (err != 0)
380 		goto err_create_queue;
381 
382 	args->queue_id = queue_id;
383 
384 
385 	/* Return gpu_id as doorbell offset for mmap usage */
386 	args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
387 	args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
388 	if (KFD_IS_SOC15(dev))
389 		/* On SOC15 ASICs, include the doorbell offset within the
390 		 * process doorbell frame, which is 2 pages.
391 		 */
392 		args->doorbell_offset |= doorbell_offset_in_process;
393 
394 	mutex_unlock(&p->mutex);
395 
396 	pr_debug("Queue id %d was created successfully\n", args->queue_id);
397 
398 	pr_debug("Ring buffer address == 0x%016llX\n",
399 			args->ring_base_address);
400 
401 	pr_debug("Read ptr address    == 0x%016llX\n",
402 			args->read_pointer_address);
403 
404 	pr_debug("Write ptr address   == 0x%016llX\n",
405 			args->write_pointer_address);
406 
407 	return 0;
408 
409 err_create_queue:
410 	if (wptr_bo)
411 		amdgpu_amdkfd_free_gtt_mem(dev->adev, wptr_bo);
412 err_wptr_map_gart:
413 err_alloc_doorbells:
414 err_bind_process:
415 err_pdd:
416 	mutex_unlock(&p->mutex);
417 	return err;
418 }
419 
420 static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
421 					void *data)
422 {
423 	int retval;
424 	struct kfd_ioctl_destroy_queue_args *args = data;
425 
426 	pr_debug("Destroying queue id %d for pasid 0x%x\n",
427 				args->queue_id,
428 				p->pasid);
429 
430 	mutex_lock(&p->mutex);
431 
432 	retval = pqm_destroy_queue(&p->pqm, args->queue_id);
433 
434 	mutex_unlock(&p->mutex);
435 	return retval;
436 }
437 
438 static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
439 					void *data)
440 {
441 	int retval;
442 	struct kfd_ioctl_update_queue_args *args = data;
443 	struct queue_properties properties;
444 
445 	/*
446 	 * Repurpose queue percentage to accommodate new features:
447 	 * bit 0-7: queue percentage
448 	 * bit 8-15: pm4_target_xcc
449 	 */
450 	if ((args->queue_percentage & 0xFF) > KFD_MAX_QUEUE_PERCENTAGE) {
451 		pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n");
452 		return -EINVAL;
453 	}
454 
455 	if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
456 		pr_err("Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n");
457 		return -EINVAL;
458 	}
459 
460 	if ((args->ring_base_address) &&
461 		(!access_ok((const void __user *) args->ring_base_address,
462 			sizeof(uint64_t)))) {
463 		pr_err("Can't access ring base address\n");
464 		return -EFAULT;
465 	}
466 
467 	if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
468 		pr_err("Ring size must be a power of 2 or 0\n");
469 		return -EINVAL;
470 	}
471 
472 	properties.queue_address = args->ring_base_address;
473 	properties.queue_size = args->ring_size;
474 	properties.queue_percent = args->queue_percentage & 0xFF;
475 	/* bit 8-15 are repurposed to be PM4 target XCC */
476 	properties.pm4_target_xcc = (args->queue_percentage >> 8) & 0xFF;
477 	properties.priority = args->queue_priority;
478 
479 	pr_debug("Updating queue id %d for pasid 0x%x\n",
480 			args->queue_id, p->pasid);
481 
482 	mutex_lock(&p->mutex);
483 
484 	retval = pqm_update_queue_properties(&p->pqm, args->queue_id, &properties);
485 
486 	mutex_unlock(&p->mutex);
487 
488 	return retval;
489 }
490 
491 static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p,
492 					void *data)
493 {
494 	int retval;
495 	const int max_num_cus = 1024;
496 	struct kfd_ioctl_set_cu_mask_args *args = data;
497 	struct mqd_update_info minfo = {0};
498 	uint32_t __user *cu_mask_ptr = (uint32_t __user *)args->cu_mask_ptr;
499 	size_t cu_mask_size = sizeof(uint32_t) * (args->num_cu_mask / 32);
500 
501 	if ((args->num_cu_mask % 32) != 0) {
502 		pr_debug("num_cu_mask 0x%x must be a multiple of 32",
503 				args->num_cu_mask);
504 		return -EINVAL;
505 	}
506 
507 	minfo.cu_mask.count = args->num_cu_mask;
508 	if (minfo.cu_mask.count == 0) {
509 		pr_debug("CU mask cannot be 0");
510 		return -EINVAL;
511 	}
512 
513 	/* To prevent an unreasonably large CU mask size, set an arbitrary
514 	 * limit of max_num_cus bits.  We can then just drop any CU mask bits
515 	 * past max_num_cus bits and just use the first max_num_cus bits.
516 	 */
517 	if (minfo.cu_mask.count > max_num_cus) {
518 		pr_debug("CU mask cannot be greater than 1024 bits");
519 		minfo.cu_mask.count = max_num_cus;
520 		cu_mask_size = sizeof(uint32_t) * (max_num_cus/32);
521 	}
522 
523 	minfo.cu_mask.ptr = kzalloc(cu_mask_size, GFP_KERNEL);
524 	if (!minfo.cu_mask.ptr)
525 		return -ENOMEM;
526 
527 	retval = copy_from_user(minfo.cu_mask.ptr, cu_mask_ptr, cu_mask_size);
528 	if (retval) {
529 		pr_debug("Could not copy CU mask from userspace");
530 		retval = -EFAULT;
531 		goto out;
532 	}
533 
534 	minfo.update_flag = UPDATE_FLAG_CU_MASK;
535 
536 	mutex_lock(&p->mutex);
537 
538 	retval = pqm_update_mqd(&p->pqm, args->queue_id, &minfo);
539 
540 	mutex_unlock(&p->mutex);
541 
542 out:
543 	kfree(minfo.cu_mask.ptr);
544 	return retval;
545 }
546 
547 static int kfd_ioctl_get_queue_wave_state(struct file *filep,
548 					  struct kfd_process *p, void *data)
549 {
550 	struct kfd_ioctl_get_queue_wave_state_args *args = data;
551 	int r;
552 
553 	mutex_lock(&p->mutex);
554 
555 	r = pqm_get_wave_state(&p->pqm, args->queue_id,
556 			       (void __user *)args->ctl_stack_address,
557 			       &args->ctl_stack_used_size,
558 			       &args->save_area_used_size);
559 
560 	mutex_unlock(&p->mutex);
561 
562 	return r;
563 }
564 
565 static int kfd_ioctl_set_memory_policy(struct file *filep,
566 					struct kfd_process *p, void *data)
567 {
568 	struct kfd_ioctl_set_memory_policy_args *args = data;
569 	int err = 0;
570 	struct kfd_process_device *pdd;
571 	enum cache_policy default_policy, alternate_policy;
572 
573 	if (args->default_policy != KFD_IOC_CACHE_POLICY_COHERENT
574 	    && args->default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
575 		return -EINVAL;
576 	}
577 
578 	if (args->alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT
579 	    && args->alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
580 		return -EINVAL;
581 	}
582 
583 	mutex_lock(&p->mutex);
584 	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
585 	if (!pdd) {
586 		pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
587 		err = -EINVAL;
588 		goto err_pdd;
589 	}
590 
591 	pdd = kfd_bind_process_to_device(pdd->dev, p);
592 	if (IS_ERR(pdd)) {
593 		err = -ESRCH;
594 		goto out;
595 	}
596 
597 	default_policy = (args->default_policy == KFD_IOC_CACHE_POLICY_COHERENT)
598 			 ? cache_policy_coherent : cache_policy_noncoherent;
599 
600 	alternate_policy =
601 		(args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)
602 		   ? cache_policy_coherent : cache_policy_noncoherent;
603 
604 	if (!pdd->dev->dqm->ops.set_cache_memory_policy(pdd->dev->dqm,
605 				&pdd->qpd,
606 				default_policy,
607 				alternate_policy,
608 				(void __user *)args->alternate_aperture_base,
609 				args->alternate_aperture_size))
610 		err = -EINVAL;
611 
612 out:
613 err_pdd:
614 	mutex_unlock(&p->mutex);
615 
616 	return err;
617 }
618 
619 static int kfd_ioctl_set_trap_handler(struct file *filep,
620 					struct kfd_process *p, void *data)
621 {
622 	struct kfd_ioctl_set_trap_handler_args *args = data;
623 	int err = 0;
624 	struct kfd_process_device *pdd;
625 
626 	mutex_lock(&p->mutex);
627 
628 	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
629 	if (!pdd) {
630 		err = -EINVAL;
631 		goto err_pdd;
632 	}
633 
634 	pdd = kfd_bind_process_to_device(pdd->dev, p);
635 	if (IS_ERR(pdd)) {
636 		err = -ESRCH;
637 		goto out;
638 	}
639 
640 	kfd_process_set_trap_handler(&pdd->qpd, args->tba_addr, args->tma_addr);
641 
642 out:
643 err_pdd:
644 	mutex_unlock(&p->mutex);
645 
646 	return err;
647 }
648 
649 static int kfd_ioctl_dbg_register(struct file *filep,
650 				struct kfd_process *p, void *data)
651 {
652 	return -EPERM;
653 }
654 
655 static int kfd_ioctl_dbg_unregister(struct file *filep,
656 				struct kfd_process *p, void *data)
657 {
658 	return -EPERM;
659 }
660 
661 static int kfd_ioctl_dbg_address_watch(struct file *filep,
662 					struct kfd_process *p, void *data)
663 {
664 	return -EPERM;
665 }
666 
667 /* Parse and generate fixed size data structure for wave control */
668 static int kfd_ioctl_dbg_wave_control(struct file *filep,
669 					struct kfd_process *p, void *data)
670 {
671 	return -EPERM;
672 }
673 
674 static int kfd_ioctl_get_clock_counters(struct file *filep,
675 				struct kfd_process *p, void *data)
676 {
677 	struct kfd_ioctl_get_clock_counters_args *args = data;
678 	struct kfd_process_device *pdd;
679 
680 	mutex_lock(&p->mutex);
681 	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
682 	mutex_unlock(&p->mutex);
683 	if (pdd)
684 		/* Reading GPU clock counter from KGD */
685 		args->gpu_clock_counter = amdgpu_amdkfd_get_gpu_clock_counter(pdd->dev->adev);
686 	else
687 		/* Node without GPU resource */
688 		args->gpu_clock_counter = 0;
689 
690 	/* No access to rdtsc. Using raw monotonic time */
691 	args->cpu_clock_counter = ktime_get_raw_ns();
692 	args->system_clock_counter = ktime_get_boottime_ns();
693 
694 	/* Since the counter is in nano-seconds we use 1GHz frequency */
695 	args->system_clock_freq = 1000000000;
696 
697 	return 0;
698 }
699 
700 
701 static int kfd_ioctl_get_process_apertures(struct file *filp,
702 				struct kfd_process *p, void *data)
703 {
704 	struct kfd_ioctl_get_process_apertures_args *args = data;
705 	struct kfd_process_device_apertures *pAperture;
706 	int i;
707 
708 	dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);
709 
710 	args->num_of_nodes = 0;
711 
712 	mutex_lock(&p->mutex);
713 	/* Run over all pdd of the process */
714 	for (i = 0; i < p->n_pdds; i++) {
715 		struct kfd_process_device *pdd = p->pdds[i];
716 
717 		pAperture =
718 			&args->process_apertures[args->num_of_nodes];
719 		pAperture->gpu_id = pdd->dev->id;
720 		pAperture->lds_base = pdd->lds_base;
721 		pAperture->lds_limit = pdd->lds_limit;
722 		pAperture->gpuvm_base = pdd->gpuvm_base;
723 		pAperture->gpuvm_limit = pdd->gpuvm_limit;
724 		pAperture->scratch_base = pdd->scratch_base;
725 		pAperture->scratch_limit = pdd->scratch_limit;
726 
727 		dev_dbg(kfd_device,
728 			"node id %u\n", args->num_of_nodes);
729 		dev_dbg(kfd_device,
730 			"gpu id %u\n", pdd->dev->id);
731 		dev_dbg(kfd_device,
732 			"lds_base %llX\n", pdd->lds_base);
733 		dev_dbg(kfd_device,
734 			"lds_limit %llX\n", pdd->lds_limit);
735 		dev_dbg(kfd_device,
736 			"gpuvm_base %llX\n", pdd->gpuvm_base);
737 		dev_dbg(kfd_device,
738 			"gpuvm_limit %llX\n", pdd->gpuvm_limit);
739 		dev_dbg(kfd_device,
740 			"scratch_base %llX\n", pdd->scratch_base);
741 		dev_dbg(kfd_device,
742 			"scratch_limit %llX\n", pdd->scratch_limit);
743 
744 		if (++args->num_of_nodes >= NUM_OF_SUPPORTED_GPUS)
745 			break;
746 	}
747 	mutex_unlock(&p->mutex);
748 
749 	return 0;
750 }
751 
752 static int kfd_ioctl_get_process_apertures_new(struct file *filp,
753 				struct kfd_process *p, void *data)
754 {
755 	struct kfd_ioctl_get_process_apertures_new_args *args = data;
756 	struct kfd_process_device_apertures *pa;
757 	int ret;
758 	int i;
759 
760 	dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);
761 
762 	if (args->num_of_nodes == 0) {
763 		/* Return number of nodes, so that user space can alloacate
764 		 * sufficient memory
765 		 */
766 		mutex_lock(&p->mutex);
767 		args->num_of_nodes = p->n_pdds;
768 		goto out_unlock;
769 	}
770 
771 	/* Fill in process-aperture information for all available
772 	 * nodes, but not more than args->num_of_nodes as that is
773 	 * the amount of memory allocated by user
774 	 */
775 	pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
776 				args->num_of_nodes), GFP_KERNEL);
777 	if (!pa)
778 		return -ENOMEM;
779 
780 	mutex_lock(&p->mutex);
781 
782 	if (!p->n_pdds) {
783 		args->num_of_nodes = 0;
784 		kfree(pa);
785 		goto out_unlock;
786 	}
787 
788 	/* Run over all pdd of the process */
789 	for (i = 0; i < min(p->n_pdds, args->num_of_nodes); i++) {
790 		struct kfd_process_device *pdd = p->pdds[i];
791 
792 		pa[i].gpu_id = pdd->dev->id;
793 		pa[i].lds_base = pdd->lds_base;
794 		pa[i].lds_limit = pdd->lds_limit;
795 		pa[i].gpuvm_base = pdd->gpuvm_base;
796 		pa[i].gpuvm_limit = pdd->gpuvm_limit;
797 		pa[i].scratch_base = pdd->scratch_base;
798 		pa[i].scratch_limit = pdd->scratch_limit;
799 
800 		dev_dbg(kfd_device,
801 			"gpu id %u\n", pdd->dev->id);
802 		dev_dbg(kfd_device,
803 			"lds_base %llX\n", pdd->lds_base);
804 		dev_dbg(kfd_device,
805 			"lds_limit %llX\n", pdd->lds_limit);
806 		dev_dbg(kfd_device,
807 			"gpuvm_base %llX\n", pdd->gpuvm_base);
808 		dev_dbg(kfd_device,
809 			"gpuvm_limit %llX\n", pdd->gpuvm_limit);
810 		dev_dbg(kfd_device,
811 			"scratch_base %llX\n", pdd->scratch_base);
812 		dev_dbg(kfd_device,
813 			"scratch_limit %llX\n", pdd->scratch_limit);
814 	}
815 	mutex_unlock(&p->mutex);
816 
817 	args->num_of_nodes = i;
818 	ret = copy_to_user(
819 			(void __user *)args->kfd_process_device_apertures_ptr,
820 			pa,
821 			(i * sizeof(struct kfd_process_device_apertures)));
822 	kfree(pa);
823 	return ret ? -EFAULT : 0;
824 
825 out_unlock:
826 	mutex_unlock(&p->mutex);
827 	return 0;
828 }
829 
830 static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
831 					void *data)
832 {
833 	struct kfd_ioctl_create_event_args *args = data;
834 	int err;
835 
836 	/* For dGPUs the event page is allocated in user mode. The
837 	 * handle is passed to KFD with the first call to this IOCTL
838 	 * through the event_page_offset field.
839 	 */
840 	if (args->event_page_offset) {
841 		mutex_lock(&p->mutex);
842 		err = kfd_kmap_event_page(p, args->event_page_offset);
843 		mutex_unlock(&p->mutex);
844 		if (err)
845 			return err;
846 	}
847 
848 	err = kfd_event_create(filp, p, args->event_type,
849 				args->auto_reset != 0, args->node_id,
850 				&args->event_id, &args->event_trigger_data,
851 				&args->event_page_offset,
852 				&args->event_slot_index);
853 
854 	pr_debug("Created event (id:0x%08x) (%s)\n", args->event_id, __func__);
855 	return err;
856 }
857 
858 static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p,
859 					void *data)
860 {
861 	struct kfd_ioctl_destroy_event_args *args = data;
862 
863 	return kfd_event_destroy(p, args->event_id);
864 }
865 
866 static int kfd_ioctl_set_event(struct file *filp, struct kfd_process *p,
867 				void *data)
868 {
869 	struct kfd_ioctl_set_event_args *args = data;
870 
871 	return kfd_set_event(p, args->event_id);
872 }
873 
874 static int kfd_ioctl_reset_event(struct file *filp, struct kfd_process *p,
875 				void *data)
876 {
877 	struct kfd_ioctl_reset_event_args *args = data;
878 
879 	return kfd_reset_event(p, args->event_id);
880 }
881 
882 static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
883 				void *data)
884 {
885 	struct kfd_ioctl_wait_events_args *args = data;
886 
887 	return kfd_wait_on_events(p, args->num_events,
888 			(void __user *)args->events_ptr,
889 			(args->wait_for_all != 0),
890 			&args->timeout, &args->wait_result);
891 }
892 static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
893 					struct kfd_process *p, void *data)
894 {
895 	struct kfd_ioctl_set_scratch_backing_va_args *args = data;
896 	struct kfd_process_device *pdd;
897 	struct kfd_node *dev;
898 	long err;
899 
900 	mutex_lock(&p->mutex);
901 	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
902 	if (!pdd) {
903 		err = -EINVAL;
904 		goto err_pdd;
905 	}
906 	dev = pdd->dev;
907 
908 	pdd = kfd_bind_process_to_device(dev, p);
909 	if (IS_ERR(pdd)) {
910 		err = PTR_ERR(pdd);
911 		goto bind_process_to_device_fail;
912 	}
913 
914 	pdd->qpd.sh_hidden_private_base = args->va_addr;
915 
916 	mutex_unlock(&p->mutex);
917 
918 	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&
919 	    pdd->qpd.vmid != 0 && dev->kfd2kgd->set_scratch_backing_va)
920 		dev->kfd2kgd->set_scratch_backing_va(
921 			dev->adev, args->va_addr, pdd->qpd.vmid);
922 
923 	return 0;
924 
925 bind_process_to_device_fail:
926 err_pdd:
927 	mutex_unlock(&p->mutex);
928 	return err;
929 }
930 
931 static int kfd_ioctl_get_tile_config(struct file *filep,
932 		struct kfd_process *p, void *data)
933 {
934 	struct kfd_ioctl_get_tile_config_args *args = data;
935 	struct kfd_process_device *pdd;
936 	struct tile_config config;
937 	int err = 0;
938 
939 	mutex_lock(&p->mutex);
940 	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
941 	mutex_unlock(&p->mutex);
942 	if (!pdd)
943 		return -EINVAL;
944 
945 	amdgpu_amdkfd_get_tile_config(pdd->dev->adev, &config);
946 
947 	args->gb_addr_config = config.gb_addr_config;
948 	args->num_banks = config.num_banks;
949 	args->num_ranks = config.num_ranks;
950 
951 	if (args->num_tile_configs > config.num_tile_configs)
952 		args->num_tile_configs = config.num_tile_configs;
953 	err = copy_to_user((void __user *)args->tile_config_ptr,
954 			config.tile_config_ptr,
955 			args->num_tile_configs * sizeof(uint32_t));
956 	if (err) {
957 		args->num_tile_configs = 0;
958 		return -EFAULT;
959 	}
960 
961 	if (args->num_macro_tile_configs > config.num_macro_tile_configs)
962 		args->num_macro_tile_configs =
963 				config.num_macro_tile_configs;
964 	err = copy_to_user((void __user *)args->macro_tile_config_ptr,
965 			config.macro_tile_config_ptr,
966 			args->num_macro_tile_configs * sizeof(uint32_t));
967 	if (err) {
968 		args->num_macro_tile_configs = 0;
969 		return -EFAULT;
970 	}
971 
972 	return 0;
973 }
974 
975 static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,
976 				void *data)
977 {
978 	struct kfd_ioctl_acquire_vm_args *args = data;
979 	struct kfd_process_device *pdd;
980 	struct file *drm_file;
981 	int ret;
982 
983 	drm_file = fget(args->drm_fd);
984 	if (!drm_file)
985 		return -EINVAL;
986 
987 	mutex_lock(&p->mutex);
988 	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
989 	if (!pdd) {
990 		ret = -EINVAL;
991 		goto err_pdd;
992 	}
993 
994 	if (pdd->drm_file) {
995 		ret = pdd->drm_file == drm_file ? 0 : -EBUSY;
996 		goto err_drm_file;
997 	}
998 
999 	ret = kfd_process_device_init_vm(pdd, drm_file);
1000 	if (ret)
1001 		goto err_unlock;
1002 
1003 	/* On success, the PDD keeps the drm_file reference */
1004 	mutex_unlock(&p->mutex);
1005 
1006 	return 0;
1007 
1008 err_unlock:
1009 err_pdd:
1010 err_drm_file:
1011 	mutex_unlock(&p->mutex);
1012 	fput(drm_file);
1013 	return ret;
1014 }
1015 
1016 bool kfd_dev_is_large_bar(struct kfd_node *dev)
1017 {
1018 	if (debug_largebar) {
1019 		pr_debug("Simulate large-bar allocation on non large-bar machine\n");
1020 		return true;
1021 	}
1022 
1023 	if (dev->kfd->use_iommu_v2)
1024 		return false;
1025 
1026 	if (dev->kfd->local_mem_info.local_mem_size_private == 0 &&
1027 	    dev->kfd->local_mem_info.local_mem_size_public > 0)
1028 		return true;
1029 	return false;
1030 }
1031 
1032 static int kfd_ioctl_get_available_memory(struct file *filep,
1033 					  struct kfd_process *p, void *data)
1034 {
1035 	struct kfd_ioctl_get_available_memory_args *args = data;
1036 	struct kfd_process_device *pdd = kfd_lock_pdd_by_id(p, args->gpu_id);
1037 
1038 	if (!pdd)
1039 		return -EINVAL;
1040 	args->available = amdgpu_amdkfd_get_available_memory(pdd->dev->adev);
1041 	kfd_unlock_pdd(pdd);
1042 	return 0;
1043 }
1044 
1045 static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
1046 					struct kfd_process *p, void *data)
1047 {
1048 	struct kfd_ioctl_alloc_memory_of_gpu_args *args = data;
1049 	struct kfd_process_device *pdd;
1050 	void *mem;
1051 	struct kfd_node *dev;
1052 	int idr_handle;
1053 	long err;
1054 	uint64_t offset = args->mmap_offset;
1055 	uint32_t flags = args->flags;
1056 
1057 	if (args->size == 0)
1058 		return -EINVAL;
1059 
1060 #if IS_ENABLED(CONFIG_HSA_AMD_SVM)
1061 	/* Flush pending deferred work to avoid racing with deferred actions
1062 	 * from previous memory map changes (e.g. munmap).
1063 	 */
1064 	svm_range_list_lock_and_flush_work(&p->svms, current->mm);
1065 	mutex_lock(&p->svms.lock);
1066 	mmap_write_unlock(current->mm);
1067 	if (interval_tree_iter_first(&p->svms.objects,
1068 				     args->va_addr >> PAGE_SHIFT,
1069 				     (args->va_addr + args->size - 1) >> PAGE_SHIFT)) {
1070 		pr_err("Address: 0x%llx already allocated by SVM\n",
1071 			args->va_addr);
1072 		mutex_unlock(&p->svms.lock);
1073 		return -EADDRINUSE;
1074 	}
1075 
1076 	/* When register user buffer check if it has been registered by svm by
1077 	 * buffer cpu virtual address.
1078 	 */
1079 	if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) &&
1080 	    interval_tree_iter_first(&p->svms.objects,
1081 				     args->mmap_offset >> PAGE_SHIFT,
1082 				     (args->mmap_offset  + args->size - 1) >> PAGE_SHIFT)) {
1083 		pr_err("User Buffer Address: 0x%llx already allocated by SVM\n",
1084 			args->mmap_offset);
1085 		mutex_unlock(&p->svms.lock);
1086 		return -EADDRINUSE;
1087 	}
1088 
1089 	mutex_unlock(&p->svms.lock);
1090 #endif
1091 	mutex_lock(&p->mutex);
1092 	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
1093 	if (!pdd) {
1094 		err = -EINVAL;
1095 		goto err_pdd;
1096 	}
1097 
1098 	dev = pdd->dev;
1099 
1100 	if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) &&
1101 		(flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) &&
1102 		!kfd_dev_is_large_bar(dev)) {
1103 		pr_err("Alloc host visible vram on small bar is not allowed\n");
1104 		err = -EINVAL;
1105 		goto err_large_bar;
1106 	}
1107 
1108 	pdd = kfd_bind_process_to_device(dev, p);
1109 	if (IS_ERR(pdd)) {
1110 		err = PTR_ERR(pdd);
1111 		goto err_unlock;
1112 	}
1113 
1114 	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
1115 		if (args->size != kfd_doorbell_process_slice(dev->kfd)) {
1116 			err = -EINVAL;
1117 			goto err_unlock;
1118 		}
1119 		offset = kfd_get_process_doorbells(pdd);
1120 		if (!offset) {
1121 			err = -ENOMEM;
1122 			goto err_unlock;
1123 		}
1124 	} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
1125 		if (args->size != PAGE_SIZE) {
1126 			err = -EINVAL;
1127 			goto err_unlock;
1128 		}
1129 		offset = dev->adev->rmmio_remap.bus_addr;
1130 		if (!offset) {
1131 			err = -ENOMEM;
1132 			goto err_unlock;
1133 		}
1134 	}
1135 
1136 	err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
1137 		dev->adev, args->va_addr, args->size,
1138 		pdd->drm_priv, (struct kgd_mem **) &mem, &offset,
1139 		flags, false);
1140 
1141 	if (err)
1142 		goto err_unlock;
1143 
1144 	idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
1145 	if (idr_handle < 0) {
1146 		err = -EFAULT;
1147 		goto err_free;
1148 	}
1149 
1150 	/* Update the VRAM usage count */
1151 	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
1152 		uint64_t size = args->size;
1153 
1154 		if (flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM)
1155 			size >>= 1;
1156 		WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + PAGE_ALIGN(size));
1157 	}
1158 
1159 	mutex_unlock(&p->mutex);
1160 
1161 	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
1162 	args->mmap_offset = offset;
1163 
1164 	/* MMIO is mapped through kfd device
1165 	 * Generate a kfd mmap offset
1166 	 */
1167 	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)
1168 		args->mmap_offset = KFD_MMAP_TYPE_MMIO
1169 					| KFD_MMAP_GPU_ID(args->gpu_id);
1170 
1171 	return 0;
1172 
1173 err_free:
1174 	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->adev, (struct kgd_mem *)mem,
1175 					       pdd->drm_priv, NULL);
1176 err_unlock:
1177 err_pdd:
1178 err_large_bar:
1179 	mutex_unlock(&p->mutex);
1180 	return err;
1181 }
1182 
1183 static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
1184 					struct kfd_process *p, void *data)
1185 {
1186 	struct kfd_ioctl_free_memory_of_gpu_args *args = data;
1187 	struct kfd_process_device *pdd;
1188 	void *mem;
1189 	int ret;
1190 	uint64_t size = 0;
1191 
1192 	mutex_lock(&p->mutex);
1193 	/*
1194 	 * Safeguard to prevent user space from freeing signal BO.
1195 	 * It will be freed at process termination.
1196 	 */
1197 	if (p->signal_handle && (p->signal_handle == args->handle)) {
1198 		pr_err("Free signal BO is not allowed\n");
1199 		ret = -EPERM;
1200 		goto err_unlock;
1201 	}
1202 
1203 	pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(args->handle));
1204 	if (!pdd) {
1205 		pr_err("Process device data doesn't exist\n");
1206 		ret = -EINVAL;
1207 		goto err_pdd;
1208 	}
1209 
1210 	mem = kfd_process_device_translate_handle(
1211 		pdd, GET_IDR_HANDLE(args->handle));
1212 	if (!mem) {
1213 		ret = -EINVAL;
1214 		goto err_unlock;
1215 	}
1216 
1217 	ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev,
1218 				(struct kgd_mem *)mem, pdd->drm_priv, &size);
1219 
1220 	/* If freeing the buffer failed, leave the handle in place for
1221 	 * clean-up during process tear-down.
1222 	 */
1223 	if (!ret)
1224 		kfd_process_device_remove_obj_handle(
1225 			pdd, GET_IDR_HANDLE(args->handle));
1226 
1227 	WRITE_ONCE(pdd->vram_usage, pdd->vram_usage - size);
1228 
1229 err_unlock:
1230 err_pdd:
1231 	mutex_unlock(&p->mutex);
1232 	return ret;
1233 }
1234 
1235 static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
1236 					struct kfd_process *p, void *data)
1237 {
1238 	struct kfd_ioctl_map_memory_to_gpu_args *args = data;
1239 	struct kfd_process_device *pdd, *peer_pdd;
1240 	void *mem;
1241 	struct kfd_node *dev;
1242 	long err = 0;
1243 	int i;
1244 	uint32_t *devices_arr = NULL;
1245 
1246 	if (!args->n_devices) {
1247 		pr_debug("Device IDs array empty\n");
1248 		return -EINVAL;
1249 	}
1250 	if (args->n_success > args->n_devices) {
1251 		pr_debug("n_success exceeds n_devices\n");
1252 		return -EINVAL;
1253 	}
1254 
1255 	devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
1256 				    GFP_KERNEL);
1257 	if (!devices_arr)
1258 		return -ENOMEM;
1259 
1260 	err = copy_from_user(devices_arr,
1261 			     (void __user *)args->device_ids_array_ptr,
1262 			     args->n_devices * sizeof(*devices_arr));
1263 	if (err != 0) {
1264 		err = -EFAULT;
1265 		goto copy_from_user_failed;
1266 	}
1267 
1268 	mutex_lock(&p->mutex);
1269 	pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(args->handle));
1270 	if (!pdd) {
1271 		err = -EINVAL;
1272 		goto get_process_device_data_failed;
1273 	}
1274 	dev = pdd->dev;
1275 
1276 	pdd = kfd_bind_process_to_device(dev, p);
1277 	if (IS_ERR(pdd)) {
1278 		err = PTR_ERR(pdd);
1279 		goto bind_process_to_device_failed;
1280 	}
1281 
1282 	mem = kfd_process_device_translate_handle(pdd,
1283 						GET_IDR_HANDLE(args->handle));
1284 	if (!mem) {
1285 		err = -ENOMEM;
1286 		goto get_mem_obj_from_handle_failed;
1287 	}
1288 
1289 	for (i = args->n_success; i < args->n_devices; i++) {
1290 		peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
1291 		if (!peer_pdd) {
1292 			pr_debug("Getting device by id failed for 0x%x\n",
1293 				 devices_arr[i]);
1294 			err = -EINVAL;
1295 			goto get_mem_obj_from_handle_failed;
1296 		}
1297 
1298 		peer_pdd = kfd_bind_process_to_device(peer_pdd->dev, p);
1299 		if (IS_ERR(peer_pdd)) {
1300 			err = PTR_ERR(peer_pdd);
1301 			goto get_mem_obj_from_handle_failed;
1302 		}
1303 
1304 		err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
1305 			peer_pdd->dev->adev, (struct kgd_mem *)mem,
1306 			peer_pdd->drm_priv);
1307 		if (err) {
1308 			struct pci_dev *pdev = peer_pdd->dev->adev->pdev;
1309 
1310 			dev_err(dev->adev->dev,
1311 			       "Failed to map peer:%04x:%02x:%02x.%d mem_domain:%d\n",
1312 			       pci_domain_nr(pdev->bus),
1313 			       pdev->bus->number,
1314 			       PCI_SLOT(pdev->devfn),
1315 			       PCI_FUNC(pdev->devfn),
1316 			       ((struct kgd_mem *)mem)->domain);
1317 			goto map_memory_to_gpu_failed;
1318 		}
1319 		args->n_success = i+1;
1320 	}
1321 
1322 	err = amdgpu_amdkfd_gpuvm_sync_memory(dev->adev, (struct kgd_mem *) mem, true);
1323 	if (err) {
1324 		pr_debug("Sync memory failed, wait interrupted by user signal\n");
1325 		goto sync_memory_failed;
1326 	}
1327 
1328 	mutex_unlock(&p->mutex);
1329 
1330 	/* Flush TLBs after waiting for the page table updates to complete */
1331 	for (i = 0; i < args->n_devices; i++) {
1332 		peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
1333 		if (WARN_ON_ONCE(!peer_pdd))
1334 			continue;
1335 		kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY);
1336 	}
1337 	kfree(devices_arr);
1338 
1339 	return err;
1340 
1341 get_process_device_data_failed:
1342 bind_process_to_device_failed:
1343 get_mem_obj_from_handle_failed:
1344 map_memory_to_gpu_failed:
1345 sync_memory_failed:
1346 	mutex_unlock(&p->mutex);
1347 copy_from_user_failed:
1348 	kfree(devices_arr);
1349 
1350 	return err;
1351 }
1352 
1353 static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
1354 					struct kfd_process *p, void *data)
1355 {
1356 	struct kfd_ioctl_unmap_memory_from_gpu_args *args = data;
1357 	struct kfd_process_device *pdd, *peer_pdd;
1358 	void *mem;
1359 	long err = 0;
1360 	uint32_t *devices_arr = NULL, i;
1361 	bool flush_tlb;
1362 
1363 	if (!args->n_devices) {
1364 		pr_debug("Device IDs array empty\n");
1365 		return -EINVAL;
1366 	}
1367 	if (args->n_success > args->n_devices) {
1368 		pr_debug("n_success exceeds n_devices\n");
1369 		return -EINVAL;
1370 	}
1371 
1372 	devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
1373 				    GFP_KERNEL);
1374 	if (!devices_arr)
1375 		return -ENOMEM;
1376 
1377 	err = copy_from_user(devices_arr,
1378 			     (void __user *)args->device_ids_array_ptr,
1379 			     args->n_devices * sizeof(*devices_arr));
1380 	if (err != 0) {
1381 		err = -EFAULT;
1382 		goto copy_from_user_failed;
1383 	}
1384 
1385 	mutex_lock(&p->mutex);
1386 	pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(args->handle));
1387 	if (!pdd) {
1388 		err = -EINVAL;
1389 		goto bind_process_to_device_failed;
1390 	}
1391 
1392 	mem = kfd_process_device_translate_handle(pdd,
1393 						GET_IDR_HANDLE(args->handle));
1394 	if (!mem) {
1395 		err = -ENOMEM;
1396 		goto get_mem_obj_from_handle_failed;
1397 	}
1398 
1399 	for (i = args->n_success; i < args->n_devices; i++) {
1400 		peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
1401 		if (!peer_pdd) {
1402 			err = -EINVAL;
1403 			goto get_mem_obj_from_handle_failed;
1404 		}
1405 		err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
1406 			peer_pdd->dev->adev, (struct kgd_mem *)mem, peer_pdd->drm_priv);
1407 		if (err) {
1408 			pr_err("Failed to unmap from gpu %d/%d\n",
1409 			       i, args->n_devices);
1410 			goto unmap_memory_from_gpu_failed;
1411 		}
1412 		args->n_success = i+1;
1413 	}
1414 
1415 	flush_tlb = kfd_flush_tlb_after_unmap(pdd->dev->kfd);
1416 	if (flush_tlb) {
1417 		err = amdgpu_amdkfd_gpuvm_sync_memory(pdd->dev->adev,
1418 				(struct kgd_mem *) mem, true);
1419 		if (err) {
1420 			pr_debug("Sync memory failed, wait interrupted by user signal\n");
1421 			goto sync_memory_failed;
1422 		}
1423 	}
1424 	mutex_unlock(&p->mutex);
1425 
1426 	if (flush_tlb) {
1427 		/* Flush TLBs after waiting for the page table updates to complete */
1428 		for (i = 0; i < args->n_devices; i++) {
1429 			peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
1430 			if (WARN_ON_ONCE(!peer_pdd))
1431 				continue;
1432 			kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT);
1433 		}
1434 	}
1435 	kfree(devices_arr);
1436 
1437 	return 0;
1438 
1439 bind_process_to_device_failed:
1440 get_mem_obj_from_handle_failed:
1441 unmap_memory_from_gpu_failed:
1442 sync_memory_failed:
1443 	mutex_unlock(&p->mutex);
1444 copy_from_user_failed:
1445 	kfree(devices_arr);
1446 	return err;
1447 }
1448 
1449 static int kfd_ioctl_alloc_queue_gws(struct file *filep,
1450 		struct kfd_process *p, void *data)
1451 {
1452 	int retval;
1453 	struct kfd_ioctl_alloc_queue_gws_args *args = data;
1454 	struct queue *q;
1455 	struct kfd_node *dev;
1456 
1457 	mutex_lock(&p->mutex);
1458 	q = pqm_get_user_queue(&p->pqm, args->queue_id);
1459 
1460 	if (q) {
1461 		dev = q->device;
1462 	} else {
1463 		retval = -EINVAL;
1464 		goto out_unlock;
1465 	}
1466 
1467 	if (!dev->gws) {
1468 		retval = -ENODEV;
1469 		goto out_unlock;
1470 	}
1471 
1472 	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
1473 		retval = -ENODEV;
1474 		goto out_unlock;
1475 	}
1476 
1477 	retval = pqm_set_gws(&p->pqm, args->queue_id, args->num_gws ? dev->gws : NULL);
1478 	mutex_unlock(&p->mutex);
1479 
1480 	args->first_gws = 0;
1481 	return retval;
1482 
1483 out_unlock:
1484 	mutex_unlock(&p->mutex);
1485 	return retval;
1486 }
1487 
1488 static int kfd_ioctl_get_dmabuf_info(struct file *filep,
1489 		struct kfd_process *p, void *data)
1490 {
1491 	struct kfd_ioctl_get_dmabuf_info_args *args = data;
1492 	struct kfd_node *dev = NULL;
1493 	struct amdgpu_device *dmabuf_adev;
1494 	void *metadata_buffer = NULL;
1495 	uint32_t flags;
1496 	unsigned int i;
1497 	int r;
1498 
1499 	/* Find a KFD GPU device that supports the get_dmabuf_info query */
1500 	for (i = 0; kfd_topology_enum_kfd_devices(i, &dev) == 0; i++)
1501 		if (dev)
1502 			break;
1503 	if (!dev)
1504 		return -EINVAL;
1505 
1506 	if (args->metadata_ptr) {
1507 		metadata_buffer = kzalloc(args->metadata_size, GFP_KERNEL);
1508 		if (!metadata_buffer)
1509 			return -ENOMEM;
1510 	}
1511 
1512 	/* Get dmabuf info from KGD */
1513 	r = amdgpu_amdkfd_get_dmabuf_info(dev->adev, args->dmabuf_fd,
1514 					  &dmabuf_adev, &args->size,
1515 					  metadata_buffer, args->metadata_size,
1516 					  &args->metadata_size, &flags);
1517 	if (r)
1518 		goto exit;
1519 
1520 	/* Reverse-lookup gpu_id from kgd pointer */
1521 	dev = kfd_device_by_adev(dmabuf_adev);
1522 	if (!dev) {
1523 		r = -EINVAL;
1524 		goto exit;
1525 	}
1526 	args->gpu_id = dev->id;
1527 	args->flags = flags;
1528 
1529 	/* Copy metadata buffer to user mode */
1530 	if (metadata_buffer) {
1531 		r = copy_to_user((void __user *)args->metadata_ptr,
1532 				 metadata_buffer, args->metadata_size);
1533 		if (r != 0)
1534 			r = -EFAULT;
1535 	}
1536 
1537 exit:
1538 	kfree(metadata_buffer);
1539 
1540 	return r;
1541 }
1542 
1543 static int kfd_ioctl_import_dmabuf(struct file *filep,
1544 				   struct kfd_process *p, void *data)
1545 {
1546 	struct kfd_ioctl_import_dmabuf_args *args = data;
1547 	struct kfd_process_device *pdd;
1548 	struct dma_buf *dmabuf;
1549 	int idr_handle;
1550 	uint64_t size;
1551 	void *mem;
1552 	int r;
1553 
1554 	dmabuf = dma_buf_get(args->dmabuf_fd);
1555 	if (IS_ERR(dmabuf))
1556 		return PTR_ERR(dmabuf);
1557 
1558 	mutex_lock(&p->mutex);
1559 	pdd = kfd_process_device_data_by_id(p, args->gpu_id);
1560 	if (!pdd) {
1561 		r = -EINVAL;
1562 		goto err_unlock;
1563 	}
1564 
1565 	pdd = kfd_bind_process_to_device(pdd->dev, p);
1566 	if (IS_ERR(pdd)) {
1567 		r = PTR_ERR(pdd);
1568 		goto err_unlock;
1569 	}
1570 
1571 	r = amdgpu_amdkfd_gpuvm_import_dmabuf(pdd->dev->adev, dmabuf,
1572 					      args->va_addr, pdd->drm_priv,
1573 					      (struct kgd_mem **)&mem, &size,
1574 					      NULL);
1575 	if (r)
1576 		goto err_unlock;
1577 
1578 	idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
1579 	if (idr_handle < 0) {
1580 		r = -EFAULT;
1581 		goto err_free;
1582 	}
1583 
1584 	mutex_unlock(&p->mutex);
1585 	dma_buf_put(dmabuf);
1586 
1587 	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
1588 
1589 	return 0;
1590 
1591 err_free:
1592 	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev, (struct kgd_mem *)mem,
1593 					       pdd->drm_priv, NULL);
1594 err_unlock:
1595 	mutex_unlock(&p->mutex);
1596 	dma_buf_put(dmabuf);
1597 	return r;
1598 }
1599 
1600 static int kfd_ioctl_export_dmabuf(struct file *filep,
1601 				   struct kfd_process *p, void *data)
1602 {
1603 	struct kfd_ioctl_export_dmabuf_args *args = data;
1604 	struct kfd_process_device *pdd;
1605 	struct dma_buf *dmabuf;
1606 	struct kfd_node *dev;
1607 	void *mem;
1608 	int ret = 0;
1609 
1610 	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
1611 	if (!dev)
1612 		return -EINVAL;
1613 
1614 	mutex_lock(&p->mutex);
1615 
1616 	pdd = kfd_get_process_device_data(dev, p);
1617 	if (!pdd) {
1618 		ret = -EINVAL;
1619 		goto err_unlock;
1620 	}
1621 
1622 	mem = kfd_process_device_translate_handle(pdd,
1623 						GET_IDR_HANDLE(args->handle));
1624 	if (!mem) {
1625 		ret = -EINVAL;
1626 		goto err_unlock;
1627 	}
1628 
1629 	ret = amdgpu_amdkfd_gpuvm_export_dmabuf(mem, &dmabuf);
1630 	mutex_unlock(&p->mutex);
1631 	if (ret)
1632 		goto err_out;
1633 
1634 	ret = dma_buf_fd(dmabuf, args->flags);
1635 	if (ret < 0) {
1636 		dma_buf_put(dmabuf);
1637 		goto err_out;
1638 	}
1639 	/* dma_buf_fd assigns the reference count to the fd, no need to
1640 	 * put the reference here.
1641 	 */
1642 	args->dmabuf_fd = ret;
1643 
1644 	return 0;
1645 
1646 err_unlock:
1647 	mutex_unlock(&p->mutex);
1648 err_out:
1649 	return ret;
1650 }
1651 
1652 /* Handle requests for watching SMI events */
1653 static int kfd_ioctl_smi_events(struct file *filep,
1654 				struct kfd_process *p, void *data)
1655 {
1656 	struct kfd_ioctl_smi_events_args *args = data;
1657 	struct kfd_process_device *pdd;
1658 
1659 	mutex_lock(&p->mutex);
1660 
1661 	pdd = kfd_process_device_data_by_id(p, args->gpuid);
1662 	mutex_unlock(&p->mutex);
1663 	if (!pdd)
1664 		return -EINVAL;
1665 
1666 	return kfd_smi_event_open(pdd->dev, &args->anon_fd);
1667 }
1668 
1669 #if IS_ENABLED(CONFIG_HSA_AMD_SVM)
1670 
1671 static int kfd_ioctl_set_xnack_mode(struct file *filep,
1672 				    struct kfd_process *p, void *data)
1673 {
1674 	struct kfd_ioctl_set_xnack_mode_args *args = data;
1675 	int r = 0;
1676 
1677 	mutex_lock(&p->mutex);
1678 	if (args->xnack_enabled >= 0) {
1679 		if (!list_empty(&p->pqm.queues)) {
1680 			pr_debug("Process has user queues running\n");
1681 			r = -EBUSY;
1682 			goto out_unlock;
1683 		}
1684 
1685 		if (p->xnack_enabled == args->xnack_enabled)
1686 			goto out_unlock;
1687 
1688 		if (args->xnack_enabled && !kfd_process_xnack_mode(p, true)) {
1689 			r = -EPERM;
1690 			goto out_unlock;
1691 		}
1692 
1693 		r = svm_range_switch_xnack_reserve_mem(p, args->xnack_enabled);
1694 	} else {
1695 		args->xnack_enabled = p->xnack_enabled;
1696 	}
1697 
1698 out_unlock:
1699 	mutex_unlock(&p->mutex);
1700 
1701 	return r;
1702 }
1703 
1704 static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
1705 {
1706 	struct kfd_ioctl_svm_args *args = data;
1707 	int r = 0;
1708 
1709 	pr_debug("start 0x%llx size 0x%llx op 0x%x nattr 0x%x\n",
1710 		 args->start_addr, args->size, args->op, args->nattr);
1711 
1712 	if ((args->start_addr & ~PAGE_MASK) || (args->size & ~PAGE_MASK))
1713 		return -EINVAL;
1714 	if (!args->start_addr || !args->size)
1715 		return -EINVAL;
1716 
1717 	r = svm_ioctl(p, args->op, args->start_addr, args->size, args->nattr,
1718 		      args->attrs);
1719 
1720 	return r;
1721 }
1722 #else
1723 static int kfd_ioctl_set_xnack_mode(struct file *filep,
1724 				    struct kfd_process *p, void *data)
1725 {
1726 	return -EPERM;
1727 }
1728 static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
1729 {
1730 	return -EPERM;
1731 }
1732 #endif
1733 
1734 static int criu_checkpoint_process(struct kfd_process *p,
1735 			     uint8_t __user *user_priv_data,
1736 			     uint64_t *priv_offset)
1737 {
1738 	struct kfd_criu_process_priv_data process_priv;
1739 	int ret;
1740 
1741 	memset(&process_priv, 0, sizeof(process_priv));
1742 
1743 	process_priv.version = KFD_CRIU_PRIV_VERSION;
1744 	/* For CR, we don't consider negative xnack mode which is used for
1745 	 * querying without changing it, here 0 simply means disabled and 1
1746 	 * means enabled so retry for finding a valid PTE.
1747 	 */
1748 	process_priv.xnack_mode = p->xnack_enabled ? 1 : 0;
1749 
1750 	ret = copy_to_user(user_priv_data + *priv_offset,
1751 				&process_priv, sizeof(process_priv));
1752 
1753 	if (ret) {
1754 		pr_err("Failed to copy process information to user\n");
1755 		ret = -EFAULT;
1756 	}
1757 
1758 	*priv_offset += sizeof(process_priv);
1759 	return ret;
1760 }
1761 
1762 static int criu_checkpoint_devices(struct kfd_process *p,
1763 			     uint32_t num_devices,
1764 			     uint8_t __user *user_addr,
1765 			     uint8_t __user *user_priv_data,
1766 			     uint64_t *priv_offset)
1767 {
1768 	struct kfd_criu_device_priv_data *device_priv = NULL;
1769 	struct kfd_criu_device_bucket *device_buckets = NULL;
1770 	int ret = 0, i;
1771 
1772 	device_buckets = kvzalloc(num_devices * sizeof(*device_buckets), GFP_KERNEL);
1773 	if (!device_buckets) {
1774 		ret = -ENOMEM;
1775 		goto exit;
1776 	}
1777 
1778 	device_priv = kvzalloc(num_devices * sizeof(*device_priv), GFP_KERNEL);
1779 	if (!device_priv) {
1780 		ret = -ENOMEM;
1781 		goto exit;
1782 	}
1783 
1784 	for (i = 0; i < num_devices; i++) {
1785 		struct kfd_process_device *pdd = p->pdds[i];
1786 
1787 		device_buckets[i].user_gpu_id = pdd->user_gpu_id;
1788 		device_buckets[i].actual_gpu_id = pdd->dev->id;
1789 
1790 		/*
1791 		 * priv_data does not contain useful information for now and is reserved for
1792 		 * future use, so we do not set its contents.
1793 		 */
1794 	}
1795 
1796 	ret = copy_to_user(user_addr, device_buckets, num_devices * sizeof(*device_buckets));
1797 	if (ret) {
1798 		pr_err("Failed to copy device information to user\n");
1799 		ret = -EFAULT;
1800 		goto exit;
1801 	}
1802 
1803 	ret = copy_to_user(user_priv_data + *priv_offset,
1804 			   device_priv,
1805 			   num_devices * sizeof(*device_priv));
1806 	if (ret) {
1807 		pr_err("Failed to copy device information to user\n");
1808 		ret = -EFAULT;
1809 	}
1810 	*priv_offset += num_devices * sizeof(*device_priv);
1811 
1812 exit:
1813 	kvfree(device_buckets);
1814 	kvfree(device_priv);
1815 	return ret;
1816 }
1817 
1818 static uint32_t get_process_num_bos(struct kfd_process *p)
1819 {
1820 	uint32_t num_of_bos = 0;
1821 	int i;
1822 
1823 	/* Run over all PDDs of the process */
1824 	for (i = 0; i < p->n_pdds; i++) {
1825 		struct kfd_process_device *pdd = p->pdds[i];
1826 		void *mem;
1827 		int id;
1828 
1829 		idr_for_each_entry(&pdd->alloc_idr, mem, id) {
1830 			struct kgd_mem *kgd_mem = (struct kgd_mem *)mem;
1831 
1832 			if ((uint64_t)kgd_mem->va > pdd->gpuvm_base)
1833 				num_of_bos++;
1834 		}
1835 	}
1836 	return num_of_bos;
1837 }
1838 
1839 static int criu_get_prime_handle(struct drm_gem_object *gobj, int flags,
1840 				      u32 *shared_fd)
1841 {
1842 	struct dma_buf *dmabuf;
1843 	int ret;
1844 
1845 	dmabuf = amdgpu_gem_prime_export(gobj, flags);
1846 	if (IS_ERR(dmabuf)) {
1847 		ret = PTR_ERR(dmabuf);
1848 		pr_err("dmabuf export failed for the BO\n");
1849 		return ret;
1850 	}
1851 
1852 	ret = dma_buf_fd(dmabuf, flags);
1853 	if (ret < 0) {
1854 		pr_err("dmabuf create fd failed, ret:%d\n", ret);
1855 		goto out_free_dmabuf;
1856 	}
1857 
1858 	*shared_fd = ret;
1859 	return 0;
1860 
1861 out_free_dmabuf:
1862 	dma_buf_put(dmabuf);
1863 	return ret;
1864 }
1865 
1866 static int criu_checkpoint_bos(struct kfd_process *p,
1867 			       uint32_t num_bos,
1868 			       uint8_t __user *user_bos,
1869 			       uint8_t __user *user_priv_data,
1870 			       uint64_t *priv_offset)
1871 {
1872 	struct kfd_criu_bo_bucket *bo_buckets;
1873 	struct kfd_criu_bo_priv_data *bo_privs;
1874 	int ret = 0, pdd_index, bo_index = 0, id;
1875 	void *mem;
1876 
1877 	bo_buckets = kvzalloc(num_bos * sizeof(*bo_buckets), GFP_KERNEL);
1878 	if (!bo_buckets)
1879 		return -ENOMEM;
1880 
1881 	bo_privs = kvzalloc(num_bos * sizeof(*bo_privs), GFP_KERNEL);
1882 	if (!bo_privs) {
1883 		ret = -ENOMEM;
1884 		goto exit;
1885 	}
1886 
1887 	for (pdd_index = 0; pdd_index < p->n_pdds; pdd_index++) {
1888 		struct kfd_process_device *pdd = p->pdds[pdd_index];
1889 		struct amdgpu_bo *dumper_bo;
1890 		struct kgd_mem *kgd_mem;
1891 
1892 		idr_for_each_entry(&pdd->alloc_idr, mem, id) {
1893 			struct kfd_criu_bo_bucket *bo_bucket;
1894 			struct kfd_criu_bo_priv_data *bo_priv;
1895 			int i, dev_idx = 0;
1896 
1897 			if (!mem) {
1898 				ret = -ENOMEM;
1899 				goto exit;
1900 			}
1901 
1902 			kgd_mem = (struct kgd_mem *)mem;
1903 			dumper_bo = kgd_mem->bo;
1904 
1905 			if ((uint64_t)kgd_mem->va <= pdd->gpuvm_base)
1906 				continue;
1907 
1908 			bo_bucket = &bo_buckets[bo_index];
1909 			bo_priv = &bo_privs[bo_index];
1910 
1911 			bo_bucket->gpu_id = pdd->user_gpu_id;
1912 			bo_bucket->addr = (uint64_t)kgd_mem->va;
1913 			bo_bucket->size = amdgpu_bo_size(dumper_bo);
1914 			bo_bucket->alloc_flags = (uint32_t)kgd_mem->alloc_flags;
1915 			bo_priv->idr_handle = id;
1916 
1917 			if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
1918 				ret = amdgpu_ttm_tt_get_userptr(&dumper_bo->tbo,
1919 								&bo_priv->user_addr);
1920 				if (ret) {
1921 					pr_err("Failed to obtain user address for user-pointer bo\n");
1922 					goto exit;
1923 				}
1924 			}
1925 			if (bo_bucket->alloc_flags
1926 			    & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) {
1927 				ret = criu_get_prime_handle(&dumper_bo->tbo.base,
1928 						bo_bucket->alloc_flags &
1929 						KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? DRM_RDWR : 0,
1930 						&bo_bucket->dmabuf_fd);
1931 				if (ret)
1932 					goto exit;
1933 			} else {
1934 				bo_bucket->dmabuf_fd = KFD_INVALID_FD;
1935 			}
1936 
1937 			if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL)
1938 				bo_bucket->offset = KFD_MMAP_TYPE_DOORBELL |
1939 					KFD_MMAP_GPU_ID(pdd->dev->id);
1940 			else if (bo_bucket->alloc_flags &
1941 				KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)
1942 				bo_bucket->offset = KFD_MMAP_TYPE_MMIO |
1943 					KFD_MMAP_GPU_ID(pdd->dev->id);
1944 			else
1945 				bo_bucket->offset = amdgpu_bo_mmap_offset(dumper_bo);
1946 
1947 			for (i = 0; i < p->n_pdds; i++) {
1948 				if (amdgpu_amdkfd_bo_mapped_to_dev(p->pdds[i]->dev->adev, kgd_mem))
1949 					bo_priv->mapped_gpuids[dev_idx++] = p->pdds[i]->user_gpu_id;
1950 			}
1951 
1952 			pr_debug("bo_size = 0x%llx, bo_addr = 0x%llx bo_offset = 0x%llx\n"
1953 					"gpu_id = 0x%x alloc_flags = 0x%x idr_handle = 0x%x",
1954 					bo_bucket->size,
1955 					bo_bucket->addr,
1956 					bo_bucket->offset,
1957 					bo_bucket->gpu_id,
1958 					bo_bucket->alloc_flags,
1959 					bo_priv->idr_handle);
1960 			bo_index++;
1961 		}
1962 	}
1963 
1964 	ret = copy_to_user(user_bos, bo_buckets, num_bos * sizeof(*bo_buckets));
1965 	if (ret) {
1966 		pr_err("Failed to copy BO information to user\n");
1967 		ret = -EFAULT;
1968 		goto exit;
1969 	}
1970 
1971 	ret = copy_to_user(user_priv_data + *priv_offset, bo_privs, num_bos * sizeof(*bo_privs));
1972 	if (ret) {
1973 		pr_err("Failed to copy BO priv information to user\n");
1974 		ret = -EFAULT;
1975 		goto exit;
1976 	}
1977 
1978 	*priv_offset += num_bos * sizeof(*bo_privs);
1979 
1980 exit:
1981 	while (ret && bo_index--) {
1982 		if (bo_buckets[bo_index].alloc_flags
1983 		    & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT))
1984 			close_fd(bo_buckets[bo_index].dmabuf_fd);
1985 	}
1986 
1987 	kvfree(bo_buckets);
1988 	kvfree(bo_privs);
1989 	return ret;
1990 }
1991 
1992 static int criu_get_process_object_info(struct kfd_process *p,
1993 					uint32_t *num_devices,
1994 					uint32_t *num_bos,
1995 					uint32_t *num_objects,
1996 					uint64_t *objs_priv_size)
1997 {
1998 	uint64_t queues_priv_data_size, svm_priv_data_size, priv_size;
1999 	uint32_t num_queues, num_events, num_svm_ranges;
2000 	int ret;
2001 
2002 	*num_devices = p->n_pdds;
2003 	*num_bos = get_process_num_bos(p);
2004 
2005 	ret = kfd_process_get_queue_info(p, &num_queues, &queues_priv_data_size);
2006 	if (ret)
2007 		return ret;
2008 
2009 	num_events = kfd_get_num_events(p);
2010 
2011 	ret = svm_range_get_info(p, &num_svm_ranges, &svm_priv_data_size);
2012 	if (ret)
2013 		return ret;
2014 
2015 	*num_objects = num_queues + num_events + num_svm_ranges;
2016 
2017 	if (objs_priv_size) {
2018 		priv_size = sizeof(struct kfd_criu_process_priv_data);
2019 		priv_size += *num_devices * sizeof(struct kfd_criu_device_priv_data);
2020 		priv_size += *num_bos * sizeof(struct kfd_criu_bo_priv_data);
2021 		priv_size += queues_priv_data_size;
2022 		priv_size += num_events * sizeof(struct kfd_criu_event_priv_data);
2023 		priv_size += svm_priv_data_size;
2024 		*objs_priv_size = priv_size;
2025 	}
2026 	return 0;
2027 }
2028 
2029 static int criu_checkpoint(struct file *filep,
2030 			   struct kfd_process *p,
2031 			   struct kfd_ioctl_criu_args *args)
2032 {
2033 	int ret;
2034 	uint32_t num_devices, num_bos, num_objects;
2035 	uint64_t priv_size, priv_offset = 0, bo_priv_offset;
2036 
2037 	if (!args->devices || !args->bos || !args->priv_data)
2038 		return -EINVAL;
2039 
2040 	mutex_lock(&p->mutex);
2041 
2042 	if (!p->n_pdds) {
2043 		pr_err("No pdd for given process\n");
2044 		ret = -ENODEV;
2045 		goto exit_unlock;
2046 	}
2047 
2048 	/* Confirm all process queues are evicted */
2049 	if (!p->queues_paused) {
2050 		pr_err("Cannot dump process when queues are not in evicted state\n");
2051 		/* CRIU plugin did not call op PROCESS_INFO before checkpointing */
2052 		ret = -EINVAL;
2053 		goto exit_unlock;
2054 	}
2055 
2056 	ret = criu_get_process_object_info(p, &num_devices, &num_bos, &num_objects, &priv_size);
2057 	if (ret)
2058 		goto exit_unlock;
2059 
2060 	if (num_devices != args->num_devices ||
2061 	    num_bos != args->num_bos ||
2062 	    num_objects != args->num_objects ||
2063 	    priv_size != args->priv_data_size) {
2064 
2065 		ret = -EINVAL;
2066 		goto exit_unlock;
2067 	}
2068 
2069 	/* each function will store private data inside priv_data and adjust priv_offset */
2070 	ret = criu_checkpoint_process(p, (uint8_t __user *)args->priv_data, &priv_offset);
2071 	if (ret)
2072 		goto exit_unlock;
2073 
2074 	ret = criu_checkpoint_devices(p, num_devices, (uint8_t __user *)args->devices,
2075 				(uint8_t __user *)args->priv_data, &priv_offset);
2076 	if (ret)
2077 		goto exit_unlock;
2078 
2079 	/* Leave room for BOs in the private data. They need to be restored
2080 	 * before events, but we checkpoint them last to simplify the error
2081 	 * handling.
2082 	 */
2083 	bo_priv_offset = priv_offset;
2084 	priv_offset += num_bos * sizeof(struct kfd_criu_bo_priv_data);
2085 
2086 	if (num_objects) {
2087 		ret = kfd_criu_checkpoint_queues(p, (uint8_t __user *)args->priv_data,
2088 						 &priv_offset);
2089 		if (ret)
2090 			goto exit_unlock;
2091 
2092 		ret = kfd_criu_checkpoint_events(p, (uint8_t __user *)args->priv_data,
2093 						 &priv_offset);
2094 		if (ret)
2095 			goto exit_unlock;
2096 
2097 		ret = kfd_criu_checkpoint_svm(p, (uint8_t __user *)args->priv_data, &priv_offset);
2098 		if (ret)
2099 			goto exit_unlock;
2100 	}
2101 
2102 	/* This must be the last thing in this function that can fail.
2103 	 * Otherwise we leak dmabuf file descriptors.
2104 	 */
2105 	ret = criu_checkpoint_bos(p, num_bos, (uint8_t __user *)args->bos,
2106 			   (uint8_t __user *)args->priv_data, &bo_priv_offset);
2107 
2108 exit_unlock:
2109 	mutex_unlock(&p->mutex);
2110 	if (ret)
2111 		pr_err("Failed to dump CRIU ret:%d\n", ret);
2112 	else
2113 		pr_debug("CRIU dump ret:%d\n", ret);
2114 
2115 	return ret;
2116 }
2117 
2118 static int criu_restore_process(struct kfd_process *p,
2119 				struct kfd_ioctl_criu_args *args,
2120 				uint64_t *priv_offset,
2121 				uint64_t max_priv_data_size)
2122 {
2123 	int ret = 0;
2124 	struct kfd_criu_process_priv_data process_priv;
2125 
2126 	if (*priv_offset + sizeof(process_priv) > max_priv_data_size)
2127 		return -EINVAL;
2128 
2129 	ret = copy_from_user(&process_priv,
2130 				(void __user *)(args->priv_data + *priv_offset),
2131 				sizeof(process_priv));
2132 	if (ret) {
2133 		pr_err("Failed to copy process private information from user\n");
2134 		ret = -EFAULT;
2135 		goto exit;
2136 	}
2137 	*priv_offset += sizeof(process_priv);
2138 
2139 	if (process_priv.version != KFD_CRIU_PRIV_VERSION) {
2140 		pr_err("Invalid CRIU API version (checkpointed:%d current:%d)\n",
2141 			process_priv.version, KFD_CRIU_PRIV_VERSION);
2142 		return -EINVAL;
2143 	}
2144 
2145 	pr_debug("Setting XNACK mode\n");
2146 	if (process_priv.xnack_mode && !kfd_process_xnack_mode(p, true)) {
2147 		pr_err("xnack mode cannot be set\n");
2148 		ret = -EPERM;
2149 		goto exit;
2150 	} else {
2151 		pr_debug("set xnack mode: %d\n", process_priv.xnack_mode);
2152 		p->xnack_enabled = process_priv.xnack_mode;
2153 	}
2154 
2155 exit:
2156 	return ret;
2157 }
2158 
2159 static int criu_restore_devices(struct kfd_process *p,
2160 				struct kfd_ioctl_criu_args *args,
2161 				uint64_t *priv_offset,
2162 				uint64_t max_priv_data_size)
2163 {
2164 	struct kfd_criu_device_bucket *device_buckets;
2165 	struct kfd_criu_device_priv_data *device_privs;
2166 	int ret = 0;
2167 	uint32_t i;
2168 
2169 	if (args->num_devices != p->n_pdds)
2170 		return -EINVAL;
2171 
2172 	if (*priv_offset + (args->num_devices * sizeof(*device_privs)) > max_priv_data_size)
2173 		return -EINVAL;
2174 
2175 	device_buckets = kmalloc_array(args->num_devices, sizeof(*device_buckets), GFP_KERNEL);
2176 	if (!device_buckets)
2177 		return -ENOMEM;
2178 
2179 	ret = copy_from_user(device_buckets, (void __user *)args->devices,
2180 				args->num_devices * sizeof(*device_buckets));
2181 	if (ret) {
2182 		pr_err("Failed to copy devices buckets from user\n");
2183 		ret = -EFAULT;
2184 		goto exit;
2185 	}
2186 
2187 	for (i = 0; i < args->num_devices; i++) {
2188 		struct kfd_node *dev;
2189 		struct kfd_process_device *pdd;
2190 		struct file *drm_file;
2191 
2192 		/* device private data is not currently used */
2193 
2194 		if (!device_buckets[i].user_gpu_id) {
2195 			pr_err("Invalid user gpu_id\n");
2196 			ret = -EINVAL;
2197 			goto exit;
2198 		}
2199 
2200 		dev = kfd_device_by_id(device_buckets[i].actual_gpu_id);
2201 		if (!dev) {
2202 			pr_err("Failed to find device with gpu_id = %x\n",
2203 				device_buckets[i].actual_gpu_id);
2204 			ret = -EINVAL;
2205 			goto exit;
2206 		}
2207 
2208 		pdd = kfd_get_process_device_data(dev, p);
2209 		if (!pdd) {
2210 			pr_err("Failed to get pdd for gpu_id = %x\n",
2211 					device_buckets[i].actual_gpu_id);
2212 			ret = -EINVAL;
2213 			goto exit;
2214 		}
2215 		pdd->user_gpu_id = device_buckets[i].user_gpu_id;
2216 
2217 		drm_file = fget(device_buckets[i].drm_fd);
2218 		if (!drm_file) {
2219 			pr_err("Invalid render node file descriptor sent from plugin (%d)\n",
2220 				device_buckets[i].drm_fd);
2221 			ret = -EINVAL;
2222 			goto exit;
2223 		}
2224 
2225 		if (pdd->drm_file) {
2226 			ret = -EINVAL;
2227 			goto exit;
2228 		}
2229 
2230 		/* create the vm using render nodes for kfd pdd */
2231 		if (kfd_process_device_init_vm(pdd, drm_file)) {
2232 			pr_err("could not init vm for given pdd\n");
2233 			/* On success, the PDD keeps the drm_file reference */
2234 			fput(drm_file);
2235 			ret = -EINVAL;
2236 			goto exit;
2237 		}
2238 		/*
2239 		 * pdd now already has the vm bound to render node so below api won't create a new
2240 		 * exclusive kfd mapping but use existing one with renderDXXX but is still needed
2241 		 * for iommu v2 binding  and runtime pm.
2242 		 */
2243 		pdd = kfd_bind_process_to_device(dev, p);
2244 		if (IS_ERR(pdd)) {
2245 			ret = PTR_ERR(pdd);
2246 			goto exit;
2247 		}
2248 
2249 		if (!pdd->doorbell_index &&
2250 		    kfd_alloc_process_doorbells(pdd->dev->kfd, &pdd->doorbell_index) < 0) {
2251 			ret = -ENOMEM;
2252 			goto exit;
2253 		}
2254 	}
2255 
2256 	/*
2257 	 * We are not copying device private data from user as we are not using the data for now,
2258 	 * but we still adjust for its private data.
2259 	 */
2260 	*priv_offset += args->num_devices * sizeof(*device_privs);
2261 
2262 exit:
2263 	kfree(device_buckets);
2264 	return ret;
2265 }
2266 
2267 static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd,
2268 				      struct kfd_criu_bo_bucket *bo_bucket,
2269 				      struct kfd_criu_bo_priv_data *bo_priv,
2270 				      struct kgd_mem **kgd_mem)
2271 {
2272 	int idr_handle;
2273 	int ret;
2274 	const bool criu_resume = true;
2275 	u64 offset;
2276 
2277 	if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
2278 		if (bo_bucket->size !=
2279 				kfd_doorbell_process_slice(pdd->dev->kfd))
2280 			return -EINVAL;
2281 
2282 		offset = kfd_get_process_doorbells(pdd);
2283 		if (!offset)
2284 			return -ENOMEM;
2285 	} else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
2286 		/* MMIO BOs need remapped bus address */
2287 		if (bo_bucket->size != PAGE_SIZE) {
2288 			pr_err("Invalid page size\n");
2289 			return -EINVAL;
2290 		}
2291 		offset = pdd->dev->adev->rmmio_remap.bus_addr;
2292 		if (!offset) {
2293 			pr_err("amdgpu_amdkfd_get_mmio_remap_phys_addr failed\n");
2294 			return -ENOMEM;
2295 		}
2296 	} else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
2297 		offset = bo_priv->user_addr;
2298 	}
2299 	/* Create the BO */
2300 	ret = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(pdd->dev->adev, bo_bucket->addr,
2301 						      bo_bucket->size, pdd->drm_priv, kgd_mem,
2302 						      &offset, bo_bucket->alloc_flags, criu_resume);
2303 	if (ret) {
2304 		pr_err("Could not create the BO\n");
2305 		return ret;
2306 	}
2307 	pr_debug("New BO created: size:0x%llx addr:0x%llx offset:0x%llx\n",
2308 		 bo_bucket->size, bo_bucket->addr, offset);
2309 
2310 	/* Restore previous IDR handle */
2311 	pr_debug("Restoring old IDR handle for the BO");
2312 	idr_handle = idr_alloc(&pdd->alloc_idr, *kgd_mem, bo_priv->idr_handle,
2313 			       bo_priv->idr_handle + 1, GFP_KERNEL);
2314 
2315 	if (idr_handle < 0) {
2316 		pr_err("Could not allocate idr\n");
2317 		amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev, *kgd_mem, pdd->drm_priv,
2318 						       NULL);
2319 		return -ENOMEM;
2320 	}
2321 
2322 	if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL)
2323 		bo_bucket->restored_offset = KFD_MMAP_TYPE_DOORBELL | KFD_MMAP_GPU_ID(pdd->dev->id);
2324 	if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
2325 		bo_bucket->restored_offset = KFD_MMAP_TYPE_MMIO | KFD_MMAP_GPU_ID(pdd->dev->id);
2326 	} else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
2327 		bo_bucket->restored_offset = offset;
2328 	} else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
2329 		bo_bucket->restored_offset = offset;
2330 		/* Update the VRAM usage count */
2331 		WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + bo_bucket->size);
2332 	}
2333 	return 0;
2334 }
2335 
2336 static int criu_restore_bo(struct kfd_process *p,
2337 			   struct kfd_criu_bo_bucket *bo_bucket,
2338 			   struct kfd_criu_bo_priv_data *bo_priv)
2339 {
2340 	struct kfd_process_device *pdd;
2341 	struct kgd_mem *kgd_mem;
2342 	int ret;
2343 	int j;
2344 
2345 	pr_debug("Restoring BO size:0x%llx addr:0x%llx gpu_id:0x%x flags:0x%x idr_handle:0x%x\n",
2346 		 bo_bucket->size, bo_bucket->addr, bo_bucket->gpu_id, bo_bucket->alloc_flags,
2347 		 bo_priv->idr_handle);
2348 
2349 	pdd = kfd_process_device_data_by_id(p, bo_bucket->gpu_id);
2350 	if (!pdd) {
2351 		pr_err("Failed to get pdd\n");
2352 		return -ENODEV;
2353 	}
2354 
2355 	ret = criu_restore_memory_of_gpu(pdd, bo_bucket, bo_priv, &kgd_mem);
2356 	if (ret)
2357 		return ret;
2358 
2359 	/* now map these BOs to GPU/s */
2360 	for (j = 0; j < p->n_pdds; j++) {
2361 		struct kfd_node *peer;
2362 		struct kfd_process_device *peer_pdd;
2363 
2364 		if (!bo_priv->mapped_gpuids[j])
2365 			break;
2366 
2367 		peer_pdd = kfd_process_device_data_by_id(p, bo_priv->mapped_gpuids[j]);
2368 		if (!peer_pdd)
2369 			return -EINVAL;
2370 
2371 		peer = peer_pdd->dev;
2372 
2373 		peer_pdd = kfd_bind_process_to_device(peer, p);
2374 		if (IS_ERR(peer_pdd))
2375 			return PTR_ERR(peer_pdd);
2376 
2377 		ret = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(peer->adev, kgd_mem,
2378 							    peer_pdd->drm_priv);
2379 		if (ret) {
2380 			pr_err("Failed to map to gpu %d/%d\n", j, p->n_pdds);
2381 			return ret;
2382 		}
2383 	}
2384 
2385 	pr_debug("map memory was successful for the BO\n");
2386 	/* create the dmabuf object and export the bo */
2387 	if (bo_bucket->alloc_flags
2388 	    & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) {
2389 		ret = criu_get_prime_handle(&kgd_mem->bo->tbo.base, DRM_RDWR,
2390 					    &bo_bucket->dmabuf_fd);
2391 		if (ret)
2392 			return ret;
2393 	} else {
2394 		bo_bucket->dmabuf_fd = KFD_INVALID_FD;
2395 	}
2396 
2397 	return 0;
2398 }
2399 
2400 static int criu_restore_bos(struct kfd_process *p,
2401 			    struct kfd_ioctl_criu_args *args,
2402 			    uint64_t *priv_offset,
2403 			    uint64_t max_priv_data_size)
2404 {
2405 	struct kfd_criu_bo_bucket *bo_buckets = NULL;
2406 	struct kfd_criu_bo_priv_data *bo_privs = NULL;
2407 	int ret = 0;
2408 	uint32_t i = 0;
2409 
2410 	if (*priv_offset + (args->num_bos * sizeof(*bo_privs)) > max_priv_data_size)
2411 		return -EINVAL;
2412 
2413 	/* Prevent MMU notifications until stage-4 IOCTL (CRIU_RESUME) is received */
2414 	amdgpu_amdkfd_block_mmu_notifications(p->kgd_process_info);
2415 
2416 	bo_buckets = kvmalloc_array(args->num_bos, sizeof(*bo_buckets), GFP_KERNEL);
2417 	if (!bo_buckets)
2418 		return -ENOMEM;
2419 
2420 	ret = copy_from_user(bo_buckets, (void __user *)args->bos,
2421 			     args->num_bos * sizeof(*bo_buckets));
2422 	if (ret) {
2423 		pr_err("Failed to copy BOs information from user\n");
2424 		ret = -EFAULT;
2425 		goto exit;
2426 	}
2427 
2428 	bo_privs = kvmalloc_array(args->num_bos, sizeof(*bo_privs), GFP_KERNEL);
2429 	if (!bo_privs) {
2430 		ret = -ENOMEM;
2431 		goto exit;
2432 	}
2433 
2434 	ret = copy_from_user(bo_privs, (void __user *)args->priv_data + *priv_offset,
2435 			     args->num_bos * sizeof(*bo_privs));
2436 	if (ret) {
2437 		pr_err("Failed to copy BOs information from user\n");
2438 		ret = -EFAULT;
2439 		goto exit;
2440 	}
2441 	*priv_offset += args->num_bos * sizeof(*bo_privs);
2442 
2443 	/* Create and map new BOs */
2444 	for (; i < args->num_bos; i++) {
2445 		ret = criu_restore_bo(p, &bo_buckets[i], &bo_privs[i]);
2446 		if (ret) {
2447 			pr_debug("Failed to restore BO[%d] ret%d\n", i, ret);
2448 			goto exit;
2449 		}
2450 	} /* done */
2451 
2452 	/* Copy only the buckets back so user can read bo_buckets[N].restored_offset */
2453 	ret = copy_to_user((void __user *)args->bos,
2454 				bo_buckets,
2455 				(args->num_bos * sizeof(*bo_buckets)));
2456 	if (ret)
2457 		ret = -EFAULT;
2458 
2459 exit:
2460 	while (ret && i--) {
2461 		if (bo_buckets[i].alloc_flags
2462 		   & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT))
2463 			close_fd(bo_buckets[i].dmabuf_fd);
2464 	}
2465 	kvfree(bo_buckets);
2466 	kvfree(bo_privs);
2467 	return ret;
2468 }
2469 
2470 static int criu_restore_objects(struct file *filep,
2471 				struct kfd_process *p,
2472 				struct kfd_ioctl_criu_args *args,
2473 				uint64_t *priv_offset,
2474 				uint64_t max_priv_data_size)
2475 {
2476 	int ret = 0;
2477 	uint32_t i;
2478 
2479 	BUILD_BUG_ON(offsetof(struct kfd_criu_queue_priv_data, object_type));
2480 	BUILD_BUG_ON(offsetof(struct kfd_criu_event_priv_data, object_type));
2481 	BUILD_BUG_ON(offsetof(struct kfd_criu_svm_range_priv_data, object_type));
2482 
2483 	for (i = 0; i < args->num_objects; i++) {
2484 		uint32_t object_type;
2485 
2486 		if (*priv_offset + sizeof(object_type) > max_priv_data_size) {
2487 			pr_err("Invalid private data size\n");
2488 			return -EINVAL;
2489 		}
2490 
2491 		ret = get_user(object_type, (uint32_t __user *)(args->priv_data + *priv_offset));
2492 		if (ret) {
2493 			pr_err("Failed to copy private information from user\n");
2494 			goto exit;
2495 		}
2496 
2497 		switch (object_type) {
2498 		case KFD_CRIU_OBJECT_TYPE_QUEUE:
2499 			ret = kfd_criu_restore_queue(p, (uint8_t __user *)args->priv_data,
2500 						     priv_offset, max_priv_data_size);
2501 			if (ret)
2502 				goto exit;
2503 			break;
2504 		case KFD_CRIU_OBJECT_TYPE_EVENT:
2505 			ret = kfd_criu_restore_event(filep, p, (uint8_t __user *)args->priv_data,
2506 						     priv_offset, max_priv_data_size);
2507 			if (ret)
2508 				goto exit;
2509 			break;
2510 		case KFD_CRIU_OBJECT_TYPE_SVM_RANGE:
2511 			ret = kfd_criu_restore_svm(p, (uint8_t __user *)args->priv_data,
2512 						     priv_offset, max_priv_data_size);
2513 			if (ret)
2514 				goto exit;
2515 			break;
2516 		default:
2517 			pr_err("Invalid object type:%u at index:%d\n", object_type, i);
2518 			ret = -EINVAL;
2519 			goto exit;
2520 		}
2521 	}
2522 exit:
2523 	return ret;
2524 }
2525 
2526 static int criu_restore(struct file *filep,
2527 			struct kfd_process *p,
2528 			struct kfd_ioctl_criu_args *args)
2529 {
2530 	uint64_t priv_offset = 0;
2531 	int ret = 0;
2532 
2533 	pr_debug("CRIU restore (num_devices:%u num_bos:%u num_objects:%u priv_data_size:%llu)\n",
2534 		 args->num_devices, args->num_bos, args->num_objects, args->priv_data_size);
2535 
2536 	if (!args->bos || !args->devices || !args->priv_data || !args->priv_data_size ||
2537 	    !args->num_devices || !args->num_bos)
2538 		return -EINVAL;
2539 
2540 	mutex_lock(&p->mutex);
2541 
2542 	/*
2543 	 * Set the process to evicted state to avoid running any new queues before all the memory
2544 	 * mappings are ready.
2545 	 */
2546 	ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_CRIU_RESTORE);
2547 	if (ret)
2548 		goto exit_unlock;
2549 
2550 	/* Each function will adjust priv_offset based on how many bytes they consumed */
2551 	ret = criu_restore_process(p, args, &priv_offset, args->priv_data_size);
2552 	if (ret)
2553 		goto exit_unlock;
2554 
2555 	ret = criu_restore_devices(p, args, &priv_offset, args->priv_data_size);
2556 	if (ret)
2557 		goto exit_unlock;
2558 
2559 	ret = criu_restore_bos(p, args, &priv_offset, args->priv_data_size);
2560 	if (ret)
2561 		goto exit_unlock;
2562 
2563 	ret = criu_restore_objects(filep, p, args, &priv_offset, args->priv_data_size);
2564 	if (ret)
2565 		goto exit_unlock;
2566 
2567 	if (priv_offset != args->priv_data_size) {
2568 		pr_err("Invalid private data size\n");
2569 		ret = -EINVAL;
2570 	}
2571 
2572 exit_unlock:
2573 	mutex_unlock(&p->mutex);
2574 	if (ret)
2575 		pr_err("Failed to restore CRIU ret:%d\n", ret);
2576 	else
2577 		pr_debug("CRIU restore successful\n");
2578 
2579 	return ret;
2580 }
2581 
2582 static int criu_unpause(struct file *filep,
2583 			struct kfd_process *p,
2584 			struct kfd_ioctl_criu_args *args)
2585 {
2586 	int ret;
2587 
2588 	mutex_lock(&p->mutex);
2589 
2590 	if (!p->queues_paused) {
2591 		mutex_unlock(&p->mutex);
2592 		return -EINVAL;
2593 	}
2594 
2595 	ret = kfd_process_restore_queues(p);
2596 	if (ret)
2597 		pr_err("Failed to unpause queues ret:%d\n", ret);
2598 	else
2599 		p->queues_paused = false;
2600 
2601 	mutex_unlock(&p->mutex);
2602 
2603 	return ret;
2604 }
2605 
2606 static int criu_resume(struct file *filep,
2607 			struct kfd_process *p,
2608 			struct kfd_ioctl_criu_args *args)
2609 {
2610 	struct kfd_process *target = NULL;
2611 	struct pid *pid = NULL;
2612 	int ret = 0;
2613 
2614 	pr_debug("Inside %s, target pid for criu restore: %d\n", __func__,
2615 		 args->pid);
2616 
2617 	pid = find_get_pid(args->pid);
2618 	if (!pid) {
2619 		pr_err("Cannot find pid info for %i\n", args->pid);
2620 		return -ESRCH;
2621 	}
2622 
2623 	pr_debug("calling kfd_lookup_process_by_pid\n");
2624 	target = kfd_lookup_process_by_pid(pid);
2625 
2626 	put_pid(pid);
2627 
2628 	if (!target) {
2629 		pr_debug("Cannot find process info for %i\n", args->pid);
2630 		return -ESRCH;
2631 	}
2632 
2633 	mutex_lock(&target->mutex);
2634 	ret = kfd_criu_resume_svm(target);
2635 	if (ret) {
2636 		pr_err("kfd_criu_resume_svm failed for %i\n", args->pid);
2637 		goto exit;
2638 	}
2639 
2640 	ret =  amdgpu_amdkfd_criu_resume(target->kgd_process_info);
2641 	if (ret)
2642 		pr_err("amdgpu_amdkfd_criu_resume failed for %i\n", args->pid);
2643 
2644 exit:
2645 	mutex_unlock(&target->mutex);
2646 
2647 	kfd_unref_process(target);
2648 	return ret;
2649 }
2650 
2651 static int criu_process_info(struct file *filep,
2652 				struct kfd_process *p,
2653 				struct kfd_ioctl_criu_args *args)
2654 {
2655 	int ret = 0;
2656 
2657 	mutex_lock(&p->mutex);
2658 
2659 	if (!p->n_pdds) {
2660 		pr_err("No pdd for given process\n");
2661 		ret = -ENODEV;
2662 		goto err_unlock;
2663 	}
2664 
2665 	ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_CRIU_CHECKPOINT);
2666 	if (ret)
2667 		goto err_unlock;
2668 
2669 	p->queues_paused = true;
2670 
2671 	args->pid = task_pid_nr_ns(p->lead_thread,
2672 					task_active_pid_ns(p->lead_thread));
2673 
2674 	ret = criu_get_process_object_info(p, &args->num_devices, &args->num_bos,
2675 					   &args->num_objects, &args->priv_data_size);
2676 	if (ret)
2677 		goto err_unlock;
2678 
2679 	dev_dbg(kfd_device, "Num of devices:%u bos:%u objects:%u priv_data_size:%lld\n",
2680 				args->num_devices, args->num_bos, args->num_objects,
2681 				args->priv_data_size);
2682 
2683 err_unlock:
2684 	if (ret) {
2685 		kfd_process_restore_queues(p);
2686 		p->queues_paused = false;
2687 	}
2688 	mutex_unlock(&p->mutex);
2689 	return ret;
2690 }
2691 
2692 static int kfd_ioctl_criu(struct file *filep, struct kfd_process *p, void *data)
2693 {
2694 	struct kfd_ioctl_criu_args *args = data;
2695 	int ret;
2696 
2697 	dev_dbg(kfd_device, "CRIU operation: %d\n", args->op);
2698 	switch (args->op) {
2699 	case KFD_CRIU_OP_PROCESS_INFO:
2700 		ret = criu_process_info(filep, p, args);
2701 		break;
2702 	case KFD_CRIU_OP_CHECKPOINT:
2703 		ret = criu_checkpoint(filep, p, args);
2704 		break;
2705 	case KFD_CRIU_OP_UNPAUSE:
2706 		ret = criu_unpause(filep, p, args);
2707 		break;
2708 	case KFD_CRIU_OP_RESTORE:
2709 		ret = criu_restore(filep, p, args);
2710 		break;
2711 	case KFD_CRIU_OP_RESUME:
2712 		ret = criu_resume(filep, p, args);
2713 		break;
2714 	default:
2715 		dev_dbg(kfd_device, "Unsupported CRIU operation:%d\n", args->op);
2716 		ret = -EINVAL;
2717 		break;
2718 	}
2719 
2720 	if (ret)
2721 		dev_dbg(kfd_device, "CRIU operation:%d err:%d\n", args->op, ret);
2722 
2723 	return ret;
2724 }
2725 
2726 #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
2727 	[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
2728 			    .cmd_drv = 0, .name = #ioctl}
2729 
2730 /** Ioctl table */
2731 static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
2732 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_VERSION,
2733 			kfd_ioctl_get_version, 0),
2734 
2735 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_QUEUE,
2736 			kfd_ioctl_create_queue, 0),
2737 
2738 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_QUEUE,
2739 			kfd_ioctl_destroy_queue, 0),
2740 
2741 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_MEMORY_POLICY,
2742 			kfd_ioctl_set_memory_policy, 0),
2743 
2744 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_CLOCK_COUNTERS,
2745 			kfd_ioctl_get_clock_counters, 0),
2746 
2747 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES,
2748 			kfd_ioctl_get_process_apertures, 0),
2749 
2750 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UPDATE_QUEUE,
2751 			kfd_ioctl_update_queue, 0),
2752 
2753 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_EVENT,
2754 			kfd_ioctl_create_event, 0),
2755 
2756 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_EVENT,
2757 			kfd_ioctl_destroy_event, 0),
2758 
2759 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_EVENT,
2760 			kfd_ioctl_set_event, 0),
2761 
2762 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_RESET_EVENT,
2763 			kfd_ioctl_reset_event, 0),
2764 
2765 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS,
2766 			kfd_ioctl_wait_events, 0),
2767 
2768 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER_DEPRECATED,
2769 			kfd_ioctl_dbg_register, 0),
2770 
2771 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER_DEPRECATED,
2772 			kfd_ioctl_dbg_unregister, 0),
2773 
2774 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH_DEPRECATED,
2775 			kfd_ioctl_dbg_address_watch, 0),
2776 
2777 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL_DEPRECATED,
2778 			kfd_ioctl_dbg_wave_control, 0),
2779 
2780 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA,
2781 			kfd_ioctl_set_scratch_backing_va, 0),
2782 
2783 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG,
2784 			kfd_ioctl_get_tile_config, 0),
2785 
2786 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER,
2787 			kfd_ioctl_set_trap_handler, 0),
2788 
2789 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW,
2790 			kfd_ioctl_get_process_apertures_new, 0),
2791 
2792 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ACQUIRE_VM,
2793 			kfd_ioctl_acquire_vm, 0),
2794 
2795 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU,
2796 			kfd_ioctl_alloc_memory_of_gpu, 0),
2797 
2798 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_FREE_MEMORY_OF_GPU,
2799 			kfd_ioctl_free_memory_of_gpu, 0),
2800 
2801 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_MAP_MEMORY_TO_GPU,
2802 			kfd_ioctl_map_memory_to_gpu, 0),
2803 
2804 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU,
2805 			kfd_ioctl_unmap_memory_from_gpu, 0),
2806 
2807 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_CU_MASK,
2808 			kfd_ioctl_set_cu_mask, 0),
2809 
2810 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_QUEUE_WAVE_STATE,
2811 			kfd_ioctl_get_queue_wave_state, 0),
2812 
2813 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_DMABUF_INFO,
2814 				kfd_ioctl_get_dmabuf_info, 0),
2815 
2816 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF,
2817 				kfd_ioctl_import_dmabuf, 0),
2818 
2819 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS,
2820 			kfd_ioctl_alloc_queue_gws, 0),
2821 
2822 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SMI_EVENTS,
2823 			kfd_ioctl_smi_events, 0),
2824 
2825 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SVM, kfd_ioctl_svm, 0),
2826 
2827 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_XNACK_MODE,
2828 			kfd_ioctl_set_xnack_mode, 0),
2829 
2830 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CRIU_OP,
2831 			kfd_ioctl_criu, KFD_IOC_FLAG_CHECKPOINT_RESTORE),
2832 
2833 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_AVAILABLE_MEMORY,
2834 			kfd_ioctl_get_available_memory, 0),
2835 
2836 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_EXPORT_DMABUF,
2837 				kfd_ioctl_export_dmabuf, 0),
2838 };
2839 
2840 #define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)
2841 
2842 static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
2843 {
2844 	struct kfd_process *process;
2845 	amdkfd_ioctl_t *func;
2846 	const struct amdkfd_ioctl_desc *ioctl = NULL;
2847 	unsigned int nr = _IOC_NR(cmd);
2848 	char stack_kdata[128];
2849 	char *kdata = NULL;
2850 	unsigned int usize, asize;
2851 	int retcode = -EINVAL;
2852 	bool ptrace_attached = false;
2853 
2854 	if (nr >= AMDKFD_CORE_IOCTL_COUNT)
2855 		goto err_i1;
2856 
2857 	if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) {
2858 		u32 amdkfd_size;
2859 
2860 		ioctl = &amdkfd_ioctls[nr];
2861 
2862 		amdkfd_size = _IOC_SIZE(ioctl->cmd);
2863 		usize = asize = _IOC_SIZE(cmd);
2864 		if (amdkfd_size > asize)
2865 			asize = amdkfd_size;
2866 
2867 		cmd = ioctl->cmd;
2868 	} else
2869 		goto err_i1;
2870 
2871 	dev_dbg(kfd_device, "ioctl cmd 0x%x (#0x%x), arg 0x%lx\n", cmd, nr, arg);
2872 
2873 	/* Get the process struct from the filep. Only the process
2874 	 * that opened /dev/kfd can use the file descriptor. Child
2875 	 * processes need to create their own KFD device context.
2876 	 */
2877 	process = filep->private_data;
2878 
2879 	rcu_read_lock();
2880 	if ((ioctl->flags & KFD_IOC_FLAG_CHECKPOINT_RESTORE) &&
2881 	    ptrace_parent(process->lead_thread) == current)
2882 		ptrace_attached = true;
2883 	rcu_read_unlock();
2884 
2885 	if (process->lead_thread != current->group_leader
2886 	    && !ptrace_attached) {
2887 		dev_dbg(kfd_device, "Using KFD FD in wrong process\n");
2888 		retcode = -EBADF;
2889 		goto err_i1;
2890 	}
2891 
2892 	/* Do not trust userspace, use our own definition */
2893 	func = ioctl->func;
2894 
2895 	if (unlikely(!func)) {
2896 		dev_dbg(kfd_device, "no function\n");
2897 		retcode = -EINVAL;
2898 		goto err_i1;
2899 	}
2900 
2901 	/*
2902 	 * Versions of docker shipped in Ubuntu 18.xx and 20.xx do not support
2903 	 * CAP_CHECKPOINT_RESTORE, so we also allow access if CAP_SYS_ADMIN as CAP_SYS_ADMIN is a
2904 	 * more priviledged access.
2905 	 */
2906 	if (unlikely(ioctl->flags & KFD_IOC_FLAG_CHECKPOINT_RESTORE)) {
2907 		if (!capable(CAP_CHECKPOINT_RESTORE) &&
2908 						!capable(CAP_SYS_ADMIN)) {
2909 			retcode = -EACCES;
2910 			goto err_i1;
2911 		}
2912 	}
2913 
2914 	if (cmd & (IOC_IN | IOC_OUT)) {
2915 		if (asize <= sizeof(stack_kdata)) {
2916 			kdata = stack_kdata;
2917 		} else {
2918 			kdata = kmalloc(asize, GFP_KERNEL);
2919 			if (!kdata) {
2920 				retcode = -ENOMEM;
2921 				goto err_i1;
2922 			}
2923 		}
2924 		if (asize > usize)
2925 			memset(kdata + usize, 0, asize - usize);
2926 	}
2927 
2928 	if (cmd & IOC_IN) {
2929 		if (copy_from_user(kdata, (void __user *)arg, usize) != 0) {
2930 			retcode = -EFAULT;
2931 			goto err_i1;
2932 		}
2933 	} else if (cmd & IOC_OUT) {
2934 		memset(kdata, 0, usize);
2935 	}
2936 
2937 	retcode = func(filep, process, kdata);
2938 
2939 	if (cmd & IOC_OUT)
2940 		if (copy_to_user((void __user *)arg, kdata, usize) != 0)
2941 			retcode = -EFAULT;
2942 
2943 err_i1:
2944 	if (!ioctl)
2945 		dev_dbg(kfd_device, "invalid ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n",
2946 			  task_pid_nr(current), cmd, nr);
2947 
2948 	if (kdata != stack_kdata)
2949 		kfree(kdata);
2950 
2951 	if (retcode)
2952 		dev_dbg(kfd_device, "ioctl cmd (#0x%x), arg 0x%lx, ret = %d\n",
2953 				nr, arg, retcode);
2954 
2955 	return retcode;
2956 }
2957 
2958 static int kfd_mmio_mmap(struct kfd_node *dev, struct kfd_process *process,
2959 		      struct vm_area_struct *vma)
2960 {
2961 	phys_addr_t address;
2962 
2963 	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
2964 		return -EINVAL;
2965 
2966 	address = dev->adev->rmmio_remap.bus_addr;
2967 
2968 	vm_flags_set(vma, VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
2969 				VM_DONTDUMP | VM_PFNMAP);
2970 
2971 	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
2972 
2973 	pr_debug("pasid 0x%x mapping mmio page\n"
2974 		 "     target user address == 0x%08llX\n"
2975 		 "     physical address    == 0x%08llX\n"
2976 		 "     vm_flags            == 0x%04lX\n"
2977 		 "     size                == 0x%04lX\n",
2978 		 process->pasid, (unsigned long long) vma->vm_start,
2979 		 address, vma->vm_flags, PAGE_SIZE);
2980 
2981 	return io_remap_pfn_range(vma,
2982 				vma->vm_start,
2983 				address >> PAGE_SHIFT,
2984 				PAGE_SIZE,
2985 				vma->vm_page_prot);
2986 }
2987 
2988 
2989 static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
2990 {
2991 	struct kfd_process *process;
2992 	struct kfd_node *dev = NULL;
2993 	unsigned long mmap_offset;
2994 	unsigned int gpu_id;
2995 
2996 	process = kfd_get_process(current);
2997 	if (IS_ERR(process))
2998 		return PTR_ERR(process);
2999 
3000 	mmap_offset = vma->vm_pgoff << PAGE_SHIFT;
3001 	gpu_id = KFD_MMAP_GET_GPU_ID(mmap_offset);
3002 	if (gpu_id)
3003 		dev = kfd_device_by_id(gpu_id);
3004 
3005 	switch (mmap_offset & KFD_MMAP_TYPE_MASK) {
3006 	case KFD_MMAP_TYPE_DOORBELL:
3007 		if (!dev)
3008 			return -ENODEV;
3009 		return kfd_doorbell_mmap(dev, process, vma);
3010 
3011 	case KFD_MMAP_TYPE_EVENTS:
3012 		return kfd_event_mmap(process, vma);
3013 
3014 	case KFD_MMAP_TYPE_RESERVED_MEM:
3015 		if (!dev)
3016 			return -ENODEV;
3017 		return kfd_reserved_mem_mmap(dev, process, vma);
3018 	case KFD_MMAP_TYPE_MMIO:
3019 		if (!dev)
3020 			return -ENODEV;
3021 		return kfd_mmio_mmap(dev, process, vma);
3022 	}
3023 
3024 	return -EFAULT;
3025 }
3026