1 /*
2  * Copyright 2008 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  * Copyright 2009 Jerome Glisse.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  */
25 
26 #include <linux/kthread.h>
27 #include <linux/pci.h>
28 #include <linux/uaccess.h>
29 #include <linux/pm_runtime.h>
30 
31 #include <drm/drm_debugfs.h>
32 
33 #include "amdgpu.h"
34 
35 /**
36  * amdgpu_debugfs_add_files - Add simple debugfs entries
37  *
38  * @adev:  Device to attach debugfs entries to
39  * @files:  Array of function callbacks that respond to reads
40  * @nfiles: Number of callbacks to register
41  *
42  */
43 int amdgpu_debugfs_add_files(struct amdgpu_device *adev,
44 			     const struct drm_info_list *files,
45 			     unsigned nfiles)
46 {
47 	unsigned i;
48 
49 	for (i = 0; i < adev->debugfs_count; i++) {
50 		if (adev->debugfs[i].files == files) {
51 			/* Already registered */
52 			return 0;
53 		}
54 	}
55 
56 	i = adev->debugfs_count + 1;
57 	if (i > AMDGPU_DEBUGFS_MAX_COMPONENTS) {
58 		DRM_ERROR("Reached maximum number of debugfs components.\n");
59 		DRM_ERROR("Report so we increase "
60 			  "AMDGPU_DEBUGFS_MAX_COMPONENTS.\n");
61 		return -EINVAL;
62 	}
63 	adev->debugfs[adev->debugfs_count].files = files;
64 	adev->debugfs[adev->debugfs_count].num_files = nfiles;
65 	adev->debugfs_count = i;
66 #if defined(CONFIG_DEBUG_FS)
67 	drm_debugfs_create_files(files, nfiles,
68 				 adev->ddev->primary->debugfs_root,
69 				 adev->ddev->primary);
70 #endif
71 	return 0;
72 }
73 
74 #if defined(CONFIG_DEBUG_FS)
75 
76 /**
77  * amdgpu_debugfs_process_reg_op - Handle MMIO register reads/writes
78  *
79  * @read: True if reading
80  * @f: open file handle
81  * @buf: User buffer to write/read to
82  * @size: Number of bytes to write/read
83  * @pos:  Offset to seek to
84  *
85  * This debugfs entry has special meaning on the offset being sought.
86  * Various bits have different meanings:
87  *
88  * Bit 62:  Indicates a GRBM bank switch is needed
89  * Bit 61:  Indicates a SRBM bank switch is needed (implies bit 62 is
90  * 			zero)
91  * Bits 24..33: The SE or ME selector if needed
92  * Bits 34..43: The SH (or SA) or PIPE selector if needed
93  * Bits 44..53: The INSTANCE (or CU/WGP) or QUEUE selector if needed
94  *
95  * Bit 23:  Indicates that the PM power gating lock should be held
96  * 			This is necessary to read registers that might be
97  * 			unreliable during a power gating transistion.
98  *
99  * The lower bits are the BYTE offset of the register to read.  This
100  * allows reading multiple registers in a single call and having
101  * the returned size reflect that.
102  */
static int  amdgpu_debugfs_process_reg_op(bool read, struct file *f,
		char __user *buf, size_t size, loff_t *pos)
{
	struct amdgpu_device *adev = file_inode(f)->i_private;
	ssize_t result = 0;
	int r;
	bool pm_pg_lock, use_bank, use_ring;
	unsigned instance_bank, sh_bank, se_bank, me, pipe, queue, vmid;

	pm_pg_lock = use_bank = use_ring = false;
	instance_bank = sh_bank = se_bank = me = pipe = queue = vmid = 0;

	/* Only whole, dword-aligned accesses are supported, and GRBM
	 * (bit 62) and SRBM (bit 61) bank switching are mutually
	 * exclusive.
	 */
	if (size & 0x3 || *pos & 0x3 ||
			((*pos & (1ULL << 62)) && (*pos & (1ULL << 61))))
		return -EINVAL;

	/* are we reading registers for which a PG lock is necessary? */
	pm_pg_lock = (*pos >> 23) & 1;

	if (*pos & (1ULL << 62)) {
		/* GRBM bank switch: decode SE/SH/INSTANCE selectors. */
		se_bank = (*pos & GENMASK_ULL(33, 24)) >> 24;
		sh_bank = (*pos & GENMASK_ULL(43, 34)) >> 34;
		instance_bank = (*pos & GENMASK_ULL(53, 44)) >> 44;

		/* 0x3FF in a field means "broadcast to all" and is mapped
		 * to the 0xFFFFFFFF sentinel amdgpu_gfx_select_se_sh() uses.
		 */
		if (se_bank == 0x3FF)
			se_bank = 0xFFFFFFFF;
		if (sh_bank == 0x3FF)
			sh_bank = 0xFFFFFFFF;
		if (instance_bank == 0x3FF)
			instance_bank = 0xFFFFFFFF;
		use_bank = true;
	} else if (*pos & (1ULL << 61)) {

		/* SRBM switch: decode ME/PIPE/QUEUE/VMID selectors. */
		me = (*pos & GENMASK_ULL(33, 24)) >> 24;
		pipe = (*pos & GENMASK_ULL(43, 34)) >> 34;
		queue = (*pos & GENMASK_ULL(53, 44)) >> 44;
		vmid = (*pos & GENMASK_ULL(58, 54)) >> 54;

		use_ring = true;
	} else {
		use_bank = use_ring = false;
	}

	/* Strip the control bits, leaving only the register byte offset. */
	*pos &= (1UL << 22) - 1;

	r = pm_runtime_get_sync(adev->ddev->dev);
	if (r < 0)
		return r;
	/* NOTE(review): pm_runtime_get_sync() bumps the usage count even
	 * when it fails, so the early return above looks like it leaks a
	 * runtime-PM reference — confirm.
	 */

	if (use_bank) {
		/* Validate the decoded selectors against the hw config. */
		if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) ||
		    (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines)) {
			pm_runtime_mark_last_busy(adev->ddev->dev);
			pm_runtime_put_autosuspend(adev->ddev->dev);
			return -EINVAL;
		}
		mutex_lock(&adev->grbm_idx_mutex);
		amdgpu_gfx_select_se_sh(adev, se_bank,
					sh_bank, instance_bank);
	} else if (use_ring) {
		mutex_lock(&adev->srbm_mutex);
		amdgpu_gfx_select_me_pipe_q(adev, me, pipe, queue, vmid);
	}

	if (pm_pg_lock)
		mutex_lock(&adev->pm.mutex);

	/* Copy one dword per iteration in the requested direction. */
	while (size) {
		uint32_t value;

		if (read) {
			value = RREG32(*pos >> 2);
			r = put_user(value, (uint32_t *)buf);
		} else {
			r = get_user(value, (uint32_t *)buf);
			if (!r)
				WREG32(*pos >> 2, value);
		}
		if (r) {
			result = r;
			goto end;
		}

		result += 4;
		buf += 4;
		*pos += 4;
		size -= 4;
	}

end:
	/* Restore broadcast selection and release everything acquired
	 * above, in reverse order.
	 */
	if (use_bank) {
		amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
		mutex_unlock(&adev->grbm_idx_mutex);
	} else if (use_ring) {
		amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	}

	if (pm_pg_lock)
		mutex_unlock(&adev->pm.mutex);

	pm_runtime_mark_last_busy(adev->ddev->dev);
	pm_runtime_put_autosuspend(adev->ddev->dev);

	return result;
}
209 
/**
 * amdgpu_debugfs_regs_read - Callback for reading MMIO registers
 *
 * @f: open file handle
 * @buf: User buffer to store read data in
 * @size: Number of bytes to read
 * @pos:  Offset to seek to (see amdgpu_debugfs_process_reg_op() for
 *        the meaning of the individual offset bits)
 *
 * Thin wrapper that delegates to the common register accessor in
 * read mode.
 */
static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf,
					size_t size, loff_t *pos)
{
	return amdgpu_debugfs_process_reg_op(true, f, buf, size, pos);
}
218 
/**
 * amdgpu_debugfs_regs_write - Callback for writing MMIO registers
 *
 * @f: open file handle
 * @buf: User buffer holding the data to write
 * @size: Number of bytes to write
 * @pos:  Offset to seek to (see amdgpu_debugfs_process_reg_op() for
 *        the meaning of the individual offset bits)
 *
 * Thin wrapper that delegates to the common register accessor in
 * write mode; the const is cast away because the shared helper takes
 * a single buffer parameter for both directions.
 */
static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf,
					 size_t size, loff_t *pos)
{
	return amdgpu_debugfs_process_reg_op(false, f, (char __user *)buf, size, pos);
}
227 
228 
229 /**
230  * amdgpu_debugfs_regs_pcie_read - Read from a PCIE register
231  *
232  * @f: open file handle
233  * @buf: User buffer to store read data in
234  * @size: Number of bytes to read
235  * @pos:  Offset to seek to
236  *
237  * The lower bits are the BYTE offset of the register to read.  This
238  * allows reading multiple registers in a single call and having
239  * the returned size reflect that.
240  */
241 static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
242 					size_t size, loff_t *pos)
243 {
244 	struct amdgpu_device *adev = file_inode(f)->i_private;
245 	ssize_t result = 0;
246 	int r;
247 
248 	if (size & 0x3 || *pos & 0x3)
249 		return -EINVAL;
250 
251 	r = pm_runtime_get_sync(adev->ddev->dev);
252 	if (r < 0)
253 		return r;
254 
255 	while (size) {
256 		uint32_t value;
257 
258 		value = RREG32_PCIE(*pos >> 2);
259 		r = put_user(value, (uint32_t *)buf);
260 		if (r) {
261 			pm_runtime_mark_last_busy(adev->ddev->dev);
262 			pm_runtime_put_autosuspend(adev->ddev->dev);
263 			return r;
264 		}
265 
266 		result += 4;
267 		buf += 4;
268 		*pos += 4;
269 		size -= 4;
270 	}
271 
272 	pm_runtime_mark_last_busy(adev->ddev->dev);
273 	pm_runtime_put_autosuspend(adev->ddev->dev);
274 
275 	return result;
276 }
277 
278 /**
279  * amdgpu_debugfs_regs_pcie_write - Write to a PCIE register
280  *
281  * @f: open file handle
282  * @buf: User buffer to write data from
283  * @size: Number of bytes to write
284  * @pos:  Offset to seek to
285  *
286  * The lower bits are the BYTE offset of the register to write.  This
287  * allows writing multiple registers in a single call and having
288  * the returned size reflect that.
289  */
290 static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user *buf,
291 					 size_t size, loff_t *pos)
292 {
293 	struct amdgpu_device *adev = file_inode(f)->i_private;
294 	ssize_t result = 0;
295 	int r;
296 
297 	if (size & 0x3 || *pos & 0x3)
298 		return -EINVAL;
299 
300 	r = pm_runtime_get_sync(adev->ddev->dev);
301 	if (r < 0)
302 		return r;
303 
304 	while (size) {
305 		uint32_t value;
306 
307 		r = get_user(value, (uint32_t *)buf);
308 		if (r) {
309 			pm_runtime_mark_last_busy(adev->ddev->dev);
310 			pm_runtime_put_autosuspend(adev->ddev->dev);
311 			return r;
312 		}
313 
314 		WREG32_PCIE(*pos >> 2, value);
315 
316 		result += 4;
317 		buf += 4;
318 		*pos += 4;
319 		size -= 4;
320 	}
321 
322 	pm_runtime_mark_last_busy(adev->ddev->dev);
323 	pm_runtime_put_autosuspend(adev->ddev->dev);
324 
325 	return result;
326 }
327 
328 /**
329  * amdgpu_debugfs_regs_didt_read - Read from a DIDT register
330  *
331  * @f: open file handle
332  * @buf: User buffer to store read data in
333  * @size: Number of bytes to read
334  * @pos:  Offset to seek to
335  *
336  * The lower bits are the BYTE offset of the register to read.  This
337  * allows reading multiple registers in a single call and having
338  * the returned size reflect that.
339  */
340 static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
341 					size_t size, loff_t *pos)
342 {
343 	struct amdgpu_device *adev = file_inode(f)->i_private;
344 	ssize_t result = 0;
345 	int r;
346 
347 	if (size & 0x3 || *pos & 0x3)
348 		return -EINVAL;
349 
350 	r = pm_runtime_get_sync(adev->ddev->dev);
351 	if (r < 0)
352 		return r;
353 
354 	while (size) {
355 		uint32_t value;
356 
357 		value = RREG32_DIDT(*pos >> 2);
358 		r = put_user(value, (uint32_t *)buf);
359 		if (r) {
360 			pm_runtime_mark_last_busy(adev->ddev->dev);
361 			pm_runtime_put_autosuspend(adev->ddev->dev);
362 			return r;
363 		}
364 
365 		result += 4;
366 		buf += 4;
367 		*pos += 4;
368 		size -= 4;
369 	}
370 
371 	pm_runtime_mark_last_busy(adev->ddev->dev);
372 	pm_runtime_put_autosuspend(adev->ddev->dev);
373 
374 	return result;
375 }
376 
377 /**
378  * amdgpu_debugfs_regs_didt_write - Write to a DIDT register
379  *
380  * @f: open file handle
381  * @buf: User buffer to write data from
382  * @size: Number of bytes to write
383  * @pos:  Offset to seek to
384  *
385  * The lower bits are the BYTE offset of the register to write.  This
386  * allows writing multiple registers in a single call and having
387  * the returned size reflect that.
388  */
389 static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user *buf,
390 					 size_t size, loff_t *pos)
391 {
392 	struct amdgpu_device *adev = file_inode(f)->i_private;
393 	ssize_t result = 0;
394 	int r;
395 
396 	if (size & 0x3 || *pos & 0x3)
397 		return -EINVAL;
398 
399 	r = pm_runtime_get_sync(adev->ddev->dev);
400 	if (r < 0)
401 		return r;
402 
403 	while (size) {
404 		uint32_t value;
405 
406 		r = get_user(value, (uint32_t *)buf);
407 		if (r) {
408 			pm_runtime_mark_last_busy(adev->ddev->dev);
409 			pm_runtime_put_autosuspend(adev->ddev->dev);
410 			return r;
411 		}
412 
413 		WREG32_DIDT(*pos >> 2, value);
414 
415 		result += 4;
416 		buf += 4;
417 		*pos += 4;
418 		size -= 4;
419 	}
420 
421 	pm_runtime_mark_last_busy(adev->ddev->dev);
422 	pm_runtime_put_autosuspend(adev->ddev->dev);
423 
424 	return result;
425 }
426 
427 /**
428  * amdgpu_debugfs_regs_smc_read - Read from a SMC register
429  *
430  * @f: open file handle
431  * @buf: User buffer to store read data in
432  * @size: Number of bytes to read
433  * @pos:  Offset to seek to
434  *
435  * The lower bits are the BYTE offset of the register to read.  This
436  * allows reading multiple registers in a single call and having
437  * the returned size reflect that.
438  */
439 static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
440 					size_t size, loff_t *pos)
441 {
442 	struct amdgpu_device *adev = file_inode(f)->i_private;
443 	ssize_t result = 0;
444 	int r;
445 
446 	if (size & 0x3 || *pos & 0x3)
447 		return -EINVAL;
448 
449 	r = pm_runtime_get_sync(adev->ddev->dev);
450 	if (r < 0)
451 		return r;
452 
453 	while (size) {
454 		uint32_t value;
455 
456 		value = RREG32_SMC(*pos);
457 		r = put_user(value, (uint32_t *)buf);
458 		if (r) {
459 			pm_runtime_mark_last_busy(adev->ddev->dev);
460 			pm_runtime_put_autosuspend(adev->ddev->dev);
461 			return r;
462 		}
463 
464 		result += 4;
465 		buf += 4;
466 		*pos += 4;
467 		size -= 4;
468 	}
469 
470 	pm_runtime_mark_last_busy(adev->ddev->dev);
471 	pm_runtime_put_autosuspend(adev->ddev->dev);
472 
473 	return result;
474 }
475 
476 /**
477  * amdgpu_debugfs_regs_smc_write - Write to a SMC register
478  *
479  * @f: open file handle
480  * @buf: User buffer to write data from
481  * @size: Number of bytes to write
482  * @pos:  Offset to seek to
483  *
484  * The lower bits are the BYTE offset of the register to write.  This
485  * allows writing multiple registers in a single call and having
486  * the returned size reflect that.
487  */
488 static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *buf,
489 					 size_t size, loff_t *pos)
490 {
491 	struct amdgpu_device *adev = file_inode(f)->i_private;
492 	ssize_t result = 0;
493 	int r;
494 
495 	if (size & 0x3 || *pos & 0x3)
496 		return -EINVAL;
497 
498 	r = pm_runtime_get_sync(adev->ddev->dev);
499 	if (r < 0)
500 		return r;
501 
502 	while (size) {
503 		uint32_t value;
504 
505 		r = get_user(value, (uint32_t *)buf);
506 		if (r) {
507 			pm_runtime_mark_last_busy(adev->ddev->dev);
508 			pm_runtime_put_autosuspend(adev->ddev->dev);
509 			return r;
510 		}
511 
512 		WREG32_SMC(*pos, value);
513 
514 		result += 4;
515 		buf += 4;
516 		*pos += 4;
517 		size -= 4;
518 	}
519 
520 	pm_runtime_mark_last_busy(adev->ddev->dev);
521 	pm_runtime_put_autosuspend(adev->ddev->dev);
522 
523 	return result;
524 }
525 
/**
 * amdgpu_debugfs_gca_config_read - Read from gfx config data
 *
 * @f: open file handle
 * @buf: User buffer to store read data in
 * @size: Number of bytes to read
 * @pos:  Offset to seek to
 *
 * This file is used to access configuration data in a somewhat
 * stable fashion.  The format is a series of DWORDs with the first
 * indicating which revision it is.  New content is appended to the
 * end so that older software can still read the data.
 *
 * NOTE: the order of the DWORDs below is an external ABI; only ever
 * append new fields (and bump the revision number when doing so).
 */

static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf,
					size_t size, loff_t *pos)
{
	struct amdgpu_device *adev = file_inode(f)->i_private;
	ssize_t result = 0;
	int r;
	uint32_t *config, no_regs = 0;

	/* Only whole, dword-aligned accesses are supported. */
	if (size & 0x3 || *pos & 0x3)
		return -EINVAL;

	/* 256 dwords is generous headroom over the ~35 currently used. */
	config = kmalloc_array(256, sizeof(*config), GFP_KERNEL);
	if (!config)
		return -ENOMEM;

	/* version, increment each time something is added */
	config[no_regs++] = 3;
	config[no_regs++] = adev->gfx.config.max_shader_engines;
	config[no_regs++] = adev->gfx.config.max_tile_pipes;
	config[no_regs++] = adev->gfx.config.max_cu_per_sh;
	config[no_regs++] = adev->gfx.config.max_sh_per_se;
	config[no_regs++] = adev->gfx.config.max_backends_per_se;
	config[no_regs++] = adev->gfx.config.max_texture_channel_caches;
	config[no_regs++] = adev->gfx.config.max_gprs;
	config[no_regs++] = adev->gfx.config.max_gs_threads;
	config[no_regs++] = adev->gfx.config.max_hw_contexts;
	config[no_regs++] = adev->gfx.config.sc_prim_fifo_size_frontend;
	config[no_regs++] = adev->gfx.config.sc_prim_fifo_size_backend;
	config[no_regs++] = adev->gfx.config.sc_hiz_tile_fifo_size;
	config[no_regs++] = adev->gfx.config.sc_earlyz_tile_fifo_size;
	config[no_regs++] = adev->gfx.config.num_tile_pipes;
	config[no_regs++] = adev->gfx.config.backend_enable_mask;
	config[no_regs++] = adev->gfx.config.mem_max_burst_length_bytes;
	config[no_regs++] = adev->gfx.config.mem_row_size_in_kb;
	config[no_regs++] = adev->gfx.config.shader_engine_tile_size;
	config[no_regs++] = adev->gfx.config.num_gpus;
	config[no_regs++] = adev->gfx.config.multi_gpu_tile_size;
	config[no_regs++] = adev->gfx.config.mc_arb_ramcfg;
	config[no_regs++] = adev->gfx.config.gb_addr_config;
	config[no_regs++] = adev->gfx.config.num_rbs;

	/* rev==1 */
	config[no_regs++] = adev->rev_id;
	config[no_regs++] = adev->pg_flags;
	config[no_regs++] = adev->cg_flags;

	/* rev==2 */
	config[no_regs++] = adev->family;
	config[no_regs++] = adev->external_rev_id;

	/* rev==3 */
	config[no_regs++] = adev->pdev->device;
	config[no_regs++] = adev->pdev->revision;
	config[no_regs++] = adev->pdev->subsystem_device;
	config[no_regs++] = adev->pdev->subsystem_vendor;

	/* Copy out one dword at a time; short reads happen naturally
	 * when *pos reaches the end of the table.
	 */
	while (size && (*pos < no_regs * 4)) {
		uint32_t value;

		value = config[*pos >> 2];
		r = put_user(value, (uint32_t *)buf);
		if (r) {
			kfree(config);
			return r;
		}

		result += 4;
		buf += 4;
		*pos += 4;
		size -= 4;
	}

	kfree(config);
	return result;
}
615 
616 /**
617  * amdgpu_debugfs_sensor_read - Read from the powerplay sensors
618  *
619  * @f: open file handle
620  * @buf: User buffer to store read data in
621  * @size: Number of bytes to read
622  * @pos:  Offset to seek to
623  *
624  * The offset is treated as the BYTE address of one of the sensors
625  * enumerated in amd/include/kgd_pp_interface.h under the
626  * 'amd_pp_sensors' enumeration.  For instance to read the UVD VCLK
627  * you would use the offset 3 * 4 = 12.
628  */
629 static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
630 					size_t size, loff_t *pos)
631 {
632 	struct amdgpu_device *adev = file_inode(f)->i_private;
633 	int idx, x, outsize, r, valuesize;
634 	uint32_t values[16];
635 
636 	if (size & 3 || *pos & 0x3)
637 		return -EINVAL;
638 
639 	if (!adev->pm.dpm_enabled)
640 		return -EINVAL;
641 
642 	/* convert offset to sensor number */
643 	idx = *pos >> 2;
644 
645 	valuesize = sizeof(values);
646 
647 	r = pm_runtime_get_sync(adev->ddev->dev);
648 	if (r < 0)
649 		return r;
650 
651 	r = amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize);
652 
653 	pm_runtime_mark_last_busy(adev->ddev->dev);
654 	pm_runtime_put_autosuspend(adev->ddev->dev);
655 
656 	if (r)
657 		return r;
658 
659 	if (size > valuesize)
660 		return -EINVAL;
661 
662 	outsize = 0;
663 	x = 0;
664 	if (!r) {
665 		while (size) {
666 			r = put_user(values[x++], (int32_t *)buf);
667 			buf += 4;
668 			size -= 4;
669 			outsize += 4;
670 		}
671 	}
672 
673 	return !r ? outsize : r;
674 }
675 
/** amdgpu_debugfs_wave_read - Read WAVE STATUS data
 *
 * @f: open file handle
 * @buf: User buffer to store read data in
 * @size: Number of bytes to read
 * @pos:  Offset to seek to
 *
 * The offset being sought changes which wave that the status data
 * will be returned for.  The bits are used as follows:
 *
 * Bits 0..6: 	Byte offset into data
 * Bits 7..14:	SE selector
 * Bits 15..22:	SH/SA selector
 * Bits 23..30: CU/{WGP+SIMD} selector
 * Bits 31..36: WAVE ID selector
 * Bits 37..44: SIMD ID selector
 *
 * The returned data begins with one DWORD of version information
 * Followed by WAVE STATUS registers relevant to the GFX IP version
 * being used.  See gfx_v8_0_read_wave_data() for an example output.
 */
static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
					size_t size, loff_t *pos)
{
	struct amdgpu_device *adev = f->f_inode->i_private;
	int r, x;
	ssize_t result=0;
	uint32_t offset, se, sh, cu, wave, simd, data[32];

	/* Only whole, dword-aligned accesses are supported. */
	if (size & 3 || *pos & 3)
		return -EINVAL;

	/* decode offset */
	offset = (*pos & GENMASK_ULL(6, 0));
	se = (*pos & GENMASK_ULL(14, 7)) >> 7;
	sh = (*pos & GENMASK_ULL(22, 15)) >> 15;
	cu = (*pos & GENMASK_ULL(30, 23)) >> 23;
	wave = (*pos & GENMASK_ULL(36, 31)) >> 31;
	simd = (*pos & GENMASK_ULL(44, 37)) >> 37;

	r = pm_runtime_get_sync(adev->ddev->dev);
	if (r < 0)
		return r;
	/* NOTE(review): pm_runtime_get_sync() bumps the usage count even
	 * on failure; the early return above looks like it leaks a
	 * runtime-PM reference — confirm.
	 */

	/* switch to the specific se/sh/cu */
	mutex_lock(&adev->grbm_idx_mutex);
	amdgpu_gfx_select_se_sh(adev, se, sh, cu);

	/* x counts the number of dwords the hw callback filled in. */
	x = 0;
	if (adev->gfx.funcs->read_wave_data)
		adev->gfx.funcs->read_wave_data(adev, simd, wave, data, &x);

	/* Restore broadcast selection before dropping the index mutex. */
	amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
	mutex_unlock(&adev->grbm_idx_mutex);

	pm_runtime_mark_last_busy(adev->ddev->dev);
	pm_runtime_put_autosuspend(adev->ddev->dev);

	/* No callback (or no data) for this GFX IP version. */
	if (!x)
		return -EINVAL;

	/* Copy out, clamped to the amount of data actually captured. */
	while (size && (offset < x * 4)) {
		uint32_t value;

		value = data[offset >> 2];
		r = put_user(value, (uint32_t *)buf);
		if (r)
			return r;

		result += 4;
		buf += 4;
		offset += 4;
		size -= 4;
	}

	return result;
}
753 
754 /** amdgpu_debugfs_gpr_read - Read wave gprs
755  *
756  * @f: open file handle
757  * @buf: User buffer to store read data in
758  * @size: Number of bytes to read
759  * @pos:  Offset to seek to
760  *
761  * The offset being sought changes which wave that the status data
762  * will be returned for.  The bits are used as follows:
763  *
764  * Bits 0..11:	Byte offset into data
765  * Bits 12..19:	SE selector
766  * Bits 20..27:	SH/SA selector
767  * Bits 28..35: CU/{WGP+SIMD} selector
768  * Bits 36..43: WAVE ID selector
769  * Bits 37..44: SIMD ID selector
770  * Bits 52..59: Thread selector
771  * Bits 60..61: Bank selector (VGPR=0,SGPR=1)
772  *
773  * The return data comes from the SGPR or VGPR register bank for
774  * the selected operational unit.
775  */
776 static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
777 					size_t size, loff_t *pos)
778 {
779 	struct amdgpu_device *adev = f->f_inode->i_private;
780 	int r;
781 	ssize_t result = 0;
782 	uint32_t offset, se, sh, cu, wave, simd, thread, bank, *data;
783 
784 	if (size & 3 || *pos & 3)
785 		return -EINVAL;
786 
787 	/* decode offset */
788 	offset = *pos & GENMASK_ULL(11, 0);
789 	se = (*pos & GENMASK_ULL(19, 12)) >> 12;
790 	sh = (*pos & GENMASK_ULL(27, 20)) >> 20;
791 	cu = (*pos & GENMASK_ULL(35, 28)) >> 28;
792 	wave = (*pos & GENMASK_ULL(43, 36)) >> 36;
793 	simd = (*pos & GENMASK_ULL(51, 44)) >> 44;
794 	thread = (*pos & GENMASK_ULL(59, 52)) >> 52;
795 	bank = (*pos & GENMASK_ULL(61, 60)) >> 60;
796 
797 	data = kcalloc(1024, sizeof(*data), GFP_KERNEL);
798 	if (!data)
799 		return -ENOMEM;
800 
801 	r = pm_runtime_get_sync(adev->ddev->dev);
802 	if (r < 0)
803 		return r;
804 
805 	/* switch to the specific se/sh/cu */
806 	mutex_lock(&adev->grbm_idx_mutex);
807 	amdgpu_gfx_select_se_sh(adev, se, sh, cu);
808 
809 	if (bank == 0) {
810 		if (adev->gfx.funcs->read_wave_vgprs)
811 			adev->gfx.funcs->read_wave_vgprs(adev, simd, wave, thread, offset, size>>2, data);
812 	} else {
813 		if (adev->gfx.funcs->read_wave_sgprs)
814 			adev->gfx.funcs->read_wave_sgprs(adev, simd, wave, offset, size>>2, data);
815 	}
816 
817 	amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
818 	mutex_unlock(&adev->grbm_idx_mutex);
819 
820 	pm_runtime_mark_last_busy(adev->ddev->dev);
821 	pm_runtime_put_autosuspend(adev->ddev->dev);
822 
823 	while (size) {
824 		uint32_t value;
825 
826 		value = data[offset++];
827 		r = put_user(value, (uint32_t *)buf);
828 		if (r) {
829 			result = r;
830 			goto err;
831 		}
832 
833 		result += 4;
834 		buf += 4;
835 		size -= 4;
836 	}
837 
838 err:
839 	kfree(data);
840 	return result;
841 }
842 
/* File operations for the raw MMIO register interface (amdgpu_regs). */
static const struct file_operations amdgpu_debugfs_regs_fops = {
	.owner = THIS_MODULE,
	.read = amdgpu_debugfs_regs_read,
	.write = amdgpu_debugfs_regs_write,
	.llseek = default_llseek
};
/* File operations for DIDT register access (amdgpu_regs_didt). */
static const struct file_operations amdgpu_debugfs_regs_didt_fops = {
	.owner = THIS_MODULE,
	.read = amdgpu_debugfs_regs_didt_read,
	.write = amdgpu_debugfs_regs_didt_write,
	.llseek = default_llseek
};
/* File operations for PCIE register access (amdgpu_regs_pcie). */
static const struct file_operations amdgpu_debugfs_regs_pcie_fops = {
	.owner = THIS_MODULE,
	.read = amdgpu_debugfs_regs_pcie_read,
	.write = amdgpu_debugfs_regs_pcie_write,
	.llseek = default_llseek
};
/* File operations for SMC register access (amdgpu_regs_smc). */
static const struct file_operations amdgpu_debugfs_regs_smc_fops = {
	.owner = THIS_MODULE,
	.read = amdgpu_debugfs_regs_smc_read,
	.write = amdgpu_debugfs_regs_smc_write,
	.llseek = default_llseek
};

/* Read-only GCA configuration dump (amdgpu_gca_config). */
static const struct file_operations amdgpu_debugfs_gca_config_fops = {
	.owner = THIS_MODULE,
	.read = amdgpu_debugfs_gca_config_read,
	.llseek = default_llseek
};

/* Read-only powerplay sensor interface (amdgpu_sensors). */
static const struct file_operations amdgpu_debugfs_sensors_fops = {
	.owner = THIS_MODULE,
	.read = amdgpu_debugfs_sensor_read,
	.llseek = default_llseek
};

/* Read-only wave status interface (amdgpu_wave). */
static const struct file_operations amdgpu_debugfs_wave_fops = {
	.owner = THIS_MODULE,
	.read = amdgpu_debugfs_wave_read,
	.llseek = default_llseek
};
/* Read-only wave GPR interface (amdgpu_gpr). */
static const struct file_operations amdgpu_debugfs_gpr_fops = {
	.owner = THIS_MODULE,
	.read = amdgpu_debugfs_gpr_read,
	.llseek = default_llseek
};

/* fops table; kept in sync index-for-index with debugfs_regs_names. */
static const struct file_operations *debugfs_regs[] = {
	&amdgpu_debugfs_regs_fops,
	&amdgpu_debugfs_regs_didt_fops,
	&amdgpu_debugfs_regs_pcie_fops,
	&amdgpu_debugfs_regs_smc_fops,
	&amdgpu_debugfs_gca_config_fops,
	&amdgpu_debugfs_sensors_fops,
	&amdgpu_debugfs_wave_fops,
	&amdgpu_debugfs_gpr_fops,
};

/* debugfs file names; order must match debugfs_regs[] above. */
static const char *debugfs_regs_names[] = {
	"amdgpu_regs",
	"amdgpu_regs_didt",
	"amdgpu_regs_pcie",
	"amdgpu_regs_smc",
	"amdgpu_gca_config",
	"amdgpu_sensors",
	"amdgpu_wave",
	"amdgpu_gpr",
};
912 
913 /**
914  * amdgpu_debugfs_regs_init -	Initialize debugfs entries that provide
915  * 								register access.
916  *
917  * @adev: The device to attach the debugfs entries to
918  */
919 int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
920 {
921 	struct drm_minor *minor = adev->ddev->primary;
922 	struct dentry *ent, *root = minor->debugfs_root;
923 	unsigned int i;
924 
925 	for (i = 0; i < ARRAY_SIZE(debugfs_regs); i++) {
926 		ent = debugfs_create_file(debugfs_regs_names[i],
927 					  S_IFREG | S_IRUGO, root,
928 					  adev, debugfs_regs[i]);
929 		if (!i && !IS_ERR_OR_NULL(ent))
930 			i_size_write(ent->d_inode, adev->rmmio_size);
931 		adev->debugfs_regs[i] = ent;
932 	}
933 
934 	return 0;
935 }
936 
937 void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev)
938 {
939 	unsigned i;
940 
941 	for (i = 0; i < ARRAY_SIZE(debugfs_regs); i++) {
942 		if (adev->debugfs_regs[i]) {
943 			debugfs_remove(adev->debugfs_regs[i]);
944 			adev->debugfs_regs[i] = NULL;
945 		}
946 	}
947 }
948 
/* Run the IB smoke tests on every ring and report the result through
 * the seq_file.  The scheduler threads are parked for the duration so
 * the tests have exclusive access to the rings.
 */
static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct amdgpu_device *adev = dev->dev_private;
	int r = 0, i;

	r = pm_runtime_get_sync(dev->dev);
	if (r < 0)
		return r;

	/* Avoid accidently unparking the sched thread during GPU reset */
	mutex_lock(&adev->lock_reset);

	/* hold on the scheduler */
	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
		struct amdgpu_ring *ring = adev->rings[i];

		/* Unused ring slots have no scheduler thread to park. */
		if (!ring || !ring->sched.thread)
			continue;
		kthread_park(ring->sched.thread);
	}

	seq_printf(m, "run ib test:\n");
	r = amdgpu_ib_ring_tests(adev);
	if (r)
		seq_printf(m, "ib ring tests failed (%d).\n", r);
	else
		seq_printf(m, "ib ring tests passed.\n");

	/* go on the scheduler */
	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
		struct amdgpu_ring *ring = adev->rings[i];

		if (!ring || !ring->sched.thread)
			continue;
		kthread_unpark(ring->sched.thread);
	}

	mutex_unlock(&adev->lock_reset);

	pm_runtime_mark_last_busy(dev->dev);
	pm_runtime_put_autosuspend(dev->dev);

	return 0;
}
995 
996 static int amdgpu_debugfs_get_vbios_dump(struct seq_file *m, void *data)
997 {
998 	struct drm_info_node *node = (struct drm_info_node *) m->private;
999 	struct drm_device *dev = node->minor->dev;
1000 	struct amdgpu_device *adev = dev->dev_private;
1001 
1002 	seq_write(m, adev->bios, adev->bios_size);
1003 	return 0;
1004 }
1005 
1006 static int amdgpu_debugfs_evict_vram(struct seq_file *m, void *data)
1007 {
1008 	struct drm_info_node *node = (struct drm_info_node *)m->private;
1009 	struct drm_device *dev = node->minor->dev;
1010 	struct amdgpu_device *adev = dev->dev_private;
1011 	int r;
1012 
1013 	r = pm_runtime_get_sync(dev->dev);
1014 	if (r < 0)
1015 		return r;
1016 
1017 	seq_printf(m, "(%d)\n", amdgpu_bo_evict_vram(adev));
1018 
1019 	pm_runtime_mark_last_busy(dev->dev);
1020 	pm_runtime_put_autosuspend(dev->dev);
1021 
1022 	return 0;
1023 }
1024 
1025 static int amdgpu_debugfs_evict_gtt(struct seq_file *m, void *data)
1026 {
1027 	struct drm_info_node *node = (struct drm_info_node *)m->private;
1028 	struct drm_device *dev = node->minor->dev;
1029 	struct amdgpu_device *adev = dev->dev_private;
1030 	int r;
1031 
1032 	r = pm_runtime_get_sync(dev->dev);
1033 	if (r < 0)
1034 		return r;
1035 
1036 	seq_printf(m, "(%d)\n", ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_TT));
1037 
1038 	pm_runtime_mark_last_busy(dev->dev);
1039 	pm_runtime_put_autosuspend(dev->dev);
1040 
1041 	return 0;
1042 }
1043 
1044 static const struct drm_info_list amdgpu_debugfs_list[] = {
1045 	{"amdgpu_vbios", amdgpu_debugfs_get_vbios_dump},
1046 	{"amdgpu_test_ib", &amdgpu_debugfs_test_ib},
1047 	{"amdgpu_evict_vram", &amdgpu_debugfs_evict_vram},
1048 	{"amdgpu_evict_gtt", &amdgpu_debugfs_evict_gtt},
1049 };
1050 
/* Detach every fence between the last signalled sequence number and the
 * current sync sequence from the ring's fence array into @fences, so
 * the caller can signal them later.  @fences must be large enough to be
 * indexed by any masked sequence number (num_fences_mask + 1 entries).
 */
static void amdgpu_ib_preempt_fences_swap(struct amdgpu_ring *ring,
					  struct dma_fence **fences)
{
	struct amdgpu_fence_driver *drv = &ring->fence_drv;
	uint32_t sync_seq, last_seq;

	last_seq = atomic_read(&ring->fence_drv.last_seq);
	sync_seq = ring->fence_drv.sync_seq;

	/* Work with ring-buffer indices rather than raw sequence numbers. */
	last_seq &= drv->num_fences_mask;
	sync_seq &= drv->num_fences_mask;

	do {
		struct dma_fence *fence, **ptr;

		++last_seq;
		last_seq &= drv->num_fences_mask;
		ptr = &drv->fences[last_seq];

		/* Steal the slot; ownership moves to the fences[] array. */
		fence = rcu_dereference_protected(*ptr, 1);
		RCU_INIT_POINTER(*ptr, NULL);

		if (!fence)
			continue;

		fences[last_seq] = fence;

	} while (last_seq != sync_seq);
}
1080 
/*
 * amdgpu_ib_preempt_signal_fences - signal and release swapped-out fences
 * @fences: array of fence pointers (NULL entries are skipped)
 * @length: number of entries in @fences
 *
 * Signals every non-NULL fence in the array and drops the reference that
 * was taken over from the ring's fence slots.
 */
static void amdgpu_ib_preempt_signal_fences(struct dma_fence **fences,
					    int length)
{
	int idx;

	for (idx = 0; idx < length; idx++) {
		struct dma_fence *f = fences[idx];

		if (f) {
			dma_fence_signal(f);
			dma_fence_put(f);
		}
	}
}
1095 
1096 static void amdgpu_ib_preempt_job_recovery(struct drm_gpu_scheduler *sched)
1097 {
1098 	struct drm_sched_job *s_job;
1099 	struct dma_fence *fence;
1100 
1101 	spin_lock(&sched->job_list_lock);
1102 	list_for_each_entry(s_job, &sched->ring_mirror_list, node) {
1103 		fence = sched->ops->run_job(s_job);
1104 		dma_fence_put(fence);
1105 	}
1106 	spin_unlock(&sched->job_list_lock);
1107 }
1108 
/*
 * amdgpu_ib_preempt_mark_partial_job - flag the job interrupted mid-IB
 * @ring: ring that was just preempted
 *
 * Only applies to GFX rings.  Reads the sequence number the CP wrote at
 * preemption time (two dwords past the fence driver's CPU address --
 * NOTE(review): layout assumed from usage here, confirm against the GFX
 * fence/MQD layout) and, if it is newer than the last signaled sequence,
 * finds the corresponding job on the scheduler's mirror list and marks it
 * AMDGPU_IB_PREEMPTED so resubmission can handle it as a partial job.
 */
static void amdgpu_ib_preempt_mark_partial_job(struct amdgpu_ring *ring)
{
	struct amdgpu_job *job;
	struct drm_sched_job *s_job;
	uint32_t preempt_seq;
	struct dma_fence *fence, **ptr;
	struct amdgpu_fence_driver *drv = &ring->fence_drv;
	struct drm_gpu_scheduler *sched = &ring->sched;

	if (ring->funcs->type != AMDGPU_RING_TYPE_GFX)
		return;

	preempt_seq = le32_to_cpu(*(drv->cpu_addr + 2));
	/* Nothing was actually interrupted if the preempted sequence has
	 * already signaled. */
	if (preempt_seq <= atomic_read(&drv->last_seq))
		return;

	/* Look up the fence slot of the interrupted submission. */
	preempt_seq &= drv->num_fences_mask;
	ptr = &drv->fences[preempt_seq];
	fence = rcu_dereference_protected(*ptr, 1);

	spin_lock(&sched->job_list_lock);
	list_for_each_entry(s_job, &sched->ring_mirror_list, node) {
		job = to_amdgpu_job(s_job);
		if (job->fence == fence)
			/* mark the job as preempted */
			job->preemption_status |= AMDGPU_IB_PREEMPTED;
	}
	spin_unlock(&sched->job_list_lock);
}
1138 
/*
 * amdgpu_debugfs_ib_preempt - debugfs write hook that preempts a ring's IB
 * @data: the amdgpu_device, as registered in amdgpu_debugfs_init()
 * @val: index of the ring to preempt
 *
 * Parks the ring's scheduler thread, asks the hardware to preempt the
 * currently executing IB, then force-completes the ring, resubmits the
 * unfinished jobs and finally signals the original fences so user space
 * waiters make progress.
 *
 * Returns 0 on success or if the hardware preemption itself failed (only
 * a warning is logged); -EINVAL for a bad ring, -EBUSY if a previous
 * preemption is still outstanding, -ENOMEM on allocation failure.
 */
static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
{
	int r, resched, length;
	struct amdgpu_ring *ring;
	struct dma_fence **fences = NULL;
	struct amdgpu_device *adev = (struct amdgpu_device *)data;

	if (val >= AMDGPU_MAX_RINGS)
		return -EINVAL;

	ring = adev->rings[val];

	/* Ring must exist, support preemption and have a scheduler. */
	if (!ring || !ring->funcs->preempt_ib || !ring->sched.thread)
		return -EINVAL;

	/* the last preemption failed */
	if (ring->trail_seq != le32_to_cpu(*ring->trail_fence_cpu_addr))
		return -EBUSY;

	/* One slot per possible in-flight fence (mask is 2^n - 1). */
	length = ring->fence_drv.num_fences_mask + 1;
	fences = kcalloc(length, sizeof(void *), GFP_KERNEL);
	if (!fences)
		return -ENOMEM;

	/* Avoid accidently unparking the sched thread during GPU reset */
	mutex_lock(&adev->lock_reset);

	/* stop the scheduler */
	kthread_park(ring->sched.thread);

	resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);

	/* preempt the IB */
	r = amdgpu_ring_preempt_ib(ring);
	if (r) {
		DRM_WARN("failed to preempt ring %d\n", ring->idx);
		goto failure;
	}

	amdgpu_fence_process(ring);

	/* Preemption actually happened if some emitted fences have not
	 * signaled yet. */
	if (atomic_read(&ring->fence_drv.last_seq) !=
	    ring->fence_drv.sync_seq) {
		DRM_INFO("ring %d was preempted\n", ring->idx);

		amdgpu_ib_preempt_mark_partial_job(ring);

		/* swap out the old fences */
		amdgpu_ib_preempt_fences_swap(ring, fences);

		amdgpu_fence_driver_force_completion(ring);

		/* resubmit unfinished jobs */
		amdgpu_ib_preempt_job_recovery(&ring->sched);

		/* wait for jobs finished */
		amdgpu_fence_wait_empty(ring);

		/* signal the old fences */
		amdgpu_ib_preempt_signal_fences(fences, length);
	}

failure:
	/* restart the scheduler */
	kthread_unpark(ring->sched.thread);

	mutex_unlock(&adev->lock_reset);

	ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched);

	kfree(fences);

	return 0;
}
1213 
/* Write-only debugfs attribute: writing a ring index triggers an IB
 * preemption test on that ring (no read handler). */
DEFINE_SIMPLE_ATTRIBUTE(fops_ib_preempt, NULL,
			amdgpu_debugfs_ib_preempt, "%llu\n");
1216 
1217 int amdgpu_debugfs_init(struct amdgpu_device *adev)
1218 {
1219 	adev->debugfs_preempt =
1220 		debugfs_create_file("amdgpu_preempt_ib", 0600,
1221 				    adev->ddev->primary->debugfs_root, adev,
1222 				    &fops_ib_preempt);
1223 	if (!(adev->debugfs_preempt)) {
1224 		DRM_ERROR("unable to create amdgpu_preempt_ib debugsfs file\n");
1225 		return -EIO;
1226 	}
1227 
1228 	return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_list,
1229 					ARRAY_SIZE(amdgpu_debugfs_list));
1230 }
1231 
/*
 * amdgpu_debugfs_preempt_cleanup - remove the IB-preemption debugfs file
 * @adev: device whose file is removed
 *
 * debugfs_remove() is a no-op for NULL/ERR_PTR handles, so this is safe
 * even if creation failed.
 */
void amdgpu_debugfs_preempt_cleanup(struct amdgpu_device *adev)
{
	debugfs_remove(adev->debugfs_preempt);
}
1236 
1237 #else
/* Stub for kernels built without CONFIG_DEBUG_FS: nothing to create. */
int amdgpu_debugfs_init(struct amdgpu_device *adev)
{
	return 0;
}
/* No-op when debugfs support is compiled out. */
void amdgpu_debugfs_preempt_cleanup(struct amdgpu_device *adev) { }
/* Stub for kernels built without CONFIG_DEBUG_FS: no register files. */
int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
{
	return 0;
}
/* No-op when debugfs support is compiled out. */
void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev) { }
1248 #endif
1249