1d38ceaf9SAlex Deucher /*
2d38ceaf9SAlex Deucher  * Copyright 2014 Advanced Micro Devices, Inc.
3d38ceaf9SAlex Deucher  *
4d38ceaf9SAlex Deucher  * Permission is hereby granted, free of charge, to any person obtaining a
5d38ceaf9SAlex Deucher  * copy of this software and associated documentation files (the "Software"),
6d38ceaf9SAlex Deucher  * to deal in the Software without restriction, including without limitation
7d38ceaf9SAlex Deucher  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8d38ceaf9SAlex Deucher  * and/or sell copies of the Software, and to permit persons to whom the
9d38ceaf9SAlex Deucher  * Software is furnished to do so, subject to the following conditions:
10d38ceaf9SAlex Deucher  *
11d38ceaf9SAlex Deucher  * The above copyright notice and this permission notice shall be included in
12d38ceaf9SAlex Deucher  * all copies or substantial portions of the Software.
13d38ceaf9SAlex Deucher  *
14d38ceaf9SAlex Deucher  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15d38ceaf9SAlex Deucher  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16d38ceaf9SAlex Deucher  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17d38ceaf9SAlex Deucher  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18d38ceaf9SAlex Deucher  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19d38ceaf9SAlex Deucher  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20d38ceaf9SAlex Deucher  * OTHER DEALINGS IN THE SOFTWARE.
21d38ceaf9SAlex Deucher  *
22d38ceaf9SAlex Deucher  */
23d38ceaf9SAlex Deucher 
24fdf2f6c5SSam Ravnborg #include <linux/dma-mapping.h>
25fdf2f6c5SSam Ravnborg 
26d38ceaf9SAlex Deucher #include "amdgpu.h"
27d38ceaf9SAlex Deucher #include "amdgpu_ih.h"
28d38ceaf9SAlex Deucher 
29d38ceaf9SAlex Deucher /**
30d38ceaf9SAlex Deucher  * amdgpu_ih_ring_init - initialize the IH state
31d38ceaf9SAlex Deucher  *
32d38ceaf9SAlex Deucher  * @adev: amdgpu_device pointer
33425c3143SChristian König  * @ih: ih ring to initialize
34425c3143SChristian König  * @ring_size: ring size to allocate
35425c3143SChristian König  * @use_bus_addr: true when we can use dma_alloc_coherent
36d38ceaf9SAlex Deucher  *
37d38ceaf9SAlex Deucher  * Initializes the IH state and allocates a buffer
38d38ceaf9SAlex Deucher  * for the IH ring buffer.
39d38ceaf9SAlex Deucher  * Returns 0 for success, errors for failure.
40d38ceaf9SAlex Deucher  */
amdgpu_ih_ring_init(struct amdgpu_device * adev,struct amdgpu_ih_ring * ih,unsigned ring_size,bool use_bus_addr)41425c3143SChristian König int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
42425c3143SChristian König 			unsigned ring_size, bool use_bus_addr)
43d38ceaf9SAlex Deucher {
44d38ceaf9SAlex Deucher 	u32 rb_bufsz;
45d38ceaf9SAlex Deucher 	int r;
46d38ceaf9SAlex Deucher 
47d38ceaf9SAlex Deucher 	/* Align ring size */
48d38ceaf9SAlex Deucher 	rb_bufsz = order_base_2(ring_size / 4);
49d38ceaf9SAlex Deucher 	ring_size = (1 << rb_bufsz) * 4;
50425c3143SChristian König 	ih->ring_size = ring_size;
51425c3143SChristian König 	ih->ptr_mask = ih->ring_size - 1;
52425c3143SChristian König 	ih->rptr = 0;
53425c3143SChristian König 	ih->use_bus_addr = use_bus_addr;
54d38ceaf9SAlex Deucher 
55425c3143SChristian König 	if (use_bus_addr) {
56d81f78b4SChristian König 		dma_addr_t dma_addr;
57d81f78b4SChristian König 
58425c3143SChristian König 		if (ih->ring)
59425c3143SChristian König 			return 0;
60425c3143SChristian König 
61d38ceaf9SAlex Deucher 		/* add 8 bytes for the rptr/wptr shadows and
62d38ceaf9SAlex Deucher 		 * add them to the end of the ring allocation.
63d38ceaf9SAlex Deucher 		 */
64425c3143SChristian König 		ih->ring = dma_alloc_coherent(adev->dev, ih->ring_size + 8,
65d81f78b4SChristian König 					      &dma_addr, GFP_KERNEL);
66425c3143SChristian König 		if (ih->ring == NULL)
67d38ceaf9SAlex Deucher 			return -ENOMEM;
68425c3143SChristian König 
69d81f78b4SChristian König 		ih->gpu_addr = dma_addr;
70d81f78b4SChristian König 		ih->wptr_addr = dma_addr + ih->ring_size;
71d81f78b4SChristian König 		ih->wptr_cpu = &ih->ring[ih->ring_size / 4];
72d81f78b4SChristian König 		ih->rptr_addr = dma_addr + ih->ring_size + 4;
73d81f78b4SChristian König 		ih->rptr_cpu = &ih->ring[(ih->ring_size / 4) + 1];
74425c3143SChristian König 	} else {
75d81f78b4SChristian König 		unsigned wptr_offs, rptr_offs;
76d81f78b4SChristian König 
77d81f78b4SChristian König 		r = amdgpu_device_wb_get(adev, &wptr_offs);
78425c3143SChristian König 		if (r)
79425c3143SChristian König 			return r;
80425c3143SChristian König 
81d81f78b4SChristian König 		r = amdgpu_device_wb_get(adev, &rptr_offs);
82425c3143SChristian König 		if (r) {
83d81f78b4SChristian König 			amdgpu_device_wb_free(adev, wptr_offs);
84425c3143SChristian König 			return r;
85425c3143SChristian König 		}
86425c3143SChristian König 
87425c3143SChristian König 		r = amdgpu_bo_create_kernel(adev, ih->ring_size, PAGE_SIZE,
88425c3143SChristian König 					    AMDGPU_GEM_DOMAIN_GTT,
89425c3143SChristian König 					    &ih->ring_obj, &ih->gpu_addr,
90425c3143SChristian König 					    (void **)&ih->ring);
91425c3143SChristian König 		if (r) {
92d81f78b4SChristian König 			amdgpu_device_wb_free(adev, rptr_offs);
93d81f78b4SChristian König 			amdgpu_device_wb_free(adev, wptr_offs);
94425c3143SChristian König 			return r;
95425c3143SChristian König 		}
96d81f78b4SChristian König 
97d81f78b4SChristian König 		ih->wptr_addr = adev->wb.gpu_addr + wptr_offs * 4;
98d81f78b4SChristian König 		ih->wptr_cpu = &adev->wb.wb[wptr_offs];
99d81f78b4SChristian König 		ih->rptr_addr = adev->wb.gpu_addr + rptr_offs * 4;
100d81f78b4SChristian König 		ih->rptr_cpu = &adev->wb.wb[rptr_offs];
101d38ceaf9SAlex Deucher 	}
1023f1d1eb2SJonathan Kim 
1033f1d1eb2SJonathan Kim 	init_waitqueue_head(&ih->wait_process);
104d38ceaf9SAlex Deucher 	return 0;
105d38ceaf9SAlex Deucher }
106d38ceaf9SAlex Deucher 
107d38ceaf9SAlex Deucher /**
108d38ceaf9SAlex Deucher  * amdgpu_ih_ring_fini - tear down the IH state
109d38ceaf9SAlex Deucher  *
110d38ceaf9SAlex Deucher  * @adev: amdgpu_device pointer
111425c3143SChristian König  * @ih: ih ring to tear down
112d38ceaf9SAlex Deucher  *
113d38ceaf9SAlex Deucher  * Tears down the IH state and frees buffer
114d38ceaf9SAlex Deucher  * used for the IH ring buffer.
115d38ceaf9SAlex Deucher  */
amdgpu_ih_ring_fini(struct amdgpu_device * adev,struct amdgpu_ih_ring * ih)116425c3143SChristian König void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
117d38ceaf9SAlex Deucher {
118d10d0daaSAndrey Grodzovsky 
119425c3143SChristian König 	if (!ih->ring)
120425c3143SChristian König 		return;
121425c3143SChristian König 
122d10d0daaSAndrey Grodzovsky 	if (ih->use_bus_addr) {
123d10d0daaSAndrey Grodzovsky 
124d38ceaf9SAlex Deucher 		/* add 8 bytes for the rptr/wptr shadows and
125d38ceaf9SAlex Deucher 		 * add them to the end of the ring allocation.
126d38ceaf9SAlex Deucher 		 */
127425c3143SChristian König 		dma_free_coherent(adev->dev, ih->ring_size + 8,
128d81f78b4SChristian König 				  (void *)ih->ring, ih->gpu_addr);
129425c3143SChristian König 		ih->ring = NULL;
130d38ceaf9SAlex Deucher 	} else {
131425c3143SChristian König 		amdgpu_bo_free_kernel(&ih->ring_obj, &ih->gpu_addr,
132425c3143SChristian König 				      (void **)&ih->ring);
133d81f78b4SChristian König 		amdgpu_device_wb_free(adev, (ih->wptr_addr - ih->gpu_addr) / 4);
134d81f78b4SChristian König 		amdgpu_device_wb_free(adev, (ih->rptr_addr - ih->gpu_addr) / 4);
135d38ceaf9SAlex Deucher 	}
136d38ceaf9SAlex Deucher }
137d38ceaf9SAlex Deucher 
138d38ceaf9SAlex Deucher /**
13926f32a37SChristian König  * amdgpu_ih_ring_write - write IV to the ring buffer
14026f32a37SChristian König  *
141*bf80d34bSPhilip Yang  * @adev: amdgpu_device pointer
14226f32a37SChristian König  * @ih: ih ring to write to
14326f32a37SChristian König  * @iv: the iv to write
14426f32a37SChristian König  * @num_dw: size of the iv in dw
14526f32a37SChristian König  *
14626f32a37SChristian König  * Writes an IV to the ring buffer using the CPU and increment the wptr.
14726f32a37SChristian König  * Used for testing and delegating IVs to a software ring.
14826f32a37SChristian König  */
amdgpu_ih_ring_write(struct amdgpu_device * adev,struct amdgpu_ih_ring * ih,const uint32_t * iv,unsigned int num_dw)149*bf80d34bSPhilip Yang void amdgpu_ih_ring_write(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
150*bf80d34bSPhilip Yang 			  const uint32_t *iv, unsigned int num_dw)
15126f32a37SChristian König {
15226f32a37SChristian König 	uint32_t wptr = le32_to_cpu(*ih->wptr_cpu) >> 2;
15326f32a37SChristian König 	unsigned int i;
15426f32a37SChristian König 
15526f32a37SChristian König 	for (i = 0; i < num_dw; ++i)
15626f32a37SChristian König 	        ih->ring[wptr++] = cpu_to_le32(iv[i]);
15726f32a37SChristian König 
15826f32a37SChristian König 	wptr <<= 2;
15926f32a37SChristian König 	wptr &= ih->ptr_mask;
16026f32a37SChristian König 
16126f32a37SChristian König 	/* Only commit the new wptr if we don't overflow */
16226f32a37SChristian König 	if (wptr != READ_ONCE(ih->rptr)) {
16326f32a37SChristian König 		wmb();
16426f32a37SChristian König 		WRITE_ONCE(*ih->wptr_cpu, cpu_to_le32(wptr));
165*bf80d34bSPhilip Yang 	} else if (adev->irq.retry_cam_enabled) {
166*bf80d34bSPhilip Yang 		dev_warn_once(adev->dev, "IH soft ring buffer overflow 0x%X, 0x%X\n",
167*bf80d34bSPhilip Yang 			      wptr, ih->rptr);
16826f32a37SChristian König 	}
16926f32a37SChristian König }
17026f32a37SChristian König 
1713f1d1eb2SJonathan Kim /**
1723c2d6ea2SPhilip Yang  * amdgpu_ih_wait_on_checkpoint_process_ts - wait to process IVs up to checkpoint
1733f1d1eb2SJonathan Kim  *
1743f1d1eb2SJonathan Kim  * @adev: amdgpu_device pointer
1753f1d1eb2SJonathan Kim  * @ih: ih ring to process
1763f1d1eb2SJonathan Kim  *
1773f1d1eb2SJonathan Kim  * Used to ensure ring has processed IVs up to the checkpoint write pointer.
1783f1d1eb2SJonathan Kim  */
amdgpu_ih_wait_on_checkpoint_process_ts(struct amdgpu_device * adev,struct amdgpu_ih_ring * ih)1793c2d6ea2SPhilip Yang int amdgpu_ih_wait_on_checkpoint_process_ts(struct amdgpu_device *adev,
1803f1d1eb2SJonathan Kim 					struct amdgpu_ih_ring *ih)
1813f1d1eb2SJonathan Kim {
1823c2d6ea2SPhilip Yang 	uint32_t checkpoint_wptr;
1833c2d6ea2SPhilip Yang 	uint64_t checkpoint_ts;
1843c2d6ea2SPhilip Yang 	long timeout = HZ;
1853f1d1eb2SJonathan Kim 
1863f1d1eb2SJonathan Kim 	if (!ih->enabled || adev->shutdown)
1873f1d1eb2SJonathan Kim 		return -ENODEV;
1883f1d1eb2SJonathan Kim 
1893f1d1eb2SJonathan Kim 	checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih);
1903c2d6ea2SPhilip Yang 	/* Order wptr with ring data. */
1913f1d1eb2SJonathan Kim 	rmb();
1923c2d6ea2SPhilip Yang 	checkpoint_ts = amdgpu_ih_decode_iv_ts(adev, ih, checkpoint_wptr, -1);
1933f1d1eb2SJonathan Kim 
1943c2d6ea2SPhilip Yang 	return wait_event_interruptible_timeout(ih->wait_process,
1950771c805SPhilip Yang 		    amdgpu_ih_ts_after(checkpoint_ts, ih->processed_timestamp) ||
1960771c805SPhilip Yang 		    ih->rptr == amdgpu_ih_get_wptr(adev, ih), timeout);
1973f1d1eb2SJonathan Kim }
1983f1d1eb2SJonathan Kim 
19926f32a37SChristian König /**
200d38ceaf9SAlex Deucher  * amdgpu_ih_process - interrupt handler
201d38ceaf9SAlex Deucher  *
202d38ceaf9SAlex Deucher  * @adev: amdgpu_device pointer
203425c3143SChristian König  * @ih: ih ring to process
204d38ceaf9SAlex Deucher  *
205d38ceaf9SAlex Deucher  * Interrupt hander (VI), walk the IH ring.
206d38ceaf9SAlex Deucher  * Returns irq process return code.
207d38ceaf9SAlex Deucher  */
amdgpu_ih_process(struct amdgpu_device * adev,struct amdgpu_ih_ring * ih)208e2fb6e0aSChristian König int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
209d38ceaf9SAlex Deucher {
210514f4a99SPhilip Yang 	unsigned int count;
211d38ceaf9SAlex Deucher 	u32 wptr;
212d38ceaf9SAlex Deucher 
213425c3143SChristian König 	if (!ih->enabled || adev->shutdown)
214d38ceaf9SAlex Deucher 		return IRQ_NONE;
215d38ceaf9SAlex Deucher 
2168bb9eb48SChristian König 	wptr = amdgpu_ih_get_wptr(adev, ih);
217d38ceaf9SAlex Deucher 
218d38ceaf9SAlex Deucher restart_ih:
219514f4a99SPhilip Yang 	count  = AMDGPU_IH_MAX_NUM_IVS;
220425c3143SChristian König 	DRM_DEBUG("%s: rptr %d, wptr %d\n", __func__, ih->rptr, wptr);
221d38ceaf9SAlex Deucher 
222d38ceaf9SAlex Deucher 	/* Order reading of wptr vs. reading of IH ring data */
223d38ceaf9SAlex Deucher 	rmb();
224d38ceaf9SAlex Deucher 
2258c65fe5fSChristian König 	while (ih->rptr != wptr && --count) {
226e2fb6e0aSChristian König 		amdgpu_irq_dispatch(adev, ih);
227425c3143SChristian König 		ih->rptr &= ih->ptr_mask;
22800ecd8a2SFelix Kuehling 	}
22900ecd8a2SFelix Kuehling 
2308bb9eb48SChristian König 	amdgpu_ih_set_rptr(adev, ih);
2313f1d1eb2SJonathan Kim 	wake_up_all(&ih->wait_process);
232d38ceaf9SAlex Deucher 
233d38ceaf9SAlex Deucher 	/* make sure wptr hasn't changed while processing */
2348bb9eb48SChristian König 	wptr = amdgpu_ih_get_wptr(adev, ih);
235425c3143SChristian König 	if (wptr != ih->rptr)
236d38ceaf9SAlex Deucher 		goto restart_ih;
237d38ceaf9SAlex Deucher 
238d38ceaf9SAlex Deucher 	return IRQ_HANDLED;
239d38ceaf9SAlex Deucher }
240a2f14820SFelix Kuehling 
24178bd101cSHawking Zhang /**
24278bd101cSHawking Zhang  * amdgpu_ih_decode_iv_helper - decode an interrupt vector
24378bd101cSHawking Zhang  *
24478bd101cSHawking Zhang  * @adev: amdgpu_device pointer
2459c573cf2SLee Jones  * @ih: ih ring to process
2469c573cf2SLee Jones  * @entry: IV entry
24778bd101cSHawking Zhang  *
24878bd101cSHawking Zhang  * Decodes the interrupt vector at the current rptr
24950ef0cacSJiang Jian  * position and also advance the position for Vega10
25078bd101cSHawking Zhang  * and later GPUs.
25178bd101cSHawking Zhang  */
amdgpu_ih_decode_iv_helper(struct amdgpu_device * adev,struct amdgpu_ih_ring * ih,struct amdgpu_iv_entry * entry)25278bd101cSHawking Zhang void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev,
25378bd101cSHawking Zhang 				struct amdgpu_ih_ring *ih,
25478bd101cSHawking Zhang 				struct amdgpu_iv_entry *entry)
25578bd101cSHawking Zhang {
25678bd101cSHawking Zhang 	/* wptr/rptr are in bytes! */
25778bd101cSHawking Zhang 	u32 ring_index = ih->rptr >> 2;
25878bd101cSHawking Zhang 	uint32_t dw[8];
25978bd101cSHawking Zhang 
26078bd101cSHawking Zhang 	dw[0] = le32_to_cpu(ih->ring[ring_index + 0]);
26178bd101cSHawking Zhang 	dw[1] = le32_to_cpu(ih->ring[ring_index + 1]);
26278bd101cSHawking Zhang 	dw[2] = le32_to_cpu(ih->ring[ring_index + 2]);
26378bd101cSHawking Zhang 	dw[3] = le32_to_cpu(ih->ring[ring_index + 3]);
26478bd101cSHawking Zhang 	dw[4] = le32_to_cpu(ih->ring[ring_index + 4]);
26578bd101cSHawking Zhang 	dw[5] = le32_to_cpu(ih->ring[ring_index + 5]);
26678bd101cSHawking Zhang 	dw[6] = le32_to_cpu(ih->ring[ring_index + 6]);
26778bd101cSHawking Zhang 	dw[7] = le32_to_cpu(ih->ring[ring_index + 7]);
26878bd101cSHawking Zhang 
26978bd101cSHawking Zhang 	entry->client_id = dw[0] & 0xff;
27078bd101cSHawking Zhang 	entry->src_id = (dw[0] >> 8) & 0xff;
27178bd101cSHawking Zhang 	entry->ring_id = (dw[0] >> 16) & 0xff;
27278bd101cSHawking Zhang 	entry->vmid = (dw[0] >> 24) & 0xf;
27378bd101cSHawking Zhang 	entry->vmid_src = (dw[0] >> 31);
27478bd101cSHawking Zhang 	entry->timestamp = dw[1] | ((u64)(dw[2] & 0xffff) << 32);
27578bd101cSHawking Zhang 	entry->timestamp_src = dw[2] >> 31;
27678bd101cSHawking Zhang 	entry->pasid = dw[3] & 0xffff;
27747659738SLe Ma 	entry->node_id = (dw[3] >> 16) & 0xff;
27878bd101cSHawking Zhang 	entry->src_data[0] = dw[4];
27978bd101cSHawking Zhang 	entry->src_data[1] = dw[5];
28078bd101cSHawking Zhang 	entry->src_data[2] = dw[6];
28178bd101cSHawking Zhang 	entry->src_data[3] = dw[7];
28278bd101cSHawking Zhang 
28378bd101cSHawking Zhang 	/* wptr/rptr are in bytes! */
28478bd101cSHawking Zhang 	ih->rptr += 32;
28578bd101cSHawking Zhang }
2863c2d6ea2SPhilip Yang 
amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring * ih,u32 rptr,signed int offset)2873c2d6ea2SPhilip Yang uint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring *ih, u32 rptr,
2883c2d6ea2SPhilip Yang 				       signed int offset)
2893c2d6ea2SPhilip Yang {
2903c2d6ea2SPhilip Yang 	uint32_t iv_size = 32;
2913c2d6ea2SPhilip Yang 	uint32_t ring_index;
2923c2d6ea2SPhilip Yang 	uint32_t dw1, dw2;
2933c2d6ea2SPhilip Yang 
2943c2d6ea2SPhilip Yang 	rptr += iv_size * offset;
2953c2d6ea2SPhilip Yang 	ring_index = (rptr & ih->ptr_mask) >> 2;
2963c2d6ea2SPhilip Yang 
2973c2d6ea2SPhilip Yang 	dw1 = le32_to_cpu(ih->ring[ring_index + 1]);
2983c2d6ea2SPhilip Yang 	dw2 = le32_to_cpu(ih->ring[ring_index + 2]);
2993c2d6ea2SPhilip Yang 	return dw1 | ((u64)(dw2 & 0xffff) << 32);
3003c2d6ea2SPhilip Yang }
301