/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
23d38ceaf9SAlex Deucher
24fdf2f6c5SSam Ravnborg #include <linux/dma-mapping.h>
25fdf2f6c5SSam Ravnborg
26d38ceaf9SAlex Deucher #include "amdgpu.h"
27d38ceaf9SAlex Deucher #include "amdgpu_ih.h"
28d38ceaf9SAlex Deucher
29d38ceaf9SAlex Deucher /**
30d38ceaf9SAlex Deucher * amdgpu_ih_ring_init - initialize the IH state
31d38ceaf9SAlex Deucher *
32d38ceaf9SAlex Deucher * @adev: amdgpu_device pointer
33425c3143SChristian König * @ih: ih ring to initialize
34425c3143SChristian König * @ring_size: ring size to allocate
35425c3143SChristian König * @use_bus_addr: true when we can use dma_alloc_coherent
36d38ceaf9SAlex Deucher *
37d38ceaf9SAlex Deucher * Initializes the IH state and allocates a buffer
38d38ceaf9SAlex Deucher * for the IH ring buffer.
39d38ceaf9SAlex Deucher * Returns 0 for success, errors for failure.
40d38ceaf9SAlex Deucher */
amdgpu_ih_ring_init(struct amdgpu_device * adev,struct amdgpu_ih_ring * ih,unsigned ring_size,bool use_bus_addr)41425c3143SChristian König int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
42425c3143SChristian König unsigned ring_size, bool use_bus_addr)
43d38ceaf9SAlex Deucher {
44d38ceaf9SAlex Deucher u32 rb_bufsz;
45d38ceaf9SAlex Deucher int r;
46d38ceaf9SAlex Deucher
47d38ceaf9SAlex Deucher /* Align ring size */
48d38ceaf9SAlex Deucher rb_bufsz = order_base_2(ring_size / 4);
49d38ceaf9SAlex Deucher ring_size = (1 << rb_bufsz) * 4;
50425c3143SChristian König ih->ring_size = ring_size;
51425c3143SChristian König ih->ptr_mask = ih->ring_size - 1;
52425c3143SChristian König ih->rptr = 0;
53425c3143SChristian König ih->use_bus_addr = use_bus_addr;
54d38ceaf9SAlex Deucher
55425c3143SChristian König if (use_bus_addr) {
56d81f78b4SChristian König dma_addr_t dma_addr;
57d81f78b4SChristian König
58425c3143SChristian König if (ih->ring)
59425c3143SChristian König return 0;
60425c3143SChristian König
61d38ceaf9SAlex Deucher /* add 8 bytes for the rptr/wptr shadows and
62d38ceaf9SAlex Deucher * add them to the end of the ring allocation.
63d38ceaf9SAlex Deucher */
64425c3143SChristian König ih->ring = dma_alloc_coherent(adev->dev, ih->ring_size + 8,
65d81f78b4SChristian König &dma_addr, GFP_KERNEL);
66425c3143SChristian König if (ih->ring == NULL)
67d38ceaf9SAlex Deucher return -ENOMEM;
68425c3143SChristian König
69d81f78b4SChristian König ih->gpu_addr = dma_addr;
70d81f78b4SChristian König ih->wptr_addr = dma_addr + ih->ring_size;
71d81f78b4SChristian König ih->wptr_cpu = &ih->ring[ih->ring_size / 4];
72d81f78b4SChristian König ih->rptr_addr = dma_addr + ih->ring_size + 4;
73d81f78b4SChristian König ih->rptr_cpu = &ih->ring[(ih->ring_size / 4) + 1];
74425c3143SChristian König } else {
75d81f78b4SChristian König unsigned wptr_offs, rptr_offs;
76d81f78b4SChristian König
77d81f78b4SChristian König r = amdgpu_device_wb_get(adev, &wptr_offs);
78425c3143SChristian König if (r)
79425c3143SChristian König return r;
80425c3143SChristian König
81d81f78b4SChristian König r = amdgpu_device_wb_get(adev, &rptr_offs);
82425c3143SChristian König if (r) {
83d81f78b4SChristian König amdgpu_device_wb_free(adev, wptr_offs);
84425c3143SChristian König return r;
85425c3143SChristian König }
86425c3143SChristian König
87425c3143SChristian König r = amdgpu_bo_create_kernel(adev, ih->ring_size, PAGE_SIZE,
88425c3143SChristian König AMDGPU_GEM_DOMAIN_GTT,
89425c3143SChristian König &ih->ring_obj, &ih->gpu_addr,
90425c3143SChristian König (void **)&ih->ring);
91425c3143SChristian König if (r) {
92d81f78b4SChristian König amdgpu_device_wb_free(adev, rptr_offs);
93d81f78b4SChristian König amdgpu_device_wb_free(adev, wptr_offs);
94425c3143SChristian König return r;
95425c3143SChristian König }
96d81f78b4SChristian König
97d81f78b4SChristian König ih->wptr_addr = adev->wb.gpu_addr + wptr_offs * 4;
98d81f78b4SChristian König ih->wptr_cpu = &adev->wb.wb[wptr_offs];
99d81f78b4SChristian König ih->rptr_addr = adev->wb.gpu_addr + rptr_offs * 4;
100d81f78b4SChristian König ih->rptr_cpu = &adev->wb.wb[rptr_offs];
101d38ceaf9SAlex Deucher }
1023f1d1eb2SJonathan Kim
1033f1d1eb2SJonathan Kim init_waitqueue_head(&ih->wait_process);
104d38ceaf9SAlex Deucher return 0;
105d38ceaf9SAlex Deucher }
106d38ceaf9SAlex Deucher
107d38ceaf9SAlex Deucher /**
108d38ceaf9SAlex Deucher * amdgpu_ih_ring_fini - tear down the IH state
109d38ceaf9SAlex Deucher *
110d38ceaf9SAlex Deucher * @adev: amdgpu_device pointer
111425c3143SChristian König * @ih: ih ring to tear down
112d38ceaf9SAlex Deucher *
113d38ceaf9SAlex Deucher * Tears down the IH state and frees buffer
114d38ceaf9SAlex Deucher * used for the IH ring buffer.
115d38ceaf9SAlex Deucher */
amdgpu_ih_ring_fini(struct amdgpu_device * adev,struct amdgpu_ih_ring * ih)116425c3143SChristian König void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
117d38ceaf9SAlex Deucher {
118d10d0daaSAndrey Grodzovsky
119425c3143SChristian König if (!ih->ring)
120425c3143SChristian König return;
121425c3143SChristian König
122d10d0daaSAndrey Grodzovsky if (ih->use_bus_addr) {
123d10d0daaSAndrey Grodzovsky
124d38ceaf9SAlex Deucher /* add 8 bytes for the rptr/wptr shadows and
125d38ceaf9SAlex Deucher * add them to the end of the ring allocation.
126d38ceaf9SAlex Deucher */
127425c3143SChristian König dma_free_coherent(adev->dev, ih->ring_size + 8,
128d81f78b4SChristian König (void *)ih->ring, ih->gpu_addr);
129425c3143SChristian König ih->ring = NULL;
130d38ceaf9SAlex Deucher } else {
131425c3143SChristian König amdgpu_bo_free_kernel(&ih->ring_obj, &ih->gpu_addr,
132425c3143SChristian König (void **)&ih->ring);
133d81f78b4SChristian König amdgpu_device_wb_free(adev, (ih->wptr_addr - ih->gpu_addr) / 4);
134d81f78b4SChristian König amdgpu_device_wb_free(adev, (ih->rptr_addr - ih->gpu_addr) / 4);
135d38ceaf9SAlex Deucher }
136d38ceaf9SAlex Deucher }
137d38ceaf9SAlex Deucher
138d38ceaf9SAlex Deucher /**
13926f32a37SChristian König * amdgpu_ih_ring_write - write IV to the ring buffer
14026f32a37SChristian König *
141*bf80d34bSPhilip Yang * @adev: amdgpu_device pointer
14226f32a37SChristian König * @ih: ih ring to write to
14326f32a37SChristian König * @iv: the iv to write
14426f32a37SChristian König * @num_dw: size of the iv in dw
14526f32a37SChristian König *
14626f32a37SChristian König * Writes an IV to the ring buffer using the CPU and increment the wptr.
14726f32a37SChristian König * Used for testing and delegating IVs to a software ring.
14826f32a37SChristian König */
amdgpu_ih_ring_write(struct amdgpu_device * adev,struct amdgpu_ih_ring * ih,const uint32_t * iv,unsigned int num_dw)149*bf80d34bSPhilip Yang void amdgpu_ih_ring_write(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
150*bf80d34bSPhilip Yang const uint32_t *iv, unsigned int num_dw)
15126f32a37SChristian König {
15226f32a37SChristian König uint32_t wptr = le32_to_cpu(*ih->wptr_cpu) >> 2;
15326f32a37SChristian König unsigned int i;
15426f32a37SChristian König
15526f32a37SChristian König for (i = 0; i < num_dw; ++i)
15626f32a37SChristian König ih->ring[wptr++] = cpu_to_le32(iv[i]);
15726f32a37SChristian König
15826f32a37SChristian König wptr <<= 2;
15926f32a37SChristian König wptr &= ih->ptr_mask;
16026f32a37SChristian König
16126f32a37SChristian König /* Only commit the new wptr if we don't overflow */
16226f32a37SChristian König if (wptr != READ_ONCE(ih->rptr)) {
16326f32a37SChristian König wmb();
16426f32a37SChristian König WRITE_ONCE(*ih->wptr_cpu, cpu_to_le32(wptr));
165*bf80d34bSPhilip Yang } else if (adev->irq.retry_cam_enabled) {
166*bf80d34bSPhilip Yang dev_warn_once(adev->dev, "IH soft ring buffer overflow 0x%X, 0x%X\n",
167*bf80d34bSPhilip Yang wptr, ih->rptr);
16826f32a37SChristian König }
16926f32a37SChristian König }
17026f32a37SChristian König
1713f1d1eb2SJonathan Kim /**
1723c2d6ea2SPhilip Yang * amdgpu_ih_wait_on_checkpoint_process_ts - wait to process IVs up to checkpoint
1733f1d1eb2SJonathan Kim *
1743f1d1eb2SJonathan Kim * @adev: amdgpu_device pointer
1753f1d1eb2SJonathan Kim * @ih: ih ring to process
1763f1d1eb2SJonathan Kim *
1773f1d1eb2SJonathan Kim * Used to ensure ring has processed IVs up to the checkpoint write pointer.
1783f1d1eb2SJonathan Kim */
amdgpu_ih_wait_on_checkpoint_process_ts(struct amdgpu_device * adev,struct amdgpu_ih_ring * ih)1793c2d6ea2SPhilip Yang int amdgpu_ih_wait_on_checkpoint_process_ts(struct amdgpu_device *adev,
1803f1d1eb2SJonathan Kim struct amdgpu_ih_ring *ih)
1813f1d1eb2SJonathan Kim {
1823c2d6ea2SPhilip Yang uint32_t checkpoint_wptr;
1833c2d6ea2SPhilip Yang uint64_t checkpoint_ts;
1843c2d6ea2SPhilip Yang long timeout = HZ;
1853f1d1eb2SJonathan Kim
1863f1d1eb2SJonathan Kim if (!ih->enabled || adev->shutdown)
1873f1d1eb2SJonathan Kim return -ENODEV;
1883f1d1eb2SJonathan Kim
1893f1d1eb2SJonathan Kim checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih);
1903c2d6ea2SPhilip Yang /* Order wptr with ring data. */
1913f1d1eb2SJonathan Kim rmb();
1923c2d6ea2SPhilip Yang checkpoint_ts = amdgpu_ih_decode_iv_ts(adev, ih, checkpoint_wptr, -1);
1933f1d1eb2SJonathan Kim
1943c2d6ea2SPhilip Yang return wait_event_interruptible_timeout(ih->wait_process,
1950771c805SPhilip Yang amdgpu_ih_ts_after(checkpoint_ts, ih->processed_timestamp) ||
1960771c805SPhilip Yang ih->rptr == amdgpu_ih_get_wptr(adev, ih), timeout);
1973f1d1eb2SJonathan Kim }
1983f1d1eb2SJonathan Kim
19926f32a37SChristian König /**
200d38ceaf9SAlex Deucher * amdgpu_ih_process - interrupt handler
201d38ceaf9SAlex Deucher *
202d38ceaf9SAlex Deucher * @adev: amdgpu_device pointer
203425c3143SChristian König * @ih: ih ring to process
204d38ceaf9SAlex Deucher *
205d38ceaf9SAlex Deucher * Interrupt hander (VI), walk the IH ring.
206d38ceaf9SAlex Deucher * Returns irq process return code.
207d38ceaf9SAlex Deucher */
amdgpu_ih_process(struct amdgpu_device * adev,struct amdgpu_ih_ring * ih)208e2fb6e0aSChristian König int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
209d38ceaf9SAlex Deucher {
210514f4a99SPhilip Yang unsigned int count;
211d38ceaf9SAlex Deucher u32 wptr;
212d38ceaf9SAlex Deucher
213425c3143SChristian König if (!ih->enabled || adev->shutdown)
214d38ceaf9SAlex Deucher return IRQ_NONE;
215d38ceaf9SAlex Deucher
2168bb9eb48SChristian König wptr = amdgpu_ih_get_wptr(adev, ih);
217d38ceaf9SAlex Deucher
218d38ceaf9SAlex Deucher restart_ih:
219514f4a99SPhilip Yang count = AMDGPU_IH_MAX_NUM_IVS;
220425c3143SChristian König DRM_DEBUG("%s: rptr %d, wptr %d\n", __func__, ih->rptr, wptr);
221d38ceaf9SAlex Deucher
222d38ceaf9SAlex Deucher /* Order reading of wptr vs. reading of IH ring data */
223d38ceaf9SAlex Deucher rmb();
224d38ceaf9SAlex Deucher
2258c65fe5fSChristian König while (ih->rptr != wptr && --count) {
226e2fb6e0aSChristian König amdgpu_irq_dispatch(adev, ih);
227425c3143SChristian König ih->rptr &= ih->ptr_mask;
22800ecd8a2SFelix Kuehling }
22900ecd8a2SFelix Kuehling
2308bb9eb48SChristian König amdgpu_ih_set_rptr(adev, ih);
2313f1d1eb2SJonathan Kim wake_up_all(&ih->wait_process);
232d38ceaf9SAlex Deucher
233d38ceaf9SAlex Deucher /* make sure wptr hasn't changed while processing */
2348bb9eb48SChristian König wptr = amdgpu_ih_get_wptr(adev, ih);
235425c3143SChristian König if (wptr != ih->rptr)
236d38ceaf9SAlex Deucher goto restart_ih;
237d38ceaf9SAlex Deucher
238d38ceaf9SAlex Deucher return IRQ_HANDLED;
239d38ceaf9SAlex Deucher }
240a2f14820SFelix Kuehling
24178bd101cSHawking Zhang /**
24278bd101cSHawking Zhang * amdgpu_ih_decode_iv_helper - decode an interrupt vector
24378bd101cSHawking Zhang *
24478bd101cSHawking Zhang * @adev: amdgpu_device pointer
2459c573cf2SLee Jones * @ih: ih ring to process
2469c573cf2SLee Jones * @entry: IV entry
24778bd101cSHawking Zhang *
24878bd101cSHawking Zhang * Decodes the interrupt vector at the current rptr
24950ef0cacSJiang Jian * position and also advance the position for Vega10
25078bd101cSHawking Zhang * and later GPUs.
25178bd101cSHawking Zhang */
amdgpu_ih_decode_iv_helper(struct amdgpu_device * adev,struct amdgpu_ih_ring * ih,struct amdgpu_iv_entry * entry)25278bd101cSHawking Zhang void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev,
25378bd101cSHawking Zhang struct amdgpu_ih_ring *ih,
25478bd101cSHawking Zhang struct amdgpu_iv_entry *entry)
25578bd101cSHawking Zhang {
25678bd101cSHawking Zhang /* wptr/rptr are in bytes! */
25778bd101cSHawking Zhang u32 ring_index = ih->rptr >> 2;
25878bd101cSHawking Zhang uint32_t dw[8];
25978bd101cSHawking Zhang
26078bd101cSHawking Zhang dw[0] = le32_to_cpu(ih->ring[ring_index + 0]);
26178bd101cSHawking Zhang dw[1] = le32_to_cpu(ih->ring[ring_index + 1]);
26278bd101cSHawking Zhang dw[2] = le32_to_cpu(ih->ring[ring_index + 2]);
26378bd101cSHawking Zhang dw[3] = le32_to_cpu(ih->ring[ring_index + 3]);
26478bd101cSHawking Zhang dw[4] = le32_to_cpu(ih->ring[ring_index + 4]);
26578bd101cSHawking Zhang dw[5] = le32_to_cpu(ih->ring[ring_index + 5]);
26678bd101cSHawking Zhang dw[6] = le32_to_cpu(ih->ring[ring_index + 6]);
26778bd101cSHawking Zhang dw[7] = le32_to_cpu(ih->ring[ring_index + 7]);
26878bd101cSHawking Zhang
26978bd101cSHawking Zhang entry->client_id = dw[0] & 0xff;
27078bd101cSHawking Zhang entry->src_id = (dw[0] >> 8) & 0xff;
27178bd101cSHawking Zhang entry->ring_id = (dw[0] >> 16) & 0xff;
27278bd101cSHawking Zhang entry->vmid = (dw[0] >> 24) & 0xf;
27378bd101cSHawking Zhang entry->vmid_src = (dw[0] >> 31);
27478bd101cSHawking Zhang entry->timestamp = dw[1] | ((u64)(dw[2] & 0xffff) << 32);
27578bd101cSHawking Zhang entry->timestamp_src = dw[2] >> 31;
27678bd101cSHawking Zhang entry->pasid = dw[3] & 0xffff;
27747659738SLe Ma entry->node_id = (dw[3] >> 16) & 0xff;
27878bd101cSHawking Zhang entry->src_data[0] = dw[4];
27978bd101cSHawking Zhang entry->src_data[1] = dw[5];
28078bd101cSHawking Zhang entry->src_data[2] = dw[6];
28178bd101cSHawking Zhang entry->src_data[3] = dw[7];
28278bd101cSHawking Zhang
28378bd101cSHawking Zhang /* wptr/rptr are in bytes! */
28478bd101cSHawking Zhang ih->rptr += 32;
28578bd101cSHawking Zhang }
2863c2d6ea2SPhilip Yang
/**
 * amdgpu_ih_decode_iv_ts_helper - read the timestamp of an IV in the ring
 *
 * @ih: ih ring to read from
 * @rptr: byte offset into the ring used as reference position
 * @offset: signed number of 32 byte IVs to step away from @rptr
 *
 * Steps @offset IVs from @rptr, wraps the position with the ring's pointer
 * mask and reads dwords 1 and 2 of the IV found there.
 *
 * Returns the 48 bit timestamp: dword 1 in the low 32 bits and the low
 * 16 bits of dword 2 above them.
 */
uint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring *ih, u32 rptr,
				       signed int offset)
{
	const uint32_t iv_bytes = 32;
	uint32_t first_dw;
	uint32_t ts_lo, ts_hi;

	/* Unsigned arithmetic: a negative offset steps backwards modulo 2^32 */
	first_dw = ((rptr + iv_bytes * offset) & ih->ptr_mask) >> 2;

	ts_lo = le32_to_cpu(ih->ring[first_dw + 1]);
	ts_hi = le32_to_cpu(ih->ring[first_dw + 2]) & 0xffff;

	return ((u64)ts_hi << 32) | ts_lo;
}
301