1 /* 2 * Copyright (C) 2007 Ben Skeggs. 3 * All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining 6 * a copy of this software and associated documentation files (the 7 * "Software"), to deal in the Software without restriction, including 8 * without limitation the rights to use, copy, modify, merge, publish, 9 * distribute, sublicense, and/or sell copies of the Software, and to 10 * permit persons to whom the Software is furnished to do so, subject to 11 * the following conditions: 12 * 13 * The above copyright notice and this permission notice (including the 14 * next paragraph) shall be included in all copies or substantial 15 * portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 20 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 21 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 22 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 23 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 * 25 */ 26 27 #include "drmP.h" 28 #include "drm.h" 29 #include "nouveau_drv.h" 30 #include "nouveau_dma.h" 31 32 void 33 nouveau_dma_pre_init(struct nouveau_channel *chan) 34 { 35 struct drm_nouveau_private *dev_priv = chan->dev->dev_private; 36 struct nouveau_bo *pushbuf = chan->pushbuf_bo; 37 38 if (dev_priv->card_type == NV_50) { 39 const int ib_size = pushbuf->bo.mem.size / 2; 40 41 chan->dma.ib_base = (pushbuf->bo.mem.size - ib_size) >> 2; 42 chan->dma.ib_max = (ib_size / 8) - 1; 43 chan->dma.ib_put = 0; 44 chan->dma.ib_free = chan->dma.ib_max - chan->dma.ib_put; 45 46 chan->dma.max = (pushbuf->bo.mem.size - ib_size) >> 2; 47 } else { 48 chan->dma.max = (pushbuf->bo.mem.size >> 2) - 2; 49 } 50 51 chan->dma.put = 0; 52 chan->dma.cur = chan->dma.put; 53 chan->dma.free = chan->dma.max - chan->dma.cur; 54 } 55 56 int 57 nouveau_dma_init(struct nouveau_channel *chan) 58 { 59 struct drm_device *dev = chan->dev; 60 struct drm_nouveau_private *dev_priv = dev->dev_private; 61 struct nouveau_gpuobj *m2mf = NULL; 62 struct nouveau_gpuobj *nvsw = NULL; 63 int ret, i; 64 65 /* Create NV_MEMORY_TO_MEMORY_FORMAT for buffer moves */ 66 ret = nouveau_gpuobj_gr_new(chan, dev_priv->card_type < NV_50 ? 67 0x0039 : 0x5039, &m2mf); 68 if (ret) 69 return ret; 70 71 ret = nouveau_gpuobj_ref_add(dev, chan, NvM2MF, m2mf, NULL); 72 if (ret) 73 return ret; 74 75 /* Create an NV_SW object for various sync purposes */ 76 ret = nouveau_gpuobj_sw_new(chan, NV_SW, &nvsw); 77 if (ret) 78 return ret; 79 80 ret = nouveau_gpuobj_ref_add(dev, chan, NvSw, nvsw, NULL); 81 if (ret) 82 return ret; 83 84 /* NV_MEMORY_TO_MEMORY_FORMAT requires a notifier object */ 85 ret = nouveau_notifier_alloc(chan, NvNotify0, 32, &chan->m2mf_ntfy); 86 if (ret) 87 return ret; 88 89 /* Map push buffer */ 90 ret = nouveau_bo_map(chan->pushbuf_bo); 91 if (ret) 92 return ret; 93 94 /* Insert NOPS for NOUVEAU_DMA_SKIPS */ 95 ret = RING_SPACE(chan, NOUVEAU_DMA_SKIPS); 96 if (ret) 97 return ret; 98 99 for (i = 0; i < NOUVEAU_DMA_SKIPS; i++) 100 OUT_RING(chan, 0); 101 102 /* Initialise NV_MEMORY_TO_MEMORY_FORMAT */ 103 ret = RING_SPACE(chan, 4); 104 if (ret) 105 return ret; 106 BEGIN_RING(chan, NvSubM2MF, NV_MEMORY_TO_MEMORY_FORMAT_NAME, 1); 107 OUT_RING(chan, NvM2MF); 108 BEGIN_RING(chan, NvSubM2MF, NV_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY, 1); 109 OUT_RING(chan, NvNotify0); 110 111 /* Initialise NV_SW */ 112 ret = RING_SPACE(chan, 2); 113 if (ret) 114 return ret; 115 BEGIN_RING(chan, NvSubSw, 0, 1); 116 OUT_RING(chan, NvSw); 117 118 /* Sit back and pray the channel works.. */ 119 FIRE_RING(chan); 120 121 return 0; 122 } 123 124 void 125 OUT_RINGp(struct nouveau_channel *chan, const void *data, unsigned nr_dwords) 126 { 127 bool is_iomem; 128 u32 *mem = ttm_kmap_obj_virtual(&chan->pushbuf_bo->kmap, &is_iomem); 129 mem = &mem[chan->dma.cur]; 130 if (is_iomem) 131 memcpy_toio((void __force __iomem *)mem, data, nr_dwords * 4); 132 else 133 memcpy(mem, data, nr_dwords * 4); 134 chan->dma.cur += nr_dwords; 135 } 136 137 /* Fetch and adjust GPU GET pointer 138 * 139 * Returns: 140 * value >= 0, the adjusted GET pointer 141 * -EINVAL if GET pointer currently outside main push buffer 142 * -EBUSY if timeout exceeded 143 */ 144 static inline int 145 READ_GET(struct nouveau_channel *chan, uint32_t *prev_get, uint32_t *timeout) 146 { 147 uint32_t val; 148 149 val = nvchan_rd32(chan, chan->user_get); 150 151 /* reset counter as long as GET is still advancing, this is 152 * to avoid misdetecting a GPU lockup if the GPU happens to 153 * just be processing an operation that takes a long time 154 */ 155 if (val != *prev_get) { 156 *prev_get = val; 157 *timeout = 0; 158 } 159 160 if ((++*timeout & 0xff) == 0) { 161 DRM_UDELAY(1); 162 if (*timeout > 100000) 163 return -EBUSY; 164 } 165 166 if (val < chan->pushbuf_base || 167 val > chan->pushbuf_base + (chan->dma.max << 2)) 168 return -EINVAL; 169 170 return (val - chan->pushbuf_base) >> 2; 171 } 172 173 void 174 nv50_dma_push(struct nouveau_channel *chan, struct nouveau_bo *bo, 175 int delta, int length) 176 { 177 struct nouveau_bo *pb = chan->pushbuf_bo; 178 uint64_t offset = bo->bo.offset + delta; 179 int ip = (chan->dma.ib_put * 2) + chan->dma.ib_base; 180 181 BUG_ON(chan->dma.ib_free < 1); 182 nouveau_bo_wr32(pb, ip++, lower_32_bits(offset)); 183 nouveau_bo_wr32(pb, ip++, upper_32_bits(offset) | length << 8); 184 185 chan->dma.ib_put = (chan->dma.ib_put + 1) & chan->dma.ib_max; 186 187 DRM_MEMORYBARRIER(); 188 /* Flush writes. */ 189 nouveau_bo_rd32(pb, 0); 190 191 nvchan_wr32(chan, 0x8c, chan->dma.ib_put); 192 chan->dma.ib_free--; 193 } 194 195 static int 196 nv50_dma_push_wait(struct nouveau_channel *chan, int count) 197 { 198 uint32_t cnt = 0, prev_get = 0; 199 200 while (chan->dma.ib_free < count) { 201 uint32_t get = nvchan_rd32(chan, 0x88); 202 if (get != prev_get) { 203 prev_get = get; 204 cnt = 0; 205 } 206 207 if ((++cnt & 0xff) == 0) { 208 DRM_UDELAY(1); 209 if (cnt > 100000) 210 return -EBUSY; 211 } 212 213 chan->dma.ib_free = get - chan->dma.ib_put; 214 if (chan->dma.ib_free <= 0) 215 chan->dma.ib_free += chan->dma.ib_max + 1; 216 } 217 218 return 0; 219 } 220 221 static int 222 nv50_dma_wait(struct nouveau_channel *chan, int slots, int count) 223 { 224 uint32_t cnt = 0, prev_get = 0; 225 int ret; 226 227 ret = nv50_dma_push_wait(chan, slots + 1); 228 if (unlikely(ret)) 229 return ret; 230 231 while (chan->dma.free < count) { 232 int get = READ_GET(chan, &prev_get, &cnt); 233 if (unlikely(get < 0)) { 234 if (get == -EINVAL) 235 continue; 236 237 return get; 238 } 239 240 if (get <= chan->dma.cur) { 241 chan->dma.free = chan->dma.max - chan->dma.cur; 242 if (chan->dma.free >= count) 243 break; 244 245 FIRE_RING(chan); 246 do { 247 get = READ_GET(chan, &prev_get, &cnt); 248 if (unlikely(get < 0)) { 249 if (get == -EINVAL) 250 continue; 251 return get; 252 } 253 } while (get == 0); 254 chan->dma.cur = 0; 255 chan->dma.put = 0; 256 } 257 258 chan->dma.free = get - chan->dma.cur - 1; 259 } 260 261 return 0; 262 } 263 264 int 265 nouveau_dma_wait(struct nouveau_channel *chan, int slots, int size) 266 { 267 uint32_t prev_get = 0, cnt = 0; 268 int get; 269 270 if (chan->dma.ib_max) 271 return nv50_dma_wait(chan, slots, size); 272 273 while (chan->dma.free < size) { 274 get = READ_GET(chan, &prev_get, &cnt); 275 if (unlikely(get == -EBUSY)) 276 return -EBUSY; 277 278 /* loop until we have a usable GET pointer. the value 279 * we read from the GPU may be outside the main ring if 280 * PFIFO is processing a buffer called from the main ring, 281 * discard these values until something sensible is seen. 282 * 283 * the other case we discard GET is while the GPU is fetching 284 * from the SKIPS area, so the code below doesn't have to deal 285 * with some fun corner cases. 286 */ 287 if (unlikely(get == -EINVAL) || get < NOUVEAU_DMA_SKIPS) 288 continue; 289 290 if (get <= chan->dma.cur) { 291 /* engine is fetching behind us, or is completely 292 * idle (GET == PUT) so we have free space up until 293 * the end of the push buffer 294 * 295 * we can only hit that path once per call due to 296 * looping back to the beginning of the push buffer, 297 * we'll hit the fetching-ahead-of-us path from that 298 * point on. 299 * 300 * the *one* exception to that rule is if we read 301 * GET==PUT, in which case the below conditional will 302 * always succeed and break us out of the wait loop. 303 */ 304 chan->dma.free = chan->dma.max - chan->dma.cur; 305 if (chan->dma.free >= size) 306 break; 307 308 /* not enough space left at the end of the push buffer, 309 * instruct the GPU to jump back to the start right 310 * after processing the currently pending commands. 311 */ 312 OUT_RING(chan, chan->pushbuf_base | 0x20000000); 313 314 /* wait for GET to depart from the skips area. 315 * prevents writing GET==PUT and causing a race 316 * condition that causes us to think the GPU is 317 * idle when it's not. 318 */ 319 do { 320 get = READ_GET(chan, &prev_get, &cnt); 321 if (unlikely(get == -EBUSY)) 322 return -EBUSY; 323 if (unlikely(get == -EINVAL)) 324 continue; 325 } while (get <= NOUVEAU_DMA_SKIPS); 326 WRITE_PUT(NOUVEAU_DMA_SKIPS); 327 328 /* we're now submitting commands at the start of 329 * the push buffer. 330 */ 331 chan->dma.cur = 332 chan->dma.put = NOUVEAU_DMA_SKIPS; 333 } 334 335 /* engine fetching ahead of us, we have space up until the 336 * current GET pointer. the "- 1" is to ensure there's 337 * space left to emit a jump back to the beginning of the 338 * push buffer if we require it. we can never get GET == PUT 339 * here, so this is safe. 340 */ 341 chan->dma.free = get - chan->dma.cur - 1; 342 } 343 344 return 0; 345 } 346 347