/*
 * Copyright 2011 Red Hat Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 */
/* Algorithm:
 *
 * We store the last allocated bo in "hole", and we always try to allocate
 * after the last allocated bo. The principle is that in a linear GPU ring
 * progression, what comes after the last bo is the oldest bo we allocated
 * and thus the first one that should no longer be in use by the GPU.
 *
 * If that is not the case, we skip over the bo after last to the closest
 * done bo, if one exists. If none exists and we are not asked to block,
 * we report failure to allocate.
 *
 * If we are asked to block, we wait on the oldest fence of each ring and
 * retry as soon as any one of those fences completes.
 */
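/*
 * Usage sketch (illustrative only, kept as a comment): the typical life
 * cycle of a manager and of one sub-allocation.  The 64 KiB size, the
 * 256 byte alignment and the GTT domain below are assumptions chosen for
 * the example, not requirements of this code.
 *
 *	struct amdgpu_sa_manager mgr;
 *	struct amdgpu_sa_bo *sa_bo;
 *	int r;
 *
 *	r = amdgpu_sa_bo_manager_init(adev, &mgr, 64 * 1024, 256,
 *				      AMDGPU_GEM_DOMAIN_GTT);
 *	if (!r)
 *		r = amdgpu_sa_bo_manager_start(adev, &mgr);
 *
 *	// carve a 1 KiB, 256 byte aligned piece out of the managed bo,
 *	// blocking on ring fences if the buffer is currently full
 *	r = amdgpu_sa_bo_new(&mgr, &sa_bo, 1024, 256);
 *
 *	// ... submit GPU work that uses the range
 *	// [sa_bo->soffset, sa_bo->eoffset) of the managed bo ...
 *
 *	// hand the allocation back; it is actually reclaimed once the
 *	// job's fence signals
 *	amdgpu_sa_bo_free(adev, &sa_bo, fence);
 *
 *	amdgpu_sa_bo_manager_suspend(adev, &mgr);
 *	amdgpu_sa_bo_manager_fini(adev, &mgr);
 */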
#include <drm/drmP.h>
#include "amdgpu.h"

static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo);
static void amdgpu_sa_bo_try_free(struct amdgpu_sa_manager *sa_manager);

int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev,
			      struct amdgpu_sa_manager *sa_manager,
			      unsigned size, u32 align, u32 domain)
{
	int i, r;

	init_waitqueue_head(&sa_manager->wq);
	sa_manager->bo = NULL;
	sa_manager->size = size;
	sa_manager->domain = domain;
	sa_manager->align = align;
	sa_manager->hole = &sa_manager->olist;
	INIT_LIST_HEAD(&sa_manager->olist);
	for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
		INIT_LIST_HEAD(&sa_manager->flist[i]);

	r = amdgpu_bo_create(adev, size, align, true, domain,
			     0, NULL, NULL, 0, &sa_manager->bo);
	if (r) {
		dev_err(adev->dev, "(%d) failed to allocate bo for manager\n", r);
		return r;
	}

	return r;
}

void amdgpu_sa_bo_manager_fini(struct amdgpu_device *adev,
			       struct amdgpu_sa_manager *sa_manager)
{
	struct amdgpu_sa_bo *sa_bo, *tmp;

	if (!list_empty(&sa_manager->olist)) {
		sa_manager->hole = &sa_manager->olist;
		amdgpu_sa_bo_try_free(sa_manager);
		if (!list_empty(&sa_manager->olist)) {
			dev_err(adev->dev, "sa_manager is not empty, clearing anyway\n");
		}
	}
	list_for_each_entry_safe(sa_bo, tmp, &sa_manager->olist, olist) {
		amdgpu_sa_bo_remove_locked(sa_bo);
	}
	amdgpu_bo_unref(&sa_manager->bo);
	sa_manager->size = 0;
}

int amdgpu_sa_bo_manager_start(struct amdgpu_device *adev,
			       struct amdgpu_sa_manager *sa_manager)
{
	int r;

	if (sa_manager->bo == NULL) {
		dev_err(adev->dev, "no bo for sa manager\n");
		return -EINVAL;
	}

	/* map the buffer */
	r = amdgpu_bo_reserve(sa_manager->bo, false);
	if (r) {
		dev_err(adev->dev, "(%d) failed to reserve manager bo\n", r);
		return r;
	}
	r = amdgpu_bo_pin(sa_manager->bo, sa_manager->domain, &sa_manager->gpu_addr);
	if (r) {
		amdgpu_bo_unreserve(sa_manager->bo);
		dev_err(adev->dev, "(%d) failed to pin manager bo\n", r);
		return r;
	}
	r = amdgpu_bo_kmap(sa_manager->bo, &sa_manager->cpu_ptr);
	/* only clear the buffer if the mapping actually succeeded */
	if (!r)
		memset(sa_manager->cpu_ptr, 0, sa_manager->size);
	amdgpu_bo_unreserve(sa_manager->bo);
	return r;
}

int amdgpu_sa_bo_manager_suspend(struct amdgpu_device *adev,
				 struct amdgpu_sa_manager *sa_manager)
{
	int r;

	if (sa_manager->bo == NULL) {
		dev_err(adev->dev, "no bo for sa manager\n");
		return -EINVAL;
	}

	r = amdgpu_bo_reserve(sa_manager->bo, true);
	if (!r) {
		amdgpu_bo_kunmap(sa_manager->bo);
		amdgpu_bo_unpin(sa_manager->bo);
		amdgpu_bo_unreserve(sa_manager->bo);
	}
	return r;
}

static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo)
{
	struct amdgpu_sa_manager *sa_manager = sa_bo->manager;

	if (sa_manager->hole == &sa_bo->olist) {
		sa_manager->hole = sa_bo->olist.prev;
	}
	list_del_init(&sa_bo->olist);
	list_del_init(&sa_bo->flist);
	dma_fence_put(sa_bo->fence);
	kfree(sa_bo);
}

static void amdgpu_sa_bo_try_free(struct amdgpu_sa_manager *sa_manager)
{
	struct amdgpu_sa_bo *sa_bo, *tmp;

	if (sa_manager->hole->next == &sa_manager->olist)
		return;

	sa_bo = list_entry(sa_manager->hole->next, struct amdgpu_sa_bo, olist);
	list_for_each_entry_safe_from(sa_bo, tmp, &sa_manager->olist, olist) {
		if (sa_bo->fence == NULL ||
		    !dma_fence_is_signaled(sa_bo->fence)) {
			return;
		}
		amdgpu_sa_bo_remove_locked(sa_bo);
	}
}

static inline unsigned amdgpu_sa_bo_hole_soffset(struct amdgpu_sa_manager *sa_manager)
{
	struct list_head *hole = sa_manager->hole;

	if (hole != &sa_manager->olist) {
		return list_entry(hole, struct amdgpu_sa_bo, olist)->eoffset;
	}
	return 0;
}

static inline unsigned amdgpu_sa_bo_hole_eoffset(struct amdgpu_sa_manager *sa_manager)
{
	struct list_head *hole = sa_manager->hole;

	if (hole->next != &sa_manager->olist) {
		return list_entry(hole->next, struct amdgpu_sa_bo, olist)->soffset;
	}
	return sa_manager->size;
}

static bool amdgpu_sa_bo_try_alloc(struct amdgpu_sa_manager *sa_manager,
				   struct amdgpu_sa_bo *sa_bo,
				   unsigned size, unsigned align)
{
	unsigned soffset, eoffset, wasted;

	soffset = amdgpu_sa_bo_hole_soffset(sa_manager);
	eoffset = amdgpu_sa_bo_hole_eoffset(sa_manager);
	wasted = (align - (soffset % align)) % align;

	if ((eoffset - soffset) >= (size + wasted)) {
		soffset += wasted;

		sa_bo->manager = sa_manager;
		sa_bo->soffset = soffset;
		sa_bo->eoffset = soffset + size;
		list_add(&sa_bo->olist, sa_manager->hole);
		INIT_LIST_HEAD(&sa_bo->flist);
		sa_manager->hole = &sa_bo->olist;
		return true;
	}
	return false;
}

/**
 * amdgpu_sa_event - Check if we can stop waiting
 *
 * @sa_manager: pointer to the sa_manager
 * @size: number of bytes we want to allocate
 * @align: alignment we need to match
 *
 * Check if either there is a fence we can wait for or
 * enough free memory to satisfy the allocation directly.
 */
static bool amdgpu_sa_event(struct amdgpu_sa_manager *sa_manager,
			    unsigned size, unsigned align)
{
	unsigned soffset, eoffset, wasted;
	int i;

	for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
		if (!list_empty(&sa_manager->flist[i]))
			return true;

	soffset = amdgpu_sa_bo_hole_soffset(sa_manager);
	eoffset = amdgpu_sa_bo_hole_eoffset(sa_manager);
	wasted = (align - (soffset % align)) % align;

	if ((eoffset - soffset) >= (size + wasted)) {
		return true;
	}

	return false;
}

static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
				   struct dma_fence **fences,
				   unsigned *tries)
{
	struct amdgpu_sa_bo *best_bo = NULL;
	unsigned i, soffset, best, tmp;

	/* if hole points to the end of the buffer */
	if (sa_manager->hole->next == &sa_manager->olist) {
		/* try again with its beginning */
		sa_manager->hole = &sa_manager->olist;
		return true;
	}

	soffset = amdgpu_sa_bo_hole_soffset(sa_manager);
	/* to handle wrap around we add sa_manager->size */
	best = sa_manager->size * 2;
	/* go over all fence lists and try to find the sa_bo that is
	 * closest ahead of the current hole; distances are computed
	 * modulo the buffer size, so a bo that sits before the hole
	 * counts as being after the wrap around
	 */
	for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i) {
		struct amdgpu_sa_bo *sa_bo;

		if (list_empty(&sa_manager->flist[i]))
			continue;

		sa_bo = list_first_entry(&sa_manager->flist[i],
					 struct amdgpu_sa_bo, flist);

		if (!dma_fence_is_signaled(sa_bo->fence)) {
			fences[i] = sa_bo->fence;
			continue;
		}

		/* limit the number of tries each ring gets */
		if (tries[i] > 2) {
			continue;
		}

		tmp = sa_bo->soffset;
		if (tmp < soffset) {
			/* wrap around, pretend it's after */
			tmp += sa_manager->size;
		}
		tmp -= soffset;
		if (tmp < best) {
			/* this sa bo is the closest one */
			best = tmp;
			best_bo = sa_bo;
		}
	}

	if (best_bo) {
		uint32_t idx = best_bo->fence->context;

		idx %= AMDGPU_SA_NUM_FENCE_LISTS;
		++tries[idx];
		sa_manager->hole = best_bo->olist.prev;

		/* we know that this one is signaled,
		 * so it's safe to remove it
		 */
		amdgpu_sa_bo_remove_locked(best_bo);
		return true;
	}
	return false;
}

int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
		     struct amdgpu_sa_bo **sa_bo,
		     unsigned size, unsigned align)
{
	struct dma_fence *fences[AMDGPU_SA_NUM_FENCE_LISTS];
	unsigned tries[AMDGPU_SA_NUM_FENCE_LISTS];
	unsigned count;
	int i, r;
	signed long t;

	if (WARN_ON_ONCE(align > sa_manager->align))
		return -EINVAL;

	if (WARN_ON_ONCE(size > sa_manager->size))
		return -EINVAL;

	*sa_bo = kmalloc(sizeof(struct amdgpu_sa_bo), GFP_KERNEL);
	if (!(*sa_bo))
		return -ENOMEM;
	(*sa_bo)->manager = sa_manager;
	(*sa_bo)->fence = NULL;
	INIT_LIST_HEAD(&(*sa_bo)->olist);
	INIT_LIST_HEAD(&(*sa_bo)->flist);

	spin_lock(&sa_manager->wq.lock);
	do {
		for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i) {
			fences[i] = NULL;
			tries[i] = 0;
		}

		do {
			amdgpu_sa_bo_try_free(sa_manager);

			if (amdgpu_sa_bo_try_alloc(sa_manager, *sa_bo,
						   size, align)) {
				spin_unlock(&sa_manager->wq.lock);
				return 0;
			}

			/* see if we can skip over some allocations */
		} while (amdgpu_sa_bo_next_hole(sa_manager, fences, tries));

		for (i = 0, count = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
			if (fences[i])
				fences[count++] = dma_fence_get(fences[i]);

		if (count) {
			spin_unlock(&sa_manager->wq.lock);
			t = dma_fence_wait_any_timeout(fences, count, false,
						       MAX_SCHEDULE_TIMEOUT,
						       NULL);
			for (i = 0; i < count; ++i)
				dma_fence_put(fences[i]);

			r = (t > 0) ? 0 : t;
			spin_lock(&sa_manager->wq.lock);
		} else {
			/* if we have nothing to wait for, block */
			r = wait_event_interruptible_locked(
				sa_manager->wq,
				amdgpu_sa_event(sa_manager, size, align)
			);
		}

	} while (!r);

	spin_unlock(&sa_manager->wq.lock);
	kfree(*sa_bo);
	*sa_bo = NULL;
	return r;
}

void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo,
		       struct dma_fence *fence)
{
	struct amdgpu_sa_manager *sa_manager;

	if (sa_bo == NULL || *sa_bo == NULL) {
		return;
	}

	sa_manager = (*sa_bo)->manager;
	spin_lock(&sa_manager->wq.lock);
	if (fence && !dma_fence_is_signaled(fence)) {
		uint32_t idx;

		(*sa_bo)->fence = dma_fence_get(fence);
		idx = fence->context % AMDGPU_SA_NUM_FENCE_LISTS;
		list_add_tail(&(*sa_bo)->flist, &sa_manager->flist[idx]);
	} else {
		amdgpu_sa_bo_remove_locked(*sa_bo);
	}
	wake_up_all_locked(&sa_manager->wq);
	spin_unlock(&sa_manager->wq.lock);
	*sa_bo = NULL;
}

#if defined(CONFIG_DEBUG_FS)

void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,
				  struct seq_file *m)
{
	struct amdgpu_sa_bo *i;

	spin_lock(&sa_manager->wq.lock);
	list_for_each_entry(i, &sa_manager->olist, olist) {
		uint64_t soffset = i->soffset + sa_manager->gpu_addr;
		uint64_t eoffset = i->eoffset + sa_manager->gpu_addr;

		if (&i->olist == sa_manager->hole) {
			seq_printf(m, ">");
		} else {
			seq_printf(m, " ");
		}
		seq_printf(m, "[0x%010llx 0x%010llx] size %8lld",
			   soffset, eoffset, eoffset - soffset);

		if (i->fence)
			seq_printf(m, " protected by 0x%08x on context %llu",
				   i->fence->seqno, i->fence->context);

		seq_printf(m, "\n");
	}
	spin_unlock(&sa_manager->wq.lock);
}
#endif