1 /* 2 * Copyright 2014 Advanced Micro Devices, Inc. 3 * All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the 7 * "Software"), to deal in the Software without restriction, including 8 * without limitation the rights to use, copy, modify, merge, publish, 9 * distribute, sub license, and/or sell copies of the Software, and to 10 * permit persons to whom the Software is furnished to do so, subject to 11 * the following conditions: 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 16 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 17 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 18 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 19 * USE OR OTHER DEALINGS IN THE SOFTWARE. 20 * 21 * The above copyright notice and this permission notice (including the 22 * next paragraph) shall be included in all copies or substantial portions 23 * of the Software. 24 * 25 */ 26 /* 27 * Authors: 28 * Christian König <christian.koenig@amd.com> 29 */ 30 31 #include <drm/drmP.h> 32 #include "amdgpu.h" 33 #include "amdgpu_trace.h" 34 35 struct amdgpu_sync_entry { 36 struct hlist_node node; 37 struct fence *fence; 38 }; 39 40 /** 41 * amdgpu_sync_create - zero init sync object 42 * 43 * @sync: sync object to initialize 44 * 45 * Just clear the sync object for now. 46 */ 47 void amdgpu_sync_create(struct amdgpu_sync *sync) 48 { 49 unsigned i; 50 51 for (i = 0; i < AMDGPU_NUM_SYNCS; ++i) 52 sync->semaphores[i] = NULL; 53 54 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) 55 sync->sync_to[i] = NULL; 56 57 hash_init(sync->fences); 58 sync->last_vm_update = NULL; 59 } 60 61 static bool amdgpu_sync_same_dev(struct amdgpu_device *adev, struct fence *f) 62 { 63 struct amdgpu_fence *a_fence = to_amdgpu_fence(f); 64 struct amd_sched_fence *s_fence = to_amd_sched_fence(f); 65 66 if (a_fence) 67 return a_fence->ring->adev == adev; 68 if (s_fence) 69 return (struct amdgpu_device *)s_fence->scheduler->priv == adev; 70 return false; 71 } 72 73 static bool amdgpu_sync_test_owner(struct fence *f, void *owner) 74 { 75 struct amdgpu_fence *a_fence = to_amdgpu_fence(f); 76 struct amd_sched_fence *s_fence = to_amd_sched_fence(f); 77 if (s_fence) 78 return s_fence->owner == owner; 79 if (a_fence) 80 return a_fence->owner == owner; 81 return false; 82 } 83 84 /** 85 * amdgpu_sync_fence - remember to sync to this fence 86 * 87 * @sync: sync object to add fence to 88 * @fence: fence to sync to 89 * 90 */ 91 int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, 92 struct fence *f) 93 { 94 struct amdgpu_sync_entry *e; 95 struct amdgpu_fence *fence; 96 struct amdgpu_fence *other; 97 struct fence *tmp, *later; 98 99 if (!f) 100 return 0; 101 102 if (amdgpu_sync_same_dev(adev, f) && 103 amdgpu_sync_test_owner(f, AMDGPU_FENCE_OWNER_VM)) { 104 if (sync->last_vm_update) { 105 tmp = sync->last_vm_update; 106 BUG_ON(f->context != tmp->context); 107 later = (f->seqno - tmp->seqno <= INT_MAX) ? f : tmp; 108 sync->last_vm_update = fence_get(later); 109 fence_put(tmp); 110 } else 111 sync->last_vm_update = fence_get(f); 112 } 113 114 fence = to_amdgpu_fence(f); 115 if (!fence || fence->ring->adev != adev) { 116 hash_for_each_possible(sync->fences, e, node, f->context) { 117 struct fence *new; 118 if (unlikely(e->fence->context != f->context)) 119 continue; 120 new = fence_get(fence_later(e->fence, f)); 121 if (new) { 122 fence_put(e->fence); 123 e->fence = new; 124 } 125 return 0; 126 } 127 128 e = kmalloc(sizeof(struct amdgpu_sync_entry), GFP_KERNEL); 129 if (!e) 130 return -ENOMEM; 131 132 hash_add(sync->fences, &e->node, f->context); 133 e->fence = fence_get(f); 134 return 0; 135 } 136 137 other = sync->sync_to[fence->ring->idx]; 138 sync->sync_to[fence->ring->idx] = amdgpu_fence_ref( 139 amdgpu_fence_later(fence, other)); 140 amdgpu_fence_unref(&other); 141 142 return 0; 143 } 144 145 static void *amdgpu_sync_get_owner(struct fence *f) 146 { 147 struct amdgpu_fence *a_fence = to_amdgpu_fence(f); 148 struct amd_sched_fence *s_fence = to_amd_sched_fence(f); 149 150 if (s_fence) 151 return s_fence->owner; 152 else if (a_fence) 153 return a_fence->owner; 154 return AMDGPU_FENCE_OWNER_UNDEFINED; 155 } 156 157 /** 158 * amdgpu_sync_resv - use the semaphores to sync to a reservation object 159 * 160 * @sync: sync object to add fences from reservation object to 161 * @resv: reservation object with embedded fence 162 * @shared: true if we should only sync to the exclusive fence 163 * 164 * Sync to the fence using the semaphore objects 165 */ 166 int amdgpu_sync_resv(struct amdgpu_device *adev, 167 struct amdgpu_sync *sync, 168 struct reservation_object *resv, 169 void *owner) 170 { 171 struct reservation_object_list *flist; 172 struct fence *f; 173 void *fence_owner; 174 unsigned i; 175 int r = 0; 176 177 if (resv == NULL) 178 return -EINVAL; 179 180 /* always sync to the exclusive fence */ 181 f = reservation_object_get_excl(resv); 182 r = amdgpu_sync_fence(adev, sync, f); 183 184 flist = reservation_object_get_list(resv); 185 if (!flist || r) 186 return r; 187 188 for (i = 0; i < flist->shared_count; ++i) { 189 f = rcu_dereference_protected(flist->shared[i], 190 reservation_object_held(resv)); 191 if (amdgpu_sync_same_dev(adev, f)) { 192 /* VM updates are only interesting 193 * for other VM updates and moves. 194 */ 195 fence_owner = amdgpu_sync_get_owner(f); 196 if ((owner != AMDGPU_FENCE_OWNER_MOVE) && 197 (fence_owner != AMDGPU_FENCE_OWNER_MOVE) && 198 ((owner == AMDGPU_FENCE_OWNER_VM) != 199 (fence_owner == AMDGPU_FENCE_OWNER_VM))) 200 continue; 201 202 /* Ignore fence from the same owner as 203 * long as it isn't undefined. 204 */ 205 if (owner != AMDGPU_FENCE_OWNER_UNDEFINED && 206 fence_owner == owner) 207 continue; 208 } 209 210 r = amdgpu_sync_fence(adev, sync, f); 211 if (r) 212 break; 213 } 214 return r; 215 } 216 217 struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync) 218 { 219 struct amdgpu_sync_entry *e; 220 struct hlist_node *tmp; 221 struct fence *f; 222 int i; 223 224 hash_for_each_safe(sync->fences, i, tmp, e, node) { 225 226 f = e->fence; 227 228 hash_del(&e->node); 229 kfree(e); 230 231 if (!fence_is_signaled(f)) 232 return f; 233 234 fence_put(f); 235 } 236 return NULL; 237 } 238 239 int amdgpu_sync_wait(struct amdgpu_sync *sync) 240 { 241 struct amdgpu_sync_entry *e; 242 struct hlist_node *tmp; 243 int i, r; 244 245 hash_for_each_safe(sync->fences, i, tmp, e, node) { 246 r = fence_wait(e->fence, false); 247 if (r) 248 return r; 249 250 hash_del(&e->node); 251 fence_put(e->fence); 252 kfree(e); 253 } 254 return 0; 255 } 256 257 /** 258 * amdgpu_sync_rings - sync ring to all registered fences 259 * 260 * @sync: sync object to use 261 * @ring: ring that needs sync 262 * 263 * Ensure that all registered fences are signaled before letting 264 * the ring continue. The caller must hold the ring lock. 265 */ 266 int amdgpu_sync_rings(struct amdgpu_sync *sync, 267 struct amdgpu_ring *ring) 268 { 269 struct amdgpu_device *adev = ring->adev; 270 unsigned count = 0; 271 int i, r; 272 273 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { 274 struct amdgpu_fence *fence = sync->sync_to[i]; 275 struct amdgpu_semaphore *semaphore; 276 struct amdgpu_ring *other = adev->rings[i]; 277 278 /* check if we really need to sync */ 279 if (!amdgpu_fence_need_sync(fence, ring)) 280 continue; 281 282 /* prevent GPU deadlocks */ 283 if (!other->ready) { 284 dev_err(adev->dev, "Syncing to a disabled ring!"); 285 return -EINVAL; 286 } 287 288 if (amdgpu_enable_scheduler || (count >= AMDGPU_NUM_SYNCS)) { 289 /* not enough room, wait manually */ 290 r = fence_wait(&fence->base, false); 291 if (r) 292 return r; 293 continue; 294 } 295 r = amdgpu_semaphore_create(adev, &semaphore); 296 if (r) 297 return r; 298 299 sync->semaphores[count++] = semaphore; 300 301 /* allocate enough space for sync command */ 302 r = amdgpu_ring_alloc(other, 16); 303 if (r) 304 return r; 305 306 /* emit the signal semaphore */ 307 if (!amdgpu_semaphore_emit_signal(other, semaphore)) { 308 /* signaling wasn't successful wait manually */ 309 amdgpu_ring_undo(other); 310 r = fence_wait(&fence->base, false); 311 if (r) 312 return r; 313 continue; 314 } 315 316 /* we assume caller has already allocated space on waiters ring */ 317 if (!amdgpu_semaphore_emit_wait(ring, semaphore)) { 318 /* waiting wasn't successful wait manually */ 319 amdgpu_ring_undo(other); 320 r = fence_wait(&fence->base, false); 321 if (r) 322 return r; 323 continue; 324 } 325 326 amdgpu_ring_commit(other); 327 amdgpu_fence_note_sync(fence, ring); 328 } 329 330 return 0; 331 } 332 333 /** 334 * amdgpu_sync_free - free the sync object 335 * 336 * @adev: amdgpu_device pointer 337 * @sync: sync object to use 338 * @fence: fence to use for the free 339 * 340 * Free the sync object by freeing all semaphores in it. 341 */ 342 void amdgpu_sync_free(struct amdgpu_device *adev, 343 struct amdgpu_sync *sync, 344 struct fence *fence) 345 { 346 struct amdgpu_sync_entry *e; 347 struct hlist_node *tmp; 348 unsigned i; 349 350 hash_for_each_safe(sync->fences, i, tmp, e, node) { 351 hash_del(&e->node); 352 fence_put(e->fence); 353 kfree(e); 354 } 355 356 for (i = 0; i < AMDGPU_NUM_SYNCS; ++i) 357 amdgpu_semaphore_free(adev, &sync->semaphores[i], fence); 358 359 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) 360 amdgpu_fence_unref(&sync->sync_to[i]); 361 362 fence_put(sync->last_vm_update); 363 } 364