1 /* 2 * Copyright © 2017 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 */ 24 25 #include <linux/slab.h> 26 27 #include "i915_syncmap.h" 28 29 #include "i915_gem.h" /* GEM_BUG_ON() */ 30 #include "i915_selftest.h" 31 32 #define SHIFT ilog2(KSYNCMAP) 33 #define MASK (KSYNCMAP - 1) 34 35 /* 36 * struct i915_syncmap is a layer of a radixtree that maps a u64 fence 37 * context id to the last u32 fence seqno waited upon from that context. 38 * Unlike lib/radixtree it uses a parent pointer that allows traversal back to 39 * the root. This allows us to access the whole tree via a single pointer 40 * to the most recently used layer. We expect fence contexts to be dense 41 * and most reuse to be on the same i915_gem_context but on neighbouring 42 * engines (i.e. on adjacent contexts) and reuse the same leaf, a very 43 * effective lookup cache. If the new lookup is not on the same leaf, we 44 * expect it to be on the neighbouring branch. 45 * 46 * A leaf holds an array of u32 seqno, and has height 0. The bitmap field 47 * allows us to store whether a particular seqno is valid (i.e. allows us 48 * to distinguish unset from 0). 49 * 50 * A branch holds an array of layer pointers, and has height > 0, and always 51 * has at least 2 layers (either branches or leaves) below it. 52 * 53 * For example, 54 * for x in 55 * 0 1 2 0x10 0x11 0x200 0x201 56 * 0x500000 0x500001 0x503000 0x503001 57 * 0xE<<60: 58 * i915_syncmap_set(&sync, x, lower_32_bits(x)); 59 * will build a tree like: 60 * 0xXXXXXXXXXXXXXXXX 61 * 0-> 0x0000000000XXXXXX 62 * | 0-> 0x0000000000000XXX 63 * | | 0-> 0x00000000000000XX 64 * | | | 0-> 0x000000000000000X 0:0, 1:1, 2:2 65 * | | | 1-> 0x000000000000001X 0:10, 1:11 66 * | | 2-> 0x000000000000020X 0:200, 1:201 67 * | 5-> 0x000000000050XXXX 68 * | 0-> 0x000000000050000X 0:500000, 1:500001 69 * | 3-> 0x000000000050300X 0:503000, 1:503001 70 * e-> 0xe00000000000000X e:e 71 */ 72 73 struct i915_syncmap { 74 u64 prefix; 75 unsigned int height; 76 unsigned int bitmap; 77 struct i915_syncmap *parent; 78 /* 79 * Following this header is an array of either seqno or child pointers: 80 * union { 81 * u32 seqno[KSYNCMAP]; 82 * struct i915_syncmap *child[KSYNCMAP]; 83 * }; 84 */ 85 }; 86 87 /** 88 * i915_syncmap_init -- initialise the #i915_syncmap 89 * @root: pointer to the #i915_syncmap 90 */ 91 void i915_syncmap_init(struct i915_syncmap **root) 92 { 93 BUILD_BUG_ON_NOT_POWER_OF_2(KSYNCMAP); 94 BUILD_BUG_ON_NOT_POWER_OF_2(SHIFT); 95 BUILD_BUG_ON(KSYNCMAP > BITS_PER_TYPE((*root)->bitmap)); 96 *root = NULL; 97 } 98 99 static inline u32 *__sync_seqno(struct i915_syncmap *p) 100 { 101 GEM_BUG_ON(p->height); 102 return (u32 *)(p + 1); 103 } 104 105 static inline struct i915_syncmap **__sync_child(struct i915_syncmap *p) 106 { 107 GEM_BUG_ON(!p->height); 108 return (struct i915_syncmap **)(p + 1); 109 } 110 111 static inline unsigned int 112 __sync_branch_idx(const struct i915_syncmap *p, u64 id) 113 { 114 return (id >> p->height) & MASK; 115 } 116 117 static inline unsigned int 118 __sync_leaf_idx(const struct i915_syncmap *p, u64 id) 119 { 120 GEM_BUG_ON(p->height); 121 return id & MASK; 122 } 123 124 static inline u64 __sync_branch_prefix(const struct i915_syncmap *p, u64 id) 125 { 126 return id >> p->height >> SHIFT; 127 } 128 129 static inline u64 __sync_leaf_prefix(const struct i915_syncmap *p, u64 id) 130 { 131 GEM_BUG_ON(p->height); 132 return id >> SHIFT; 133 } 134 135 static inline bool seqno_later(u32 a, u32 b) 136 { 137 return (s32)(a - b) >= 0; 138 } 139 140 /** 141 * i915_syncmap_is_later -- compare against the last know sync point 142 * @root: pointer to the #i915_syncmap 143 * @id: the context id (other timeline) we are synchronising to 144 * @seqno: the sequence number along the other timeline 145 * 146 * If we have already synchronised this @root timeline with another (@id) then 147 * we can omit any repeated or earlier synchronisation requests. If the two 148 * timelines are already coupled, we can also omit the dependency between the 149 * two as that is already known via the timeline. 150 * 151 * Returns true if the two timelines are already synchronised wrt to @seqno, 152 * false if not and the synchronisation must be emitted. 153 */ 154 bool i915_syncmap_is_later(struct i915_syncmap **root, u64 id, u32 seqno) 155 { 156 struct i915_syncmap *p; 157 unsigned int idx; 158 159 p = *root; 160 if (!p) 161 return false; 162 163 if (likely(__sync_leaf_prefix(p, id) == p->prefix)) 164 goto found; 165 166 /* First climb the tree back to a parent branch */ 167 do { 168 p = p->parent; 169 if (!p) 170 return false; 171 172 if (__sync_branch_prefix(p, id) == p->prefix) 173 break; 174 } while (1); 175 176 /* And then descend again until we find our leaf */ 177 do { 178 if (!p->height) 179 break; 180 181 p = __sync_child(p)[__sync_branch_idx(p, id)]; 182 if (!p) 183 return false; 184 185 if (__sync_branch_prefix(p, id) != p->prefix) 186 return false; 187 } while (1); 188 189 *root = p; 190 found: 191 idx = __sync_leaf_idx(p, id); 192 if (!(p->bitmap & BIT(idx))) 193 return false; 194 195 return seqno_later(__sync_seqno(p)[idx], seqno); 196 } 197 198 static struct i915_syncmap * 199 __sync_alloc_leaf(struct i915_syncmap *parent, u64 id) 200 { 201 struct i915_syncmap *p; 202 203 p = kmalloc(sizeof(*p) + KSYNCMAP * sizeof(u32), GFP_KERNEL); 204 if (unlikely(!p)) 205 return NULL; 206 207 p->parent = parent; 208 p->height = 0; 209 p->bitmap = 0; 210 p->prefix = __sync_leaf_prefix(p, id); 211 return p; 212 } 213 214 static inline void __sync_set_seqno(struct i915_syncmap *p, u64 id, u32 seqno) 215 { 216 unsigned int idx = __sync_leaf_idx(p, id); 217 218 p->bitmap |= BIT(idx); 219 __sync_seqno(p)[idx] = seqno; 220 } 221 222 static inline void __sync_set_child(struct i915_syncmap *p, 223 unsigned int idx, 224 struct i915_syncmap *child) 225 { 226 p->bitmap |= BIT(idx); 227 __sync_child(p)[idx] = child; 228 } 229 230 static noinline int __sync_set(struct i915_syncmap **root, u64 id, u32 seqno) 231 { 232 struct i915_syncmap *p = *root; 233 unsigned int idx; 234 235 if (!p) { 236 p = __sync_alloc_leaf(NULL, id); 237 if (unlikely(!p)) 238 return -ENOMEM; 239 240 goto found; 241 } 242 243 /* Caller handled the likely cached case */ 244 GEM_BUG_ON(__sync_leaf_prefix(p, id) == p->prefix); 245 246 /* Climb back up the tree until we find a common prefix */ 247 do { 248 if (!p->parent) 249 break; 250 251 p = p->parent; 252 253 if (__sync_branch_prefix(p, id) == p->prefix) 254 break; 255 } while (1); 256 257 /* 258 * No shortcut, we have to descend the tree to find the right layer 259 * containing this fence. 260 * 261 * Each layer in the tree holds 16 (KSYNCMAP) pointers, either fences 262 * or lower layers. Leaf nodes (height = 0) contain the fences, all 263 * other nodes (height > 0) are internal layers that point to a lower 264 * node. Each internal layer has at least 2 descendents. 265 * 266 * Starting at the top, we check whether the current prefix matches. If 267 * it doesn't, we have gone past our target and need to insert a join 268 * into the tree, and a new leaf node for the target as a descendent 269 * of the join, as well as the original layer. 270 * 271 * The matching prefix means we are still following the right branch 272 * of the tree. If it has height 0, we have found our leaf and just 273 * need to replace the fence slot with ourselves. If the height is 274 * not zero, our slot contains the next layer in the tree (unless 275 * it is empty, in which case we can add ourselves as a new leaf). 276 * As descend the tree the prefix grows (and height decreases). 277 */ 278 do { 279 struct i915_syncmap *next; 280 281 if (__sync_branch_prefix(p, id) != p->prefix) { 282 unsigned int above; 283 284 /* Insert a join above the current layer */ 285 next = kzalloc(sizeof(*next) + KSYNCMAP * sizeof(next), 286 GFP_KERNEL); 287 if (unlikely(!next)) 288 return -ENOMEM; 289 290 /* Compute the height at which these two diverge */ 291 above = fls64(__sync_branch_prefix(p, id) ^ p->prefix); 292 above = round_up(above, SHIFT); 293 next->height = above + p->height; 294 next->prefix = __sync_branch_prefix(next, id); 295 296 /* Insert the join into the parent */ 297 if (p->parent) { 298 idx = __sync_branch_idx(p->parent, id); 299 __sync_child(p->parent)[idx] = next; 300 GEM_BUG_ON(!(p->parent->bitmap & BIT(idx))); 301 } 302 next->parent = p->parent; 303 304 /* Compute the idx of the other branch, not our id! */ 305 idx = p->prefix >> (above - SHIFT) & MASK; 306 __sync_set_child(next, idx, p); 307 p->parent = next; 308 309 /* Ascend to the join */ 310 p = next; 311 } else { 312 if (!p->height) 313 break; 314 } 315 316 /* Descend into the next layer */ 317 GEM_BUG_ON(!p->height); 318 idx = __sync_branch_idx(p, id); 319 next = __sync_child(p)[idx]; 320 if (!next) { 321 next = __sync_alloc_leaf(p, id); 322 if (unlikely(!next)) 323 return -ENOMEM; 324 325 __sync_set_child(p, idx, next); 326 p = next; 327 break; 328 } 329 330 p = next; 331 } while (1); 332 333 found: 334 GEM_BUG_ON(p->prefix != __sync_leaf_prefix(p, id)); 335 __sync_set_seqno(p, id, seqno); 336 *root = p; 337 return 0; 338 } 339 340 /** 341 * i915_syncmap_set -- mark the most recent syncpoint between contexts 342 * @root: pointer to the #i915_syncmap 343 * @id: the context id (other timeline) we have synchronised to 344 * @seqno: the sequence number along the other timeline 345 * 346 * When we synchronise this @root timeline with another (@id), we also know 347 * that we have synchronized with all previous seqno along that timeline. If 348 * we then have a request to synchronise with the same seqno or older, we can 349 * omit it, see i915_syncmap_is_later() 350 * 351 * Returns 0 on success, or a negative error code. 352 */ 353 int i915_syncmap_set(struct i915_syncmap **root, u64 id, u32 seqno) 354 { 355 struct i915_syncmap *p = *root; 356 357 /* 358 * We expect to be called in sequence following is_later(id), which 359 * should have preloaded the root for us. 360 */ 361 if (likely(p && __sync_leaf_prefix(p, id) == p->prefix)) { 362 __sync_set_seqno(p, id, seqno); 363 return 0; 364 } 365 366 return __sync_set(root, id, seqno); 367 } 368 369 static void __sync_free(struct i915_syncmap *p) 370 { 371 if (p->height) { 372 unsigned int i; 373 374 while ((i = ffs(p->bitmap))) { 375 p->bitmap &= ~0u << i; 376 __sync_free(__sync_child(p)[i - 1]); 377 } 378 } 379 380 kfree(p); 381 } 382 383 /** 384 * i915_syncmap_free -- free all memory associated with the syncmap 385 * @root: pointer to the #i915_syncmap 386 * 387 * Either when the timeline is to be freed and we no longer need the sync 388 * point tracking, or when the fences are all known to be signaled and the 389 * sync point tracking is redundant, we can free the #i915_syncmap to recover 390 * its allocations. 391 * 392 * Will reinitialise the @root pointer so that the #i915_syncmap is ready for 393 * reuse. 394 */ 395 void i915_syncmap_free(struct i915_syncmap **root) 396 { 397 struct i915_syncmap *p; 398 399 p = *root; 400 if (!p) 401 return; 402 403 while (p->parent) 404 p = p->parent; 405 406 __sync_free(p); 407 *root = NULL; 408 } 409 410 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 411 #include "selftests/i915_syncmap.c" 412 #endif 413