1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #ifndef __LINUX_SEQLOCK_H 3 #define __LINUX_SEQLOCK_H 4 /* 5 * Reader/writer consistent mechanism without starving writers. This type of 6 * lock for data where the reader wants a consistent set of information 7 * and is willing to retry if the information changes. There are two types 8 * of readers: 9 * 1. Sequence readers which never block a writer but they may have to retry 10 * if a writer is in progress by detecting change in sequence number. 11 * Writers do not wait for a sequence reader. 12 * 2. Locking readers which will wait if a writer or another locking reader 13 * is in progress. A locking reader in progress will also block a writer 14 * from going forward. Unlike the regular rwlock, the read lock here is 15 * exclusive so that only one locking reader can get it. 16 * 17 * This is not as cache friendly as brlock. Also, this may not work well 18 * for data that contains pointers, because any writer could 19 * invalidate a pointer that a reader was following. 20 * 21 * Expected non-blocking reader usage: 22 * do { 23 * seq = read_seqbegin(&foo); 24 * ... 25 * } while (read_seqretry(&foo, seq)); 26 * 27 * 28 * On non-SMP the spin locks disappear but the writer still needs 29 * to increment the sequence variables because an interrupt routine could 30 * change the state of the data. 31 * 32 * Based on x86_64 vsyscall gettimeofday 33 * by Keith Owens and Andrea Arcangeli 34 */ 35 36 #include <linux/spinlock.h> 37 #include <linux/preempt.h> 38 #include <linux/lockdep.h> 39 #include <linux/compiler.h> 40 #include <linux/kcsan-checks.h> 41 #include <asm/processor.h> 42 43 /* 44 * The seqlock interface does not prescribe a precise sequence of read 45 * begin/retry/end. For readers, typically there is a call to 46 * read_seqcount_begin() and read_seqcount_retry(), however, there are more 47 * esoteric cases which do not follow this pattern. 48 * 49 * As a consequence, we take the following best-effort approach for raw usage 50 * via seqcount_t under KCSAN: upon beginning a seq-reader critical section, 51 * pessimistically mark the next KCSAN_SEQLOCK_REGION_MAX memory accesses as 52 * atomics; if there is a matching read_seqcount_retry() call, no following 53 * memory operations are considered atomic. Usage of seqlocks via seqlock_t 54 * interface is not affected. 55 */ 56 #define KCSAN_SEQLOCK_REGION_MAX 1000 57 58 /* 59 * Version using sequence counter only. 60 * This can be used when code has its own mutex protecting the 61 * updating starting before the write_seqcountbeqin() and ending 62 * after the write_seqcount_end(). 63 */ 64 typedef struct seqcount { 65 unsigned sequence; 66 #ifdef CONFIG_DEBUG_LOCK_ALLOC 67 struct lockdep_map dep_map; 68 #endif 69 } seqcount_t; 70 71 static inline void __seqcount_init(seqcount_t *s, const char *name, 72 struct lock_class_key *key) 73 { 74 /* 75 * Make sure we are not reinitializing a held lock: 76 */ 77 lockdep_init_map(&s->dep_map, name, key, 0); 78 s->sequence = 0; 79 } 80 81 #ifdef CONFIG_DEBUG_LOCK_ALLOC 82 # define SEQCOUNT_DEP_MAP_INIT(lockname) \ 83 .dep_map = { .name = #lockname } \ 84 85 # define seqcount_init(s) \ 86 do { \ 87 static struct lock_class_key __key; \ 88 __seqcount_init((s), #s, &__key); \ 89 } while (0) 90 91 static inline void seqcount_lockdep_reader_access(const seqcount_t *s) 92 { 93 seqcount_t *l = (seqcount_t *)s; 94 unsigned long flags; 95 96 local_irq_save(flags); 97 seqcount_acquire_read(&l->dep_map, 0, 0, _RET_IP_); 98 seqcount_release(&l->dep_map, _RET_IP_); 99 local_irq_restore(flags); 100 } 101 102 #else 103 # define SEQCOUNT_DEP_MAP_INIT(lockname) 104 # define seqcount_init(s) __seqcount_init(s, NULL, NULL) 105 # define seqcount_lockdep_reader_access(x) 106 #endif 107 108 #define SEQCNT_ZERO(lockname) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(lockname)} 109 110 111 /** 112 * __read_seqcount_begin - begin a seq-read critical section (without barrier) 113 * @s: pointer to seqcount_t 114 * Returns: count to be passed to read_seqcount_retry 115 * 116 * __read_seqcount_begin is like read_seqcount_begin, but has no smp_rmb() 117 * barrier. Callers should ensure that smp_rmb() or equivalent ordering is 118 * provided before actually loading any of the variables that are to be 119 * protected in this critical section. 120 * 121 * Use carefully, only in critical code, and comment how the barrier is 122 * provided. 123 */ 124 static inline unsigned __read_seqcount_begin(const seqcount_t *s) 125 { 126 unsigned ret; 127 128 repeat: 129 ret = READ_ONCE(s->sequence); 130 if (unlikely(ret & 1)) { 131 cpu_relax(); 132 goto repeat; 133 } 134 kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); 135 return ret; 136 } 137 138 /** 139 * raw_read_seqcount - Read the raw seqcount 140 * @s: pointer to seqcount_t 141 * Returns: count to be passed to read_seqcount_retry 142 * 143 * raw_read_seqcount opens a read critical section of the given 144 * seqcount without any lockdep checking and without checking or 145 * masking the LSB. Calling code is responsible for handling that. 146 */ 147 static inline unsigned raw_read_seqcount(const seqcount_t *s) 148 { 149 unsigned ret = READ_ONCE(s->sequence); 150 smp_rmb(); 151 kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); 152 return ret; 153 } 154 155 /** 156 * raw_read_seqcount_begin - start seq-read critical section w/o lockdep 157 * @s: pointer to seqcount_t 158 * Returns: count to be passed to read_seqcount_retry 159 * 160 * raw_read_seqcount_begin opens a read critical section of the given 161 * seqcount, but without any lockdep checking. Validity of the critical 162 * section is tested by checking read_seqcount_retry function. 163 */ 164 static inline unsigned raw_read_seqcount_begin(const seqcount_t *s) 165 { 166 unsigned ret = __read_seqcount_begin(s); 167 smp_rmb(); 168 return ret; 169 } 170 171 /** 172 * read_seqcount_begin - begin a seq-read critical section 173 * @s: pointer to seqcount_t 174 * Returns: count to be passed to read_seqcount_retry 175 * 176 * read_seqcount_begin opens a read critical section of the given seqcount. 177 * Validity of the critical section is tested by checking read_seqcount_retry 178 * function. 179 */ 180 static inline unsigned read_seqcount_begin(const seqcount_t *s) 181 { 182 seqcount_lockdep_reader_access(s); 183 return raw_read_seqcount_begin(s); 184 } 185 186 /** 187 * raw_seqcount_begin - begin a seq-read critical section 188 * @s: pointer to seqcount_t 189 * Returns: count to be passed to read_seqcount_retry 190 * 191 * raw_seqcount_begin opens a read critical section of the given seqcount. 192 * Validity of the critical section is tested by checking read_seqcount_retry 193 * function. 194 * 195 * Unlike read_seqcount_begin(), this function will not wait for the count 196 * to stabilize. If a writer is active when we begin, we will fail the 197 * read_seqcount_retry() instead of stabilizing at the beginning of the 198 * critical section. 199 */ 200 static inline unsigned raw_seqcount_begin(const seqcount_t *s) 201 { 202 unsigned ret = READ_ONCE(s->sequence); 203 smp_rmb(); 204 kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); 205 return ret & ~1; 206 } 207 208 /** 209 * __read_seqcount_retry - end a seq-read critical section (without barrier) 210 * @s: pointer to seqcount_t 211 * @start: count, from read_seqcount_begin 212 * Returns: 1 if retry is required, else 0 213 * 214 * __read_seqcount_retry is like read_seqcount_retry, but has no smp_rmb() 215 * barrier. Callers should ensure that smp_rmb() or equivalent ordering is 216 * provided before actually loading any of the variables that are to be 217 * protected in this critical section. 218 * 219 * Use carefully, only in critical code, and comment how the barrier is 220 * provided. 221 */ 222 static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start) 223 { 224 kcsan_atomic_next(0); 225 return unlikely(READ_ONCE(s->sequence) != start); 226 } 227 228 /** 229 * read_seqcount_retry - end a seq-read critical section 230 * @s: pointer to seqcount_t 231 * @start: count, from read_seqcount_begin 232 * Returns: 1 if retry is required, else 0 233 * 234 * read_seqcount_retry closes a read critical section of the given seqcount. 235 * If the critical section was invalid, it must be ignored (and typically 236 * retried). 237 */ 238 static inline int read_seqcount_retry(const seqcount_t *s, unsigned start) 239 { 240 smp_rmb(); 241 return __read_seqcount_retry(s, start); 242 } 243 244 245 246 static inline void raw_write_seqcount_begin(seqcount_t *s) 247 { 248 kcsan_nestable_atomic_begin(); 249 s->sequence++; 250 smp_wmb(); 251 } 252 253 static inline void raw_write_seqcount_end(seqcount_t *s) 254 { 255 smp_wmb(); 256 s->sequence++; 257 kcsan_nestable_atomic_end(); 258 } 259 260 /** 261 * raw_write_seqcount_barrier - do a seq write barrier 262 * @s: pointer to seqcount_t 263 * 264 * This can be used to provide an ordering guarantee instead of the 265 * usual consistency guarantee. It is one wmb cheaper, because we can 266 * collapse the two back-to-back wmb()s. 267 * 268 * Note that writes surrounding the barrier should be declared atomic (e.g. 269 * via WRITE_ONCE): a) to ensure the writes become visible to other threads 270 * atomically, avoiding compiler optimizations; b) to document which writes are 271 * meant to propagate to the reader critical section. This is necessary because 272 * neither writes before and after the barrier are enclosed in a seq-writer 273 * critical section that would ensure readers are aware of ongoing writes. 274 * 275 * seqcount_t seq; 276 * bool X = true, Y = false; 277 * 278 * void read(void) 279 * { 280 * bool x, y; 281 * 282 * do { 283 * int s = read_seqcount_begin(&seq); 284 * 285 * x = X; y = Y; 286 * 287 * } while (read_seqcount_retry(&seq, s)); 288 * 289 * BUG_ON(!x && !y); 290 * } 291 * 292 * void write(void) 293 * { 294 * WRITE_ONCE(Y, true); 295 * 296 * raw_write_seqcount_barrier(seq); 297 * 298 * WRITE_ONCE(X, false); 299 * } 300 */ 301 static inline void raw_write_seqcount_barrier(seqcount_t *s) 302 { 303 kcsan_nestable_atomic_begin(); 304 s->sequence++; 305 smp_wmb(); 306 s->sequence++; 307 kcsan_nestable_atomic_end(); 308 } 309 310 static inline int raw_read_seqcount_latch(seqcount_t *s) 311 { 312 /* Pairs with the first smp_wmb() in raw_write_seqcount_latch() */ 313 int seq = READ_ONCE(s->sequence); /* ^^^ */ 314 return seq; 315 } 316 317 /** 318 * raw_write_seqcount_latch - redirect readers to even/odd copy 319 * @s: pointer to seqcount_t 320 * 321 * The latch technique is a multiversion concurrency control method that allows 322 * queries during non-atomic modifications. If you can guarantee queries never 323 * interrupt the modification -- e.g. the concurrency is strictly between CPUs 324 * -- you most likely do not need this. 325 * 326 * Where the traditional RCU/lockless data structures rely on atomic 327 * modifications to ensure queries observe either the old or the new state the 328 * latch allows the same for non-atomic updates. The trade-off is doubling the 329 * cost of storage; we have to maintain two copies of the entire data 330 * structure. 331 * 332 * Very simply put: we first modify one copy and then the other. This ensures 333 * there is always one copy in a stable state, ready to give us an answer. 334 * 335 * The basic form is a data structure like: 336 * 337 * struct latch_struct { 338 * seqcount_t seq; 339 * struct data_struct data[2]; 340 * }; 341 * 342 * Where a modification, which is assumed to be externally serialized, does the 343 * following: 344 * 345 * void latch_modify(struct latch_struct *latch, ...) 346 * { 347 * smp_wmb(); <- Ensure that the last data[1] update is visible 348 * latch->seq++; 349 * smp_wmb(); <- Ensure that the seqcount update is visible 350 * 351 * modify(latch->data[0], ...); 352 * 353 * smp_wmb(); <- Ensure that the data[0] update is visible 354 * latch->seq++; 355 * smp_wmb(); <- Ensure that the seqcount update is visible 356 * 357 * modify(latch->data[1], ...); 358 * } 359 * 360 * The query will have a form like: 361 * 362 * struct entry *latch_query(struct latch_struct *latch, ...) 363 * { 364 * struct entry *entry; 365 * unsigned seq, idx; 366 * 367 * do { 368 * seq = raw_read_seqcount_latch(&latch->seq); 369 * 370 * idx = seq & 0x01; 371 * entry = data_query(latch->data[idx], ...); 372 * 373 * smp_rmb(); 374 * } while (seq != latch->seq); 375 * 376 * return entry; 377 * } 378 * 379 * So during the modification, queries are first redirected to data[1]. Then we 380 * modify data[0]. When that is complete, we redirect queries back to data[0] 381 * and we can modify data[1]. 382 * 383 * NOTE: The non-requirement for atomic modifications does _NOT_ include 384 * the publishing of new entries in the case where data is a dynamic 385 * data structure. 386 * 387 * An iteration might start in data[0] and get suspended long enough 388 * to miss an entire modification sequence, once it resumes it might 389 * observe the new entry. 390 * 391 * NOTE: When data is a dynamic data structure; one should use regular RCU 392 * patterns to manage the lifetimes of the objects within. 393 */ 394 static inline void raw_write_seqcount_latch(seqcount_t *s) 395 { 396 smp_wmb(); /* prior stores before incrementing "sequence" */ 397 s->sequence++; 398 smp_wmb(); /* increment "sequence" before following stores */ 399 } 400 401 /* 402 * Sequence counter only version assumes that callers are using their 403 * own mutexing. 404 */ 405 static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass) 406 { 407 raw_write_seqcount_begin(s); 408 seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_); 409 } 410 411 static inline void write_seqcount_begin(seqcount_t *s) 412 { 413 write_seqcount_begin_nested(s, 0); 414 } 415 416 static inline void write_seqcount_end(seqcount_t *s) 417 { 418 seqcount_release(&s->dep_map, _RET_IP_); 419 raw_write_seqcount_end(s); 420 } 421 422 /** 423 * write_seqcount_invalidate - invalidate in-progress read-side seq operations 424 * @s: pointer to seqcount_t 425 * 426 * After write_seqcount_invalidate, no read-side seq operations will complete 427 * successfully and see data older than this. 428 */ 429 static inline void write_seqcount_invalidate(seqcount_t *s) 430 { 431 smp_wmb(); 432 kcsan_nestable_atomic_begin(); 433 s->sequence+=2; 434 kcsan_nestable_atomic_end(); 435 } 436 437 typedef struct { 438 struct seqcount seqcount; 439 spinlock_t lock; 440 } seqlock_t; 441 442 /* 443 * These macros triggered gcc-3.x compile-time problems. We think these are 444 * OK now. Be cautious. 445 */ 446 #define __SEQLOCK_UNLOCKED(lockname) \ 447 { \ 448 .seqcount = SEQCNT_ZERO(lockname), \ 449 .lock = __SPIN_LOCK_UNLOCKED(lockname) \ 450 } 451 452 #define seqlock_init(x) \ 453 do { \ 454 seqcount_init(&(x)->seqcount); \ 455 spin_lock_init(&(x)->lock); \ 456 } while (0) 457 458 #define DEFINE_SEQLOCK(x) \ 459 seqlock_t x = __SEQLOCK_UNLOCKED(x) 460 461 /* 462 * Read side functions for starting and finalizing a read side section. 463 */ 464 static inline unsigned read_seqbegin(const seqlock_t *sl) 465 { 466 unsigned ret = read_seqcount_begin(&sl->seqcount); 467 468 kcsan_atomic_next(0); /* non-raw usage, assume closing read_seqretry() */ 469 kcsan_flat_atomic_begin(); 470 return ret; 471 } 472 473 static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) 474 { 475 /* 476 * Assume not nested: read_seqretry() may be called multiple times when 477 * completing read critical section. 478 */ 479 kcsan_flat_atomic_end(); 480 481 return read_seqcount_retry(&sl->seqcount, start); 482 } 483 484 /* 485 * Lock out other writers and update the count. 486 * Acts like a normal spin_lock/unlock. 487 * Don't need preempt_disable() because that is in the spin_lock already. 488 */ 489 static inline void write_seqlock(seqlock_t *sl) 490 { 491 spin_lock(&sl->lock); 492 write_seqcount_begin(&sl->seqcount); 493 } 494 495 static inline void write_sequnlock(seqlock_t *sl) 496 { 497 write_seqcount_end(&sl->seqcount); 498 spin_unlock(&sl->lock); 499 } 500 501 static inline void write_seqlock_bh(seqlock_t *sl) 502 { 503 spin_lock_bh(&sl->lock); 504 write_seqcount_begin(&sl->seqcount); 505 } 506 507 static inline void write_sequnlock_bh(seqlock_t *sl) 508 { 509 write_seqcount_end(&sl->seqcount); 510 spin_unlock_bh(&sl->lock); 511 } 512 513 static inline void write_seqlock_irq(seqlock_t *sl) 514 { 515 spin_lock_irq(&sl->lock); 516 write_seqcount_begin(&sl->seqcount); 517 } 518 519 static inline void write_sequnlock_irq(seqlock_t *sl) 520 { 521 write_seqcount_end(&sl->seqcount); 522 spin_unlock_irq(&sl->lock); 523 } 524 525 static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl) 526 { 527 unsigned long flags; 528 529 spin_lock_irqsave(&sl->lock, flags); 530 write_seqcount_begin(&sl->seqcount); 531 return flags; 532 } 533 534 #define write_seqlock_irqsave(lock, flags) \ 535 do { flags = __write_seqlock_irqsave(lock); } while (0) 536 537 static inline void 538 write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags) 539 { 540 write_seqcount_end(&sl->seqcount); 541 spin_unlock_irqrestore(&sl->lock, flags); 542 } 543 544 /* 545 * A locking reader exclusively locks out other writers and locking readers, 546 * but doesn't update the sequence number. Acts like a normal spin_lock/unlock. 547 * Don't need preempt_disable() because that is in the spin_lock already. 548 */ 549 static inline void read_seqlock_excl(seqlock_t *sl) 550 { 551 spin_lock(&sl->lock); 552 } 553 554 static inline void read_sequnlock_excl(seqlock_t *sl) 555 { 556 spin_unlock(&sl->lock); 557 } 558 559 /** 560 * read_seqbegin_or_lock - begin a sequence number check or locking block 561 * @lock: sequence lock 562 * @seq : sequence number to be checked 563 * 564 * First try it once optimistically without taking the lock. If that fails, 565 * take the lock. The sequence number is also used as a marker for deciding 566 * whether to be a reader (even) or writer (odd). 567 * N.B. seq must be initialized to an even number to begin with. 568 */ 569 static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq) 570 { 571 if (!(*seq & 1)) /* Even */ 572 *seq = read_seqbegin(lock); 573 else /* Odd */ 574 read_seqlock_excl(lock); 575 } 576 577 static inline int need_seqretry(seqlock_t *lock, int seq) 578 { 579 return !(seq & 1) && read_seqretry(lock, seq); 580 } 581 582 static inline void done_seqretry(seqlock_t *lock, int seq) 583 { 584 if (seq & 1) 585 read_sequnlock_excl(lock); 586 } 587 588 static inline void read_seqlock_excl_bh(seqlock_t *sl) 589 { 590 spin_lock_bh(&sl->lock); 591 } 592 593 static inline void read_sequnlock_excl_bh(seqlock_t *sl) 594 { 595 spin_unlock_bh(&sl->lock); 596 } 597 598 static inline void read_seqlock_excl_irq(seqlock_t *sl) 599 { 600 spin_lock_irq(&sl->lock); 601 } 602 603 static inline void read_sequnlock_excl_irq(seqlock_t *sl) 604 { 605 spin_unlock_irq(&sl->lock); 606 } 607 608 static inline unsigned long __read_seqlock_excl_irqsave(seqlock_t *sl) 609 { 610 unsigned long flags; 611 612 spin_lock_irqsave(&sl->lock, flags); 613 return flags; 614 } 615 616 #define read_seqlock_excl_irqsave(lock, flags) \ 617 do { flags = __read_seqlock_excl_irqsave(lock); } while (0) 618 619 static inline void 620 read_sequnlock_excl_irqrestore(seqlock_t *sl, unsigned long flags) 621 { 622 spin_unlock_irqrestore(&sl->lock, flags); 623 } 624 625 static inline unsigned long 626 read_seqbegin_or_lock_irqsave(seqlock_t *lock, int *seq) 627 { 628 unsigned long flags = 0; 629 630 if (!(*seq & 1)) /* Even */ 631 *seq = read_seqbegin(lock); 632 else /* Odd */ 633 read_seqlock_excl_irqsave(lock, flags); 634 635 return flags; 636 } 637 638 static inline void 639 done_seqretry_irqrestore(seqlock_t *lock, int seq, unsigned long flags) 640 { 641 if (seq & 1) 642 read_sequnlock_excl_irqrestore(lock, flags); 643 } 644 #endif /* __LINUX_SEQLOCK_H */ 645