// SPDX-License-Identifier: GPL-2.0-or-later
#include <linux/bug.h>
#include <linux/compiler.h>
#include <linux/export.h>
#include <linux/percpu.h>
#include <linux/processor.h>
#include <linux/smp.h>
#include <linux/topology.h>
#include <linux/sched/clock.h>
#include <asm/qspinlock.h>
#include <asm/paravirt.h>

#define MAX_NODES	4

struct qnode {
	struct qnode	*next;
	struct qspinlock *lock;
	int		cpu;
	int		yield_cpu;
	u8		locked; /* 1 if lock acquired */
};

struct qnodes {
	int		count;
	struct qnode nodes[MAX_NODES];
};

/* Tuning parameters */
static int steal_spins __read_mostly = (1 << 5);
static int remote_steal_spins __read_mostly = (1 << 2);
#if _Q_SPIN_TRY_LOCK_STEAL == 1
static const bool maybe_stealers = true;
#else
static bool maybe_stealers __read_mostly = true;
#endif
static int head_spins __read_mostly = (1 << 8);

static bool pv_yield_owner __read_mostly = true;
static bool pv_yield_allow_steal __read_mostly = false;
static bool pv_spin_on_preempted_owner __read_mostly = false;
static bool pv_sleepy_lock __read_mostly = true;
static bool pv_sleepy_lock_sticky __read_mostly = false;
static u64 pv_sleepy_lock_interval_ns __read_mostly = 0;
static int pv_sleepy_lock_factor __read_mostly = 256;
static bool pv_yield_prev __read_mostly = true;
static bool pv_yield_propagate_owner __read_mostly = true;
static bool pv_prod_head __read_mostly = false;

static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
static DEFINE_PER_CPU_ALIGNED(u64, sleepy_lock_seen_clock);

#if _Q_SPIN_SPEC_BARRIER == 1
#define spec_barrier() do { asm volatile("ori 31,31,0" ::: "memory"); } while (0)
#else
#define spec_barrier() do { } while (0)
#endif

static __always_inline bool recently_sleepy(void)
{
	/* pv_sleepy_lock is true when this is called */
	if (pv_sleepy_lock_interval_ns) {
		u64 seen = this_cpu_read(sleepy_lock_seen_clock);

		if (seen) {
			u64 delta = sched_clock() - seen;
			if (delta < pv_sleepy_lock_interval_ns)
				return true;
			this_cpu_write(sleepy_lock_seen_clock, 0);
		}
	}

	return false;
}

static __always_inline int get_steal_spins(bool paravirt, bool sleepy)
{
	if (paravirt && sleepy)
		return steal_spins * pv_sleepy_lock_factor;
	else
		return steal_spins;
}

static __always_inline int get_remote_steal_spins(bool paravirt, bool sleepy)
{
	if (paravirt && sleepy)
		return remote_steal_spins * pv_sleepy_lock_factor;
	else
		return remote_steal_spins;
}

static __always_inline int get_head_spins(bool paravirt, bool sleepy)
{
	if (paravirt && sleepy)
		return head_spins * pv_sleepy_lock_factor;
	else
		return head_spins;
}

static inline u32 encode_tail_cpu(int cpu)
{
	return (cpu + 1) << _Q_TAIL_CPU_OFFSET;
}

static inline int decode_tail_cpu(u32 val)
{
	return (val >> _Q_TAIL_CPU_OFFSET) - 1;
}

static inline int get_owner_cpu(u32 val)
{
	return (val & _Q_OWNER_CPU_MASK) >> _Q_OWNER_CPU_OFFSET;
}
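/*
 * Illustrative example of the tail encoding above (a sketch; the _Q_*
 * offsets and masks are defined in the qspinlock headers, not in this
 * file):
 *
 *	u32 tail = encode_tail_cpu(5);    // (5 + 1) << _Q_TAIL_CPU_OFFSET
 *	int cpu  = decode_tail_cpu(tail); // back to 5
 *
 * The +1 bias means a zero tail field always reads as "no queued waiters",
 * so CPU 0 can still be distinguished from an empty queue.
 */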
/*
 * Try to acquire the lock if it was not already locked. If the tail matches
 * mytail then clear it, otherwise leave it unchanged. Return previous value.
 *
 * This is used by the head of the queue to acquire the lock and clean up
 * its tail if it was the last one queued.
 */
static __always_inline u32 trylock_clean_tail(struct qspinlock *lock, u32 tail)
{
	u32 newval = queued_spin_encode_locked_val();
	u32 prev, tmp;

	asm volatile(
"1:	lwarx	%0,0,%2,%7	# trylock_clean_tail			\n"
	/* This test is necessary if there could be stealers */
"	andi.	%1,%0,%5						\n"
"	bne	3f							\n"
	/* Test whether the lock tail == mytail */
"	and	%1,%0,%6						\n"
"	cmpw	0,%1,%3							\n"
	/* Merge the new locked value */
"	or	%1,%1,%4						\n"
"	bne	2f							\n"
	/* If the lock tail matched, then clear it, otherwise leave it. */
"	andc	%1,%1,%6						\n"
"2:	stwcx.	%1,0,%2							\n"
"	bne-	1b							\n"
"\t"	PPC_ACQUIRE_BARRIER						"\n"
"3:									\n"
	: "=&r" (prev), "=&r" (tmp)
	: "r" (&lock->val), "r"(tail), "r" (newval),
	  "i" (_Q_LOCKED_VAL),
	  "r" (_Q_TAIL_CPU_MASK),
	  "i" (_Q_SPIN_EH_HINT)
	: "cr0", "memory");

	return prev;
}

/*
 * Publish our tail, replacing previous tail. Return previous value.
 *
 * This provides a release barrier for publishing node, this pairs with the
 * acquire barrier in get_tail_qnode() when the next CPU finds this tail
 * value.
 */
static __always_inline u32 publish_tail_cpu(struct qspinlock *lock, u32 tail)
{
	u32 prev, tmp;

	kcsan_release();

	asm volatile(
"\t"	PPC_RELEASE_BARRIER						"\n"
"1:	lwarx	%0,0,%2		# publish_tail_cpu			\n"
"	andc	%1,%0,%4						\n"
"	or	%1,%1,%3						\n"
"	stwcx.	%1,0,%2							\n"
"	bne-	1b							\n"
	: "=&r" (prev), "=&r"(tmp)
	: "r" (&lock->val), "r" (tail), "r"(_Q_TAIL_CPU_MASK)
	: "cr0", "memory");

	return prev;
}

static __always_inline u32 set_mustq(struct qspinlock *lock)
{
	u32 prev;

	asm volatile(
"1:	lwarx	%0,0,%1		# set_mustq				\n"
"	or	%0,%0,%2						\n"
"	stwcx.	%0,0,%1							\n"
"	bne-	1b							\n"
	: "=&r" (prev)
	: "r" (&lock->val), "r" (_Q_MUST_Q_VAL)
	: "cr0", "memory");

	return prev;
}

static __always_inline u32 clear_mustq(struct qspinlock *lock)
{
	u32 prev;

	asm volatile(
"1:	lwarx	%0,0,%1		# clear_mustq				\n"
"	andc	%0,%0,%2						\n"
"	stwcx.	%0,0,%1							\n"
"	bne-	1b							\n"
	: "=&r" (prev)
	: "r" (&lock->val), "r" (_Q_MUST_Q_VAL)
	: "cr0", "memory");

	return prev;
}

static __always_inline bool try_set_sleepy(struct qspinlock *lock, u32 old)
{
	u32 prev;
	u32 new = old | _Q_SLEEPY_VAL;

	BUG_ON(!(old & _Q_LOCKED_VAL));
	BUG_ON(old & _Q_SLEEPY_VAL);

	asm volatile(
"1:	lwarx	%0,0,%1		# try_set_sleepy			\n"
"	cmpw	0,%0,%2							\n"
"	bne-	2f							\n"
"	stwcx.	%3,0,%1							\n"
"	bne-	1b							\n"
"2:									\n"
	: "=&r" (prev)
	: "r" (&lock->val), "r"(old), "r" (new)
	: "cr0", "memory");

	return likely(prev == old);
}
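/*
 * Summary of the sleepy-lock heuristic implemented by the helpers above and
 * below: when a lock owner is seen preempted, the lock word is marked with
 * _Q_SLEEPY_VAL and/or a per-CPU timestamp is recorded (the timestamp is
 * only used when pv_sleepy_lock_interval_ns is non-zero). While a lock is
 * considered sleepy, the spin limits are scaled by pv_sleepy_lock_factor;
 * e.g. with the defaults in this file, steal_spins = 32 and
 * pv_sleepy_lock_factor = 256 allow up to 32 * 256 = 8192 steal iterations
 * before queueing.
 */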
static __always_inline void seen_sleepy_owner(struct qspinlock *lock, u32 val)
{
	if (pv_sleepy_lock) {
		if (pv_sleepy_lock_interval_ns)
			this_cpu_write(sleepy_lock_seen_clock, sched_clock());
		if (!(val & _Q_SLEEPY_VAL))
			try_set_sleepy(lock, val);
	}
}

static __always_inline void seen_sleepy_lock(void)
{
	if (pv_sleepy_lock && pv_sleepy_lock_interval_ns)
		this_cpu_write(sleepy_lock_seen_clock, sched_clock());
}

static __always_inline void seen_sleepy_node(struct qspinlock *lock, u32 val)
{
	if (pv_sleepy_lock) {
		if (pv_sleepy_lock_interval_ns)
			this_cpu_write(sleepy_lock_seen_clock, sched_clock());
		if (val & _Q_LOCKED_VAL) {
			if (!(val & _Q_SLEEPY_VAL))
				try_set_sleepy(lock, val);
		}
	}
}

static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val)
{
	int cpu = decode_tail_cpu(val);
	struct qnodes *qnodesp = per_cpu_ptr(&qnodes, cpu);
	int idx;

	/*
	 * After publishing the new tail and finding a previous tail in the
	 * previous val (which is the control dependency), this barrier
	 * orders the release barrier in publish_tail_cpu performed by the
	 * last CPU, with subsequently looking at its qnode structures
	 * after the barrier.
	 */
	smp_acquire__after_ctrl_dep();

	for (idx = 0; idx < MAX_NODES; idx++) {
		struct qnode *qnode = &qnodesp->nodes[idx];
		if (qnode->lock == lock)
			return qnode;
	}

	BUG();
}

/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
static __always_inline bool __yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool mustq)
{
	int owner;
	u32 yield_count;
	bool preempted = false;

	BUG_ON(!(val & _Q_LOCKED_VAL));

	if (!paravirt)
		goto relax;

	if (!pv_yield_owner)
		goto relax;

	owner = get_owner_cpu(val);
	yield_count = yield_count_of(owner);

	if ((yield_count & 1) == 0)
		goto relax; /* owner vcpu is running */

	spin_end();

	seen_sleepy_owner(lock, val);
	preempted = true;

	/*
	 * Read the lock word after sampling the yield count. On the other side
	 * there may be a wmb because the yield count update is done by the
	 * hypervisor preemption and the value update by the OS, however this
	 * ordering might reduce the chance of out of order accesses and
	 * improve the heuristic.
	 */
	smp_rmb();

	if (READ_ONCE(lock->val) == val) {
		if (mustq)
			clear_mustq(lock);
		yield_to_preempted(owner, yield_count);
		if (mustq)
			set_mustq(lock);
		spin_begin();

		/* Don't relax if we yielded. Maybe we should? */
		return preempted;
	}
	spin_begin();
relax:
	spin_cpu_relax();

	return preempted;
}

/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
static __always_inline bool yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
{
	return __yield_to_locked_owner(lock, val, paravirt, false);
}
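/*
 * Note on the paravirt yield convention used above (yield_count_of(),
 * yield_to_preempted() and vcpu_is_preempted() come from asm/paravirt.h):
 * the per-vCPU yield count is updated by the hypervisor across preemption,
 * so an even value means the vCPU is currently running and an odd value
 * means it is preempted. The yield helpers sample the count, re-check the
 * lock word after an smp_rmb(), and only then direct-yield, so the yield
 * targets a still-valid, still-preempted owner.
 */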
/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
static __always_inline bool yield_head_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
{
	bool mustq = false;

	if ((val & _Q_MUST_Q_VAL) && pv_yield_allow_steal)
		mustq = true;

	return __yield_to_locked_owner(lock, val, paravirt, mustq);
}

static __always_inline void propagate_yield_cpu(struct qnode *node, u32 val, int *set_yield_cpu, bool paravirt)
{
	struct qnode *next;
	int owner;

	if (!paravirt)
		return;
	if (!pv_yield_propagate_owner)
		return;

	owner = get_owner_cpu(val);
	if (*set_yield_cpu == owner)
		return;

	next = READ_ONCE(node->next);
	if (!next)
		return;

	if (vcpu_is_preempted(owner)) {
		next->yield_cpu = owner;
		*set_yield_cpu = owner;
	} else if (*set_yield_cpu != -1) {
		next->yield_cpu = owner;
		*set_yield_cpu = owner;
	}
}

/* Called inside spin_begin() */
static __always_inline bool yield_to_prev(struct qspinlock *lock, struct qnode *node, u32 val, bool paravirt)
{
	int prev_cpu = decode_tail_cpu(val);
	u32 yield_count;
	int yield_cpu;
	bool preempted = false;

	if (!paravirt)
		goto relax;

	if (!pv_yield_propagate_owner)
		goto yield_prev;

	yield_cpu = READ_ONCE(node->yield_cpu);
	if (yield_cpu == -1) {
		/* Propagate back the -1 CPU */
		if (node->next && node->next->yield_cpu != -1)
			node->next->yield_cpu = yield_cpu;
		goto yield_prev;
	}

	yield_count = yield_count_of(yield_cpu);
	if ((yield_count & 1) == 0)
		goto yield_prev; /* owner vcpu is running */

	if (get_owner_cpu(READ_ONCE(lock->val)) != yield_cpu)
		goto yield_prev; /* re-sample lock owner */

	spin_end();

	preempted = true;
	seen_sleepy_node(lock, val);

	smp_rmb();

	if (yield_cpu == node->yield_cpu) {
		if (node->next && node->next->yield_cpu != yield_cpu)
			node->next->yield_cpu = yield_cpu;
		yield_to_preempted(yield_cpu, yield_count);
		spin_begin();
		return preempted;
	}
	spin_begin();

yield_prev:
	if (!pv_yield_prev)
		goto relax;

	yield_count = yield_count_of(prev_cpu);
	if ((yield_count & 1) == 0)
		goto relax; /* owner vcpu is running */

	spin_end();

	preempted = true;
	seen_sleepy_node(lock, val);

	smp_rmb(); /* See __yield_to_locked_owner comment */

	if (!READ_ONCE(node->locked)) {
		yield_to_preempted(prev_cpu, yield_count);
		spin_begin();
		return preempted;
	}
	spin_begin();

relax:
	spin_cpu_relax();

	return preempted;
}

static __always_inline bool steal_break(u32 val, int iters, bool paravirt, bool sleepy)
{
	if (iters >= get_steal_spins(paravirt, sleepy))
		return true;

	if (IS_ENABLED(CONFIG_NUMA) &&
	    (iters >= get_remote_steal_spins(paravirt, sleepy))) {
		int cpu = get_owner_cpu(val);
		if (numa_node_id() != cpu_to_node(cpu))
			return true;
	}
	return false;
}
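/*
 * Note on steal_break() above: a stealer gives up and queues after
 * get_steal_spins() iterations, or earlier after get_remote_steal_spins()
 * iterations (defaults: 32 vs. 4) when the current owner is on a different
 * NUMA node, which biases lock hand-off towards the owner's node.
 */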
static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt)
{
	bool seen_preempted = false;
	bool sleepy = false;
	int iters = 0;
	u32 val;

	if (!steal_spins) {
		/* XXX: should spin_on_preempted_owner do anything here? */
		return false;
	}

	/* Attempt to steal the lock */
	spin_begin();
	do {
		bool preempted = false;

		val = READ_ONCE(lock->val);
		if (val & _Q_MUST_Q_VAL)
			break;
		spec_barrier();

		if (unlikely(!(val & _Q_LOCKED_VAL))) {
			spin_end();
			if (__queued_spin_trylock_steal(lock))
				return true;
			spin_begin();
		} else {
			preempted = yield_to_locked_owner(lock, val, paravirt);
		}

		if (paravirt && pv_sleepy_lock) {
			if (!sleepy) {
				if (val & _Q_SLEEPY_VAL) {
					seen_sleepy_lock();
					sleepy = true;
				} else if (recently_sleepy()) {
					sleepy = true;
				}
			}
			if (pv_sleepy_lock_sticky && seen_preempted &&
			    !(val & _Q_SLEEPY_VAL)) {
				if (try_set_sleepy(lock, val))
					val |= _Q_SLEEPY_VAL;
			}
		}

		if (preempted) {
			seen_preempted = true;
			sleepy = true;
			if (!pv_spin_on_preempted_owner)
				iters++;
			/*
			 * pv_spin_on_preempted_owner doesn't increase iters
			 * while the owner is preempted -- we won't interfere
			 * with it by definition. This could introduce some
			 * latency issue if we continually observe preempted
			 * owners, but hopefully that's a rare corner case of
			 * a badly oversubscribed system.
			 */
		} else {
			iters++;
		}
	} while (!steal_break(val, iters, paravirt, sleepy));

	spin_end();

	return false;
}
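/*
 * Overview of the queueing slow path implemented below (a summary of the
 * code, not additional behaviour):
 *
 *  1. Claim one of the MAX_NODES per-CPU qnodes (one per nesting level,
 *     e.g. task, softirq, hardirq, NMI) and publish this CPU in the lock
 *     word's tail field.
 *  2. If there was a previous tail, link prev->next to this node and spin
 *     on node->locked, yielding to the previous waiter when it appears
 *     preempted.
 *  3. At the head of the queue, spin on the lock word itself (yielding to
 *     the owner) until it is free, then acquire it with
 *     trylock_clean_tail(), which also clears the tail if we were last.
 *  4. Hand the head position to the next waiter by setting next->locked.
 */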
static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, bool paravirt)
{
	struct qnodes *qnodesp;
	struct qnode *next, *node;
	u32 val, old, tail;
	bool seen_preempted = false;
	bool sleepy = false;
	bool mustq = false;
	int idx;
	int set_yield_cpu = -1;
	int iters = 0;

	BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));

	qnodesp = this_cpu_ptr(&qnodes);
	if (unlikely(qnodesp->count >= MAX_NODES)) {
		spec_barrier();
		while (!queued_spin_trylock(lock))
			cpu_relax();
		return;
	}

	idx = qnodesp->count++;
	/*
	 * Ensure that we increment the head node->count before initialising
	 * the actual node. If the compiler is kind enough to reorder these
	 * stores, then an IRQ could overwrite our assignments.
	 */
	barrier();
	node = &qnodesp->nodes[idx];
	node->next = NULL;
	node->lock = lock;
	node->cpu = smp_processor_id();
	node->yield_cpu = -1;
	node->locked = 0;

	tail = encode_tail_cpu(node->cpu);

	/*
	 * Assign all attributes of a node before it can be published.
	 * Issues an lwsync, serving as a release barrier, as well as a
	 * compiler barrier.
	 */
	old = publish_tail_cpu(lock, tail);

	/*
	 * If there was a previous node; link it and wait until reaching the
	 * head of the waitqueue.
	 */
	if (old & _Q_TAIL_CPU_MASK) {
		struct qnode *prev = get_tail_qnode(lock, old);

		/* Link @node into the waitqueue. */
		WRITE_ONCE(prev->next, node);

		/* Wait for mcs node lock to be released */
		spin_begin();
		while (!READ_ONCE(node->locked)) {
			spec_barrier();

			if (yield_to_prev(lock, node, old, paravirt))
				seen_preempted = true;
		}
		spec_barrier();
		spin_end();

		/* Clear out stale propagated yield_cpu */
		if (paravirt && pv_yield_propagate_owner && node->yield_cpu != -1)
			node->yield_cpu = -1;

		smp_rmb(); /* acquire barrier for the mcs lock */

		/*
		 * Generic qspinlocks have this prefetch here, but it seems
		 * like it could cause additional line transitions because
		 * the waiter will keep loading from it.
		 */
		if (_Q_SPIN_PREFETCH_NEXT) {
			next = READ_ONCE(node->next);
			if (next)
				prefetchw(next);
		}
	}

	/* We're at the head of the waitqueue, wait for the lock. */
again:
	spin_begin();
	for (;;) {
		bool preempted;

		val = READ_ONCE(lock->val);
		if (!(val & _Q_LOCKED_VAL))
			break;
		spec_barrier();

		if (paravirt && pv_sleepy_lock && maybe_stealers) {
			if (!sleepy) {
				if (val & _Q_SLEEPY_VAL) {
					seen_sleepy_lock();
					sleepy = true;
				} else if (recently_sleepy()) {
					sleepy = true;
				}
			}
			if (pv_sleepy_lock_sticky && seen_preempted &&
			    !(val & _Q_SLEEPY_VAL)) {
				if (try_set_sleepy(lock, val))
					val |= _Q_SLEEPY_VAL;
			}
		}

		propagate_yield_cpu(node, val, &set_yield_cpu, paravirt);
		preempted = yield_head_to_locked_owner(lock, val, paravirt);
		if (!maybe_stealers)
			continue;

		if (preempted)
			seen_preempted = true;

		if (paravirt && preempted) {
			sleepy = true;

			if (!pv_spin_on_preempted_owner)
				iters++;
		} else {
			iters++;
		}

		if (!mustq && iters >= get_head_spins(paravirt, sleepy)) {
			mustq = true;
			set_mustq(lock);
			val |= _Q_MUST_Q_VAL;
		}
	}
	spec_barrier();
	spin_end();

	/* If we're the last queued, must clean up the tail. */
	old = trylock_clean_tail(lock, tail);
	if (unlikely(old & _Q_LOCKED_VAL)) {
		BUG_ON(!maybe_stealers);
		goto again; /* Can only be true if maybe_stealers. */
	}

	if ((old & _Q_TAIL_CPU_MASK) == tail)
		goto release; /* We were the tail, no next. */

	/* There is a next, must wait for node->next != NULL (MCS protocol) */
	next = READ_ONCE(node->next);
	if (!next) {
		spin_begin();
		while (!(next = READ_ONCE(node->next)))
			cpu_relax();
		spin_end();
	}
	spec_barrier();

	/*
	 * Unlock the next mcs waiter node. Release barrier is not required
	 * here because the acquirer is only accessing the lock word, and
	 * the acquire barrier we took the lock with orders that update vs
	 * this store to locked. The corresponding barrier is the smp_rmb()
	 * acquire barrier for mcs lock, above.
	 */
	if (paravirt && pv_prod_head) {
		int next_cpu = next->cpu;
		WRITE_ONCE(next->locked, 1);
		if (_Q_SPIN_MISO)
			asm volatile("miso" ::: "memory");
		if (vcpu_is_preempted(next_cpu))
			prod_cpu(next_cpu);
	} else {
		WRITE_ONCE(next->locked, 1);
		if (_Q_SPIN_MISO)
			asm volatile("miso" ::: "memory");
	}

release:
	/*
	 * Clear the lock before releasing the node, as another CPU might see stale
	 * values if an interrupt occurs after we increment qnodesp->count
	 * but before node->lock is initialized. The barrier ensures that
	 * there are no further stores to the node after it has been released.
	 */
	node->lock = NULL;
	barrier();
	qnodesp->count--;
}
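/*
 * Starvation avoidance in the queue code above: once the head waiter has
 * spun for get_head_spins() iterations it sets _Q_MUST_Q_VAL, which makes
 * try_to_steal_lock() stop stealing so new arrivals queue behind it. The
 * bit is temporarily cleared around a yield to a preempted owner when
 * pv_yield_allow_steal is set, allowing stealing while the head cannot
 * make progress anyway.
 */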
void queued_spin_lock_slowpath(struct qspinlock *lock)
{
	/*
	 * This looks funny, but it induces the compiler to inline both
	 * sides of the branch rather than share code as when the condition
	 * is passed as the paravirt argument to the functions.
	 */
	if (IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) && is_shared_processor()) {
		if (try_to_steal_lock(lock, true)) {
			spec_barrier();
			return;
		}
		queued_spin_lock_mcs_queue(lock, true);
	} else {
		if (try_to_steal_lock(lock, false)) {
			spec_barrier();
			return;
		}
		queued_spin_lock_mcs_queue(lock, false);
	}
}
EXPORT_SYMBOL(queued_spin_lock_slowpath);

#ifdef CONFIG_PARAVIRT_SPINLOCKS
void pv_spinlocks_init(void)
{
}
#endif

#include <linux/debugfs.h>
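/*
 * The tunables below are exposed through debugfs. Illustrative usage (a
 * sketch, assuming debugfs is mounted at /sys/kernel/debug and that
 * arch_debugfs_dir is the "powerpc" directory):
 *
 *	cat /sys/kernel/debug/powerpc/qspl_steal_spins
 *	echo 64 > /sys/kernel/debug/powerpc/qspl_steal_spins
 */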
static int steal_spins_set(void *data, u64 val)
{
#if _Q_SPIN_TRY_LOCK_STEAL == 1
	/* maybe_stealers remains true */
	steal_spins = val;
#else
	static DEFINE_MUTEX(lock);

	/*
	 * The lock slow path has a !maybe_stealers case that can assume
	 * the head of queue will not see concurrent waiters. That waiter
	 * is unsafe in the presence of stealers, so must keep them away
	 * from one another.
	 */

	mutex_lock(&lock);
	if (val && !steal_spins) {
		maybe_stealers = true;
		/* wait for queue head waiter to go away */
		synchronize_rcu();
		steal_spins = val;
	} else if (!val && steal_spins) {
		steal_spins = val;
		/* wait for all possible stealers to go away */
		synchronize_rcu();
		maybe_stealers = false;
	} else {
		steal_spins = val;
	}
	mutex_unlock(&lock);
#endif

	return 0;
}

static int steal_spins_get(void *data, u64 *val)
{
	*val = steal_spins;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_steal_spins, steal_spins_get, steal_spins_set, "%llu\n");

static int remote_steal_spins_set(void *data, u64 val)
{
	remote_steal_spins = val;

	return 0;
}

static int remote_steal_spins_get(void *data, u64 *val)
{
	*val = remote_steal_spins;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_remote_steal_spins, remote_steal_spins_get, remote_steal_spins_set, "%llu\n");

static int head_spins_set(void *data, u64 val)
{
	head_spins = val;

	return 0;
}

static int head_spins_get(void *data, u64 *val)
{
	*val = head_spins;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_head_spins, head_spins_get, head_spins_set, "%llu\n");

static int pv_yield_owner_set(void *data, u64 val)
{
	pv_yield_owner = !!val;

	return 0;
}

static int pv_yield_owner_get(void *data, u64 *val)
{
	*val = pv_yield_owner;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_owner, pv_yield_owner_get, pv_yield_owner_set, "%llu\n");

static int pv_yield_allow_steal_set(void *data, u64 val)
{
	pv_yield_allow_steal = !!val;

	return 0;
}

static int pv_yield_allow_steal_get(void *data, u64 *val)
{
	*val = pv_yield_allow_steal;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_allow_steal, pv_yield_allow_steal_get, pv_yield_allow_steal_set, "%llu\n");

static int pv_spin_on_preempted_owner_set(void *data, u64 val)
{
	pv_spin_on_preempted_owner = !!val;

	return 0;
}

static int pv_spin_on_preempted_owner_get(void *data, u64 *val)
{
	*val = pv_spin_on_preempted_owner;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_spin_on_preempted_owner, pv_spin_on_preempted_owner_get, pv_spin_on_preempted_owner_set, "%llu\n");

static int pv_sleepy_lock_set(void *data, u64 val)
{
	pv_sleepy_lock = !!val;

	return 0;
}

static int pv_sleepy_lock_get(void *data, u64 *val)
{
	*val = pv_sleepy_lock;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock, pv_sleepy_lock_get, pv_sleepy_lock_set, "%llu\n");

static int pv_sleepy_lock_sticky_set(void *data, u64 val)
{
	pv_sleepy_lock_sticky = !!val;

	return 0;
}

static int pv_sleepy_lock_sticky_get(void *data, u64 *val)
{
	*val = pv_sleepy_lock_sticky;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_sticky, pv_sleepy_lock_sticky_get, pv_sleepy_lock_sticky_set, "%llu\n");

static int pv_sleepy_lock_interval_ns_set(void *data, u64 val)
{
	pv_sleepy_lock_interval_ns = val;

	return 0;
}

static int pv_sleepy_lock_interval_ns_get(void *data, u64 *val)
{
	*val = pv_sleepy_lock_interval_ns;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_interval_ns, pv_sleepy_lock_interval_ns_get, pv_sleepy_lock_interval_ns_set, "%llu\n");

static int pv_sleepy_lock_factor_set(void *data, u64 val)
{
	pv_sleepy_lock_factor = val;

	return 0;
}

static int pv_sleepy_lock_factor_get(void *data, u64 *val)
{
	*val = pv_sleepy_lock_factor;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_factor, pv_sleepy_lock_factor_get, pv_sleepy_lock_factor_set, "%llu\n");

static int pv_yield_prev_set(void *data, u64 val)
{
	pv_yield_prev = !!val;

	return 0;
}

static int pv_yield_prev_get(void *data, u64 *val)
{
	*val = pv_yield_prev;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_prev, pv_yield_prev_get, pv_yield_prev_set, "%llu\n");

static int pv_yield_propagate_owner_set(void *data, u64 val)
{
	pv_yield_propagate_owner = !!val;

	return 0;
}

static int pv_yield_propagate_owner_get(void *data, u64 *val)
{
	*val = pv_yield_propagate_owner;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_propagate_owner, pv_yield_propagate_owner_get, pv_yield_propagate_owner_set, "%llu\n");

static int pv_prod_head_set(void *data, u64 val)
{
	pv_prod_head = !!val;

	return 0;
}

static int pv_prod_head_get(void *data, u64 *val)
{
	*val = pv_prod_head;

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_pv_prod_head, pv_prod_head_get, pv_prod_head_set, "%llu\n");
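/*
 * The pv_* tunables only influence behaviour when running as a shared
 * processor (paravirt) guest, so spinlock_debugfs_init() below only
 * creates their debugfs files when is_shared_processor() is true.
 */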
debugfs_create_file("qspl_pv_sleepy_lock", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock); 1005 debugfs_create_file("qspl_pv_sleepy_lock_sticky", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_sticky); 1006 debugfs_create_file("qspl_pv_sleepy_lock_interval_ns", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_interval_ns); 1007 debugfs_create_file("qspl_pv_sleepy_lock_factor", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_factor); 1008 debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev); 1009 debugfs_create_file("qspl_pv_yield_propagate_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_propagate_owner); 1010 debugfs_create_file("qspl_pv_prod_head", 0600, arch_debugfs_dir, NULL, &fops_pv_prod_head); 1011 } 1012 1013 return 0; 1014 } 1015 device_initcall(spinlock_debugfs_init); 1016