// SPDX-License-Identifier: GPL-2.0
/*
 * Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR
 * policies)
 */
#include "sched.h"

#include "pelt.h"

int sched_rr_timeslice = RR_TIMESLICE;
int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE;
/* More than 4 hours if BW_SHIFT equals 20. */
static const u64 max_rt_runtime = MAX_BW;

static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);

struct rt_bandwidth def_rt_bandwidth;

static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer)
{
	struct rt_bandwidth *rt_b =
		container_of(timer, struct rt_bandwidth, rt_period_timer);
	int idle = 0;
	int overrun;

	raw_spin_lock(&rt_b->rt_runtime_lock);
	for (;;) {
		overrun = hrtimer_forward_now(timer, rt_b->rt_period);
		if (!overrun)
			break;

		raw_spin_unlock(&rt_b->rt_runtime_lock);
		idle = do_sched_rt_period_timer(rt_b, overrun);
		raw_spin_lock(&rt_b->rt_runtime_lock);
	}
	if (idle)
		rt_b->rt_period_active = 0;
	raw_spin_unlock(&rt_b->rt_runtime_lock);

	return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
}

void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
{
	rt_b->rt_period = ns_to_ktime(period);
	rt_b->rt_runtime = runtime;

	raw_spin_lock_init(&rt_b->rt_runtime_lock);

	hrtimer_init(&rt_b->rt_period_timer, CLOCK_MONOTONIC,
		     HRTIMER_MODE_REL_HARD);
	rt_b->rt_period_timer.function = sched_rt_period_timer;
}

static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
{
	if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
		return;

	raw_spin_lock(&rt_b->rt_runtime_lock);
	if (!rt_b->rt_period_active) {
		rt_b->rt_period_active = 1;
		/*
		 * SCHED_DEADLINE updates the bandwidth, as a run away
		 * RT task with a DL task could hog a CPU. But DL does
		 * not reset the period. If a deadline task was running
		 * without an RT task running, it can cause RT tasks to
		 * throttle when they start up. Kick the timer right away
		 * to update the period.
		 */
		hrtimer_forward_now(&rt_b->rt_period_timer, ns_to_ktime(0));
		hrtimer_start_expires(&rt_b->rt_period_timer,
				      HRTIMER_MODE_ABS_PINNED_HARD);
	}
	raw_spin_unlock(&rt_b->rt_runtime_lock);
}
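
/*
 * Illustrative note (not from the original source): def_rt_bandwidth is
 * initialized from the global sysctls in the core scheduler code, e.g.
 *
 *	sysctl kernel.sched_rt_period_us	# typically 1000000 (1s)
 *	sysctl kernel.sched_rt_runtime_us	# typically 950000 (950ms)
 *
 * With those defaults, RT tasks may consume at most 950ms of every 1s
 * period, leaving roughly 5% of CPU time for non-RT tasks. Setting
 * sched_rt_runtime_us to -1 maps to RUNTIME_INF and disables the
 * throttling driven by the timer above.
 */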

void init_rt_rq(struct rt_rq *rt_rq)
{
	struct rt_prio_array *array;
	int i;

	array = &rt_rq->active;
	for (i = 0; i < MAX_RT_PRIO; i++) {
		INIT_LIST_HEAD(array->queue + i);
		__clear_bit(i, array->bitmap);
	}
	/* delimiter for bitsearch: */
	__set_bit(MAX_RT_PRIO, array->bitmap);

#if defined CONFIG_SMP
	rt_rq->highest_prio.curr = MAX_RT_PRIO-1;
	rt_rq->highest_prio.next = MAX_RT_PRIO-1;
	rt_rq->rt_nr_migratory = 0;
	rt_rq->overloaded = 0;
	plist_head_init(&rt_rq->pushable_tasks);
#endif /* CONFIG_SMP */
	/* We start in dequeued state, because no RT tasks are queued */
	rt_rq->rt_queued = 0;

	rt_rq->rt_time = 0;
	rt_rq->rt_throttled = 0;
	rt_rq->rt_runtime = 0;
	raw_spin_lock_init(&rt_rq->rt_runtime_lock);
}

#ifdef CONFIG_RT_GROUP_SCHED
static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b)
{
	hrtimer_cancel(&rt_b->rt_period_timer);
}

#define rt_entity_is_task(rt_se) (!(rt_se)->my_q)

static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
{
#ifdef CONFIG_SCHED_DEBUG
	WARN_ON_ONCE(!rt_entity_is_task(rt_se));
#endif
	return container_of(rt_se, struct task_struct, rt);
}

static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
{
	return rt_rq->rq;
}

static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
{
	return rt_se->rt_rq;
}

static inline struct rq *rq_of_rt_se(struct sched_rt_entity *rt_se)
{
	struct rt_rq *rt_rq = rt_se->rt_rq;

	return rt_rq->rq;
}

void free_rt_sched_group(struct task_group *tg)
{
	int i;

	if (tg->rt_se)
		destroy_rt_bandwidth(&tg->rt_bandwidth);

	for_each_possible_cpu(i) {
		if (tg->rt_rq)
			kfree(tg->rt_rq[i]);
		if (tg->rt_se)
			kfree(tg->rt_se[i]);
	}

	kfree(tg->rt_rq);
	kfree(tg->rt_se);
}

void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
		struct sched_rt_entity *rt_se, int cpu,
		struct sched_rt_entity *parent)
{
	struct rq *rq = cpu_rq(cpu);

	rt_rq->highest_prio.curr = MAX_RT_PRIO-1;
	rt_rq->rt_nr_boosted = 0;
	rt_rq->rq = rq;
	rt_rq->tg = tg;

	tg->rt_rq[cpu] = rt_rq;
	tg->rt_se[cpu] = rt_se;

	if (!rt_se)
		return;

	if (!parent)
		rt_se->rt_rq = &rq->rt;
	else
		rt_se->rt_rq = parent->my_q;

	rt_se->my_q = rt_rq;
	rt_se->parent = parent;
	INIT_LIST_HEAD(&rt_se->run_list);
}

int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
{
	struct rt_rq *rt_rq;
	struct sched_rt_entity *rt_se;
	int i;

	tg->rt_rq = kcalloc(nr_cpu_ids, sizeof(rt_rq), GFP_KERNEL);
	if (!tg->rt_rq)
		goto err;
	tg->rt_se = kcalloc(nr_cpu_ids, sizeof(rt_se), GFP_KERNEL);
	if (!tg->rt_se)
		goto err;

	init_rt_bandwidth(&tg->rt_bandwidth,
			ktime_to_ns(def_rt_bandwidth.rt_period), 0);

	for_each_possible_cpu(i) {
		rt_rq = kzalloc_node(sizeof(struct rt_rq),
				     GFP_KERNEL, cpu_to_node(i));
		if (!rt_rq)
			goto err;

		rt_se = kzalloc_node(sizeof(struct sched_rt_entity),
				     GFP_KERNEL, cpu_to_node(i));
		if (!rt_se)
			goto err_free_rq;

		init_rt_rq(rt_rq);
		rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime;
		init_tg_rt_entry(tg, rt_rq, rt_se, i, parent->rt_se[i]);
	}

	return 1;

err_free_rq:
	kfree(rt_rq);
err:
	return 0;
}
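
/*
 * Illustrative note (not from the original source): alloc_rt_sched_group()
 * above creates new groups with a runtime of 0, so a freshly created group
 * cannot run RT tasks until bandwidth is granted explicitly, e.g. through
 * the cgroup-v1 cpu controller:
 *
 *	echo 500000 > /sys/fs/cgroup/cpu/<group>/cpu.rt_runtime_us
 *	cat /sys/fs/cgroup/cpu/<group>/cpu.rt_period_us
 *
 * Path and value are examples only; such writes remain subject to the
 * scheduler's RT bandwidth admission checks.
 */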

#else /* CONFIG_RT_GROUP_SCHED */

#define rt_entity_is_task(rt_se) (1)

static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
{
	return container_of(rt_se, struct task_struct, rt);
}

static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
{
	return container_of(rt_rq, struct rq, rt);
}

static inline struct rq *rq_of_rt_se(struct sched_rt_entity *rt_se)
{
	struct task_struct *p = rt_task_of(rt_se);

	return task_rq(p);
}

static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
{
	struct rq *rq = rq_of_rt_se(rt_se);

	return &rq->rt;
}

void free_rt_sched_group(struct task_group *tg) { }

int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
{
	return 1;
}
#endif /* CONFIG_RT_GROUP_SCHED */

#ifdef CONFIG_SMP

static void pull_rt_task(struct rq *this_rq);

static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
{
	/* Try to pull RT tasks here if we lower this rq's prio */
	return rq->online && rq->rt.highest_prio.curr > prev->prio;
}

static inline int rt_overloaded(struct rq *rq)
{
	return atomic_read(&rq->rd->rto_count);
}

static inline void rt_set_overload(struct rq *rq)
{
	if (!rq->online)
		return;

	cpumask_set_cpu(rq->cpu, rq->rd->rto_mask);
	/*
	 * Make sure the mask is visible before we set
	 * the overload count. That is checked to determine
	 * if we should look at the mask. It would be a shame
	 * if we looked at the mask, but the mask was not
	 * updated yet.
	 *
	 * Matched by the barrier in pull_rt_task().
	 */
	smp_wmb();
	atomic_inc(&rq->rd->rto_count);
}

static inline void rt_clear_overload(struct rq *rq)
{
	if (!rq->online)
		return;

	/* the order here really doesn't matter */
	atomic_dec(&rq->rd->rto_count);
	cpumask_clear_cpu(rq->cpu, rq->rd->rto_mask);
}

static void update_rt_migration(struct rt_rq *rt_rq)
{
	if (rt_rq->rt_nr_migratory && rt_rq->rt_nr_total > 1) {
		if (!rt_rq->overloaded) {
			rt_set_overload(rq_of_rt_rq(rt_rq));
			rt_rq->overloaded = 1;
		}
	} else if (rt_rq->overloaded) {
		rt_clear_overload(rq_of_rt_rq(rt_rq));
		rt_rq->overloaded = 0;
	}
}

static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	struct task_struct *p;

	if (!rt_entity_is_task(rt_se))
		return;

	p = rt_task_of(rt_se);
	rt_rq = &rq_of_rt_rq(rt_rq)->rt;

	rt_rq->rt_nr_total++;
	if (p->nr_cpus_allowed > 1)
		rt_rq->rt_nr_migratory++;

	update_rt_migration(rt_rq);
}

static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	struct task_struct *p;

	if (!rt_entity_is_task(rt_se))
		return;

	p = rt_task_of(rt_se);
	rt_rq = &rq_of_rt_rq(rt_rq)->rt;

	rt_rq->rt_nr_total--;
	if (p->nr_cpus_allowed > 1)
		rt_rq->rt_nr_migratory--;

	update_rt_migration(rt_rq);
}

static inline int has_pushable_tasks(struct rq *rq)
{
	return !plist_head_empty(&rq->rt.pushable_tasks);
}

static DEFINE_PER_CPU(struct callback_head, rt_push_head);
static DEFINE_PER_CPU(struct callback_head, rt_pull_head);

static void push_rt_tasks(struct rq *);
static void pull_rt_task(struct rq *);

static inline void rt_queue_push_tasks(struct rq *rq)
{
	if (!has_pushable_tasks(rq))
		return;

	queue_balance_callback(rq, &per_cpu(rt_push_head, rq->cpu), push_rt_tasks);
}

static inline void rt_queue_pull_task(struct rq *rq)
{
	queue_balance_callback(rq, &per_cpu(rt_pull_head, rq->cpu), pull_rt_task);
}

static void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
{
	plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
	plist_node_init(&p->pushable_tasks, p->prio);
	plist_add(&p->pushable_tasks, &rq->rt.pushable_tasks);

	/* Update the highest prio pushable task */
	if (p->prio < rq->rt.highest_prio.next)
		rq->rt.highest_prio.next = p->prio;
}

static void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
{
	plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);

	/* Update the new highest prio pushable task */
	if (has_pushable_tasks(rq)) {
		p = plist_first_entry(&rq->rt.pushable_tasks,
				      struct task_struct, pushable_tasks);
		rq->rt.highest_prio.next = p->prio;
	} else {
		rq->rt.highest_prio.next = MAX_RT_PRIO-1;
	}
}

#else

static inline void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
{
}

static inline void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
{
}

static inline
void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
}

static inline
void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
}

static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
{
	return false;
}

static inline void pull_rt_task(struct rq *this_rq)
{
}

static inline void rt_queue_push_tasks(struct rq *rq)
{
}
#endif /* CONFIG_SMP */

static void enqueue_top_rt_rq(struct rt_rq *rt_rq);
static void dequeue_top_rt_rq(struct rt_rq *rt_rq);

static inline int on_rt_rq(struct sched_rt_entity *rt_se)
{
	return rt_se->on_rq;
}

#ifdef CONFIG_UCLAMP_TASK
/*
 * Verify the fitness of task @p to run on @cpu taking into account the uclamp
 * settings.
 *
 * This check is only important for heterogeneous systems where the uclamp_min
 * value is higher than the capacity of a @cpu. For non-heterogeneous systems
 * this function will always return true.
 *
 * The function will return true if the capacity of the @cpu is >= the
 * uclamp_min and false otherwise.
 *
 * Note that uclamp_min will be clamped to uclamp_max if uclamp_min
 * > uclamp_max.
 */
static inline bool rt_task_fits_capacity(struct task_struct *p, int cpu)
{
	unsigned int min_cap;
	unsigned int max_cap;
	unsigned int cpu_cap;

	/* Only heterogeneous systems can benefit from this check */
	if (!static_branch_unlikely(&sched_asym_cpucapacity))
		return true;

	min_cap = uclamp_eff_value(p, UCLAMP_MIN);
	max_cap = uclamp_eff_value(p, UCLAMP_MAX);

	cpu_cap = capacity_orig_of(cpu);

	return cpu_cap >= min(min_cap, max_cap);
}
#else
static inline bool rt_task_fits_capacity(struct task_struct *p, int cpu)
{
	return true;
}
#endif

#ifdef CONFIG_RT_GROUP_SCHED

static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
{
	if (!rt_rq->tg)
		return RUNTIME_INF;

	return rt_rq->rt_runtime;
}

static inline u64 sched_rt_period(struct rt_rq *rt_rq)
{
	return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period);
}

typedef struct task_group *rt_rq_iter_t;

static inline struct task_group *next_task_group(struct task_group *tg)
{
	do {
		tg = list_entry_rcu(tg->list.next,
			typeof(struct task_group), list);
	} while (&tg->list != &task_groups && task_group_is_autogroup(tg));

	if (&tg->list == &task_groups)
		tg = NULL;

	return tg;
}

#define for_each_rt_rq(rt_rq, iter, rq)					\
	for (iter = container_of(&task_groups, typeof(*iter), list);	\
		(iter = next_task_group(iter)) &&			\
		(rt_rq = iter->rt_rq[cpu_of(rq)]);)

#define for_each_sched_rt_entity(rt_se) \
	for (; rt_se; rt_se = rt_se->parent)

static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
{
	return rt_se->my_q;
}

static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags);
static void dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags);

static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
{
	struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
	struct rq *rq = rq_of_rt_rq(rt_rq);
	struct sched_rt_entity *rt_se;

	int cpu = cpu_of(rq);

	rt_se = rt_rq->tg->rt_se[cpu];

	if (rt_rq->rt_nr_running) {
		if (!rt_se)
			enqueue_top_rt_rq(rt_rq);
		else if (!on_rt_rq(rt_se))
			enqueue_rt_entity(rt_se, 0);

		if (rt_rq->highest_prio.curr < curr->prio)
			resched_curr(rq);
	}
}

static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
{
	struct sched_rt_entity *rt_se;
	int cpu = cpu_of(rq_of_rt_rq(rt_rq));

	rt_se = rt_rq->tg->rt_se[cpu];

	if (!rt_se) {
		dequeue_top_rt_rq(rt_rq);
		/* Kick cpufreq (see the comment in kernel/sched/sched.h). */
		cpufreq_update_util(rq_of_rt_rq(rt_rq), 0);
	}
	else if (on_rt_rq(rt_se))
		dequeue_rt_entity(rt_se, 0);
}

static inline int rt_rq_throttled(struct rt_rq *rt_rq)
{
	return rt_rq->rt_throttled && !rt_rq->rt_nr_boosted;
}

static int rt_se_boosted(struct sched_rt_entity *rt_se)
{
	struct rt_rq *rt_rq = group_rt_rq(rt_se);
	struct task_struct *p;

	if (rt_rq)
		return !!rt_rq->rt_nr_boosted;

	p = rt_task_of(rt_se);
	return p->prio != p->normal_prio;
}

#ifdef CONFIG_SMP
static inline const struct cpumask *sched_rt_period_mask(void)
{
	return this_rq()->rd->span;
}
#else
static inline const struct cpumask *sched_rt_period_mask(void)
{
	return cpu_online_mask;
}
#endif

static inline
struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
{
	return container_of(rt_b, struct task_group, rt_bandwidth)->rt_rq[cpu];
}

static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
{
	return &rt_rq->tg->rt_bandwidth;
}

#else /* !CONFIG_RT_GROUP_SCHED */

static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
{
	return rt_rq->rt_runtime;
}

static inline u64 sched_rt_period(struct rt_rq *rt_rq)
{
	return ktime_to_ns(def_rt_bandwidth.rt_period);
}

typedef struct rt_rq *rt_rq_iter_t;

#define for_each_rt_rq(rt_rq, iter, rq) \
	for ((void) iter, rt_rq = &rq->rt; rt_rq; rt_rq = NULL)

#define for_each_sched_rt_entity(rt_se) \
	for (; rt_se; rt_se = NULL)

static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
{
	return NULL;
}

static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
{
	struct rq *rq = rq_of_rt_rq(rt_rq);

	if (!rt_rq->rt_nr_running)
		return;

	enqueue_top_rt_rq(rt_rq);
	resched_curr(rq);
}

static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
{
	dequeue_top_rt_rq(rt_rq);
}

static inline int rt_rq_throttled(struct rt_rq *rt_rq)
{
	return rt_rq->rt_throttled;
}

static inline const struct cpumask *sched_rt_period_mask(void)
{
	return cpu_online_mask;
}

static inline
struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
{
	return &cpu_rq(cpu)->rt;
}

static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
{
	return &def_rt_bandwidth;
}

#endif /* CONFIG_RT_GROUP_SCHED */

bool sched_rt_bandwidth_account(struct rt_rq *rt_rq)
{
	struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);

	return (hrtimer_active(&rt_b->rt_period_timer) ||
		rt_rq->rt_time < rt_b->rt_runtime);
}
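
/*
 * Illustrative note (not from the original source): with the RT_RUNTIME_SHARE
 * scheduler feature enabled, a runqueue that exhausts its rt_runtime may
 * borrow from the other runqueues in its root domain. For example, with four
 * CPUs and a 950ms/1s budget per CPU, a CPU that runs dry takes 1/4 of each
 * neighbour's spare (rt_runtime - rt_time), never growing its own budget
 * beyond the period. __disable_runtime() below reclaims what was lent when a
 * runqueue goes offline.
 */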

#ifdef CONFIG_SMP
/*
 * We ran out of runtime, see if we can borrow some from our neighbours.
 */
static void do_balance_runtime(struct rt_rq *rt_rq)
{
	struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
	struct root_domain *rd = rq_of_rt_rq(rt_rq)->rd;
	int i, weight;
	u64 rt_period;

	weight = cpumask_weight(rd->span);

	raw_spin_lock(&rt_b->rt_runtime_lock);
	rt_period = ktime_to_ns(rt_b->rt_period);
	for_each_cpu(i, rd->span) {
		struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
		s64 diff;

		if (iter == rt_rq)
			continue;

		raw_spin_lock(&iter->rt_runtime_lock);
		/*
		 * Either all rqs have inf runtime and there's nothing to steal
		 * or __disable_runtime() below sets a specific rq to inf to
		 * indicate it's been disabled and disallow stealing.
		 */
		if (iter->rt_runtime == RUNTIME_INF)
			goto next;

		/*
		 * From runqueues with spare time, take 1/n part of their
		 * spare time, but no more than our period.
		 */
		diff = iter->rt_runtime - iter->rt_time;
		if (diff > 0) {
			diff = div_u64((u64)diff, weight);
			if (rt_rq->rt_runtime + diff > rt_period)
				diff = rt_period - rt_rq->rt_runtime;
			iter->rt_runtime -= diff;
			rt_rq->rt_runtime += diff;
			if (rt_rq->rt_runtime == rt_period) {
				raw_spin_unlock(&iter->rt_runtime_lock);
				break;
			}
		}
next:
		raw_spin_unlock(&iter->rt_runtime_lock);
	}
	raw_spin_unlock(&rt_b->rt_runtime_lock);
}

/*
 * Ensure this RQ takes back all the runtime it lent to its neighbours.
 */
static void __disable_runtime(struct rq *rq)
{
	struct root_domain *rd = rq->rd;
	rt_rq_iter_t iter;
	struct rt_rq *rt_rq;

	if (unlikely(!scheduler_running))
		return;

	for_each_rt_rq(rt_rq, iter, rq) {
		struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
		s64 want;
		int i;

		raw_spin_lock(&rt_b->rt_runtime_lock);
		raw_spin_lock(&rt_rq->rt_runtime_lock);
		/*
		 * Either we're all inf and nobody needs to borrow, or we're
		 * already disabled and thus have nothing to do, or we have
		 * exactly the right amount of runtime to take out.
		 */
		if (rt_rq->rt_runtime == RUNTIME_INF ||
		    rt_rq->rt_runtime == rt_b->rt_runtime)
			goto balanced;
		raw_spin_unlock(&rt_rq->rt_runtime_lock);

		/*
		 * Calculate the difference between what we started out with
		 * and what we currently have, that's the amount of runtime
		 * we lent and now have to reclaim.
		 */
		want = rt_b->rt_runtime - rt_rq->rt_runtime;

		/*
		 * Greedy reclaim, take back as much as we can.
		 */
		for_each_cpu(i, rd->span) {
			struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
			s64 diff;

			/*
			 * Can't reclaim from ourselves or disabled runqueues.
			 */
			if (iter == rt_rq || iter->rt_runtime == RUNTIME_INF)
				continue;

			raw_spin_lock(&iter->rt_runtime_lock);
			if (want > 0) {
				diff = min_t(s64, iter->rt_runtime, want);
				iter->rt_runtime -= diff;
				want -= diff;
			} else {
				iter->rt_runtime -= want;
				want -= want;
			}
			raw_spin_unlock(&iter->rt_runtime_lock);

			if (!want)
				break;
		}

		raw_spin_lock(&rt_rq->rt_runtime_lock);
		/*
		 * We cannot be left wanting - that would mean some runtime
		 * leaked out of the system.
		 */
		BUG_ON(want);
balanced:
		/*
		 * Disable all the borrow logic by pretending we have inf
		 * runtime - in which case borrowing doesn't make sense.
		 */
		rt_rq->rt_runtime = RUNTIME_INF;
		rt_rq->rt_throttled = 0;
		raw_spin_unlock(&rt_rq->rt_runtime_lock);
		raw_spin_unlock(&rt_b->rt_runtime_lock);

		/* Make rt_rq available for pick_next_task() */
		sched_rt_rq_enqueue(rt_rq);
	}
}

static void __enable_runtime(struct rq *rq)
{
	rt_rq_iter_t iter;
	struct rt_rq *rt_rq;

	if (unlikely(!scheduler_running))
		return;

	/*
	 * Reset each runqueue's bandwidth settings
	 */
	for_each_rt_rq(rt_rq, iter, rq) {
		struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);

		raw_spin_lock(&rt_b->rt_runtime_lock);
		raw_spin_lock(&rt_rq->rt_runtime_lock);
		rt_rq->rt_runtime = rt_b->rt_runtime;
		rt_rq->rt_time = 0;
		rt_rq->rt_throttled = 0;
		raw_spin_unlock(&rt_rq->rt_runtime_lock);
		raw_spin_unlock(&rt_b->rt_runtime_lock);
	}
}

static void balance_runtime(struct rt_rq *rt_rq)
{
	if (!sched_feat(RT_RUNTIME_SHARE))
		return;

	if (rt_rq->rt_time > rt_rq->rt_runtime) {
		raw_spin_unlock(&rt_rq->rt_runtime_lock);
		do_balance_runtime(rt_rq);
		raw_spin_lock(&rt_rq->rt_runtime_lock);
	}
}
#else /* !CONFIG_SMP */
static inline void balance_runtime(struct rt_rq *rt_rq) {}
#endif /* CONFIG_SMP */

static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
{
	int i, idle = 1, throttled = 0;
	const struct cpumask *span;

	span = sched_rt_period_mask();
#ifdef CONFIG_RT_GROUP_SCHED
	/*
	 * FIXME: isolated CPUs should really leave the root task group,
	 * whether they are isolcpus or were isolated via cpusets, lest
	 * the timer run on a CPU which does not service all runqueues,
	 * potentially leaving other CPUs indefinitely throttled. If
	 * isolation is really required, the user will turn the throttle
	 * off to kill the perturbations it causes anyway. Meanwhile,
	 * this maintains functionality for boot and/or troubleshooting.
	 */
	if (rt_b == &root_task_group.rt_bandwidth)
		span = cpu_online_mask;
#endif
	for_each_cpu(i, span) {
		int enqueue = 0;
		struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
		struct rq *rq = rq_of_rt_rq(rt_rq);
		int skip;

		/*
		 * When span == cpu_online_mask, taking each rq->lock
		 * can be time-consuming. Try to avoid it when possible.
		 */
		raw_spin_lock(&rt_rq->rt_runtime_lock);
		if (!sched_feat(RT_RUNTIME_SHARE) && rt_rq->rt_runtime != RUNTIME_INF)
			rt_rq->rt_runtime = rt_b->rt_runtime;
		skip = !rt_rq->rt_time && !rt_rq->rt_nr_running;
		raw_spin_unlock(&rt_rq->rt_runtime_lock);
		if (skip)
			continue;

		raw_spin_lock(&rq->lock);
		update_rq_clock(rq);

		if (rt_rq->rt_time) {
			u64 runtime;

			raw_spin_lock(&rt_rq->rt_runtime_lock);
			if (rt_rq->rt_throttled)
				balance_runtime(rt_rq);
			runtime = rt_rq->rt_runtime;
			rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime);
			if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
				rt_rq->rt_throttled = 0;
				enqueue = 1;

				/*
				 * When we're idle and a woken (rt) task is
				 * throttled check_preempt_curr() will set
				 * skip_update and the time between the wakeup
				 * and this unthrottle will get accounted as
				 * 'runtime'.
				 */
				if (rt_rq->rt_nr_running && rq->curr == rq->idle)
					rq_clock_cancel_skipupdate(rq);
			}
			if (rt_rq->rt_time || rt_rq->rt_nr_running)
				idle = 0;
			raw_spin_unlock(&rt_rq->rt_runtime_lock);
		} else if (rt_rq->rt_nr_running) {
			idle = 0;
			if (!rt_rq_throttled(rt_rq))
				enqueue = 1;
		}
		if (rt_rq->rt_throttled)
			throttled = 1;

		if (enqueue)
			sched_rt_rq_enqueue(rt_rq);
		raw_spin_unlock(&rq->lock);
	}

	if (!throttled && (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF))
		return 1;

	return idle;
}

static inline int rt_se_prio(struct sched_rt_entity *rt_se)
{
#ifdef CONFIG_RT_GROUP_SCHED
	struct rt_rq *rt_rq = group_rt_rq(rt_se);

	if (rt_rq)
		return rt_rq->highest_prio.curr;
#endif

	return rt_task_of(rt_se)->prio;
}

static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
{
	u64 runtime = sched_rt_runtime(rt_rq);

	if (rt_rq->rt_throttled)
		return rt_rq_throttled(rt_rq);

	if (runtime >= sched_rt_period(rt_rq))
		return 0;

	balance_runtime(rt_rq);
	runtime = sched_rt_runtime(rt_rq);
	if (runtime == RUNTIME_INF)
		return 0;

	if (rt_rq->rt_time > runtime) {
		struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);

		/*
		 * Don't actually throttle groups that have no runtime assigned
		 * but accrue some time due to boosting.
		 */
		if (likely(rt_b->rt_runtime)) {
			rt_rq->rt_throttled = 1;
			printk_deferred_once("sched: RT throttling activated\n");
		} else {
			/*
			 * In case we did anyway, make it go away,
			 * replenishment is a joke, since it will replenish us
			 * with exactly 0 ns.
			 */
			rt_rq->rt_time = 0;
		}

		if (rt_rq_throttled(rt_rq)) {
			sched_rt_rq_dequeue(rt_rq);
			return 1;
		}
	}

	return 0;
}
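
/*
 * Illustrative note (not from the original source): with the default
 * 950ms runtime in a 1s period, sched_rt_runtime_exceeded() above throttles
 * an rt_rq once its accumulated rt_time crosses 950ms; the rq stays dequeued
 * until the per-period timer (do_sched_rt_period_timer) credits runtime back
 * (rt_time -= min(rt_time, overrun*runtime)) and re-enqueues it. A boosted
 * group with zero assigned runtime is the one exception and is never
 * throttled.
 */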

/*
 * Update the current task's runtime statistics. Skip current tasks that
 * are not in our scheduling class.
 */
static void update_curr_rt(struct rq *rq)
{
	struct task_struct *curr = rq->curr;
	struct sched_rt_entity *rt_se = &curr->rt;
	u64 delta_exec;
	u64 now;

	if (curr->sched_class != &rt_sched_class)
		return;

	now = rq_clock_task(rq);
	delta_exec = now - curr->se.exec_start;
	if (unlikely((s64)delta_exec <= 0))
		return;

	schedstat_set(curr->se.statistics.exec_max,
		      max(curr->se.statistics.exec_max, delta_exec));

	curr->se.sum_exec_runtime += delta_exec;
	account_group_exec_runtime(curr, delta_exec);

	curr->se.exec_start = now;
	cgroup_account_cputime(curr, delta_exec);

	if (!rt_bandwidth_enabled())
		return;

	for_each_sched_rt_entity(rt_se) {
		struct rt_rq *rt_rq = rt_rq_of_se(rt_se);

		if (sched_rt_runtime(rt_rq) != RUNTIME_INF) {
			raw_spin_lock(&rt_rq->rt_runtime_lock);
			rt_rq->rt_time += delta_exec;
			if (sched_rt_runtime_exceeded(rt_rq))
				resched_curr(rq);
			raw_spin_unlock(&rt_rq->rt_runtime_lock);
		}
	}
}

static void
dequeue_top_rt_rq(struct rt_rq *rt_rq)
{
	struct rq *rq = rq_of_rt_rq(rt_rq);

	BUG_ON(&rq->rt != rt_rq);

	if (!rt_rq->rt_queued)
		return;

	BUG_ON(!rq->nr_running);

	sub_nr_running(rq, rt_rq->rt_nr_running);
	rt_rq->rt_queued = 0;

}

static void
enqueue_top_rt_rq(struct rt_rq *rt_rq)
{
	struct rq *rq = rq_of_rt_rq(rt_rq);

	BUG_ON(&rq->rt != rt_rq);

	if (rt_rq->rt_queued)
		return;

	if (rt_rq_throttled(rt_rq))
		return;

	if (rt_rq->rt_nr_running) {
		add_nr_running(rq, rt_rq->rt_nr_running);
		rt_rq->rt_queued = 1;
	}

	/* Kick cpufreq (see the comment in kernel/sched/sched.h). */
	cpufreq_update_util(rq, 0);
}
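
/*
 * Illustrative note (not from the original source): the inc/dec_rt_prio_smp()
 * helpers below publish this runqueue's highest RT priority to the root
 * domain's cpupri structure, which is what cpupri_find()/find_lowest_rq()
 * later consult to locate a CPU running at a lower priority without scanning
 * every runqueue.
 */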

#if defined CONFIG_SMP

static void
inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
{
	struct rq *rq = rq_of_rt_rq(rt_rq);

#ifdef CONFIG_RT_GROUP_SCHED
	/*
	 * Change rq's cpupri only if rt_rq is the top queue.
	 */
	if (&rq->rt != rt_rq)
		return;
#endif
	if (rq->online && prio < prev_prio)
		cpupri_set(&rq->rd->cpupri, rq->cpu, prio);
}

static void
dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
{
	struct rq *rq = rq_of_rt_rq(rt_rq);

#ifdef CONFIG_RT_GROUP_SCHED
	/*
	 * Change rq's cpupri only if rt_rq is the top queue.
	 */
	if (&rq->rt != rt_rq)
		return;
#endif
	if (rq->online && rt_rq->highest_prio.curr != prev_prio)
		cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr);
}

#else /* CONFIG_SMP */

static inline
void inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}
static inline
void dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}

#endif /* CONFIG_SMP */

#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
static void
inc_rt_prio(struct rt_rq *rt_rq, int prio)
{
	int prev_prio = rt_rq->highest_prio.curr;

	if (prio < prev_prio)
		rt_rq->highest_prio.curr = prio;

	inc_rt_prio_smp(rt_rq, prio, prev_prio);
}

static void
dec_rt_prio(struct rt_rq *rt_rq, int prio)
{
	int prev_prio = rt_rq->highest_prio.curr;

	if (rt_rq->rt_nr_running) {

		WARN_ON(prio < prev_prio);

		/*
		 * This may have been our highest task, and therefore
		 * we may have some recomputation to do
		 */
		if (prio == prev_prio) {
			struct rt_prio_array *array = &rt_rq->active;

			rt_rq->highest_prio.curr =
				sched_find_first_bit(array->bitmap);
		}

	} else {
		rt_rq->highest_prio.curr = MAX_RT_PRIO-1;
	}

	dec_rt_prio_smp(rt_rq, prio, prev_prio);
}

#else

static inline void inc_rt_prio(struct rt_rq *rt_rq, int prio) {}
static inline void dec_rt_prio(struct rt_rq *rt_rq, int prio) {}

#endif /* CONFIG_SMP || CONFIG_RT_GROUP_SCHED */

#ifdef CONFIG_RT_GROUP_SCHED

static void
inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	if (rt_se_boosted(rt_se))
		rt_rq->rt_nr_boosted++;

	if (rt_rq->tg)
		start_rt_bandwidth(&rt_rq->tg->rt_bandwidth);
}

static void
dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	if (rt_se_boosted(rt_se))
		rt_rq->rt_nr_boosted--;

	WARN_ON(!rt_rq->rt_nr_running && rt_rq->rt_nr_boosted);
}

#else /* CONFIG_RT_GROUP_SCHED */

static void
inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	start_rt_bandwidth(&def_rt_bandwidth);
}

static inline
void dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) {}

#endif /* CONFIG_RT_GROUP_SCHED */

static inline
unsigned int rt_se_nr_running(struct sched_rt_entity *rt_se)
{
	struct rt_rq *group_rq = group_rt_rq(rt_se);

	if (group_rq)
		return group_rq->rt_nr_running;
	else
		return 1;
}

static inline
unsigned int rt_se_rr_nr_running(struct sched_rt_entity *rt_se)
{
	struct rt_rq *group_rq = group_rt_rq(rt_se);
	struct task_struct *tsk;

	if (group_rq)
		return group_rq->rr_nr_running;

	tsk = rt_task_of(rt_se);

	return (tsk->policy == SCHED_RR) ? 1 : 0;
}

static inline
void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	int prio = rt_se_prio(rt_se);

	WARN_ON(!rt_prio(prio));
	rt_rq->rt_nr_running += rt_se_nr_running(rt_se);
	rt_rq->rr_nr_running += rt_se_rr_nr_running(rt_se);

	inc_rt_prio(rt_rq, prio);
	inc_rt_migration(rt_se, rt_rq);
	inc_rt_group(rt_se, rt_rq);
}

static inline
void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	WARN_ON(!rt_prio(rt_se_prio(rt_se)));
	WARN_ON(!rt_rq->rt_nr_running);
	rt_rq->rt_nr_running -= rt_se_nr_running(rt_se);
	rt_rq->rr_nr_running -= rt_se_rr_nr_running(rt_se);

	dec_rt_prio(rt_rq, rt_se_prio(rt_se));
	dec_rt_migration(rt_se, rt_rq);
	dec_rt_group(rt_se, rt_rq);
}

/*
 * Change rt_se->run_list location unless SAVE && !MOVE
 *
 * assumes ENQUEUE/DEQUEUE flags match
 */
static inline bool move_entity(unsigned int flags)
{
	if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) == DEQUEUE_SAVE)
		return false;

	return true;
}

static void __delist_rt_entity(struct sched_rt_entity *rt_se, struct rt_prio_array *array)
{
	list_del_init(&rt_se->run_list);

	if (list_empty(array->queue + rt_se_prio(rt_se)))
		__clear_bit(rt_se_prio(rt_se), array->bitmap);

	rt_se->on_list = 0;
}

static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
{
	struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
	struct rt_prio_array *array = &rt_rq->active;
	struct rt_rq *group_rq = group_rt_rq(rt_se);
	struct list_head *queue = array->queue + rt_se_prio(rt_se);

	/*
	 * Don't enqueue the group if it's throttled, or when empty.
	 * The latter is a consequence of the former when a child group
	 * gets throttled and the current group doesn't have any other
	 * active members.
	 */
	if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) {
		if (rt_se->on_list)
			__delist_rt_entity(rt_se, array);
		return;
	}

	if (move_entity(flags)) {
		WARN_ON_ONCE(rt_se->on_list);
		if (flags & ENQUEUE_HEAD)
			list_add(&rt_se->run_list, queue);
		else
			list_add_tail(&rt_se->run_list, queue);

		__set_bit(rt_se_prio(rt_se), array->bitmap);
		rt_se->on_list = 1;
	}
	rt_se->on_rq = 1;

	inc_rt_tasks(rt_se, rt_rq);
}

static void __dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
{
	struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
	struct rt_prio_array *array = &rt_rq->active;

	if (move_entity(flags)) {
		WARN_ON_ONCE(!rt_se->on_list);
		__delist_rt_entity(rt_se, array);
	}
	rt_se->on_rq = 0;

	dec_rt_tasks(rt_se, rt_rq);
}
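
/*
 * Illustrative note (not from the original source): a dequeue/enqueue pair
 * carrying DEQUEUE_SAVE without DEQUEUE_MOVE (for instance a property change
 * that should not alter the task's position in its priority queue) only
 * toggles ->on_rq and the accounting in the helpers above, because
 * move_entity() returns false; every other flag combination really unlinks
 * and relinks the entity on array->queue[].
 */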

/*
 * Because the prio of an upper entry depends on the lower
 * entries, we must remove entries top - down.
 */
static void dequeue_rt_stack(struct sched_rt_entity *rt_se, unsigned int flags)
{
	struct sched_rt_entity *back = NULL;

	for_each_sched_rt_entity(rt_se) {
		rt_se->back = back;
		back = rt_se;
	}

	dequeue_top_rt_rq(rt_rq_of_se(back));

	for (rt_se = back; rt_se; rt_se = rt_se->back) {
		if (on_rt_rq(rt_se))
			__dequeue_rt_entity(rt_se, flags);
	}
}

static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
{
	struct rq *rq = rq_of_rt_se(rt_se);

	dequeue_rt_stack(rt_se, flags);
	for_each_sched_rt_entity(rt_se)
		__enqueue_rt_entity(rt_se, flags);
	enqueue_top_rt_rq(&rq->rt);
}

static void dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
{
	struct rq *rq = rq_of_rt_se(rt_se);

	dequeue_rt_stack(rt_se, flags);

	for_each_sched_rt_entity(rt_se) {
		struct rt_rq *rt_rq = group_rt_rq(rt_se);

		if (rt_rq && rt_rq->rt_nr_running)
			__enqueue_rt_entity(rt_se, flags);
	}
	enqueue_top_rt_rq(&rq->rt);
}

/*
 * Adding/removing a task to/from a priority array:
 */
static void
enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
{
	struct sched_rt_entity *rt_se = &p->rt;

	if (flags & ENQUEUE_WAKEUP)
		rt_se->timeout = 0;

	enqueue_rt_entity(rt_se, flags);

	if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
		enqueue_pushable_task(rq, p);
}

static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
{
	struct sched_rt_entity *rt_se = &p->rt;

	update_curr_rt(rq);
	dequeue_rt_entity(rt_se, flags);

	dequeue_pushable_task(rq, p);
}

/*
 * Put task to the head or the end of the run list without the overhead of
 * dequeue followed by enqueue.
 */
static void
requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int head)
{
	if (on_rt_rq(rt_se)) {
		struct rt_prio_array *array = &rt_rq->active;
		struct list_head *queue = array->queue + rt_se_prio(rt_se);

		if (head)
			list_move(&rt_se->run_list, queue);
		else
			list_move_tail(&rt_se->run_list, queue);
	}
}

static void requeue_task_rt(struct rq *rq, struct task_struct *p, int head)
{
	struct sched_rt_entity *rt_se = &p->rt;
	struct rt_rq *rt_rq;

	for_each_sched_rt_entity(rt_se) {
		rt_rq = rt_rq_of_se(rt_se);
		requeue_rt_entity(rt_rq, rt_se, head);
	}
}

static void yield_task_rt(struct rq *rq)
{
	requeue_task_rt(rq, rq->curr, 0);
}
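
/*
 * Illustrative note (not from the original source): yield_task_rt() above
 * simply requeues the yielding task at the tail of its own priority queue,
 * so a sched_yield() from a SCHED_FIFO/SCHED_RR task only gives way to tasks
 * of the same priority; lower-priority RT tasks still do not get to run.
 */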

#ifdef CONFIG_SMP
static int find_lowest_rq(struct task_struct *task);

static int
select_task_rq_rt(struct task_struct *p, int cpu, int flags)
{
	struct task_struct *curr;
	struct rq *rq;
	bool test;

	/* For anything but wake ups, just return the task_cpu */
	if (!(flags & (WF_TTWU | WF_FORK)))
		goto out;

	rq = cpu_rq(cpu);

	rcu_read_lock();
	curr = READ_ONCE(rq->curr); /* unlocked access */

	/*
	 * If the current task on @p's runqueue is an RT task, then
	 * try to see if we can wake this RT task up on another
	 * runqueue. Otherwise simply start this RT task
	 * on its current runqueue.
	 *
	 * We want to avoid overloading runqueues. If the woken
	 * task is a higher priority, then it will stay on this CPU
	 * and the lower prio task should be moved to another CPU.
	 * Even though this will probably make the lower prio task
	 * lose its cache, we do not want to bounce a higher priority
	 * task around just because it gave up its CPU, perhaps for a
	 * lock?
	 *
	 * For equal prio tasks, we just let the scheduler sort it out.
	 *
	 * Otherwise, just let it ride on the affined RQ and the
	 * post-schedule router will push the preempted task away
	 *
	 * This test is optimistic, if we get it wrong the load-balancer
	 * will have to sort it out.
	 *
	 * We take into account the capacity of the CPU to ensure it fits the
	 * requirement of the task - which is only important on heterogeneous
	 * systems like big.LITTLE.
	 */
	test = curr &&
	       unlikely(rt_task(curr)) &&
	       (curr->nr_cpus_allowed < 2 || curr->prio <= p->prio);

	if (test || !rt_task_fits_capacity(p, cpu)) {
		int target = find_lowest_rq(p);

		/*
		 * Bail out if we were forcing a migration to find a better
		 * fitting CPU but our search failed.
		 */
		if (!test && target != -1 && !rt_task_fits_capacity(p, target))
			goto out_unlock;

		/*
		 * Don't bother moving it if the destination CPU is
		 * not running a lower priority task.
		 */
		if (target != -1 &&
		    p->prio < cpu_rq(target)->rt.highest_prio.curr)
			cpu = target;
	}

out_unlock:
	rcu_read_unlock();

out:
	return cpu;
}

static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
{
	/*
	 * Current can't be migrated, useless to reschedule,
	 * let's hope p can move out.
	 */
	if (rq->curr->nr_cpus_allowed == 1 ||
	    !cpupri_find(&rq->rd->cpupri, rq->curr, NULL))
		return;

	/*
	 * p is migratable, so let's not schedule it and
	 * see if it is pushed or pulled somewhere else.
	 */
	if (p->nr_cpus_allowed != 1 &&
	    cpupri_find(&rq->rd->cpupri, p, NULL))
		return;

	/*
	 * There appear to be other CPUs that can accept
	 * the current task but none can run 'p', so lets reschedule
	 * to try and push the current task away:
	 */
	requeue_task_rt(rq, p, 1);
	resched_curr(rq);
}

static int balance_rt(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
{
	if (!on_rt_rq(&p->rt) && need_pull_rt_task(rq, p)) {
		/*
		 * This is OK, because current is on_cpu, which avoids it being
		 * picked for load-balance and preemption/IRQs are still
		 * disabled avoiding further scheduler activity on it and we've
		 * not yet started the picking loop.
		 */
		rq_unpin_lock(rq, rf);
		pull_rt_task(rq);
		rq_repin_lock(rq, rf);
	}

	return sched_stop_runnable(rq) || sched_dl_runnable(rq) || sched_rt_runnable(rq);
}
#endif /* CONFIG_SMP */

/*
 * Preempt the current task with a newly woken task if needed:
 */
static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags)
{
	if (p->prio < rq->curr->prio) {
		resched_curr(rq);
		return;
	}

#ifdef CONFIG_SMP
	/*
	 * If:
	 *
	 * - the newly woken task is of equal priority to the current task
	 * - the newly woken task is non-migratable while current is migratable
	 * - current will be preempted on the next reschedule
	 *
	 * we should check to see if current can readily move to a different
	 * cpu. If so, we will reschedule to allow the push logic to try
	 * to move current somewhere else, making room for our non-migratable
	 * task.
	 */
	if (p->prio == rq->curr->prio && !test_tsk_need_resched(rq->curr))
		check_preempt_equal_prio(rq, p);
#endif
}

static inline void set_next_task_rt(struct rq *rq, struct task_struct *p, bool first)
{
	p->se.exec_start = rq_clock_task(rq);

	/* The running task is never eligible for pushing */
	dequeue_pushable_task(rq, p);

	if (!first)
		return;

	/*
	 * If prev task was rt, put_prev_task() has already updated the
	 * utilization. We only care of the case where we start to schedule a
	 * rt task
	 */
	if (rq->curr->sched_class != &rt_sched_class)
		update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 0);

	rt_queue_push_tasks(rq);
}

static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
						   struct rt_rq *rt_rq)
{
	struct rt_prio_array *array = &rt_rq->active;
	struct sched_rt_entity *next = NULL;
	struct list_head *queue;
	int idx;

	idx = sched_find_first_bit(array->bitmap);
	BUG_ON(idx >= MAX_RT_PRIO);

	queue = array->queue + idx;
	next = list_entry(queue->next, struct sched_rt_entity, run_list);

	return next;
}

static struct task_struct *_pick_next_task_rt(struct rq *rq)
{
	struct sched_rt_entity *rt_se;
	struct rt_rq *rt_rq = &rq->rt;

	do {
		rt_se = pick_next_rt_entity(rq, rt_rq);
		BUG_ON(!rt_se);
		rt_rq = group_rt_rq(rt_se);
	} while (rt_rq);

	return rt_task_of(rt_se);
}

static struct task_struct *pick_next_task_rt(struct rq *rq)
{
	struct task_struct *p;

	if (!sched_rt_runnable(rq))
		return NULL;

	p = _pick_next_task_rt(rq);
	set_next_task_rt(rq, p, true);
	return p;
}

static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
{
	update_curr_rt(rq);

	update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 1);

	/*
	 * The previous task needs to be made eligible for pushing
	 * if it is still active
	 */
	if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1)
		enqueue_pushable_task(rq, p);
}

#ifdef CONFIG_SMP

/* Only try algorithms three times */
#define RT_MAX_TRIES 3

static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
{
	if (!task_running(rq, p) &&
	    cpumask_test_cpu(cpu, &p->cpus_mask))
		return 1;

	return 0;
}

/*
 * Return the highest pushable rq's task, which is suitable to be executed
 * on the CPU, NULL otherwise
 */
static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu)
{
	struct plist_head *head = &rq->rt.pushable_tasks;
	struct task_struct *p;

	if (!has_pushable_tasks(rq))
		return NULL;

	plist_for_each_entry(p, head, pushable_tasks) {
		if (pick_rt_task(rq, p, cpu))
			return p;
	}

	return NULL;
}

static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);

static int find_lowest_rq(struct task_struct *task)
{
	struct sched_domain *sd;
	struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask);
	int this_cpu = smp_processor_id();
	int cpu = task_cpu(task);
	int ret;

	/* Make sure the mask is initialized first */
	if (unlikely(!lowest_mask))
		return -1;

	if (task->nr_cpus_allowed == 1)
		return -1; /* No other targets possible */

	/*
	 * If we're on an asym system, ensure we consider the different
	 * capacities of the CPUs when searching for the lowest_mask.
	 */
	if (static_branch_unlikely(&sched_asym_cpucapacity)) {

		ret = cpupri_find_fitness(&task_rq(task)->rd->cpupri,
					  task, lowest_mask,
					  rt_task_fits_capacity);
	} else {

		ret = cpupri_find(&task_rq(task)->rd->cpupri,
				  task, lowest_mask);
	}

	if (!ret)
		return -1; /* No targets found */

	/*
	 * At this point we have built a mask of CPUs representing the
	 * lowest priority tasks in the system. Now we want to elect
	 * the best one based on our affinity and topology.
	 *
	 * We prioritize the last CPU that the task executed on since
	 * it is most likely cache-hot in that location.
	 */
	if (cpumask_test_cpu(cpu, lowest_mask))
		return cpu;

	/*
	 * Otherwise, we consult the sched_domains span maps to figure
	 * out which CPU is logically closest to our hot cache data.
	 */
	if (!cpumask_test_cpu(this_cpu, lowest_mask))
		this_cpu = -1; /* Skip this_cpu opt if not among lowest */

	rcu_read_lock();
	for_each_domain(cpu, sd) {
		if (sd->flags & SD_WAKE_AFFINE) {
			int best_cpu;

			/*
			 * "this_cpu" is cheaper to preempt than a
			 * remote processor.
			 */
			if (this_cpu != -1 &&
			    cpumask_test_cpu(this_cpu, sched_domain_span(sd))) {
				rcu_read_unlock();
				return this_cpu;
			}

			best_cpu = cpumask_any_and_distribute(lowest_mask,
							      sched_domain_span(sd));
			if (best_cpu < nr_cpu_ids) {
				rcu_read_unlock();
				return best_cpu;
			}
		}
	}
	rcu_read_unlock();

	/*
	 * And finally, if there were no matches within the domains
	 * just give the caller *something* to work with from the compatible
	 * locations.
	 */
	if (this_cpu != -1)
		return this_cpu;

	cpu = cpumask_any_distribute(lowest_mask);
	if (cpu < nr_cpu_ids)
		return cpu;

	return -1;
}
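
/*
 * Illustrative note (not from the original source): find_lowest_rq() above
 * effectively ranks candidates as
 *
 *	1) the task's previous CPU, if it is in lowest_mask (cache-hot),
 *	2) this_cpu or another lowest_mask CPU sharing a SD_WAKE_AFFINE
 *	   sched domain with the previous CPU (topologically close),
 *	3) any remaining CPU in lowest_mask,
 *
 * and falls back to -1 when cpupri found no CPU running at a lower priority
 * (or, on asymmetric systems, none that also fits the task's capacity needs),
 * as well as for the single-CPU-affinity early exits.
 */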

/* Will lock the rq it finds */
static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
{
	struct rq *lowest_rq = NULL;
	int tries;
	int cpu;

	for (tries = 0; tries < RT_MAX_TRIES; tries++) {
		cpu = find_lowest_rq(task);

		if ((cpu == -1) || (cpu == rq->cpu))
			break;

		lowest_rq = cpu_rq(cpu);

		if (lowest_rq->rt.highest_prio.curr <= task->prio) {
			/*
			 * Target rq has tasks of equal or higher priority,
			 * retrying does not release any lock and is unlikely
			 * to yield a different result.
			 */
			lowest_rq = NULL;
			break;
		}

		/* if the prio of this runqueue changed, try again */
		if (double_lock_balance(rq, lowest_rq)) {
			/*
			 * We had to unlock the run queue. In
			 * the mean time, task could have
			 * migrated already or had its affinity changed.
			 * Also make sure that it wasn't scheduled on its rq.
			 */
			if (unlikely(task_rq(task) != rq ||
				     !cpumask_test_cpu(lowest_rq->cpu, &task->cpus_mask) ||
				     task_running(rq, task) ||
				     !rt_task(task) ||
				     !task_on_rq_queued(task))) {

				double_unlock_balance(rq, lowest_rq);
				lowest_rq = NULL;
				break;
			}
		}

		/* If this rq is still suitable use it. */
		if (lowest_rq->rt.highest_prio.curr > task->prio)
			break;

		/* try again */
		double_unlock_balance(rq, lowest_rq);
		lowest_rq = NULL;
	}

	return lowest_rq;
}

static struct task_struct *pick_next_pushable_task(struct rq *rq)
{
	struct task_struct *p;

	if (!has_pushable_tasks(rq))
		return NULL;

	p = plist_first_entry(&rq->rt.pushable_tasks,
			      struct task_struct, pushable_tasks);

	BUG_ON(rq->cpu != task_cpu(p));
	BUG_ON(task_current(rq, p));
	BUG_ON(p->nr_cpus_allowed <= 1);

	BUG_ON(!task_on_rq_queued(p));
	BUG_ON(!rt_task(p));

	return p;
}

/*
 * If the current CPU has more than one RT task, see if the non
 * running task can migrate over to a CPU that is running a task
 * of lesser priority.
 */
static int push_rt_task(struct rq *rq, bool pull)
{
	struct task_struct *next_task;
	struct rq *lowest_rq;
	int ret = 0;

	if (!rq->rt.overloaded)
		return 0;

	next_task = pick_next_pushable_task(rq);
	if (!next_task)
		return 0;

retry:
	if (is_migration_disabled(next_task)) {
		struct task_struct *push_task = NULL;
		int cpu;

		if (!pull || rq->push_busy)
			return 0;

		cpu = find_lowest_rq(rq->curr);
		if (cpu == -1 || cpu == rq->cpu)
			return 0;

		/*
		 * Given we found a CPU with lower priority than @next_task,
		 * therefore it should be running. However we cannot migrate it
		 * to this other CPU, instead attempt to push the current
		 * running task on this CPU away.
		 */
		push_task = get_push_task(rq);
		if (push_task) {
			raw_spin_unlock(&rq->lock);
			stop_one_cpu_nowait(rq->cpu, push_cpu_stop,
					    push_task, &rq->push_work);
			raw_spin_lock(&rq->lock);
		}

		return 0;
	}

	if (WARN_ON(next_task == rq->curr))
		return 0;

	/*
	 * It's possible that the next_task slipped in with a
	 * higher priority than current. If that's the case
	 * just reschedule current.
	 */
	if (unlikely(next_task->prio < rq->curr->prio)) {
		resched_curr(rq);
		return 0;
	}

	/* We might release rq lock */
	get_task_struct(next_task);

	/* find_lock_lowest_rq locks the rq if found */
	lowest_rq = find_lock_lowest_rq(next_task, rq);
	if (!lowest_rq) {
		struct task_struct *task;
		/*
		 * find_lock_lowest_rq releases rq->lock
		 * so it is possible that next_task has migrated.
		 *
		 * We need to make sure that the task is still on the same
		 * run-queue and is also still the next task eligible for
		 * pushing.
		 */
		task = pick_next_pushable_task(rq);
		if (task == next_task) {
			/*
			 * The task hasn't migrated, and is still the next
			 * eligible task, but we failed to find a run-queue
			 * to push it to. Do not retry in this case, since
			 * other CPUs will pull from us when ready.
			 */
			goto out;
		}

		if (!task)
			/* No more tasks, just exit */
			goto out;

		/*
		 * Something has shifted, try again.
		 */
		put_task_struct(next_task);
		next_task = task;
		goto retry;
	}

	deactivate_task(rq, next_task, 0);
	set_task_cpu(next_task, lowest_rq->cpu);
	activate_task(lowest_rq, next_task, 0);
	resched_curr(lowest_rq);
	ret = 1;

	double_unlock_balance(rq, lowest_rq);
out:
	put_task_struct(next_task);

	return ret;
}

static void push_rt_tasks(struct rq *rq)
{
	/* push_rt_task will return true if it moved an RT */
	while (push_rt_task(rq, false))
		;
}

#ifdef HAVE_RT_PUSH_IPI

/*
 * When a high priority task schedules out from a CPU and a lower priority
 * task is scheduled in, a check is made to see if there's any RT tasks
 * on other CPUs that are waiting to run because a higher priority RT task
 * is currently running on its CPU. In this case, the CPU with multiple RT
 * tasks queued on it (overloaded) needs to be notified that a CPU has opened
 * up that may be able to run one of its non-running queued RT tasks.
 *
 * All CPUs with overloaded RT tasks need to be notified as there is currently
 * no way to know which of these CPUs have the highest priority task waiting
 * to run. Instead of trying to take a spinlock on each of these CPUs,
 * which has been shown to cause large latency when done on machines with many
 * CPUs, an IPI is sent to the CPUs to have them push off the overloaded
 * RT tasks waiting to run.
 *
 * Just sending an IPI to each of the CPUs is also an issue, as on large
 * count CPU machines, this can cause an IPI storm on a CPU, especially
 * if it's the only CPU with multiple RT tasks queued, and a large number
 * of CPUs scheduling a lower priority task at the same time.
 *
 * Each root domain has its own irq work function that can iterate over
 * all CPUs with RT overloaded tasks. Since all CPUs with overloaded RT
 * tasks must be checked if there's one or many CPUs that are lowering
 * their priority, there's a single irq work iterator that will try to
 * push off RT tasks that are waiting to run.
 *
 * When a CPU schedules a lower priority task, it will kick off the
 * irq work iterator that will jump to each CPU with overloaded RT tasks.
 * As it only takes the first CPU that schedules a lower priority task
 * to start the process, the rto_start variable is incremented and if
 * the atomic result is one, then that CPU will try to take the rto_lock.
 * This prevents high contention on the lock as the process handles all
 * CPUs scheduling lower priority tasks.
 *
 * All CPUs that are scheduling a lower priority task will increment the
 * rt_loop_next variable. This will make sure that the irq work iterator
 * checks all RT overloaded CPUs whenever a CPU schedules a new lower
 * priority task, even if the iterator is in the middle of a scan. Incrementing
 * the rt_loop_next will cause the iterator to perform another scan.
 *
 */
static int rto_next_cpu(struct root_domain *rd)
{
	int next;
	int cpu;

	/*
	 * When starting the IPI RT pushing, the rto_cpu is set to -1,
	 * rto_next_cpu() will simply return the first CPU found in
	 * the rto_mask.
	 *
	 * If rto_next_cpu() is called when rto_cpu is a valid CPU, it
	 * will return the next CPU found in the rto_mask.
	 *
	 * If there are no more CPUs left in the rto_mask, then a check is made
	 * against rto_loop and rto_loop_next. rto_loop is only updated with
	 * the rto_lock held, but any CPU may increment the rto_loop_next
	 * without any locking.
	 */
	for (;;) {

		/* When rto_cpu is -1 this acts like cpumask_first() */
		cpu = cpumask_next(rd->rto_cpu, rd->rto_mask);

		rd->rto_cpu = cpu;

		if (cpu < nr_cpu_ids)
			return cpu;

		rd->rto_cpu = -1;

		/*
		 * ACQUIRE ensures we see the @rto_mask changes
		 * made prior to the @next value observed.
		 *
		 * Matches WMB in rt_set_overload().
		 */
		next = atomic_read_acquire(&rd->rto_loop_next);

		if (rd->rto_loop == next)
			break;

		rd->rto_loop = next;
	}

	return -1;
}

static inline bool rto_start_trylock(atomic_t *v)
{
	return !atomic_cmpxchg_acquire(v, 0, 1);
}

static inline void rto_start_unlock(atomic_t *v)
{
	atomic_set_release(v, 0);
}

static void tell_cpu_to_push(struct rq *rq)
{
	int cpu = -1;

	/* Keep the loop going if the IPI is currently active */
	atomic_inc(&rq->rd->rto_loop_next);

	/* Only one CPU can initiate a loop at a time */
	if (!rto_start_trylock(&rq->rd->rto_loop_start))
		return;

	raw_spin_lock(&rq->rd->rto_lock);

	/*
	 * The rto_cpu is updated under the lock, if it has a valid CPU
	 * then the IPI is still running and will continue due to the
	 * update to loop_next, and nothing needs to be done here.
	 * Otherwise it is finishing up and an IPI needs to be sent.
	 */
	if (rq->rd->rto_cpu < 0)
		cpu = rto_next_cpu(rq->rd);

	raw_spin_unlock(&rq->rd->rto_lock);

	rto_start_unlock(&rq->rd->rto_loop_start);

	if (cpu >= 0) {
		/* Make sure the rd does not get freed while pushing */
		sched_get_rd(rq->rd);
		irq_work_queue_on(&rq->rd->rto_push_work, cpu);
	}
}

/* Called from hardirq context */
void rto_push_irq_work_func(struct irq_work *work)
{
	struct root_domain *rd =
		container_of(work, struct root_domain, rto_push_work);
	struct rq *rq;
	int cpu;

	rq = this_rq();

	/*
	 * We do not need to grab the lock to check for has_pushable_tasks.
2122 * When it gets updated, a check is made if a push is possible. 2123 */ 2124 if (has_pushable_tasks(rq)) { 2125 raw_spin_lock(&rq->lock); 2126 while (push_rt_task(rq, true)) 2127 ; 2128 raw_spin_unlock(&rq->lock); 2129 } 2130 2131 raw_spin_lock(&rd->rto_lock); 2132 2133 /* Pass the IPI to the next rt overloaded queue */ 2134 cpu = rto_next_cpu(rd); 2135 2136 raw_spin_unlock(&rd->rto_lock); 2137 2138 if (cpu < 0) { 2139 sched_put_rd(rd); 2140 return; 2141 } 2142 2143 /* Try the next RT overloaded CPU */ 2144 irq_work_queue_on(&rd->rto_push_work, cpu); 2145 } 2146 #endif /* HAVE_RT_PUSH_IPI */ 2147 2148 static void pull_rt_task(struct rq *this_rq) 2149 { 2150 int this_cpu = this_rq->cpu, cpu; 2151 bool resched = false; 2152 struct task_struct *p, *push_task; 2153 struct rq *src_rq; 2154 int rt_overload_count = rt_overloaded(this_rq); 2155 2156 if (likely(!rt_overload_count)) 2157 return; 2158 2159 /* 2160 * Match the barrier from rt_set_overloaded; this guarantees that if we 2161 * see overloaded we must also see the rto_mask bit. 2162 */ 2163 smp_rmb(); 2164 2165 /* If we are the only overloaded CPU do nothing */ 2166 if (rt_overload_count == 1 && 2167 cpumask_test_cpu(this_rq->cpu, this_rq->rd->rto_mask)) 2168 return; 2169 2170 #ifdef HAVE_RT_PUSH_IPI 2171 if (sched_feat(RT_PUSH_IPI)) { 2172 tell_cpu_to_push(this_rq); 2173 return; 2174 } 2175 #endif 2176 2177 for_each_cpu(cpu, this_rq->rd->rto_mask) { 2178 if (this_cpu == cpu) 2179 continue; 2180 2181 src_rq = cpu_rq(cpu); 2182 2183 /* 2184 * Don't bother taking the src_rq->lock if the next highest 2185 * task is known to be lower-priority than our current task. 2186 * This may look racy, but if this value is about to go 2187 * logically higher, the src_rq will push this task away. 2188 * And if its going logically lower, we do not care 2189 */ 2190 if (src_rq->rt.highest_prio.next >= 2191 this_rq->rt.highest_prio.curr) 2192 continue; 2193 2194 /* 2195 * We can potentially drop this_rq's lock in 2196 * double_lock_balance, and another CPU could 2197 * alter this_rq 2198 */ 2199 push_task = NULL; 2200 double_lock_balance(this_rq, src_rq); 2201 2202 /* 2203 * We can pull only a task, which is pushable 2204 * on its rq, and no others. 2205 */ 2206 p = pick_highest_pushable_task(src_rq, this_cpu); 2207 2208 /* 2209 * Do we have an RT task that preempts 2210 * the to-be-scheduled task? 2211 */ 2212 if (p && (p->prio < this_rq->rt.highest_prio.curr)) { 2213 WARN_ON(p == src_rq->curr); 2214 WARN_ON(!task_on_rq_queued(p)); 2215 2216 /* 2217 * There's a chance that p is higher in priority 2218 * than what's currently running on its CPU. 2219 * This is just that p is wakeing up and hasn't 2220 * had a chance to schedule. We only pull 2221 * p if it is lower in priority than the 2222 * current task on the run queue 2223 */ 2224 if (p->prio < src_rq->curr->prio) 2225 goto skip; 2226 2227 if (is_migration_disabled(p)) { 2228 push_task = get_push_task(src_rq); 2229 } else { 2230 deactivate_task(src_rq, p, 0); 2231 set_task_cpu(p, this_cpu); 2232 activate_task(this_rq, p, 0); 2233 resched = true; 2234 } 2235 /* 2236 * We continue with the search, just in 2237 * case there's an even higher prio task 2238 * in another runqueue. 
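/*
 * Editorial illustration (not authoritative; the scenario is assumed):
 * suppose this CPU drops to a CFS task while CPUs 1 and 2 are flagged in
 * rd->rto_mask. With the RT_PUSH_IPI scheduler feature enabled,
 * pull_rt_task() below simply asks the overloaded CPUs to push via
 * tell_cpu_to_push() and returns. Without it, this CPU walks the rto_mask
 * itself: for each overloaded runqueue it takes the remote lock via
 * double_lock_balance(), considers only the highest-priority pushable task
 * there, and pulls it when that task beats this runqueue's best priority
 * without preempting the remote CPU's current task.
 */
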
static void pull_rt_task(struct rq *this_rq)
{
	int this_cpu = this_rq->cpu, cpu;
	bool resched = false;
	struct task_struct *p, *push_task;
	struct rq *src_rq;
	int rt_overload_count = rt_overloaded(this_rq);

	if (likely(!rt_overload_count))
		return;

	/*
	 * Match the barrier from rt_set_overload(); this guarantees that if we
	 * see overloaded we must also see the rto_mask bit.
	 */
	smp_rmb();

	/* If we are the only overloaded CPU do nothing */
	if (rt_overload_count == 1 &&
	    cpumask_test_cpu(this_rq->cpu, this_rq->rd->rto_mask))
		return;

#ifdef HAVE_RT_PUSH_IPI
	if (sched_feat(RT_PUSH_IPI)) {
		tell_cpu_to_push(this_rq);
		return;
	}
#endif

	for_each_cpu(cpu, this_rq->rd->rto_mask) {
		if (this_cpu == cpu)
			continue;

		src_rq = cpu_rq(cpu);

		/*
		 * Don't bother taking the src_rq->lock if the next highest
		 * task is known to be lower-priority than our current task.
		 * This may look racy, but if this value is about to go
		 * logically higher, the src_rq will push this task away.
		 * And if it's going logically lower, we do not care.
		 */
		if (src_rq->rt.highest_prio.next >=
		    this_rq->rt.highest_prio.curr)
			continue;

		/*
		 * We can potentially drop this_rq's lock in
		 * double_lock_balance, and another CPU could
		 * alter this_rq.
		 */
		push_task = NULL;
		double_lock_balance(this_rq, src_rq);

		/*
		 * We can pull only a task that is pushable
		 * on its rq, and no others.
		 */
		p = pick_highest_pushable_task(src_rq, this_cpu);

		/*
		 * Do we have an RT task that preempts
		 * the to-be-scheduled task?
		 */
		if (p && (p->prio < this_rq->rt.highest_prio.curr)) {
			WARN_ON(p == src_rq->curr);
			WARN_ON(!task_on_rq_queued(p));

			/*
			 * There's a chance that p is higher in priority
			 * than what's currently running on its CPU.
			 * This is just that p is waking up and hasn't
			 * had a chance to schedule. We only pull
			 * p if it is lower in priority than the
			 * current task on the run queue.
			 */
			if (p->prio < src_rq->curr->prio)
				goto skip;

			if (is_migration_disabled(p)) {
				push_task = get_push_task(src_rq);
			} else {
				deactivate_task(src_rq, p, 0);
				set_task_cpu(p, this_cpu);
				activate_task(this_rq, p, 0);
				resched = true;
			}
			/*
			 * We continue with the search, just in
			 * case there's an even higher prio task
			 * in another runqueue. (low likelihood
			 * but possible)
			 */
		}
skip:
		double_unlock_balance(this_rq, src_rq);

		if (push_task) {
			raw_spin_unlock(&this_rq->lock);
			stop_one_cpu_nowait(src_rq->cpu, push_cpu_stop,
					    push_task, &src_rq->push_work);
			raw_spin_lock(&this_rq->lock);
		}
	}

	if (resched)
		resched_curr(this_rq);
}

/*
 * If we are not running and we are not going to reschedule soon, we should
 * try to push tasks away now.
 */
static void task_woken_rt(struct rq *rq, struct task_struct *p)
{
	bool need_to_push = !task_running(rq, p) &&
			    !test_tsk_need_resched(rq->curr) &&
			    p->nr_cpus_allowed > 1 &&
			    (dl_task(rq->curr) || rt_task(rq->curr)) &&
			    (rq->curr->nr_cpus_allowed < 2 ||
			     rq->curr->prio <= p->prio);

	if (need_to_push)
		push_rt_tasks(rq);
}

/* Assumes rq->lock is held */
static void rq_online_rt(struct rq *rq)
{
	if (rq->rt.overloaded)
		rt_set_overload(rq);

	__enable_runtime(rq);

	cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio.curr);
}

/* Assumes rq->lock is held */
static void rq_offline_rt(struct rq *rq)
{
	if (rq->rt.overloaded)
		rt_clear_overload(rq);

	__disable_runtime(rq);

	cpupri_set(&rq->rd->cpupri, rq->cpu, CPUPRI_INVALID);
}

/*
 * When switching from the RT queue, we bring ourselves to a position
 * where we might want to pull RT tasks from other runqueues.
 */
static void switched_from_rt(struct rq *rq, struct task_struct *p)
{
	/*
	 * If there are other RT tasks then we will reschedule
	 * and the scheduling of the other RT tasks will handle
	 * the balancing. But if we are the last RT task
	 * we may need to handle the pulling of RT tasks
	 * now.
	 */
	if (!task_on_rq_queued(p) || rq->rt.rt_nr_running)
		return;

	rt_queue_pull_task(rq);
}

void __init init_sched_rt_class(void)
{
	unsigned int i;

	for_each_possible_cpu(i) {
		zalloc_cpumask_var_node(&per_cpu(local_cpu_mask, i),
					GFP_KERNEL, cpu_to_node(i));
	}
}
#endif /* CONFIG_SMP */

/*
 * When switching a task to RT, we may overload the runqueue
 * with RT tasks. In this case we try to push them off to
 * other runqueues.
 */
static void switched_to_rt(struct rq *rq, struct task_struct *p)
{
	/*
	 * If we are already running, then there's nothing
	 * that needs to be done. But if we are not running
	 * we may need to preempt the current running task.
	 * If that current running task is also an RT task
	 * then see if we can move to another run queue.
	 */
	if (task_on_rq_queued(p) && rq->curr != p) {
#ifdef CONFIG_SMP
		if (p->nr_cpus_allowed > 1 && rq->rt.overloaded)
			rt_queue_push_tasks(rq);
#endif /* CONFIG_SMP */
		if (p->prio < rq->curr->prio && cpu_online(cpu_of(rq)))
			resched_curr(rq);
	}
}

/*
 * Priority of the task has changed. This may cause
 * us to initiate a push or pull.
 */
static void
prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio)
{
	if (!task_on_rq_queued(p))
		return;

	if (task_current(rq, p)) {
#ifdef CONFIG_SMP
		/*
		 * If our priority decreases while running, we
		 * may need to pull tasks to this runqueue.
		 */
		if (oldprio < p->prio)
			rt_queue_pull_task(rq);

		/*
		 * If there's a higher priority task waiting to run
		 * then reschedule.
		 */
		if (p->prio > rq->rt.highest_prio.curr)
			resched_curr(rq);
#else
		/* For UP simply resched on drop of prio */
		if (oldprio < p->prio)
			resched_curr(rq);
#endif /* CONFIG_SMP */
	} else {
		/*
		 * This task is not running, but if it is
		 * higher in priority than the current running task
		 * then reschedule.
		 */
		if (p->prio < rq->curr->prio)
			resched_curr(rq);
	}
}

#ifdef CONFIG_POSIX_TIMERS
static void watchdog(struct rq *rq, struct task_struct *p)
{
	unsigned long soft, hard;

	/* max may change after cur was read, this will be fixed next tick */
	soft = task_rlimit(p, RLIMIT_RTTIME);
	hard = task_rlimit_max(p, RLIMIT_RTTIME);

	if (soft != RLIM_INFINITY) {
		unsigned long next;

		if (p->rt.watchdog_stamp != jiffies) {
			p->rt.timeout++;
			p->rt.watchdog_stamp = jiffies;
		}

		next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
		if (p->rt.timeout > next) {
			posix_cputimers_rt_watchdog(&p->posix_cputimers,
						    p->se.sum_exec_runtime);
		}
	}
}
#else
static inline void watchdog(struct rq *rq, struct task_struct *p) { }
#endif

/*
 * scheduler tick hitting a task of our scheduling class.
 *
 * NOTE: This function can be called remotely by the tick offload that
 * goes along full dynticks. Therefore no local assumption can be made
 * and everything must be accessed through the @rq and @curr passed in
 * parameters.
 */
static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
{
	struct sched_rt_entity *rt_se = &p->rt;

	update_curr_rt(rq);
	update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 1);

	watchdog(rq, p);

	/*
	 * RR tasks need a special form of timeslice management.
	 * FIFO tasks have no timeslices.
	 */
	if (p->policy != SCHED_RR)
		return;

	if (--p->rt.time_slice)
		return;

	p->rt.time_slice = sched_rr_timeslice;

	/*
	 * Requeue to the end of the queue if we (and all of our ancestors)
	 * are not the only element on the queue.
	 */
	for_each_sched_rt_entity(rt_se) {
		if (rt_se->run_list.prev != rt_se->run_list.next) {
			requeue_task_rt(rq, p, 0);
			resched_curr(rq);
			return;
		}
	}
}

static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
{
	/*
	 * Time slice is 0 for SCHED_FIFO tasks
	 */
	if (task->policy == SCHED_RR)
		return sched_rr_timeslice;
	else
		return 0;
}

DEFINE_SCHED_CLASS(rt) = {

	.enqueue_task = enqueue_task_rt,
	.dequeue_task = dequeue_task_rt,
	.yield_task = yield_task_rt,

	.check_preempt_curr = check_preempt_curr_rt,

	.pick_next_task = pick_next_task_rt,
	.put_prev_task = put_prev_task_rt,
	.set_next_task = set_next_task_rt,

#ifdef CONFIG_SMP
	.balance = balance_rt,
	.select_task_rq = select_task_rq_rt,
	.set_cpus_allowed = set_cpus_allowed_common,
	.rq_online = rq_online_rt,
	.rq_offline = rq_offline_rt,
	.task_woken = task_woken_rt,
	.switched_from = switched_from_rt,
	.find_lock_rq = find_lock_lowest_rq,
#endif

	.task_tick = task_tick_rt,

	.get_rr_interval = get_rr_interval_rt,

	.prio_changed = prio_changed_rt,
	.switched_to = switched_to_rt,

	.update_curr = update_curr_rt,

#ifdef CONFIG_UCLAMP_TASK
	.uclamp_enabled = 1,
#endif
};

#ifdef CONFIG_RT_GROUP_SCHED
/*
 * Ensure that the real time constraints are schedulable.
 */
static DEFINE_MUTEX(rt_constraints_mutex);

static inline int tg_has_rt_tasks(struct task_group *tg)
{
	struct task_struct *task;
	struct css_task_iter it;
	int ret = 0;

	/*
	 * Autogroups do not have RT tasks; see autogroup_create().
	 */
	if (task_group_is_autogroup(tg))
		return 0;

	css_task_iter_start(&tg->css, 0, &it);
	while (!ret && (task = css_task_iter_next(&it)))
		ret |= rt_task(task);
	css_task_iter_end(&it);

	return ret;
}

struct rt_schedulable_data {
	struct task_group *tg;
	u64 rt_period;
	u64 rt_runtime;
};

static int tg_rt_schedulable(struct task_group *tg, void *data)
{
	struct rt_schedulable_data *d = data;
	struct task_group *child;
	unsigned long total, sum = 0;
	u64 period, runtime;

	period = ktime_to_ns(tg->rt_bandwidth.rt_period);
	runtime = tg->rt_bandwidth.rt_runtime;

	if (tg == d->tg) {
		period = d->rt_period;
		runtime = d->rt_runtime;
	}

	/*
	 * Cannot have more runtime than the period.
	 */
	if (runtime > period && runtime != RUNTIME_INF)
		return -EINVAL;

	/*
	 * Ensure we don't starve existing RT tasks if runtime turns zero.
	 */
	if (rt_bandwidth_enabled() && !runtime &&
	    tg->rt_bandwidth.rt_runtime && tg_has_rt_tasks(tg))
		return -EBUSY;

	total = to_ratio(period, runtime);

	/*
	 * Nobody can have more than the global setting allows.
	 */
	if (total > to_ratio(global_rt_period(), global_rt_runtime()))
		return -EINVAL;

	/*
	 * The sum of our children's runtime should not exceed our own.
	 */
	list_for_each_entry_rcu(child, &tg->children, siblings) {
		period = ktime_to_ns(child->rt_bandwidth.rt_period);
		runtime = child->rt_bandwidth.rt_runtime;

		if (child == d->tg) {
			period = d->rt_period;
			runtime = d->rt_runtime;
		}

		sum += to_ratio(period, runtime);
	}

	if (sum > total)
		return -EINVAL;

	return 0;
}

static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
{
	int ret;

	struct rt_schedulable_data data = {
		.tg = tg,
		.rt_period = period,
		.rt_runtime = runtime,
	};

	rcu_read_lock();
	ret = walk_tg_tree(tg_rt_schedulable, tg_nop, &data);
	rcu_read_unlock();

	return ret;
}

static int tg_set_rt_bandwidth(struct task_group *tg,
			       u64 rt_period, u64 rt_runtime)
{
	int i, err = 0;

	/*
	 * Disallowing the root group RT runtime is BAD, it would disallow the
	 * kernel creating (and/or operating) RT threads.
	 */
	if (tg == &root_task_group && rt_runtime == 0)
		return -EINVAL;

	/* No period doesn't make any sense. */
	if (rt_period == 0)
		return -EINVAL;

	/*
	 * Bound the quota to defend against overflow during the bandwidth shift.
	 */
	if (rt_runtime != RUNTIME_INF && rt_runtime > max_rt_runtime)
		return -EINVAL;

	mutex_lock(&rt_constraints_mutex);
	err = __rt_schedulable(tg, rt_period, rt_runtime);
	if (err)
		goto unlock;

	raw_spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock);
	tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period);
	tg->rt_bandwidth.rt_runtime = rt_runtime;

	for_each_possible_cpu(i) {
		struct rt_rq *rt_rq = tg->rt_rq[i];

		raw_spin_lock(&rt_rq->rt_runtime_lock);
		rt_rq->rt_runtime = rt_runtime;
		raw_spin_unlock(&rt_rq->rt_runtime_lock);
	}
	raw_spin_unlock_irq(&tg->rt_bandwidth.rt_runtime_lock);
unlock:
	mutex_unlock(&rt_constraints_mutex);

	return err;
}

int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us)
{
	u64 rt_runtime, rt_period;

	rt_period = ktime_to_ns(tg->rt_bandwidth.rt_period);
	rt_runtime = (u64)rt_runtime_us * NSEC_PER_USEC;
	if (rt_runtime_us < 0)
		rt_runtime = RUNTIME_INF;
	else if ((u64)rt_runtime_us > U64_MAX / NSEC_PER_USEC)
		return -EINVAL;

	return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
}

long sched_group_rt_runtime(struct task_group *tg)
{
	u64 rt_runtime_us;

	if (tg->rt_bandwidth.rt_runtime == RUNTIME_INF)
		return -1;

	rt_runtime_us = tg->rt_bandwidth.rt_runtime;
	do_div(rt_runtime_us, NSEC_PER_USEC);
	return rt_runtime_us;
}

int sched_group_set_rt_period(struct task_group *tg, u64 rt_period_us)
{
	u64 rt_runtime, rt_period;

	if (rt_period_us > U64_MAX / NSEC_PER_USEC)
		return -EINVAL;

	rt_period = rt_period_us * NSEC_PER_USEC;
	rt_runtime = tg->rt_bandwidth.rt_runtime;

	return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
}

long sched_group_rt_period(struct task_group *tg)
{
	u64 rt_period_us;

	rt_period_us = ktime_to_ns(tg->rt_bandwidth.rt_period);
	do_div(rt_period_us, NSEC_PER_USEC);
	return rt_period_us;
}

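/*
 * Worked example (editorial sketch; the figures assume the default global
 * limits of sched_rt_period_us = 1000000 and sched_rt_runtime_us = 950000):
 * to_ratio() expresses each group's request as (runtime << BW_SHIFT) / period,
 * so the root budget is roughly 0.95 * 2^20 = 996147. A child group asking
 * for a 600000us runtime over a 500000us period is rejected immediately by
 * the runtime > period check in tg_rt_schedulable(). Two sibling groups each
 * requesting 500000us per 1000000us period are individually fine (524288
 * apiece), but configuring the second one makes the children's sum 1048576,
 * which exceeds the parent's 996147 and fails with -EINVAL.
 */
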
static int sched_rt_global_constraints(void)
{
	int ret = 0;

	mutex_lock(&rt_constraints_mutex);
	ret = __rt_schedulable(NULL, 0, 0);
	mutex_unlock(&rt_constraints_mutex);

	return ret;
}

int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk)
{
	/* Don't accept realtime tasks when there is no way for them to run */
	if (rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0)
		return 0;

	return 1;
}

#else /* !CONFIG_RT_GROUP_SCHED */
static int sched_rt_global_constraints(void)
{
	unsigned long flags;
	int i;

	raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
	for_each_possible_cpu(i) {
		struct rt_rq *rt_rq = &cpu_rq(i)->rt;

		raw_spin_lock(&rt_rq->rt_runtime_lock);
		rt_rq->rt_runtime = global_rt_runtime();
		raw_spin_unlock(&rt_rq->rt_runtime_lock);
	}
	raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags);

	return 0;
}
#endif /* CONFIG_RT_GROUP_SCHED */

static int sched_rt_global_validate(void)
{
	if (sysctl_sched_rt_period <= 0)
		return -EINVAL;

	if ((sysctl_sched_rt_runtime != RUNTIME_INF) &&
	    ((sysctl_sched_rt_runtime > sysctl_sched_rt_period) ||
	     ((u64)sysctl_sched_rt_runtime *
			NSEC_PER_USEC > max_rt_runtime)))
		return -EINVAL;

	return 0;
}

static void sched_rt_do_global(void)
{
	def_rt_bandwidth.rt_runtime = global_rt_runtime();
	def_rt_bandwidth.rt_period = ns_to_ktime(global_rt_period());
}

int sched_rt_handler(struct ctl_table *table, int write, void *buffer,
		     size_t *lenp, loff_t *ppos)
{
	int old_period, old_runtime;
	static DEFINE_MUTEX(mutex);
	int ret;

	mutex_lock(&mutex);
	old_period = sysctl_sched_rt_period;
	old_runtime = sysctl_sched_rt_runtime;

	ret = proc_dointvec(table, write, buffer, lenp, ppos);

	if (!ret && write) {
		ret = sched_rt_global_validate();
		if (ret)
			goto undo;

		ret = sched_dl_global_validate();
		if (ret)
			goto undo;

		ret = sched_rt_global_constraints();
		if (ret)
			goto undo;

		sched_rt_do_global();
		sched_dl_do_global();
	}
	if (0) {
undo:
		sysctl_sched_rt_period = old_period;
		sysctl_sched_rt_runtime = old_runtime;
	}
	mutex_unlock(&mutex);

	return ret;
}

int sched_rr_handler(struct ctl_table *table, int write, void *buffer,
		     size_t *lenp, loff_t *ppos)
{
	int ret;
	static DEFINE_MUTEX(mutex);

	mutex_lock(&mutex);
	ret = proc_dointvec(table, write, buffer, lenp, ppos);
	/*
	 * Make sure that internally we keep jiffies.
	 * Also, writing zero resets the timeslice to default:
	 */
	if (!ret && write) {
		sched_rr_timeslice =
			sysctl_sched_rr_timeslice <= 0 ? RR_TIMESLICE :
			msecs_to_jiffies(sysctl_sched_rr_timeslice);
	}
	mutex_unlock(&mutex);

	return ret;
}

#ifdef CONFIG_SCHED_DEBUG
void print_rt_stats(struct seq_file *m, int cpu)
{
	rt_rq_iter_t iter;
	struct rt_rq *rt_rq;

	rcu_read_lock();
	for_each_rt_rq(rt_rq, iter, cpu_rq(cpu))
		print_rt_rq(m, cpu, rt_rq);
	rcu_read_unlock();
}
#endif /* CONFIG_SCHED_DEBUG */
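
/*
 * Worked example for the sysctl handlers above (editorial note; the sysctl
 * file names are the conventional /proc/sys/kernel entries and HZ=250 is
 * assumed): writing 100 to sched_rr_timeslice_ms leads sched_rr_handler()
 * to store msecs_to_jiffies(100) == 25 jiffies in sched_rr_timeslice, while
 * writing zero or a negative value falls back to the RR_TIMESLICE default.
 * Similarly, sched_rt_handler() rejects a write of sched_rt_runtime_us =
 * 1100000 with sched_rt_period_us = 1000000: sched_rt_global_validate()
 * refuses a runtime larger than the period, and the handler rolls both
 * values back to their previous settings.
 */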