/*
 * Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR
 * policies)
 */

#include "sched.h"

#include <linux/slab.h>
#include <linux/irq_work.h>

int sched_rr_timeslice = RR_TIMESLICE;

static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);

struct rt_bandwidth def_rt_bandwidth;

static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer)
{
	struct rt_bandwidth *rt_b =
		container_of(timer, struct rt_bandwidth, rt_period_timer);
	int idle = 0;
	int overrun;

	raw_spin_lock(&rt_b->rt_runtime_lock);
	for (;;) {
		overrun = hrtimer_forward_now(timer, rt_b->rt_period);
		if (!overrun)
			break;

		raw_spin_unlock(&rt_b->rt_runtime_lock);
		idle = do_sched_rt_period_timer(rt_b, overrun);
		raw_spin_lock(&rt_b->rt_runtime_lock);
	}
	if (idle)
		rt_b->rt_period_active = 0;
	raw_spin_unlock(&rt_b->rt_runtime_lock);

	return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
}

void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
{
	rt_b->rt_period = ns_to_ktime(period);
	rt_b->rt_runtime = runtime;

	raw_spin_lock_init(&rt_b->rt_runtime_lock);

	hrtimer_init(&rt_b->rt_period_timer,
			CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	rt_b->rt_period_timer.function = sched_rt_period_timer;
}

static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
{
	if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
		return;

	raw_spin_lock(&rt_b->rt_runtime_lock);
	if (!rt_b->rt_period_active) {
		rt_b->rt_period_active = 1;
		hrtimer_forward_now(&rt_b->rt_period_timer, rt_b->rt_period);
		hrtimer_start_expires(&rt_b->rt_period_timer, HRTIMER_MODE_ABS_PINNED);
	}
	raw_spin_unlock(&rt_b->rt_runtime_lock);
}

#ifdef CONFIG_SMP
static void push_irq_work_func(struct irq_work *work);
#endif

void init_rt_rq(struct rt_rq *rt_rq)
{
	struct rt_prio_array *array;
	int i;

	array = &rt_rq->active;
	for (i = 0; i < MAX_RT_PRIO; i++) {
		INIT_LIST_HEAD(array->queue + i);
		__clear_bit(i, array->bitmap);
	}
	/* delimiter for bitsearch: */
	__set_bit(MAX_RT_PRIO, array->bitmap);

#if defined CONFIG_SMP
	rt_rq->highest_prio.curr = MAX_RT_PRIO;
	rt_rq->highest_prio.next = MAX_RT_PRIO;
	rt_rq->rt_nr_migratory = 0;
	rt_rq->overloaded = 0;
	plist_head_init(&rt_rq->pushable_tasks);

#ifdef HAVE_RT_PUSH_IPI
	rt_rq->push_flags = 0;
	rt_rq->push_cpu = nr_cpu_ids;
	raw_spin_lock_init(&rt_rq->push_lock);
	init_irq_work(&rt_rq->push_work, push_irq_work_func);
#endif
#endif /* CONFIG_SMP */
	/* We start in dequeued state, because no RT tasks are queued */
	rt_rq->rt_queued = 0;

	rt_rq->rt_time = 0;
	rt_rq->rt_throttled = 0;
	rt_rq->rt_runtime = 0;
	raw_spin_lock_init(&rt_rq->rt_runtime_lock);
}

#ifdef CONFIG_RT_GROUP_SCHED
static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b)
{
	hrtimer_cancel(&rt_b->rt_period_timer);
}

#define rt_entity_is_task(rt_se) (!(rt_se)->my_q)

static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
{
#ifdef CONFIG_SCHED_DEBUG
	WARN_ON_ONCE(!rt_entity_is_task(rt_se));
#endif
	return container_of(rt_se, struct task_struct, rt);
}

static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
{
	return rt_rq->rq;
}

static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
{
	return rt_se->rt_rq;
}

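/*
 * Rough per-CPU picture of what these accessors walk, assuming the
 * CONFIG_RT_GROUP_SCHED layout built by init_tg_rt_entry() below:
 *
 *	task's &p->rt	--rt_rq-->  tg->rt_rq[cpu]	(queue it is queued on)
 *	tg->rt_se[cpu]	--my_q-->   tg->rt_rq[cpu]	(queue it represents)
 *	tg->rt_se[cpu]	--rt_rq-->  parent tg's rt_rq[cpu], or rq->rt at the top
 *
 * rt_rq_of_se() follows ->rt_rq (the queue an entity is queued on);
 * group_rt_rq() below follows ->my_q (the queue a group entity owns,
 * NULL for plain task entities).
 */
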
static inline struct rq *rq_of_rt_se(struct sched_rt_entity *rt_se)
{
	struct rt_rq *rt_rq = rt_se->rt_rq;

	return rt_rq->rq;
}

void free_rt_sched_group(struct task_group *tg)
{
	int i;

	if (tg->rt_se)
		destroy_rt_bandwidth(&tg->rt_bandwidth);

	for_each_possible_cpu(i) {
		if (tg->rt_rq)
			kfree(tg->rt_rq[i]);
		if (tg->rt_se)
			kfree(tg->rt_se[i]);
	}

	kfree(tg->rt_rq);
	kfree(tg->rt_se);
}

void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
		struct sched_rt_entity *rt_se, int cpu,
		struct sched_rt_entity *parent)
{
	struct rq *rq = cpu_rq(cpu);

	rt_rq->highest_prio.curr = MAX_RT_PRIO;
	rt_rq->rt_nr_boosted = 0;
	rt_rq->rq = rq;
	rt_rq->tg = tg;

	tg->rt_rq[cpu] = rt_rq;
	tg->rt_se[cpu] = rt_se;

	if (!rt_se)
		return;

	if (!parent)
		rt_se->rt_rq = &rq->rt;
	else
		rt_se->rt_rq = parent->my_q;

	rt_se->my_q = rt_rq;
	rt_se->parent = parent;
	INIT_LIST_HEAD(&rt_se->run_list);
}

int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
{
	struct rt_rq *rt_rq;
	struct sched_rt_entity *rt_se;
	int i;

	tg->rt_rq = kzalloc(sizeof(rt_rq) * nr_cpu_ids, GFP_KERNEL);
	if (!tg->rt_rq)
		goto err;
	tg->rt_se = kzalloc(sizeof(rt_se) * nr_cpu_ids, GFP_KERNEL);
	if (!tg->rt_se)
		goto err;

	init_rt_bandwidth(&tg->rt_bandwidth,
			ktime_to_ns(def_rt_bandwidth.rt_period), 0);

	for_each_possible_cpu(i) {
		rt_rq = kzalloc_node(sizeof(struct rt_rq),
				     GFP_KERNEL, cpu_to_node(i));
		if (!rt_rq)
			goto err;

		rt_se = kzalloc_node(sizeof(struct sched_rt_entity),
				     GFP_KERNEL, cpu_to_node(i));
		if (!rt_se)
			goto err_free_rq;

		init_rt_rq(rt_rq);
		rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime;
		init_tg_rt_entry(tg, rt_rq, rt_se, i, parent->rt_se[i]);
	}

	return 1;

err_free_rq:
	kfree(rt_rq);
err:
	return 0;
}

#else /* CONFIG_RT_GROUP_SCHED */

#define rt_entity_is_task(rt_se) (1)

static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
{
	return container_of(rt_se, struct task_struct, rt);
}

static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
{
	return container_of(rt_rq, struct rq, rt);
}

static inline struct rq *rq_of_rt_se(struct sched_rt_entity *rt_se)
{
	struct task_struct *p = rt_task_of(rt_se);

	return task_rq(p);
}

static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
{
	struct rq *rq = rq_of_rt_se(rt_se);

	return &rq->rt;
}

void free_rt_sched_group(struct task_group *tg) { }

int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
{
	return 1;
}
#endif /* CONFIG_RT_GROUP_SCHED */

#ifdef CONFIG_SMP

static void pull_rt_task(struct rq *this_rq);

static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
{
	/* Try to pull RT tasks here if we lower this rq's prio */
	return rq->rt.highest_prio.curr > prev->prio;
}

static inline int rt_overloaded(struct rq *rq)
{
	return atomic_read(&rq->rd->rto_count);
}

static inline void rt_set_overload(struct rq *rq)
{
	if (!rq->online)
		return;

	cpumask_set_cpu(rq->cpu, rq->rd->rto_mask);
	/*
	 * Make sure the mask is visible before we set
	 * the overload count. That is checked to determine
	 * if we should look at the mask. It would be a shame
	 * if we looked at the mask, but the mask was not
	 * updated yet.
	 *
	 * Matched by the barrier in pull_rt_task().
	 */
	smp_wmb();
	atomic_inc(&rq->rd->rto_count);
}

static inline void rt_clear_overload(struct rq *rq)
{
	if (!rq->online)
		return;

	/* the order here really doesn't matter */
	atomic_dec(&rq->rd->rto_count);
	cpumask_clear_cpu(rq->cpu, rq->rd->rto_mask);
}

static void update_rt_migration(struct rt_rq *rt_rq)
{
	if (rt_rq->rt_nr_migratory && rt_rq->rt_nr_total > 1) {
		if (!rt_rq->overloaded) {
			rt_set_overload(rq_of_rt_rq(rt_rq));
			rt_rq->overloaded = 1;
		}
	} else if (rt_rq->overloaded) {
		rt_clear_overload(rq_of_rt_rq(rt_rq));
		rt_rq->overloaded = 0;
	}
}

static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	struct task_struct *p;

	if (!rt_entity_is_task(rt_se))
		return;

	p = rt_task_of(rt_se);
	rt_rq = &rq_of_rt_rq(rt_rq)->rt;

	rt_rq->rt_nr_total++;
	if (p->nr_cpus_allowed > 1)
		rt_rq->rt_nr_migratory++;

	update_rt_migration(rt_rq);
}

static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	struct task_struct *p;

	if (!rt_entity_is_task(rt_se))
		return;

	p = rt_task_of(rt_se);
	rt_rq = &rq_of_rt_rq(rt_rq)->rt;

	rt_rq->rt_nr_total--;
	if (p->nr_cpus_allowed > 1)
		rt_rq->rt_nr_migratory--;

	update_rt_migration(rt_rq);
}

static inline int has_pushable_tasks(struct rq *rq)
{
	return !plist_head_empty(&rq->rt.pushable_tasks);
}

static DEFINE_PER_CPU(struct callback_head, rt_push_head);
static DEFINE_PER_CPU(struct callback_head, rt_pull_head);

static void push_rt_tasks(struct rq *);
static void pull_rt_task(struct rq *);

static inline void queue_push_tasks(struct rq *rq)
{
	if (!has_pushable_tasks(rq))
		return;

	queue_balance_callback(rq, &per_cpu(rt_push_head, rq->cpu), push_rt_tasks);
}

static inline void queue_pull_task(struct rq *rq)
{
	queue_balance_callback(rq, &per_cpu(rt_pull_head, rq->cpu), pull_rt_task);
}

static void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
{
	plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
	plist_node_init(&p->pushable_tasks, p->prio);
	plist_add(&p->pushable_tasks, &rq->rt.pushable_tasks);

	/* Update the highest prio pushable task */
	if (p->prio < rq->rt.highest_prio.next)
		rq->rt.highest_prio.next = p->prio;
}

static void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
{
	plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);

	/* Update the new highest prio pushable task */
	if (has_pushable_tasks(rq)) {
		p = plist_first_entry(&rq->rt.pushable_tasks,
				      struct task_struct, pushable_tasks);
		rq->rt.highest_prio.next = p->prio;
	} else
		rq->rt.highest_prio.next = MAX_RT_PRIO;
}

#else

static inline void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
{
}

static inline void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
{
}

static inline
void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
}

static inline
void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
}

static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
{
	return false;
}

static inline void pull_rt_task(struct rq *this_rq)
{
}

static inline void queue_push_tasks(struct rq *rq)
{
}
#endif /* CONFIG_SMP */

static void enqueue_top_rt_rq(struct rt_rq *rt_rq);
static void dequeue_top_rt_rq(struct rt_rq *rt_rq);

static inline int on_rt_rq(struct sched_rt_entity *rt_se)
{
	return !list_empty(&rt_se->run_list);
}

#ifdef CONFIG_RT_GROUP_SCHED

static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
{
	if (!rt_rq->tg)
		return RUNTIME_INF;

	return rt_rq->rt_runtime;
}

static inline u64 sched_rt_period(struct rt_rq *rt_rq)
{
	return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period);
}

typedef struct task_group *rt_rq_iter_t;

static inline struct task_group *next_task_group(struct task_group *tg)
{
	do {
		tg = list_entry_rcu(tg->list.next,
			typeof(struct task_group), list);
	} while (&tg->list != &task_groups && task_group_is_autogroup(tg));

	if (&tg->list == &task_groups)
		tg = NULL;

	return tg;
}

#define for_each_rt_rq(rt_rq, iter, rq)					\
	for (iter = container_of(&task_groups, typeof(*iter), list);	\
		(iter = next_task_group(iter)) &&			\
		(rt_rq = iter->rt_rq[cpu_of(rq)]);)

#define for_each_sched_rt_entity(rt_se) \
	for (; rt_se; rt_se = rt_se->parent)

static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
{
	return rt_se->my_q;
}

static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head);
static void dequeue_rt_entity(struct sched_rt_entity *rt_se);

static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
{
	struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
	struct rq *rq = rq_of_rt_rq(rt_rq);
	struct sched_rt_entity *rt_se;

	int cpu = cpu_of(rq);

	rt_se = rt_rq->tg->rt_se[cpu];

	if (rt_rq->rt_nr_running) {
		if (!rt_se)
			enqueue_top_rt_rq(rt_rq);
		else if (!on_rt_rq(rt_se))
			enqueue_rt_entity(rt_se, false);

		if (rt_rq->highest_prio.curr < curr->prio)
			resched_curr(rq);
	}
}

static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
{
	struct sched_rt_entity *rt_se;
	int cpu = cpu_of(rq_of_rt_rq(rt_rq));

	rt_se = rt_rq->tg->rt_se[cpu];

	if (!rt_se)
		dequeue_top_rt_rq(rt_rq);
	else if (on_rt_rq(rt_se))
		dequeue_rt_entity(rt_se);
}

static inline int rt_rq_throttled(struct rt_rq *rt_rq)
{
	return rt_rq->rt_throttled && !rt_rq->rt_nr_boosted;
}

static int rt_se_boosted(struct sched_rt_entity *rt_se)
{
	struct rt_rq *rt_rq = group_rt_rq(rt_se);
	struct task_struct *p;

	if (rt_rq)
		return !!rt_rq->rt_nr_boosted;

	p = rt_task_of(rt_se);
	return p->prio != p->normal_prio;
}

#ifdef CONFIG_SMP
static inline const struct cpumask *sched_rt_period_mask(void)
{
	return this_rq()->rd->span;
}
#else
static inline const struct cpumask *sched_rt_period_mask(void)
{
	return cpu_online_mask;
}
#endif

static inline
struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
{
	return container_of(rt_b, struct task_group, rt_bandwidth)->rt_rq[cpu];
}

static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
{
	return &rt_rq->tg->rt_bandwidth;
}

#else /* !CONFIG_RT_GROUP_SCHED */

static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
{
	return rt_rq->rt_runtime;
}

static inline u64 sched_rt_period(struct rt_rq *rt_rq)
{
	return ktime_to_ns(def_rt_bandwidth.rt_period);
}

typedef struct rt_rq *rt_rq_iter_t;

#define for_each_rt_rq(rt_rq, iter, rq) \
	for ((void) iter, rt_rq = &rq->rt; rt_rq; rt_rq = NULL)

#define for_each_sched_rt_entity(rt_se) \
	for (; rt_se; rt_se = NULL)

static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
{
	return NULL;
}

static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
{
	struct rq *rq = rq_of_rt_rq(rt_rq);

	if (!rt_rq->rt_nr_running)
		return;

	enqueue_top_rt_rq(rt_rq);
	resched_curr(rq);
}

static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
{
	dequeue_top_rt_rq(rt_rq);
}

static inline int rt_rq_throttled(struct rt_rq *rt_rq)
{
	return rt_rq->rt_throttled;
}

static inline const struct cpumask *sched_rt_period_mask(void)
{
	return cpu_online_mask;
}

static inline
struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
{
	return &cpu_rq(cpu)->rt;
}

static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
{
	return &def_rt_bandwidth;
}

#endif /* CONFIG_RT_GROUP_SCHED */

bool sched_rt_bandwidth_account(struct rt_rq *rt_rq)
{
	struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);

	return (hrtimer_active(&rt_b->rt_period_timer) ||
		rt_rq->rt_time < rt_b->rt_runtime);
}

#ifdef CONFIG_SMP
/*
 * We ran out of runtime, see if we can borrow some from our neighbours.
 */
static int do_balance_runtime(struct rt_rq *rt_rq)
{
	struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
	struct root_domain *rd = rq_of_rt_rq(rt_rq)->rd;
	int i, weight, more = 0;
	u64 rt_period;

	weight = cpumask_weight(rd->span);

	raw_spin_lock(&rt_b->rt_runtime_lock);
	rt_period = ktime_to_ns(rt_b->rt_period);
	for_each_cpu(i, rd->span) {
		struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
		s64 diff;

		if (iter == rt_rq)
			continue;

		raw_spin_lock(&iter->rt_runtime_lock);
		/*
		 * Either all rqs have inf runtime and there's nothing to steal
		 * or __disable_runtime() below sets a specific rq to inf to
		 * indicate it's been disabled and disallow stealing.
		 */
		if (iter->rt_runtime == RUNTIME_INF)
			goto next;

		/*
		 * From runqueues with spare time, take 1/n part of their
		 * spare time, but no more than our period.
		 */
		diff = iter->rt_runtime - iter->rt_time;
		if (diff > 0) {
			diff = div_u64((u64)diff, weight);
			if (rt_rq->rt_runtime + diff > rt_period)
				diff = rt_period - rt_rq->rt_runtime;
			iter->rt_runtime -= diff;
			rt_rq->rt_runtime += diff;
			more = 1;
			if (rt_rq->rt_runtime == rt_period) {
				raw_spin_unlock(&iter->rt_runtime_lock);
				break;
			}
		}
next:
		raw_spin_unlock(&iter->rt_runtime_lock);
	}
	raw_spin_unlock(&rt_b->rt_runtime_lock);

	return more;
}

/*
 * Ensure this RQ takes back all the runtime it lent to its neighbours.
 */
static void __disable_runtime(struct rq *rq)
{
	struct root_domain *rd = rq->rd;
	rt_rq_iter_t iter;
	struct rt_rq *rt_rq;

	if (unlikely(!scheduler_running))
		return;

	for_each_rt_rq(rt_rq, iter, rq) {
		struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
		s64 want;
		int i;

		raw_spin_lock(&rt_b->rt_runtime_lock);
		raw_spin_lock(&rt_rq->rt_runtime_lock);
		/*
		 * Either we're all inf and nobody needs to borrow, or we're
		 * already disabled and thus have nothing to do, or we have
		 * exactly the right amount of runtime to take out.
		 */
		if (rt_rq->rt_runtime == RUNTIME_INF ||
				rt_rq->rt_runtime == rt_b->rt_runtime)
			goto balanced;
		raw_spin_unlock(&rt_rq->rt_runtime_lock);

		/*
		 * Calculate the difference between what we started out with
		 * and what we currently have, that's the amount of runtime
		 * we lent out and now have to reclaim.
		 */
		want = rt_b->rt_runtime - rt_rq->rt_runtime;

		/*
		 * Greedy reclaim, take back as much as we can.
		 */
		for_each_cpu(i, rd->span) {
			struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
			s64 diff;

			/*
			 * Can't reclaim from ourselves or disabled runqueues.
			 */
			if (iter == rt_rq || iter->rt_runtime == RUNTIME_INF)
				continue;

			raw_spin_lock(&iter->rt_runtime_lock);
			if (want > 0) {
				diff = min_t(s64, iter->rt_runtime, want);
				iter->rt_runtime -= diff;
				want -= diff;
			} else {
				iter->rt_runtime -= want;
				want -= want;
			}
			raw_spin_unlock(&iter->rt_runtime_lock);

			if (!want)
				break;
		}

		raw_spin_lock(&rt_rq->rt_runtime_lock);
		/*
		 * We cannot be left wanting - that would mean some runtime
		 * leaked out of the system.
		 */
		BUG_ON(want);
balanced:
		/*
		 * Disable all the borrow logic by pretending we have inf
		 * runtime - in which case borrowing doesn't make sense.
		 */
		rt_rq->rt_runtime = RUNTIME_INF;
		rt_rq->rt_throttled = 0;
		raw_spin_unlock(&rt_rq->rt_runtime_lock);
		raw_spin_unlock(&rt_b->rt_runtime_lock);

		/* Make rt_rq available for pick_next_task() */
		sched_rt_rq_enqueue(rt_rq);
	}
}

static void __enable_runtime(struct rq *rq)
{
	rt_rq_iter_t iter;
	struct rt_rq *rt_rq;

	if (unlikely(!scheduler_running))
		return;

	/*
	 * Reset each runqueue's bandwidth settings
	 */
	for_each_rt_rq(rt_rq, iter, rq) {
		struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);

		raw_spin_lock(&rt_b->rt_runtime_lock);
		raw_spin_lock(&rt_rq->rt_runtime_lock);
		rt_rq->rt_runtime = rt_b->rt_runtime;
		rt_rq->rt_time = 0;
		rt_rq->rt_throttled = 0;
		raw_spin_unlock(&rt_rq->rt_runtime_lock);
		raw_spin_unlock(&rt_b->rt_runtime_lock);
	}
}

static int balance_runtime(struct rt_rq *rt_rq)
{
	int more = 0;

	if (!sched_feat(RT_RUNTIME_SHARE))
		return more;

	if (rt_rq->rt_time > rt_rq->rt_runtime) {
		raw_spin_unlock(&rt_rq->rt_runtime_lock);
		more = do_balance_runtime(rt_rq);
		raw_spin_lock(&rt_rq->rt_runtime_lock);
	}

	return more;
}
#else /* !CONFIG_SMP */
static inline int balance_runtime(struct rt_rq *rt_rq)
{
	return 0;
}
#endif /* CONFIG_SMP */

static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
{
	int i, idle = 1, throttled = 0;
	const struct cpumask *span;

	span = sched_rt_period_mask();
#ifdef CONFIG_RT_GROUP_SCHED
	/*
	 * FIXME: isolated CPUs should really leave the root task group,
	 * whether they are isolcpus or were isolated via cpusets, lest
	 * the timer run on a CPU which does not service all runqueues,
	 * potentially leaving other CPUs indefinitely throttled.  If
	 * isolation is really required, the user will turn the throttle
	 * off to kill the perturbations it causes anyway.  Meanwhile,
	 * this maintains functionality for boot and/or troubleshooting.
	 */
	if (rt_b == &root_task_group.rt_bandwidth)
		span = cpu_online_mask;
#endif
	for_each_cpu(i, span) {
		int enqueue = 0;
		struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
		struct rq *rq = rq_of_rt_rq(rt_rq);

		raw_spin_lock(&rq->lock);
		if (rt_rq->rt_time) {
			u64 runtime;

			raw_spin_lock(&rt_rq->rt_runtime_lock);
			if (rt_rq->rt_throttled)
				balance_runtime(rt_rq);
			runtime = rt_rq->rt_runtime;
			rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime);
			if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
				rt_rq->rt_throttled = 0;
				enqueue = 1;

				/*
				 * When we're idle and a woken (rt) task is
				 * throttled, check_preempt_curr() will set
				 * skip_update and the time between the wakeup
				 * and this unthrottle will get accounted as
				 * 'runtime'.
				 */
				if (rt_rq->rt_nr_running && rq->curr == rq->idle)
					rq_clock_skip_update(rq, false);
			}
			if (rt_rq->rt_time || rt_rq->rt_nr_running)
				idle = 0;
			raw_spin_unlock(&rt_rq->rt_runtime_lock);
		} else if (rt_rq->rt_nr_running) {
			idle = 0;
			if (!rt_rq_throttled(rt_rq))
				enqueue = 1;
		}
		if (rt_rq->rt_throttled)
			throttled = 1;

		if (enqueue)
			sched_rt_rq_enqueue(rt_rq);
		raw_spin_unlock(&rq->lock);
	}

	if (!throttled && (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF))
		return 1;

	return idle;
}

static inline int rt_se_prio(struct sched_rt_entity *rt_se)
{
#ifdef CONFIG_RT_GROUP_SCHED
	struct rt_rq *rt_rq = group_rt_rq(rt_se);

	if (rt_rq)
		return rt_rq->highest_prio.curr;
#endif

	return rt_task_of(rt_se)->prio;
}

static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
{
	u64 runtime = sched_rt_runtime(rt_rq);

	if (rt_rq->rt_throttled)
		return rt_rq_throttled(rt_rq);

	if (runtime >= sched_rt_period(rt_rq))
		return 0;

	balance_runtime(rt_rq);
	runtime = sched_rt_runtime(rt_rq);
	if (runtime == RUNTIME_INF)
		return 0;

	if (rt_rq->rt_time > runtime) {
		struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);

		/*
		 * Don't actually throttle groups that have no runtime assigned
		 * but accrue some time due to boosting.
		 */
		if (likely(rt_b->rt_runtime)) {
			rt_rq->rt_throttled = 1;
			printk_deferred_once("sched: RT throttling activated\n");
		} else {
			/*
			 * In case we did anyway, make it go away,
			 * replenishment is a joke, since it will replenish us
			 * with exactly 0 ns.
			 */
			rt_rq->rt_time = 0;
		}

		if (rt_rq_throttled(rt_rq)) {
			sched_rt_rq_dequeue(rt_rq);
			return 1;
		}
	}

	return 0;
}

/*
 * Update the current task's runtime statistics. Skip current tasks that
 * are not in our scheduling class.
 */
static void update_curr_rt(struct rq *rq)
{
	struct task_struct *curr = rq->curr;
	struct sched_rt_entity *rt_se = &curr->rt;
	u64 delta_exec;

	if (curr->sched_class != &rt_sched_class)
		return;

	delta_exec = rq_clock_task(rq) - curr->se.exec_start;
	if (unlikely((s64)delta_exec <= 0))
		return;

	schedstat_set(curr->se.statistics.exec_max,
		      max(curr->se.statistics.exec_max, delta_exec));

	curr->se.sum_exec_runtime += delta_exec;
	account_group_exec_runtime(curr, delta_exec);

	curr->se.exec_start = rq_clock_task(rq);
	cpuacct_charge(curr, delta_exec);

	sched_rt_avg_update(rq, delta_exec);

	if (!rt_bandwidth_enabled())
		return;

	for_each_sched_rt_entity(rt_se) {
		struct rt_rq *rt_rq = rt_rq_of_se(rt_se);

		if (sched_rt_runtime(rt_rq) != RUNTIME_INF) {
			raw_spin_lock(&rt_rq->rt_runtime_lock);
			rt_rq->rt_time += delta_exec;
			if (sched_rt_runtime_exceeded(rt_rq))
				resched_curr(rq);
			raw_spin_unlock(&rt_rq->rt_runtime_lock);
		}
	}
}

static void
dequeue_top_rt_rq(struct rt_rq *rt_rq)
{
	struct rq *rq = rq_of_rt_rq(rt_rq);

	BUG_ON(&rq->rt != rt_rq);

	if (!rt_rq->rt_queued)
		return;

	BUG_ON(!rq->nr_running);

	sub_nr_running(rq, rt_rq->rt_nr_running);
	rt_rq->rt_queued = 0;
}

static void
enqueue_top_rt_rq(struct rt_rq *rt_rq)
{
	struct rq *rq = rq_of_rt_rq(rt_rq);

	BUG_ON(&rq->rt != rt_rq);

	if (rt_rq->rt_queued)
		return;
	if (rt_rq_throttled(rt_rq) || !rt_rq->rt_nr_running)
		return;

	add_nr_running(rq, rt_rq->rt_nr_running);
	rt_rq->rt_queued = 1;
}

#if defined CONFIG_SMP

static void
inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
{
	struct rq *rq = rq_of_rt_rq(rt_rq);

#ifdef CONFIG_RT_GROUP_SCHED
	/*
	 * Change rq's cpupri only if rt_rq is the top queue.
	 */
	if (&rq->rt != rt_rq)
		return;
#endif
	if (rq->online && prio < prev_prio)
		cpupri_set(&rq->rd->cpupri, rq->cpu, prio);
}

static void
dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
{
	struct rq *rq = rq_of_rt_rq(rt_rq);

#ifdef CONFIG_RT_GROUP_SCHED
	/*
	 * Change rq's cpupri only if rt_rq is the top queue.
	 */
	if (&rq->rt != rt_rq)
		return;
#endif
	if (rq->online && rt_rq->highest_prio.curr != prev_prio)
		cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr);
}

#else /* CONFIG_SMP */

static inline
void inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}
static inline
void dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}

#endif /* CONFIG_SMP */

#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
static void
inc_rt_prio(struct rt_rq *rt_rq, int prio)
{
	int prev_prio = rt_rq->highest_prio.curr;

	if (prio < prev_prio)
		rt_rq->highest_prio.curr = prio;

	inc_rt_prio_smp(rt_rq, prio, prev_prio);
}

static void
dec_rt_prio(struct rt_rq *rt_rq, int prio)
{
	int prev_prio = rt_rq->highest_prio.curr;

	if (rt_rq->rt_nr_running) {

		WARN_ON(prio < prev_prio);

		/*
		 * This may have been our highest task, and therefore
		 * we may have some recomputation to do
		 */
		if (prio == prev_prio) {
			struct rt_prio_array *array = &rt_rq->active;

			rt_rq->highest_prio.curr =
				sched_find_first_bit(array->bitmap);
		}

	} else
		rt_rq->highest_prio.curr = MAX_RT_PRIO;

	dec_rt_prio_smp(rt_rq, prio, prev_prio);
}

#else

static inline void inc_rt_prio(struct rt_rq *rt_rq, int prio) {}
static inline void dec_rt_prio(struct rt_rq *rt_rq, int prio) {}

#endif /* CONFIG_SMP || CONFIG_RT_GROUP_SCHED */

#ifdef CONFIG_RT_GROUP_SCHED

static void
inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	if (rt_se_boosted(rt_se))
		rt_rq->rt_nr_boosted++;

	if (rt_rq->tg)
		start_rt_bandwidth(&rt_rq->tg->rt_bandwidth);
}

static void
dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	if (rt_se_boosted(rt_se))
		rt_rq->rt_nr_boosted--;

	WARN_ON(!rt_rq->rt_nr_running && rt_rq->rt_nr_boosted);
}

#else /* CONFIG_RT_GROUP_SCHED */

static void
inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	start_rt_bandwidth(&def_rt_bandwidth);
}

static inline
void dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) {}

#endif /* CONFIG_RT_GROUP_SCHED */

static inline
unsigned int rt_se_nr_running(struct sched_rt_entity *rt_se)
{
	struct rt_rq *group_rq = group_rt_rq(rt_se);

	if (group_rq)
		return group_rq->rt_nr_running;
	else
		return 1;
}

static inline
void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	int prio = rt_se_prio(rt_se);

	WARN_ON(!rt_prio(prio));
	rt_rq->rt_nr_running += rt_se_nr_running(rt_se);

	inc_rt_prio(rt_rq, prio);
	inc_rt_migration(rt_se, rt_rq);
	inc_rt_group(rt_se, rt_rq);
}

static inline
void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	WARN_ON(!rt_prio(rt_se_prio(rt_se)));
	WARN_ON(!rt_rq->rt_nr_running);
	rt_rq->rt_nr_running -= rt_se_nr_running(rt_se);

	dec_rt_prio(rt_rq, rt_se_prio(rt_se));
	dec_rt_migration(rt_se, rt_rq);
	dec_rt_group(rt_se, rt_rq);
}

static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
{
	struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
	struct rt_prio_array *array = &rt_rq->active;
	struct rt_rq *group_rq = group_rt_rq(rt_se);
	struct list_head *queue = array->queue + rt_se_prio(rt_se);

	/*
	 * Don't enqueue the group if it's throttled, or when empty.
	 * The latter is a consequence of the former when a child group
	 * gets throttled and the current group doesn't have any other
	 * active members.
	 */
	if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running))
		return;

	if (head)
		list_add(&rt_se->run_list, queue);
	else
		list_add_tail(&rt_se->run_list, queue);
	__set_bit(rt_se_prio(rt_se), array->bitmap);

	inc_rt_tasks(rt_se, rt_rq);
}

static void __dequeue_rt_entity(struct sched_rt_entity *rt_se)
{
	struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
	struct rt_prio_array *array = &rt_rq->active;

	list_del_init(&rt_se->run_list);
	if (list_empty(array->queue + rt_se_prio(rt_se)))
		__clear_bit(rt_se_prio(rt_se), array->bitmap);

	dec_rt_tasks(rt_se, rt_rq);
}

/*
 * Because the prio of an upper entry depends on the lower
 * entries, we must remove entries top-down.
 */
static void dequeue_rt_stack(struct sched_rt_entity *rt_se)
{
	struct sched_rt_entity *back = NULL;

	for_each_sched_rt_entity(rt_se) {
		rt_se->back = back;
		back = rt_se;
	}

	dequeue_top_rt_rq(rt_rq_of_se(back));

	for (rt_se = back; rt_se; rt_se = rt_se->back) {
		if (on_rt_rq(rt_se))
			__dequeue_rt_entity(rt_se);
	}
}

static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
{
	struct rq *rq = rq_of_rt_se(rt_se);

	dequeue_rt_stack(rt_se);
	for_each_sched_rt_entity(rt_se)
		__enqueue_rt_entity(rt_se, head);
	enqueue_top_rt_rq(&rq->rt);
}

static void dequeue_rt_entity(struct sched_rt_entity *rt_se)
{
	struct rq *rq = rq_of_rt_se(rt_se);

	dequeue_rt_stack(rt_se);

	for_each_sched_rt_entity(rt_se) {
		struct rt_rq *rt_rq = group_rt_rq(rt_se);

		if (rt_rq && rt_rq->rt_nr_running)
			__enqueue_rt_entity(rt_se, false);
	}
	enqueue_top_rt_rq(&rq->rt);
}

/*
 * Adding/removing a task to/from a priority array:
 */
static void
enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
{
	struct sched_rt_entity *rt_se = &p->rt;

	if (flags & ENQUEUE_WAKEUP)
		rt_se->timeout = 0;

	enqueue_rt_entity(rt_se, flags & ENQUEUE_HEAD);

	if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
		enqueue_pushable_task(rq, p);
}

static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
{
	struct sched_rt_entity *rt_se = &p->rt;

	update_curr_rt(rq);
	dequeue_rt_entity(rt_se);

	dequeue_pushable_task(rq, p);
}

/*
 * Put task to the head or the end of the run list without the overhead of
 * dequeue followed by enqueue.
 */
static void
requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int head)
{
	if (on_rt_rq(rt_se)) {
		struct rt_prio_array *array = &rt_rq->active;
		struct list_head *queue = array->queue + rt_se_prio(rt_se);

		if (head)
			list_move(&rt_se->run_list, queue);
		else
			list_move_tail(&rt_se->run_list, queue);
	}
}

static void requeue_task_rt(struct rq *rq, struct task_struct *p, int head)
{
	struct sched_rt_entity *rt_se = &p->rt;
	struct rt_rq *rt_rq;

	for_each_sched_rt_entity(rt_se) {
		rt_rq = rt_rq_of_se(rt_se);
		requeue_rt_entity(rt_rq, rt_se, head);
	}
}

static void yield_task_rt(struct rq *rq)
{
	requeue_task_rt(rq, rq->curr, 0);
}

#ifdef CONFIG_SMP
static int find_lowest_rq(struct task_struct *task);

static int
select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
{
	struct task_struct *curr;
	struct rq *rq;

	/* For anything but wake ups, just return the task_cpu */
	if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK)
		goto out;

	rq = cpu_rq(cpu);

	rcu_read_lock();
	curr = READ_ONCE(rq->curr); /* unlocked access */

	/*
	 * If the current task on @p's runqueue is an RT task, then
	 * try to see if we can wake this RT task up on another
	 * runqueue. Otherwise simply start this RT task
	 * on its current runqueue.
	 *
	 * We want to avoid overloading runqueues. If the woken
	 * task is a higher priority, then it will stay on this CPU
	 * and the lower prio task should be moved to another CPU.
	 * Even though this will probably make the lower prio task
	 * lose its cache, we do not want to bounce a higher priority
	 * task around just because it gave up its CPU, perhaps for a
	 * lock?
	 *
	 * For equal prio tasks, we just let the scheduler sort it out.
	 *
	 * Otherwise, just let it ride on the affined RQ and the
	 * post-schedule router will push the preempted task away.
	 *
	 * This test is optimistic, if we get it wrong the load-balancer
	 * will have to sort it out.
	 */
	if (curr && unlikely(rt_task(curr)) &&
	    (curr->nr_cpus_allowed < 2 ||
	     curr->prio <= p->prio)) {
		int target = find_lowest_rq(p);

		/*
		 * Don't bother moving it if the destination CPU is
		 * not running a lower priority task.
		 */
		if (target != -1 &&
		    p->prio < cpu_rq(target)->rt.highest_prio.curr)
			cpu = target;
	}
	rcu_read_unlock();

out:
	return cpu;
}

static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
{
	/*
	 * Current can't be migrated, useless to reschedule,
	 * let's hope p can move out.
	 */
	if (rq->curr->nr_cpus_allowed == 1 ||
	    !cpupri_find(&rq->rd->cpupri, rq->curr, NULL))
		return;

	/*
	 * p is migratable, so let's not schedule it and
	 * see if it is pushed or pulled somewhere else.
	 */
	if (p->nr_cpus_allowed != 1
	    && cpupri_find(&rq->rd->cpupri, p, NULL))
		return;

	/*
	 * There appear to be other CPUs that can accept
	 * current and none to run 'p', so let's reschedule
	 * to try and push current away:
	 */
	requeue_task_rt(rq, p, 1);
	resched_curr(rq);
}

#endif /* CONFIG_SMP */

/*
 * Preempt the current task with a newly woken task if needed:
 */
static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags)
{
	if (p->prio < rq->curr->prio) {
		resched_curr(rq);
		return;
	}

#ifdef CONFIG_SMP
	/*
	 * If:
	 *
	 * - the newly woken task is of equal priority to the current task
	 * - the newly woken task is non-migratable while current is migratable
	 * - current will be preempted on the next reschedule
	 *
	 * we should check to see if current can readily move to a different
	 * cpu.  If so, we will reschedule to allow the push logic to try
	 * to move current somewhere else, making room for our non-migratable
	 * task.
	 */
	if (p->prio == rq->curr->prio && !test_tsk_need_resched(rq->curr))
		check_preempt_equal_prio(rq, p);
#endif
}

static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
						   struct rt_rq *rt_rq)
{
	struct rt_prio_array *array = &rt_rq->active;
	struct sched_rt_entity *next = NULL;
	struct list_head *queue;
	int idx;

	idx = sched_find_first_bit(array->bitmap);
	BUG_ON(idx >= MAX_RT_PRIO);

	queue = array->queue + idx;
	next = list_entry(queue->next, struct sched_rt_entity, run_list);

	return next;
}

static struct task_struct *_pick_next_task_rt(struct rq *rq)
{
	struct sched_rt_entity *rt_se;
	struct task_struct *p;
	struct rt_rq *rt_rq = &rq->rt;

	do {
		rt_se = pick_next_rt_entity(rq, rt_rq);
		BUG_ON(!rt_se);
		rt_rq = group_rt_rq(rt_se);
	} while (rt_rq);

	p = rt_task_of(rt_se);
	p->se.exec_start = rq_clock_task(rq);

	return p;
}

static struct task_struct *
pick_next_task_rt(struct rq *rq, struct task_struct *prev)
{
	struct task_struct *p;
	struct rt_rq *rt_rq = &rq->rt;

	if (need_pull_rt_task(rq, prev)) {
		/*
		 * This is OK, because current is on_cpu, which avoids it being
		 * picked for load-balance and preemption/IRQs are still
		 * disabled avoiding further scheduler activity on it and we're
		 * being very careful to re-start the picking loop.
		 */
		lockdep_unpin_lock(&rq->lock);
		pull_rt_task(rq);
		lockdep_pin_lock(&rq->lock);
		/*
		 * pull_rt_task() can drop (and re-acquire) rq->lock; this
		 * means a dl or stop task can slip in, in which case we need
		 * to re-start task selection.
		 */
		if (unlikely((rq->stop && task_on_rq_queued(rq->stop)) ||
			     rq->dl.dl_nr_running))
			return RETRY_TASK;
	}

	/*
	 * We may dequeue prev's rt_rq in put_prev_task().
	 * So, we update time before rt_nr_running check.
	 */
	if (prev->sched_class == &rt_sched_class)
		update_curr_rt(rq);

	if (!rt_rq->rt_queued)
		return NULL;

	put_prev_task(rq, prev);

	p = _pick_next_task_rt(rq);

	/* The running task is never eligible for pushing */
	dequeue_pushable_task(rq, p);

	queue_push_tasks(rq);

	return p;
}

static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
{
	update_curr_rt(rq);

	/*
	 * The previous task needs to be made eligible for pushing
	 * if it is still active
	 */
	if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1)
		enqueue_pushable_task(rq, p);
}

#ifdef CONFIG_SMP

/* Only try algorithms three times */
#define RT_MAX_TRIES 3

static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
{
	if (!task_running(rq, p) &&
	    cpumask_test_cpu(cpu, tsk_cpus_allowed(p)))
		return 1;
	return 0;
}

/*
 * Return the highest pushable rq's task, which is suitable to be executed
 * on the cpu, NULL otherwise
 */
static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu)
{
	struct plist_head *head = &rq->rt.pushable_tasks;
	struct task_struct *p;

	if (!has_pushable_tasks(rq))
		return NULL;

	plist_for_each_entry(p, head, pushable_tasks) {
		if (pick_rt_task(rq, p, cpu))
			return p;
	}

	return NULL;
}

static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);

static int find_lowest_rq(struct task_struct *task)
{
	struct sched_domain *sd;
	struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask);
	int this_cpu = smp_processor_id();
	int cpu = task_cpu(task);

	/* Make sure the mask is initialized first */
	if (unlikely(!lowest_mask))
		return -1;

	if (task->nr_cpus_allowed == 1)
		return -1; /* No other targets possible */

	if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
		return -1; /* No targets found */

	/*
	 * At this point we have built a mask of cpus representing the
	 * lowest priority tasks in the system.  Now we want to elect
	 * the best one based on our affinity and topology.
	 *
	 * We prioritize the last cpu that the task executed on since
	 * it is most likely cache-hot in that location.
	 */
	if (cpumask_test_cpu(cpu, lowest_mask))
		return cpu;

	/*
	 * Otherwise, we consult the sched_domains span maps to figure
	 * out which cpu is logically closest to our hot cache data.
	 */
	if (!cpumask_test_cpu(this_cpu, lowest_mask))
		this_cpu = -1; /* Skip this_cpu opt if not among lowest */

	rcu_read_lock();
	for_each_domain(cpu, sd) {
		if (sd->flags & SD_WAKE_AFFINE) {
			int best_cpu;

			/*
			 * "this_cpu" is cheaper to preempt than a
			 * remote processor.
			 */
			if (this_cpu != -1 &&
			    cpumask_test_cpu(this_cpu, sched_domain_span(sd))) {
				rcu_read_unlock();
				return this_cpu;
			}

			best_cpu = cpumask_first_and(lowest_mask,
						     sched_domain_span(sd));
			if (best_cpu < nr_cpu_ids) {
				rcu_read_unlock();
				return best_cpu;
			}
		}
	}
	rcu_read_unlock();

	/*
	 * And finally, if there were no matches within the domains
	 * just give the caller *something* to work with from the compatible
	 * locations.
	 */
	if (this_cpu != -1)
		return this_cpu;

	cpu = cpumask_any(lowest_mask);
	if (cpu < nr_cpu_ids)
		return cpu;
	return -1;
}

/* Will lock the rq it finds */
static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
{
	struct rq *lowest_rq = NULL;
	int tries;
	int cpu;

	for (tries = 0; tries < RT_MAX_TRIES; tries++) {
		cpu = find_lowest_rq(task);

		if ((cpu == -1) || (cpu == rq->cpu))
			break;

		lowest_rq = cpu_rq(cpu);

		if (lowest_rq->rt.highest_prio.curr <= task->prio) {
			/*
			 * Target rq has tasks of equal or higher priority,
			 * retrying does not release any lock and is unlikely
			 * to yield a different result.
			 */
			lowest_rq = NULL;
			break;
		}

		/* if the prio of this runqueue changed, try again */
		if (double_lock_balance(rq, lowest_rq)) {
			/*
			 * We had to unlock the run queue. In
			 * the mean time, task could have
			 * migrated already or had its affinity changed.
			 * Also make sure that it wasn't scheduled on its rq.
			 */
			if (unlikely(task_rq(task) != rq ||
				     !cpumask_test_cpu(lowest_rq->cpu,
						       tsk_cpus_allowed(task)) ||
				     task_running(rq, task) ||
				     !task_on_rq_queued(task))) {

				double_unlock_balance(rq, lowest_rq);
				lowest_rq = NULL;
				break;
			}
		}

		/* If this rq is still suitable use it. */
		if (lowest_rq->rt.highest_prio.curr > task->prio)
			break;

		/* try again */
		double_unlock_balance(rq, lowest_rq);
		lowest_rq = NULL;
	}

	return lowest_rq;
}

static struct task_struct *pick_next_pushable_task(struct rq *rq)
{
	struct task_struct *p;

	if (!has_pushable_tasks(rq))
		return NULL;

	p = plist_first_entry(&rq->rt.pushable_tasks,
			      struct task_struct, pushable_tasks);

	BUG_ON(rq->cpu != task_cpu(p));
	BUG_ON(task_current(rq, p));
	BUG_ON(p->nr_cpus_allowed <= 1);

	BUG_ON(!task_on_rq_queued(p));
	BUG_ON(!rt_task(p));

	return p;
}

/*
 * If the current CPU has more than one RT task, see if the non
 * running task can migrate over to a CPU that is running a task
 * of lesser priority.
 */
static int push_rt_task(struct rq *rq)
{
	struct task_struct *next_task;
	struct rq *lowest_rq;
	int ret = 0;

	if (!rq->rt.overloaded)
		return 0;

	next_task = pick_next_pushable_task(rq);
	if (!next_task)
		return 0;

retry:
	if (unlikely(next_task == rq->curr)) {
		WARN_ON(1);
		return 0;
	}

	/*
	 * It's possible that the next_task slipped in of
	 * higher priority than current. If that's the case
	 * just reschedule current.
	 */
	if (unlikely(next_task->prio < rq->curr->prio)) {
		resched_curr(rq);
		return 0;
	}

	/* We might release rq lock */
	get_task_struct(next_task);

	/* find_lock_lowest_rq locks the rq if found */
	lowest_rq = find_lock_lowest_rq(next_task, rq);
	if (!lowest_rq) {
		struct task_struct *task;
		/*
		 * find_lock_lowest_rq releases rq->lock
		 * so it is possible that next_task has migrated.
		 *
		 * We need to make sure that the task is still on the same
		 * run-queue and is also still the next task eligible for
		 * pushing.
		 */
		task = pick_next_pushable_task(rq);
		if (task_cpu(next_task) == rq->cpu && task == next_task) {
			/*
			 * The task hasn't migrated, and is still the next
			 * eligible task, but we failed to find a run-queue
			 * to push it to.  Do not retry in this case, since
			 * other cpus will pull from us when ready.
			 */
			goto out;
		}

		if (!task)
			/* No more tasks, just exit */
			goto out;

		/*
		 * Something has shifted, try again.
		 */
		put_task_struct(next_task);
		next_task = task;
		goto retry;
	}

	deactivate_task(rq, next_task, 0);
	set_task_cpu(next_task, lowest_rq->cpu);
	activate_task(lowest_rq, next_task, 0);
	ret = 1;

	resched_curr(lowest_rq);

	double_unlock_balance(rq, lowest_rq);

out:
	put_task_struct(next_task);

	return ret;
}

static void push_rt_tasks(struct rq *rq)
{
	/* push_rt_task() will return true if it moved an RT task */
	while (push_rt_task(rq))
		;
}

#ifdef HAVE_RT_PUSH_IPI
/*
 * The search for the next cpu always starts at rq->cpu and ends
 * when we reach rq->cpu again. It will never return rq->cpu.
 * This returns the next cpu to check, or nr_cpu_ids if the loop
 * is complete.
 *
 * rq->rt.push_cpu holds the last cpu returned by this function,
 * or if this is the first instance, it must hold rq->cpu.
 */
static int rto_next_cpu(struct rq *rq)
{
	int prev_cpu = rq->rt.push_cpu;
	int cpu;

	cpu = cpumask_next(prev_cpu, rq->rd->rto_mask);

	/*
	 * If the previous cpu is less than the rq's CPU, then it already
	 * passed the end of the mask, and has started from the beginning.
	 * We end if the next CPU is greater or equal to rq's CPU.
	 */
	if (prev_cpu < rq->cpu) {
		if (cpu >= rq->cpu)
			return nr_cpu_ids;

	} else if (cpu >= nr_cpu_ids) {
		/*
		 * We passed the end of the mask, start at the beginning.
		 * If the result is greater or equal to the rq's CPU, then
		 * the loop is finished.
		 */
		cpu = cpumask_first(rq->rd->rto_mask);
		if (cpu >= rq->cpu)
			return nr_cpu_ids;
	}
	rq->rt.push_cpu = cpu;

	/* Return cpu to let the caller know if the loop is finished or not */
	return cpu;
}

static int find_next_push_cpu(struct rq *rq)
{
	struct rq *next_rq;
	int cpu;

	while (1) {
		cpu = rto_next_cpu(rq);
		if (cpu >= nr_cpu_ids)
			break;
		next_rq = cpu_rq(cpu);

		/* Make sure the next rq can push to this rq */
		if (next_rq->rt.highest_prio.next < rq->rt.highest_prio.curr)
			break;
	}

	return cpu;
}

#define RT_PUSH_IPI_EXECUTING		1
#define RT_PUSH_IPI_RESTART		2

static void tell_cpu_to_push(struct rq *rq)
{
	int cpu;

	if (rq->rt.push_flags & RT_PUSH_IPI_EXECUTING) {
		raw_spin_lock(&rq->rt.push_lock);
		/* Make sure it's still executing */
		if (rq->rt.push_flags & RT_PUSH_IPI_EXECUTING) {
			/*
			 * Tell the IPI to restart the loop as things have
			 * changed since it started.
			 */
			rq->rt.push_flags |= RT_PUSH_IPI_RESTART;
			raw_spin_unlock(&rq->rt.push_lock);
			return;
		}
		raw_spin_unlock(&rq->rt.push_lock);
	}

	/* When here, there's no IPI going around */

	rq->rt.push_cpu = rq->cpu;
	cpu = find_next_push_cpu(rq);
	if (cpu >= nr_cpu_ids)
		return;

	rq->rt.push_flags = RT_PUSH_IPI_EXECUTING;

	irq_work_queue_on(&rq->rt.push_work, cpu);
}

/* Called from hardirq context */
static void try_to_push_tasks(void *arg)
{
	struct rt_rq *rt_rq = arg;
	struct rq *rq, *src_rq;
	int this_cpu;
	int cpu;

	this_cpu = rt_rq->push_cpu;

	/* Paranoid check */
	BUG_ON(this_cpu != smp_processor_id());

	rq = cpu_rq(this_cpu);
	src_rq = rq_of_rt_rq(rt_rq);

again:
	if (has_pushable_tasks(rq)) {
		raw_spin_lock(&rq->lock);
		push_rt_task(rq);
		raw_spin_unlock(&rq->lock);
	}

	/* Pass the IPI to the next rt overloaded queue */
	raw_spin_lock(&rt_rq->push_lock);
	/*
	 * If the source queue changed since the IPI went out,
	 * we need to restart the search from that CPU again.
	 */
	if (rt_rq->push_flags & RT_PUSH_IPI_RESTART) {
		rt_rq->push_flags &= ~RT_PUSH_IPI_RESTART;
		rt_rq->push_cpu = src_rq->cpu;
	}

	cpu = find_next_push_cpu(src_rq);

	if (cpu >= nr_cpu_ids)
		rt_rq->push_flags &= ~RT_PUSH_IPI_EXECUTING;
	raw_spin_unlock(&rt_rq->push_lock);

	if (cpu >= nr_cpu_ids)
		return;

	/*
	 * It is possible that a restart caused this CPU to be
	 * chosen again. Don't bother with an IPI, just see if we
	 * have more to push.
	 */
	if (unlikely(cpu == rq->cpu))
		goto again;

	/* Try the next RT overloaded CPU */
	irq_work_queue_on(&rt_rq->push_work, cpu);
}

static void push_irq_work_func(struct irq_work *work)
{
	struct rt_rq *rt_rq = container_of(work, struct rt_rq, push_work);

	try_to_push_tasks(rt_rq);
}
#endif /* HAVE_RT_PUSH_IPI */

static void pull_rt_task(struct rq *this_rq)
{
	int this_cpu = this_rq->cpu, cpu;
	bool resched = false;
	struct task_struct *p;
	struct rq *src_rq;

	if (likely(!rt_overloaded(this_rq)))
		return;

	/*
	 * Match the barrier from rt_set_overload(); this guarantees that if we
	 * see overloaded we must also see the rto_mask bit.
	 */
	smp_rmb();

#ifdef HAVE_RT_PUSH_IPI
	if (sched_feat(RT_PUSH_IPI)) {
		tell_cpu_to_push(this_rq);
		return;
	}
#endif

	for_each_cpu(cpu, this_rq->rd->rto_mask) {
		if (this_cpu == cpu)
			continue;

		src_rq = cpu_rq(cpu);

		/*
		 * Don't bother taking the src_rq->lock if the next highest
		 * task is known to be lower-priority than our current task.
		 * This may look racy, but if this value is about to go
		 * logically higher, the src_rq will push this task away.
		 * And if it's going logically lower, we do not care.
		 */
		if (src_rq->rt.highest_prio.next >=
		    this_rq->rt.highest_prio.curr)
			continue;

		/*
		 * We can potentially drop this_rq's lock in
		 * double_lock_balance, and another CPU could
		 * alter this_rq
		 */
		double_lock_balance(this_rq, src_rq);

		/*
		 * We can pull only a task, which is pushable
		 * on its rq, and no others.
		 */
		p = pick_highest_pushable_task(src_rq, this_cpu);

		/*
		 * Do we have an RT task that preempts
		 * the to-be-scheduled task?
		 */
		if (p && (p->prio < this_rq->rt.highest_prio.curr)) {
			WARN_ON(p == src_rq->curr);
			WARN_ON(!task_on_rq_queued(p));

			/*
			 * There's a chance that p is higher in priority
			 * than what's currently running on its cpu.
			 * This is just that p is waking up and hasn't
			 * had a chance to schedule. We only pull
			 * p if it is lower in priority than the
			 * current task on the run queue
			 */
			if (p->prio < src_rq->curr->prio)
				goto skip;

			resched = true;

			deactivate_task(src_rq, p, 0);
			set_task_cpu(p, this_cpu);
			activate_task(this_rq, p, 0);
			/*
			 * We continue with the search, just in
			 * case there's an even higher prio task
			 * in another runqueue. (low likelihood
			 * but possible)
			 */
		}
skip:
		double_unlock_balance(this_rq, src_rq);
	}

	if (resched)
		resched_curr(this_rq);
}

/*
 * If we are not running and we are not going to reschedule soon, we should
 * try to push tasks away now
 */
static void task_woken_rt(struct rq *rq, struct task_struct *p)
{
	if (!task_running(rq, p) &&
	    !test_tsk_need_resched(rq->curr) &&
	    p->nr_cpus_allowed > 1 &&
	    (dl_task(rq->curr) || rt_task(rq->curr)) &&
	    (rq->curr->nr_cpus_allowed < 2 ||
	     rq->curr->prio <= p->prio))
		push_rt_tasks(rq);
}

/* Assumes rq->lock is held */
static void rq_online_rt(struct rq *rq)
{
	if (rq->rt.overloaded)
		rt_set_overload(rq);

	__enable_runtime(rq);

	cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio.curr);
}

/* Assumes rq->lock is held */
static void rq_offline_rt(struct rq *rq)
{
	if (rq->rt.overloaded)
		rt_clear_overload(rq);

	__disable_runtime(rq);

	cpupri_set(&rq->rd->cpupri, rq->cpu, CPUPRI_INVALID);
}

/*
 * When switching from the rt queue, we bring ourselves to a position
 * that we might want to pull RT tasks from other runqueues.
 */
static void switched_from_rt(struct rq *rq, struct task_struct *p)
{
	/*
	 * If there are other RT tasks then we will reschedule
	 * and the scheduling of the other RT tasks will handle
	 * the balancing. But if we are the last RT task
	 * we may need to handle the pulling of RT tasks
	 * now.
	 */
	if (!task_on_rq_queued(p) || rq->rt.rt_nr_running)
		return;

	queue_pull_task(rq);
}

void __init init_sched_rt_class(void)
{
	unsigned int i;

	for_each_possible_cpu(i) {
		zalloc_cpumask_var_node(&per_cpu(local_cpu_mask, i),
					GFP_KERNEL, cpu_to_node(i));
	}
}
#endif /* CONFIG_SMP */

/*
 * When switching a task to RT, we may overload the runqueue
 * with RT tasks. In this case we try to push them off to
 * other runqueues.
 */
static void switched_to_rt(struct rq *rq, struct task_struct *p)
{
	/*
	 * If we are already running, then there's nothing
	 * that needs to be done. But if we are not running
	 * we may need to preempt the current running task.
	 * If that current running task is also an RT task
	 * then see if we can move to another run queue.
	 */
	if (task_on_rq_queued(p) && rq->curr != p) {
#ifdef CONFIG_SMP
		if (p->nr_cpus_allowed > 1 && rq->rt.overloaded)
			queue_push_tasks(rq);
#else
		if (p->prio < rq->curr->prio)
			resched_curr(rq);
#endif /* CONFIG_SMP */
	}
}

/*
 * Priority of the task has changed. This may cause
 * us to initiate a push or pull.
 */
static void
prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio)
{
	if (!task_on_rq_queued(p))
		return;

	if (rq->curr == p) {
#ifdef CONFIG_SMP
		/*
		 * If our priority decreases while running, we
		 * may need to pull tasks to this runqueue.
		 */
		if (oldprio < p->prio)
			queue_pull_task(rq);

		/*
		 * If there's a higher priority task waiting to run
		 * then reschedule.
		 */
		if (p->prio > rq->rt.highest_prio.curr)
			resched_curr(rq);
#else
		/* For UP simply resched on drop of prio */
		if (oldprio < p->prio)
			resched_curr(rq);
#endif /* CONFIG_SMP */
	} else {
		/*
		 * This task is not running, but if it is
		 * higher in priority than the current running task
		 * then reschedule.
		 */
		if (p->prio < rq->curr->prio)
			resched_curr(rq);
	}
}

static void watchdog(struct rq *rq, struct task_struct *p)
{
	unsigned long soft, hard;

	/* max may change after cur was read, this will be fixed next tick */
	soft = task_rlimit(p, RLIMIT_RTTIME);
	hard = task_rlimit_max(p, RLIMIT_RTTIME);

	if (soft != RLIM_INFINITY) {
		unsigned long next;

		if (p->rt.watchdog_stamp != jiffies) {
			p->rt.timeout++;
			p->rt.watchdog_stamp = jiffies;
		}

		next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
		if (p->rt.timeout > next)
			p->cputime_expires.sched_exp = p->se.sum_exec_runtime;
	}
}

static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
{
	struct sched_rt_entity *rt_se = &p->rt;

	update_curr_rt(rq);

	watchdog(rq, p);

	/*
	 * RR tasks need a special form of timeslice management.
	 * FIFO tasks have no timeslices.
	 */
	if (p->policy != SCHED_RR)
		return;

	if (--p->rt.time_slice)
		return;

	p->rt.time_slice = sched_rr_timeslice;

	/*
	 * Requeue to the end of queue if we (and all of our ancestors) are not
	 * the only element on the queue
	 */
	for_each_sched_rt_entity(rt_se) {
		if (rt_se->run_list.prev != rt_se->run_list.next) {
			requeue_task_rt(rq, p, 0);
			resched_curr(rq);
			return;
		}
	}
}

static void set_curr_task_rt(struct rq *rq)
{
	struct task_struct *p = rq->curr;

	p->se.exec_start = rq_clock_task(rq);

	/* The running task is never eligible for pushing */
	dequeue_pushable_task(rq, p);
}

static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
{
	/*
	 * Time slice is 0 for SCHED_FIFO tasks
	 */
	if (task->policy == SCHED_RR)
		return sched_rr_timeslice;
	else
		return 0;
}

const struct sched_class rt_sched_class = {
	.next			= &fair_sched_class,
	.enqueue_task		= enqueue_task_rt,
	.dequeue_task		= dequeue_task_rt,
	.yield_task		= yield_task_rt,

	.check_preempt_curr	= check_preempt_curr_rt,

	.pick_next_task		= pick_next_task_rt,
	.put_prev_task		= put_prev_task_rt,

#ifdef CONFIG_SMP
	.select_task_rq		= select_task_rq_rt,

	.set_cpus_allowed	= set_cpus_allowed_common,
	.rq_online		= rq_online_rt,
	.rq_offline		= rq_offline_rt,
	.task_woken		= task_woken_rt,
	.switched_from		= switched_from_rt,
#endif

	.set_curr_task		= set_curr_task_rt,
	.task_tick		= task_tick_rt,

	.get_rr_interval	= get_rr_interval_rt,

	.prio_changed		= prio_changed_rt,
	.switched_to		= switched_to_rt,

	.update_curr		= update_curr_rt,
};

#ifdef CONFIG_SCHED_DEBUG
extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);

void print_rt_stats(struct seq_file *m, int cpu)
{
	rt_rq_iter_t iter;
	struct rt_rq *rt_rq;

	rcu_read_lock();
	for_each_rt_rq(rt_rq, iter, cpu_rq(cpu))
		print_rt_rq(m, cpu, rt_rq);
	rcu_read_unlock();
}
#endif /* CONFIG_SCHED_DEBUG */
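
/*
 * Usage sketch (reference only, not part of the class itself): from user
 * space a task is placed under this scheduling class with
 * sched_setscheduler(), e.g.
 *
 *	struct sched_param sp = { .sched_priority = 10 };
 *	sched_setscheduler(0, SCHED_RR, &sp);
 *
 * after which task_tick_rt() above refreshes its sched_rr_timeslice budget
 * each time the slice runs out; SCHED_FIFO tasks skip that path entirely.
 */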