// SPDX-License-Identifier: GPL-2.0
/*
 * Deadline Scheduling Class (SCHED_DEADLINE)
 *
 * Earliest Deadline First (EDF) + Constant Bandwidth Server (CBS).
 *
 * Tasks that periodically execute their instances for less than their
 * runtime won't miss any of their deadlines.
 * Tasks that are not periodic or sporadic or that try to execute more
 * than their reserved bandwidth will be slowed down (and may potentially
 * miss some of their deadlines), and won't affect any other task.
 *
 * Copyright (C) 2012 Dario Faggioli <raistlin@linux.it>,
 *                    Juri Lelli <juri.lelli@gmail.com>,
 *                    Michael Trimarchi <michael@amarulasolutions.com>,
 *                    Fabio Checconi <fchecconi@gmail.com>
 */

static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se)
{
	return container_of(dl_se, struct task_struct, dl);
}

static inline struct rq *rq_of_dl_rq(struct dl_rq *dl_rq)
{
	return container_of(dl_rq, struct rq, dl);
}

static inline struct dl_rq *dl_rq_of_se(struct sched_dl_entity *dl_se)
{
	struct task_struct *p = dl_task_of(dl_se);
	struct rq *rq = task_rq(p);

	return &rq->dl;
}

static inline int on_dl_rq(struct sched_dl_entity *dl_se)
{
	return !RB_EMPTY_NODE(&dl_se->rb_node);
}

#ifdef CONFIG_RT_MUTEXES
static inline struct sched_dl_entity *pi_of(struct sched_dl_entity *dl_se)
{
	return dl_se->pi_se;
}

static inline bool is_dl_boosted(struct sched_dl_entity *dl_se)
{
	return pi_of(dl_se) != dl_se;
}
#else
static inline struct sched_dl_entity *pi_of(struct sched_dl_entity *dl_se)
{
	return dl_se;
}

static inline bool is_dl_boosted(struct sched_dl_entity *dl_se)
{
	return false;
}
#endif

#ifdef CONFIG_SMP
static inline struct dl_bw *dl_bw_of(int i)
{
	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
			 "sched RCU must be held");
	return &cpu_rq(i)->rd->dl_bw;
}

static inline int dl_bw_cpus(int i)
{
	struct root_domain *rd = cpu_rq(i)->rd;
	int cpus;

	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
			 "sched RCU must be held");

	if (cpumask_subset(rd->span, cpu_active_mask))
		return cpumask_weight(rd->span);

	cpus = 0;

	for_each_cpu_and(i, rd->span, cpu_active_mask)
		cpus++;

	return cpus;
}

static inline unsigned long __dl_bw_capacity(int i)
{
	struct root_domain *rd = cpu_rq(i)->rd;
	unsigned long cap = 0;

	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
			 "sched RCU must be held");

	for_each_cpu_and(i, rd->span, cpu_active_mask)
		cap += capacity_orig_of(i);

	return cap;
}

/*
 * XXX Fix: If 'rq->rd == def_root_domain' perform AC against capacity
 * of the CPU the task is running on rather than rd's \Sum CPU capacity.
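 *
 * As a rough illustration of what the capacity helpers above and below
 * compute (numbers are made up, not taken from any particular system):
 * on a symmetric machine where every CPU has capacity_orig_of() ==
 * SCHED_CAPACITY_SCALE (1024) and the root domain spans 4 active CPUs,
 * dl_bw_capacity() returns 4 << SCHED_CAPACITY_SHIFT == 4096; on an
 * asymmetric machine with two big CPUs (1024) and two LITTLE CPUs (512),
 * __dl_bw_capacity() sums the original capacities and returns
 * 1024 + 1024 + 512 + 512 == 3072.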
108 */ 109 static inline unsigned long dl_bw_capacity(int i) 110 { 111 if (!static_branch_unlikely(&sched_asym_cpucapacity) && 112 capacity_orig_of(i) == SCHED_CAPACITY_SCALE) { 113 return dl_bw_cpus(i) << SCHED_CAPACITY_SHIFT; 114 } else { 115 return __dl_bw_capacity(i); 116 } 117 } 118 119 static inline bool dl_bw_visited(int cpu, u64 gen) 120 { 121 struct root_domain *rd = cpu_rq(cpu)->rd; 122 123 if (rd->visit_gen == gen) 124 return true; 125 126 rd->visit_gen = gen; 127 return false; 128 } 129 130 static inline 131 void __dl_update(struct dl_bw *dl_b, s64 bw) 132 { 133 struct root_domain *rd = container_of(dl_b, struct root_domain, dl_bw); 134 int i; 135 136 RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(), 137 "sched RCU must be held"); 138 for_each_cpu_and(i, rd->span, cpu_active_mask) { 139 struct rq *rq = cpu_rq(i); 140 141 rq->dl.extra_bw += bw; 142 } 143 } 144 #else 145 static inline struct dl_bw *dl_bw_of(int i) 146 { 147 return &cpu_rq(i)->dl.dl_bw; 148 } 149 150 static inline int dl_bw_cpus(int i) 151 { 152 return 1; 153 } 154 155 static inline unsigned long dl_bw_capacity(int i) 156 { 157 return SCHED_CAPACITY_SCALE; 158 } 159 160 static inline bool dl_bw_visited(int cpu, u64 gen) 161 { 162 return false; 163 } 164 165 static inline 166 void __dl_update(struct dl_bw *dl_b, s64 bw) 167 { 168 struct dl_rq *dl = container_of(dl_b, struct dl_rq, dl_bw); 169 170 dl->extra_bw += bw; 171 } 172 #endif 173 174 static inline 175 void __dl_sub(struct dl_bw *dl_b, u64 tsk_bw, int cpus) 176 { 177 dl_b->total_bw -= tsk_bw; 178 __dl_update(dl_b, (s32)tsk_bw / cpus); 179 } 180 181 static inline 182 void __dl_add(struct dl_bw *dl_b, u64 tsk_bw, int cpus) 183 { 184 dl_b->total_bw += tsk_bw; 185 __dl_update(dl_b, -((s32)tsk_bw / cpus)); 186 } 187 188 static inline bool 189 __dl_overflow(struct dl_bw *dl_b, unsigned long cap, u64 old_bw, u64 new_bw) 190 { 191 return dl_b->bw != -1 && 192 cap_scale(dl_b->bw, cap) < dl_b->total_bw - old_bw + new_bw; 193 } 194 195 static inline 196 void __add_running_bw(u64 dl_bw, struct dl_rq *dl_rq) 197 { 198 u64 old = dl_rq->running_bw; 199 200 lockdep_assert_rq_held(rq_of_dl_rq(dl_rq)); 201 dl_rq->running_bw += dl_bw; 202 SCHED_WARN_ON(dl_rq->running_bw < old); /* overflow */ 203 SCHED_WARN_ON(dl_rq->running_bw > dl_rq->this_bw); 204 /* kick cpufreq (see the comment in kernel/sched/sched.h). */ 205 cpufreq_update_util(rq_of_dl_rq(dl_rq), 0); 206 } 207 208 static inline 209 void __sub_running_bw(u64 dl_bw, struct dl_rq *dl_rq) 210 { 211 u64 old = dl_rq->running_bw; 212 213 lockdep_assert_rq_held(rq_of_dl_rq(dl_rq)); 214 dl_rq->running_bw -= dl_bw; 215 SCHED_WARN_ON(dl_rq->running_bw > old); /* underflow */ 216 if (dl_rq->running_bw > old) 217 dl_rq->running_bw = 0; 218 /* kick cpufreq (see the comment in kernel/sched/sched.h). 
*/ 219 cpufreq_update_util(rq_of_dl_rq(dl_rq), 0); 220 } 221 222 static inline 223 void __add_rq_bw(u64 dl_bw, struct dl_rq *dl_rq) 224 { 225 u64 old = dl_rq->this_bw; 226 227 lockdep_assert_rq_held(rq_of_dl_rq(dl_rq)); 228 dl_rq->this_bw += dl_bw; 229 SCHED_WARN_ON(dl_rq->this_bw < old); /* overflow */ 230 } 231 232 static inline 233 void __sub_rq_bw(u64 dl_bw, struct dl_rq *dl_rq) 234 { 235 u64 old = dl_rq->this_bw; 236 237 lockdep_assert_rq_held(rq_of_dl_rq(dl_rq)); 238 dl_rq->this_bw -= dl_bw; 239 SCHED_WARN_ON(dl_rq->this_bw > old); /* underflow */ 240 if (dl_rq->this_bw > old) 241 dl_rq->this_bw = 0; 242 SCHED_WARN_ON(dl_rq->running_bw > dl_rq->this_bw); 243 } 244 245 static inline 246 void add_rq_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) 247 { 248 if (!dl_entity_is_special(dl_se)) 249 __add_rq_bw(dl_se->dl_bw, dl_rq); 250 } 251 252 static inline 253 void sub_rq_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) 254 { 255 if (!dl_entity_is_special(dl_se)) 256 __sub_rq_bw(dl_se->dl_bw, dl_rq); 257 } 258 259 static inline 260 void add_running_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) 261 { 262 if (!dl_entity_is_special(dl_se)) 263 __add_running_bw(dl_se->dl_bw, dl_rq); 264 } 265 266 static inline 267 void sub_running_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) 268 { 269 if (!dl_entity_is_special(dl_se)) 270 __sub_running_bw(dl_se->dl_bw, dl_rq); 271 } 272 273 static void dl_change_utilization(struct task_struct *p, u64 new_bw) 274 { 275 struct rq *rq; 276 277 BUG_ON(p->dl.flags & SCHED_FLAG_SUGOV); 278 279 if (task_on_rq_queued(p)) 280 return; 281 282 rq = task_rq(p); 283 if (p->dl.dl_non_contending) { 284 sub_running_bw(&p->dl, &rq->dl); 285 p->dl.dl_non_contending = 0; 286 /* 287 * If the timer handler is currently running and the 288 * timer cannot be canceled, inactive_task_timer() 289 * will see that dl_not_contending is not set, and 290 * will not touch the rq's active utilization, 291 * so we are still safe. 292 */ 293 if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1) 294 put_task_struct(p); 295 } 296 __sub_rq_bw(p->dl.dl_bw, &rq->dl); 297 __add_rq_bw(new_bw, &rq->dl); 298 } 299 300 /* 301 * The utilization of a task cannot be immediately removed from 302 * the rq active utilization (running_bw) when the task blocks. 303 * Instead, we have to wait for the so called "0-lag time". 304 * 305 * If a task blocks before the "0-lag time", a timer (the inactive 306 * timer) is armed, and running_bw is decreased when the timer 307 * fires. 308 * 309 * If the task wakes up again before the inactive timer fires, 310 * the timer is canceled, whereas if the task wakes up after the 311 * inactive timer fired (and running_bw has been decreased) the 312 * task's utilization has to be added to running_bw again. 313 * A flag in the deadline scheduling entity (dl_non_contending) 314 * is used to avoid race conditions between the inactive timer handler 315 * and task wakeups. 316 * 317 * The following diagram shows how running_bw is updated. A task is 318 * "ACTIVE" when its utilization contributes to running_bw; an 319 * "ACTIVE contending" task is in the TASK_RUNNING state, while an 320 * "ACTIVE non contending" task is a blocked task for which the "0-lag time" 321 * has not passed yet. An "INACTIVE" task is a task for which the "0-lag" 322 * time already passed, which does not contribute to running_bw anymore. 
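 *
 * As a worked example (hypothetical parameters, not taken from the code):
 * consider a task with dl_runtime = 10ms and dl_period = 100ms, i.e. a
 * utilization of 0.1. Suppose it blocks with 4ms of runtime left, so its
 * "0-lag time" is runtime / utilization = 40ms before its current absolute
 * deadline. If that deadline is only 25ms away, the 0-lag time is already
 * 15ms in the past and running_bw is decreased immediately; if the deadline
 * is 60ms away, the inactive timer is armed to fire in 20ms and running_bw
 * is only decreased then (unless the task wakes up first).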
323 * +------------------+ 324 * wakeup | ACTIVE | 325 * +------------------>+ contending | 326 * | add_running_bw | | 327 * | +----+------+------+ 328 * | | ^ 329 * | dequeue | | 330 * +--------+-------+ | | 331 * | | t >= 0-lag | | wakeup 332 * | INACTIVE |<---------------+ | 333 * | | sub_running_bw | | 334 * +--------+-------+ | | 335 * ^ | | 336 * | t < 0-lag | | 337 * | | | 338 * | V | 339 * | +----+------+------+ 340 * | sub_running_bw | ACTIVE | 341 * +-------------------+ | 342 * inactive timer | non contending | 343 * fired +------------------+ 344 * 345 * The task_non_contending() function is invoked when a task 346 * blocks, and checks if the 0-lag time already passed or 347 * not (in the first case, it directly updates running_bw; 348 * in the second case, it arms the inactive timer). 349 * 350 * The task_contending() function is invoked when a task wakes 351 * up, and checks if the task is still in the "ACTIVE non contending" 352 * state or not (in the second case, it updates running_bw). 353 */ 354 static void task_non_contending(struct task_struct *p) 355 { 356 struct sched_dl_entity *dl_se = &p->dl; 357 struct hrtimer *timer = &dl_se->inactive_timer; 358 struct dl_rq *dl_rq = dl_rq_of_se(dl_se); 359 struct rq *rq = rq_of_dl_rq(dl_rq); 360 s64 zerolag_time; 361 362 /* 363 * If this is a non-deadline task that has been boosted, 364 * do nothing 365 */ 366 if (dl_se->dl_runtime == 0) 367 return; 368 369 if (dl_entity_is_special(dl_se)) 370 return; 371 372 WARN_ON(dl_se->dl_non_contending); 373 374 zerolag_time = dl_se->deadline - 375 div64_long((dl_se->runtime * dl_se->dl_period), 376 dl_se->dl_runtime); 377 378 /* 379 * Using relative times instead of the absolute "0-lag time" 380 * allows to simplify the code 381 */ 382 zerolag_time -= rq_clock(rq); 383 384 /* 385 * If the "0-lag time" already passed, decrease the active 386 * utilization now, instead of starting a timer 387 */ 388 if ((zerolag_time < 0) || hrtimer_active(&dl_se->inactive_timer)) { 389 if (dl_task(p)) 390 sub_running_bw(dl_se, dl_rq); 391 if (!dl_task(p) || READ_ONCE(p->__state) == TASK_DEAD) { 392 struct dl_bw *dl_b = dl_bw_of(task_cpu(p)); 393 394 if (READ_ONCE(p->__state) == TASK_DEAD) 395 sub_rq_bw(&p->dl, &rq->dl); 396 raw_spin_lock(&dl_b->lock); 397 __dl_sub(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p))); 398 __dl_clear_params(p); 399 raw_spin_unlock(&dl_b->lock); 400 } 401 402 return; 403 } 404 405 dl_se->dl_non_contending = 1; 406 get_task_struct(p); 407 hrtimer_start(timer, ns_to_ktime(zerolag_time), HRTIMER_MODE_REL_HARD); 408 } 409 410 static void task_contending(struct sched_dl_entity *dl_se, int flags) 411 { 412 struct dl_rq *dl_rq = dl_rq_of_se(dl_se); 413 414 /* 415 * If this is a non-deadline task that has been boosted, 416 * do nothing 417 */ 418 if (dl_se->dl_runtime == 0) 419 return; 420 421 if (flags & ENQUEUE_MIGRATED) 422 add_rq_bw(dl_se, dl_rq); 423 424 if (dl_se->dl_non_contending) { 425 dl_se->dl_non_contending = 0; 426 /* 427 * If the timer handler is currently running and the 428 * timer cannot be canceled, inactive_task_timer() 429 * will see that dl_not_contending is not set, and 430 * will not touch the rq's active utilization, 431 * so we are still safe. 432 */ 433 if (hrtimer_try_to_cancel(&dl_se->inactive_timer) == 1) 434 put_task_struct(dl_task_of(dl_se)); 435 } else { 436 /* 437 * Since "dl_non_contending" is not set, the 438 * task's utilization has already been removed from 439 * active utilization (either when the task blocked, 440 * when the "inactive timer" fired). 
441 * So, add it back. 442 */ 443 add_running_bw(dl_se, dl_rq); 444 } 445 } 446 447 static inline int is_leftmost(struct task_struct *p, struct dl_rq *dl_rq) 448 { 449 struct sched_dl_entity *dl_se = &p->dl; 450 451 return rb_first_cached(&dl_rq->root) == &dl_se->rb_node; 452 } 453 454 static void init_dl_rq_bw_ratio(struct dl_rq *dl_rq); 455 456 void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime) 457 { 458 raw_spin_lock_init(&dl_b->dl_runtime_lock); 459 dl_b->dl_period = period; 460 dl_b->dl_runtime = runtime; 461 } 462 463 void init_dl_bw(struct dl_bw *dl_b) 464 { 465 raw_spin_lock_init(&dl_b->lock); 466 if (global_rt_runtime() == RUNTIME_INF) 467 dl_b->bw = -1; 468 else 469 dl_b->bw = to_ratio(global_rt_period(), global_rt_runtime()); 470 dl_b->total_bw = 0; 471 } 472 473 void init_dl_rq(struct dl_rq *dl_rq) 474 { 475 dl_rq->root = RB_ROOT_CACHED; 476 477 #ifdef CONFIG_SMP 478 /* zero means no -deadline tasks */ 479 dl_rq->earliest_dl.curr = dl_rq->earliest_dl.next = 0; 480 481 dl_rq->dl_nr_migratory = 0; 482 dl_rq->overloaded = 0; 483 dl_rq->pushable_dl_tasks_root = RB_ROOT_CACHED; 484 #else 485 init_dl_bw(&dl_rq->dl_bw); 486 #endif 487 488 dl_rq->running_bw = 0; 489 dl_rq->this_bw = 0; 490 init_dl_rq_bw_ratio(dl_rq); 491 } 492 493 #ifdef CONFIG_SMP 494 495 static inline int dl_overloaded(struct rq *rq) 496 { 497 return atomic_read(&rq->rd->dlo_count); 498 } 499 500 static inline void dl_set_overload(struct rq *rq) 501 { 502 if (!rq->online) 503 return; 504 505 cpumask_set_cpu(rq->cpu, rq->rd->dlo_mask); 506 /* 507 * Must be visible before the overload count is 508 * set (as in sched_rt.c). 509 * 510 * Matched by the barrier in pull_dl_task(). 511 */ 512 smp_wmb(); 513 atomic_inc(&rq->rd->dlo_count); 514 } 515 516 static inline void dl_clear_overload(struct rq *rq) 517 { 518 if (!rq->online) 519 return; 520 521 atomic_dec(&rq->rd->dlo_count); 522 cpumask_clear_cpu(rq->cpu, rq->rd->dlo_mask); 523 } 524 525 static void update_dl_migration(struct dl_rq *dl_rq) 526 { 527 if (dl_rq->dl_nr_migratory && dl_rq->dl_nr_running > 1) { 528 if (!dl_rq->overloaded) { 529 dl_set_overload(rq_of_dl_rq(dl_rq)); 530 dl_rq->overloaded = 1; 531 } 532 } else if (dl_rq->overloaded) { 533 dl_clear_overload(rq_of_dl_rq(dl_rq)); 534 dl_rq->overloaded = 0; 535 } 536 } 537 538 static void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) 539 { 540 struct task_struct *p = dl_task_of(dl_se); 541 542 if (p->nr_cpus_allowed > 1) 543 dl_rq->dl_nr_migratory++; 544 545 update_dl_migration(dl_rq); 546 } 547 548 static void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) 549 { 550 struct task_struct *p = dl_task_of(dl_se); 551 552 if (p->nr_cpus_allowed > 1) 553 dl_rq->dl_nr_migratory--; 554 555 update_dl_migration(dl_rq); 556 } 557 558 #define __node_2_pdl(node) \ 559 rb_entry((node), struct task_struct, pushable_dl_tasks) 560 561 static inline bool __pushable_less(struct rb_node *a, const struct rb_node *b) 562 { 563 return dl_entity_preempt(&__node_2_pdl(a)->dl, &__node_2_pdl(b)->dl); 564 } 565 566 /* 567 * The list of pushable -deadline task is not a plist, like in 568 * sched_rt.c, it is an rb-tree with tasks ordered by deadline. 
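 *
 * As a small illustration (arbitrary numbers): with pushable tasks whose
 * absolute deadlines are 150ms, 120ms and 200ms, __pushable_less() keeps
 * the 120ms task leftmost in the cached rb-tree, and earliest_dl.next
 * always mirrors that leftmost (i.e. earliest) pushable deadline, which
 * is what the push/pull code consults.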
569 */ 570 static void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p) 571 { 572 struct rb_node *leftmost; 573 574 BUG_ON(!RB_EMPTY_NODE(&p->pushable_dl_tasks)); 575 576 leftmost = rb_add_cached(&p->pushable_dl_tasks, 577 &rq->dl.pushable_dl_tasks_root, 578 __pushable_less); 579 if (leftmost) 580 rq->dl.earliest_dl.next = p->dl.deadline; 581 } 582 583 static void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p) 584 { 585 struct dl_rq *dl_rq = &rq->dl; 586 struct rb_root_cached *root = &dl_rq->pushable_dl_tasks_root; 587 struct rb_node *leftmost; 588 589 if (RB_EMPTY_NODE(&p->pushable_dl_tasks)) 590 return; 591 592 leftmost = rb_erase_cached(&p->pushable_dl_tasks, root); 593 if (leftmost) 594 dl_rq->earliest_dl.next = __node_2_pdl(leftmost)->dl.deadline; 595 596 RB_CLEAR_NODE(&p->pushable_dl_tasks); 597 } 598 599 static inline int has_pushable_dl_tasks(struct rq *rq) 600 { 601 return !RB_EMPTY_ROOT(&rq->dl.pushable_dl_tasks_root.rb_root); 602 } 603 604 static int push_dl_task(struct rq *rq); 605 606 static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev) 607 { 608 return rq->online && dl_task(prev); 609 } 610 611 static DEFINE_PER_CPU(struct callback_head, dl_push_head); 612 static DEFINE_PER_CPU(struct callback_head, dl_pull_head); 613 614 static void push_dl_tasks(struct rq *); 615 static void pull_dl_task(struct rq *); 616 617 static inline void deadline_queue_push_tasks(struct rq *rq) 618 { 619 if (!has_pushable_dl_tasks(rq)) 620 return; 621 622 queue_balance_callback(rq, &per_cpu(dl_push_head, rq->cpu), push_dl_tasks); 623 } 624 625 static inline void deadline_queue_pull_task(struct rq *rq) 626 { 627 queue_balance_callback(rq, &per_cpu(dl_pull_head, rq->cpu), pull_dl_task); 628 } 629 630 static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq); 631 632 static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p) 633 { 634 struct rq *later_rq = NULL; 635 struct dl_bw *dl_b; 636 637 later_rq = find_lock_later_rq(p, rq); 638 if (!later_rq) { 639 int cpu; 640 641 /* 642 * If we cannot preempt any rq, fall back to pick any 643 * online CPU: 644 */ 645 cpu = cpumask_any_and(cpu_active_mask, p->cpus_ptr); 646 if (cpu >= nr_cpu_ids) { 647 /* 648 * Failed to find any suitable CPU. 649 * The task will never come back! 650 */ 651 BUG_ON(dl_bandwidth_enabled()); 652 653 /* 654 * If admission control is disabled we 655 * try a little harder to let the task 656 * run. 657 */ 658 cpu = cpumask_any(cpu_active_mask); 659 } 660 later_rq = cpu_rq(cpu); 661 double_lock_balance(rq, later_rq); 662 } 663 664 if (p->dl.dl_non_contending || p->dl.dl_throttled) { 665 /* 666 * Inactive timer is armed (or callback is running, but 667 * waiting for us to release rq locks). In any case, when it 668 * will fire (or continue), it will see running_bw of this 669 * task migrated to later_rq (and correctly handle it). 670 */ 671 sub_running_bw(&p->dl, &rq->dl); 672 sub_rq_bw(&p->dl, &rq->dl); 673 674 add_rq_bw(&p->dl, &later_rq->dl); 675 add_running_bw(&p->dl, &later_rq->dl); 676 } else { 677 sub_rq_bw(&p->dl, &rq->dl); 678 add_rq_bw(&p->dl, &later_rq->dl); 679 } 680 681 /* 682 * And we finally need to fixup root_domain(s) bandwidth accounting, 683 * since p is still hanging out in the old (now moved to default) root 684 * domain. 
685 */ 686 dl_b = &rq->rd->dl_bw; 687 raw_spin_lock(&dl_b->lock); 688 __dl_sub(dl_b, p->dl.dl_bw, cpumask_weight(rq->rd->span)); 689 raw_spin_unlock(&dl_b->lock); 690 691 dl_b = &later_rq->rd->dl_bw; 692 raw_spin_lock(&dl_b->lock); 693 __dl_add(dl_b, p->dl.dl_bw, cpumask_weight(later_rq->rd->span)); 694 raw_spin_unlock(&dl_b->lock); 695 696 set_task_cpu(p, later_rq->cpu); 697 double_unlock_balance(later_rq, rq); 698 699 return later_rq; 700 } 701 702 #else 703 704 static inline 705 void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p) 706 { 707 } 708 709 static inline 710 void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p) 711 { 712 } 713 714 static inline 715 void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) 716 { 717 } 718 719 static inline 720 void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) 721 { 722 } 723 724 static inline void deadline_queue_push_tasks(struct rq *rq) 725 { 726 } 727 728 static inline void deadline_queue_pull_task(struct rq *rq) 729 { 730 } 731 #endif /* CONFIG_SMP */ 732 733 static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags); 734 static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags); 735 static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p, int flags); 736 737 /* 738 * We are being explicitly informed that a new instance is starting, 739 * and this means that: 740 * - the absolute deadline of the entity has to be placed at 741 * current time + relative deadline; 742 * - the runtime of the entity has to be set to the maximum value. 743 * 744 * The capability of specifying such event is useful whenever a -deadline 745 * entity wants to (try to!) synchronize its behaviour with the scheduler's 746 * one, and to (try to!) reconcile itself with its own scheduling 747 * parameters. 748 */ 749 static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se) 750 { 751 struct dl_rq *dl_rq = dl_rq_of_se(dl_se); 752 struct rq *rq = rq_of_dl_rq(dl_rq); 753 754 WARN_ON(is_dl_boosted(dl_se)); 755 WARN_ON(dl_time_before(rq_clock(rq), dl_se->deadline)); 756 757 /* 758 * We are racing with the deadline timer. So, do nothing because 759 * the deadline timer handler will take care of properly recharging 760 * the runtime and postponing the deadline 761 */ 762 if (dl_se->dl_throttled) 763 return; 764 765 /* 766 * We use the regular wall clock time to set deadlines in the 767 * future; in fact, we must consider execution overheads (time 768 * spent on hardirq context, etc.). 769 */ 770 dl_se->deadline = rq_clock(rq) + dl_se->dl_deadline; 771 dl_se->runtime = dl_se->dl_runtime; 772 } 773 774 /* 775 * Pure Earliest Deadline First (EDF) scheduling does not deal with the 776 * possibility of a entity lasting more than what it declared, and thus 777 * exhausting its runtime. 778 * 779 * Here we are interested in making runtime overrun possible, but we do 780 * not want a entity which is misbehaving to affect the scheduling of all 781 * other entities. 782 * Therefore, a budgeting strategy called Constant Bandwidth Server (CBS) 783 * is used, in order to confine each entity within its own bandwidth. 784 * 785 * This function deals exactly with that, and ensures that when the runtime 786 * of a entity is replenished, its deadline is also postponed. That ensures 787 * the overrunning entity can't interfere with other entity in the system and 788 * can't make them miss their deadlines. 
 * Reasons why this kind of overrun
 * could happen are, typically, an entity voluntarily trying to overcome its
 * runtime, or it just underestimated it during sched_setattr().
 */
static void replenish_dl_entity(struct sched_dl_entity *dl_se)
{
	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
	struct rq *rq = rq_of_dl_rq(dl_rq);

	BUG_ON(pi_of(dl_se)->dl_runtime <= 0);

	/*
	 * This could be the case for a !-dl task that is boosted.
	 * Just go with full inherited parameters.
	 */
	if (dl_se->dl_deadline == 0) {
		dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
		dl_se->runtime = pi_of(dl_se)->dl_runtime;
	}

	if (dl_se->dl_yielded && dl_se->runtime > 0)
		dl_se->runtime = 0;

	/*
	 * We keep moving the deadline away until we get some
	 * available runtime for the entity. This ensures correct
	 * handling of situations where the runtime overrun is
	 * arbitrarily large.
	 */
	while (dl_se->runtime <= 0) {
		dl_se->deadline += pi_of(dl_se)->dl_period;
		dl_se->runtime += pi_of(dl_se)->dl_runtime;
	}

	/*
	 * At this point, the deadline really should be "in
	 * the future" with respect to rq->clock. If it's
	 * not, we are, for some reason, lagging too much!
	 * Anyway, after having warned userspace about that,
	 * we still try to keep things running by
	 * resetting the deadline and the budget of the
	 * entity.
	 */
	if (dl_time_before(dl_se->deadline, rq_clock(rq))) {
		printk_deferred_once("sched: DL replenish lagged too much\n");
		dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
		dl_se->runtime = pi_of(dl_se)->dl_runtime;
	}

	if (dl_se->dl_yielded)
		dl_se->dl_yielded = 0;
	if (dl_se->dl_throttled)
		dl_se->dl_throttled = 0;
}

/*
 * Here we check if --at time t-- an entity (which is probably being
 * [re]activated or, in general, enqueued) can use its remaining runtime
 * and its current deadline _without_ exceeding the bandwidth it is
 * assigned (function returns true if it can't). We are in fact applying
 * one of the CBS rules: when a task wakes up, if the residual runtime
 * over residual deadline fits within the allocated bandwidth, then we
 * can keep the current (absolute) deadline and residual budget without
 * disrupting the schedulability of the system. Otherwise, we should
 * refill the runtime and set the deadline a period in the future,
 * because keeping the current (absolute) deadline of the task would
 * result in breaking guarantees promised to other tasks (refer to
 * Documentation/scheduler/sched-deadline.rst for more information).
 *
 * This function returns true if:
 *
 *	runtime / (deadline - t) > dl_runtime / dl_deadline ,
 *
 * IOW we can't recycle current parameters.
 *
 * Notice that the bandwidth check is done against the deadline. For
 * tasks with deadline equal to period this is the same as using
 * dl_period instead of dl_deadline in the equation above.
 */
static bool dl_entity_overflow(struct sched_dl_entity *dl_se, u64 t)
{
	u64 left, right;

	/*
	 * left and right are the two sides of the equation above,
	 * after a bit of shuffling to use multiplications instead
	 * of divisions.
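	 *
	 * Worked example (hypothetical values): a task with dl_runtime =
	 * 10ms and dl_deadline = 100ms wakes up with 8ms of runtime left
	 * and 20ms to go until its current absolute deadline. Ignoring the
	 * DL_SCALE shifts, left = 100 * 8 = 800 and right = 20 * 10 = 200,
	 * i.e. 8/20 = 0.4 > 10/100 = 0.1, so the function returns true and
	 * the parameters get refreshed. Had the deadline been 90ms away,
	 * right = 900 >= left and the current runtime and deadline would
	 * have been kept.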
875 * 876 * Note that none of the time values involved in the two 877 * multiplications are absolute: dl_deadline and dl_runtime 878 * are the relative deadline and the maximum runtime of each 879 * instance, runtime is the runtime left for the last instance 880 * and (deadline - t), since t is rq->clock, is the time left 881 * to the (absolute) deadline. Even if overflowing the u64 type 882 * is very unlikely to occur in both cases, here we scale down 883 * as we want to avoid that risk at all. Scaling down by 10 884 * means that we reduce granularity to 1us. We are fine with it, 885 * since this is only a true/false check and, anyway, thinking 886 * of anything below microseconds resolution is actually fiction 887 * (but still we want to give the user that illusion >;). 888 */ 889 left = (pi_of(dl_se)->dl_deadline >> DL_SCALE) * (dl_se->runtime >> DL_SCALE); 890 right = ((dl_se->deadline - t) >> DL_SCALE) * 891 (pi_of(dl_se)->dl_runtime >> DL_SCALE); 892 893 return dl_time_before(right, left); 894 } 895 896 /* 897 * Revised wakeup rule [1]: For self-suspending tasks, rather then 898 * re-initializing task's runtime and deadline, the revised wakeup 899 * rule adjusts the task's runtime to avoid the task to overrun its 900 * density. 901 * 902 * Reasoning: a task may overrun the density if: 903 * runtime / (deadline - t) > dl_runtime / dl_deadline 904 * 905 * Therefore, runtime can be adjusted to: 906 * runtime = (dl_runtime / dl_deadline) * (deadline - t) 907 * 908 * In such way that runtime will be equal to the maximum density 909 * the task can use without breaking any rule. 910 * 911 * [1] Luca Abeni, Giuseppe Lipari, and Juri Lelli. 2015. Constant 912 * bandwidth server revisited. SIGBED Rev. 11, 4 (January 2015), 19-24. 913 */ 914 static void 915 update_dl_revised_wakeup(struct sched_dl_entity *dl_se, struct rq *rq) 916 { 917 u64 laxity = dl_se->deadline - rq_clock(rq); 918 919 /* 920 * If the task has deadline < period, and the deadline is in the past, 921 * it should already be throttled before this check. 922 * 923 * See update_dl_entity() comments for further details. 924 */ 925 WARN_ON(dl_time_before(dl_se->deadline, rq_clock(rq))); 926 927 dl_se->runtime = (dl_se->dl_density * laxity) >> BW_SHIFT; 928 } 929 930 /* 931 * Regarding the deadline, a task with implicit deadline has a relative 932 * deadline == relative period. A task with constrained deadline has a 933 * relative deadline <= relative period. 934 * 935 * We support constrained deadline tasks. However, there are some restrictions 936 * applied only for tasks which do not have an implicit deadline. See 937 * update_dl_entity() to know more about such restrictions. 938 * 939 * The dl_is_implicit() returns true if the task has an implicit deadline. 940 */ 941 static inline bool dl_is_implicit(struct sched_dl_entity *dl_se) 942 { 943 return dl_se->dl_deadline == dl_se->dl_period; 944 } 945 946 /* 947 * When a deadline entity is placed in the runqueue, its runtime and deadline 948 * might need to be updated. This is done by a CBS wake up rule. There are two 949 * different rules: 1) the original CBS; and 2) the Revisited CBS. 950 * 951 * When the task is starting a new period, the Original CBS is used. In this 952 * case, the runtime is replenished and a new absolute deadline is set. 953 * 954 * When a task is queued before the begin of the next period, using the 955 * remaining runtime and deadline could make the entity to overflow, see 956 * dl_entity_overflow() to find more about runtime overflow. 
 * When such a case is detected, the runtime and deadline need to be
 * updated.
 *
 * If the task has an implicit deadline, i.e., deadline == period, the Original
 * CBS is applied. The runtime is replenished and a new absolute deadline is
 * set, as in the previous cases.
 *
 * However, the Original CBS does not work properly for tasks with
 * deadline < period, which are said to have a constrained deadline. By
 * applying the Original CBS, a constrained deadline task would be able to run
 * runtime/deadline in a period. With deadline < period, the task would
 * overrun the runtime/period allowed bandwidth, breaking the admission test.
 *
 * In order to prevent this misbehavior, the Revisited CBS is used for
 * constrained deadline tasks when a runtime overflow is detected. In the
 * Revisited CBS, rather than replenishing & setting a new absolute deadline,
 * the remaining runtime of the task is reduced to avoid runtime overflow.
 * Please refer to the comments in the update_dl_revised_wakeup() function
 * to find out more about the Revisited CBS rule.
 */
static void update_dl_entity(struct sched_dl_entity *dl_se)
{
	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
	struct rq *rq = rq_of_dl_rq(dl_rq);

	if (dl_time_before(dl_se->deadline, rq_clock(rq)) ||
	    dl_entity_overflow(dl_se, rq_clock(rq))) {

		if (unlikely(!dl_is_implicit(dl_se) &&
			     !dl_time_before(dl_se->deadline, rq_clock(rq)) &&
			     !is_dl_boosted(dl_se))) {
			update_dl_revised_wakeup(dl_se, rq);
			return;
		}

		dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
		dl_se->runtime = pi_of(dl_se)->dl_runtime;
	}
}

static inline u64 dl_next_period(struct sched_dl_entity *dl_se)
{
	return dl_se->deadline - dl_se->dl_deadline + dl_se->dl_period;
}

/*
 * If the entity depleted all its runtime, and if we want it to sleep
 * while waiting for some new execution time to become available, we
 * set the bandwidth replenishment timer to the replenishment instant
 * and try to activate it.
 *
 * Notice that it is important for the caller to know if the timer
 * actually started or not (i.e., the replenishment instant is in
 * the future or in the past).
 */
static int start_dl_timer(struct task_struct *p)
{
	struct sched_dl_entity *dl_se = &p->dl;
	struct hrtimer *timer = &dl_se->dl_timer;
	struct rq *rq = task_rq(p);
	ktime_t now, act;
	s64 delta;

	lockdep_assert_rq_held(rq);

	/*
	 * We want the timer to fire at the deadline, but consider that
	 * the deadline is expressed in rq->clock time, not in the
	 * hrtimer's time base.
	 */
	act = ns_to_ktime(dl_next_period(dl_se));
	now = hrtimer_cb_get_time(timer);
	delta = ktime_to_ns(now) - rq_clock(rq);
	act = ktime_add_ns(act, delta);

	/*
	 * If the expiry time already passed, e.g., because the value
	 * chosen as the deadline is too small, don't even try to
	 * start the timer in the past!
	 */
	if (ktime_us_delta(act, now) < 0)
		return 0;

	/*
	 * !enqueued will guarantee another callback, even if one is already in
	 * progress. This ensures a balanced {get,put}_task_struct().
1042 * 1043 * The race against __run_timer() clearing the enqueued state is 1044 * harmless because we're holding task_rq()->lock, therefore the timer 1045 * expiring after we've done the check will wait on its task_rq_lock() 1046 * and observe our state. 1047 */ 1048 if (!hrtimer_is_queued(timer)) { 1049 get_task_struct(p); 1050 hrtimer_start(timer, act, HRTIMER_MODE_ABS_HARD); 1051 } 1052 1053 return 1; 1054 } 1055 1056 /* 1057 * This is the bandwidth enforcement timer callback. If here, we know 1058 * a task is not on its dl_rq, since the fact that the timer was running 1059 * means the task is throttled and needs a runtime replenishment. 1060 * 1061 * However, what we actually do depends on the fact the task is active, 1062 * (it is on its rq) or has been removed from there by a call to 1063 * dequeue_task_dl(). In the former case we must issue the runtime 1064 * replenishment and add the task back to the dl_rq; in the latter, we just 1065 * do nothing but clearing dl_throttled, so that runtime and deadline 1066 * updating (and the queueing back to dl_rq) will be done by the 1067 * next call to enqueue_task_dl(). 1068 */ 1069 static enum hrtimer_restart dl_task_timer(struct hrtimer *timer) 1070 { 1071 struct sched_dl_entity *dl_se = container_of(timer, 1072 struct sched_dl_entity, 1073 dl_timer); 1074 struct task_struct *p = dl_task_of(dl_se); 1075 struct rq_flags rf; 1076 struct rq *rq; 1077 1078 rq = task_rq_lock(p, &rf); 1079 1080 /* 1081 * The task might have changed its scheduling policy to something 1082 * different than SCHED_DEADLINE (through switched_from_dl()). 1083 */ 1084 if (!dl_task(p)) 1085 goto unlock; 1086 1087 /* 1088 * The task might have been boosted by someone else and might be in the 1089 * boosting/deboosting path, its not throttled. 1090 */ 1091 if (is_dl_boosted(dl_se)) 1092 goto unlock; 1093 1094 /* 1095 * Spurious timer due to start_dl_timer() race; or we already received 1096 * a replenishment from rt_mutex_setprio(). 1097 */ 1098 if (!dl_se->dl_throttled) 1099 goto unlock; 1100 1101 sched_clock_tick(); 1102 update_rq_clock(rq); 1103 1104 /* 1105 * If the throttle happened during sched-out; like: 1106 * 1107 * schedule() 1108 * deactivate_task() 1109 * dequeue_task_dl() 1110 * update_curr_dl() 1111 * start_dl_timer() 1112 * __dequeue_task_dl() 1113 * prev->on_rq = 0; 1114 * 1115 * We can be both throttled and !queued. Replenish the counter 1116 * but do not enqueue -- wait for our wakeup to do that. 1117 */ 1118 if (!task_on_rq_queued(p)) { 1119 replenish_dl_entity(dl_se); 1120 goto unlock; 1121 } 1122 1123 #ifdef CONFIG_SMP 1124 if (unlikely(!rq->online)) { 1125 /* 1126 * If the runqueue is no longer available, migrate the 1127 * task elsewhere. This necessarily changes rq. 1128 */ 1129 lockdep_unpin_lock(__rq_lockp(rq), rf.cookie); 1130 rq = dl_task_offline_migration(rq, p); 1131 rf.cookie = lockdep_pin_lock(__rq_lockp(rq)); 1132 update_rq_clock(rq); 1133 1134 /* 1135 * Now that the task has been migrated to the new RQ and we 1136 * have that locked, proceed as normal and enqueue the task 1137 * there. 1138 */ 1139 } 1140 #endif 1141 1142 enqueue_task_dl(rq, p, ENQUEUE_REPLENISH); 1143 if (dl_task(rq->curr)) 1144 check_preempt_curr_dl(rq, p, 0); 1145 else 1146 resched_curr(rq); 1147 1148 #ifdef CONFIG_SMP 1149 /* 1150 * Queueing this task back might have overloaded rq, check if we need 1151 * to kick someone away. 1152 */ 1153 if (has_pushable_dl_tasks(rq)) { 1154 /* 1155 * Nothing relies on rq->lock after this, so its safe to drop 1156 * rq->lock. 
1157 */ 1158 rq_unpin_lock(rq, &rf); 1159 push_dl_task(rq); 1160 rq_repin_lock(rq, &rf); 1161 } 1162 #endif 1163 1164 unlock: 1165 task_rq_unlock(rq, p, &rf); 1166 1167 /* 1168 * This can free the task_struct, including this hrtimer, do not touch 1169 * anything related to that after this. 1170 */ 1171 put_task_struct(p); 1172 1173 return HRTIMER_NORESTART; 1174 } 1175 1176 void init_dl_task_timer(struct sched_dl_entity *dl_se) 1177 { 1178 struct hrtimer *timer = &dl_se->dl_timer; 1179 1180 hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); 1181 timer->function = dl_task_timer; 1182 } 1183 1184 /* 1185 * During the activation, CBS checks if it can reuse the current task's 1186 * runtime and period. If the deadline of the task is in the past, CBS 1187 * cannot use the runtime, and so it replenishes the task. This rule 1188 * works fine for implicit deadline tasks (deadline == period), and the 1189 * CBS was designed for implicit deadline tasks. However, a task with 1190 * constrained deadline (deadline < period) might be awakened after the 1191 * deadline, but before the next period. In this case, replenishing the 1192 * task would allow it to run for runtime / deadline. As in this case 1193 * deadline < period, CBS enables a task to run for more than the 1194 * runtime / period. In a very loaded system, this can cause a domino 1195 * effect, making other tasks miss their deadlines. 1196 * 1197 * To avoid this problem, in the activation of a constrained deadline 1198 * task after the deadline but before the next period, throttle the 1199 * task and set the replenishing timer to the begin of the next period, 1200 * unless it is boosted. 1201 */ 1202 static inline void dl_check_constrained_dl(struct sched_dl_entity *dl_se) 1203 { 1204 struct task_struct *p = dl_task_of(dl_se); 1205 struct rq *rq = rq_of_dl_rq(dl_rq_of_se(dl_se)); 1206 1207 if (dl_time_before(dl_se->deadline, rq_clock(rq)) && 1208 dl_time_before(rq_clock(rq), dl_next_period(dl_se))) { 1209 if (unlikely(is_dl_boosted(dl_se) || !start_dl_timer(p))) 1210 return; 1211 dl_se->dl_throttled = 1; 1212 if (dl_se->runtime > 0) 1213 dl_se->runtime = 0; 1214 } 1215 } 1216 1217 static 1218 int dl_runtime_exceeded(struct sched_dl_entity *dl_se) 1219 { 1220 return (dl_se->runtime <= 0); 1221 } 1222 1223 extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq); 1224 1225 /* 1226 * This function implements the GRUB accounting rule: 1227 * according to the GRUB reclaiming algorithm, the runtime is 1228 * not decreased as "dq = -dt", but as 1229 * "dq = -max{u / Umax, (1 - Uinact - Uextra)} dt", 1230 * where u is the utilization of the task, Umax is the maximum reclaimable 1231 * utilization, Uinact is the (per-runqueue) inactive utilization, computed 1232 * as the difference between the "total runqueue utilization" and the 1233 * runqueue active utilization, and Uextra is the (per runqueue) extra 1234 * reclaimable utilization. 1235 * Since rq->dl.running_bw and rq->dl.this_bw contain utilizations 1236 * multiplied by 2^BW_SHIFT, the result has to be shifted right by 1237 * BW_SHIFT. 1238 * Since rq->dl.bw_ratio contains 1 / Umax multiplied by 2^RATIO_SHIFT, 1239 * dl_bw is multiped by rq->dl.bw_ratio and shifted right by RATIO_SHIFT. 1240 * Since delta is a 64 bit variable, to have an overflow its value 1241 * should be larger than 2^(64 - 20 - 8), which is more than 64 seconds. 1242 * So, overflow is not an issue here. 
1243 */ 1244 static u64 grub_reclaim(u64 delta, struct rq *rq, struct sched_dl_entity *dl_se) 1245 { 1246 u64 u_inact = rq->dl.this_bw - rq->dl.running_bw; /* Utot - Uact */ 1247 u64 u_act; 1248 u64 u_act_min = (dl_se->dl_bw * rq->dl.bw_ratio) >> RATIO_SHIFT; 1249 1250 /* 1251 * Instead of computing max{u * bw_ratio, (1 - u_inact - u_extra)}, 1252 * we compare u_inact + rq->dl.extra_bw with 1253 * 1 - (u * rq->dl.bw_ratio >> RATIO_SHIFT), because 1254 * u_inact + rq->dl.extra_bw can be larger than 1255 * 1 * (so, 1 - u_inact - rq->dl.extra_bw would be negative 1256 * leading to wrong results) 1257 */ 1258 if (u_inact + rq->dl.extra_bw > BW_UNIT - u_act_min) 1259 u_act = u_act_min; 1260 else 1261 u_act = BW_UNIT - u_inact - rq->dl.extra_bw; 1262 1263 return (delta * u_act) >> BW_SHIFT; 1264 } 1265 1266 /* 1267 * Update the current task's runtime statistics (provided it is still 1268 * a -deadline task and has not been removed from the dl_rq). 1269 */ 1270 static void update_curr_dl(struct rq *rq) 1271 { 1272 struct task_struct *curr = rq->curr; 1273 struct sched_dl_entity *dl_se = &curr->dl; 1274 u64 delta_exec, scaled_delta_exec; 1275 int cpu = cpu_of(rq); 1276 u64 now; 1277 1278 if (!dl_task(curr) || !on_dl_rq(dl_se)) 1279 return; 1280 1281 /* 1282 * Consumed budget is computed considering the time as 1283 * observed by schedulable tasks (excluding time spent 1284 * in hardirq context, etc.). Deadlines are instead 1285 * computed using hard walltime. This seems to be the more 1286 * natural solution, but the full ramifications of this 1287 * approach need further study. 1288 */ 1289 now = rq_clock_task(rq); 1290 delta_exec = now - curr->se.exec_start; 1291 if (unlikely((s64)delta_exec <= 0)) { 1292 if (unlikely(dl_se->dl_yielded)) 1293 goto throttle; 1294 return; 1295 } 1296 1297 schedstat_set(curr->stats.exec_max, 1298 max(curr->stats.exec_max, delta_exec)); 1299 1300 trace_sched_stat_runtime(curr, delta_exec, 0); 1301 1302 curr->se.sum_exec_runtime += delta_exec; 1303 account_group_exec_runtime(curr, delta_exec); 1304 1305 curr->se.exec_start = now; 1306 cgroup_account_cputime(curr, delta_exec); 1307 1308 if (dl_entity_is_special(dl_se)) 1309 return; 1310 1311 /* 1312 * For tasks that participate in GRUB, we implement GRUB-PA: the 1313 * spare reclaimed bandwidth is used to clock down frequency. 1314 * 1315 * For the others, we still need to scale reservation parameters 1316 * according to current frequency and CPU maximum capacity. 1317 */ 1318 if (unlikely(dl_se->flags & SCHED_FLAG_RECLAIM)) { 1319 scaled_delta_exec = grub_reclaim(delta_exec, 1320 rq, 1321 &curr->dl); 1322 } else { 1323 unsigned long scale_freq = arch_scale_freq_capacity(cpu); 1324 unsigned long scale_cpu = arch_scale_cpu_capacity(cpu); 1325 1326 scaled_delta_exec = cap_scale(delta_exec, scale_freq); 1327 scaled_delta_exec = cap_scale(scaled_delta_exec, scale_cpu); 1328 } 1329 1330 dl_se->runtime -= scaled_delta_exec; 1331 1332 throttle: 1333 if (dl_runtime_exceeded(dl_se) || dl_se->dl_yielded) { 1334 dl_se->dl_throttled = 1; 1335 1336 /* If requested, inform the user about runtime overruns. 
*/ 1337 if (dl_runtime_exceeded(dl_se) && 1338 (dl_se->flags & SCHED_FLAG_DL_OVERRUN)) 1339 dl_se->dl_overrun = 1; 1340 1341 __dequeue_task_dl(rq, curr, 0); 1342 if (unlikely(is_dl_boosted(dl_se) || !start_dl_timer(curr))) 1343 enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH); 1344 1345 if (!is_leftmost(curr, &rq->dl)) 1346 resched_curr(rq); 1347 } 1348 1349 /* 1350 * Because -- for now -- we share the rt bandwidth, we need to 1351 * account our runtime there too, otherwise actual rt tasks 1352 * would be able to exceed the shared quota. 1353 * 1354 * Account to the root rt group for now. 1355 * 1356 * The solution we're working towards is having the RT groups scheduled 1357 * using deadline servers -- however there's a few nasties to figure 1358 * out before that can happen. 1359 */ 1360 if (rt_bandwidth_enabled()) { 1361 struct rt_rq *rt_rq = &rq->rt; 1362 1363 raw_spin_lock(&rt_rq->rt_runtime_lock); 1364 /* 1365 * We'll let actual RT tasks worry about the overflow here, we 1366 * have our own CBS to keep us inline; only account when RT 1367 * bandwidth is relevant. 1368 */ 1369 if (sched_rt_bandwidth_account(rt_rq)) 1370 rt_rq->rt_time += delta_exec; 1371 raw_spin_unlock(&rt_rq->rt_runtime_lock); 1372 } 1373 } 1374 1375 static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer) 1376 { 1377 struct sched_dl_entity *dl_se = container_of(timer, 1378 struct sched_dl_entity, 1379 inactive_timer); 1380 struct task_struct *p = dl_task_of(dl_se); 1381 struct rq_flags rf; 1382 struct rq *rq; 1383 1384 rq = task_rq_lock(p, &rf); 1385 1386 sched_clock_tick(); 1387 update_rq_clock(rq); 1388 1389 if (!dl_task(p) || READ_ONCE(p->__state) == TASK_DEAD) { 1390 struct dl_bw *dl_b = dl_bw_of(task_cpu(p)); 1391 1392 if (READ_ONCE(p->__state) == TASK_DEAD && dl_se->dl_non_contending) { 1393 sub_running_bw(&p->dl, dl_rq_of_se(&p->dl)); 1394 sub_rq_bw(&p->dl, dl_rq_of_se(&p->dl)); 1395 dl_se->dl_non_contending = 0; 1396 } 1397 1398 raw_spin_lock(&dl_b->lock); 1399 __dl_sub(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p))); 1400 raw_spin_unlock(&dl_b->lock); 1401 __dl_clear_params(p); 1402 1403 goto unlock; 1404 } 1405 if (dl_se->dl_non_contending == 0) 1406 goto unlock; 1407 1408 sub_running_bw(dl_se, &rq->dl); 1409 dl_se->dl_non_contending = 0; 1410 unlock: 1411 task_rq_unlock(rq, p, &rf); 1412 put_task_struct(p); 1413 1414 return HRTIMER_NORESTART; 1415 } 1416 1417 void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se) 1418 { 1419 struct hrtimer *timer = &dl_se->inactive_timer; 1420 1421 hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); 1422 timer->function = inactive_task_timer; 1423 } 1424 1425 #define __node_2_dle(node) \ 1426 rb_entry((node), struct sched_dl_entity, rb_node) 1427 1428 #ifdef CONFIG_SMP 1429 1430 static void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline) 1431 { 1432 struct rq *rq = rq_of_dl_rq(dl_rq); 1433 1434 if (dl_rq->earliest_dl.curr == 0 || 1435 dl_time_before(deadline, dl_rq->earliest_dl.curr)) { 1436 if (dl_rq->earliest_dl.curr == 0) 1437 cpupri_set(&rq->rd->cpupri, rq->cpu, CPUPRI_HIGHER); 1438 dl_rq->earliest_dl.curr = deadline; 1439 cpudl_set(&rq->rd->cpudl, rq->cpu, deadline); 1440 } 1441 } 1442 1443 static void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline) 1444 { 1445 struct rq *rq = rq_of_dl_rq(dl_rq); 1446 1447 /* 1448 * Since we may have removed our earliest (and/or next earliest) 1449 * task we must recompute them. 
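	 *
	 * E.g. (hypothetical): if the entities left on the dl_rq have
	 * deadlines 150ms and 200ms, the leftmost node now holds the 150ms
	 * entity and earliest_dl.curr / cpudl are updated to 150ms below;
	 * if the dl_rq became empty, cpudl is cleared and cpupri falls
	 * back to this CPU's highest queued RT priority
	 * (rq->rt.highest_prio.curr).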
1450 */ 1451 if (!dl_rq->dl_nr_running) { 1452 dl_rq->earliest_dl.curr = 0; 1453 dl_rq->earliest_dl.next = 0; 1454 cpudl_clear(&rq->rd->cpudl, rq->cpu); 1455 cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio.curr); 1456 } else { 1457 struct rb_node *leftmost = rb_first_cached(&dl_rq->root); 1458 struct sched_dl_entity *entry = __node_2_dle(leftmost); 1459 1460 dl_rq->earliest_dl.curr = entry->deadline; 1461 cpudl_set(&rq->rd->cpudl, rq->cpu, entry->deadline); 1462 } 1463 } 1464 1465 #else 1466 1467 static inline void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline) {} 1468 static inline void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline) {} 1469 1470 #endif /* CONFIG_SMP */ 1471 1472 static inline 1473 void inc_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) 1474 { 1475 int prio = dl_task_of(dl_se)->prio; 1476 u64 deadline = dl_se->deadline; 1477 1478 WARN_ON(!dl_prio(prio)); 1479 dl_rq->dl_nr_running++; 1480 add_nr_running(rq_of_dl_rq(dl_rq), 1); 1481 1482 inc_dl_deadline(dl_rq, deadline); 1483 inc_dl_migration(dl_se, dl_rq); 1484 } 1485 1486 static inline 1487 void dec_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) 1488 { 1489 int prio = dl_task_of(dl_se)->prio; 1490 1491 WARN_ON(!dl_prio(prio)); 1492 WARN_ON(!dl_rq->dl_nr_running); 1493 dl_rq->dl_nr_running--; 1494 sub_nr_running(rq_of_dl_rq(dl_rq), 1); 1495 1496 dec_dl_deadline(dl_rq, dl_se->deadline); 1497 dec_dl_migration(dl_se, dl_rq); 1498 } 1499 1500 static inline bool __dl_less(struct rb_node *a, const struct rb_node *b) 1501 { 1502 return dl_time_before(__node_2_dle(a)->deadline, __node_2_dle(b)->deadline); 1503 } 1504 1505 static inline struct sched_statistics * 1506 __schedstats_from_dl_se(struct sched_dl_entity *dl_se) 1507 { 1508 return &dl_task_of(dl_se)->stats; 1509 } 1510 1511 static inline void 1512 update_stats_wait_start_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se) 1513 { 1514 struct sched_statistics *stats; 1515 1516 if (!schedstat_enabled()) 1517 return; 1518 1519 stats = __schedstats_from_dl_se(dl_se); 1520 __update_stats_wait_start(rq_of_dl_rq(dl_rq), dl_task_of(dl_se), stats); 1521 } 1522 1523 static inline void 1524 update_stats_wait_end_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se) 1525 { 1526 struct sched_statistics *stats; 1527 1528 if (!schedstat_enabled()) 1529 return; 1530 1531 stats = __schedstats_from_dl_se(dl_se); 1532 __update_stats_wait_end(rq_of_dl_rq(dl_rq), dl_task_of(dl_se), stats); 1533 } 1534 1535 static inline void 1536 update_stats_enqueue_sleeper_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se) 1537 { 1538 struct sched_statistics *stats; 1539 1540 if (!schedstat_enabled()) 1541 return; 1542 1543 stats = __schedstats_from_dl_se(dl_se); 1544 __update_stats_enqueue_sleeper(rq_of_dl_rq(dl_rq), dl_task_of(dl_se), stats); 1545 } 1546 1547 static inline void 1548 update_stats_enqueue_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se, 1549 int flags) 1550 { 1551 if (!schedstat_enabled()) 1552 return; 1553 1554 if (flags & ENQUEUE_WAKEUP) 1555 update_stats_enqueue_sleeper_dl(dl_rq, dl_se); 1556 } 1557 1558 static inline void 1559 update_stats_dequeue_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se, 1560 int flags) 1561 { 1562 struct task_struct *p = dl_task_of(dl_se); 1563 1564 if (!schedstat_enabled()) 1565 return; 1566 1567 if ((flags & DEQUEUE_SLEEP)) { 1568 unsigned int state; 1569 1570 state = READ_ONCE(p->__state); 1571 if (state & TASK_INTERRUPTIBLE) 1572 __schedstat_set(p->stats.sleep_start, 1573 
rq_clock(rq_of_dl_rq(dl_rq))); 1574 1575 if (state & TASK_UNINTERRUPTIBLE) 1576 __schedstat_set(p->stats.block_start, 1577 rq_clock(rq_of_dl_rq(dl_rq))); 1578 } 1579 } 1580 1581 static void __enqueue_dl_entity(struct sched_dl_entity *dl_se) 1582 { 1583 struct dl_rq *dl_rq = dl_rq_of_se(dl_se); 1584 1585 BUG_ON(!RB_EMPTY_NODE(&dl_se->rb_node)); 1586 1587 rb_add_cached(&dl_se->rb_node, &dl_rq->root, __dl_less); 1588 1589 inc_dl_tasks(dl_se, dl_rq); 1590 } 1591 1592 static void __dequeue_dl_entity(struct sched_dl_entity *dl_se) 1593 { 1594 struct dl_rq *dl_rq = dl_rq_of_se(dl_se); 1595 1596 if (RB_EMPTY_NODE(&dl_se->rb_node)) 1597 return; 1598 1599 rb_erase_cached(&dl_se->rb_node, &dl_rq->root); 1600 1601 RB_CLEAR_NODE(&dl_se->rb_node); 1602 1603 dec_dl_tasks(dl_se, dl_rq); 1604 } 1605 1606 static void 1607 enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags) 1608 { 1609 BUG_ON(on_dl_rq(dl_se)); 1610 1611 update_stats_enqueue_dl(dl_rq_of_se(dl_se), dl_se, flags); 1612 1613 /* 1614 * If this is a wakeup or a new instance, the scheduling 1615 * parameters of the task might need updating. Otherwise, 1616 * we want a replenishment of its runtime. 1617 */ 1618 if (flags & ENQUEUE_WAKEUP) { 1619 task_contending(dl_se, flags); 1620 update_dl_entity(dl_se); 1621 } else if (flags & ENQUEUE_REPLENISH) { 1622 replenish_dl_entity(dl_se); 1623 } else if ((flags & ENQUEUE_RESTORE) && 1624 dl_time_before(dl_se->deadline, 1625 rq_clock(rq_of_dl_rq(dl_rq_of_se(dl_se))))) { 1626 setup_new_dl_entity(dl_se); 1627 } 1628 1629 __enqueue_dl_entity(dl_se); 1630 } 1631 1632 static void dequeue_dl_entity(struct sched_dl_entity *dl_se) 1633 { 1634 __dequeue_dl_entity(dl_se); 1635 } 1636 1637 static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags) 1638 { 1639 if (is_dl_boosted(&p->dl)) { 1640 /* 1641 * Because of delays in the detection of the overrun of a 1642 * thread's runtime, it might be the case that a thread 1643 * goes to sleep in a rt mutex with negative runtime. As 1644 * a consequence, the thread will be throttled. 1645 * 1646 * While waiting for the mutex, this thread can also be 1647 * boosted via PI, resulting in a thread that is throttled 1648 * and boosted at the same time. 1649 * 1650 * In this case, the boost overrides the throttle. 1651 */ 1652 if (p->dl.dl_throttled) { 1653 /* 1654 * The replenish timer needs to be canceled. No 1655 * problem if it fires concurrently: boosted threads 1656 * are ignored in dl_task_timer(). 1657 */ 1658 hrtimer_try_to_cancel(&p->dl.dl_timer); 1659 p->dl.dl_throttled = 0; 1660 } 1661 } else if (!dl_prio(p->normal_prio)) { 1662 /* 1663 * Special case in which we have a !SCHED_DEADLINE task that is going 1664 * to be deboosted, but exceeds its runtime while doing so. No point in 1665 * replenishing it, as it's going to return back to its original 1666 * scheduling class after this. If it has been throttled, we need to 1667 * clear the flag, otherwise the task may wake up as throttled after 1668 * being boosted again with no means to replenish the runtime and clear 1669 * the throttle. 1670 */ 1671 p->dl.dl_throttled = 0; 1672 BUG_ON(!is_dl_boosted(&p->dl) || flags != ENQUEUE_REPLENISH); 1673 return; 1674 } 1675 1676 /* 1677 * Check if a constrained deadline task was activated 1678 * after the deadline but before the next period. 1679 * If that is the case, the task will be throttled and 1680 * the replenishment timer will be set to the next period. 
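	 *
	 * For example (made-up parameters): a constrained task with
	 * dl_runtime = 2ms, dl_deadline = 5ms and dl_period = 20ms that
	 * wakes up 8ms into its period is past its deadline but before the
	 * next period, so dl_check_constrained_dl() zeroes its runtime and
	 * programs the replenishment timer for the next period boundary;
	 * replenishing it immediately would let it consume 2ms every 5ms
	 * (0.4 of a CPU) instead of the 2ms/20ms (0.1) it was admitted with.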
1681 */ 1682 if (!p->dl.dl_throttled && !dl_is_implicit(&p->dl)) 1683 dl_check_constrained_dl(&p->dl); 1684 1685 if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & ENQUEUE_RESTORE) { 1686 add_rq_bw(&p->dl, &rq->dl); 1687 add_running_bw(&p->dl, &rq->dl); 1688 } 1689 1690 /* 1691 * If p is throttled, we do not enqueue it. In fact, if it exhausted 1692 * its budget it needs a replenishment and, since it now is on 1693 * its rq, the bandwidth timer callback (which clearly has not 1694 * run yet) will take care of this. 1695 * However, the active utilization does not depend on the fact 1696 * that the task is on the runqueue or not (but depends on the 1697 * task's state - in GRUB parlance, "inactive" vs "active contending"). 1698 * In other words, even if a task is throttled its utilization must 1699 * be counted in the active utilization; hence, we need to call 1700 * add_running_bw(). 1701 */ 1702 if (p->dl.dl_throttled && !(flags & ENQUEUE_REPLENISH)) { 1703 if (flags & ENQUEUE_WAKEUP) 1704 task_contending(&p->dl, flags); 1705 1706 return; 1707 } 1708 1709 check_schedstat_required(); 1710 update_stats_wait_start_dl(dl_rq_of_se(&p->dl), &p->dl); 1711 1712 enqueue_dl_entity(&p->dl, flags); 1713 1714 if (!task_current(rq, p) && p->nr_cpus_allowed > 1) 1715 enqueue_pushable_dl_task(rq, p); 1716 } 1717 1718 static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags) 1719 { 1720 update_stats_dequeue_dl(&rq->dl, &p->dl, flags); 1721 dequeue_dl_entity(&p->dl); 1722 dequeue_pushable_dl_task(rq, p); 1723 } 1724 1725 static void dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags) 1726 { 1727 update_curr_dl(rq); 1728 __dequeue_task_dl(rq, p, flags); 1729 1730 if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & DEQUEUE_SAVE) { 1731 sub_running_bw(&p->dl, &rq->dl); 1732 sub_rq_bw(&p->dl, &rq->dl); 1733 } 1734 1735 /* 1736 * This check allows to start the inactive timer (or to immediately 1737 * decrease the active utilization, if needed) in two cases: 1738 * when the task blocks and when it is terminating 1739 * (p->state == TASK_DEAD). We can handle the two cases in the same 1740 * way, because from GRUB's point of view the same thing is happening 1741 * (the task moves from "active contending" to "active non contending" 1742 * or "inactive") 1743 */ 1744 if (flags & DEQUEUE_SLEEP) 1745 task_non_contending(p); 1746 } 1747 1748 /* 1749 * Yield task semantic for -deadline tasks is: 1750 * 1751 * get off from the CPU until our next instance, with 1752 * a new runtime. This is of little use now, since we 1753 * don't have a bandwidth reclaiming mechanism. Anyway, 1754 * bandwidth reclaiming is planned for the future, and 1755 * yield_task_dl will indicate that some spare budget 1756 * is available for other task instances to use it. 1757 */ 1758 static void yield_task_dl(struct rq *rq) 1759 { 1760 /* 1761 * We make the task go to sleep until its current deadline by 1762 * forcing its runtime to zero. This way, update_curr_dl() stops 1763 * it and the bandwidth timer will wake it up and will give it 1764 * new scheduling parameters (thanks to dl_yielded=1). 1765 */ 1766 rq->curr->dl.dl_yielded = 1; 1767 1768 update_rq_clock(rq); 1769 update_curr_dl(rq); 1770 /* 1771 * Tell update_rq_clock() that we've just updated, 1772 * so we don't do microscopic update in schedule() 1773 * and double the fastpath cost. 
1774 */ 1775 rq_clock_skip_update(rq); 1776 } 1777 1778 #ifdef CONFIG_SMP 1779 1780 static int find_later_rq(struct task_struct *task); 1781 1782 static int 1783 select_task_rq_dl(struct task_struct *p, int cpu, int flags) 1784 { 1785 struct task_struct *curr; 1786 bool select_rq; 1787 struct rq *rq; 1788 1789 if (!(flags & WF_TTWU)) 1790 goto out; 1791 1792 rq = cpu_rq(cpu); 1793 1794 rcu_read_lock(); 1795 curr = READ_ONCE(rq->curr); /* unlocked access */ 1796 1797 /* 1798 * If we are dealing with a -deadline task, we must 1799 * decide where to wake it up. 1800 * If it has a later deadline and the current task 1801 * on this rq can't move (provided the waking task 1802 * can!) we prefer to send it somewhere else. On the 1803 * other hand, if it has a shorter deadline, we 1804 * try to make it stay here, it might be important. 1805 */ 1806 select_rq = unlikely(dl_task(curr)) && 1807 (curr->nr_cpus_allowed < 2 || 1808 !dl_entity_preempt(&p->dl, &curr->dl)) && 1809 p->nr_cpus_allowed > 1; 1810 1811 /* 1812 * Take the capacity of the CPU into account to 1813 * ensure it fits the requirement of the task. 1814 */ 1815 if (static_branch_unlikely(&sched_asym_cpucapacity)) 1816 select_rq |= !dl_task_fits_capacity(p, cpu); 1817 1818 if (select_rq) { 1819 int target = find_later_rq(p); 1820 1821 if (target != -1 && 1822 (dl_time_before(p->dl.deadline, 1823 cpu_rq(target)->dl.earliest_dl.curr) || 1824 (cpu_rq(target)->dl.dl_nr_running == 0))) 1825 cpu = target; 1826 } 1827 rcu_read_unlock(); 1828 1829 out: 1830 return cpu; 1831 } 1832 1833 static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused) 1834 { 1835 struct rq *rq; 1836 1837 if (READ_ONCE(p->__state) != TASK_WAKING) 1838 return; 1839 1840 rq = task_rq(p); 1841 /* 1842 * Since p->state == TASK_WAKING, set_task_cpu() has been called 1843 * from try_to_wake_up(). Hence, p->pi_lock is locked, but 1844 * rq->lock is not... So, lock it 1845 */ 1846 raw_spin_rq_lock(rq); 1847 if (p->dl.dl_non_contending) { 1848 update_rq_clock(rq); 1849 sub_running_bw(&p->dl, &rq->dl); 1850 p->dl.dl_non_contending = 0; 1851 /* 1852 * If the timer handler is currently running and the 1853 * timer cannot be canceled, inactive_task_timer() 1854 * will see that dl_not_contending is not set, and 1855 * will not touch the rq's active utilization, 1856 * so we are still safe. 1857 */ 1858 if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1) 1859 put_task_struct(p); 1860 } 1861 sub_rq_bw(&p->dl, &rq->dl); 1862 raw_spin_rq_unlock(rq); 1863 } 1864 1865 static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p) 1866 { 1867 /* 1868 * Current can't be migrated, useless to reschedule, 1869 * let's hope p can move out. 1870 */ 1871 if (rq->curr->nr_cpus_allowed == 1 || 1872 !cpudl_find(&rq->rd->cpudl, rq->curr, NULL)) 1873 return; 1874 1875 /* 1876 * p is migratable, so let's not schedule it and 1877 * see if it is pushed or pulled somewhere else. 1878 */ 1879 if (p->nr_cpus_allowed != 1 && 1880 cpudl_find(&rq->rd->cpudl, p, NULL)) 1881 return; 1882 1883 resched_curr(rq); 1884 } 1885 1886 static int balance_dl(struct rq *rq, struct task_struct *p, struct rq_flags *rf) 1887 { 1888 if (!on_dl_rq(&p->dl) && need_pull_dl_task(rq, p)) { 1889 /* 1890 * This is OK, because current is on_cpu, which avoids it being 1891 * picked for load-balance and preemption/IRQs are still 1892 * disabled avoiding further scheduler activity on it and we've 1893 * not yet started the picking loop. 
1894 */ 1895 rq_unpin_lock(rq, rf); 1896 pull_dl_task(rq); 1897 rq_repin_lock(rq, rf); 1898 } 1899 1900 return sched_stop_runnable(rq) || sched_dl_runnable(rq); 1901 } 1902 #endif /* CONFIG_SMP */ 1903 1904 /* 1905 * Only called when both the current and waking task are -deadline 1906 * tasks. 1907 */ 1908 static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p, 1909 int flags) 1910 { 1911 if (dl_entity_preempt(&p->dl, &rq->curr->dl)) { 1912 resched_curr(rq); 1913 return; 1914 } 1915 1916 #ifdef CONFIG_SMP 1917 /* 1918 * In the unlikely case current and p have the same deadline 1919 * let us try to decide what's the best thing to do... 1920 */ 1921 if ((p->dl.deadline == rq->curr->dl.deadline) && 1922 !test_tsk_need_resched(rq->curr)) 1923 check_preempt_equal_dl(rq, p); 1924 #endif /* CONFIG_SMP */ 1925 } 1926 1927 #ifdef CONFIG_SCHED_HRTICK 1928 static void start_hrtick_dl(struct rq *rq, struct task_struct *p) 1929 { 1930 hrtick_start(rq, p->dl.runtime); 1931 } 1932 #else /* !CONFIG_SCHED_HRTICK */ 1933 static void start_hrtick_dl(struct rq *rq, struct task_struct *p) 1934 { 1935 } 1936 #endif 1937 1938 static void set_next_task_dl(struct rq *rq, struct task_struct *p, bool first) 1939 { 1940 struct sched_dl_entity *dl_se = &p->dl; 1941 struct dl_rq *dl_rq = &rq->dl; 1942 1943 p->se.exec_start = rq_clock_task(rq); 1944 if (on_dl_rq(&p->dl)) 1945 update_stats_wait_end_dl(dl_rq, dl_se); 1946 1947 /* You can't push away the running task */ 1948 dequeue_pushable_dl_task(rq, p); 1949 1950 if (!first) 1951 return; 1952 1953 if (hrtick_enabled_dl(rq)) 1954 start_hrtick_dl(rq, p); 1955 1956 if (rq->curr->sched_class != &dl_sched_class) 1957 update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 0); 1958 1959 deadline_queue_push_tasks(rq); 1960 } 1961 1962 static struct sched_dl_entity *pick_next_dl_entity(struct dl_rq *dl_rq) 1963 { 1964 struct rb_node *left = rb_first_cached(&dl_rq->root); 1965 1966 if (!left) 1967 return NULL; 1968 1969 return __node_2_dle(left); 1970 } 1971 1972 static struct task_struct *pick_task_dl(struct rq *rq) 1973 { 1974 struct sched_dl_entity *dl_se; 1975 struct dl_rq *dl_rq = &rq->dl; 1976 struct task_struct *p; 1977 1978 if (!sched_dl_runnable(rq)) 1979 return NULL; 1980 1981 dl_se = pick_next_dl_entity(dl_rq); 1982 BUG_ON(!dl_se); 1983 p = dl_task_of(dl_se); 1984 1985 return p; 1986 } 1987 1988 static struct task_struct *pick_next_task_dl(struct rq *rq) 1989 { 1990 struct task_struct *p; 1991 1992 p = pick_task_dl(rq); 1993 if (p) 1994 set_next_task_dl(rq, p, true); 1995 1996 return p; 1997 } 1998 1999 static void put_prev_task_dl(struct rq *rq, struct task_struct *p) 2000 { 2001 struct sched_dl_entity *dl_se = &p->dl; 2002 struct dl_rq *dl_rq = &rq->dl; 2003 2004 if (on_dl_rq(&p->dl)) 2005 update_stats_wait_start_dl(dl_rq, dl_se); 2006 2007 update_curr_dl(rq); 2008 2009 update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 1); 2010 if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1) 2011 enqueue_pushable_dl_task(rq, p); 2012 } 2013 2014 /* 2015 * scheduler tick hitting a task of our scheduling class. 2016 * 2017 * NOTE: This function can be called remotely by the tick offload that 2018 * goes along full dynticks. Therefore no local assumption can be made 2019 * and everything must be accessed through the @rq and @curr passed in 2020 * parameters. 
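 *
 * For illustration (editorial, call chain as understood by the editor):
 * with nohz_full the housekeeping CPU runs sched_tick_remote(), which
 * ends up calling curr->sched_class->task_tick(rq, curr, 0) with @rq
 * belonging to the remote CPU, so this_rq()/smp_processor_id() must not
 * be relied upon here.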
2021 */ 2022 static void task_tick_dl(struct rq *rq, struct task_struct *p, int queued) 2023 { 2024 update_curr_dl(rq); 2025 2026 update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 1); 2027 /* 2028 * Even when we have runtime, update_curr_dl() might have resulted in us 2029 * not being the leftmost task anymore. In that case NEED_RESCHED will 2030 * be set and schedule() will start a new hrtick for the next task. 2031 */ 2032 if (hrtick_enabled_dl(rq) && queued && p->dl.runtime > 0 && 2033 is_leftmost(p, &rq->dl)) 2034 start_hrtick_dl(rq, p); 2035 } 2036 2037 static void task_fork_dl(struct task_struct *p) 2038 { 2039 /* 2040 * SCHED_DEADLINE tasks cannot fork and this is achieved through 2041 * sched_fork() 2042 */ 2043 } 2044 2045 #ifdef CONFIG_SMP 2046 2047 /* Only try algorithms three times */ 2048 #define DL_MAX_TRIES 3 2049 2050 static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu) 2051 { 2052 if (!task_running(rq, p) && 2053 cpumask_test_cpu(cpu, &p->cpus_mask)) 2054 return 1; 2055 return 0; 2056 } 2057 2058 /* 2059 * Return the earliest pushable rq's task, which is suitable to be executed 2060 * on the CPU, NULL otherwise: 2061 */ 2062 static struct task_struct *pick_earliest_pushable_dl_task(struct rq *rq, int cpu) 2063 { 2064 struct task_struct *p = NULL; 2065 struct rb_node *next_node; 2066 2067 if (!has_pushable_dl_tasks(rq)) 2068 return NULL; 2069 2070 next_node = rb_first_cached(&rq->dl.pushable_dl_tasks_root); 2071 2072 next_node: 2073 if (next_node) { 2074 p = __node_2_pdl(next_node); 2075 2076 if (pick_dl_task(rq, p, cpu)) 2077 return p; 2078 2079 next_node = rb_next(next_node); 2080 goto next_node; 2081 } 2082 2083 return NULL; 2084 } 2085 2086 static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl); 2087 2088 static int find_later_rq(struct task_struct *task) 2089 { 2090 struct sched_domain *sd; 2091 struct cpumask *later_mask = this_cpu_cpumask_var_ptr(local_cpu_mask_dl); 2092 int this_cpu = smp_processor_id(); 2093 int cpu = task_cpu(task); 2094 2095 /* Make sure the mask is initialized first */ 2096 if (unlikely(!later_mask)) 2097 return -1; 2098 2099 if (task->nr_cpus_allowed == 1) 2100 return -1; 2101 2102 /* 2103 * We have to consider system topology and task affinity 2104 * first, then we can look for a suitable CPU. 2105 */ 2106 if (!cpudl_find(&task_rq(task)->rd->cpudl, task, later_mask)) 2107 return -1; 2108 2109 /* 2110 * If we are here, some targets have been found, including 2111 * the most suitable which is, among the runqueues where the 2112 * current tasks have later deadlines than the task's one, the 2113 * rq with the latest possible one. 2114 * 2115 * Now we check how well this matches with task's 2116 * affinity and system topology. 2117 * 2118 * The last CPU where the task run is our first 2119 * guess, since it is most likely cache-hot there. 2120 */ 2121 if (cpumask_test_cpu(cpu, later_mask)) 2122 return cpu; 2123 /* 2124 * Check if this_cpu is to be skipped (i.e., it is 2125 * not in the mask) or not. 2126 */ 2127 if (!cpumask_test_cpu(this_cpu, later_mask)) 2128 this_cpu = -1; 2129 2130 rcu_read_lock(); 2131 for_each_domain(cpu, sd) { 2132 if (sd->flags & SD_WAKE_AFFINE) { 2133 int best_cpu; 2134 2135 /* 2136 * If possible, preempting this_cpu is 2137 * cheaper than migrating. 
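 *
 * (Editorial note, heuristic rationale only): "cheaper" here mostly
 * means avoiding an IPI to, and a cold cache on, a remote CPU;
 * correctness does not depend on this choice.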
2138 */ 2139 if (this_cpu != -1 && 2140 cpumask_test_cpu(this_cpu, sched_domain_span(sd))) { 2141 rcu_read_unlock(); 2142 return this_cpu; 2143 } 2144 2145 best_cpu = cpumask_any_and_distribute(later_mask, 2146 sched_domain_span(sd)); 2147 /* 2148 * Last chance: if a CPU being in both later_mask 2149 * and current sd span is valid, that becomes our 2150 * choice. Of course, the latest possible CPU is 2151 * already under consideration through later_mask. 2152 */ 2153 if (best_cpu < nr_cpu_ids) { 2154 rcu_read_unlock(); 2155 return best_cpu; 2156 } 2157 } 2158 } 2159 rcu_read_unlock(); 2160 2161 /* 2162 * At this point, all our guesses failed, we just return 2163 * 'something', and let the caller sort the things out. 2164 */ 2165 if (this_cpu != -1) 2166 return this_cpu; 2167 2168 cpu = cpumask_any_distribute(later_mask); 2169 if (cpu < nr_cpu_ids) 2170 return cpu; 2171 2172 return -1; 2173 } 2174 2175 /* Locks the rq it finds */ 2176 static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq) 2177 { 2178 struct rq *later_rq = NULL; 2179 int tries; 2180 int cpu; 2181 2182 for (tries = 0; tries < DL_MAX_TRIES; tries++) { 2183 cpu = find_later_rq(task); 2184 2185 if ((cpu == -1) || (cpu == rq->cpu)) 2186 break; 2187 2188 later_rq = cpu_rq(cpu); 2189 2190 if (later_rq->dl.dl_nr_running && 2191 !dl_time_before(task->dl.deadline, 2192 later_rq->dl.earliest_dl.curr)) { 2193 /* 2194 * Target rq has tasks of equal or earlier deadline, 2195 * retrying does not release any lock and is unlikely 2196 * to yield a different result. 2197 */ 2198 later_rq = NULL; 2199 break; 2200 } 2201 2202 /* Retry if something changed. */ 2203 if (double_lock_balance(rq, later_rq)) { 2204 if (unlikely(task_rq(task) != rq || 2205 !cpumask_test_cpu(later_rq->cpu, &task->cpus_mask) || 2206 task_running(rq, task) || 2207 !dl_task(task) || 2208 !task_on_rq_queued(task))) { 2209 double_unlock_balance(rq, later_rq); 2210 later_rq = NULL; 2211 break; 2212 } 2213 } 2214 2215 /* 2216 * If the rq we found has no -deadline task, or 2217 * its earliest one has a later deadline than our 2218 * task, the rq is a good one. 2219 */ 2220 if (!later_rq->dl.dl_nr_running || 2221 dl_time_before(task->dl.deadline, 2222 later_rq->dl.earliest_dl.curr)) 2223 break; 2224 2225 /* Otherwise we try again. */ 2226 double_unlock_balance(rq, later_rq); 2227 later_rq = NULL; 2228 } 2229 2230 return later_rq; 2231 } 2232 2233 static struct task_struct *pick_next_pushable_dl_task(struct rq *rq) 2234 { 2235 struct task_struct *p; 2236 2237 if (!has_pushable_dl_tasks(rq)) 2238 return NULL; 2239 2240 p = __node_2_pdl(rb_first_cached(&rq->dl.pushable_dl_tasks_root)); 2241 2242 BUG_ON(rq->cpu != task_cpu(p)); 2243 BUG_ON(task_current(rq, p)); 2244 BUG_ON(p->nr_cpus_allowed <= 1); 2245 2246 BUG_ON(!task_on_rq_queued(p)); 2247 BUG_ON(!dl_task(p)); 2248 2249 return p; 2250 } 2251 2252 /* 2253 * See if the non running -deadline tasks on this rq 2254 * can be sent to some other CPU where they can preempt 2255 * and start executing. 2256 */ 2257 static int push_dl_task(struct rq *rq) 2258 { 2259 struct task_struct *next_task; 2260 struct rq *later_rq; 2261 int ret = 0; 2262 2263 if (!rq->dl.overloaded) 2264 return 0; 2265 2266 next_task = pick_next_pushable_dl_task(rq); 2267 if (!next_task) 2268 return 0; 2269 2270 retry: 2271 /* 2272 * If next_task preempts rq->curr, and rq->curr 2273 * can move away, it makes sense to just reschedule 2274 * without going further in pushing next_task. 
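 *
 * (Editorial note): in that case rq->curr is the migratable one, so a
 * plain resched_curr() lets the regular push/pull machinery move curr
 * instead, avoiding the find_lock_later_rq() double-lock dance for
 * next_task.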
2275 */ 2276 if (dl_task(rq->curr) && 2277 dl_time_before(next_task->dl.deadline, rq->curr->dl.deadline) && 2278 rq->curr->nr_cpus_allowed > 1) { 2279 resched_curr(rq); 2280 return 0; 2281 } 2282 2283 if (is_migration_disabled(next_task)) 2284 return 0; 2285 2286 if (WARN_ON(next_task == rq->curr)) 2287 return 0; 2288 2289 /* We might release rq lock */ 2290 get_task_struct(next_task); 2291 2292 /* Will lock the rq it'll find */ 2293 later_rq = find_lock_later_rq(next_task, rq); 2294 if (!later_rq) { 2295 struct task_struct *task; 2296 2297 /* 2298 * We must check all this again, since 2299 * find_lock_later_rq releases rq->lock and it is 2300 * then possible that next_task has migrated. 2301 */ 2302 task = pick_next_pushable_dl_task(rq); 2303 if (task == next_task) { 2304 /* 2305 * The task is still there. We don't try 2306 * again, some other CPU will pull it when ready. 2307 */ 2308 goto out; 2309 } 2310 2311 if (!task) 2312 /* No more tasks */ 2313 goto out; 2314 2315 put_task_struct(next_task); 2316 next_task = task; 2317 goto retry; 2318 } 2319 2320 deactivate_task(rq, next_task, 0); 2321 set_task_cpu(next_task, later_rq->cpu); 2322 2323 /* 2324 * Update the later_rq clock here, because the clock is used 2325 * by the cpufreq_update_util() inside __add_running_bw(). 2326 */ 2327 update_rq_clock(later_rq); 2328 activate_task(later_rq, next_task, ENQUEUE_NOCLOCK); 2329 ret = 1; 2330 2331 resched_curr(later_rq); 2332 2333 double_unlock_balance(rq, later_rq); 2334 2335 out: 2336 put_task_struct(next_task); 2337 2338 return ret; 2339 } 2340 2341 static void push_dl_tasks(struct rq *rq) 2342 { 2343 /* push_dl_task() will return true if it moved a -deadline task */ 2344 while (push_dl_task(rq)) 2345 ; 2346 } 2347 2348 static void pull_dl_task(struct rq *this_rq) 2349 { 2350 int this_cpu = this_rq->cpu, cpu; 2351 struct task_struct *p, *push_task; 2352 bool resched = false; 2353 struct rq *src_rq; 2354 u64 dmin = LONG_MAX; 2355 2356 if (likely(!dl_overloaded(this_rq))) 2357 return; 2358 2359 /* 2360 * Match the barrier from dl_set_overloaded; this guarantees that if we 2361 * see overloaded we must also see the dlo_mask bit. 2362 */ 2363 smp_rmb(); 2364 2365 for_each_cpu(cpu, this_rq->rd->dlo_mask) { 2366 if (this_cpu == cpu) 2367 continue; 2368 2369 src_rq = cpu_rq(cpu); 2370 2371 /* 2372 * It looks racy, and it is! However, as in sched_rt.c, 2373 * we are fine with this. 2374 */ 2375 if (this_rq->dl.dl_nr_running && 2376 dl_time_before(this_rq->dl.earliest_dl.curr, 2377 src_rq->dl.earliest_dl.next)) 2378 continue; 2379 2380 /* Might drop this_rq->lock */ 2381 push_task = NULL; 2382 double_lock_balance(this_rq, src_rq); 2383 2384 /* 2385 * If there are no more pullable tasks on the 2386 * rq, we're done with it. 2387 */ 2388 if (src_rq->dl.dl_nr_running <= 1) 2389 goto skip; 2390 2391 p = pick_earliest_pushable_dl_task(src_rq, this_cpu); 2392 2393 /* 2394 * We found a task to be pulled if: 2395 * - it preempts our current (if there's one), 2396 * - it will preempt the last one we pulled (if any). 2397 */ 2398 if (p && dl_time_before(p->dl.deadline, dmin) && 2399 (!this_rq->dl.dl_nr_running || 2400 dl_time_before(p->dl.deadline, 2401 this_rq->dl.earliest_dl.curr))) { 2402 WARN_ON(p == src_rq->curr); 2403 WARN_ON(!task_on_rq_queued(p)); 2404 2405 /* 2406 * Then we pull iff p has actually an earlier 2407 * deadline than the current task of its runqueue.
2408 */ 2409 if (dl_time_before(p->dl.deadline, 2410 src_rq->curr->dl.deadline)) 2411 goto skip; 2412 2413 if (is_migration_disabled(p)) { 2414 push_task = get_push_task(src_rq); 2415 } else { 2416 deactivate_task(src_rq, p, 0); 2417 set_task_cpu(p, this_cpu); 2418 activate_task(this_rq, p, 0); 2419 dmin = p->dl.deadline; 2420 resched = true; 2421 } 2422 2423 /* Is there any other task even earlier? */ 2424 } 2425 skip: 2426 double_unlock_balance(this_rq, src_rq); 2427 2428 if (push_task) { 2429 raw_spin_rq_unlock(this_rq); 2430 stop_one_cpu_nowait(src_rq->cpu, push_cpu_stop, 2431 push_task, &src_rq->push_work); 2432 raw_spin_rq_lock(this_rq); 2433 } 2434 } 2435 2436 if (resched) 2437 resched_curr(this_rq); 2438 } 2439 2440 /* 2441 * Since the task is not running and a reschedule is not going to happen 2442 * anytime soon on its runqueue, we try pushing it away now. 2443 */ 2444 static void task_woken_dl(struct rq *rq, struct task_struct *p) 2445 { 2446 if (!task_running(rq, p) && 2447 !test_tsk_need_resched(rq->curr) && 2448 p->nr_cpus_allowed > 1 && 2449 dl_task(rq->curr) && 2450 (rq->curr->nr_cpus_allowed < 2 || 2451 !dl_entity_preempt(&p->dl, &rq->curr->dl))) { 2452 push_dl_tasks(rq); 2453 } 2454 } 2455 2456 static void set_cpus_allowed_dl(struct task_struct *p, 2457 const struct cpumask *new_mask, 2458 u32 flags) 2459 { 2460 struct root_domain *src_rd; 2461 struct rq *rq; 2462 2463 BUG_ON(!dl_task(p)); 2464 2465 rq = task_rq(p); 2466 src_rd = rq->rd; 2467 /* 2468 * Migrating a SCHED_DEADLINE task between exclusive 2469 * cpusets (different root_domains) entails a bandwidth 2470 * update. We already made space for us in the destination 2471 * domain (see cpuset_can_attach()). 2472 */ 2473 if (!cpumask_intersects(src_rd->span, new_mask)) { 2474 struct dl_bw *src_dl_b; 2475 2476 src_dl_b = dl_bw_of(cpu_of(rq)); 2477 /* 2478 * We now free resources of the root_domain we are migrating 2479 * off. In the worst case, sched_setattr() may temporarily fail 2480 * until we complete the update.
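 *
 * (Editorial sketch, inferred from dl_cpu_busy() below rather than
 * stated here): the bandwidth has already been reserved in the
 * destination root_domain, so __dl_sub() only needs to drop it from
 * the source dl_bw; in the meantime the task's bandwidth is counted
 * twice, never zero times, which keeps admission control conservative.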
2481 */ 2482 raw_spin_lock(&src_dl_b->lock); 2483 __dl_sub(src_dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p))); 2484 raw_spin_unlock(&src_dl_b->lock); 2485 } 2486 2487 set_cpus_allowed_common(p, new_mask, flags); 2488 } 2489 2490 /* Assumes rq->lock is held */ 2491 static void rq_online_dl(struct rq *rq) 2492 { 2493 if (rq->dl.overloaded) 2494 dl_set_overload(rq); 2495 2496 cpudl_set_freecpu(&rq->rd->cpudl, rq->cpu); 2497 if (rq->dl.dl_nr_running > 0) 2498 cpudl_set(&rq->rd->cpudl, rq->cpu, rq->dl.earliest_dl.curr); 2499 } 2500 2501 /* Assumes rq->lock is held */ 2502 static void rq_offline_dl(struct rq *rq) 2503 { 2504 if (rq->dl.overloaded) 2505 dl_clear_overload(rq); 2506 2507 cpudl_clear(&rq->rd->cpudl, rq->cpu); 2508 cpudl_clear_freecpu(&rq->rd->cpudl, rq->cpu); 2509 } 2510 2511 void __init init_sched_dl_class(void) 2512 { 2513 unsigned int i; 2514 2515 for_each_possible_cpu(i) 2516 zalloc_cpumask_var_node(&per_cpu(local_cpu_mask_dl, i), 2517 GFP_KERNEL, cpu_to_node(i)); 2518 } 2519 2520 void dl_add_task_root_domain(struct task_struct *p) 2521 { 2522 struct rq_flags rf; 2523 struct rq *rq; 2524 struct dl_bw *dl_b; 2525 2526 raw_spin_lock_irqsave(&p->pi_lock, rf.flags); 2527 if (!dl_task(p)) { 2528 raw_spin_unlock_irqrestore(&p->pi_lock, rf.flags); 2529 return; 2530 } 2531 2532 rq = __task_rq_lock(p, &rf); 2533 2534 dl_b = &rq->rd->dl_bw; 2535 raw_spin_lock(&dl_b->lock); 2536 2537 __dl_add(dl_b, p->dl.dl_bw, cpumask_weight(rq->rd->span)); 2538 2539 raw_spin_unlock(&dl_b->lock); 2540 2541 task_rq_unlock(rq, p, &rf); 2542 } 2543 2544 void dl_clear_root_domain(struct root_domain *rd) 2545 { 2546 unsigned long flags; 2547 2548 raw_spin_lock_irqsave(&rd->dl_bw.lock, flags); 2549 rd->dl_bw.total_bw = 0; 2550 raw_spin_unlock_irqrestore(&rd->dl_bw.lock, flags); 2551 } 2552 2553 #endif /* CONFIG_SMP */ 2554 2555 static void switched_from_dl(struct rq *rq, struct task_struct *p) 2556 { 2557 /* 2558 * task_non_contending() can start the "inactive timer" (if the 0-lag 2559 * time is in the future). If the task switches back to dl before 2560 * the "inactive timer" fires, it can continue to consume its current 2561 * runtime using its current deadline. If it stays outside of 2562 * SCHED_DEADLINE until the 0-lag time passes, inactive_task_timer() 2563 * will reset the task parameters. 2564 */ 2565 if (task_on_rq_queued(p) && p->dl.dl_runtime) 2566 task_non_contending(p); 2567 2568 if (!task_on_rq_queued(p)) { 2569 /* 2570 * Inactive timer is armed. However, p is leaving DEADLINE and 2571 * might migrate away from this rq while continuing to run on 2572 * some other class. We need to remove its contribution from 2573 * this rq running_bw now, or sub_rq_bw (below) will complain. 2574 */ 2575 if (p->dl.dl_non_contending) 2576 sub_running_bw(&p->dl, &rq->dl); 2577 sub_rq_bw(&p->dl, &rq->dl); 2578 } 2579 2580 /* 2581 * We cannot use inactive_task_timer() to invoke sub_running_bw() 2582 * at the 0-lag time, because the task could have been migrated 2583 * while SCHED_OTHER in the meanwhile. 2584 */ 2585 if (p->dl.dl_non_contending) 2586 p->dl.dl_non_contending = 0; 2587 2588 /* 2589 * Since this might be the only -deadline task on the rq, 2590 * this is the right place to try to pull some other one 2591 * from an overloaded CPU, if any. 2592 */ 2593 if (!task_on_rq_queued(p) || rq->dl.dl_nr_running) 2594 return; 2595 2596 deadline_queue_pull_task(rq); 2597 } 2598 2599 /* 2600 * When switching to -deadline, we may overload the rq, then 2601 * we try to push someone off, if possible. 
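 *
 * For example (editorial, typical path only): sched_setattr() with
 * SCHED_DEADLINE goes through __sched_setscheduler(), which, after
 * sched_dl_overflow() has admitted the new parameters, calls
 * check_class_changed() and hence switched_to_dl() on the task's rq.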
2602 */ 2603 static void switched_to_dl(struct rq *rq, struct task_struct *p) 2604 { 2605 if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1) 2606 put_task_struct(p); 2607 2608 /* If p is not queued we will update its parameters at next wakeup. */ 2609 if (!task_on_rq_queued(p)) { 2610 add_rq_bw(&p->dl, &rq->dl); 2611 2612 return; 2613 } 2614 2615 if (rq->curr != p) { 2616 #ifdef CONFIG_SMP 2617 if (p->nr_cpus_allowed > 1 && rq->dl.overloaded) 2618 deadline_queue_push_tasks(rq); 2619 #endif 2620 if (dl_task(rq->curr)) 2621 check_preempt_curr_dl(rq, p, 0); 2622 else 2623 resched_curr(rq); 2624 } else { 2625 update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 0); 2626 } 2627 } 2628 2629 /* 2630 * If the scheduling parameters of a -deadline task changed, 2631 * a push or pull operation might be needed. 2632 */ 2633 static void prio_changed_dl(struct rq *rq, struct task_struct *p, 2634 int oldprio) 2635 { 2636 if (task_on_rq_queued(p) || task_current(rq, p)) { 2637 #ifdef CONFIG_SMP 2638 /* 2639 * This might be too much, but unfortunately 2640 * we don't have the old deadline value, and 2641 * we can't tell whether the task is increasing 2642 * or lowering its prio, so... 2643 */ 2644 if (!rq->dl.overloaded) 2645 deadline_queue_pull_task(rq); 2646 2647 /* 2648 * If we now have an earlier deadline task than p, 2649 * then reschedule, provided p is still on this 2650 * runqueue. 2651 */ 2652 if (dl_time_before(rq->dl.earliest_dl.curr, p->dl.deadline)) 2653 resched_curr(rq); 2654 #else 2655 /* 2656 * Again, we don't know if p has an earlier 2657 * or later deadline, so let's blindly set a 2658 * (maybe not needed) rescheduling point. 2659 */ 2660 resched_curr(rq); 2661 #endif /* CONFIG_SMP */ 2662 } 2663 } 2664 2665 DEFINE_SCHED_CLASS(dl) = { 2666 2667 .enqueue_task = enqueue_task_dl, 2668 .dequeue_task = dequeue_task_dl, 2669 .yield_task = yield_task_dl, 2670 2671 .check_preempt_curr = check_preempt_curr_dl, 2672 2673 .pick_next_task = pick_next_task_dl, 2674 .put_prev_task = put_prev_task_dl, 2675 .set_next_task = set_next_task_dl, 2676 2677 #ifdef CONFIG_SMP 2678 .balance = balance_dl, 2679 .pick_task = pick_task_dl, 2680 .select_task_rq = select_task_rq_dl, 2681 .migrate_task_rq = migrate_task_rq_dl, 2682 .set_cpus_allowed = set_cpus_allowed_dl, 2683 .rq_online = rq_online_dl, 2684 .rq_offline = rq_offline_dl, 2685 .task_woken = task_woken_dl, 2686 .find_lock_rq = find_lock_later_rq, 2687 #endif 2688 2689 .task_tick = task_tick_dl, 2690 .task_fork = task_fork_dl, 2691 2692 .prio_changed = prio_changed_dl, 2693 .switched_from = switched_from_dl, 2694 .switched_to = switched_to_dl, 2695 2696 .update_curr = update_curr_dl, 2697 }; 2698 2699 /* Used for dl_bw check and update, used under sched_rt_handler()::mutex */ 2700 static u64 dl_generation; 2701 2702 int sched_dl_global_validate(void) 2703 { 2704 u64 runtime = global_rt_runtime(); 2705 u64 period = global_rt_period(); 2706 u64 new_bw = to_ratio(period, runtime); 2707 u64 gen = ++dl_generation; 2708 struct dl_bw *dl_b; 2709 int cpu, cpus, ret = 0; 2710 unsigned long flags; 2711 2712 /* 2713 * Here we want to check that the bandwidth is not being set to a 2714 * value smaller than the bandwidth currently allocated in 2715 * any of the root_domains.
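 *
 * Worked example (editorial, numbers chosen for illustration): with
 * kernel.sched_rt_period_us = 1000000 and
 * kernel.sched_rt_runtime_us = 400000, new_bw = to_ratio(period,
 * runtime) ~= 0.4 << BW_SHIFT; a root_domain spanning 4 CPUs may then
 * hold at most ~1.6 CPUs worth of -deadline utilization, and the write
 * is rejected with -EBUSY if dl_b->total_bw already exceeds
 * new_bw * cpus.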
2716 */ 2717 for_each_possible_cpu(cpu) { 2718 rcu_read_lock_sched(); 2719 2720 if (dl_bw_visited(cpu, gen)) 2721 goto next; 2722 2723 dl_b = dl_bw_of(cpu); 2724 cpus = dl_bw_cpus(cpu); 2725 2726 raw_spin_lock_irqsave(&dl_b->lock, flags); 2727 if (new_bw * cpus < dl_b->total_bw) 2728 ret = -EBUSY; 2729 raw_spin_unlock_irqrestore(&dl_b->lock, flags); 2730 2731 next: 2732 rcu_read_unlock_sched(); 2733 2734 if (ret) 2735 break; 2736 } 2737 2738 return ret; 2739 } 2740 2741 static void init_dl_rq_bw_ratio(struct dl_rq *dl_rq) 2742 { 2743 if (global_rt_runtime() == RUNTIME_INF) { 2744 dl_rq->bw_ratio = 1 << RATIO_SHIFT; 2745 dl_rq->extra_bw = 1 << BW_SHIFT; 2746 } else { 2747 dl_rq->bw_ratio = to_ratio(global_rt_runtime(), 2748 global_rt_period()) >> (BW_SHIFT - RATIO_SHIFT); 2749 dl_rq->extra_bw = to_ratio(global_rt_period(), 2750 global_rt_runtime()); 2751 } 2752 } 2753 2754 void sched_dl_do_global(void) 2755 { 2756 u64 new_bw = -1; 2757 u64 gen = ++dl_generation; 2758 struct dl_bw *dl_b; 2759 int cpu; 2760 unsigned long flags; 2761 2762 if (global_rt_runtime() != RUNTIME_INF) 2763 new_bw = to_ratio(global_rt_period(), global_rt_runtime()); 2764 2765 for_each_possible_cpu(cpu) { 2766 rcu_read_lock_sched(); 2767 2768 if (dl_bw_visited(cpu, gen)) { 2769 rcu_read_unlock_sched(); 2770 continue; 2771 } 2772 2773 dl_b = dl_bw_of(cpu); 2774 2775 raw_spin_lock_irqsave(&dl_b->lock, flags); 2776 dl_b->bw = new_bw; 2777 raw_spin_unlock_irqrestore(&dl_b->lock, flags); 2778 2779 rcu_read_unlock_sched(); 2780 init_dl_rq_bw_ratio(&cpu_rq(cpu)->dl); 2781 } 2782 } 2783 2784 /* 2785 * We must be sure that accepting a new task (or allowing changing the 2786 * parameters of an existing one) is consistent with the bandwidth 2787 * constraints. If so, this function also updates the currently 2788 * allocated bandwidth accordingly, to reflect the new situation. 2789 * 2790 * This function is called while holding p's rq->lock. 2791 */ 2792 int sched_dl_overflow(struct task_struct *p, int policy, 2793 const struct sched_attr *attr) 2794 { 2795 u64 period = attr->sched_period ?: attr->sched_deadline; 2796 u64 runtime = attr->sched_runtime; 2797 u64 new_bw = dl_policy(policy) ? to_ratio(period, runtime) : 0; 2798 int cpus, err = -1, cpu = task_cpu(p); 2799 struct dl_bw *dl_b = dl_bw_of(cpu); 2800 unsigned long cap; 2801 2802 if (attr->sched_flags & SCHED_FLAG_SUGOV) 2803 return 0; 2804 2805 /* !deadline task may carry old deadline bandwidth */ 2806 if (new_bw == p->dl.dl_bw && task_has_dl_policy(p)) 2807 return 0; 2808 2809 /* 2810 * Whether a task enters, leaves, or stays -deadline but changes 2811 * its parameters, we may need to update the total allocated 2812 * bandwidth of the container accordingly. 2813 */ 2814 raw_spin_lock(&dl_b->lock); 2815 cpus = dl_bw_cpus(cpu); 2816 cap = dl_bw_capacity(cpu); 2817 2818 if (dl_policy(policy) && !task_has_dl_policy(p) && 2819 !__dl_overflow(dl_b, cap, 0, new_bw)) { 2820 if (hrtimer_active(&p->dl.inactive_timer)) 2821 __dl_sub(dl_b, p->dl.dl_bw, cpus); 2822 __dl_add(dl_b, new_bw, cpus); 2823 err = 0; 2824 } else if (dl_policy(policy) && task_has_dl_policy(p) && 2825 !__dl_overflow(dl_b, cap, p->dl.dl_bw, new_bw)) { 2826 /* 2827 * XXX this is slightly incorrect: when the task 2828 * utilization decreases, we should delay the total 2829 * utilization change until the task's 0-lag point. 2830 * But this would require setting the task's "inactive 2831 * timer" when the task is not inactive.
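 *
 * (Editorial aside): the 0-lag point mentioned above is the instant at
 * which the remaining runtime, consumed at the reserved bandwidth,
 * would reach zero; task_non_contending() computes it as
 *
 *	zero_lag_time = deadline - runtime * dl_period / dl_runtime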
2832 */ 2833 __dl_sub(dl_b, p->dl.dl_bw, cpus); 2834 __dl_add(dl_b, new_bw, cpus); 2835 dl_change_utilization(p, new_bw); 2836 err = 0; 2837 } else if (!dl_policy(policy) && task_has_dl_policy(p)) { 2838 /* 2839 * Do not decrease the total deadline utilization here, 2840 * switched_from_dl() will take care to do it at the correct 2841 * (0-lag) time. 2842 */ 2843 err = 0; 2844 } 2845 raw_spin_unlock(&dl_b->lock); 2846 2847 return err; 2848 } 2849 2850 /* 2851 * This function initializes the sched_dl_entity of a task that is 2852 * becoming SCHED_DEADLINE. 2853 * 2854 * Only the static values are considered here, the actual runtime and the 2855 * absolute deadline will be properly calculated when the task is enqueued 2856 * for the first time with its new policy. 2857 */ 2858 void __setparam_dl(struct task_struct *p, const struct sched_attr *attr) 2859 { 2860 struct sched_dl_entity *dl_se = &p->dl; 2861 2862 dl_se->dl_runtime = attr->sched_runtime; 2863 dl_se->dl_deadline = attr->sched_deadline; 2864 dl_se->dl_period = attr->sched_period ?: dl_se->dl_deadline; 2865 dl_se->flags = attr->sched_flags & SCHED_DL_FLAGS; 2866 dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime); 2867 dl_se->dl_density = to_ratio(dl_se->dl_deadline, dl_se->dl_runtime); 2868 } 2869 2870 void __getparam_dl(struct task_struct *p, struct sched_attr *attr) 2871 { 2872 struct sched_dl_entity *dl_se = &p->dl; 2873 2874 attr->sched_priority = p->rt_priority; 2875 attr->sched_runtime = dl_se->dl_runtime; 2876 attr->sched_deadline = dl_se->dl_deadline; 2877 attr->sched_period = dl_se->dl_period; 2878 attr->sched_flags &= ~SCHED_DL_FLAGS; 2879 attr->sched_flags |= dl_se->flags; 2880 } 2881 2882 /* 2883 * Default limits for DL period; on the top end we guard against small util 2884 * tasks still getting ridiculously long effective runtimes; on the bottom end 2885 * we guard against timer DoS. 2886 */ 2887 unsigned int sysctl_sched_dl_period_max = 1 << 22; /* ~4 seconds */ 2888 unsigned int sysctl_sched_dl_period_min = 100; /* 100 us */ 2889 2890 /* 2891 * This function validates the new parameters of a -deadline task. 2892 * We require the deadline to be non-zero and greater than or equal to 2893 * the runtime, and the period to be either zero or greater than or 2894 * equal to the deadline. Furthermore, we have to be sure that 2895 * user parameters are above the internal resolution of 1us (we 2896 * check sched_runtime only since it is always the smallest one) and 2897 * below 2^63 ns (we have to check both sched_deadline and 2898 * sched_period, as the latter can be zero). 2899 */ 2900 bool __checkparam_dl(const struct sched_attr *attr) 2901 { 2902 u64 period, max, min; 2903 2904 /* special dl tasks don't actually use any parameter */ 2905 if (attr->sched_flags & SCHED_FLAG_SUGOV) 2906 return true; 2907 2908 /* deadline != 0 */ 2909 if (attr->sched_deadline == 0) 2910 return false; 2911 2912 /* 2913 * Since we truncate DL_SCALE bits, make sure we're at least 2914 * that big. 2915 */ 2916 if (attr->sched_runtime < (1ULL << DL_SCALE)) 2917 return false; 2918 2919 /* 2920 * Since we use the MSB for wrap-around and sign issues, make 2921 * sure it's not set (mind that period can be equal to zero).
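 *
 * (Editorial note): deadline comparisons use signed wrap-around
 * arithmetic, roughly dl_time_before(a, b) == ((s64)(a - b) < 0),
 * which is only meaningful while both values stay below 2^63.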
2922 */ 2923 if (attr->sched_deadline & (1ULL << 63) || 2924 attr->sched_period & (1ULL << 63)) 2925 return false; 2926 2927 period = attr->sched_period; 2928 if (!period) 2929 period = attr->sched_deadline; 2930 2931 /* runtime <= deadline <= period (if period != 0) */ 2932 if (period < attr->sched_deadline || 2933 attr->sched_deadline < attr->sched_runtime) 2934 return false; 2935 2936 max = (u64)READ_ONCE(sysctl_sched_dl_period_max) * NSEC_PER_USEC; 2937 min = (u64)READ_ONCE(sysctl_sched_dl_period_min) * NSEC_PER_USEC; 2938 2939 if (period < min || period > max) 2940 return false; 2941 2942 return true; 2943 } 2944 2945 /* 2946 * This function clears the sched_dl_entity static params. 2947 */ 2948 void __dl_clear_params(struct task_struct *p) 2949 { 2950 struct sched_dl_entity *dl_se = &p->dl; 2951 2952 dl_se->dl_runtime = 0; 2953 dl_se->dl_deadline = 0; 2954 dl_se->dl_period = 0; 2955 dl_se->flags = 0; 2956 dl_se->dl_bw = 0; 2957 dl_se->dl_density = 0; 2958 2959 dl_se->dl_throttled = 0; 2960 dl_se->dl_yielded = 0; 2961 dl_se->dl_non_contending = 0; 2962 dl_se->dl_overrun = 0; 2963 2964 #ifdef CONFIG_RT_MUTEXES 2965 dl_se->pi_se = dl_se; 2966 #endif 2967 } 2968 2969 bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr) 2970 { 2971 struct sched_dl_entity *dl_se = &p->dl; 2972 2973 if (dl_se->dl_runtime != attr->sched_runtime || 2974 dl_se->dl_deadline != attr->sched_deadline || 2975 dl_se->dl_period != attr->sched_period || 2976 dl_se->flags != (attr->sched_flags & SCHED_DL_FLAGS)) 2977 return true; 2978 2979 return false; 2980 } 2981 2982 #ifdef CONFIG_SMP 2983 int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur, 2984 const struct cpumask *trial) 2985 { 2986 int ret = 1, trial_cpus; 2987 struct dl_bw *cur_dl_b; 2988 unsigned long flags; 2989 2990 rcu_read_lock_sched(); 2991 cur_dl_b = dl_bw_of(cpumask_any(cur)); 2992 trial_cpus = cpumask_weight(trial); 2993 2994 raw_spin_lock_irqsave(&cur_dl_b->lock, flags); 2995 if (cur_dl_b->bw != -1 && 2996 cur_dl_b->bw * trial_cpus < cur_dl_b->total_bw) 2997 ret = 0; 2998 raw_spin_unlock_irqrestore(&cur_dl_b->lock, flags); 2999 rcu_read_unlock_sched(); 3000 3001 return ret; 3002 } 3003 3004 int dl_cpu_busy(int cpu, struct task_struct *p) 3005 { 3006 unsigned long flags, cap; 3007 struct dl_bw *dl_b; 3008 bool overflow; 3009 3010 rcu_read_lock_sched(); 3011 dl_b = dl_bw_of(cpu); 3012 raw_spin_lock_irqsave(&dl_b->lock, flags); 3013 cap = dl_bw_capacity(cpu); 3014 overflow = __dl_overflow(dl_b, cap, 0, p ? p->dl.dl_bw : 0); 3015 3016 if (!overflow && p) { 3017 /* 3018 * We reserve space for this task in the destination 3019 * root_domain, as we can't fail after this point. 3020 * We will free resources in the source root_domain 3021 * later on (see set_cpus_allowed_dl()). 3022 */ 3023 __dl_add(dl_b, p->dl.dl_bw, dl_bw_cpus(cpu)); 3024 } 3025 3026 raw_spin_unlock_irqrestore(&dl_b->lock, flags); 3027 rcu_read_unlock_sched(); 3028 3029 return overflow ? -EBUSY : 0; 3030 } 3031 #endif 3032 3033 #ifdef CONFIG_SCHED_DEBUG 3034 void print_dl_stats(struct seq_file *m, int cpu) 3035 { 3036 print_dl_rq(m, cpu, &cpu_rq(cpu)->dl); 3037 } 3038 #endif /* CONFIG_SCHED_DEBUG */ 3039
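
/*
 * Usage illustration (editorial addition, not kernel code): the
 * parameters validated by __checkparam_dl() are supplied from userspace
 * via sched_setattr(2), e.g.:
 *
 *	struct sched_attr attr = {
 *		.size		= sizeof(attr),
 *		.sched_policy	= SCHED_DEADLINE,
 *		.sched_runtime	=  10 * 1000 * 1000,	//  10 ms
 *		.sched_deadline	=  30 * 1000 * 1000,	//  30 ms
 *		.sched_period	= 100 * 1000 * 1000,	// 100 ms
 *	};
 *	syscall(SYS_sched_setattr, 0, &attr, 0);
 *
 * which asks for dl_bw = runtime/period = 10% of one CPU, subject to
 * the admission control in sched_dl_overflow() above.
 */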