core.c (before: c01f5120ca7cf2994336c42b8a9cae697121ffb3) | core.c (after: 0ff7b2cfbae36ebcd216c6a5ad7f8534eebeaee2) |
---|---|
1// SPDX-License-Identifier: GPL-2.0-only 2/* 3 * kernel/sched/core.c 4 * 5 * Core kernel scheduler code and related syscalls 6 * 7 * Copyright (C) 1991-2002 Linus Torvalds 8 */ --- 759 unchanged lines hidden (view full) --- 768 } else { 769 load->weight = scale_load(sched_prio_to_weight[prio]); 770 load->inv_weight = sched_prio_to_wmult[prio]; 771 p->se.runnable_weight = load->weight; 772 } 773} 774 775#ifdef CONFIG_UCLAMP_TASK | 1// SPDX-License-Identifier: GPL-2.0-only 2/* 3 * kernel/sched/core.c 4 * 5 * Core kernel scheduler code and related syscalls 6 * 7 * Copyright (C) 1991-2002 Linus Torvalds 8 */ --- 759 unchanged lines hidden (view full) --- 768 } else { 769 load->weight = scale_load(sched_prio_to_weight[prio]); 770 load->inv_weight = sched_prio_to_wmult[prio]; 771 p->se.runnable_weight = load->weight; 772 } 773} 774 775#ifdef CONFIG_UCLAMP_TASK |
776/* 777 * Serializes updates of utilization clamp values 778 * 779 * The (slow-path) user-space triggers utilization clamp value updates which 780 * can require updates on (fast-path) scheduler's data structures used to 781 * support enqueue/dequeue operations. 782 * While the per-CPU rq lock protects fast-path update operations, user-space 783 * requests are serialized using a mutex to reduce the risk of conflicting 784 * updates or API abuses. 785 */ 786static DEFINE_MUTEX(uclamp_mutex); 787 |
|
776/* Max allowed minimum utilization */ 777unsigned int sysctl_sched_uclamp_util_min = SCHED_CAPACITY_SCALE; 778 779/* Max allowed maximum utilization */ 780unsigned int sysctl_sched_uclamp_util_max = SCHED_CAPACITY_SCALE; 781 782/* All clamps are required to be less or equal than these values */ 783static struct uclamp_se uclamp_default[UCLAMP_CNT]; --- 9 unchanged lines hidden (view full) --- 793 return clamp_value / UCLAMP_BUCKET_DELTA; 794} 795 796static inline unsigned int uclamp_bucket_base_value(unsigned int clamp_value) 797{ 798 return UCLAMP_BUCKET_DELTA * uclamp_bucket_id(clamp_value); 799} 800 | 788/* Max allowed minimum utilization */ 789unsigned int sysctl_sched_uclamp_util_min = SCHED_CAPACITY_SCALE; 790 791/* Max allowed maximum utilization */ 792unsigned int sysctl_sched_uclamp_util_max = SCHED_CAPACITY_SCALE; 793 794/* All clamps are required to be less or equal than these values */ 795static struct uclamp_se uclamp_default[UCLAMP_CNT]; --- 9 unchanged lines hidden (view full) --- 805 return clamp_value / UCLAMP_BUCKET_DELTA; 806} 807 808static inline unsigned int uclamp_bucket_base_value(unsigned int clamp_value) 809{ 810 return UCLAMP_BUCKET_DELTA * uclamp_bucket_id(clamp_value); 811} 812 |
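The two helpers above map a utilization clamp value in the range [0..SCHED_CAPACITY_SCALE] onto a small, fixed number of buckets. A minimal standalone sketch of that mapping, assuming the usual capacity scale of 1024 and a default of five buckets (the kernel derives UCLAMP_BUCKET_DELTA from CONFIG_UCLAMP_BUCKETS_COUNT with a rounded division; the macro below only approximates that):

```c
#include <stdio.h>

#define SCHED_CAPACITY_SCALE	1024
#define UCLAMP_BUCKETS		5	/* assumed CONFIG_UCLAMP_BUCKETS_COUNT default */
/* Rounded division, standing in for the kernel's DIV_ROUND_CLOSEST() */
#define UCLAMP_BUCKET_DELTA	((SCHED_CAPACITY_SCALE + UCLAMP_BUCKETS / 2) / UCLAMP_BUCKETS)

static unsigned int uclamp_bucket_id(unsigned int clamp_value)
{
	return clamp_value / UCLAMP_BUCKET_DELTA;
}

static unsigned int uclamp_bucket_base_value(unsigned int clamp_value)
{
	return UCLAMP_BUCKET_DELTA * uclamp_bucket_id(clamp_value);
}

int main(void)
{
	unsigned int v;

	for (v = 0; v <= SCHED_CAPACITY_SCALE; v += 256)
		printf("clamp=%4u -> bucket=%u base=%u\n",
		       v, uclamp_bucket_id(v), uclamp_bucket_base_value(v));
	return 0;
}
```

Every value in the same roughly-205-wide slice lands in the same bucket, which is what lets the rq side refcount tasks per bucket instead of per distinct clamp value.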
801static inline unsigned int uclamp_none(int clamp_id) | 813static inline enum uclamp_id uclamp_none(enum uclamp_id clamp_id) |
802{ 803 if (clamp_id == UCLAMP_MIN) 804 return 0; 805 return SCHED_CAPACITY_SCALE; 806} 807 808static inline void uclamp_se_set(struct uclamp_se *uc_se, 809 unsigned int value, bool user_defined) 810{ 811 uc_se->value = value; 812 uc_se->bucket_id = uclamp_bucket_id(value); 813 uc_se->user_defined = user_defined; 814} 815 816static inline unsigned int | 814{ 815 if (clamp_id == UCLAMP_MIN) 816 return 0; 817 return SCHED_CAPACITY_SCALE; 818} 819 820static inline void uclamp_se_set(struct uclamp_se *uc_se, 821 unsigned int value, bool user_defined) 822{ 823 uc_se->value = value; 824 uc_se->bucket_id = uclamp_bucket_id(value); 825 uc_se->user_defined = user_defined; 826} 827 828static inline unsigned int |
817uclamp_idle_value(struct rq *rq, unsigned int clamp_id, | 829uclamp_idle_value(struct rq *rq, enum uclamp_id clamp_id, |
818 unsigned int clamp_value) 819{ 820 /* 821 * Avoid blocked utilization pushing up the frequency when we go 822 * idle (which drops the max-clamp) by retaining the last known 823 * max-clamp. 824 */ 825 if (clamp_id == UCLAMP_MAX) { 826 rq->uclamp_flags |= UCLAMP_FLAG_IDLE; 827 return clamp_value; 828 } 829 830 return uclamp_none(UCLAMP_MIN); 831} 832 | 830 unsigned int clamp_value) 831{ 832 /* 833 * Avoid blocked utilization pushing up the frequency when we go 834 * idle (which drops the max-clamp) by retaining the last known 835 * max-clamp. 836 */ 837 if (clamp_id == UCLAMP_MAX) { 838 rq->uclamp_flags |= UCLAMP_FLAG_IDLE; 839 return clamp_value; 840 } 841 842 return uclamp_none(UCLAMP_MIN); 843} 844 |
833static inline void uclamp_idle_reset(struct rq *rq, unsigned int clamp_id, | 845static inline void uclamp_idle_reset(struct rq *rq, enum uclamp_id clamp_id, |
834 unsigned int clamp_value) 835{ 836 /* Reset max-clamp retention only on idle exit */ 837 if (!(rq->uclamp_flags & UCLAMP_FLAG_IDLE)) 838 return; 839 840 WRITE_ONCE(rq->uclamp[clamp_id].value, clamp_value); 841} 842 843static inline | 846 unsigned int clamp_value) 847{ 848 /* Reset max-clamp retention only on idle exit */ 849 if (!(rq->uclamp_flags & UCLAMP_FLAG_IDLE)) 850 return; 851 852 WRITE_ONCE(rq->uclamp[clamp_id].value, clamp_value); 853} 854 855static inline |
844unsigned int uclamp_rq_max_value(struct rq *rq, unsigned int clamp_id, 845 unsigned int clamp_value) | 856enum uclamp_id uclamp_rq_max_value(struct rq *rq, enum uclamp_id clamp_id, 857 unsigned int clamp_value) |
846{ 847 struct uclamp_bucket *bucket = rq->uclamp[clamp_id].bucket; 848 int bucket_id = UCLAMP_BUCKETS - 1; 849 850 /* 851 * Since both min and max clamps are max aggregated, find the 852 * top most bucket with tasks in. 853 */ 854 for ( ; bucket_id >= 0; bucket_id--) { 855 if (!bucket[bucket_id].tasks) 856 continue; 857 return bucket[bucket_id].value; 858 } 859 860 /* No tasks -- default clamp values */ 861 return uclamp_idle_value(rq, clamp_id, clamp_value); 862} 863 | 858{ 859 struct uclamp_bucket *bucket = rq->uclamp[clamp_id].bucket; 860 int bucket_id = UCLAMP_BUCKETS - 1; 861 862 /* 863 * Since both min and max clamps are max aggregated, find the 864 * top most bucket with tasks in. 865 */ 866 for ( ; bucket_id >= 0; bucket_id--) { 867 if (!bucket[bucket_id].tasks) 868 continue; 869 return bucket[bucket_id].value; 870 } 871 872 /* No tasks -- default clamp values */ 873 return uclamp_idle_value(rq, clamp_id, clamp_value); 874} 875 |
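uclamp_rq_max_value() above encodes the max-aggregation rule: the runqueue-wide clamp is the value held by the highest-indexed bucket that still has RUNNABLE tasks refcounted in it, falling back to the idle/default value when the rq is empty. A hedged userspace sketch of that walk (bucket count and values are illustrative, not the kernel's):

```c
#include <stdio.h>

#define NR_BUCKETS 5

struct bucket { unsigned int tasks; unsigned int value; };

static unsigned int rq_max_value(const struct bucket *b, unsigned int idle_default)
{
	int i;

	/* Min and max clamps are both max-aggregated: take the topmost
	 * bucket that still refcounts tasks. */
	for (i = NR_BUCKETS - 1; i >= 0; i--) {
		if (b[i].tasks)
			return b[i].value;
	}
	return idle_default;	/* no tasks: idle/default clamp */
}

int main(void)
{
	struct bucket rq_min[NR_BUCKETS] = {
		[1] = { .tasks = 2, .value = 300 },
		[3] = { .tasks = 1, .value = 700 },
	};

	printf("rq uclamp.min = %u\n", rq_max_value(rq_min, 0));	/* 700 */
	return 0;
}
```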
876static inline struct uclamp_se 877uclamp_tg_restrict(struct task_struct *p, enum uclamp_id clamp_id) 878{ 879 struct uclamp_se uc_req = p->uclamp_req[clamp_id]; 880#ifdef CONFIG_UCLAMP_TASK_GROUP 881 struct uclamp_se uc_max; 882 883 /* 884 * Tasks in autogroups or root task group will be 885 * restricted by system defaults. 886 */ 887 if (task_group_is_autogroup(task_group(p))) 888 return uc_req; 889 if (task_group(p) == &root_task_group) 890 return uc_req; 891 892 uc_max = task_group(p)->uclamp[clamp_id]; 893 if (uc_req.value > uc_max.value || !uc_req.user_defined) 894 return uc_max; 895#endif 896 897 return uc_req; 898} 899 |
|
864/* 865 * The effective clamp bucket index of a task depends on, by increasing 866 * priority: 867 * - the task specific clamp value, when explicitly requested from userspace | 900/* 901 * The effective clamp bucket index of a task depends on, by increasing 902 * priority: 903 * - the task specific clamp value, when explicitly requested from userspace |
904 * - the task group effective clamp value, for tasks not either in the root 905 * group or in an autogroup |
|
868 * - the system default clamp value, defined by the sysadmin 869 */ 870static inline struct uclamp_se | 906 * - the system default clamp value, defined by the sysadmin 907 */ 908static inline struct uclamp_se |
871uclamp_eff_get(struct task_struct *p, unsigned int clamp_id) | 909uclamp_eff_get(struct task_struct *p, enum uclamp_id clamp_id) |
872{ | 910{ |
873 struct uclamp_se uc_req = p->uclamp_req[clamp_id]; | 911 struct uclamp_se uc_req = uclamp_tg_restrict(p, clamp_id); |
874 struct uclamp_se uc_max = uclamp_default[clamp_id]; 875 876 /* System default restrictions always apply */ 877 if (unlikely(uc_req.value > uc_max.value)) 878 return uc_max; 879 880 return uc_req; 881} 882 | 912 struct uclamp_se uc_max = uclamp_default[clamp_id]; 913 914 /* System default restrictions always apply */ 915 if (unlikely(uc_req.value > uc_max.value)) 916 return uc_max; 917 918 return uc_req; 919} 920 |
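Together, uclamp_tg_restrict() and uclamp_eff_get() resolve a task's effective clamp in two capping steps: the task-specific request is first restricted by its task group (only with CONFIG_UCLAMP_TASK_GROUP, and not for the root group or autogroups), then by the system-wide default. A simplified sketch of that precedence, using invented struct names rather than the kernel's:

```c
#include <stdio.h>

struct clamp { unsigned int value; int user_defined; };

static unsigned int effective_clamp(struct clamp req, struct clamp tg,
				    struct clamp sys_default)
{
	/* Task-group restriction: requests without a user-defined value
	 * inherit the group clamp, user-defined requests are capped by it. */
	if (!req.user_defined || req.value > tg.value)
		req = tg;

	/* System default restrictions always apply. */
	if (req.value > sys_default.value)
		req = sys_default;

	return req.value;
}

int main(void)
{
	struct clamp req = { .value = 800, .user_defined = 1 };
	struct clamp tg  = { .value = 512 };
	struct clamp def = { .value = 1024 };

	printf("effective clamp = %u\n", effective_clamp(req, tg, def));	/* 512 */
	return 0;
}
```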
883unsigned int uclamp_eff_value(struct task_struct *p, unsigned int clamp_id) | 921enum uclamp_id uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id) |
884{ 885 struct uclamp_se uc_eff; 886 887 /* Task currently refcounted: use back-annotated (effective) value */ 888 if (p->uclamp[clamp_id].active) 889 return p->uclamp[clamp_id].value; 890 891 uc_eff = uclamp_eff_get(p, clamp_id); --- 7 unchanged lines hidden (view full) --- 899 * updates the rq's clamp value if required. 900 * 901 * Tasks can have a task-specific value requested from user-space, track 902 * within each bucket the maximum value for tasks refcounted in it. 903 * This "local max aggregation" allows to track the exact "requested" value 904 * for each bucket when all its RUNNABLE tasks require the same clamp. 905 */ 906static inline void uclamp_rq_inc_id(struct rq *rq, struct task_struct *p, | 922{ 923 struct uclamp_se uc_eff; 924 925 /* Task currently refcounted: use back-annotated (effective) value */ 926 if (p->uclamp[clamp_id].active) 927 return p->uclamp[clamp_id].value; 928 929 uc_eff = uclamp_eff_get(p, clamp_id); --- 7 unchanged lines hidden (view full) --- 937 * updates the rq's clamp value if required. 938 * 939 * Tasks can have a task-specific value requested from user-space, track 940 * within each bucket the maximum value for tasks refcounted in it. 941 * This "local max aggregation" allows to track the exact "requested" value 942 * for each bucket when all its RUNNABLE tasks require the same clamp. 943 */ 944static inline void uclamp_rq_inc_id(struct rq *rq, struct task_struct *p, |
907 unsigned int clamp_id) | 945 enum uclamp_id clamp_id) |
908{ 909 struct uclamp_rq *uc_rq = &rq->uclamp[clamp_id]; 910 struct uclamp_se *uc_se = &p->uclamp[clamp_id]; 911 struct uclamp_bucket *bucket; 912 913 lockdep_assert_held(&rq->lock); 914 915 /* Update task effective clamp */ --- 21 unchanged lines hidden (view full) --- 937 * is released. If this is the last task reference counting the rq's max 938 * active clamp value, then the rq's clamp value is updated. 939 * 940 * Both refcounted tasks and rq's cached clamp values are expected to be 941 * always valid. If it's detected they are not, as defensive programming, 942 * enforce the expected state and warn. 943 */ 944static inline void uclamp_rq_dec_id(struct rq *rq, struct task_struct *p, | 946{ 947 struct uclamp_rq *uc_rq = &rq->uclamp[clamp_id]; 948 struct uclamp_se *uc_se = &p->uclamp[clamp_id]; 949 struct uclamp_bucket *bucket; 950 951 lockdep_assert_held(&rq->lock); 952 953 /* Update task effective clamp */ --- 21 unchanged lines hidden (view full) --- 975 * is released. If this is the last task reference counting the rq's max 976 * active clamp value, then the rq's clamp value is updated. 977 * 978 * Both refcounted tasks and rq's cached clamp values are expected to be 979 * always valid. If it's detected they are not, as defensive programming, 980 * enforce the expected state and warn. 981 */ 982static inline void uclamp_rq_dec_id(struct rq *rq, struct task_struct *p, |
945 unsigned int clamp_id) | 983 enum uclamp_id clamp_id) |
946{ 947 struct uclamp_rq *uc_rq = &rq->uclamp[clamp_id]; 948 struct uclamp_se *uc_se = &p->uclamp[clamp_id]; 949 struct uclamp_bucket *bucket; 950 unsigned int bkt_clamp; 951 unsigned int rq_clamp; 952 953 lockdep_assert_held(&rq->lock); --- 22 unchanged lines hidden (view full) --- 976 if (bucket->value >= rq_clamp) { 977 bkt_clamp = uclamp_rq_max_value(rq, clamp_id, uc_se->value); 978 WRITE_ONCE(uc_rq->value, bkt_clamp); 979 } 980} 981 982static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p) 983{ | 984{ 985 struct uclamp_rq *uc_rq = &rq->uclamp[clamp_id]; 986 struct uclamp_se *uc_se = &p->uclamp[clamp_id]; 987 struct uclamp_bucket *bucket; 988 unsigned int bkt_clamp; 989 unsigned int rq_clamp; 990 991 lockdep_assert_held(&rq->lock); --- 22 unchanged lines hidden (view full) --- 1014 if (bucket->value >= rq_clamp) { 1015 bkt_clamp = uclamp_rq_max_value(rq, clamp_id, uc_se->value); 1016 WRITE_ONCE(uc_rq->value, bkt_clamp); 1017 } 1018} 1019 1020static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p) 1021{ |
984 unsigned int clamp_id; | 1022 enum uclamp_id clamp_id; |
985 986 if (unlikely(!p->sched_class->uclamp_enabled)) 987 return; 988 989 for_each_clamp_id(clamp_id) 990 uclamp_rq_inc_id(rq, p, clamp_id); 991 992 /* Reset clamp idle holding when there is one RUNNABLE task */ 993 if (rq->uclamp_flags & UCLAMP_FLAG_IDLE) 994 rq->uclamp_flags &= ~UCLAMP_FLAG_IDLE; 995} 996 997static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p) 998{ | 1023 1024 if (unlikely(!p->sched_class->uclamp_enabled)) 1025 return; 1026 1027 for_each_clamp_id(clamp_id) 1028 uclamp_rq_inc_id(rq, p, clamp_id); 1029 1030 /* Reset clamp idle holding when there is one RUNNABLE task */ 1031 if (rq->uclamp_flags & UCLAMP_FLAG_IDLE) 1032 rq->uclamp_flags &= ~UCLAMP_FLAG_IDLE; 1033} 1034 1035static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p) 1036{ |
999 unsigned int clamp_id; | 1037 enum uclamp_id clamp_id; |
1000 1001 if (unlikely(!p->sched_class->uclamp_enabled)) 1002 return; 1003 1004 for_each_clamp_id(clamp_id) 1005 uclamp_rq_dec_id(rq, p, clamp_id); 1006} 1007 | 1038 1039 if (unlikely(!p->sched_class->uclamp_enabled)) 1040 return; 1041 1042 for_each_clamp_id(clamp_id) 1043 uclamp_rq_dec_id(rq, p, clamp_id); 1044} 1045 |
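The inc/dec pair above refcounts each RUNNABLE task in the bucket matching its clamp and only rewrites the rq-wide value when the aggregated maximum can actually change. The toy model below mirrors that flow in plain userspace C; it deliberately omits locking, the UCLAMP_FLAG_IDLE handling and the defensive underflow checks, and the bucket delta of 205 reuses the 1024/5 assumption from the earlier sketch:

```c
#include <stdio.h>

#define NR_BUCKETS	5
#define BUCKET_DELTA	205	/* ~1024 / 5, see the bucket sketch above */

struct bucket { unsigned int tasks; unsigned int value; };

struct toy_rq {
	struct bucket bucket[NR_BUCKETS];
	unsigned int value;
};

static unsigned int bucket_id(unsigned int clamp)
{
	unsigned int id = clamp / BUCKET_DELTA;
	return id < NR_BUCKETS ? id : NR_BUCKETS - 1;
}

static void toy_inc(struct toy_rq *rq, unsigned int clamp)
{
	struct bucket *b = &rq->bucket[bucket_id(clamp)];

	b->tasks++;
	if (clamp > b->value)
		b->value = clamp;	/* local max aggregation */
	if (clamp > rq->value)
		rq->value = clamp;	/* rq-wide max aggregation */
}

static void toy_dec(struct toy_rq *rq, unsigned int clamp)
{
	struct bucket *b = &rq->bucket[bucket_id(clamp)];
	int i;

	if (b->tasks)
		b->tasks--;
	if (b->value < rq->value)
		return;			/* rq-wide max cannot have changed */

	/* Recompute the rq-wide max from the topmost non-empty bucket. */
	rq->value = 0;
	for (i = NR_BUCKETS - 1; i >= 0; i--) {
		if (rq->bucket[i].tasks) {
			rq->value = rq->bucket[i].value;
			break;
		}
	}
}

int main(void)
{
	struct toy_rq rq = { 0 };

	toy_inc(&rq, 300);
	toy_inc(&rq, 800);
	printf("after enqueue: rq clamp = %u\n", rq.value);	/* 800 */
	toy_dec(&rq, 800);
	printf("after dequeue: rq clamp = %u\n", rq.value);	/* 300 */
	return 0;
}
```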
1046static inline void 1047uclamp_update_active(struct task_struct *p, enum uclamp_id clamp_id) 1048{ 1049 struct rq_flags rf; 1050 struct rq *rq; 1051 1052 /* 1053 * Lock the task and the rq where the task is (or was) queued. 1054 * 1055 * We might lock the (previous) rq of a !RUNNABLE task, but that's the 1056 * price to pay to safely serialize util_{min,max} updates with 1057 * enqueues, dequeues and migration operations. 1058 * This is the same locking schema used by __set_cpus_allowed_ptr(). 1059 */ 1060 rq = task_rq_lock(p, &rf); 1061 1062 /* 1063 * Setting the clamp bucket is serialized by task_rq_lock(). 1064 * If the task is not yet RUNNABLE and its task_struct is not 1065 * affecting a valid clamp bucket, the next time it's enqueued, 1066 * it will already see the updated clamp bucket value. 1067 */ 1068 if (!p->uclamp[clamp_id].active) { 1069 uclamp_rq_dec_id(rq, p, clamp_id); 1070 uclamp_rq_inc_id(rq, p, clamp_id); 1071 } 1072 1073 task_rq_unlock(rq, p, &rf); 1074} 1075 1076static inline void 1077uclamp_update_active_tasks(struct cgroup_subsys_state *css, 1078 unsigned int clamps) 1079{ 1080 enum uclamp_id clamp_id; 1081 struct css_task_iter it; 1082 struct task_struct *p; 1083 1084 css_task_iter_start(css, 0, &it); 1085 while ((p = css_task_iter_next(&it))) { 1086 for_each_clamp_id(clamp_id) { 1087 if ((0x1 << clamp_id) & clamps) 1088 uclamp_update_active(p, clamp_id); 1089 } 1090 } 1091 css_task_iter_end(&it); 1092} 1093 1094#ifdef CONFIG_UCLAMP_TASK_GROUP 1095static void cpu_util_update_eff(struct cgroup_subsys_state *css); 1096static void uclamp_update_root_tg(void) 1097{ 1098 struct task_group *tg = &root_task_group; 1099 1100 uclamp_se_set(&tg->uclamp_req[UCLAMP_MIN], 1101 sysctl_sched_uclamp_util_min, false); 1102 uclamp_se_set(&tg->uclamp_req[UCLAMP_MAX], 1103 sysctl_sched_uclamp_util_max, false); 1104 1105 rcu_read_lock(); 1106 cpu_util_update_eff(&root_task_group.css); 1107 rcu_read_unlock(); 1108} 1109#else 1110static void uclamp_update_root_tg(void) { } 1111#endif 1112 |
|
1008int sysctl_sched_uclamp_handler(struct ctl_table *table, int write, 1009 void __user *buffer, size_t *lenp, 1010 loff_t *ppos) 1011{ | 1113int sysctl_sched_uclamp_handler(struct ctl_table *table, int write, 1114 void __user *buffer, size_t *lenp, 1115 loff_t *ppos) 1116{ |
1117 bool update_root_tg = false; |
|
1012 int old_min, old_max; | 1118 int old_min, old_max; |
1013 static DEFINE_MUTEX(mutex); | |
1014 int result; 1015 | 1119 int result; 1120 |
1016 mutex_lock(&mutex); | 1121 mutex_lock(&uclamp_mutex); |
1017 old_min = sysctl_sched_uclamp_util_min; 1018 old_max = sysctl_sched_uclamp_util_max; 1019 1020 result = proc_dointvec(table, write, buffer, lenp, ppos); 1021 if (result) 1022 goto undo; 1023 if (!write) 1024 goto done; 1025 1026 if (sysctl_sched_uclamp_util_min > sysctl_sched_uclamp_util_max || 1027 sysctl_sched_uclamp_util_max > SCHED_CAPACITY_SCALE) { 1028 result = -EINVAL; 1029 goto undo; 1030 } 1031 1032 if (old_min != sysctl_sched_uclamp_util_min) { 1033 uclamp_se_set(&uclamp_default[UCLAMP_MIN], 1034 sysctl_sched_uclamp_util_min, false); | 1122 old_min = sysctl_sched_uclamp_util_min; 1123 old_max = sysctl_sched_uclamp_util_max; 1124 1125 result = proc_dointvec(table, write, buffer, lenp, ppos); 1126 if (result) 1127 goto undo; 1128 if (!write) 1129 goto done; 1130 1131 if (sysctl_sched_uclamp_util_min > sysctl_sched_uclamp_util_max || 1132 sysctl_sched_uclamp_util_max > SCHED_CAPACITY_SCALE) { 1133 result = -EINVAL; 1134 goto undo; 1135 } 1136 1137 if (old_min != sysctl_sched_uclamp_util_min) { 1138 uclamp_se_set(&uclamp_default[UCLAMP_MIN], 1139 sysctl_sched_uclamp_util_min, false); |
1140 update_root_tg = true; |
|
1035 } 1036 if (old_max != sysctl_sched_uclamp_util_max) { 1037 uclamp_se_set(&uclamp_default[UCLAMP_MAX], 1038 sysctl_sched_uclamp_util_max, false); | 1141 } 1142 if (old_max != sysctl_sched_uclamp_util_max) { 1143 uclamp_se_set(&uclamp_default[UCLAMP_MAX], 1144 sysctl_sched_uclamp_util_max, false); |
1145 update_root_tg = true; |
|
1039 } 1040 | 1146 } 1147 |
1148 if (update_root_tg) 1149 uclamp_update_root_tg(); 1150 |
|
1041 /* | 1151 /* |
1042 * Updating all the RUNNABLE task is expensive, keep it simple and do 1043 * just a lazy update at each next enqueue time. | 1152 * We update all RUNNABLE tasks only when task groups are in use. 1153 * Otherwise, keep it simple and do just a lazy update at each next 1154 * task enqueue time. |
1044 */ | 1155 */ |
1156 |
|
1045 goto done; 1046 1047undo: 1048 sysctl_sched_uclamp_util_min = old_min; 1049 sysctl_sched_uclamp_util_max = old_max; 1050done: | 1157 goto done; 1158 1159undo: 1160 sysctl_sched_uclamp_util_min = old_min; 1161 sysctl_sched_uclamp_util_max = old_max; 1162done: |
1051 mutex_unlock(&mutex); | 1163 mutex_unlock(&uclamp_mutex); |
1052 1053 return result; 1054} 1055 1056static int uclamp_validate(struct task_struct *p, 1057 const struct sched_attr *attr) 1058{ 1059 unsigned int lower_bound = p->uclamp_req[UCLAMP_MIN].value; --- 10 unchanged lines hidden (view full) --- 1070 return -EINVAL; 1071 1072 return 0; 1073} 1074 1075static void __setscheduler_uclamp(struct task_struct *p, 1076 const struct sched_attr *attr) 1077{ | 1164 1165 return result; 1166} 1167 1168static int uclamp_validate(struct task_struct *p, 1169 const struct sched_attr *attr) 1170{ 1171 unsigned int lower_bound = p->uclamp_req[UCLAMP_MIN].value; --- 10 unchanged lines hidden (view full) --- 1182 return -EINVAL; 1183 1184 return 0; 1185} 1186 1187static void __setscheduler_uclamp(struct task_struct *p, 1188 const struct sched_attr *attr) 1189{ |
1078 unsigned int clamp_id; | 1190 enum uclamp_id clamp_id; |
1079 1080 /* 1081 * On scheduling class change, reset to default clamps for tasks 1082 * without a task-specific value. 1083 */ 1084 for_each_clamp_id(clamp_id) { 1085 struct uclamp_se *uc_se = &p->uclamp_req[clamp_id]; 1086 unsigned int clamp_value = uclamp_none(clamp_id); --- 20 unchanged lines hidden (view full) --- 1107 if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX) { 1108 uclamp_se_set(&p->uclamp_req[UCLAMP_MAX], 1109 attr->sched_util_max, true); 1110 } 1111} 1112 1113static void uclamp_fork(struct task_struct *p) 1114{ | 1191 1192 /* 1193 * On scheduling class change, reset to default clamps for tasks 1194 * without a task-specific value. 1195 */ 1196 for_each_clamp_id(clamp_id) { 1197 struct uclamp_se *uc_se = &p->uclamp_req[clamp_id]; 1198 unsigned int clamp_value = uclamp_none(clamp_id); --- 20 unchanged lines hidden (view full) --- 1219 if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX) { 1220 uclamp_se_set(&p->uclamp_req[UCLAMP_MAX], 1221 attr->sched_util_max, true); 1222 } 1223} 1224 1225static void uclamp_fork(struct task_struct *p) 1226{ |
1115 unsigned int clamp_id; | 1227 enum uclamp_id clamp_id; |
1116 1117 for_each_clamp_id(clamp_id) 1118 p->uclamp[clamp_id].active = false; 1119 1120 if (likely(!p->sched_reset_on_fork)) 1121 return; 1122 1123 for_each_clamp_id(clamp_id) { --- 5 unchanged lines hidden (view full) --- 1129 1130 uclamp_se_set(&p->uclamp_req[clamp_id], clamp_value, false); 1131 } 1132} 1133 1134static void __init init_uclamp(void) 1135{ 1136 struct uclamp_se uc_max = {}; | 1228 1229 for_each_clamp_id(clamp_id) 1230 p->uclamp[clamp_id].active = false; 1231 1232 if (likely(!p->sched_reset_on_fork)) 1233 return; 1234 1235 for_each_clamp_id(clamp_id) { --- 5 unchanged lines hidden (view full) --- 1241 1242 uclamp_se_set(&p->uclamp_req[clamp_id], clamp_value, false); 1243 } 1244} 1245 1246static void __init init_uclamp(void) 1247{ 1248 struct uclamp_se uc_max = {}; |
1137 unsigned int clamp_id; | 1249 enum uclamp_id clamp_id; |
1138 int cpu; 1139 | 1250 int cpu; 1251 |
1252 mutex_init(&uclamp_mutex); 1253 |
|
1140 for_each_possible_cpu(cpu) { 1141 memset(&cpu_rq(cpu)->uclamp, 0, sizeof(struct uclamp_rq)); 1142 cpu_rq(cpu)->uclamp_flags = 0; 1143 } 1144 1145 for_each_clamp_id(clamp_id) { 1146 uclamp_se_set(&init_task.uclamp_req[clamp_id], 1147 uclamp_none(clamp_id), false); 1148 } 1149 1150 /* System defaults allow max clamp values for both indexes */ 1151 uclamp_se_set(&uc_max, uclamp_none(UCLAMP_MAX), false); | 1254 for_each_possible_cpu(cpu) { 1255 memset(&cpu_rq(cpu)->uclamp, 0, sizeof(struct uclamp_rq)); 1256 cpu_rq(cpu)->uclamp_flags = 0; 1257 } 1258 1259 for_each_clamp_id(clamp_id) { 1260 uclamp_se_set(&init_task.uclamp_req[clamp_id], 1261 uclamp_none(clamp_id), false); 1262 } 1263 1264 /* System defaults allow max clamp values for both indexes */ 1265 uclamp_se_set(&uc_max, uclamp_none(UCLAMP_MAX), false); |
1152 for_each_clamp_id(clamp_id) | 1266 for_each_clamp_id(clamp_id) { |
1153 uclamp_default[clamp_id] = uc_max; | 1267 uclamp_default[clamp_id] = uc_max; |
1268#ifdef CONFIG_UCLAMP_TASK_GROUP 1269 root_task_group.uclamp_req[clamp_id] = uc_max; 1270 root_task_group.uclamp[clamp_id] = uc_max; 1271#endif 1272 } |
|
1154} 1155 1156#else /* CONFIG_UCLAMP_TASK */ 1157static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p) { } 1158static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p) { } 1159static inline int uclamp_validate(struct task_struct *p, 1160 const struct sched_attr *attr) 1161{ --- 327 unchanged lines hidden (view full) --- 1489 if (running) 1490 put_prev_task(rq, p); 1491 1492 p->sched_class->set_cpus_allowed(p, new_mask); 1493 1494 if (queued) 1495 enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK); 1496 if (running) | 1273} 1274 1275#else /* CONFIG_UCLAMP_TASK */ 1276static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p) { } 1277static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p) { } 1278static inline int uclamp_validate(struct task_struct *p, 1279 const struct sched_attr *attr) 1280{ --- 327 unchanged lines hidden (view full) --- 1608 if (running) 1609 put_prev_task(rq, p); 1610 1611 p->sched_class->set_cpus_allowed(p, new_mask); 1612 1613 if (queued) 1614 enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK); 1615 if (running) |
1497 set_curr_task(rq, p); | 1616 set_next_task(rq, p); |
1498} 1499 1500/* 1501 * Change a given task's CPU affinity. Migrate the thread to a 1502 * proper CPU and schedule it away if the CPU it's executing on 1503 * is removed from the allowed bitmask. 1504 * 1505 * NOTE: the caller must have a valid reference to the task, the --- 1624 unchanged lines hidden (view full) --- 3130 * Remove function-return probe instances associated with this 3131 * task and put them back on the free list. 3132 */ 3133 kprobe_flush_task(prev); 3134 3135 /* Task is done with its stack. */ 3136 put_task_stack(prev); 3137 | 1617} 1618 1619/* 1620 * Change a given task's CPU affinity. Migrate the thread to a 1621 * proper CPU and schedule it away if the CPU it's executing on 1622 * is removed from the allowed bitmask. 1623 * 1624 * NOTE: the caller must have a valid reference to the task, the --- 1624 unchanged lines hidden (view full) --- 3249 * Remove function-return probe instances associated with this 3250 * task and put them back on the free list. 3251 */ 3252 kprobe_flush_task(prev); 3253 3254 /* Task is done with its stack. */ 3255 put_task_stack(prev); 3256 |
3138 put_task_struct(prev); | 3257 put_task_struct_rcu_user(prev); |
3139 } 3140 3141 tick_nohz_task_switch(); 3142 return rq; 3143} 3144 3145#ifdef CONFIG_SMP 3146 --- 62 unchanged lines hidden (view full) --- 3209 3210/* 3211 * context_switch - switch to the new MM and the new thread's register state. 3212 */ 3213static __always_inline struct rq * 3214context_switch(struct rq *rq, struct task_struct *prev, 3215 struct task_struct *next, struct rq_flags *rf) 3216{ | 3258 } 3259 3260 tick_nohz_task_switch(); 3261 return rq; 3262} 3263 3264#ifdef CONFIG_SMP 3265 --- 62 unchanged lines hidden (view full) --- 3328 3329/* 3330 * context_switch - switch to the new MM and the new thread's register state. 3331 */ 3332static __always_inline struct rq * 3333context_switch(struct rq *rq, struct task_struct *prev, 3334 struct task_struct *next, struct rq_flags *rf) 3335{ |
3217 struct mm_struct *mm, *oldmm; 3218 | |
3219 prepare_task_switch(rq, prev, next); 3220 | 3336 prepare_task_switch(rq, prev, next); 3337 |
3221 mm = next->mm; 3222 oldmm = prev->active_mm; | |
3223 /* 3224 * For paravirt, this is coupled with an exit in switch_to to 3225 * combine the page table reload and the switch backend into 3226 * one hypercall. 3227 */ 3228 arch_start_context_switch(prev); 3229 3230 /* | 3338 /* 3339 * For paravirt, this is coupled with an exit in switch_to to 3340 * combine the page table reload and the switch backend into 3341 * one hypercall. 3342 */ 3343 arch_start_context_switch(prev); 3344 3345 /* |
3231 * If mm is non-NULL, we pass through switch_mm(). If mm is 3232 * NULL, we will pass through mmdrop() in finish_task_switch(). 3233 * Both of these contain the full memory barrier required by 3234 * membarrier after storing to rq->curr, before returning to 3235 * user-space. | 3346 * kernel -> kernel lazy + transfer active 3347 * user -> kernel lazy + mmgrab() active 3348 * 3349 * kernel -> user switch + mmdrop() active 3350 * user -> user switch |
3236 */ | 3351 */ |
3237 if (!mm) { 3238 next->active_mm = oldmm; 3239 mmgrab(oldmm); 3240 enter_lazy_tlb(oldmm, next); 3241 } else 3242 switch_mm_irqs_off(oldmm, mm, next); | 3352 if (!next->mm) { // to kernel 3353 enter_lazy_tlb(prev->active_mm, next); |
3243 | 3354 |
3244 if (!prev->mm) { 3245 prev->active_mm = NULL; 3246 rq->prev_mm = oldmm; | 3355 next->active_mm = prev->active_mm; 3356 if (prev->mm) // from user 3357 mmgrab(prev->active_mm); 3358 else 3359 prev->active_mm = NULL; 3360 } else { // to user 3361 /* 3362 * sys_membarrier() requires an smp_mb() between setting 3363 * rq->curr and returning to userspace. 3364 * 3365 * The below provides this either through switch_mm(), or in 3366 * case 'prev->active_mm == next->mm' through 3367 * finish_task_switch()'s mmdrop(). 3368 */ 3369 3370 switch_mm_irqs_off(prev->active_mm, next->mm, next); 3371 3372 if (!prev->mm) { // from kernel 3373 /* will mmdrop() in finish_task_switch(). */ 3374 rq->prev_mm = prev->active_mm; 3375 prev->active_mm = NULL; 3376 } |
3247 } 3248 3249 rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP); 3250 3251 prepare_lock_switch(rq, next, rf); 3252 3253 /* Here we just switch the register state and the stack. */ 3254 switch_to(prev, next, prev); --- 226 unchanged lines hidden (view full) --- 3481 trigger_load_balance(rq); 3482#endif 3483} 3484 3485#ifdef CONFIG_NO_HZ_FULL 3486 3487struct tick_work { 3488 int cpu; | 3377 } 3378 3379 rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP); 3380 3381 prepare_lock_switch(rq, next, rf); 3382 3383 /* Here we just switch the register state and the stack. */ 3384 switch_to(prev, next, prev); --- 226 unchanged lines hidden (view full) --- 3611 trigger_load_balance(rq); 3612#endif 3613} 3614 3615#ifdef CONFIG_NO_HZ_FULL 3616 3617struct tick_work { 3618 int cpu; |
3619 atomic_t state; |
|
3489 struct delayed_work work; 3490}; | 3620 struct delayed_work work; 3621}; |
3622/* Values for ->state, see diagram below. */ 3623#define TICK_SCHED_REMOTE_OFFLINE 0 3624#define TICK_SCHED_REMOTE_OFFLINING 1 3625#define TICK_SCHED_REMOTE_RUNNING 2 |
|
3491 | 3626 |
3627/* 3628 * State diagram for ->state: 3629 * 3630 * 3631 * TICK_SCHED_REMOTE_OFFLINE 3632 * | ^ 3633 * | | 3634 * | | sched_tick_remote() 3635 * | | 3636 * | | 3637 * +--TICK_SCHED_REMOTE_OFFLINING 3638 * | ^ 3639 * | | 3640 * sched_tick_start() | | sched_tick_stop() 3641 * | | 3642 * V | 3643 * TICK_SCHED_REMOTE_RUNNING 3644 * 3645 * 3646 * Other transitions get WARN_ON_ONCE(), except that sched_tick_remote() 3647 * and sched_tick_start() are happy to leave the state in RUNNING. 3648 */ 3649 |
|
3492static struct tick_work __percpu *tick_work_cpu; 3493 3494static void sched_tick_remote(struct work_struct *work) 3495{ 3496 struct delayed_work *dwork = to_delayed_work(work); 3497 struct tick_work *twork = container_of(dwork, struct tick_work, work); 3498 int cpu = twork->cpu; 3499 struct rq *rq = cpu_rq(cpu); 3500 struct task_struct *curr; 3501 struct rq_flags rf; 3502 u64 delta; | 3650static struct tick_work __percpu *tick_work_cpu; 3651 3652static void sched_tick_remote(struct work_struct *work) 3653{ 3654 struct delayed_work *dwork = to_delayed_work(work); 3655 struct tick_work *twork = container_of(dwork, struct tick_work, work); 3656 int cpu = twork->cpu; 3657 struct rq *rq = cpu_rq(cpu); 3658 struct task_struct *curr; 3659 struct rq_flags rf; 3660 u64 delta; |
3661 int os; |
|
3503 3504 /* 3505 * Handle the tick only if it appears the remote CPU is running in full 3506 * dynticks mode. The check is racy by nature, but missing a tick or 3507 * having one too much is no big deal because the scheduler tick updates 3508 * statistics and checks timeslices in a time-independent way, regardless 3509 * of when exactly it is running. 3510 */ 3511 if (idle_cpu(cpu) || !tick_nohz_tick_stopped_cpu(cpu)) 3512 goto out_requeue; 3513 3514 rq_lock_irq(rq, &rf); 3515 curr = rq->curr; | 3662 3663 /* 3664 * Handle the tick only if it appears the remote CPU is running in full 3665 * dynticks mode. The check is racy by nature, but missing a tick or 3666 * having one too much is no big deal because the scheduler tick updates 3667 * statistics and checks timeslices in a time-independent way, regardless 3668 * of when exactly it is running. 3669 */ 3670 if (idle_cpu(cpu) || !tick_nohz_tick_stopped_cpu(cpu)) 3671 goto out_requeue; 3672 3673 rq_lock_irq(rq, &rf); 3674 curr = rq->curr; |
3516 if (is_idle_task(curr)) | 3675 if (is_idle_task(curr) || cpu_is_offline(cpu)) |
3517 goto out_unlock; 3518 3519 update_rq_clock(rq); 3520 delta = rq_clock_task(rq) - curr->se.exec_start; 3521 3522 /* 3523 * Make sure the next tick runs within a reasonable 3524 * amount of time. 3525 */ 3526 WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3); 3527 curr->sched_class->task_tick(rq, curr, 0); 3528 3529out_unlock: 3530 rq_unlock_irq(rq, &rf); 3531 3532out_requeue: 3533 /* 3534 * Run the remote tick once per second (1Hz). This arbitrary 3535 * frequency is large enough to avoid overload but short enough | 3676 goto out_unlock; 3677 3678 update_rq_clock(rq); 3679 delta = rq_clock_task(rq) - curr->se.exec_start; 3680 3681 /* 3682 * Make sure the next tick runs within a reasonable 3683 * amount of time. 3684 */ 3685 WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3); 3686 curr->sched_class->task_tick(rq, curr, 0); 3687 3688out_unlock: 3689 rq_unlock_irq(rq, &rf); 3690 3691out_requeue: 3692 /* 3693 * Run the remote tick once per second (1Hz). This arbitrary 3694 * frequency is large enough to avoid overload but short enough |
3536 * to keep scheduler internal stats reasonably up to date. | 3695 * to keep scheduler internal stats reasonably up to date. But 3696 * first update state to reflect hotplug activity if required. |
3537 */ | 3697 */ |
3538 queue_delayed_work(system_unbound_wq, dwork, HZ); | 3698 os = atomic_fetch_add_unless(&twork->state, -1, TICK_SCHED_REMOTE_RUNNING); 3699 WARN_ON_ONCE(os == TICK_SCHED_REMOTE_OFFLINE); 3700 if (os == TICK_SCHED_REMOTE_RUNNING) 3701 queue_delayed_work(system_unbound_wq, dwork, HZ); |
3539} 3540 3541static void sched_tick_start(int cpu) 3542{ | 3702} 3703 3704static void sched_tick_start(int cpu) 3705{ |
3706 int os; |
|
3543 struct tick_work *twork; 3544 3545 if (housekeeping_cpu(cpu, HK_FLAG_TICK)) 3546 return; 3547 3548 WARN_ON_ONCE(!tick_work_cpu); 3549 3550 twork = per_cpu_ptr(tick_work_cpu, cpu); | 3707 struct tick_work *twork; 3708 3709 if (housekeeping_cpu(cpu, HK_FLAG_TICK)) 3710 return; 3711 3712 WARN_ON_ONCE(!tick_work_cpu); 3713 3714 twork = per_cpu_ptr(tick_work_cpu, cpu); |
3551 twork->cpu = cpu; 3552 INIT_DELAYED_WORK(&twork->work, sched_tick_remote); 3553 queue_delayed_work(system_unbound_wq, &twork->work, HZ); | 3715 os = atomic_xchg(&twork->state, TICK_SCHED_REMOTE_RUNNING); 3716 WARN_ON_ONCE(os == TICK_SCHED_REMOTE_RUNNING); 3717 if (os == TICK_SCHED_REMOTE_OFFLINE) { 3718 twork->cpu = cpu; 3719 INIT_DELAYED_WORK(&twork->work, sched_tick_remote); 3720 queue_delayed_work(system_unbound_wq, &twork->work, HZ); 3721 } |
3554} 3555 3556#ifdef CONFIG_HOTPLUG_CPU 3557static void sched_tick_stop(int cpu) 3558{ 3559 struct tick_work *twork; | 3722} 3723 3724#ifdef CONFIG_HOTPLUG_CPU 3725static void sched_tick_stop(int cpu) 3726{ 3727 struct tick_work *twork; |
3728 int os; |
|
3560 3561 if (housekeeping_cpu(cpu, HK_FLAG_TICK)) 3562 return; 3563 3564 WARN_ON_ONCE(!tick_work_cpu); 3565 3566 twork = per_cpu_ptr(tick_work_cpu, cpu); | 3729 3730 if (housekeeping_cpu(cpu, HK_FLAG_TICK)) 3731 return; 3732 3733 WARN_ON_ONCE(!tick_work_cpu); 3734 3735 twork = per_cpu_ptr(tick_work_cpu, cpu); |
3567 cancel_delayed_work_sync(&twork->work); | 3736 /* There cannot be competing actions, but don't rely on stop-machine. */ 3737 os = atomic_xchg(&twork->state, TICK_SCHED_REMOTE_OFFLINING); 3738 WARN_ON_ONCE(os != TICK_SCHED_REMOTE_RUNNING); 3739 /* Don't cancel, as this would mess up the state machine. */ |
3568} 3569#endif /* CONFIG_HOTPLUG_CPU */ 3570 3571int __init sched_tick_offload_init(void) 3572{ 3573 tick_work_cpu = alloc_percpu(struct tick_work); 3574 BUG_ON(!tick_work_cpu); | 3740} 3741#endif /* CONFIG_HOTPLUG_CPU */ 3742 3743int __init sched_tick_offload_init(void) 3744{ 3745 tick_work_cpu = alloc_percpu(struct tick_work); 3746 BUG_ON(!tick_work_cpu); |
3575 | |
3576 return 0; 3577} 3578 3579#else /* !CONFIG_NO_HZ_FULL */ 3580static inline void sched_tick_start(int cpu) { } 3581static inline void sched_tick_stop(int cpu) { } 3582#endif 3583 | 3747 return 0; 3748} 3749 3750#else /* !CONFIG_NO_HZ_FULL */ 3751static inline void sched_tick_start(int cpu) { } 3752static inline void sched_tick_stop(int cpu) { } 3753#endif 3754 |
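The remote-tick hunks above replace unconditional queue/cancel with a small three-state lifecycle (OFFLINE, OFFLINING, RUNNING) so sched_tick_stop() never has to cancel delayed work from the hotplug path; the pending work item instead notices the OFFLINING state and stops requeueing itself. A userspace sketch of the same transitions, using C11 atomics in place of the kernel's atomic_t helpers; the compare-exchange is only an approximation of atomic_fetch_add_unless() for the two states the callback can legally observe:

```c
#include <stdatomic.h>
#include <stdio.h>

enum { TICK_OFFLINE, TICK_OFFLINING, TICK_RUNNING };

static atomic_int state = TICK_OFFLINE;

static void tick_start(void)
{
	int os = atomic_exchange(&state, TICK_RUNNING);

	/* Only OFFLINE -> RUNNING queues new work; OFFLINING -> RUNNING
	 * simply lets the still-pending work keep itself alive. */
	if (os == TICK_OFFLINE)
		printf("queue_delayed_work()\n");
}

static void tick_stop(void)
{
	atomic_store(&state, TICK_OFFLINING);
	/* Do not cancel the work: the next callback sees OFFLINING,
	 * drops the state to OFFLINE and stops requeueing itself. */
}

static void tick_callback(void)
{
	int expected = TICK_OFFLINING;

	/* Stand-in for atomic_fetch_add_unless(&state, -1, RUNNING):
	 * OFFLINING falls back to OFFLINE, RUNNING is left untouched. */
	if (atomic_compare_exchange_strong(&state, &expected, TICK_OFFLINE))
		printf("callback: CPU went offline, not requeueing\n");
	else
		printf("callback: still running, requeue\n");
}

int main(void)
{
	tick_start();
	tick_callback();	/* requeues */
	tick_stop();
	tick_callback();	/* drops to OFFLINE, no requeue */
	return 0;
}
```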
3584#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \ | 3755#if defined(CONFIG_PREEMPTION) && (defined(CONFIG_DEBUG_PREEMPT) || \ |
3585 defined(CONFIG_TRACE_PREEMPT_TOGGLE)) 3586/* 3587 * If the value passed in is equal to the current preempt count 3588 * then we just disabled preemption. Start timing the latency. 3589 */ 3590static inline void preempt_latency_start(int val) 3591{ 3592 if (preempt_count() == val) { --- 141 unchanged lines hidden (view full) --- 3734 * opportunity to pull in more work from other CPUs. 3735 */ 3736 if (likely((prev->sched_class == &idle_sched_class || 3737 prev->sched_class == &fair_sched_class) && 3738 rq->nr_running == rq->cfs.h_nr_running)) { 3739 3740 p = fair_sched_class.pick_next_task(rq, prev, rf); 3741 if (unlikely(p == RETRY_TASK)) | 3756 defined(CONFIG_TRACE_PREEMPT_TOGGLE)) 3757/* 3758 * If the value passed in is equal to the current preempt count 3759 * then we just disabled preemption. Start timing the latency. 3760 */ 3761static inline void preempt_latency_start(int val) 3762{ 3763 if (preempt_count() == val) { --- 141 unchanged lines hidden (view full) --- 3905 * opportunity to pull in more work from other CPUs. 3906 */ 3907 if (likely((prev->sched_class == &idle_sched_class || 3908 prev->sched_class == &fair_sched_class) && 3909 rq->nr_running == rq->cfs.h_nr_running)) { 3910 3911 p = fair_sched_class.pick_next_task(rq, prev, rf); 3912 if (unlikely(p == RETRY_TASK)) |
3742 goto again; | 3913 goto restart; |
3743 3744 /* Assumes fair_sched_class->next == idle_sched_class */ 3745 if (unlikely(!p)) 3746 p = idle_sched_class.pick_next_task(rq, prev, rf); 3747 3748 return p; 3749 } 3750 | 3914 3915 /* Assumes fair_sched_class->next == idle_sched_class */ 3916 if (unlikely(!p)) 3917 p = idle_sched_class.pick_next_task(rq, prev, rf); 3918 3919 return p; 3920 } 3921 |
3751again: | 3922restart: 3923 /* 3924 * Ensure that we put DL/RT tasks before the pick loop, such that they 3925 * can PULL higher prio tasks when we lower the RQ 'priority'. 3926 */ 3927 prev->sched_class->put_prev_task(rq, prev, rf); 3928 if (!rq->nr_running) 3929 newidle_balance(rq, rf); 3930 |
3752 for_each_class(class) { | 3931 for_each_class(class) { |
3753 p = class->pick_next_task(rq, prev, rf); 3754 if (p) { 3755 if (unlikely(p == RETRY_TASK)) 3756 goto again; | 3932 p = class->pick_next_task(rq, NULL, NULL); 3933 if (p) |
3757 return p; | 3934 return p; |
3758 } | |
3759 } 3760 3761 /* The idle class should always have a runnable task: */ 3762 BUG(); 3763} 3764 3765/* 3766 * __schedule() is the main scheduler function. --- 10 unchanged lines hidden (view full) --- 3777 * 3778 * 3. Wakeups don't really cause entry into schedule(). They add a 3779 * task to the run-queue and that's it. 3780 * 3781 * Now, if the new task added to the run-queue preempts the current 3782 * task, then the wakeup sets TIF_NEED_RESCHED and schedule() gets 3783 * called on the nearest possible occasion: 3784 * | 3935 } 3936 3937 /* The idle class should always have a runnable task: */ 3938 BUG(); 3939} 3940 3941/* 3942 * __schedule() is the main scheduler function. --- 10 unchanged lines hidden (view full) --- 3953 * 3954 * 3. Wakeups don't really cause entry into schedule(). They add a 3955 * task to the run-queue and that's it. 3956 * 3957 * Now, if the new task added to the run-queue preempts the current 3958 * task, then the wakeup sets TIF_NEED_RESCHED and schedule() gets 3959 * called on the nearest possible occasion: 3960 * |
3785 * - If the kernel is preemptible (CONFIG_PREEMPT=y): | 3961 * - If the kernel is preemptible (CONFIG_PREEMPTION=y): |
3786 * 3787 * - in syscall or exception context, at the next outmost 3788 * preempt_enable(). (this might be as soon as the wake_up()'s 3789 * spin_unlock()!) 3790 * 3791 * - in IRQ context, return from interrupt-handler to 3792 * preemptible context 3793 * | 3962 * 3963 * - in syscall or exception context, at the next outmost 3964 * preempt_enable(). (this might be as soon as the wake_up()'s 3965 * spin_unlock()!) 3966 * 3967 * - in IRQ context, return from interrupt-handler to 3968 * preemptible context 3969 * |
3794 * - If the kernel is not preemptible (CONFIG_PREEMPT is not set) | 3970 * - If the kernel is not preemptible (CONFIG_PREEMPTION is not set) |
3795 * then at the next: 3796 * 3797 * - cond_resched() call 3798 * - explicit schedule() call 3799 * - return from syscall or exception to user-space 3800 * - return from interrupt-handler to user-space 3801 * 3802 * WARNING: must be called with preemption disabled! --- 228 unchanged lines hidden (view full) --- 4031 4032 /* 4033 * Check again in case we missed a preemption opportunity 4034 * between schedule and now. 4035 */ 4036 } while (need_resched()); 4037} 4038 | 3971 * then at the next: 3972 * 3973 * - cond_resched() call 3974 * - explicit schedule() call 3975 * - return from syscall or exception to user-space 3976 * - return from interrupt-handler to user-space 3977 * 3978 * WARNING: must be called with preemption disabled! --- 228 unchanged lines hidden (view full) --- 4207 4208 /* 4209 * Check again in case we missed a preemption opportunity 4210 * between schedule and now. 4211 */ 4212 } while (need_resched()); 4213} 4214 |
4039#ifdef CONFIG_PREEMPT | 4215#ifdef CONFIG_PREEMPTION |
4040/* 4041 * this is the entry point to schedule() from in-kernel preemption 4042 * off of preempt_enable. Kernel preemptions off return from interrupt 4043 * occur there and call schedule directly. 4044 */ 4045asmlinkage __visible void __sched notrace preempt_schedule(void) 4046{ 4047 /* --- 55 unchanged lines hidden (view full) --- 4103 exception_exit(prev_ctx); 4104 4105 preempt_latency_stop(1); 4106 preempt_enable_no_resched_notrace(); 4107 } while (need_resched()); 4108} 4109EXPORT_SYMBOL_GPL(preempt_schedule_notrace); 4110 | 4216/* 4217 * this is the entry point to schedule() from in-kernel preemption 4218 * off of preempt_enable. Kernel preemptions off return from interrupt 4219 * occur there and call schedule directly. 4220 */ 4221asmlinkage __visible void __sched notrace preempt_schedule(void) 4222{ 4223 /* --- 55 unchanged lines hidden (view full) --- 4279 exception_exit(prev_ctx); 4280 4281 preempt_latency_stop(1); 4282 preempt_enable_no_resched_notrace(); 4283 } while (need_resched()); 4284} 4285EXPORT_SYMBOL_GPL(preempt_schedule_notrace); 4286 |
4111#endif /* CONFIG_PREEMPT */ | 4287#endif /* CONFIG_PREEMPTION */ |
4112 4113/* 4114 * this is the entry point to schedule() from kernel preemption 4115 * off of irq context. 4116 * Note, that this is called and return with irqs disabled. This will 4117 * protect us against recursive calling from irq. 4118 */ 4119asmlinkage __visible void __sched preempt_schedule_irq(void) --- 151 unchanged lines hidden (view full) --- 4271 p->sched_class = &fair_sched_class; 4272 } 4273 4274 p->prio = prio; 4275 4276 if (queued) 4277 enqueue_task(rq, p, queue_flag); 4278 if (running) | 4288 4289/* 4290 * this is the entry point to schedule() from kernel preemption 4291 * off of irq context. 4292 * Note, that this is called and return with irqs disabled. This will 4293 * protect us against recursive calling from irq. 4294 */ 4295asmlinkage __visible void __sched preempt_schedule_irq(void) --- 151 unchanged lines hidden (view full) --- 4447 p->sched_class = &fair_sched_class; 4448 } 4449 4450 p->prio = prio; 4451 4452 if (queued) 4453 enqueue_task(rq, p, queue_flag); 4454 if (running) |
4279 set_curr_task(rq, p); | 4455 set_next_task(rq, p); |
4280 4281 check_class_changed(rq, p, prev_class, oldprio); 4282out_unlock: 4283 /* Avoid rq from going away on us: */ 4284 preempt_disable(); 4285 __task_rq_unlock(rq, &rf); 4286 4287 balance_callback(rq); --- 50 unchanged lines hidden (view full) --- 4338 /* 4339 * If the task increased its priority or is running and 4340 * lowered its priority, then reschedule its CPU: 4341 */ 4342 if (delta < 0 || (delta > 0 && task_running(rq, p))) 4343 resched_curr(rq); 4344 } 4345 if (running) | 4456 4457 check_class_changed(rq, p, prev_class, oldprio); 4458out_unlock: 4459 /* Avoid rq from going away on us: */ 4460 preempt_disable(); 4461 __task_rq_unlock(rq, &rf); 4462 4463 balance_callback(rq); --- 50 unchanged lines hidden (view full) --- 4514 /* 4515 * If the task increased its priority or is running and 4516 * lowered its priority, then reschedule its CPU: 4517 */ 4518 if (delta < 0 || (delta > 0 && task_running(rq, p))) 4519 resched_curr(rq); 4520 } 4521 if (running) |
4346 set_curr_task(rq, p); | 4522 set_next_task(rq, p); |
4347out_unlock: 4348 task_rq_unlock(rq, p, &rf); 4349} 4350EXPORT_SYMBOL(set_user_nice); 4351 4352/* 4353 * can_nice - check if a task can reduce its nice value 4354 * @p: task --- 300 unchanged lines hidden (view full) --- 4655 4656 /* Update task specific "requested" clamps */ 4657 if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) { 4658 retval = uclamp_validate(p, attr); 4659 if (retval) 4660 return retval; 4661 } 4662 | 4523out_unlock: 4524 task_rq_unlock(rq, p, &rf); 4525} 4526EXPORT_SYMBOL(set_user_nice); 4527 4528/* 4529 * can_nice - check if a task can reduce its nice value 4530 * @p: task --- 300 unchanged lines hidden (view full) --- 4831 4832 /* Update task specific "requested" clamps */ 4833 if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) { 4834 retval = uclamp_validate(p, attr); 4835 if (retval) 4836 return retval; 4837 } 4838 |
4839 if (pi) 4840 cpuset_read_lock(); 4841 |
|
4663 /* 4664 * Make sure no PI-waiters arrive (or leave) while we are 4665 * changing the priority of the task: 4666 * 4667 * To be able to change p->policy safely, the appropriate 4668 * runqueue lock must be held. 4669 */ 4670 rq = task_rq_lock(p, &rf); 4671 update_rq_clock(rq); 4672 4673 /* 4674 * Changing the policy of the stop threads its a very bad idea: 4675 */ 4676 if (p == rq->stop) { | 4842 /* 4843 * Make sure no PI-waiters arrive (or leave) while we are 4844 * changing the priority of the task: 4845 * 4846 * To be able to change p->policy safely, the appropriate 4847 * runqueue lock must be held. 4848 */ 4849 rq = task_rq_lock(p, &rf); 4850 update_rq_clock(rq); 4851 4852 /* 4853 * Changing the policy of the stop threads its a very bad idea: 4854 */ 4855 if (p == rq->stop) { |
4677 task_rq_unlock(rq, p, &rf); 4678 return -EINVAL; | 4856 retval = -EINVAL; 4857 goto unlock; |
4679 } 4680 4681 /* 4682 * If not changing anything there's no need to proceed further, 4683 * but store a possible modification of reset_on_fork. 4684 */ 4685 if (unlikely(policy == p->policy)) { 4686 if (fair_policy(policy) && attr->sched_nice != task_nice(p)) 4687 goto change; 4688 if (rt_policy(policy) && attr->sched_priority != p->rt_priority) 4689 goto change; 4690 if (dl_policy(policy) && dl_param_changed(p, attr)) 4691 goto change; 4692 if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) 4693 goto change; 4694 4695 p->sched_reset_on_fork = reset_on_fork; | 4858 } 4859 4860 /* 4861 * If not changing anything there's no need to proceed further, 4862 * but store a possible modification of reset_on_fork. 4863 */ 4864 if (unlikely(policy == p->policy)) { 4865 if (fair_policy(policy) && attr->sched_nice != task_nice(p)) 4866 goto change; 4867 if (rt_policy(policy) && attr->sched_priority != p->rt_priority) 4868 goto change; 4869 if (dl_policy(policy) && dl_param_changed(p, attr)) 4870 goto change; 4871 if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) 4872 goto change; 4873 4874 p->sched_reset_on_fork = reset_on_fork; |
4696 task_rq_unlock(rq, p, &rf); 4697 return 0; | 4875 retval = 0; 4876 goto unlock; |
4698 } 4699change: 4700 4701 if (user) { 4702#ifdef CONFIG_RT_GROUP_SCHED 4703 /* 4704 * Do not allow realtime tasks into groups that have no runtime 4705 * assigned. 4706 */ 4707 if (rt_bandwidth_enabled() && rt_policy(policy) && 4708 task_group(p)->rt_bandwidth.rt_runtime == 0 && 4709 !task_group_is_autogroup(task_group(p))) { | 4877 } 4878change: 4879 4880 if (user) { 4881#ifdef CONFIG_RT_GROUP_SCHED 4882 /* 4883 * Do not allow realtime tasks into groups that have no runtime 4884 * assigned. 4885 */ 4886 if (rt_bandwidth_enabled() && rt_policy(policy) && 4887 task_group(p)->rt_bandwidth.rt_runtime == 0 && 4888 !task_group_is_autogroup(task_group(p))) { |
4710 task_rq_unlock(rq, p, &rf); 4711 return -EPERM; | 4889 retval = -EPERM; 4890 goto unlock; |
4712 } 4713#endif 4714#ifdef CONFIG_SMP 4715 if (dl_bandwidth_enabled() && dl_policy(policy) && 4716 !(attr->sched_flags & SCHED_FLAG_SUGOV)) { 4717 cpumask_t *span = rq->rd->span; 4718 4719 /* 4720 * Don't allow tasks with an affinity mask smaller than 4721 * the entire root_domain to become SCHED_DEADLINE. We 4722 * will also fail if there's no bandwidth available. 4723 */ 4724 if (!cpumask_subset(span, p->cpus_ptr) || 4725 rq->rd->dl_bw.bw == 0) { | 4891 } 4892#endif 4893#ifdef CONFIG_SMP 4894 if (dl_bandwidth_enabled() && dl_policy(policy) && 4895 !(attr->sched_flags & SCHED_FLAG_SUGOV)) { 4896 cpumask_t *span = rq->rd->span; 4897 4898 /* 4899 * Don't allow tasks with an affinity mask smaller than 4900 * the entire root_domain to become SCHED_DEADLINE. We 4901 * will also fail if there's no bandwidth available. 4902 */ 4903 if (!cpumask_subset(span, p->cpus_ptr) || 4904 rq->rd->dl_bw.bw == 0) { |
4726 task_rq_unlock(rq, p, &rf); 4727 return -EPERM; | 4905 retval = -EPERM; 4906 goto unlock; |
4728 } 4729 } 4730#endif 4731 } 4732 4733 /* Re-check policy now with rq lock held: */ 4734 if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { 4735 policy = oldpolicy = -1; 4736 task_rq_unlock(rq, p, &rf); | 4907 } 4908 } 4909#endif 4910 } 4911 4912 /* Re-check policy now with rq lock held: */ 4913 if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { 4914 policy = oldpolicy = -1; 4915 task_rq_unlock(rq, p, &rf); |
4916 if (pi) 4917 cpuset_read_unlock(); |
|
4737 goto recheck; 4738 } 4739 4740 /* 4741 * If setscheduling to SCHED_DEADLINE (or changing the parameters 4742 * of a SCHED_DEADLINE task) we need to check if enough bandwidth 4743 * is available. 4744 */ 4745 if ((dl_policy(policy) || dl_task(p)) && sched_dl_overflow(p, policy, attr)) { | 4918 goto recheck; 4919 } 4920 4921 /* 4922 * If setscheduling to SCHED_DEADLINE (or changing the parameters 4923 * of a SCHED_DEADLINE task) we need to check if enough bandwidth 4924 * is available. 4925 */ 4926 if ((dl_policy(policy) || dl_task(p)) && sched_dl_overflow(p, policy, attr)) { |
4746 task_rq_unlock(rq, p, &rf); 4747 return -EBUSY; | 4927 retval = -EBUSY; 4928 goto unlock; |
4748 } 4749 4750 p->sched_reset_on_fork = reset_on_fork; 4751 oldprio = p->prio; 4752 4753 if (pi) { 4754 /* 4755 * Take priority boosted tasks into account. If the new --- 25 unchanged lines hidden (view full) --- 4781 * increased (user space view). 4782 */ 4783 if (oldprio < p->prio) 4784 queue_flags |= ENQUEUE_HEAD; 4785 4786 enqueue_task(rq, p, queue_flags); 4787 } 4788 if (running) | 4929 } 4930 4931 p->sched_reset_on_fork = reset_on_fork; 4932 oldprio = p->prio; 4933 4934 if (pi) { 4935 /* 4936 * Take priority boosted tasks into account. If the new --- 25 unchanged lines hidden (view full) --- 4962 * increased (user space view). 4963 */ 4964 if (oldprio < p->prio) 4965 queue_flags |= ENQUEUE_HEAD; 4966 4967 enqueue_task(rq, p, queue_flags); 4968 } 4969 if (running) |
4789 set_curr_task(rq, p); | 4970 set_next_task(rq, p); |
4790 4791 check_class_changed(rq, p, prev_class, oldprio); 4792 4793 /* Avoid rq from going away on us: */ 4794 preempt_disable(); 4795 task_rq_unlock(rq, p, &rf); 4796 | 4971 4972 check_class_changed(rq, p, prev_class, oldprio); 4973 4974 /* Avoid rq from going away on us: */ 4975 preempt_disable(); 4976 task_rq_unlock(rq, p, &rf); 4977 |
4797 if (pi) | 4978 if (pi) { 4979 cpuset_read_unlock(); |
4798 rt_mutex_adjust_pi(p); | 4980 rt_mutex_adjust_pi(p); |
4981 } |
|
4799 4800 /* Run balance callbacks after we've adjusted the PI chain: */ 4801 balance_callback(rq); 4802 preempt_enable(); 4803 4804 return 0; | 4982 4983 /* Run balance callbacks after we've adjusted the PI chain: */ 4984 balance_callback(rq); 4985 preempt_enable(); 4986 4987 return 0; |
4988 4989unlock: 4990 task_rq_unlock(rq, p, &rf); 4991 if (pi) 4992 cpuset_read_unlock(); 4993 return retval; |
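The new unlock label collapses __sched_setscheduler()'s scattered "task_rq_unlock(); return err;" sites into a single exit path that also drops the cpuset lock when PI is involved. A generic, non-kernel illustration of that single-exit cleanup pattern (pthread mutexes stand in for the rq and cpuset locks, and the error codes are made up):

```c
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock_a = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t lock_b = PTHREAD_MUTEX_INITIALIZER;

static int do_update(int value)
{
	int retval = 0;

	pthread_mutex_lock(&lock_a);
	pthread_mutex_lock(&lock_b);

	if (value < 0) {
		retval = -1;		/* -EINVAL-style failure */
		goto unlock;
	}
	if (value > 1024) {
		retval = -2;		/* -EPERM-style failure */
		goto unlock;
	}

	printf("applied value %d\n", value);

unlock:
	/* One place drops every lock taken at the top, in reverse order. */
	pthread_mutex_unlock(&lock_b);
	pthread_mutex_unlock(&lock_a);
	return retval;
}

int main(void)
{
	printf("%d %d %d\n", do_update(100), do_update(-5), do_update(4096));
	return 0;
}
```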
|
4805} 4806 4807static int _sched_setscheduler(struct task_struct *p, int policy, 4808 const struct sched_param *param, bool check) 4809{ 4810 struct sched_attr attr = { 4811 .sched_policy = policy, 4812 .sched_priority = param->sched_priority, --- 67 unchanged lines hidden (view full) --- 4880 if (!param || pid < 0) 4881 return -EINVAL; 4882 if (copy_from_user(&lparam, param, sizeof(struct sched_param))) 4883 return -EFAULT; 4884 4885 rcu_read_lock(); 4886 retval = -ESRCH; 4887 p = find_process_by_pid(pid); | 4994} 4995 4996static int _sched_setscheduler(struct task_struct *p, int policy, 4997 const struct sched_param *param, bool check) 4998{ 4999 struct sched_attr attr = { 5000 .sched_policy = policy, 5001 .sched_priority = param->sched_priority, --- 67 unchanged lines hidden (view full) --- 5069 if (!param || pid < 0) 5070 return -EINVAL; 5071 if (copy_from_user(&lparam, param, sizeof(struct sched_param))) 5072 return -EFAULT; 5073 5074 rcu_read_lock(); 5075 retval = -ESRCH; 5076 p = find_process_by_pid(pid); |
4888 if (p != NULL) 4889 retval = sched_setscheduler(p, policy, &lparam); | 5077 if (likely(p)) 5078 get_task_struct(p); |
4890 rcu_read_unlock(); 4891 | 5079 rcu_read_unlock(); 5080 |
5081 if (likely(p)) { 5082 retval = sched_setscheduler(p, policy, &lparam); 5083 put_task_struct(p); 5084 } 5085 |
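This hunk matters because sched_setscheduler() may now sleep (it can take cpuset_read_lock()), so it can no longer be called inside the RCU read-side section; the task is pinned with get_task_struct() while still under rcu_read_lock(), and the actual call happens after the unlock. A loose userspace analogy of that pin, unlock, then do-sleepable-work ordering, with invented stand-in names throughout:

```c
#include <stdio.h>

/* Invented stand-ins: in the kernel these would be rcu_read_lock(),
 * get_task_struct(), put_task_struct() and the sleepable setscheduler call.
 * Refcounting is kept as a plain int since this sketch is single-threaded. */
struct obj { int refcount; int id; };

static void fake_rcu_read_lock(void)   { }
static void fake_rcu_read_unlock(void) { }

static struct obj *lookup(struct obj *table, int id)
{
	return &table[id];	/* pretend: RCU-protected lookup by pid */
}

static void get_obj(struct obj *o) { o->refcount++; }	/* pin */
static void put_obj(struct obj *o) { o->refcount--; }	/* unpin */

static int sleepable_update(struct obj *o)
{
	/* May block: only legal because the object is pinned and the
	 * RCU read-side section has already been left. */
	printf("updating obj %d (refcount=%d)\n", o->id, o->refcount);
	return 0;
}

int main(void)
{
	struct obj table[1] = { { .refcount = 1, .id = 0 } };
	struct obj *o;
	int ret = -1;

	fake_rcu_read_lock();
	o = lookup(table, 0);
	if (o)
		get_obj(o);		/* take a reference before unlocking */
	fake_rcu_read_unlock();

	if (o) {
		ret = sleepable_update(o);
		put_obj(o);
	}
	return ret;
}
```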
|
4892 return retval; 4893} 4894 4895/* 4896 * Mimics kernel/events/core.c perf_copy_attr(). 4897 */ 4898static int sched_copy_attr(struct sched_attr __user *uattr, struct sched_attr *attr) 4899{ --- 514 unchanged lines hidden (view full) --- 5414} 5415 5416SYSCALL_DEFINE0(sched_yield) 5417{ 5418 do_sched_yield(); 5419 return 0; 5420} 5421 | 5086 return retval; 5087} 5088 5089/* 5090 * Mimics kernel/events/core.c perf_copy_attr(). 5091 */ 5092static int sched_copy_attr(struct sched_attr __user *uattr, struct sched_attr *attr) 5093{ --- 514 unchanged lines hidden (view full) --- 5608} 5609 5610SYSCALL_DEFINE0(sched_yield) 5611{ 5612 do_sched_yield(); 5613 return 0; 5614} 5615 |
5422#ifndef CONFIG_PREEMPT | 5616#ifndef CONFIG_PREEMPTION |
5423int __sched _cond_resched(void) 5424{ 5425 if (should_resched(0)) { 5426 preempt_schedule_common(); 5427 return 1; 5428 } 5429 rcu_all_qs(); 5430 return 0; 5431} 5432EXPORT_SYMBOL(_cond_resched); 5433#endif 5434 5435/* 5436 * __cond_resched_lock() - if a reschedule is pending, drop the given lock, 5437 * call schedule, and on return reacquire the lock. 5438 * | 5617int __sched _cond_resched(void) 5618{ 5619 if (should_resched(0)) { 5620 preempt_schedule_common(); 5621 return 1; 5622 } 5623 rcu_all_qs(); 5624 return 0; 5625} 5626EXPORT_SYMBOL(_cond_resched); 5627#endif 5628 5629/* 5630 * __cond_resched_lock() - if a reschedule is pending, drop the given lock, 5631 * call schedule, and on return reacquire the lock. 5632 * |
5439 * This works OK both with and without CONFIG_PREEMPT. We do strange low-level | 5633 * This works OK both with and without CONFIG_PREEMPTION. We do strange low-level |
5440 * operations here to prevent schedule() from being called twice (once via 5441 * spin_unlock(), once by hand). 5442 */ 5443int __cond_resched_lock(spinlock_t *lock) 5444{ 5445 int resched = should_resched(PREEMPT_LOCK_OFFSET); 5446 int ret = 0; 5447 --- 522 unchanged lines hidden (view full) --- 5970 if (running) 5971 put_prev_task(rq, p); 5972 5973 p->numa_preferred_nid = nid; 5974 5975 if (queued) 5976 enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK); 5977 if (running) | 5634 * operations here to prevent schedule() from being called twice (once via 5635 * spin_unlock(), once by hand). 5636 */ 5637int __cond_resched_lock(spinlock_t *lock) 5638{ 5639 int resched = should_resched(PREEMPT_LOCK_OFFSET); 5640 int ret = 0; 5641 --- 522 unchanged lines hidden (view full) --- 6164 if (running) 6165 put_prev_task(rq, p); 6166 6167 p->numa_preferred_nid = nid; 6168 6169 if (queued) 6170 enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK); 6171 if (running) |
5978 set_curr_task(rq, p); | 6172 set_next_task(rq, p); |
5979 task_rq_unlock(rq, p, &rf); 5980} 5981#endif /* CONFIG_NUMA_BALANCING */ 5982 5983#ifdef CONFIG_HOTPLUG_CPU 5984/* 5985 * Ensure that the idle task is using init_mm right before its CPU goes 5986 * offline. --- 23 unchanged lines hidden (view full) --- 6010 */ 6011static void calc_load_migrate(struct rq *rq) 6012{ 6013 long delta = calc_load_fold_active(rq, 1); 6014 if (delta) 6015 atomic_long_add(delta, &calc_load_tasks); 6016} 6017 | 6173 task_rq_unlock(rq, p, &rf); 6174} 6175#endif /* CONFIG_NUMA_BALANCING */ 6176 6177#ifdef CONFIG_HOTPLUG_CPU 6178/* 6179 * Ensure that the idle task is using init_mm right before its CPU goes 6180 * offline. --- 23 unchanged lines hidden (view full) --- 6204 */ 6205static void calc_load_migrate(struct rq *rq) 6206{ 6207 long delta = calc_load_fold_active(rq, 1); 6208 if (delta) 6209 atomic_long_add(delta, &calc_load_tasks); 6210} 6211 |
6018static void put_prev_task_fake(struct rq *rq, struct task_struct *prev) | 6212static struct task_struct *__pick_migrate_task(struct rq *rq) |
6019{ | 6213{ |
6020} | 6214 const struct sched_class *class; 6215 struct task_struct *next; |
6021 | 6216 |
6022static const struct sched_class fake_sched_class = { 6023 .put_prev_task = put_prev_task_fake, 6024}; | 6217 for_each_class(class) { 6218 next = class->pick_next_task(rq, NULL, NULL); 6219 if (next) { 6220 next->sched_class->put_prev_task(rq, next, NULL); 6221 return next; 6222 } 6223 } |
6025 | 6224 |
6026static struct task_struct fake_task = { 6027 /* 6028 * Avoid pull_{rt,dl}_task() 6029 */ 6030 .prio = MAX_PRIO + 1, 6031 .sched_class = &fake_sched_class, 6032}; | 6225 /* The idle class should always have a runnable task */ 6226 BUG(); 6227} |
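On the right-hand side, the fake_task/fake_sched_class trick is gone: __pick_migrate_task() walks the scheduling classes in priority order, takes the first task a class offers, immediately hands it back through put_prev_task(), and leans on the idle class always having a runnable task (hence the BUG() fallback). The toy program below models just the ordered first-non-NULL walk in plain userspace C; all of its names are invented for illustration.

#include <stddef.h>
#include <stdio.h>

struct demo_task { const char *name; };

struct demo_class {
	const char *name;
	struct demo_task *(*pick_next)(void);
};

static struct demo_task idle_task = { "idle" };

static struct demo_task *pick_none(void) { return NULL; }	/* class has nothing runnable */
static struct demo_task *pick_idle(void) { return &idle_task; }	/* the idle class never fails */

/* Ordered highest to lowest priority, like the kernel's class walk */
static struct demo_class classes[] = {
	{ "rt",   pick_none },
	{ "fair", pick_none },
	{ "idle", pick_idle },
};

int main(void)
{
	for (size_t i = 0; i < sizeof(classes) / sizeof(classes[0]); i++) {
		struct demo_task *next = classes[i].pick_next();

		if (next) {
			printf("picked '%s' from class '%s'\n", next->name, classes[i].name);
			return 0;
		}
	}
	fprintf(stderr, "no runnable task found\n");	/* corresponds to the BUG() above */
	return 1;
}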
6033 6034/* 6035 * Migrate all tasks from the rq; sleeping tasks will be migrated by 6036 * try_to_wake_up()->select_task_rq(). 6037 * 6038 * Called with rq->lock held even though we're in stop_machine() and 6039 * there's no concurrency possible, we hold the required locks anyway 6040 * because of lock validation efforts. --- 26 unchanged lines hidden (view full) --- 6067 for (;;) { 6068 /* 6069 * There's this thread running, bail when that's the only 6070 * remaining thread: 6071 */ 6072 if (rq->nr_running == 1) 6073 break; 6074 | 6228 6229/* 6230 * Migrate all tasks from the rq; sleeping tasks will be migrated by 6231 * try_to_wake_up()->select_task_rq(). 6232 * 6233 * Called with rq->lock held even though we're in stop_machine() and 6234 * there's no concurrency possible, we hold the required locks anyway 6235 * because of lock validation efforts. --- 26 unchanged lines hidden (view full) --- 6262 for (;;) { 6263 /* 6264 * There's this thread running, bail when that's the only 6265 * remaining thread: 6266 */ 6267 if (rq->nr_running == 1) 6268 break; 6269
6075 /* 6076 * pick_next_task() assumes pinned rq->lock: 6077 */ 6078 next = pick_next_task(rq, &fake_task, rf); 6079 BUG_ON(!next); 6080 put_prev_task(rq, next); | 6270 next = __pick_migrate_task(rq); |
6081 6082 /* 6083 * Rules for changing task_struct::cpus_mask are to hold 6084 * both pi_lock and rq->lock, such that holding either 6085 * stabilizes the mask. 6086 * 6087 * Dropping rq->lock is not quite as disastrous as it usually is 6088 * because !cpu_active at this point, which means load-balance --- 280 unchanged lines hidden (view full) --- 6369static struct kmem_cache *task_group_cache __read_mostly; 6370#endif 6371 6372DECLARE_PER_CPU(cpumask_var_t, load_balance_mask); 6373DECLARE_PER_CPU(cpumask_var_t, select_idle_mask); 6374 6375void __init sched_init(void) 6376{ | 6271 6272 /* 6273 * Rules for changing task_struct::cpus_mask are to hold 6274 * both pi_lock and rq->lock, such that holding either 6275 * stabilizes the mask. 6276 * 6277 * Dropping rq->lock is not quite as disastrous as it usually is 6278 * because !cpu_active at this point, which means load-balance --- 280 unchanged lines hidden (view full) --- 6559static struct kmem_cache *task_group_cache __read_mostly; 6560#endif 6561 6562DECLARE_PER_CPU(cpumask_var_t, load_balance_mask); 6563DECLARE_PER_CPU(cpumask_var_t, select_idle_mask); 6564 6565void __init sched_init(void) 6566{
6377 unsigned long alloc_size = 0, ptr; | 6567 unsigned long ptr = 0; |
6378 int i; 6379 6380 wait_bit_init(); 6381 6382#ifdef CONFIG_FAIR_GROUP_SCHED | 6568 int i; 6569 6570 wait_bit_init(); 6571 6572#ifdef CONFIG_FAIR_GROUP_SCHED |
6383 alloc_size += 2 * nr_cpu_ids * sizeof(void **); | 6573 ptr += 2 * nr_cpu_ids * sizeof(void **); |
6384#endif 6385#ifdef CONFIG_RT_GROUP_SCHED | 6574#endif 6575#ifdef CONFIG_RT_GROUP_SCHED |
6386 alloc_size += 2 * nr_cpu_ids * sizeof(void **); | 6576 ptr += 2 * nr_cpu_ids * sizeof(void **); |
6387#endif | 6577#endif |
6388 if (alloc_size) { 6389 ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT); | 6578 if (ptr) { 6579 ptr = (unsigned long)kzalloc(ptr, GFP_NOWAIT); |
6390 6391#ifdef CONFIG_FAIR_GROUP_SCHED 6392 root_task_group.se = (struct sched_entity **)ptr; 6393 ptr += nr_cpu_ids * sizeof(void **); 6394 6395 root_task_group.cfs_rq = (struct cfs_rq **)ptr; 6396 ptr += nr_cpu_ids * sizeof(void **); 6397 --- 302 unchanged lines hidden (view full) --- 6700{ 6701 return cpu_curr(cpu); 6702} 6703 6704#endif /* defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) */ 6705 6706#ifdef CONFIG_IA64 6707/** | 6580 6581#ifdef CONFIG_FAIR_GROUP_SCHED 6582 root_task_group.se = (struct sched_entity **)ptr; 6583 ptr += nr_cpu_ids * sizeof(void **); 6584 6585 root_task_group.cfs_rq = (struct cfs_rq **)ptr; 6586 ptr += nr_cpu_ids * sizeof(void **); 6587 --- 302 unchanged lines hidden (view full) --- 6890{ 6891 return cpu_curr(cpu); 6892} 6893 6894#endif /* defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) */ 6895 6896#ifdef CONFIG_IA64 6897/** |
6708 * set_curr_task - set the current task for a given CPU. | 6898 * ia64_set_curr_task - set the current task for a given CPU. |
6709 * @cpu: the processor in question. 6710 * @p: the task pointer to set. 6711 * 6712 * Description: This function must only be used when non-maskable interrupts 6713 * are serviced on a separate stack. It allows the architecture to switch the 6714 * notion of the current task on a CPU in a non-blocking manner. This function 6715 * must be called with all CPUs synchronized, and interrupts disabled, and 6716 * the caller must save the original value of the current task (see --- 8 unchanged lines hidden (view full) --- 6725} 6726 6727#endif 6728 6729#ifdef CONFIG_CGROUP_SCHED 6730/* task_group_lock serializes the addition/removal of task groups */ 6731static DEFINE_SPINLOCK(task_group_lock); 6732 | 6899 * @cpu: the processor in question. 6900 * @p: the task pointer to set. 6901 * 6902 * Description: This function must only be used when non-maskable interrupts 6903 * are serviced on a separate stack. It allows the architecture to switch the 6904 * notion of the current task on a CPU in a non-blocking manner. This function 6905 * must be called with all CPUs synchronized, and interrupts disabled, and 6906 * the caller must save the original value of the current task (see --- 8 unchanged lines hidden (view full) --- 6915} 6916 6917#endif 6918 6919#ifdef CONFIG_CGROUP_SCHED 6920/* task_group_lock serializes the addition/removal of task groups */ 6921static DEFINE_SPINLOCK(task_group_lock); 6922
6923static inline void alloc_uclamp_sched_group(struct task_group *tg, 6924 struct task_group *parent) 6925{ 6926#ifdef CONFIG_UCLAMP_TASK_GROUP 6927 enum uclamp_id clamp_id; 6928 6929 for_each_clamp_id(clamp_id) { 6930 uclamp_se_set(&tg->uclamp_req[clamp_id], 6931 uclamp_none(clamp_id), false); 6932 tg->uclamp[clamp_id] = parent->uclamp[clamp_id]; 6933 } 6934#endif 6935} 6936 |
|
6733static void sched_free_group(struct task_group *tg) 6734{ 6735 free_fair_sched_group(tg); 6736 free_rt_sched_group(tg); 6737 autogroup_free(tg); 6738 kmem_cache_free(task_group_cache, tg); 6739} 6740 --- 7 unchanged lines hidden (view full) --- 6748 return ERR_PTR(-ENOMEM); 6749 6750 if (!alloc_fair_sched_group(tg, parent)) 6751 goto err; 6752 6753 if (!alloc_rt_sched_group(tg, parent)) 6754 goto err; 6755 | 6937static void sched_free_group(struct task_group *tg) 6938{ 6939 free_fair_sched_group(tg); 6940 free_rt_sched_group(tg); 6941 autogroup_free(tg); 6942 kmem_cache_free(task_group_cache, tg); 6943} 6944 --- 7 unchanged lines hidden (view full) --- 6952 return ERR_PTR(-ENOMEM); 6953 6954 if (!alloc_fair_sched_group(tg, parent)) 6955 goto err; 6956 6957 if (!alloc_rt_sched_group(tg, parent)) 6958 goto err; 6959 |
6960 alloc_uclamp_sched_group(tg, parent); 6961 |
|
6756 return tg; 6757 6758err: 6759 sched_free_group(tg); 6760 return ERR_PTR(-ENOMEM); 6761} 6762 6763void sched_online_group(struct task_group *tg, struct task_group *parent) --- 87 unchanged lines hidden (view full) --- 6851 if (running) 6852 put_prev_task(rq, tsk); 6853 6854 sched_change_group(tsk, TASK_MOVE_GROUP); 6855 6856 if (queued) 6857 enqueue_task(rq, tsk, queue_flags); 6858 if (running) | 6962 return tg; 6963 6964err: 6965 sched_free_group(tg); 6966 return ERR_PTR(-ENOMEM); 6967} 6968 6969void sched_online_group(struct task_group *tg, struct task_group *parent) --- 87 unchanged lines hidden (view full) --- 7057 if (running) 7058 put_prev_task(rq, tsk); 7059 7060 sched_change_group(tsk, TASK_MOVE_GROUP); 7061 7062 if (queued) 7063 enqueue_task(rq, tsk, queue_flags); 7064 if (running) |
6859 set_curr_task(rq, tsk); | 7065 set_next_task(rq, tsk); |
6860 6861 task_rq_unlock(rq, tsk, &rf); 6862} 6863 6864static inline struct task_group *css_tg(struct cgroup_subsys_state *css) 6865{ 6866 return css ? container_of(css, struct task_group, css) : NULL; 6867} --- 66 unchanged lines hidden (view full) --- 6934 struct task_struct *task; 6935 struct cgroup_subsys_state *css; 6936 int ret = 0; 6937 6938 cgroup_taskset_for_each(task, css, tset) { 6939#ifdef CONFIG_RT_GROUP_SCHED 6940 if (!sched_rt_can_attach(css_tg(css), task)) 6941 return -EINVAL; | 7066 7067 task_rq_unlock(rq, tsk, &rf); 7068} 7069 7070static inline struct task_group *css_tg(struct cgroup_subsys_state *css) 7071{ 7072 return css ? container_of(css, struct task_group, css) : NULL; 7073} --- 66 unchanged lines hidden (view full) --- 7140 struct task_struct *task; 7141 struct cgroup_subsys_state *css; 7142 int ret = 0; 7143 7144 cgroup_taskset_for_each(task, css, tset) { 7145#ifdef CONFIG_RT_GROUP_SCHED 7146 if (!sched_rt_can_attach(css_tg(css), task)) 7147 return -EINVAL; |
6942#else 6943 /* We don't support RT-tasks being in separate groups */ 6944 if (task->sched_class != &fair_sched_class) 6945 return -EINVAL; | |
6946#endif 6947 /* 6948 * Serialize against wake_up_new_task() such that if it's 6949 * running, we're sure to observe its full state. 6950 */ 6951 raw_spin_lock_irq(&task->pi_lock); 6952 /* 6953 * Avoid calling sched_move_task() before wake_up_new_task() --- 14 unchanged lines hidden (view full) --- 6968{ 6969 struct task_struct *task; 6970 struct cgroup_subsys_state *css; 6971 6972 cgroup_taskset_for_each(task, css, tset) 6973 sched_move_task(task); 6974} 6975 | 7148#endif 7149 /* 7150 * Serialize against wake_up_new_task() such that if it's 7151 * running, we're sure to observe its full state. 7152 */ 7153 raw_spin_lock_irq(&task->pi_lock); 7154 /* 7155 * Avoid calling sched_move_task() before wake_up_new_task() --- 14 unchanged lines hidden (view full) --- 7170{ 7171 struct task_struct *task; 7172 struct cgroup_subsys_state *css; 7173 7174 cgroup_taskset_for_each(task, css, tset) 7175 sched_move_task(task); 7176} 7177
7178#ifdef CONFIG_UCLAMP_TASK_GROUP 7179static void cpu_util_update_eff(struct cgroup_subsys_state *css) 7180{ 7181 struct cgroup_subsys_state *top_css = css; 7182 struct uclamp_se *uc_parent = NULL; 7183 struct uclamp_se *uc_se = NULL; 7184 unsigned int eff[UCLAMP_CNT]; 7185 enum uclamp_id clamp_id; 7186 unsigned int clamps; 7187 7188 css_for_each_descendant_pre(css, top_css) { 7189 uc_parent = css_tg(css)->parent 7190 ? css_tg(css)->parent->uclamp : NULL; 7191 7192 for_each_clamp_id(clamp_id) { 7193 /* Assume effective clamps matches requested clamps */ 7194 eff[clamp_id] = css_tg(css)->uclamp_req[clamp_id].value; 7195 /* Cap effective clamps with parent's effective clamps */ 7196 if (uc_parent && 7197 eff[clamp_id] > uc_parent[clamp_id].value) { 7198 eff[clamp_id] = uc_parent[clamp_id].value; 7199 } 7200 } 7201 /* Ensure protection is always capped by limit */ 7202 eff[UCLAMP_MIN] = min(eff[UCLAMP_MIN], eff[UCLAMP_MAX]); 7203 7204 /* Propagate most restrictive effective clamps */ 7205 clamps = 0x0; 7206 uc_se = css_tg(css)->uclamp; 7207 for_each_clamp_id(clamp_id) { 7208 if (eff[clamp_id] == uc_se[clamp_id].value) 7209 continue; 7210 uc_se[clamp_id].value = eff[clamp_id]; 7211 uc_se[clamp_id].bucket_id = uclamp_bucket_id(eff[clamp_id]); 7212 clamps |= (0x1 << clamp_id); 7213 } 7214 if (!clamps) { 7215 css = css_rightmost_descendant(css); 7216 continue; 7217 } 7218 7219 /* Immediately update descendants RUNNABLE tasks */ 7220 uclamp_update_active_tasks(css, clamps); 7221 } 7222} 7223 7224/* 7225 * Integer 10^N with a given N exponent by casting to integer the literal "1eN" 7226 * C expression. Since there is no way to convert a macro argument (N) into a 7227 * character constant, use two levels of macros. 7228 */ 7229#define _POW10(exp) ((unsigned int)1e##exp) 7230#define POW10(exp) _POW10(exp) 7231 7232struct uclamp_request { 7233#define UCLAMP_PERCENT_SHIFT 2 7234#define UCLAMP_PERCENT_SCALE (100 * POW10(UCLAMP_PERCENT_SHIFT)) 7235 s64 percent; 7236 u64 util; 7237 int ret; 7238}; 7239 7240static inline struct uclamp_request 7241capacity_from_percent(char *buf) 7242{ 7243 struct uclamp_request req = { 7244 .percent = UCLAMP_PERCENT_SCALE, 7245 .util = SCHED_CAPACITY_SCALE, 7246 .ret = 0, 7247 }; 7248 7249 buf = strim(buf); 7250 if (strcmp(buf, "max")) { 7251 req.ret = cgroup_parse_float(buf, UCLAMP_PERCENT_SHIFT, 7252 &req.percent); 7253 if (req.ret) 7254 return req; 7255 if (req.percent > UCLAMP_PERCENT_SCALE) { 7256 req.ret = -ERANGE; 7257 return req; 7258 } 7259 7260 req.util = req.percent << SCHED_CAPACITY_SHIFT; 7261 req.util = DIV_ROUND_CLOSEST_ULL(req.util, UCLAMP_PERCENT_SCALE); 7262 } 7263 7264 return req; 7265} 7266 7267static ssize_t cpu_uclamp_write(struct kernfs_open_file *of, char *buf, 7268 size_t nbytes, loff_t off, 7269 enum uclamp_id clamp_id) 7270{ 7271 struct uclamp_request req; 7272 struct task_group *tg; 7273 7274 req = capacity_from_percent(buf); 7275 if (req.ret) 7276 return req.ret; 7277 7278 mutex_lock(&uclamp_mutex); 7279 rcu_read_lock(); 7280 7281 tg = css_tg(of_css(of)); 7282 if (tg->uclamp_req[clamp_id].value != req.util) 7283 uclamp_se_set(&tg->uclamp_req[clamp_id], req.util, false); 7284 7285 /* 7286 * Because of not recoverable conversion rounding we keep track of the 7287 * exact requested value 7288 */ 7289 tg->uclamp_pct[clamp_id] = req.percent; 7290 7291 /* Update effective clamps to track the most restrictive value */ 7292 cpu_util_update_eff(of_css(of)); 7293 7294 rcu_read_unlock(); 7295 mutex_unlock(&uclamp_mutex); 7296 7297 return nbytes; 7298} 
7299 7300static ssize_t cpu_uclamp_min_write(struct kernfs_open_file *of, 7301 char *buf, size_t nbytes, 7302 loff_t off) 7303{ 7304 return cpu_uclamp_write(of, buf, nbytes, off, UCLAMP_MIN); 7305} 7306 7307static ssize_t cpu_uclamp_max_write(struct kernfs_open_file *of, 7308 char *buf, size_t nbytes, 7309 loff_t off) 7310{ 7311 return cpu_uclamp_write(of, buf, nbytes, off, UCLAMP_MAX); 7312} 7313 7314static inline void cpu_uclamp_print(struct seq_file *sf, 7315 enum uclamp_id clamp_id) 7316{ 7317 struct task_group *tg; 7318 u64 util_clamp; 7319 u64 percent; 7320 u32 rem; 7321 7322 rcu_read_lock(); 7323 tg = css_tg(seq_css(sf)); 7324 util_clamp = tg->uclamp_req[clamp_id].value; 7325 rcu_read_unlock(); 7326 7327 if (util_clamp == SCHED_CAPACITY_SCALE) { 7328 seq_puts(sf, "max\n"); 7329 return; 7330 } 7331 7332 percent = tg->uclamp_pct[clamp_id]; 7333 percent = div_u64_rem(percent, POW10(UCLAMP_PERCENT_SHIFT), &rem); 7334 seq_printf(sf, "%llu.%0*u\n", percent, UCLAMP_PERCENT_SHIFT, rem); 7335} 7336 7337static int cpu_uclamp_min_show(struct seq_file *sf, void *v) 7338{ 7339 cpu_uclamp_print(sf, UCLAMP_MIN); 7340 return 0; 7341} 7342 7343static int cpu_uclamp_max_show(struct seq_file *sf, void *v) 7344{ 7345 cpu_uclamp_print(sf, UCLAMP_MAX); 7346 return 0; 7347} 7348#endif /* CONFIG_UCLAMP_TASK_GROUP */ 7349 |
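For reference, the percent-to-capacity conversion performed by capacity_from_percent() above can be reproduced in isolation. The standalone program below re-derives that arithmetic; SCHED_CAPACITY_SHIFT is assumed to be 10 (a capacity scale of 1024), and div_round_closest() stands in for the kernel's DIV_ROUND_CLOSEST_ULL().

#include <stdint.h>
#include <stdio.h>

#define SCHED_CAPACITY_SHIFT	10			/* assumed: capacity scale of 1024 */
#define UCLAMP_PERCENT_SHIFT	2			/* two decimal places are kept */
#define UCLAMP_PERCENT_SCALE	(100 * 100)		/* 100 * 10^UCLAMP_PERCENT_SHIFT */

/* Stand-in for DIV_ROUND_CLOSEST_ULL() */
static uint64_t div_round_closest(uint64_t x, uint64_t d)
{
	return (x + d / 2) / d;
}

/* "20.50" written to cpu.uclamp.min is parsed by cgroup_parse_float() as 2050 */
static uint64_t percent_to_util(uint64_t percent)
{
	return div_round_closest(percent << SCHED_CAPACITY_SHIFT,
				 UCLAMP_PERCENT_SCALE);
}

int main(void)
{
	printf("20.50%% -> util %llu of 1024\n",
	       (unsigned long long)percent_to_util(2050));	/* prints 210 */
	printf("100%%   -> util %llu of 1024\n",
	       (unsigned long long)percent_to_util(10000));	/* prints 1024 */
	return 0;
}

So a write of "20.5" requests a clamp of about 210/1024; cpu_util_update_eff() then caps every descendant's effective value by its parent's and, where something actually changed, refreshes the RUNNABLE tasks of those groups.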
|
6976#ifdef CONFIG_FAIR_GROUP_SCHED 6977static int cpu_shares_write_u64(struct cgroup_subsys_state *css, 6978 struct cftype *cftype, u64 shareval) 6979{ 6980 if (shareval > scale_load_down(ULONG_MAX)) 6981 shareval = MAX_SHARES; 6982 return sched_group_set_shares(css_tg(css), scale_load(shareval)); 6983} --- 329 unchanged lines hidden (view full) --- 7313 .write_s64 = cpu_rt_runtime_write, 7314 }, 7315 { 7316 .name = "rt_period_us", 7317 .read_u64 = cpu_rt_period_read_uint, 7318 .write_u64 = cpu_rt_period_write_uint, 7319 }, 7320#endif | 7350#ifdef CONFIG_FAIR_GROUP_SCHED 7351static int cpu_shares_write_u64(struct cgroup_subsys_state *css, 7352 struct cftype *cftype, u64 shareval) 7353{ 7354 if (shareval > scale_load_down(ULONG_MAX)) 7355 shareval = MAX_SHARES; 7356 return sched_group_set_shares(css_tg(css), scale_load(shareval)); 7357} --- 329 unchanged lines hidden (view full) --- 7687 .write_s64 = cpu_rt_runtime_write, 7688 }, 7689 { 7690 .name = "rt_period_us", 7691 .read_u64 = cpu_rt_period_read_uint, 7692 .write_u64 = cpu_rt_period_write_uint, 7693 }, 7694#endif |
7695#ifdef CONFIG_UCLAMP_TASK_GROUP 7696 { 7697 .name = "uclamp.min", 7698 .flags = CFTYPE_NOT_ON_ROOT, 7699 .seq_show = cpu_uclamp_min_show, 7700 .write = cpu_uclamp_min_write, 7701 }, 7702 { 7703 .name = "uclamp.max", 7704 .flags = CFTYPE_NOT_ON_ROOT, 7705 .seq_show = cpu_uclamp_max_show, 7706 .write = cpu_uclamp_max_write, 7707 }, 7708#endif |
|
7321 { } /* Terminate */ 7322}; 7323 7324static int cpu_extra_stat_show(struct seq_file *sf, 7325 struct cgroup_subsys_state *css) 7326{ 7327#ifdef CONFIG_CFS_BANDWIDTH 7328 { --- 151 unchanged lines hidden (view full) --- 7480#ifdef CONFIG_CFS_BANDWIDTH 7481 { 7482 .name = "max", 7483 .flags = CFTYPE_NOT_ON_ROOT, 7484 .seq_show = cpu_max_show, 7485 .write = cpu_max_write, 7486 }, 7487#endif | 7709 { } /* Terminate */ 7710}; 7711 7712static int cpu_extra_stat_show(struct seq_file *sf, 7713 struct cgroup_subsys_state *css) 7714{ 7715#ifdef CONFIG_CFS_BANDWIDTH 7716 { --- 151 unchanged lines hidden (view full) --- 7868#ifdef CONFIG_CFS_BANDWIDTH 7869 { 7870 .name = "max", 7871 .flags = CFTYPE_NOT_ON_ROOT, 7872 .seq_show = cpu_max_show, 7873 .write = cpu_max_write, 7874 }, 7875#endif |
7876#ifdef CONFIG_UCLAMP_TASK_GROUP 7877 { 7878 .name = "uclamp.min", 7879 .flags = CFTYPE_NOT_ON_ROOT, 7880 .seq_show = cpu_uclamp_min_show, 7881 .write = cpu_uclamp_min_write, 7882 }, 7883 { 7884 .name = "uclamp.max", 7885 .flags = CFTYPE_NOT_ON_ROOT, 7886 .seq_show = cpu_uclamp_max_show, 7887 .write = cpu_uclamp_max_write, 7888 }, 7889#endif |
|
7488 { } /* terminate */ 7489}; 7490 7491struct cgroup_subsys cpu_cgrp_subsys = { 7492 .css_alloc = cpu_cgroup_css_alloc, 7493 .css_online = cpu_cgroup_css_online, 7494 .css_released = cpu_cgroup_css_released, 7495 .css_free = cpu_cgroup_css_free, --- 60 unchanged lines hidden --- | 7890 { } /* terminate */ 7891}; 7892 7893struct cgroup_subsys cpu_cgrp_subsys = { 7894 .css_alloc = cpu_cgroup_css_alloc, 7895 .css_online = cpu_cgroup_css_online, 7896 .css_released = cpu_cgroup_css_released, 7897 .css_free = cpu_cgroup_css_free, --- 60 unchanged lines hidden --- |
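The cpu.uclamp.min and cpu.uclamp.max entries registered in the two file tables above are driven from userspace by ordinary writes to the cgroup filesystem. The sketch below shows one hypothetical way to do that from C; the cgroup path is invented for the example, and the files are only present with CONFIG_UCLAMP_TASK_GROUP enabled and the cpu controller active for that group.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Write a value such as "20.5" or "max" to a cgroup control file. */
static int write_cg_file(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror(path);
		return -1;
	}
	if (write(fd, val, strlen(val)) < 0) {
		perror(path);
		close(fd);
		return -1;
	}
	close(fd);
	return 0;
}

int main(void)
{
	/* Ask for at least ~20.5% and at most 80% of CPU capacity for the group. */
	write_cg_file("/sys/fs/cgroup/mygroup/cpu.uclamp.min", "20.5");
	write_cg_file("/sys/fs/cgroup/mygroup/cpu.uclamp.max", "80");
	return 0;
}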