core.c (c01f5120ca7cf2994336c42b8a9cae697121ffb3) -> core.c (0ff7b2cfbae36ebcd216c6a5ad7f8534eebeaee2)
1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * kernel/sched/core.c
4 *
5 * Core kernel scheduler code and related syscalls
6 *
7 * Copyright (C) 1991-2002 Linus Torvalds
8 */

--- 759 unchanged lines hidden (view full) ---

768 } else {
769 load->weight = scale_load(sched_prio_to_weight[prio]);
770 load->inv_weight = sched_prio_to_wmult[prio];
771 p->se.runnable_weight = load->weight;
772 }
773}
774
775#ifdef CONFIG_UCLAMP_TASK
776/*
777 * Serializes updates of utilization clamp values
778 *
779 * The (slow-path) user-space triggers utilization clamp value updates which
780 * can require updates on (fast-path) scheduler's data structures used to
781 * support enqueue/dequeue operations.
782 * While the per-CPU rq lock protects fast-path update operations, user-space
783 * requests are serialized using a mutex to reduce the risk of conflicting
784 * updates or API abuses.
785 */
786static DEFINE_MUTEX(uclamp_mutex);
787
776/* Max allowed minimum utilization */
777unsigned int sysctl_sched_uclamp_util_min = SCHED_CAPACITY_SCALE;
778
779/* Max allowed maximum utilization */
780unsigned int sysctl_sched_uclamp_util_max = SCHED_CAPACITY_SCALE;
781
782/* All clamps are required to be less or equal than these values */
783static struct uclamp_se uclamp_default[UCLAMP_CNT];

--- 9 unchanged lines hidden (view full) ---

793 return clamp_value / UCLAMP_BUCKET_DELTA;
794}
795
796static inline unsigned int uclamp_bucket_base_value(unsigned int clamp_value)
797{
798 return UCLAMP_BUCKET_DELTA * uclamp_bucket_id(clamp_value);
799}
800
788/* Max allowed minimum utilization */
789unsigned int sysctl_sched_uclamp_util_min = SCHED_CAPACITY_SCALE;
790
791/* Max allowed maximum utilization */
792unsigned int sysctl_sched_uclamp_util_max = SCHED_CAPACITY_SCALE;
793
794/* All clamps are required to be less or equal than these values */
795static struct uclamp_se uclamp_default[UCLAMP_CNT];

--- 9 unchanged lines hidden (view full) ---

805 return clamp_value / UCLAMP_BUCKET_DELTA;
806}
807
808static inline unsigned int uclamp_bucket_base_value(unsigned int clamp_value)
809{
810 return UCLAMP_BUCKET_DELTA * uclamp_bucket_id(clamp_value);
811}
812
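For reference, uclamp_bucket_id() and uclamp_bucket_base_value() above simply split the [0..SCHED_CAPACITY_SCALE] range into UCLAMP_BUCKETS equally sized buckets. The following is a minimal standalone sketch of that mapping, not kernel code; the constants (SCHED_CAPACITY_SCALE = 1024, a default CONFIG_UCLAMP_BUCKETS_COUNT of 5, hence UCLAMP_BUCKET_DELTA of about 205) are assumptions for illustration and are not taken from this diff.

/* Standalone illustration only -- not kernel code. */
#include <stdio.h>

#define SCHED_CAPACITY_SCALE	1024	/* assumed */
#define UCLAMP_BUCKETS		5	/* assumed default CONFIG_UCLAMP_BUCKETS_COUNT */
/* Round-to-closest division, mirroring DIV_ROUND_CLOSEST() */
#define UCLAMP_BUCKET_DELTA	((SCHED_CAPACITY_SCALE + UCLAMP_BUCKETS / 2) / UCLAMP_BUCKETS)

static unsigned int uclamp_bucket_id(unsigned int clamp_value)
{
	return clamp_value / UCLAMP_BUCKET_DELTA;
}

static unsigned int uclamp_bucket_base_value(unsigned int clamp_value)
{
	return UCLAMP_BUCKET_DELTA * uclamp_bucket_id(clamp_value);
}

int main(void)
{
	/* e.g. a requested clamp of 300 falls into bucket 1, whose base value is 205 */
	unsigned int v = 300;

	printf("value=%u bucket=%u base=%u\n",
	       v, uclamp_bucket_id(v), uclamp_bucket_base_value(v));
	return 0;
}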
801static inline unsigned int uclamp_none(int clamp_id)
813static inline enum uclamp_id uclamp_none(enum uclamp_id clamp_id)
802{
803 if (clamp_id == UCLAMP_MIN)
804 return 0;
805 return SCHED_CAPACITY_SCALE;
806}
807
808static inline void uclamp_se_set(struct uclamp_se *uc_se,
809 unsigned int value, bool user_defined)
810{
811 uc_se->value = value;
812 uc_se->bucket_id = uclamp_bucket_id(value);
813 uc_se->user_defined = user_defined;
814}
815
816static inline unsigned int
814{
815 if (clamp_id == UCLAMP_MIN)
816 return 0;
817 return SCHED_CAPACITY_SCALE;
818}
819
820static inline void uclamp_se_set(struct uclamp_se *uc_se,
821 unsigned int value, bool user_defined)
822{
823 uc_se->value = value;
824 uc_se->bucket_id = uclamp_bucket_id(value);
825 uc_se->user_defined = user_defined;
826}
827
828static inline unsigned int
817uclamp_idle_value(struct rq *rq, unsigned int clamp_id,
829uclamp_idle_value(struct rq *rq, enum uclamp_id clamp_id,
818 unsigned int clamp_value)
819{
820 /*
821 * Avoid blocked utilization pushing up the frequency when we go
822 * idle (which drops the max-clamp) by retaining the last known
823 * max-clamp.
824 */
825 if (clamp_id == UCLAMP_MAX) {
826 rq->uclamp_flags |= UCLAMP_FLAG_IDLE;
827 return clamp_value;
828 }
829
830 return uclamp_none(UCLAMP_MIN);
831}
832
830 unsigned int clamp_value)
831{
832 /*
833 * Avoid blocked utilization pushing up the frequency when we go
834 * idle (which drops the max-clamp) by retaining the last known
835 * max-clamp.
836 */
837 if (clamp_id == UCLAMP_MAX) {
838 rq->uclamp_flags |= UCLAMP_FLAG_IDLE;
839 return clamp_value;
840 }
841
842 return uclamp_none(UCLAMP_MIN);
843}
844
833static inline void uclamp_idle_reset(struct rq *rq, unsigned int clamp_id,
845static inline void uclamp_idle_reset(struct rq *rq, enum uclamp_id clamp_id,
834 unsigned int clamp_value)
835{
836 /* Reset max-clamp retention only on idle exit */
837 if (!(rq->uclamp_flags & UCLAMP_FLAG_IDLE))
838 return;
839
840 WRITE_ONCE(rq->uclamp[clamp_id].value, clamp_value);
841}
842
843static inline
846 unsigned int clamp_value)
847{
848 /* Reset max-clamp retention only on idle exit */
849 if (!(rq->uclamp_flags & UCLAMP_FLAG_IDLE))
850 return;
851
852 WRITE_ONCE(rq->uclamp[clamp_id].value, clamp_value);
853}
854
855static inline
844unsigned int uclamp_rq_max_value(struct rq *rq, unsigned int clamp_id,
845 unsigned int clamp_value)
856enum uclamp_id uclamp_rq_max_value(struct rq *rq, enum uclamp_id clamp_id,
857 unsigned int clamp_value)
846{
847 struct uclamp_bucket *bucket = rq->uclamp[clamp_id].bucket;
848 int bucket_id = UCLAMP_BUCKETS - 1;
849
850 /*
851 * Since both min and max clamps are max aggregated, find the
852 * top most bucket with tasks in.
853 */
854 for ( ; bucket_id >= 0; bucket_id--) {
855 if (!bucket[bucket_id].tasks)
856 continue;
857 return bucket[bucket_id].value;
858 }
859
860 /* No tasks -- default clamp values */
861 return uclamp_idle_value(rq, clamp_id, clamp_value);
862}
863
858{
859 struct uclamp_bucket *bucket = rq->uclamp[clamp_id].bucket;
860 int bucket_id = UCLAMP_BUCKETS - 1;
861
862 /*
863 * Since both min and max clamps are max aggregated, find the
864 * topmost bucket with tasks in it.
865 */
866 for ( ; bucket_id >= 0; bucket_id--) {
867 if (!bucket[bucket_id].tasks)
868 continue;
869 return bucket[bucket_id].value;
870 }
871
872 /* No tasks -- default clamp values */
873 return uclamp_idle_value(rq, clamp_id, clamp_value);
874}
875
876static inline struct uclamp_se
877uclamp_tg_restrict(struct task_struct *p, enum uclamp_id clamp_id)
878{
879 struct uclamp_se uc_req = p->uclamp_req[clamp_id];
880#ifdef CONFIG_UCLAMP_TASK_GROUP
881 struct uclamp_se uc_max;
882
883 /*
884 * Tasks in autogroups or root task group will be
885 * restricted by system defaults.
886 */
887 if (task_group_is_autogroup(task_group(p)))
888 return uc_req;
889 if (task_group(p) == &root_task_group)
890 return uc_req;
891
892 uc_max = task_group(p)->uclamp[clamp_id];
893 if (uc_req.value > uc_max.value || !uc_req.user_defined)
894 return uc_max;
895#endif
896
897 return uc_req;
898}
899
864/*
865 * The effective clamp bucket index of a task depends on, by increasing
866 * priority:
867 * - the task specific clamp value, when explicitly requested from userspace
900/*
901 * The effective clamp bucket index of a task depends on, by increasing
902 * priority:
903 * - the task specific clamp value, when explicitly requested from userspace
904 * - the task group effective clamp value, for tasks neither in the root
905 * group nor in an autogroup
868 * - the system default clamp value, defined by the sysadmin
869 */
870static inline struct uclamp_se
906 * - the system default clamp value, defined by the sysadmin
907 */
908static inline struct uclamp_se
871uclamp_eff_get(struct task_struct *p, unsigned int clamp_id)
909uclamp_eff_get(struct task_struct *p, enum uclamp_id clamp_id)
872{
910{
873 struct uclamp_se uc_req = p->uclamp_req[clamp_id];
911 struct uclamp_se uc_req = uclamp_tg_restrict(p, clamp_id);
874 struct uclamp_se uc_max = uclamp_default[clamp_id];
875
876 /* System default restrictions always apply */
877 if (unlikely(uc_req.value > uc_max.value))
878 return uc_max;
879
880 return uc_req;
881}
882
912 struct uclamp_se uc_max = uclamp_default[clamp_id];
913
914 /* System default restrictions always apply */
915 if (unlikely(uc_req.value > uc_max.value))
916 return uc_max;
917
918 return uc_req;
919}
920
883unsigned int uclamp_eff_value(struct task_struct *p, unsigned int clamp_id)
921enum uclamp_id uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id)
884{
885 struct uclamp_se uc_eff;
886
887 /* Task currently refcounted: use back-annotated (effective) value */
888 if (p->uclamp[clamp_id].active)
889 return p->uclamp[clamp_id].value;
890
891 uc_eff = uclamp_eff_get(p, clamp_id);

--- 7 unchanged lines hidden (view full) ---

899 * updates the rq's clamp value if required.
900 *
901 * Tasks can have a task-specific value requested from user-space, track
902 * within each bucket the maximum value for tasks refcounted in it.
903 * This "local max aggregation" allows tracking the exact "requested" value
904 * for each bucket when all its RUNNABLE tasks require the same clamp.
905 */
906static inline void uclamp_rq_inc_id(struct rq *rq, struct task_struct *p,
922{
923 struct uclamp_se uc_eff;
924
925 /* Task currently refcounted: use back-annotated (effective) value */
926 if (p->uclamp[clamp_id].active)
927 return p->uclamp[clamp_id].value;
928
929 uc_eff = uclamp_eff_get(p, clamp_id);

--- 7 unchanged lines hidden (view full) ---

937 * updates the rq's clamp value if required.
938 *
939 * Tasks can have a task-specific value requested from user-space, track
940 * within each bucket the maximum value for tasks refcounted in it.
941 * This "local max aggregation" allows tracking the exact "requested" value
942 * for each bucket when all its RUNNABLE tasks require the same clamp.
943 */
944static inline void uclamp_rq_inc_id(struct rq *rq, struct task_struct *p,
907 unsigned int clamp_id)
945 enum uclamp_id clamp_id)
908{
909 struct uclamp_rq *uc_rq = &rq->uclamp[clamp_id];
910 struct uclamp_se *uc_se = &p->uclamp[clamp_id];
911 struct uclamp_bucket *bucket;
912
913 lockdep_assert_held(&rq->lock);
914
915 /* Update task effective clamp */

--- 21 unchanged lines hidden (view full) ---

937 * is released. If this is the last task reference counting the rq's max
938 * active clamp value, then the rq's clamp value is updated.
939 *
940 * Both refcounted tasks and rq's cached clamp values are expected to be
941 * always valid. If it's detected they are not, as defensive programming,
942 * enforce the expected state and warn.
943 */
944static inline void uclamp_rq_dec_id(struct rq *rq, struct task_struct *p,
946{
947 struct uclamp_rq *uc_rq = &rq->uclamp[clamp_id];
948 struct uclamp_se *uc_se = &p->uclamp[clamp_id];
949 struct uclamp_bucket *bucket;
950
951 lockdep_assert_held(&rq->lock);
952
953 /* Update task effective clamp */

--- 21 unchanged lines hidden (view full) ---

975 * is released. If this is the last task reference counting the rq's max
976 * active clamp value, then the rq's clamp value is updated.
977 *
978 * Both refcounted tasks and rq's cached clamp values are expected to be
979 * always valid. If it's detected they are not, as defensive programming,
980 * enforce the expected state and warn.
981 */
982static inline void uclamp_rq_dec_id(struct rq *rq, struct task_struct *p,
945 unsigned int clamp_id)
983 enum uclamp_id clamp_id)
946{
947 struct uclamp_rq *uc_rq = &rq->uclamp[clamp_id];
948 struct uclamp_se *uc_se = &p->uclamp[clamp_id];
949 struct uclamp_bucket *bucket;
950 unsigned int bkt_clamp;
951 unsigned int rq_clamp;
952
953 lockdep_assert_held(&rq->lock);

--- 22 unchanged lines hidden (view full) ---

976 if (bucket->value >= rq_clamp) {
977 bkt_clamp = uclamp_rq_max_value(rq, clamp_id, uc_se->value);
978 WRITE_ONCE(uc_rq->value, bkt_clamp);
979 }
980}
981
982static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
983{
984{
985 struct uclamp_rq *uc_rq = &rq->uclamp[clamp_id];
986 struct uclamp_se *uc_se = &p->uclamp[clamp_id];
987 struct uclamp_bucket *bucket;
988 unsigned int bkt_clamp;
989 unsigned int rq_clamp;
990
991 lockdep_assert_held(&rq->lock);

--- 22 unchanged lines hidden (view full) ---

1014 if (bucket->value >= rq_clamp) {
1015 bkt_clamp = uclamp_rq_max_value(rq, clamp_id, uc_se->value);
1016 WRITE_ONCE(uc_rq->value, bkt_clamp);
1017 }
1018}
1019
1020static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
1021{
984 unsigned int clamp_id;
1022 enum uclamp_id clamp_id;
985
986 if (unlikely(!p->sched_class->uclamp_enabled))
987 return;
988
989 for_each_clamp_id(clamp_id)
990 uclamp_rq_inc_id(rq, p, clamp_id);
991
992 /* Reset clamp idle holding when there is one RUNNABLE task */
993 if (rq->uclamp_flags & UCLAMP_FLAG_IDLE)
994 rq->uclamp_flags &= ~UCLAMP_FLAG_IDLE;
995}
996
997static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p)
998{
1023
1024 if (unlikely(!p->sched_class->uclamp_enabled))
1025 return;
1026
1027 for_each_clamp_id(clamp_id)
1028 uclamp_rq_inc_id(rq, p, clamp_id);
1029
1030 /* Reset clamp idle holding when there is one RUNNABLE task */
1031 if (rq->uclamp_flags & UCLAMP_FLAG_IDLE)
1032 rq->uclamp_flags &= ~UCLAMP_FLAG_IDLE;
1033}
1034
1035static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p)
1036{
999 unsigned int clamp_id;
1037 enum uclamp_id clamp_id;
1000
1001 if (unlikely(!p->sched_class->uclamp_enabled))
1002 return;
1003
1004 for_each_clamp_id(clamp_id)
1005 uclamp_rq_dec_id(rq, p, clamp_id);
1006}
1007
1038
1039 if (unlikely(!p->sched_class->uclamp_enabled))
1040 return;
1041
1042 for_each_clamp_id(clamp_id)
1043 uclamp_rq_dec_id(rq, p, clamp_id);
1044}
1045
1046static inline void
1047uclamp_update_active(struct task_struct *p, enum uclamp_id clamp_id)
1048{
1049 struct rq_flags rf;
1050 struct rq *rq;
1051
1052 /*
1053 * Lock the task and the rq where the task is (or was) queued.
1054 *
1055 * We might lock the (previous) rq of a !RUNNABLE task, but that's the
1056 * price to pay to safely serialize util_{min,max} updates with
1057 * enqueues, dequeues and migration operations.
1058 * This is the same locking schema used by __set_cpus_allowed_ptr().
1059 */
1060 rq = task_rq_lock(p, &rf);
1061
1062 /*
1063 * Setting the clamp bucket is serialized by task_rq_lock().
1064 * If the task is not yet RUNNABLE and its task_struct is not
1065 * affecting a valid clamp bucket, the next time it's enqueued,
1066 * it will already see the updated clamp bucket value.
1067 */
1068 if (!p->uclamp[clamp_id].active) {
1069 uclamp_rq_dec_id(rq, p, clamp_id);
1070 uclamp_rq_inc_id(rq, p, clamp_id);
1071 }
1072
1073 task_rq_unlock(rq, p, &rf);
1074}
1075
1076static inline void
1077uclamp_update_active_tasks(struct cgroup_subsys_state *css,
1078 unsigned int clamps)
1079{
1080 enum uclamp_id clamp_id;
1081 struct css_task_iter it;
1082 struct task_struct *p;
1083
1084 css_task_iter_start(css, 0, &it);
1085 while ((p = css_task_iter_next(&it))) {
1086 for_each_clamp_id(clamp_id) {
1087 if ((0x1 << clamp_id) & clamps)
1088 uclamp_update_active(p, clamp_id);
1089 }
1090 }
1091 css_task_iter_end(&it);
1092}
1093
1094#ifdef CONFIG_UCLAMP_TASK_GROUP
1095static void cpu_util_update_eff(struct cgroup_subsys_state *css);
1096static void uclamp_update_root_tg(void)
1097{
1098 struct task_group *tg = &root_task_group;
1099
1100 uclamp_se_set(&tg->uclamp_req[UCLAMP_MIN],
1101 sysctl_sched_uclamp_util_min, false);
1102 uclamp_se_set(&tg->uclamp_req[UCLAMP_MAX],
1103 sysctl_sched_uclamp_util_max, false);
1104
1105 rcu_read_lock();
1106 cpu_util_update_eff(&root_task_group.css);
1107 rcu_read_unlock();
1108}
1109#else
1110static void uclamp_update_root_tg(void) { }
1111#endif
1112
1008int sysctl_sched_uclamp_handler(struct ctl_table *table, int write,
1009 void __user *buffer, size_t *lenp,
1010 loff_t *ppos)
1011{
1113int sysctl_sched_uclamp_handler(struct ctl_table *table, int write,
1114 void __user *buffer, size_t *lenp,
1115 loff_t *ppos)
1116{
1117 bool update_root_tg = false;
1012 int old_min, old_max;
1118 int old_min, old_max;
1013 static DEFINE_MUTEX(mutex);
1014 int result;
1015
1119 int result;
1120
1016 mutex_lock(&mutex);
1121 mutex_lock(&uclamp_mutex);
1017 old_min = sysctl_sched_uclamp_util_min;
1018 old_max = sysctl_sched_uclamp_util_max;
1019
1020 result = proc_dointvec(table, write, buffer, lenp, ppos);
1021 if (result)
1022 goto undo;
1023 if (!write)
1024 goto done;
1025
1026 if (sysctl_sched_uclamp_util_min > sysctl_sched_uclamp_util_max ||
1027 sysctl_sched_uclamp_util_max > SCHED_CAPACITY_SCALE) {
1028 result = -EINVAL;
1029 goto undo;
1030 }
1031
1032 if (old_min != sysctl_sched_uclamp_util_min) {
1033 uclamp_se_set(&uclamp_default[UCLAMP_MIN],
1034 sysctl_sched_uclamp_util_min, false);
1122 old_min = sysctl_sched_uclamp_util_min;
1123 old_max = sysctl_sched_uclamp_util_max;
1124
1125 result = proc_dointvec(table, write, buffer, lenp, ppos);
1126 if (result)
1127 goto undo;
1128 if (!write)
1129 goto done;
1130
1131 if (sysctl_sched_uclamp_util_min > sysctl_sched_uclamp_util_max ||
1132 sysctl_sched_uclamp_util_max > SCHED_CAPACITY_SCALE) {
1133 result = -EINVAL;
1134 goto undo;
1135 }
1136
1137 if (old_min != sysctl_sched_uclamp_util_min) {
1138 uclamp_se_set(&uclamp_default[UCLAMP_MIN],
1139 sysctl_sched_uclamp_util_min, false);
1140 update_root_tg = true;
1035 }
1036 if (old_max != sysctl_sched_uclamp_util_max) {
1037 uclamp_se_set(&uclamp_default[UCLAMP_MAX],
1038 sysctl_sched_uclamp_util_max, false);
1141 }
1142 if (old_max != sysctl_sched_uclamp_util_max) {
1143 uclamp_se_set(&uclamp_default[UCLAMP_MAX],
1144 sysctl_sched_uclamp_util_max, false);
1145 update_root_tg = true;
1039 }
1040
1146 }
1147
1148 if (update_root_tg)
1149 uclamp_update_root_tg();
1150
1041 /*
1151 /*
1042 * Updating all the RUNNABLE task is expensive, keep it simple and do
1043 * just a lazy update at each next enqueue time.
1152 * We update all RUNNABLE tasks only when task groups are in use.
1153 * Otherwise, keep it simple and do just a lazy update at each next
1154 * task enqueue time.
1044 */
1155 */
1156
1045 goto done;
1046
1047undo:
1048 sysctl_sched_uclamp_util_min = old_min;
1049 sysctl_sched_uclamp_util_max = old_max;
1050done:
1157 goto done;
1158
1159undo:
1160 sysctl_sched_uclamp_util_min = old_min;
1161 sysctl_sched_uclamp_util_max = old_max;
1162done:
1051 mutex_unlock(&mutex);
1163 mutex_unlock(&uclamp_mutex);
1052
1053 return result;
1054}
1055
1056static int uclamp_validate(struct task_struct *p,
1057 const struct sched_attr *attr)
1058{
1059 unsigned int lower_bound = p->uclamp_req[UCLAMP_MIN].value;

--- 10 unchanged lines hidden (view full) ---

1070 return -EINVAL;
1071
1072 return 0;
1073}
1074
1075static void __setscheduler_uclamp(struct task_struct *p,
1076 const struct sched_attr *attr)
1077{
1164
1165 return result;
1166}
1167
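The handler above backs the system-wide clamp sysctls, so the defaults can be adjusted from user space through procfs. A hedged sketch follows; the /proc/sys/kernel/sched_util_clamp_min path is assumed from the usual sysctl naming rather than taken from this diff.

/* Illustration only: lower the system-wide minimum clamp to 0 (needs root). */
#include <stdio.h>

int main(void)
{
	/* Path assumed from the sysctl name exposed under /proc/sys/kernel/. */
	FILE *f = fopen("/proc/sys/kernel/sched_util_clamp_min", "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	fprintf(f, "0\n");
	fclose(f);
	return 0;
}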
1168static int uclamp_validate(struct task_struct *p,
1169 const struct sched_attr *attr)
1170{
1171 unsigned int lower_bound = p->uclamp_req[UCLAMP_MIN].value;

--- 10 unchanged lines hidden (view full) ---

1182 return -EINVAL;
1183
1184 return 0;
1185}
1186
1187static void __setscheduler_uclamp(struct task_struct *p,
1188 const struct sched_attr *attr)
1189{
1078 unsigned int clamp_id;
1190 enum uclamp_id clamp_id;
1079
1080 /*
1081 * On scheduling class change, reset to default clamps for tasks
1082 * without a task-specific value.
1083 */
1084 for_each_clamp_id(clamp_id) {
1085 struct uclamp_se *uc_se = &p->uclamp_req[clamp_id];
1086 unsigned int clamp_value = uclamp_none(clamp_id);

--- 20 unchanged lines hidden (view full) ---

1107 if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX) {
1108 uclamp_se_set(&p->uclamp_req[UCLAMP_MAX],
1109 attr->sched_util_max, true);
1110 }
1111}
1112
1113static void uclamp_fork(struct task_struct *p)
1114{
1191
1192 /*
1193 * On scheduling class change, reset to default clamps for tasks
1194 * without a task-specific value.
1195 */
1196 for_each_clamp_id(clamp_id) {
1197 struct uclamp_se *uc_se = &p->uclamp_req[clamp_id];
1198 unsigned int clamp_value = uclamp_none(clamp_id);

--- 20 unchanged lines hidden (view full) ---

1219 if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX) {
1220 uclamp_se_set(&p->uclamp_req[UCLAMP_MAX],
1221 attr->sched_util_max, true);
1222 }
1223}
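__setscheduler_uclamp() is reached via sched_setattr() with SCHED_FLAG_UTIL_CLAMP_{MIN,MAX} set. Below is a rough user-space sketch of such a request; the struct layout and flag values are assumptions based on the 5.3-era uapi headers and should be checked against <linux/sched/types.h>, not taken from this diff.

/* Illustration only: request util_min=256, util_max=512 for the caller. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

struct sched_attr_sketch {		/* assumed layout of struct sched_attr */
	uint32_t size;
	uint32_t sched_policy;
	uint64_t sched_flags;
	int32_t  sched_nice;
	uint32_t sched_priority;
	uint64_t sched_runtime;
	uint64_t sched_deadline;
	uint64_t sched_period;
	uint32_t sched_util_min;
	uint32_t sched_util_max;
};

#define SCHED_FLAG_UTIL_CLAMP_MIN	0x20	/* assumed value */
#define SCHED_FLAG_UTIL_CLAMP_MAX	0x40	/* assumed value */

int main(void)
{
	struct sched_attr_sketch attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.sched_policy = 0;		/* SCHED_OTHER */
	attr.sched_flags = SCHED_FLAG_UTIL_CLAMP_MIN | SCHED_FLAG_UTIL_CLAMP_MAX;
	attr.sched_util_min = 256;
	attr.sched_util_max = 512;

	/* glibc has no wrapper for sched_setattr(); use the raw syscall. */
	if (syscall(SYS_sched_setattr, 0 /* current task */, &attr, 0) < 0) {
		perror("sched_setattr");
		return 1;
	}
	return 0;
}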
1224
1225static void uclamp_fork(struct task_struct *p)
1226{
1115 unsigned int clamp_id;
1227 enum uclamp_id clamp_id;
1116
1117 for_each_clamp_id(clamp_id)
1118 p->uclamp[clamp_id].active = false;
1119
1120 if (likely(!p->sched_reset_on_fork))
1121 return;
1122
1123 for_each_clamp_id(clamp_id) {

--- 5 unchanged lines hidden (view full) ---

1129
1130 uclamp_se_set(&p->uclamp_req[clamp_id], clamp_value, false);
1131 }
1132}
1133
1134static void __init init_uclamp(void)
1135{
1136 struct uclamp_se uc_max = {};
1228
1229 for_each_clamp_id(clamp_id)
1230 p->uclamp[clamp_id].active = false;
1231
1232 if (likely(!p->sched_reset_on_fork))
1233 return;
1234
1235 for_each_clamp_id(clamp_id) {

--- 5 unchanged lines hidden (view full) ---

1241
1242 uclamp_se_set(&p->uclamp_req[clamp_id], clamp_value, false);
1243 }
1244}
1245
1246static void __init init_uclamp(void)
1247{
1248 struct uclamp_se uc_max = {};
1137 unsigned int clamp_id;
1249 enum uclamp_id clamp_id;
1138 int cpu;
1139
1250 int cpu;
1251
1252 mutex_init(&uclamp_mutex);
1253
1140 for_each_possible_cpu(cpu) {
1141 memset(&cpu_rq(cpu)->uclamp, 0, sizeof(struct uclamp_rq));
1142 cpu_rq(cpu)->uclamp_flags = 0;
1143 }
1144
1145 for_each_clamp_id(clamp_id) {
1146 uclamp_se_set(&init_task.uclamp_req[clamp_id],
1147 uclamp_none(clamp_id), false);
1148 }
1149
1150 /* System defaults allow max clamp values for both indexes */
1151 uclamp_se_set(&uc_max, uclamp_none(UCLAMP_MAX), false);
1254 for_each_possible_cpu(cpu) {
1255 memset(&cpu_rq(cpu)->uclamp, 0, sizeof(struct uclamp_rq));
1256 cpu_rq(cpu)->uclamp_flags = 0;
1257 }
1258
1259 for_each_clamp_id(clamp_id) {
1260 uclamp_se_set(&init_task.uclamp_req[clamp_id],
1261 uclamp_none(clamp_id), false);
1262 }
1263
1264 /* System defaults allow max clamp values for both indexes */
1265 uclamp_se_set(&uc_max, uclamp_none(UCLAMP_MAX), false);
1152 for_each_clamp_id(clamp_id)
1266 for_each_clamp_id(clamp_id) {
1153 uclamp_default[clamp_id] = uc_max;
1267 uclamp_default[clamp_id] = uc_max;
1268#ifdef CONFIG_UCLAMP_TASK_GROUP
1269 root_task_group.uclamp_req[clamp_id] = uc_max;
1270 root_task_group.uclamp[clamp_id] = uc_max;
1271#endif
1272 }
1154}
1155
1156#else /* CONFIG_UCLAMP_TASK */
1157static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p) { }
1158static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p) { }
1159static inline int uclamp_validate(struct task_struct *p,
1160 const struct sched_attr *attr)
1161{

--- 327 unchanged lines hidden (view full) ---

1489 if (running)
1490 put_prev_task(rq, p);
1491
1492 p->sched_class->set_cpus_allowed(p, new_mask);
1493
1494 if (queued)
1495 enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
1496 if (running)
1273}
1274
1275#else /* CONFIG_UCLAMP_TASK */
1276static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p) { }
1277static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p) { }
1278static inline int uclamp_validate(struct task_struct *p,
1279 const struct sched_attr *attr)
1280{

--- 327 unchanged lines hidden (view full) ---

1608 if (running)
1609 put_prev_task(rq, p);
1610
1611 p->sched_class->set_cpus_allowed(p, new_mask);
1612
1613 if (queued)
1614 enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
1615 if (running)
1497 set_curr_task(rq, p);
1616 set_next_task(rq, p);
1498}
1499
1500/*
1501 * Change a given task's CPU affinity. Migrate the thread to a
1502 * proper CPU and schedule it away if the CPU it's executing on
1503 * is removed from the allowed bitmask.
1504 *
1505 * NOTE: the caller must have a valid reference to the task, the

--- 1624 unchanged lines hidden (view full) ---

3130 * Remove function-return probe instances associated with this
3131 * task and put them back on the free list.
3132 */
3133 kprobe_flush_task(prev);
3134
3135 /* Task is done with its stack. */
3136 put_task_stack(prev);
3137
1617}
1618
1619/*
1620 * Change a given task's CPU affinity. Migrate the thread to a
1621 * proper CPU and schedule it away if the CPU it's executing on
1622 * is removed from the allowed bitmask.
1623 *
1624 * NOTE: the caller must have a valid reference to the task, the

--- 1624 unchanged lines hidden (view full) ---

3249 * Remove function-return probe instances associated with this
3250 * task and put them back on the free list.
3251 */
3252 kprobe_flush_task(prev);
3253
3254 /* Task is done with its stack. */
3255 put_task_stack(prev);
3256
3138 put_task_struct(prev);
3257 put_task_struct_rcu_user(prev);
3139 }
3140
3141 tick_nohz_task_switch();
3142 return rq;
3143}
3144
3145#ifdef CONFIG_SMP
3146

--- 62 unchanged lines hidden (view full) ---

3209
3210/*
3211 * context_switch - switch to the new MM and the new thread's register state.
3212 */
3213static __always_inline struct rq *
3214context_switch(struct rq *rq, struct task_struct *prev,
3215 struct task_struct *next, struct rq_flags *rf)
3216{
3258 }
3259
3260 tick_nohz_task_switch();
3261 return rq;
3262}
3263
3264#ifdef CONFIG_SMP
3265

--- 62 unchanged lines hidden (view full) ---

3328
3329/*
3330 * context_switch - switch to the new MM and the new thread's register state.
3331 */
3332static __always_inline struct rq *
3333context_switch(struct rq *rq, struct task_struct *prev,
3334 struct task_struct *next, struct rq_flags *rf)
3335{
3217 struct mm_struct *mm, *oldmm;
3218
3219 prepare_task_switch(rq, prev, next);
3220
3336 prepare_task_switch(rq, prev, next);
3337
3221 mm = next->mm;
3222 oldmm = prev->active_mm;
3223 /*
3224 * For paravirt, this is coupled with an exit in switch_to to
3225 * combine the page table reload and the switch backend into
3226 * one hypercall.
3227 */
3228 arch_start_context_switch(prev);
3229
3230 /*
3338 /*
3339 * For paravirt, this is coupled with an exit in switch_to to
3340 * combine the page table reload and the switch backend into
3341 * one hypercall.
3342 */
3343 arch_start_context_switch(prev);
3344
3345 /*
3231 * If mm is non-NULL, we pass through switch_mm(). If mm is
3232 * NULL, we will pass through mmdrop() in finish_task_switch().
3233 * Both of these contain the full memory barrier required by
3234 * membarrier after storing to rq->curr, before returning to
3235 * user-space.
3346 * kernel -> kernel lazy + transfer active
3347 * user -> kernel lazy + mmgrab() active
3348 *
3349 * kernel -> user switch + mmdrop() active
3350 * user -> user switch
3236 */
3351 */
3237 if (!mm) {
3238 next->active_mm = oldmm;
3239 mmgrab(oldmm);
3240 enter_lazy_tlb(oldmm, next);
3241 } else
3242 switch_mm_irqs_off(oldmm, mm, next);
3352 if (!next->mm) { // to kernel
3353 enter_lazy_tlb(prev->active_mm, next);
3243
3354
3244 if (!prev->mm) {
3245 prev->active_mm = NULL;
3246 rq->prev_mm = oldmm;
3355 next->active_mm = prev->active_mm;
3356 if (prev->mm) // from user
3357 mmgrab(prev->active_mm);
3358 else
3359 prev->active_mm = NULL;
3360 } else { // to user
3361 /*
3362 * sys_membarrier() requires an smp_mb() between setting
3363 * rq->curr and returning to userspace.
3364 *
3365 * The below provides this either through switch_mm(), or in
3366 * case 'prev->active_mm == next->mm' through
3367 * finish_task_switch()'s mmdrop().
3368 */
3369
3370 switch_mm_irqs_off(prev->active_mm, next->mm, next);
3371
3372 if (!prev->mm) { // from kernel
3373 /* will mmdrop() in finish_task_switch(). */
3374 rq->prev_mm = prev->active_mm;
3375 prev->active_mm = NULL;
3376 }
3247 }
3248
3249 rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP);
3250
3251 prepare_lock_switch(rq, next, rf);
3252
3253 /* Here we just switch the register state and the stack. */
3254 switch_to(prev, next, prev);

--- 226 unchanged lines hidden (view full) ---

3481 trigger_load_balance(rq);
3482#endif
3483}
3484
3485#ifdef CONFIG_NO_HZ_FULL
3486
3487struct tick_work {
3488 int cpu;
3377 }
3378
3379 rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP);
3380
3381 prepare_lock_switch(rq, next, rf);
3382
3383 /* Here we just switch the register state and the stack. */
3384 switch_to(prev, next, prev);

--- 226 unchanged lines hidden (view full) ---

3611 trigger_load_balance(rq);
3612#endif
3613}
3614
3615#ifdef CONFIG_NO_HZ_FULL
3616
3617struct tick_work {
3618 int cpu;
3619 atomic_t state;
3489 struct delayed_work work;
3490};
3620 struct delayed_work work;
3621};
3622/* Values for ->state, see diagram below. */
3623#define TICK_SCHED_REMOTE_OFFLINE 0
3624#define TICK_SCHED_REMOTE_OFFLINING 1
3625#define TICK_SCHED_REMOTE_RUNNING 2
3491
3626
3627/*
3628 * State diagram for ->state:
3629 *
3630 *
3631 * TICK_SCHED_REMOTE_OFFLINE
3632 * | ^
3633 * | |
3634 * | | sched_tick_remote()
3635 * | |
3636 * | |
3637 * +--TICK_SCHED_REMOTE_OFFLINING
3638 * | ^
3639 * | |
3640 * sched_tick_start() | | sched_tick_stop()
3641 * | |
3642 * V |
3643 * TICK_SCHED_REMOTE_RUNNING
3644 *
3645 *
3646 * Other transitions get WARN_ON_ONCE(), except that sched_tick_remote()
3647 * and sched_tick_start() are happy to leave the state in RUNNING.
3648 */
3649
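The same transitions can be modelled compactly in user space. The sketch below uses C11 atomics in place of the kernel's atomic_t helpers and printf() in place of queueing the delayed work; it is an illustration of the state machine above, not kernel code.

#include <stdatomic.h>
#include <stdio.h>

#define TICK_SCHED_REMOTE_OFFLINE	0
#define TICK_SCHED_REMOTE_OFFLINING	1
#define TICK_SCHED_REMOTE_RUNNING	2

static atomic_int state = TICK_SCHED_REMOTE_OFFLINE;

/* Emulates atomic_fetch_add_unless(): add @a unless the value equals @u. */
static int fetch_add_unless(atomic_int *v, int a, int u)
{
	int c = atomic_load(v);

	while (c != u && !atomic_compare_exchange_weak(v, &c, c + a))
		;
	return c;
}

static void tick_start(void)	/* models sched_tick_start() */
{
	if (atomic_exchange(&state, TICK_SCHED_REMOTE_RUNNING) ==
	    TICK_SCHED_REMOTE_OFFLINE)
		printf("start: queue the delayed work\n");
}

static void tick_stop(void)	/* models sched_tick_stop() */
{
	atomic_exchange(&state, TICK_SCHED_REMOTE_OFFLINING);
}

static void tick_remote(void)	/* models the tail of sched_tick_remote() */
{
	if (fetch_add_unless(&state, -1, TICK_SCHED_REMOTE_RUNNING) ==
	    TICK_SCHED_REMOTE_RUNNING)
		printf("remote: still RUNNING, requeue at 1Hz\n");
}

int main(void)
{
	tick_start();	/* OFFLINE   -> RUNNING             */
	tick_remote();	/* RUNNING   -> RUNNING, requeues   */
	tick_stop();	/* RUNNING   -> OFFLINING           */
	tick_remote();	/* OFFLINING -> OFFLINE, no requeue */
	return 0;
}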
3492static struct tick_work __percpu *tick_work_cpu;
3493
3494static void sched_tick_remote(struct work_struct *work)
3495{
3496 struct delayed_work *dwork = to_delayed_work(work);
3497 struct tick_work *twork = container_of(dwork, struct tick_work, work);
3498 int cpu = twork->cpu;
3499 struct rq *rq = cpu_rq(cpu);
3500 struct task_struct *curr;
3501 struct rq_flags rf;
3502 u64 delta;
3650static struct tick_work __percpu *tick_work_cpu;
3651
3652static void sched_tick_remote(struct work_struct *work)
3653{
3654 struct delayed_work *dwork = to_delayed_work(work);
3655 struct tick_work *twork = container_of(dwork, struct tick_work, work);
3656 int cpu = twork->cpu;
3657 struct rq *rq = cpu_rq(cpu);
3658 struct task_struct *curr;
3659 struct rq_flags rf;
3660 u64 delta;
3661 int os;
3503
3504 /*
3505 * Handle the tick only if it appears the remote CPU is running in full
3506 * dynticks mode. The check is racy by nature, but missing a tick or
3507 * having one too much is no big deal because the scheduler tick updates
3508 * statistics and checks timeslices in a time-independent way, regardless
3509 * of when exactly it is running.
3510 */
3511 if (idle_cpu(cpu) || !tick_nohz_tick_stopped_cpu(cpu))
3512 goto out_requeue;
3513
3514 rq_lock_irq(rq, &rf);
3515 curr = rq->curr;
3662
3663 /*
3664 * Handle the tick only if it appears the remote CPU is running in full
3665 * dynticks mode. The check is racy by nature, but missing a tick or
3666 * having one too much is no big deal because the scheduler tick updates
3667 * statistics and checks timeslices in a time-independent way, regardless
3668 * of when exactly it is running.
3669 */
3670 if (idle_cpu(cpu) || !tick_nohz_tick_stopped_cpu(cpu))
3671 goto out_requeue;
3672
3673 rq_lock_irq(rq, &rf);
3674 curr = rq->curr;
3516 if (is_idle_task(curr))
3675 if (is_idle_task(curr) || cpu_is_offline(cpu))
3517 goto out_unlock;
3518
3519 update_rq_clock(rq);
3520 delta = rq_clock_task(rq) - curr->se.exec_start;
3521
3522 /*
3523 * Make sure the next tick runs within a reasonable
3524 * amount of time.
3525 */
3526 WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3);
3527 curr->sched_class->task_tick(rq, curr, 0);
3528
3529out_unlock:
3530 rq_unlock_irq(rq, &rf);
3531
3532out_requeue:
3533 /*
3534 * Run the remote tick once per second (1Hz). This arbitrary
3535 * frequency is large enough to avoid overload but short enough
3676 goto out_unlock;
3677
3678 update_rq_clock(rq);
3679 delta = rq_clock_task(rq) - curr->se.exec_start;
3680
3681 /*
3682 * Make sure the next tick runs within a reasonable
3683 * amount of time.
3684 */
3685 WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3);
3686 curr->sched_class->task_tick(rq, curr, 0);
3687
3688out_unlock:
3689 rq_unlock_irq(rq, &rf);
3690
3691out_requeue:
3692 /*
3693 * Run the remote tick once per second (1Hz). This arbitrary
3694 * frequency is large enough to avoid overload but short enough
3536 * to keep scheduler internal stats reasonably up to date.
3695 * to keep scheduler internal stats reasonably up to date. But
3696 * first update state to reflect hotplug activity if required.
3537 */
3697 */
3538 queue_delayed_work(system_unbound_wq, dwork, HZ);
3698 os = atomic_fetch_add_unless(&twork->state, -1, TICK_SCHED_REMOTE_RUNNING);
3699 WARN_ON_ONCE(os == TICK_SCHED_REMOTE_OFFLINE);
3700 if (os == TICK_SCHED_REMOTE_RUNNING)
3701 queue_delayed_work(system_unbound_wq, dwork, HZ);
3539}
3540
3541static void sched_tick_start(int cpu)
3542{
3702}
3703
3704static void sched_tick_start(int cpu)
3705{
3706 int os;
3543 struct tick_work *twork;
3544
3545 if (housekeeping_cpu(cpu, HK_FLAG_TICK))
3546 return;
3547
3548 WARN_ON_ONCE(!tick_work_cpu);
3549
3550 twork = per_cpu_ptr(tick_work_cpu, cpu);
3707 struct tick_work *twork;
3708
3709 if (housekeeping_cpu(cpu, HK_FLAG_TICK))
3710 return;
3711
3712 WARN_ON_ONCE(!tick_work_cpu);
3713
3714 twork = per_cpu_ptr(tick_work_cpu, cpu);
3551 twork->cpu = cpu;
3552 INIT_DELAYED_WORK(&twork->work, sched_tick_remote);
3553 queue_delayed_work(system_unbound_wq, &twork->work, HZ);
3715 os = atomic_xchg(&twork->state, TICK_SCHED_REMOTE_RUNNING);
3716 WARN_ON_ONCE(os == TICK_SCHED_REMOTE_RUNNING);
3717 if (os == TICK_SCHED_REMOTE_OFFLINE) {
3718 twork->cpu = cpu;
3719 INIT_DELAYED_WORK(&twork->work, sched_tick_remote);
3720 queue_delayed_work(system_unbound_wq, &twork->work, HZ);
3721 }
3554}
3555
3556#ifdef CONFIG_HOTPLUG_CPU
3557static void sched_tick_stop(int cpu)
3558{
3559 struct tick_work *twork;
3722}
3723
3724#ifdef CONFIG_HOTPLUG_CPU
3725static void sched_tick_stop(int cpu)
3726{
3727 struct tick_work *twork;
3728 int os;
3560
3561 if (housekeeping_cpu(cpu, HK_FLAG_TICK))
3562 return;
3563
3564 WARN_ON_ONCE(!tick_work_cpu);
3565
3566 twork = per_cpu_ptr(tick_work_cpu, cpu);
3729
3730 if (housekeeping_cpu(cpu, HK_FLAG_TICK))
3731 return;
3732
3733 WARN_ON_ONCE(!tick_work_cpu);
3734
3735 twork = per_cpu_ptr(tick_work_cpu, cpu);
3567 cancel_delayed_work_sync(&twork->work);
3736 /* There cannot be competing actions, but don't rely on stop-machine. */
3737 os = atomic_xchg(&twork->state, TICK_SCHED_REMOTE_OFFLINING);
3738 WARN_ON_ONCE(os != TICK_SCHED_REMOTE_RUNNING);
3739 /* Don't cancel, as this would mess up the state machine. */
3568}
3569#endif /* CONFIG_HOTPLUG_CPU */
3570
3571int __init sched_tick_offload_init(void)
3572{
3573 tick_work_cpu = alloc_percpu(struct tick_work);
3574 BUG_ON(!tick_work_cpu);
3740}
3741#endif /* CONFIG_HOTPLUG_CPU */
3742
3743int __init sched_tick_offload_init(void)
3744{
3745 tick_work_cpu = alloc_percpu(struct tick_work);
3746 BUG_ON(!tick_work_cpu);
3575
3576 return 0;
3577}
3578
3579#else /* !CONFIG_NO_HZ_FULL */
3580static inline void sched_tick_start(int cpu) { }
3581static inline void sched_tick_stop(int cpu) { }
3582#endif
3583
3747 return 0;
3748}
3749
3750#else /* !CONFIG_NO_HZ_FULL */
3751static inline void sched_tick_start(int cpu) { }
3752static inline void sched_tick_stop(int cpu) { }
3753#endif
3754
3584#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
3755#if defined(CONFIG_PREEMPTION) && (defined(CONFIG_DEBUG_PREEMPT) || \
3585 defined(CONFIG_TRACE_PREEMPT_TOGGLE))
3586/*
3587 * If the value passed in is equal to the current preempt count
3588 * then we just disabled preemption. Start timing the latency.
3589 */
3590static inline void preempt_latency_start(int val)
3591{
3592 if (preempt_count() == val) {

--- 141 unchanged lines hidden (view full) ---

3734 * opportunity to pull in more work from other CPUs.
3735 */
3736 if (likely((prev->sched_class == &idle_sched_class ||
3737 prev->sched_class == &fair_sched_class) &&
3738 rq->nr_running == rq->cfs.h_nr_running)) {
3739
3740 p = fair_sched_class.pick_next_task(rq, prev, rf);
3741 if (unlikely(p == RETRY_TASK))
3756 defined(CONFIG_TRACE_PREEMPT_TOGGLE))
3757/*
3758 * If the value passed in is equal to the current preempt count
3759 * then we just disabled preemption. Start timing the latency.
3760 */
3761static inline void preempt_latency_start(int val)
3762{
3763 if (preempt_count() == val) {

--- 141 unchanged lines hidden (view full) ---

3905 * opportunity to pull in more work from other CPUs.
3906 */
3907 if (likely((prev->sched_class == &idle_sched_class ||
3908 prev->sched_class == &fair_sched_class) &&
3909 rq->nr_running == rq->cfs.h_nr_running)) {
3910
3911 p = fair_sched_class.pick_next_task(rq, prev, rf);
3912 if (unlikely(p == RETRY_TASK))
3742 goto again;
3913 goto restart;
3743
3744 /* Assumes fair_sched_class->next == idle_sched_class */
3745 if (unlikely(!p))
3746 p = idle_sched_class.pick_next_task(rq, prev, rf);
3747
3748 return p;
3749 }
3750
3914
3915 /* Assumes fair_sched_class->next == idle_sched_class */
3916 if (unlikely(!p))
3917 p = idle_sched_class.pick_next_task(rq, prev, rf);
3918
3919 return p;
3920 }
3921
3751again:
3922restart:
3923 /*
3924 * Ensure that we put DL/RT tasks before the pick loop, such that they
3925 * can PULL higher prio tasks when we lower the RQ 'priority'.
3926 */
3927 prev->sched_class->put_prev_task(rq, prev, rf);
3928 if (!rq->nr_running)
3929 newidle_balance(rq, rf);
3930
3752 for_each_class(class) {
3931 for_each_class(class) {
3753 p = class->pick_next_task(rq, prev, rf);
3754 if (p) {
3755 if (unlikely(p == RETRY_TASK))
3756 goto again;
3932 p = class->pick_next_task(rq, NULL, NULL);
3933 if (p)
3757 return p;
3934 return p;
3758 }
3759 }
3760
3761 /* The idle class should always have a runnable task: */
3762 BUG();
3763}
3764
3765/*
3766 * __schedule() is the main scheduler function.

--- 10 unchanged lines hidden (view full) ---

3777 *
3778 * 3. Wakeups don't really cause entry into schedule(). They add a
3779 * task to the run-queue and that's it.
3780 *
3781 * Now, if the new task added to the run-queue preempts the current
3782 * task, then the wakeup sets TIF_NEED_RESCHED and schedule() gets
3783 * called on the nearest possible occasion:
3784 *
3935 }
3936
3937 /* The idle class should always have a runnable task: */
3938 BUG();
3939}
3940
3941/*
3942 * __schedule() is the main scheduler function.

--- 10 unchanged lines hidden (view full) ---

3953 *
3954 * 3. Wakeups don't really cause entry into schedule(). They add a
3955 * task to the run-queue and that's it.
3956 *
3957 * Now, if the new task added to the run-queue preempts the current
3958 * task, then the wakeup sets TIF_NEED_RESCHED and schedule() gets
3959 * called on the nearest possible occasion:
3960 *
3785 * - If the kernel is preemptible (CONFIG_PREEMPT=y):
3961 * - If the kernel is preemptible (CONFIG_PREEMPTION=y):
3786 *
3787 * - in syscall or exception context, at the next outmost
3788 * preempt_enable(). (this might be as soon as the wake_up()'s
3789 * spin_unlock()!)
3790 *
3791 * - in IRQ context, return from interrupt-handler to
3792 * preemptible context
3793 *
3962 *
3963 * - in syscall or exception context, at the next outmost
3964 * preempt_enable(). (this might be as soon as the wake_up()'s
3965 * spin_unlock()!)
3966 *
3967 * - in IRQ context, return from interrupt-handler to
3968 * preemptible context
3969 *
3794 * - If the kernel is not preemptible (CONFIG_PREEMPT is not set)
3970 * - If the kernel is not preemptible (CONFIG_PREEMPTION is not set)
3795 * then at the next:
3796 *
3797 * - cond_resched() call
3798 * - explicit schedule() call
3799 * - return from syscall or exception to user-space
3800 * - return from interrupt-handler to user-space
3801 *
3802 * WARNING: must be called with preemption disabled!

--- 228 unchanged lines hidden (view full) ---

4031
4032 /*
4033 * Check again in case we missed a preemption opportunity
4034 * between schedule and now.
4035 */
4036 } while (need_resched());
4037}
4038
3971 * then at the next:
3972 *
3973 * - cond_resched() call
3974 * - explicit schedule() call
3975 * - return from syscall or exception to user-space
3976 * - return from interrupt-handler to user-space
3977 *
3978 * WARNING: must be called with preemption disabled!

--- 228 unchanged lines hidden (view full) ---

4207
4208 /*
4209 * Check again in case we missed a preemption opportunity
4210 * between schedule and now.
4211 */
4212 } while (need_resched());
4213}
4214
4039#ifdef CONFIG_PREEMPT
4215#ifdef CONFIG_PREEMPTION
4040/*
4041 * this is the entry point to schedule() from in-kernel preemption
4042 * off of preempt_enable. Kernel preemptions off return from interrupt
4043 * occur there and call schedule directly.
4044 */
4045asmlinkage __visible void __sched notrace preempt_schedule(void)
4046{
4047 /*

--- 55 unchanged lines hidden (view full) ---

4103 exception_exit(prev_ctx);
4104
4105 preempt_latency_stop(1);
4106 preempt_enable_no_resched_notrace();
4107 } while (need_resched());
4108}
4109EXPORT_SYMBOL_GPL(preempt_schedule_notrace);
4110
4216/*
4217 * this is the entry point to schedule() from in-kernel preemption
4218 * off of preempt_enable. Kernel preemptions off return from interrupt
4219 * occur there and call schedule directly.
4220 */
4221asmlinkage __visible void __sched notrace preempt_schedule(void)
4222{
4223 /*

--- 55 unchanged lines hidden (view full) ---

4279 exception_exit(prev_ctx);
4280
4281 preempt_latency_stop(1);
4282 preempt_enable_no_resched_notrace();
4283 } while (need_resched());
4284}
4285EXPORT_SYMBOL_GPL(preempt_schedule_notrace);
4286
4111#endif /* CONFIG_PREEMPT */
4287#endif /* CONFIG_PREEMPTION */
4112
4113/*
4114 * this is the entry point to schedule() from kernel preemption
4115 * off of irq context.
4116 * Note that this is called and returns with irqs disabled. This will
4117 * protect us against recursive calling from irq.
4118 */
4119asmlinkage __visible void __sched preempt_schedule_irq(void)

--- 151 unchanged lines hidden (view full) ---

4271 p->sched_class = &fair_sched_class;
4272 }
4273
4274 p->prio = prio;
4275
4276 if (queued)
4277 enqueue_task(rq, p, queue_flag);
4278 if (running)
4288
4289/*
4290 * this is the entry point to schedule() from kernel preemption
4291 * off of irq context.
4292 * Note that this is called and returns with irqs disabled. This will
4293 * protect us against recursive calling from irq.
4294 */
4295asmlinkage __visible void __sched preempt_schedule_irq(void)

--- 151 unchanged lines hidden (view full) ---

4447 p->sched_class = &fair_sched_class;
4448 }
4449
4450 p->prio = prio;
4451
4452 if (queued)
4453 enqueue_task(rq, p, queue_flag);
4454 if (running)
4279 set_curr_task(rq, p);
4455 set_next_task(rq, p);
4280
4281 check_class_changed(rq, p, prev_class, oldprio);
4282out_unlock:
4283 /* Avoid rq from going away on us: */
4284 preempt_disable();
4285 __task_rq_unlock(rq, &rf);
4286
4287 balance_callback(rq);

--- 50 unchanged lines hidden (view full) ---

4338 /*
4339 * If the task increased its priority or is running and
4340 * lowered its priority, then reschedule its CPU:
4341 */
4342 if (delta < 0 || (delta > 0 && task_running(rq, p)))
4343 resched_curr(rq);
4344 }
4345 if (running)
4456
4457 check_class_changed(rq, p, prev_class, oldprio);
4458out_unlock:
4459 /* Avoid rq from going away on us: */
4460 preempt_disable();
4461 __task_rq_unlock(rq, &rf);
4462
4463 balance_callback(rq);

--- 50 unchanged lines hidden (view full) ---

4514 /*
4515 * If the task increased its priority or is running and
4516 * lowered its priority, then reschedule its CPU:
4517 */
4518 if (delta < 0 || (delta > 0 && task_running(rq, p)))
4519 resched_curr(rq);
4520 }
4521 if (running)
4346 set_curr_task(rq, p);
4522 set_next_task(rq, p);
4347out_unlock:
4348 task_rq_unlock(rq, p, &rf);
4349}
4350EXPORT_SYMBOL(set_user_nice);
4351
4352/*
4353 * can_nice - check if a task can reduce its nice value
4354 * @p: task

--- 300 unchanged lines hidden (view full) ---

4655
4656 /* Update task specific "requested" clamps */
4657 if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) {
4658 retval = uclamp_validate(p, attr);
4659 if (retval)
4660 return retval;
4661 }
4662
4523out_unlock:
4524 task_rq_unlock(rq, p, &rf);
4525}
4526EXPORT_SYMBOL(set_user_nice);
4527
4528/*
4529 * can_nice - check if a task can reduce its nice value
4530 * @p: task

--- 300 unchanged lines hidden (view full) ---

4831
4832 /* Update task specific "requested" clamps */
4833 if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) {
4834 retval = uclamp_validate(p, attr);
4835 if (retval)
4836 return retval;
4837 }
4838
4839 if (pi)
4840 cpuset_read_lock();
4841
4663 /*
4664 * Make sure no PI-waiters arrive (or leave) while we are
4665 * changing the priority of the task:
4666 *
4667 * To be able to change p->policy safely, the appropriate
4668 * runqueue lock must be held.
4669 */
4670 rq = task_rq_lock(p, &rf);
4671 update_rq_clock(rq);
4672
4673 /*
4674 * Changing the policy of the stop threads is a very bad idea:
4675 */
4676 if (p == rq->stop) {
4842 /*
4843 * Make sure no PI-waiters arrive (or leave) while we are
4844 * changing the priority of the task:
4845 *
4846 * To be able to change p->policy safely, the appropriate
4847 * runqueue lock must be held.
4848 */
4849 rq = task_rq_lock(p, &rf);
4850 update_rq_clock(rq);
4851
4852 /*
4853 * Changing the policy of the stop threads is a very bad idea:
4854 */
4855 if (p == rq->stop) {
4677 task_rq_unlock(rq, p, &rf);
4678 return -EINVAL;
4856 retval = -EINVAL;
4857 goto unlock;
4679 }
4680
4681 /*
4682 * If not changing anything there's no need to proceed further,
4683 * but store a possible modification of reset_on_fork.
4684 */
4685 if (unlikely(policy == p->policy)) {
4686 if (fair_policy(policy) && attr->sched_nice != task_nice(p))
4687 goto change;
4688 if (rt_policy(policy) && attr->sched_priority != p->rt_priority)
4689 goto change;
4690 if (dl_policy(policy) && dl_param_changed(p, attr))
4691 goto change;
4692 if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)
4693 goto change;
4694
4695 p->sched_reset_on_fork = reset_on_fork;
4858 }
4859
4860 /*
4861 * If not changing anything there's no need to proceed further,
4862 * but store a possible modification of reset_on_fork.
4863 */
4864 if (unlikely(policy == p->policy)) {
4865 if (fair_policy(policy) && attr->sched_nice != task_nice(p))
4866 goto change;
4867 if (rt_policy(policy) && attr->sched_priority != p->rt_priority)
4868 goto change;
4869 if (dl_policy(policy) && dl_param_changed(p, attr))
4870 goto change;
4871 if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)
4872 goto change;
4873
4874 p->sched_reset_on_fork = reset_on_fork;
4696 task_rq_unlock(rq, p, &rf);
4697 return 0;
4875 retval = 0;
4876 goto unlock;
4698 }
4699change:
4700
4701 if (user) {
4702#ifdef CONFIG_RT_GROUP_SCHED
4703 /*
4704 * Do not allow realtime tasks into groups that have no runtime
4705 * assigned.
4706 */
4707 if (rt_bandwidth_enabled() && rt_policy(policy) &&
4708 task_group(p)->rt_bandwidth.rt_runtime == 0 &&
4709 !task_group_is_autogroup(task_group(p))) {
4877 }
4878change:
4879
4880 if (user) {
4881#ifdef CONFIG_RT_GROUP_SCHED
4882 /*
4883 * Do not allow realtime tasks into groups that have no runtime
4884 * assigned.
4885 */
4886 if (rt_bandwidth_enabled() && rt_policy(policy) &&
4887 task_group(p)->rt_bandwidth.rt_runtime == 0 &&
4888 !task_group_is_autogroup(task_group(p))) {
4710 task_rq_unlock(rq, p, &rf);
4711 return -EPERM;
4889 retval = -EPERM;
4890 goto unlock;
4712 }
4713#endif
4714#ifdef CONFIG_SMP
4715 if (dl_bandwidth_enabled() && dl_policy(policy) &&
4716 !(attr->sched_flags & SCHED_FLAG_SUGOV)) {
4717 cpumask_t *span = rq->rd->span;
4718
4719 /*
4720 * Don't allow tasks with an affinity mask smaller than
4721 * the entire root_domain to become SCHED_DEADLINE. We
4722 * will also fail if there's no bandwidth available.
4723 */
4724 if (!cpumask_subset(span, p->cpus_ptr) ||
4725 rq->rd->dl_bw.bw == 0) {
4891 }
4892#endif
4893#ifdef CONFIG_SMP
4894 if (dl_bandwidth_enabled() && dl_policy(policy) &&
4895 !(attr->sched_flags & SCHED_FLAG_SUGOV)) {
4896 cpumask_t *span = rq->rd->span;
4897
4898 /*
4899 * Don't allow tasks with an affinity mask smaller than
4900 * the entire root_domain to become SCHED_DEADLINE. We
4901 * will also fail if there's no bandwidth available.
4902 */
4903 if (!cpumask_subset(span, p->cpus_ptr) ||
4904 rq->rd->dl_bw.bw == 0) {
4726 task_rq_unlock(rq, p, &rf);
4727 return -EPERM;
4905 retval = -EPERM;
4906 goto unlock;
4728 }
4729 }
4730#endif
4731 }
4732
4733 /* Re-check policy now with rq lock held: */
4734 if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
4735 policy = oldpolicy = -1;
4736 task_rq_unlock(rq, p, &rf);
4907 }
4908 }
4909#endif
4910 }
4911
4912 /* Re-check policy now with rq lock held: */
4913 if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
4914 policy = oldpolicy = -1;
4915 task_rq_unlock(rq, p, &rf);
4916 if (pi)
4917 cpuset_read_unlock();
4737 goto recheck;
4738 }
4739
4740 /*
4741 * If setscheduling to SCHED_DEADLINE (or changing the parameters
4742 * of a SCHED_DEADLINE task) we need to check if enough bandwidth
4743 * is available.
4744 */
4745 if ((dl_policy(policy) || dl_task(p)) && sched_dl_overflow(p, policy, attr)) {
4918 goto recheck;
4919 }
4920
4921 /*
4922 * If setscheduling to SCHED_DEADLINE (or changing the parameters
4923 * of a SCHED_DEADLINE task) we need to check if enough bandwidth
4924 * is available.
4925 */
4926 if ((dl_policy(policy) || dl_task(p)) && sched_dl_overflow(p, policy, attr)) {
4746 task_rq_unlock(rq, p, &rf);
4747 return -EBUSY;
4927 retval = -EBUSY;
4928 goto unlock;
4748 }
4749
4750 p->sched_reset_on_fork = reset_on_fork;
4751 oldprio = p->prio;
4752
4753 if (pi) {
4754 /*
4755 * Take priority boosted tasks into account. If the new

--- 25 unchanged lines hidden (view full) ---

4781 * increased (user space view).
4782 */
4783 if (oldprio < p->prio)
4784 queue_flags |= ENQUEUE_HEAD;
4785
4786 enqueue_task(rq, p, queue_flags);
4787 }
4788 if (running)
4929 }
4930
4931 p->sched_reset_on_fork = reset_on_fork;
4932 oldprio = p->prio;
4933
4934 if (pi) {
4935 /*
4936 * Take priority boosted tasks into account. If the new

--- 25 unchanged lines hidden (view full) ---

4962 * increased (user space view).
4963 */
4964 if (oldprio < p->prio)
4965 queue_flags |= ENQUEUE_HEAD;
4966
4967 enqueue_task(rq, p, queue_flags);
4968 }
4969 if (running)
4789 set_curr_task(rq, p);
4970 set_next_task(rq, p);
4790
4791 check_class_changed(rq, p, prev_class, oldprio);
4792
4793 /* Avoid rq from going away on us: */
4794 preempt_disable();
4795 task_rq_unlock(rq, p, &rf);
4796
4971
4972 check_class_changed(rq, p, prev_class, oldprio);
4973
4974 /* Avoid rq from going away on us: */
4975 preempt_disable();
4976 task_rq_unlock(rq, p, &rf);
4977
4797 if (pi)
4978 if (pi) {
4979 cpuset_read_unlock();
4798 rt_mutex_adjust_pi(p);
4980 rt_mutex_adjust_pi(p);
4981 }
4799
4800 /* Run balance callbacks after we've adjusted the PI chain: */
4801 balance_callback(rq);
4802 preempt_enable();
4803
4804 return 0;
4982
4983 /* Run balance callbacks after we've adjusted the PI chain: */
4984 balance_callback(rq);
4985 preempt_enable();
4986
4987 return 0;
4988
4989unlock:
4990 task_rq_unlock(rq, p, &rf);
4991 if (pi)
4992 cpuset_read_unlock();
4993 return retval;
4805}
4806
4807static int _sched_setscheduler(struct task_struct *p, int policy,
4808 const struct sched_param *param, bool check)
4809{
4810 struct sched_attr attr = {
4811 .sched_policy = policy,
4812 .sched_priority = param->sched_priority,

--- 67 unchanged lines hidden (view full) ---

4880 if (!param || pid < 0)
4881 return -EINVAL;
4882 if (copy_from_user(&lparam, param, sizeof(struct sched_param)))
4883 return -EFAULT;
4884
4885 rcu_read_lock();
4886 retval = -ESRCH;
4887 p = find_process_by_pid(pid);
4994}
4995
4996static int _sched_setscheduler(struct task_struct *p, int policy,
4997 const struct sched_param *param, bool check)
4998{
4999 struct sched_attr attr = {
5000 .sched_policy = policy,
5001 .sched_priority = param->sched_priority,

--- 67 unchanged lines hidden (view full) ---

5069 if (!param || pid < 0)
5070 return -EINVAL;
5071 if (copy_from_user(&lparam, param, sizeof(struct sched_param)))
5072 return -EFAULT;
5073
5074 rcu_read_lock();
5075 retval = -ESRCH;
5076 p = find_process_by_pid(pid);
4888 if (p != NULL)
4889 retval = sched_setscheduler(p, policy, &lparam);
5077 if (likely(p))
5078 get_task_struct(p);
4890 rcu_read_unlock();
4891
5079 rcu_read_unlock();
5080
5081 if (likely(p)) {
5082 retval = sched_setscheduler(p, policy, &lparam);
5083 put_task_struct(p);
5084 }
5085
4892 return retval;
4893}
4894
4895/*
4896 * Mimics kernel/events/core.c perf_copy_attr().
4897 */
4898static int sched_copy_attr(struct sched_attr __user *uattr, struct sched_attr *attr)
4899{

--- 514 unchanged lines hidden (view full) ---

5414}
5415
5416SYSCALL_DEFINE0(sched_yield)
5417{
5418 do_sched_yield();
5419 return 0;
5420}
5421
5086 return retval;
5087}
5088
5089/*
5090 * Mimics kernel/events/core.c perf_copy_attr().
5091 */
5092static int sched_copy_attr(struct sched_attr __user *uattr, struct sched_attr *attr)
5093{

--- 514 unchanged lines hidden (view full) ---

5608}
5609
5610SYSCALL_DEFINE0(sched_yield)
5611{
5612 do_sched_yield();
5613 return 0;
5614}
5615
5422#ifndef CONFIG_PREEMPT
5616#ifndef CONFIG_PREEMPTION
5423int __sched _cond_resched(void)
5424{
5425 if (should_resched(0)) {
5426 preempt_schedule_common();
5427 return 1;
5428 }
5429 rcu_all_qs();
5430 return 0;
5431}
5432EXPORT_SYMBOL(_cond_resched);
5433#endif
5434
5435/*
5436 * __cond_resched_lock() - if a reschedule is pending, drop the given lock,
5437 * call schedule, and on return reacquire the lock.
5438 *
5617int __sched _cond_resched(void)
5618{
5619 if (should_resched(0)) {
5620 preempt_schedule_common();
5621 return 1;
5622 }
5623 rcu_all_qs();
5624 return 0;
5625}
5626EXPORT_SYMBOL(_cond_resched);
5627#endif
5628
5629/*
5630 * __cond_resched_lock() - if a reschedule is pending, drop the given lock,
5631 * call schedule, and on return reacquire the lock.
5632 *
5439 * This works OK both with and without CONFIG_PREEMPT. We do strange low-level
5633 * This works OK both with and without CONFIG_PREEMPTION. We do strange low-level
5440 * operations here to prevent schedule() from being called twice (once via
5441 * spin_unlock(), once by hand).
5442 */
5443int __cond_resched_lock(spinlock_t *lock)
5444{
5445 int resched = should_resched(PREEMPT_LOCK_OFFSET);
5446 int ret = 0;
5447

--- 522 unchanged lines hidden (view full) ---

5970 if (running)
5971 put_prev_task(rq, p);
5972
5973 p->numa_preferred_nid = nid;
5974
5975 if (queued)
5976 enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
5977 if (running)
5634 * operations here to prevent schedule() from being called twice (once via
5635 * spin_unlock(), once by hand).
5636 */
5637int __cond_resched_lock(spinlock_t *lock)
5638{
5639 int resched = should_resched(PREEMPT_LOCK_OFFSET);
5640 int ret = 0;
5641

--- 522 unchanged lines hidden (view full) ---

6164 if (running)
6165 put_prev_task(rq, p);
6166
6167 p->numa_preferred_nid = nid;
6168
6169 if (queued)
6170 enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
6171 if (running)
5978 set_curr_task(rq, p);
6172 set_next_task(rq, p);
5979 task_rq_unlock(rq, p, &rf);
5980}
5981#endif /* CONFIG_NUMA_BALANCING */
5982
5983#ifdef CONFIG_HOTPLUG_CPU
5984/*
5985 * Ensure that the idle task is using init_mm right before its CPU goes
5986 * offline.

--- 23 unchanged lines hidden (view full) ---

6010 */
6011static void calc_load_migrate(struct rq *rq)
6012{
6013 long delta = calc_load_fold_active(rq, 1);
6014 if (delta)
6015 atomic_long_add(delta, &calc_load_tasks);
6016}
6017
6173 task_rq_unlock(rq, p, &rf);
6174}
6175#endif /* CONFIG_NUMA_BALANCING */
6176
6177#ifdef CONFIG_HOTPLUG_CPU
6178/*
6179 * Ensure that the idle task is using init_mm right before its CPU goes
6180 * offline.

--- 23 unchanged lines hidden (view full) ---

6204 */
6205static void calc_load_migrate(struct rq *rq)
6206{
6207 long delta = calc_load_fold_active(rq, 1);
6208 if (delta)
6209 atomic_long_add(delta, &calc_load_tasks);
6210}
6211
6018static void put_prev_task_fake(struct rq *rq, struct task_struct *prev)
6212static struct task_struct *__pick_migrate_task(struct rq *rq)
6019{
6213{
6020}
6214 const struct sched_class *class;
6215 struct task_struct *next;
6021
6216
6022static const struct sched_class fake_sched_class = {
6023 .put_prev_task = put_prev_task_fake,
6024};
6217 for_each_class(class) {
6218 next = class->pick_next_task(rq, NULL, NULL);
6219 if (next) {
6220 next->sched_class->put_prev_task(rq, next, NULL);
6221 return next;
6222 }
6223 }
6025
6224
6026static struct task_struct fake_task = {
6027 /*
6028 * Avoid pull_{rt,dl}_task()
6029 */
6030 .prio = MAX_PRIO + 1,
6031 .sched_class = &fake_sched_class,
6032};
6225 /* The idle class should always have a runnable task */
6226 BUG();
6227}
6033
6034/*
6035 * Migrate all tasks from the rq, sleeping tasks will be migrated by
6036 * try_to_wake_up()->select_task_rq().
6037 *
6038 * Called with rq->lock held even though we're in stop_machine() and
6039 * there's no concurrency possible, we hold the required locks anyway
6040 * because of lock validation efforts.

--- 26 unchanged lines hidden (view full) ---

6067 for (;;) {
6068 /*
6069 * There's this thread running, bail when that's the only
6070 * remaining thread:
6071 */
6072 if (rq->nr_running == 1)
6073 break;
6074
6228
6229/*
6230 * Migrate all tasks from the rq, sleeping tasks will be migrated by
6231 * try_to_wake_up()->select_task_rq().
6232 *
6233 * Called with rq->lock held even though we're in stop_machine() and
6234 * there's no concurrency possible, we hold the required locks anyway
6235 * because of lock validation efforts.

--- 26 unchanged lines hidden (view full) ---

6262 for (;;) {
6263 /*
6264 * There's this thread running, bail when that's the only
6265 * remaining thread:
6266 */
6267 if (rq->nr_running == 1)
6268 break;
6269
6075 /*
6076 * pick_next_task() assumes pinned rq->lock:
6077 */
6078 next = pick_next_task(rq, &fake_task, rf);
6079 BUG_ON(!next);
6080 put_prev_task(rq, next);
6270 next = __pick_migrate_task(rq);
6081
6082 /*
6083 * Rules for changing task_struct::cpus_mask are holding
6084 * both pi_lock and rq->lock, such that holding either
6085 * stabilizes the mask.
6086 *
6087 * Dropping rq->lock is not quite as disastrous as it usually is
6088 * because !cpu_active at this point, which means load-balance

--- 280 unchanged lines hidden (view full) ---

6369static struct kmem_cache *task_group_cache __read_mostly;
6370#endif
6371
6372DECLARE_PER_CPU(cpumask_var_t, load_balance_mask);
6373DECLARE_PER_CPU(cpumask_var_t, select_idle_mask);
6374
6375void __init sched_init(void)
6376{
6271
6272 /*
6273 * Rules for changing task_struct::cpus_mask are holding
6274 * both pi_lock and rq->lock, such that holding either
6275 * stabilizes the mask.
6276 *
6277 * Dropping rq->lock is not quite as disastrous as it usually is
6278 * because !cpu_active at this point, which means load-balance

--- 280 unchanged lines hidden (view full) ---

6559static struct kmem_cache *task_group_cache __read_mostly;
6560#endif
6561
6562DECLARE_PER_CPU(cpumask_var_t, load_balance_mask);
6563DECLARE_PER_CPU(cpumask_var_t, select_idle_mask);
6564
6565void __init sched_init(void)
6566{
6377 unsigned long alloc_size = 0, ptr;
6567 unsigned long ptr = 0;
6378 int i;
6379
6380 wait_bit_init();
6381
6382#ifdef CONFIG_FAIR_GROUP_SCHED
6568 int i;
6569
6570 wait_bit_init();
6571
6572#ifdef CONFIG_FAIR_GROUP_SCHED
6383 alloc_size += 2 * nr_cpu_ids * sizeof(void **);
6573 ptr += 2 * nr_cpu_ids * sizeof(void **);
6384#endif
6385#ifdef CONFIG_RT_GROUP_SCHED
6574#endif
6575#ifdef CONFIG_RT_GROUP_SCHED
6386 alloc_size += 2 * nr_cpu_ids * sizeof(void **);
6576 ptr += 2 * nr_cpu_ids * sizeof(void **);
6387#endif
6577#endif
6388 if (alloc_size) {
6389 ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT);
6578 if (ptr) {
6579 ptr = (unsigned long)kzalloc(ptr, GFP_NOWAIT);
6390
6391#ifdef CONFIG_FAIR_GROUP_SCHED
6392 root_task_group.se = (struct sched_entity **)ptr;
6393 ptr += nr_cpu_ids * sizeof(void **);
6394
6395 root_task_group.cfs_rq = (struct cfs_rq **)ptr;
6396 ptr += nr_cpu_ids * sizeof(void **);
6397

--- 302 unchanged lines hidden (view full) ---

6700{
6701 return cpu_curr(cpu);
6702}
6703
6704#endif /* defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) */
6705
6706#ifdef CONFIG_IA64
6707/**
6580
6581#ifdef CONFIG_FAIR_GROUP_SCHED
6582 root_task_group.se = (struct sched_entity **)ptr;
6583 ptr += nr_cpu_ids * sizeof(void **);
6584
6585 root_task_group.cfs_rq = (struct cfs_rq **)ptr;
6586 ptr += nr_cpu_ids * sizeof(void **);
6587

--- 302 unchanged lines hidden (view full) ---
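/*
 * Note on sched_init() above: in the newer revision "ptr" serves two roles -
 * first it accumulates the size of the per-CPU pointer arrays required by the
 * group-scheduling options, then, after the single kzalloc(), it acts as a
 * cursor that is advanced while root_task_group's pointer arrays are carved
 * out of that one allocation.
 */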

6890{
6891 return cpu_curr(cpu);
6892}
6893
6894#endif /* defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) */
6895
6896#ifdef CONFIG_IA64
6897/**
6708 * set_curr_task - set the current task for a given CPU.
6898 * ia64_set_curr_task - set the current task for a given CPU.
6709 * @cpu: the processor in question.
6710 * @p: the task pointer to set.
6711 *
6712 * Description: This function must only be used when non-maskable interrupts
6713 * are serviced on a separate stack. It allows the architecture to switch the
6714 * notion of the current task on a CPU in a non-blocking manner. This function
6715 * must be called with all CPUs synchronized and interrupts disabled; the
6716 * caller must save the original value of the current task (see

--- 8 unchanged lines hidden (view full) ---

6725}
6726
6727#endif
6728
6729#ifdef CONFIG_CGROUP_SCHED
6730/* task_group_lock serializes the addition/removal of task groups */
6731static DEFINE_SPINLOCK(task_group_lock);
6732
6899 * @cpu: the processor in question.
6900 * @p: the task pointer to set.
6901 *
6902 * Description: This function must only be used when non-maskable interrupts
6903 * are serviced on a separate stack. It allows the architecture to switch the
6904 * notion of the current task on a CPU in a non-blocking manner. This function
6905 * must be called with all CPUs synchronized and interrupts disabled; the
6906 * caller must save the original value of the current task (see

--- 8 unchanged lines hidden (view full) ---

6915}
6916
6917#endif
6918
6919#ifdef CONFIG_CGROUP_SCHED
6920/* task_group_lock serializes the addition/removal of task groups */
6921static DEFINE_SPINLOCK(task_group_lock);
6922
6923static inline void alloc_uclamp_sched_group(struct task_group *tg,
6924 struct task_group *parent)
6925{
6926#ifdef CONFIG_UCLAMP_TASK_GROUP
6927 enum uclamp_id clamp_id;
6928
6929 for_each_clamp_id(clamp_id) {
6930 uclamp_se_set(&tg->uclamp_req[clamp_id],
6931 uclamp_none(clamp_id), false);
6932 tg->uclamp[clamp_id] = parent->uclamp[clamp_id];
6933 }
6934#endif
6935}
6936
6733static void sched_free_group(struct task_group *tg)
6734{
6735 free_fair_sched_group(tg);
6736 free_rt_sched_group(tg);
6737 autogroup_free(tg);
6738 kmem_cache_free(task_group_cache, tg);
6739}
6740

--- 7 unchanged lines hidden (view full) ---

6748 return ERR_PTR(-ENOMEM);
6749
6750 if (!alloc_fair_sched_group(tg, parent))
6751 goto err;
6752
6753 if (!alloc_rt_sched_group(tg, parent))
6754 goto err;
6755
6937static void sched_free_group(struct task_group *tg)
6938{
6939 free_fair_sched_group(tg);
6940 free_rt_sched_group(tg);
6941 autogroup_free(tg);
6942 kmem_cache_free(task_group_cache, tg);
6943}
6944

--- 7 unchanged lines hidden (view full) ---

6952 return ERR_PTR(-ENOMEM);
6953
6954 if (!alloc_fair_sched_group(tg, parent))
6955 goto err;
6956
6957 if (!alloc_rt_sched_group(tg, parent))
6958 goto err;
6959
6960 alloc_uclamp_sched_group(tg, parent);
6961
6756 return tg;
6757
6758err:
6759 sched_free_group(tg);
6760 return ERR_PTR(-ENOMEM);
6761}
6762
6763void sched_online_group(struct task_group *tg, struct task_group *parent)

--- 87 unchanged lines hidden (view full) ---

6851 if (running)
6852 put_prev_task(rq, tsk);
6853
6854 sched_change_group(tsk, TASK_MOVE_GROUP);
6855
6856 if (queued)
6857 enqueue_task(rq, tsk, queue_flags);
6858 if (running)
6962 return tg;
6963
6964err:
6965 sched_free_group(tg);
6966 return ERR_PTR(-ENOMEM);
6967}
6968
6969void sched_online_group(struct task_group *tg, struct task_group *parent)

--- 87 unchanged lines hidden (view full) ---

7057 if (running)
7058 put_prev_task(rq, tsk);
7059
7060 sched_change_group(tsk, TASK_MOVE_GROUP);
7061
7062 if (queued)
7063 enqueue_task(rq, tsk, queue_flags);
7064 if (running)
6859 set_curr_task(rq, tsk);
7065 set_next_task(rq, tsk);
6860
6861 task_rq_unlock(rq, tsk, &rf);
6862}
6863
6864static inline struct task_group *css_tg(struct cgroup_subsys_state *css)
6865{
6866 return css ? container_of(css, struct task_group, css) : NULL;
6867}

--- 66 unchanged lines hidden (view full) ---

6934 struct task_struct *task;
6935 struct cgroup_subsys_state *css;
6936 int ret = 0;
6937
6938 cgroup_taskset_for_each(task, css, tset) {
6939#ifdef CONFIG_RT_GROUP_SCHED
6940 if (!sched_rt_can_attach(css_tg(css), task))
6941 return -EINVAL;
7066
7067 task_rq_unlock(rq, tsk, &rf);
7068}
7069
7070static inline struct task_group *css_tg(struct cgroup_subsys_state *css)
7071{
7072 return css ? container_of(css, struct task_group, css) : NULL;
7073}

--- 66 unchanged lines hidden (view full) ---

7140 struct task_struct *task;
7141 struct cgroup_subsys_state *css;
7142 int ret = 0;
7143
7144 cgroup_taskset_for_each(task, css, tset) {
7145#ifdef CONFIG_RT_GROUP_SCHED
7146 if (!sched_rt_can_attach(css_tg(css), task))
7147 return -EINVAL;
6942#else
6943 /* We don't support RT-tasks being in separate groups */
6944 if (task->sched_class != &fair_sched_class)
6945 return -EINVAL;
6946#endif
6947 /*
6948 * Serialize against wake_up_new_task() such that if it's
6949 * running, we're sure to observe its full state.
6950 */
6951 raw_spin_lock_irq(&task->pi_lock);
6952 /*
6953 * Avoid calling sched_move_task() before wake_up_new_task()

--- 14 unchanged lines hidden (view full) ---

6968{
6969 struct task_struct *task;
6970 struct cgroup_subsys_state *css;
6971
6972 cgroup_taskset_for_each(task, css, tset)
6973 sched_move_task(task);
6974}
6975
7148#endif
7149 /*
7150 * Serialize against wake_up_new_task() such that if it's
7151 * running, we're sure to observe its full state.
7152 */
7153 raw_spin_lock_irq(&task->pi_lock);
7154 /*
7155 * Avoid calling sched_move_task() before wake_up_new_task()

--- 14 unchanged lines hidden (view full) ---

7170{
7171 struct task_struct *task;
7172 struct cgroup_subsys_state *css;
7173
7174 cgroup_taskset_for_each(task, css, tset)
7175 sched_move_task(task);
7176}
7177
7178#ifdef CONFIG_UCLAMP_TASK_GROUP
7179static void cpu_util_update_eff(struct cgroup_subsys_state *css)
7180{
7181 struct cgroup_subsys_state *top_css = css;
7182 struct uclamp_se *uc_parent = NULL;
7183 struct uclamp_se *uc_se = NULL;
7184 unsigned int eff[UCLAMP_CNT];
7185 enum uclamp_id clamp_id;
7186 unsigned int clamps;
7187
7188 css_for_each_descendant_pre(css, top_css) {
7189 uc_parent = css_tg(css)->parent
7190 ? css_tg(css)->parent->uclamp : NULL;
7191
7192 for_each_clamp_id(clamp_id) {
7193 /* Assume effective clamps match requested clamps */
7194 eff[clamp_id] = css_tg(css)->uclamp_req[clamp_id].value;
7195 /* Cap effective clamps with parent's effective clamps */
7196 if (uc_parent &&
7197 eff[clamp_id] > uc_parent[clamp_id].value) {
7198 eff[clamp_id] = uc_parent[clamp_id].value;
7199 }
7200 }
7201 /* Ensure protection is always capped by limit */
7202 eff[UCLAMP_MIN] = min(eff[UCLAMP_MIN], eff[UCLAMP_MAX]);
7203
7204 /* Propagate most restrictive effective clamps */
7205 clamps = 0x0;
7206 uc_se = css_tg(css)->uclamp;
7207 for_each_clamp_id(clamp_id) {
7208 if (eff[clamp_id] == uc_se[clamp_id].value)
7209 continue;
7210 uc_se[clamp_id].value = eff[clamp_id];
7211 uc_se[clamp_id].bucket_id = uclamp_bucket_id(eff[clamp_id]);
7212 clamps |= (0x1 << clamp_id);
7213 }
7214 if (!clamps) {
7215 css = css_rightmost_descendant(css);
7216 continue;
7217 }
7218
7219 /* Immediately update descendants RUNNABLE tasks */
7220 uclamp_update_active_tasks(css, clamps);
7221 }
7222}
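/*
 * Minimal sketch of the propagation rule above, as plain C with made-up
 * numbers: a group's effective clamp is its requested value capped by the
 * parent's effective value, and the effective minimum is then capped by the
 * effective maximum.
 */
static unsigned int effective_clamp(unsigned int request, unsigned int parent_eff)
{
	return request > parent_eff ? parent_eff : request;
}

/*
 * Example: parent effective {min = 614, max = 819} (roughly 60%/80% of 1024)
 * and child request {min = 1024, max = 512} give
 *   eff_min = min(1024, 614) = 614, eff_max = min(512, 819) = 512,
 * and finally eff_min = min(614, 512) = 512.
 */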
7223
7224/*
7225 * Compute the integer 10^N for a given exponent N by casting the literal "1eN"
7226 * C expression to an integer. Since there is no way to convert a macro argument (N) into a
7227 * character constant, use two levels of macros.
7228 */
7229#define _POW10(exp) ((unsigned int)1e##exp)
7230#define POW10(exp) _POW10(exp)
7231
7232struct uclamp_request {
7233#define UCLAMP_PERCENT_SHIFT 2
7234#define UCLAMP_PERCENT_SCALE (100 * POW10(UCLAMP_PERCENT_SHIFT))
7235 s64 percent;
7236 u64 util;
7237 int ret;
7238};
7239
7240static inline struct uclamp_request
7241capacity_from_percent(char *buf)
7242{
7243 struct uclamp_request req = {
7244 .percent = UCLAMP_PERCENT_SCALE,
7245 .util = SCHED_CAPACITY_SCALE,
7246 .ret = 0,
7247 };
7248
7249 buf = strim(buf);
7250 if (strcmp(buf, "max")) {
7251 req.ret = cgroup_parse_float(buf, UCLAMP_PERCENT_SHIFT,
7252 &req.percent);
7253 if (req.ret)
7254 return req;
7255 if (req.percent > UCLAMP_PERCENT_SCALE) {
7256 req.ret = -ERANGE;
7257 return req;
7258 }
7259
7260 req.util = req.percent << SCHED_CAPACITY_SHIFT;
7261 req.util = DIV_ROUND_CLOSEST_ULL(req.util, UCLAMP_PERCENT_SCALE);
7262 }
7263
7264 return req;
7265}
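/*
 * Worked example for the conversion above (values chosen for illustration,
 * with SCHED_CAPACITY_SHIFT = 10): writing "25" means 25%, and
 * cgroup_parse_float() with UCLAMP_PERCENT_SHIFT = 2 decimals yields
 * percent = 2500, so
 *   util = DIV_ROUND_CLOSEST_ULL(2500 << 10, UCLAMP_PERCENT_SCALE)
 *        = DIV_ROUND_CLOSEST_ULL(2560000, 10000) = 256,
 * i.e. one quarter of SCHED_CAPACITY_SCALE. Writing "max" skips the
 * conversion and leaves util = SCHED_CAPACITY_SCALE.
 */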
7266
7267static ssize_t cpu_uclamp_write(struct kernfs_open_file *of, char *buf,
7268 size_t nbytes, loff_t off,
7269 enum uclamp_id clamp_id)
7270{
7271 struct uclamp_request req;
7272 struct task_group *tg;
7273
7274 req = capacity_from_percent(buf);
7275 if (req.ret)
7276 return req.ret;
7277
7278 mutex_lock(&uclamp_mutex);
7279 rcu_read_lock();
7280
7281 tg = css_tg(of_css(of));
7282 if (tg->uclamp_req[clamp_id].value != req.util)
7283 uclamp_se_set(&tg->uclamp_req[clamp_id], req.util, false);
7284
7285 /*
7286 * Because the conversion rounding is not recoverable, we keep track of the
7287 * exact requested value
7288 */
7289 tg->uclamp_pct[clamp_id] = req.percent;
7290
7291 /* Update effective clamps to track the most restrictive value */
7292 cpu_util_update_eff(of_css(of));
7293
7294 rcu_read_unlock();
7295 mutex_unlock(&uclamp_mutex);
7296
7297 return nbytes;
7298}
7299
7300static ssize_t cpu_uclamp_min_write(struct kernfs_open_file *of,
7301 char *buf, size_t nbytes,
7302 loff_t off)
7303{
7304 return cpu_uclamp_write(of, buf, nbytes, off, UCLAMP_MIN);
7305}
7306
7307static ssize_t cpu_uclamp_max_write(struct kernfs_open_file *of,
7308 char *buf, size_t nbytes,
7309 loff_t off)
7310{
7311 return cpu_uclamp_write(of, buf, nbytes, off, UCLAMP_MAX);
7312}
7313
7314static inline void cpu_uclamp_print(struct seq_file *sf,
7315 enum uclamp_id clamp_id)
7316{
7317 struct task_group *tg;
7318 u64 util_clamp;
7319 u64 percent;
7320 u32 rem;
7321
7322 rcu_read_lock();
7323 tg = css_tg(seq_css(sf));
7324 util_clamp = tg->uclamp_req[clamp_id].value;
7325 rcu_read_unlock();
7326
7327 if (util_clamp == SCHED_CAPACITY_SCALE) {
7328 seq_puts(sf, "max\n");
7329 return;
7330 }
7331
7332 percent = tg->uclamp_pct[clamp_id];
7333 percent = div_u64_rem(percent, POW10(UCLAMP_PERCENT_SHIFT), &rem);
7334 seq_printf(sf, "%llu.%0*u\n", percent, UCLAMP_PERCENT_SHIFT, rem);
7335}
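/*
 * Example for the output format above: a stored request of 25% is kept as
 * tg->uclamp_pct[] = 2500; div_u64_rem(2500, 100, &rem) gives 25 with
 * rem = 0, so the file reads "25.00". A request of SCHED_CAPACITY_SCALE is
 * short-circuited to "max" instead.
 */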
7336
7337static int cpu_uclamp_min_show(struct seq_file *sf, void *v)
7338{
7339 cpu_uclamp_print(sf, UCLAMP_MIN);
7340 return 0;
7341}
7342
7343static int cpu_uclamp_max_show(struct seq_file *sf, void *v)
7344{
7345 cpu_uclamp_print(sf, UCLAMP_MAX);
7346 return 0;
7347}
7348#endif /* CONFIG_UCLAMP_TASK_GROUP */
7349
6976#ifdef CONFIG_FAIR_GROUP_SCHED
6977static int cpu_shares_write_u64(struct cgroup_subsys_state *css,
6978 struct cftype *cftype, u64 shareval)
6979{
6980 if (shareval > scale_load_down(ULONG_MAX))
6981 shareval = MAX_SHARES;
6982 return sched_group_set_shares(css_tg(css), scale_load(shareval));
6983}

--- 329 unchanged lines hidden (view full) ---

7313 .write_s64 = cpu_rt_runtime_write,
7314 },
7315 {
7316 .name = "rt_period_us",
7317 .read_u64 = cpu_rt_period_read_uint,
7318 .write_u64 = cpu_rt_period_write_uint,
7319 },
7320#endif
7350#ifdef CONFIG_FAIR_GROUP_SCHED
7351static int cpu_shares_write_u64(struct cgroup_subsys_state *css,
7352 struct cftype *cftype, u64 shareval)
7353{
7354 if (shareval > scale_load_down(ULONG_MAX))
7355 shareval = MAX_SHARES;
7356 return sched_group_set_shares(css_tg(css), scale_load(shareval));
7357}

--- 329 unchanged lines hidden (view full) ---

7687 .write_s64 = cpu_rt_runtime_write,
7688 },
7689 {
7690 .name = "rt_period_us",
7691 .read_u64 = cpu_rt_period_read_uint,
7692 .write_u64 = cpu_rt_period_write_uint,
7693 },
7694#endif
7695#ifdef CONFIG_UCLAMP_TASK_GROUP
7696 {
7697 .name = "uclamp.min",
7698 .flags = CFTYPE_NOT_ON_ROOT,
7699 .seq_show = cpu_uclamp_min_show,
7700 .write = cpu_uclamp_min_write,
7701 },
7702 {
7703 .name = "uclamp.max",
7704 .flags = CFTYPE_NOT_ON_ROOT,
7705 .seq_show = cpu_uclamp_max_show,
7706 .write = cpu_uclamp_max_write,
7707 },
7708#endif
7321 { } /* Terminate */
7322};
7323
7324static int cpu_extra_stat_show(struct seq_file *sf,
7325 struct cgroup_subsys_state *css)
7326{
7327#ifdef CONFIG_CFS_BANDWIDTH
7328 {

--- 151 unchanged lines hidden (view full) ---

7480#ifdef CONFIG_CFS_BANDWIDTH
7481 {
7482 .name = "max",
7483 .flags = CFTYPE_NOT_ON_ROOT,
7484 .seq_show = cpu_max_show,
7485 .write = cpu_max_write,
7486 },
7487#endif
7709 { } /* Terminate */
7710};
7711
7712static int cpu_extra_stat_show(struct seq_file *sf,
7713 struct cgroup_subsys_state *css)
7714{
7715#ifdef CONFIG_CFS_BANDWIDTH
7716 {

--- 151 unchanged lines hidden (view full) ---

7868#ifdef CONFIG_CFS_BANDWIDTH
7869 {
7870 .name = "max",
7871 .flags = CFTYPE_NOT_ON_ROOT,
7872 .seq_show = cpu_max_show,
7873 .write = cpu_max_write,
7874 },
7875#endif
7876#ifdef CONFIG_UCLAMP_TASK_GROUP
7877 {
7878 .name = "uclamp.min",
7879 .flags = CFTYPE_NOT_ON_ROOT,
7880 .seq_show = cpu_uclamp_min_show,
7881 .write = cpu_uclamp_min_write,
7882 },
7883 {
7884 .name = "uclamp.max",
7885 .flags = CFTYPE_NOT_ON_ROOT,
7886 .seq_show = cpu_uclamp_max_show,
7887 .write = cpu_uclamp_max_write,
7888 },
7889#endif
7488 { } /* terminate */
7489};
7490
7491struct cgroup_subsys cpu_cgrp_subsys = {
7492 .css_alloc = cpu_cgroup_css_alloc,
7493 .css_online = cpu_cgroup_css_online,
7494 .css_released = cpu_cgroup_css_released,
7495 .css_free = cpu_cgroup_css_free,

--- 60 unchanged lines hidden ---
7890 { } /* terminate */
7891};
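/*
 * Usage note for the uclamp cftype entries added above: both the legacy and
 * the default cpu controller hierarchies gain cpu.uclamp.min and
 * cpu.uclamp.max files on non-root groups. They accept either "max" or a
 * percentage with up to two decimals (e.g. "12.34"), as parsed by
 * cpu_uclamp_write(), and read back in the same format via
 * cpu_uclamp_min_show()/cpu_uclamp_max_show().
 */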
7892
7893struct cgroup_subsys cpu_cgrp_subsys = {
7894 .css_alloc = cpu_cgroup_css_alloc,
7895 .css_online = cpu_cgroup_css_online,
7896 .css_released = cpu_cgroup_css_released,
7897 .css_free = cpu_cgroup_css_free,

--- 60 unchanged lines hidden ---