Lines Matching +full:kernel +full:- +full:policy
1 // SPDX-License-Identifier: GPL-2.0-only
3 * kernel/sched/syscalls.c
5 * Core kernel scheduler syscalls related code
7 * Copyright (C) 1991-2002 Linus Torvalds
8 * Copyright (C) 1998-2024 Ingo Molnar, Red Hat
19 static inline int __normal_prio(int policy, int rt_prio, int nice) in __normal_prio() argument
23 if (dl_policy(policy)) in __normal_prio()
24 prio = MAX_DL_PRIO - 1; in __normal_prio()
25 else if (rt_policy(policy)) in __normal_prio()
26 prio = MAX_RT_PRIO - 1 - rt_prio; in __normal_prio()
35 * without taking RT-inheritance into account. Might be
42 return __normal_prio(p->policy, p->rt_priority, PRIO_TO_NICE(p->static_prio)); in normal_prio()
50 * RT-boosted. If not then it returns p->normal_prio.
54 p->normal_prio = normal_prio(p); in effective_prio()
60 if (!rt_or_dl_prio(p->prio)) in effective_prio()
61 return p->normal_prio; in effective_prio()
62 return p->prio; in effective_prio()
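
The arithmetic above is easy to reproduce outside the kernel. The stand-alone sketch below mirrors __normal_prio() using the constants as they are commonly defined in include/linux/sched/prio.h and sched/deadline.h (MAX_RT_PRIO = 100, DEFAULT_PRIO = 120, MAX_DL_PRIO = 0); the demo_* names are invented for illustration and none of this is kernel code.

#include <stdio.h>

/* Constants assumed to match include/linux/sched/prio.h and deadline.h. */
#define MAX_DL_PRIO   0
#define MAX_RT_PRIO   100
#define DEFAULT_PRIO  120
#define NICE_TO_PRIO(nice) ((nice) + DEFAULT_PRIO)

/* Hypothetical policy tags, for this demo only. */
enum demo_policy { DEMO_FAIR, DEMO_RT, DEMO_DL };

/* Same shape as __normal_prio(): deadline < RT < fair, lower value = higher priority. */
static int demo_normal_prio(enum demo_policy policy, int rt_prio, int nice)
{
    if (policy == DEMO_DL)
        return MAX_DL_PRIO - 1;            /* always -1 */
    if (policy == DEMO_RT)
        return MAX_RT_PRIO - 1 - rt_prio;  /* 99..0 for rt_prio 0..99 */
    return NICE_TO_PRIO(nice);             /* 100..139 for nice -20..19 */
}

int main(void)
{
    printf("fair, nice   0  -> prio %d\n", demo_normal_prio(DEMO_FAIR, 0, 0));   /* 120 */
    printf("fair, nice -20  -> prio %d\n", demo_normal_prio(DEMO_FAIR, 0, -20)); /* 100 */
    printf("rt,  rt_prio 50 -> prio %d\n", demo_normal_prio(DEMO_RT, 50, 0));    /*  49 */
    printf("deadline        -> prio %d\n", demo_normal_prio(DEMO_DL, 0, 0));     /*  -1 */
    return 0;
}
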
84 * allow the 'normal' nice value to be set - but as expected in set_user_nice()
89 p->static_prio = NICE_TO_PRIO(nice); in set_user_nice()
100 p->static_prio = NICE_TO_PRIO(nice); in set_user_nice()
102 old_prio = p->prio; in set_user_nice()
103 p->prio = effective_prio(p); in set_user_nice()
114 p->sched_class->prio_changed(rq, p, old_prio); in set_user_nice()
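
From user space, set_user_nice() is normally reached through setpriority(2) (or nice(2)). A minimal sketch using the standard glibc wrappers; the nice value 10 is an arbitrary choice.

#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <sys/resource.h>

int main(void)
{
    /* Raise our own nice value to 10 (lower priority); who = 0 means "this process". */
    if (setpriority(PRIO_PROCESS, 0, 10) == -1) {
        fprintf(stderr, "setpriority: %s\n", strerror(errno));
        return 1;
    }

    /* Read it back; clear errno first because -1 is also a legal return value. */
    errno = 0;
    int nice_val = getpriority(PRIO_PROCESS, 0);
    if (nice_val == -1 && errno)
        fprintf(stderr, "getpriority: %s\n", strerror(errno));
    else
        printf("current nice value: %d\n", nice_val);

    return 0;
}
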
119 * is_nice_reduction - check if nice value is an actual reduction
128 /* Convert nice value [19,-20] to rlimit style value [1,40]: */ in is_nice_reduction()
135 * can_nice - check if a task can reduce its nice value
147 * sys_nice - change the priority of the current process.
162 increment = clamp(increment, -NICE_WIDTH, NICE_WIDTH); in SYSCALL_DEFINE1()
167 return -EPERM; in SYSCALL_DEFINE1()
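
The can_nice()/is_nice_reduction() checks mean an unprivileged sys_nice() caller can only lower its nice value as far as RLIMIT_NICE allows, using the [1,40] encoding noted above (lowest permitted nice = 20 - limit). A small sketch, assuming the standard getrlimit(2)/nice(2) interfaces, that reads the limit and then attempts a reduction of 5:

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <sys/resource.h>

int main(void)
{
    struct rlimit rl;

    if (getrlimit(RLIMIT_NICE, &rl) == -1) {
        perror("getrlimit");
        return 1;
    }

    /* RLIMIT_NICE uses the 1..40 encoding: lowest permitted nice = 20 - limit. */
    long lowest_nice = 20 - (long)rl.rlim_cur;
    printf("RLIMIT_NICE soft limit %lu -> lowest permitted nice %ld\n",
           (unsigned long)rl.rlim_cur, lowest_nice);

    /* Try to drop nice by 5; without CAP_SYS_NICE this fails once it would
     * go below the limit computed above. Clear errno: -1 is a valid result. */
    errno = 0;
    int new_nice = nice(-5);
    if (new_nice == -1 && errno)
        printf("nice(-5) refused: %s\n", strerror(errno));
    else
        printf("nice value is now %d\n", new_nice);

    return 0;
}
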
180 * task_prio - return the priority value of a given task.
185  * sched policy         return value   kernel prio    user prio/nice
187  * normal, batch, idle  [0 ... 39]     [100 ... 139]  0/[-20 ... 19]
188  * fifo, rr             [-2 ... -100]  [98 ... 0]     [1 ... 99]
189  * deadline             -101           -1             0
193 return p->prio - MAX_RT_PRIO; in task_prio()
197 * idle_cpu - is a given CPU idle currently?
206 if (rq->curr != rq->idle) in idle_cpu()
209 if (rq->nr_running) in idle_cpu()
213 if (rq->ttwu_pending) in idle_cpu()
221 * available_idle_cpu - is a given CPU idle for enqueuing work.
238 * idle_task - return the idle task for a given CPU.
245 return cpu_rq(cpu)->idle; in idle_task()
253 if (sched_core_enabled(rq) && rq->curr == rq->idle) in sched_core_idle_cpu()
262 * find_process_by_pid - find a process with a matching PID value.
288 * sched_setparam() passes in -1 for its policy, to let the functions in DEFINE_CLASS()
291 #define SETPARAM_POLICY -1 in DEFINE_CLASS()
296 int policy = attr->sched_policy; in DEFINE_CLASS() local
298 if (policy == SETPARAM_POLICY) in DEFINE_CLASS()
299 policy = p->policy; in DEFINE_CLASS()
301 p->policy = policy; in DEFINE_CLASS()
303 if (dl_policy(policy)) { in DEFINE_CLASS()
305 } else if (fair_policy(policy)) { in DEFINE_CLASS()
306 p->static_prio = NICE_TO_PRIO(attr->sched_nice); in DEFINE_CLASS()
307 if (attr->sched_runtime) { in DEFINE_CLASS()
308 p->se.custom_slice = 1; in DEFINE_CLASS()
309 p->se.slice = clamp_t(u64, attr->sched_runtime, in DEFINE_CLASS()
313 p->se.custom_slice = 0; in DEFINE_CLASS()
314 p->se.slice = sysctl_sched_base_slice; in DEFINE_CLASS()
318 /* rt-policy tasks do not have a timerslack */ in DEFINE_CLASS()
320 p->timer_slack_ns = 0; in DEFINE_CLASS()
321 } else if (p->timer_slack_ns == 0) { in DEFINE_CLASS()
322 /* when switching back to non-rt policy, restore timerslack */ in DEFINE_CLASS()
323 p->timer_slack_ns = p->default_timer_slack_ns; in DEFINE_CLASS()
327 * __sched_setscheduler() ensures attr->sched_priority == 0 when in DEFINE_CLASS()
331 p->rt_priority = attr->sched_priority; in DEFINE_CLASS()
332 p->normal_prio = normal_prio(p); in DEFINE_CLASS()
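
The attr->sched_runtime handling above lets a plain fair-policy task request a custom base slice through sched_setattr(2) on kernels that support it; on older kernels the field is ignored or rejected for SCHED_OTHER, and the request is clamped to a sane range (roughly 0.1-100 ms on current kernels). glibc has no wrapper, so the hedged sketch below issues the raw syscall and carries a local copy of the UAPI struct sched_attr layout so it builds even with older headers; the 3 ms value is arbitrary.

#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <sched.h>
#include <sys/syscall.h>

/* Local copy of the UAPI layout (see include/uapi/linux/sched/types.h),
 * defined here only so the example is self-contained. */
struct sched_attr_demo {
    uint32_t size;
    uint32_t sched_policy;
    uint64_t sched_flags;
    int32_t  sched_nice;
    uint32_t sched_priority;
    uint64_t sched_runtime;   /* for SCHED_OTHER: requested slice, in ns */
    uint64_t sched_deadline;
    uint64_t sched_period;
    uint32_t sched_util_min;
    uint32_t sched_util_max;
};

static int sched_setattr_demo(pid_t pid, struct sched_attr_demo *attr, unsigned int flags)
{
    return syscall(SYS_sched_setattr, pid, attr, flags);
}

int main(void)
{
    struct sched_attr_demo attr;

    memset(&attr, 0, sizeof(attr));
    attr.size          = sizeof(attr);
    attr.sched_policy  = SCHED_OTHER;
    attr.sched_nice    = 0;
    attr.sched_runtime = 3 * 1000 * 1000;   /* ask for a ~3 ms slice */

    if (sched_setattr_demo(0, &attr, 0) == -1) {
        fprintf(stderr, "sched_setattr: %s\n", strerror(errno));
        return 1;
    }
    puts("custom slice requested");
    return 0;
}
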
345 return (uid_eq(cred->euid, pcred->euid) || in check_same_owner()
346 uid_eq(cred->euid, pcred->uid)); in check_same_owner()
354 int util_min = p->uclamp_req[UCLAMP_MIN].value; in uclamp_validate()
355 int util_max = p->uclamp_req[UCLAMP_MAX].value; in uclamp_validate()
357 if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) { in uclamp_validate()
358 util_min = attr->sched_util_min; in uclamp_validate()
361 return -EINVAL; in uclamp_validate()
364 if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX) { in uclamp_validate()
365 util_max = attr->sched_util_max; in uclamp_validate()
368 return -EINVAL; in uclamp_validate()
371 if (util_min != -1 && util_max != -1 && util_min > util_max) in uclamp_validate()
372 return -EINVAL; in uclamp_validate()
390 /* Reset on sched class change for a non user-defined clamp value. */ in uclamp_reset()
391 if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)) && in uclamp_reset()
392 !uc_se->user_defined) in uclamp_reset()
395 /* Reset on sched_util_{min,max} == -1. */ in uclamp_reset()
397 attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN && in uclamp_reset()
398 attr->sched_util_min == -1) { in uclamp_reset()
403 attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX && in uclamp_reset()
404 attr->sched_util_max == -1) { in uclamp_reset()
417 struct uclamp_se *uc_se = &p->uclamp_req[clamp_id]; in __setscheduler_uclamp()
436 if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP))) in __setscheduler_uclamp()
439 if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN && in __setscheduler_uclamp()
440 attr->sched_util_min != -1) { in __setscheduler_uclamp()
441 uclamp_se_set(&p->uclamp_req[UCLAMP_MIN], in __setscheduler_uclamp()
442 attr->sched_util_min, true); in __setscheduler_uclamp()
445 if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX && in __setscheduler_uclamp()
446 attr->sched_util_max != -1) { in __setscheduler_uclamp()
447 uclamp_se_set(&p->uclamp_req[UCLAMP_MAX], in __setscheduler_uclamp()
448 attr->sched_util_max, true); in __setscheduler_uclamp()
457 return -EOPNOTSUPP; in uclamp_validate()
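
Assuming CONFIG_UCLAMP_TASK is enabled (otherwise the stub above returns -EOPNOTSUPP), the clamps validated here are installed from user space with sched_setattr(2) and the SCHED_FLAG_UTIL_CLAMP_* flags. The sketch below only adjusts the clamps and keeps the current policy and parameters; the 256/768 values are arbitrary fractions of SCHED_CAPACITY_SCALE (1024), and it relies on UAPI headers new enough to declare struct sched_attr with sched_util_min/sched_util_max.

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/sched.h>        /* SCHED_FLAG_* (UAPI) */
#include <linux/sched/types.h>  /* struct sched_attr (UAPI) */

int main(void)
{
    struct sched_attr attr;

    memset(&attr, 0, sizeof(attr));
    attr.size = sizeof(attr);

    /* Only touch the clamps; keep the current policy and parameters. */
    attr.sched_flags = SCHED_FLAG_KEEP_ALL |
                       SCHED_FLAG_UTIL_CLAMP_MIN |
                       SCHED_FLAG_UTIL_CLAMP_MAX;
    attr.sched_util_min = 256;   /* floor:   ~25% of SCHED_CAPACITY_SCALE */
    attr.sched_util_max = 768;   /* ceiling: ~75% */

    if (syscall(SYS_sched_setattr, 0, &attr, 0) == -1) {
        fprintf(stderr, "sched_setattr(UTIL_CLAMP): %s\n", strerror(errno));
        return 1;   /* e.g. EOPNOTSUPP without CONFIG_UCLAMP_TASK */
    }
    puts("uclamp request installed");
    return 0;
}
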
466 * event on permitted non-privileged operations:
470 int policy, int reset_on_fork) in user_check_sched_setscheduler() argument
472 if (fair_policy(policy)) { in user_check_sched_setscheduler()
473 if (attr->sched_nice < task_nice(p) && in user_check_sched_setscheduler()
474 !is_nice_reduction(p, attr->sched_nice)) in user_check_sched_setscheduler()
478 if (rt_policy(policy)) { in user_check_sched_setscheduler()
481 /* Can't set/change the rt policy: */ in user_check_sched_setscheduler()
482 if (policy != p->policy && !rlim_rtprio) in user_check_sched_setscheduler()
486 if (attr->sched_priority > p->rt_priority && in user_check_sched_setscheduler()
487 attr->sched_priority > rlim_rtprio) in user_check_sched_setscheduler()
492 * Can't set/change SCHED_DEADLINE policy at all for now in user_check_sched_setscheduler()
497 if (dl_policy(policy)) in user_check_sched_setscheduler()
504 if (task_has_idle_policy(p) && !idle_policy(policy)) { in user_check_sched_setscheduler()
514 if (p->sched_reset_on_fork && !reset_on_fork) in user_check_sched_setscheduler()
521 return -EPERM; in user_check_sched_setscheduler()
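
Taken together, these checks mean an unprivileged caller may switch to SCHED_FIFO/SCHED_RR only within RLIMIT_RTPRIO, and may not raise its RT priority past that limit. A small sketch using the standard glibc wrappers; priority 10 is an arbitrary pick.

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <sched.h>
#include <sys/resource.h>

int main(void)
{
    struct rlimit rl;
    struct sched_param sp = { .sched_priority = 10 };

    if (getrlimit(RLIMIT_RTPRIO, &rl) == 0)
        printf("RLIMIT_RTPRIO soft limit: %lu\n", (unsigned long)rl.rlim_cur);

    /* Without CAP_SYS_NICE this fails with EPERM if 10 > RLIMIT_RTPRIO
     * (or if the limit is 0 and we are not already SCHED_FIFO/RR). */
    if (sched_setscheduler(0, SCHED_FIFO, &sp) == -1) {
        fprintf(stderr, "sched_setscheduler(SCHED_FIFO, 10): %s\n", strerror(errno));
        return 1;
    }
    puts("now running SCHED_FIFO at priority 10");
    return 0;
}
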
530 int oldpolicy = -1, policy = attr->sched_policy; in __sched_setscheduler() local
543 /* Double check policy once rq lock held: */ in __sched_setscheduler()
544 if (policy < 0) { in __sched_setscheduler()
545 reset_on_fork = p->sched_reset_on_fork; in __sched_setscheduler()
546 policy = oldpolicy = p->policy; in __sched_setscheduler()
548 reset_on_fork = !!(attr->sched_flags & SCHED_FLAG_RESET_ON_FORK); in __sched_setscheduler()
550 if (!valid_policy(policy)) in __sched_setscheduler()
551 return -EINVAL; in __sched_setscheduler()
554 if (attr->sched_flags & ~(SCHED_FLAG_ALL | SCHED_FLAG_SUGOV)) in __sched_setscheduler()
555 return -EINVAL; in __sched_setscheduler()
559 * 1..MAX_RT_PRIO-1, valid priority for SCHED_NORMAL, in __sched_setscheduler()
562 if (attr->sched_priority > MAX_RT_PRIO-1) in __sched_setscheduler()
563 return -EINVAL; in __sched_setscheduler()
564 if ((dl_policy(policy) && !__checkparam_dl(attr)) || in __sched_setscheduler()
565 (rt_policy(policy) != (attr->sched_priority != 0))) in __sched_setscheduler()
566 return -EINVAL; in __sched_setscheduler()
569 retval = user_check_sched_setscheduler(p, attr, policy, reset_on_fork); in __sched_setscheduler()
573 if (attr->sched_flags & SCHED_FLAG_SUGOV) in __sched_setscheduler()
574 return -EINVAL; in __sched_setscheduler()
582 if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) { in __sched_setscheduler()
592 if (dl_policy(policy) || dl_policy(p->policy)) { in __sched_setscheduler()
598 * Make sure no PI-waiters arrive (or leave) while we are in __sched_setscheduler()
601 * To be able to change p->policy safely, the appropriate in __sched_setscheduler()
608 * Changing the policy of the stop threads is a very bad idea: in __sched_setscheduler()
610 if (p == rq->stop) { in __sched_setscheduler()
611 retval = -EINVAL; in __sched_setscheduler()
615 retval = scx_check_setscheduler(p, policy); in __sched_setscheduler()
623 if (unlikely(policy == p->policy)) { in __sched_setscheduler()
624 if (fair_policy(policy) && in __sched_setscheduler()
625 (attr->sched_nice != task_nice(p) || in __sched_setscheduler()
626 (attr->sched_runtime != p->se.slice))) in __sched_setscheduler()
628 if (rt_policy(policy) && attr->sched_priority != p->rt_priority) in __sched_setscheduler()
630 if (dl_policy(policy) && dl_param_changed(p, attr)) in __sched_setscheduler()
632 if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) in __sched_setscheduler()
635 p->sched_reset_on_fork = reset_on_fork; in __sched_setscheduler()
644 * Do not allow real-time tasks into groups that have no runtime in __sched_setscheduler()
647 if (rt_bandwidth_enabled() && rt_policy(policy) && in __sched_setscheduler()
648 task_group(p)->rt_bandwidth.rt_runtime == 0 && in __sched_setscheduler()
650 retval = -EPERM; in __sched_setscheduler()
655 if (dl_bandwidth_enabled() && dl_policy(policy) && in __sched_setscheduler()
656 !(attr->sched_flags & SCHED_FLAG_SUGOV)) { in __sched_setscheduler()
657 cpumask_t *span = rq->rd->span; in __sched_setscheduler()
664 if (!cpumask_subset(span, p->cpus_ptr) || in __sched_setscheduler()
665 rq->rd->dl_bw.bw == 0) { in __sched_setscheduler()
666 retval = -EPERM; in __sched_setscheduler()
673 /* Re-check policy now with rq lock held: */ in __sched_setscheduler()
674 if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { in __sched_setscheduler()
675 policy = oldpolicy = -1; in __sched_setscheduler()
687 if ((dl_policy(policy) || dl_task(p)) && sched_dl_overflow(p, policy, attr)) { in __sched_setscheduler()
688 retval = -EBUSY; in __sched_setscheduler()
692 p->sched_reset_on_fork = reset_on_fork; in __sched_setscheduler()
693 oldprio = p->prio; in __sched_setscheduler()
695 newprio = __normal_prio(policy, attr->sched_priority, attr->sched_nice); in __sched_setscheduler()
709 prev_class = p->sched_class; in __sched_setscheduler()
710 next_class = __setscheduler_class(policy, newprio); in __sched_setscheduler()
712 if (prev_class != next_class && p->se.sched_delayed) in __sched_setscheduler()
722 if (!(attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)) { in __sched_setscheduler()
724 p->sched_class = next_class; in __sched_setscheduler()
725 p->prio = newprio; in __sched_setscheduler()
735 if (oldprio < p->prio) in __sched_setscheduler()
769 static int _sched_setscheduler(struct task_struct *p, int policy, in _sched_setscheduler() argument
773 .sched_policy = policy, in _sched_setscheduler()
774 .sched_priority = param->sched_priority, in _sched_setscheduler()
775 .sched_nice = PRIO_TO_NICE(p->static_prio), in _sched_setscheduler()
778 if (p->se.custom_slice) in _sched_setscheduler()
779 attr.sched_runtime = p->se.slice; in _sched_setscheduler()
782 if ((policy != SETPARAM_POLICY) && (policy & SCHED_RESET_ON_FORK)) { in _sched_setscheduler()
784 policy &= ~SCHED_RESET_ON_FORK; in _sched_setscheduler()
785 attr.sched_policy = policy; in _sched_setscheduler()
791 * sched_setscheduler - change the scheduling policy and/or RT priority of a thread.
793 * @policy: new policy.
802 int sched_setscheduler(struct task_struct *p, int policy, in sched_setscheduler() argument
805 return _sched_setscheduler(p, policy, param, true); in sched_setscheduler()
…* sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace.
822 * @policy: new policy.
832 int sched_setscheduler_nocheck(struct task_struct *p, int policy, in sched_setscheduler_nocheck() argument
835 return _sched_setscheduler(p, policy, param, false); in sched_setscheduler_nocheck()
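
Inside the kernel, sched_setscheduler_nocheck() and the convenience helpers built on top of it, such as sched_set_fifo() and sched_set_normal() (neither appears in the matches above), are the usual way to give a kernel thread a policy; the comment just below explains why such threads all end up at one fixed FIFO priority. Below is a minimal module-style sketch with invented names and trimmed error handling, assuming those helpers are exported as on recent kernels; it is an illustration, not a recipe.

// SPDX-License-Identifier: GPL-2.0-only
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/delay.h>
#include <linux/err.h>
#include <linux/sched.h>

static struct task_struct *demo_thread;

static int demo_fn(void *data)
{
    while (!kthread_should_stop()) {
        /* Pretend to do periodic work. */
        msleep_interruptible(100);
    }
    return 0;
}

static int __init demo_init(void)
{
    demo_thread = kthread_run(demo_fn, NULL, "sched_demo");
    if (IS_ERR(demo_thread))
        return PTR_ERR(demo_thread);

    /* Give the worker a fixed mid-range FIFO priority. */
    sched_set_fifo(demo_thread);
    return 0;
}

static void __exit demo_exit(void)
{
    kthread_stop(demo_thread);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
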
849 * For this reason 'all' FIFO tasks the kernel creates are basically at:
853 * The administrator _MUST_ configure the system, the kernel simply doesn't
884 do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) in do_sched_setscheduler() argument
889 return -EINVAL; in do_sched_setscheduler()
891 return -EFAULT; in do_sched_setscheduler()
895 return -ESRCH; in do_sched_setscheduler()
897 return sched_setscheduler(p, policy, &lparam); in do_sched_setscheduler()
901 * Mimics kernel/events/core.c perf_copy_attr().
911 ret = get_user(size, &uattr->size); in sched_copy_attr()
923 if (ret == -E2BIG) in sched_copy_attr()
928 if ((attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) && in sched_copy_attr()
930 return -EINVAL; in sched_copy_attr()
934 * to be strict and return an error on out-of-bounds values? in sched_copy_attr()
936 attr->sched_nice = clamp(attr->sched_nice, MIN_NICE, MAX_NICE); in sched_copy_attr()
941 put_user(sizeof(*attr), &uattr->size); in sched_copy_attr()
942 return -E2BIG; in sched_copy_attr()
950 attr->sched_priority = p->rt_priority; in get_params()
952 attr->sched_nice = task_nice(p); in get_params()
953 attr->sched_runtime = p->se.slice; in get_params()
958 * sys_sched_setscheduler - set/change the scheduler policy and RT priority
960 * @policy: new policy.
965 SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy, struct sched_param __user *, param) in SYSCALL_DEFINE3() argument
967 if (policy < 0) in SYSCALL_DEFINE3()
968 return -EINVAL; in SYSCALL_DEFINE3()
970 return do_sched_setscheduler(pid, policy, param); in SYSCALL_DEFINE3()
974 * sys_sched_setparam - set/change the RT priority of a thread
986 * sys_sched_setattr - same as above, but with extended sched_attr
998 return -EINVAL; in SYSCALL_DEFINE3()
1005 return -EINVAL; in SYSCALL_DEFINE3()
1011 return -ESRCH; in SYSCALL_DEFINE3()
1020 * sys_sched_getscheduler - get the policy (scheduling class) of a thread
1023 * Return: On success, the policy of the thread. Otherwise, a negative error
1032 return -EINVAL; in SYSCALL_DEFINE1()
1037 return -ESRCH; in SYSCALL_DEFINE1()
1041 retval = p->policy; in SYSCALL_DEFINE1()
1042 if (p->sched_reset_on_fork) in SYSCALL_DEFINE1()
1049 * sys_sched_getparam - get the RT priority of a thread
1063 return -EINVAL; in SYSCALL_DEFINE2()
1068 return -ESRCH; in SYSCALL_DEFINE2()
1075 lp.sched_priority = p->rt_priority; in SYSCALL_DEFINE2()
1081 return copy_to_user(param, &lp, sizeof(*param)) ? -EFAULT : 0; in SYSCALL_DEFINE2()
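
The two getter syscalls above are all that a chrt -p style query needs. A minimal sketch with the glibc wrappers; note that, as the sched_reset_on_fork check above shows, sched_getscheduler(2) reports SCHED_RESET_ON_FORK OR-ed into the returned policy.

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sched.h>
#include <sys/types.h>

#ifndef SCHED_RESET_ON_FORK
#define SCHED_RESET_ON_FORK 0x40000000   /* from include/uapi/linux/sched.h */
#endif

int main(int argc, char **argv)
{
    pid_t pid = (argc > 1) ? (pid_t)atoi(argv[1]) : 0;   /* 0 = calling thread */
    struct sched_param sp;

    int policy = sched_getscheduler(pid);
    if (policy == -1) {
        fprintf(stderr, "sched_getscheduler: %s\n", strerror(errno));
        return 1;
    }
    if (sched_getparam(pid, &sp) == -1) {
        fprintf(stderr, "sched_getparam: %s\n", strerror(errno));
        return 1;
    }

    printf("policy %d%s, rt priority %d\n",
           policy & ~SCHED_RESET_ON_FORK,
           (policy & SCHED_RESET_ON_FORK) ? " (reset-on-fork)" : "",
           sp.sched_priority);
    return 0;
}
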
1085 * Copy the kernel size attribute structure (which might be larger
1086 * than what user-space knows about) to user-space.
1088 * Note that all cases are valid: user-space buffer can be larger or
1089 * smaller than the kernel-space buffer. The usual case is that both
1100 return -EFAULT; in sched_attr_copy_to_user()
1105 * If usize == ksize then we just copy everything to user-space and all is good. in sched_attr_copy_to_user()
1107 * If usize < ksize then we only copy as much as user-space has space for, in sched_attr_copy_to_user()
1110 * If usize > ksize then user-space is using a newer version of the ABI, in sched_attr_copy_to_user()
1111 * parts of which the kernel doesn't know about. Just ignore it - tooling can in sched_attr_copy_to_user()
1112 * detect the kernel's knowledge of attributes from the attr->size value in sched_attr_copy_to_user()
1115 kattr->size = min(usize, ksize); in sched_attr_copy_to_user()
1117 if (copy_to_user(uattr, kattr, kattr->size)) in sched_attr_copy_to_user()
1118 return -EFAULT; in sched_attr_copy_to_user()
1124 * sys_sched_getattr - similar to sched_getparam, but with sched_attr
1139 return -EINVAL; in SYSCALL_DEFINE4()
1144 return -ESRCH; in SYSCALL_DEFINE4()
1150 kattr.sched_policy = p->policy; in SYSCALL_DEFINE4()
1151 if (p->sched_reset_on_fork) in SYSCALL_DEFINE4()
1162 kattr.sched_util_min = p->uclamp_req[UCLAMP_MIN].value; in SYSCALL_DEFINE4()
1163 kattr.sched_util_max = p->uclamp_req[UCLAMP_MAX].value; in SYSCALL_DEFINE4()
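
The corresponding user-space read goes through sched_getattr(2); the size the caller passes is the usize discussed in the copy-out comments above, and the kernel reports its own view back in attr.size. glibc has no wrapper, so the raw syscall is used; the sketch assumes UAPI headers that provide struct sched_attr.

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/sched/types.h>   /* struct sched_attr (UAPI) */

int main(void)
{
    struct sched_attr attr;

    memset(&attr, 0, sizeof(attr));

    /* Pass our buffer size (usize); the kernel copies back min(usize, ksize)
     * bytes and sets attr.size to what it actually filled in. */
    if (syscall(SYS_sched_getattr, 0, &attr, (unsigned int)sizeof(attr), 0) == -1) {
        fprintf(stderr, "sched_getattr: %s\n", strerror(errno));
        return 1;
    }

    printf("size %u policy %u nice %d rt_prio %u util_min %u util_max %u\n",
           attr.size, attr.sched_policy, attr.sched_nice,
           attr.sched_priority, attr.sched_util_min, attr.sched_util_max);
    return 0;
}
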
1182 * if admission test is enabled, we only admit -deadline in dl_task_check_affinity()
1187 if (!cpumask_subset(task_rq(p)->rd->span, mask)) in dl_task_check_affinity()
1188 return -EBUSY; in dl_task_check_affinity()
1200 return -ENOMEM; in __sched_setaffinity()
1203 retval = -ENOMEM; in __sched_setaffinity()
1208 cpumask_and(new_mask, ctx->new_mask, cpus_allowed); in __sched_setaffinity()
1210 ctx->new_mask = new_mask; in __sched_setaffinity()
1211 ctx->flags |= SCA_CHECK; in __sched_setaffinity()
1237 if (unlikely((ctx->flags & SCA_USER) && ctx->user_mask)) { in __sched_setaffinity()
1239 ctx->user_mask); in __sched_setaffinity()
1245 retval = -EINVAL; in __sched_setaffinity()
1263 return -ESRCH; in sched_setaffinity()
1265 if (p->flags & PF_NO_SETAFFINITY) in sched_setaffinity()
1266 return -EINVAL; in sched_setaffinity()
1270 if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) in sched_setaffinity()
1271 return -EPERM; in sched_setaffinity()
1279 * With non-SMP configs, user_cpus_ptr/user_mask isn't used and in sched_setaffinity()
1286 return -ENOMEM; in sched_setaffinity()
1309 return copy_from_user(new_mask, user_mask_ptr, len) ? -EFAULT : 0; in get_user_cpu_mask()
1313 * sys_sched_setaffinity - set the CPU affinity of a process
1316 * @user_mask_ptr: user-space pointer to the new CPU mask
1327 return -ENOMEM; in SYSCALL_DEFINE3()
1344 return -ESRCH; in sched_getaffinity()
1350 guard(raw_spinlock_irqsave)(&p->pi_lock); in sched_getaffinity()
1351 cpumask_and(mask, &p->cpus_mask, cpu_active_mask); in sched_getaffinity()
1357 * sys_sched_getaffinity - get the CPU affinity of a process
1360 * @user_mask_ptr: user-space pointer to hold the current CPU mask
1372 return -EINVAL; in SYSCALL_DEFINE3()
1373 if (len & (sizeof(unsigned long)-1)) in SYSCALL_DEFINE3()
1374 return -EINVAL; in SYSCALL_DEFINE3()
1377 return -ENOMEM; in SYSCALL_DEFINE3()
1384 ret = -EFAULT; in SYSCALL_DEFINE3()
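
User space reaches these two through the glibc sched_setaffinity(2)/sched_getaffinity(2) wrappers and the CPU_* macros. The sketch pins the calling thread to CPUs 0 and 1 (assumed to exist) and reads back the effective mask, which the kernel may have intersected with the allowed set.

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <sched.h>
#include <unistd.h>

int main(void)
{
    cpu_set_t set;

    /* Pin the calling thread to CPUs 0 and 1 (assuming they exist). */
    CPU_ZERO(&set);
    CPU_SET(0, &set);
    CPU_SET(1, &set);
    if (sched_setaffinity(0, sizeof(set), &set) == -1) {
        fprintf(stderr, "sched_setaffinity: %s\n", strerror(errno));
        return 1;
    }

    /* Read the effective mask back. */
    CPU_ZERO(&set);
    if (sched_getaffinity(0, sizeof(set), &set) == -1) {
        fprintf(stderr, "sched_getaffinity: %s\n", strerror(errno));
        return 1;
    }
    printf("allowed on %d CPU(s), currently on CPU %d\n",
           CPU_COUNT(&set), sched_getcpu());
    return 0;
}
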
1400 schedstat_inc(rq->yld_count); in do_sched_yield()
1401 current->sched_class->yield_task(rq); in do_sched_yield()
1411 * sys_sched_yield - yield the current processor to other threads.
1425 * yield - yield the current processor to other threads.
1454 * yield_to - yield the current processor to another thread in
1466 * -ESRCH if there's no task to yield to.
1483 if (rq->nr_running == 1 && p_rq->nr_running == 1) in yield_to()
1484 return -ESRCH; in yield_to()
1490 if (!curr->sched_class->yield_to_task) in yield_to()
1493 if (curr->sched_class != p->sched_class) in yield_to()
1499 yielded = curr->sched_class->yield_to_task(rq, p); in yield_to()
1501 schedstat_inc(rq->yld_count); in yield_to()
1519 * sys_sched_get_priority_max - return maximum RT priority.
1520 * @policy: scheduling class.
1526 SYSCALL_DEFINE1(sched_get_priority_max, int, policy) in SYSCALL_DEFINE1() argument
1528 int ret = -EINVAL; in SYSCALL_DEFINE1()
1530 switch (policy) { in SYSCALL_DEFINE1()
1533 ret = MAX_RT_PRIO-1; in SYSCALL_DEFINE1()
1547 * sys_sched_get_priority_min - return minimum RT priority.
1548 * @policy: scheduling class.
1554 SYSCALL_DEFINE1(sched_get_priority_min, int, policy) in SYSCALL_DEFINE1() argument
1556 int ret = -EINVAL; in SYSCALL_DEFINE1()
1558 switch (policy) { in SYSCALL_DEFINE1()
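
Rather than hard-coding the Linux 1..99 range returned above, portable code queries it at run time. A small sketch that picks a mid-range SCHED_FIFO priority from the reported bounds:

#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <sched.h>

int main(void)
{
    int max = sched_get_priority_max(SCHED_FIFO);
    int min = sched_get_priority_min(SCHED_FIFO);

    if (max == -1 || min == -1) {
        fprintf(stderr, "sched_get_priority_{min,max}: %s\n", strerror(errno));
        return 1;
    }

    printf("SCHED_FIFO priority range: %d..%d, midpoint %d\n",
           min, max, min + (max - min) / 2);
    return 0;
}
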
1579 return -EINVAL; in sched_rr_get_interval()
1584 return -ESRCH; in sched_rr_get_interval()
1592 if (p->sched_class->get_rr_interval) in sched_rr_get_interval()
1593 time_slice = p->sched_class->get_rr_interval(rq, p); in sched_rr_get_interval()
1602 * sys_sched_rr_get_interval - return the default time-slice of a process.
1604 * @interval: userspace pointer to the time-slice value.
1606 * this syscall writes the default time-slice value of a given process
1607 * into the user-space timespec buffer. A value of '0' means infinity.
1609 * Return: On success, 0 and the time-slice is in @interval. Otherwise,