// SPDX-License-Identifier: GPL-2.0+
/*
 * 2002-10-15  Posix Clocks & timers
 *                           by George Anzinger george@mvista.com
 *			     Copyright (C) 2002 2003 by MontaVista Software.
 *
 * 2004-06-01  Fix CLOCK_REALTIME clock/timer TIMER_ABSTIME bug.
 *			     Copyright (C) 2004 Boris Hu
 *
 * These are all the functions necessary to implement POSIX clocks & timers
 */
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/time.h>
#include <linux/mutex.h>
#include <linux/sched/task.h>

#include <linux/uaccess.h>
#include <linux/list.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/hash.h>
#include <linux/posix-clock.h>
#include <linux/posix-timers.h>
#include <linux/syscalls.h>
#include <linux/wait.h>
#include <linux/workqueue.h>
#include <linux/export.h>
#include <linux/hashtable.h>
#include <linux/compat.h>
#include <linux/nospec.h>
#include <linux/time_namespace.h>

#include "timekeeping.h"
#include "posix-timers.h"

static struct kmem_cache *posix_timers_cache;

/*
 * Timers are managed in a hash table for lockless lookup. The hash key is
 * constructed from current::signal and the timer ID and the timer is
 * matched against current::signal and the timer ID when walking the hash
 * bucket list.
 *
 * This allows checkpoint/restore to reconstruct the exact timer IDs for
 * a process.
 */
static DEFINE_HASHTABLE(posix_timers_hashtable, 9);
static DEFINE_SPINLOCK(hash_lock);

static const struct k_clock * const posix_clocks[];
static const struct k_clock *clockid_to_kclock(const clockid_t id);
static const struct k_clock clock_realtime, clock_monotonic;

/* SIGEV_THREAD_ID cannot share a bit with the other SIGEV values. */
#if SIGEV_THREAD_ID != (SIGEV_THREAD_ID & \
			~(SIGEV_SIGNAL | SIGEV_NONE | SIGEV_THREAD))
#error "SIGEV_THREAD_ID must not share bit with other SIGEV values!"
#endif

static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags);

#define lock_timer(tid, flags)						   \
({	struct k_itimer *__timr;					   \
	__cond_lock(&__timr->it_lock, __timr = __lock_timer(tid, flags));  \
	__timr;								   \
})

static int hash(struct signal_struct *sig, unsigned int nr)
{
	return hash_32(hash32_ptr(sig) ^ nr, HASH_BITS(posix_timers_hashtable));
}

static struct k_itimer *__posix_timers_find(struct hlist_head *head,
					    struct signal_struct *sig,
					    timer_t id)
{
	struct k_itimer *timer;

	hlist_for_each_entry_rcu(timer, head, t_hash, lockdep_is_held(&hash_lock)) {
		/* timer->it_signal can be set concurrently */
		if ((READ_ONCE(timer->it_signal) == sig) && (timer->it_id == id))
			return timer;
	}
	return NULL;
}

static struct k_itimer *posix_timer_by_id(timer_t id)
{
	struct signal_struct *sig = current->signal;
	struct hlist_head *head = &posix_timers_hashtable[hash(sig, id)];

	return __posix_timers_find(head, sig, id);
}

static int posix_timer_add(struct k_itimer *timer)
{
	struct signal_struct *sig = current->signal;
	struct hlist_head *head;
	unsigned int cnt, id;

	/*
	 * FIXME: Replace this by a per signal struct xarray once there is
	 * a plan to handle the resulting CRIU regression gracefully.
	 */
	for (cnt = 0; cnt <= INT_MAX; cnt++) {
		spin_lock(&hash_lock);
		id = sig->next_posix_timer_id;

		/* Write the next ID back. Clamp it to the positive space */
		sig->next_posix_timer_id = (id + 1) & INT_MAX;

		head = &posix_timers_hashtable[hash(sig, id)];
		if (!__posix_timers_find(head, sig, id)) {
			hlist_add_head_rcu(&timer->t_hash, head);
			spin_unlock(&hash_lock);
			return id;
		}
		spin_unlock(&hash_lock);
	}
	/* POSIX return code when no timer ID could be allocated */
	return -EAGAIN;
}

static inline void unlock_timer(struct k_itimer *timr, unsigned long flags)
{
	spin_unlock_irqrestore(&timr->it_lock, flags);
}

static int posix_get_realtime_timespec(clockid_t which_clock, struct timespec64 *tp)
{
	ktime_get_real_ts64(tp);
	return 0;
}

static ktime_t posix_get_realtime_ktime(clockid_t which_clock)
{
	return ktime_get_real();
}

static int posix_clock_realtime_set(const clockid_t which_clock,
				    const struct timespec64 *tp)
{
	return do_sys_settimeofday64(tp, NULL);
}

static int posix_clock_realtime_adj(const clockid_t which_clock,
				    struct __kernel_timex *t)
{
	return do_adjtimex(t);
}

static int posix_get_monotonic_timespec(clockid_t which_clock, struct timespec64 *tp)
{
	ktime_get_ts64(tp);
	timens_add_monotonic(tp);
	return 0;
}

static ktime_t posix_get_monotonic_ktime(clockid_t which_clock)
{
	return ktime_get();
}

static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec64 *tp)
{
	ktime_get_raw_ts64(tp);
	timens_add_monotonic(tp);
	return 0;
}

static int posix_get_realtime_coarse(clockid_t which_clock, struct timespec64 *tp)
{
	ktime_get_coarse_real_ts64(tp);
	return 0;
}

static int posix_get_monotonic_coarse(clockid_t which_clock,
						struct timespec64 *tp)
{
	ktime_get_coarse_ts64(tp);
	timens_add_monotonic(tp);
	return 0;
}

static int posix_get_coarse_res(const clockid_t which_clock, struct timespec64 *tp)
{
	*tp = ktime_to_timespec64(KTIME_LOW_RES);
	return 0;
}

static int posix_get_boottime_timespec(const clockid_t which_clock, struct timespec64 *tp)
{
	ktime_get_boottime_ts64(tp);
	timens_add_boottime(tp);
	return 0;
}

static ktime_t posix_get_boottime_ktime(const clockid_t which_clock)
{
	return ktime_get_boottime();
}

static int posix_get_tai_timespec(clockid_t which_clock, struct timespec64 *tp)
{
	ktime_get_clocktai_ts64(tp);
	return 0;
}

static ktime_t posix_get_tai_ktime(clockid_t which_clock)
{
	return ktime_get_clocktai();
}

static int posix_get_hrtimer_res(clockid_t which_clock, struct timespec64 *tp)
{
	tp->tv_sec = 0;
	tp->tv_nsec = hrtimer_resolution;
	return 0;
}

static __init int init_posix_timers(void)
{
	posix_timers_cache = kmem_cache_create("posix_timers_cache",
					sizeof(struct k_itimer), 0,
					SLAB_PANIC | SLAB_ACCOUNT, NULL);
	return 0;
}
__initcall(init_posix_timers);

/*
 * The siginfo si_overrun field and the return value of timer_getoverrun(2)
 * are of type int. Clamp the overrun value to INT_MAX.
 */
static inline int timer_overrun_to_int(struct k_itimer *timr, int baseval)
{
	s64 sum = timr->it_overrun_last + (s64)baseval;

	return sum > (s64)INT_MAX ? INT_MAX : (int)sum;
}

static void common_hrtimer_rearm(struct k_itimer *timr)
{
	struct hrtimer *timer = &timr->it.real.timer;

	timr->it_overrun += hrtimer_forward(timer, timer->base->get_time(),
					    timr->it_interval);
	hrtimer_restart(timer);
}

/*
 * This function is called from the signal delivery code if
 * info->si_sys_private is not zero, which indicates that the timer has to
 * be rearmed. Restart the timer and update info::si_overrun.
 */
void posixtimer_rearm(struct kernel_siginfo *info)
{
	struct k_itimer *timr;
	unsigned long flags;

	timr = lock_timer(info->si_tid, &flags);
	if (!timr)
		return;

	if (timr->it_interval && timr->it_requeue_pending == info->si_sys_private) {
		timr->kclock->timer_rearm(timr);

		timr->it_active = 1;
		timr->it_overrun_last = timr->it_overrun;
		timr->it_overrun = -1LL;
		++timr->it_requeue_pending;

		info->si_overrun = timer_overrun_to_int(timr, info->si_overrun);
	}

	unlock_timer(timr, flags);
}

int posix_timer_queue_signal(struct k_itimer *timr)
{
	int ret, si_private = 0;
	enum pid_type type;

	lockdep_assert_held(&timr->it_lock);

	timr->it_active = 0;
	if (timr->it_interval)
		si_private = ++timr->it_requeue_pending;

	/*
	 * FIXME: if ->sigq is queued we can race with
	 * dequeue_signal()->posixtimer_rearm().
	 *
	 * If dequeue_signal() sees the "right" value of
	 * si_sys_private it calls posixtimer_rearm().
	 * We re-queue ->sigq and drop ->it_lock().
	 * posixtimer_rearm() locks the timer
	 * and re-schedules it while ->sigq is pending.
	 * Not really bad, but not what we want.
	 */
	timr->sigq->info.si_sys_private = si_private;

	type = !(timr->it_sigev_notify & SIGEV_THREAD_ID) ? PIDTYPE_TGID : PIDTYPE_PID;
	ret = send_sigqueue(timr->sigq, timr->it_pid, type);
	/* If we failed to send the signal the timer stops. */
	return ret > 0;
}

/*
 * This function gets called when a POSIX.1b interval timer expires from
 * the HRTIMER interrupt (soft interrupt on RT kernels).
 *
 * Handles CLOCK_REALTIME, CLOCK_MONOTONIC, CLOCK_BOOTTIME and CLOCK_TAI
 * based timers.
 */
static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer)
{
	struct k_itimer *timr = container_of(timer, struct k_itimer, it.real.timer);
	enum hrtimer_restart ret = HRTIMER_NORESTART;
	unsigned long flags;

	spin_lock_irqsave(&timr->it_lock, flags);

	if (posix_timer_queue_signal(timr)) {
		/*
		 * The signal was not queued due to SIG_IGN. As a
		 * consequence the timer is not going to be rearmed from
		 * the signal delivery path. But as a real signal handler
		 * can be installed later the timer must be rearmed here.
		 */
		if (timr->it_interval != 0) {
			ktime_t now = hrtimer_cb_get_time(timer);

			/*
			 * FIXME: What we really want is to stop this
			 * timer completely and restart it in case the
			 * SIG_IGN is removed. This is a non-trivial
			 * change to the signal handling code.
			 *
			 * For now let timers with an interval less than a
			 * jiffy expire every jiffy and recheck for a
			 * valid signal handler.
			 *
			 * This avoids interrupt starvation in case of a
			 * very small interval, which would expire the
			 * timer immediately again.
			 *
			 * Moving now ahead of time by one jiffy tricks
			 * hrtimer_forward() to expire the timer later,
			 * while it still maintains the overrun accuracy
			 * for the price of a slight inconsistency in the
			 * timer_gettime() case. This is at least better
			 * than a timer storm.
			 *
			 * Only required when high resolution timers are
			 * enabled as the periodic tick based timers are
			 * automatically aligned to the next tick.
			 */
			if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS)) {
				ktime_t kj = TICK_NSEC;

				if (timr->it_interval < kj)
					now = ktime_add(now, kj);
			}

			timr->it_overrun += hrtimer_forward(timer, now, timr->it_interval);
			ret = HRTIMER_RESTART;
			++timr->it_requeue_pending;
			timr->it_active = 1;
		}
	}

	unlock_timer(timr, flags);
	return ret;
}

static struct pid *good_sigevent(sigevent_t *event)
{
	struct pid *pid = task_tgid(current);
	struct task_struct *rtn;

	switch (event->sigev_notify) {
	case SIGEV_SIGNAL | SIGEV_THREAD_ID:
		pid = find_vpid(event->sigev_notify_thread_id);
		rtn = pid_task(pid, PIDTYPE_PID);
		if (!rtn || !same_thread_group(rtn, current))
			return NULL;
		fallthrough;
	case SIGEV_SIGNAL:
	case SIGEV_THREAD:
		if (event->sigev_signo <= 0 || event->sigev_signo > SIGRTMAX)
			return NULL;
		fallthrough;
	case SIGEV_NONE:
		return pid;
	default:
		return NULL;
	}
}

static struct k_itimer *alloc_posix_timer(void)
{
	struct k_itimer *tmr = kmem_cache_zalloc(posix_timers_cache, GFP_KERNEL);

	if (!tmr)
		return tmr;
	if (unlikely(!(tmr->sigq = sigqueue_alloc()))) {
		kmem_cache_free(posix_timers_cache, tmr);
		return NULL;
	}
	clear_siginfo(&tmr->sigq->info);
	return tmr;
}

static void k_itimer_rcu_free(struct rcu_head *head)
{
	struct k_itimer *tmr = container_of(head, struct k_itimer, rcu);

	kmem_cache_free(posix_timers_cache, tmr);
}

static void posix_timer_free(struct k_itimer *tmr)
{
	put_pid(tmr->it_pid);
	sigqueue_free(tmr->sigq);
	call_rcu(&tmr->rcu, k_itimer_rcu_free);
}

static void posix_timer_unhash_and_free(struct k_itimer *tmr)
{
	spin_lock(&hash_lock);
	hlist_del_rcu(&tmr->t_hash);
	spin_unlock(&hash_lock);
	posix_timer_free(tmr);
}

static int common_timer_create(struct k_itimer *new_timer)
{
	hrtimer_init(&new_timer->it.real.timer, new_timer->it_clock, 0);
	return 0;
}
/* Create a POSIX.1b interval timer. */
static int do_timer_create(clockid_t which_clock, struct sigevent *event,
			   timer_t __user *created_timer_id)
{
	const struct k_clock *kc = clockid_to_kclock(which_clock);
	struct k_itimer *new_timer;
	int error, new_timer_id;

	if (!kc)
		return -EINVAL;
	if (!kc->timer_create)
		return -EOPNOTSUPP;

	new_timer = alloc_posix_timer();
	if (unlikely(!new_timer))
		return -EAGAIN;

	spin_lock_init(&new_timer->it_lock);

	/*
	 * Add the timer to the hash table. The timer is not yet valid
	 * because new_timer::it_signal is still NULL. The timer id is also
	 * not yet visible to user space.
	 */
	new_timer_id = posix_timer_add(new_timer);
	if (new_timer_id < 0) {
		posix_timer_free(new_timer);
		return new_timer_id;
	}

	new_timer->it_id = (timer_t) new_timer_id;
	new_timer->it_clock = which_clock;
	new_timer->kclock = kc;
	new_timer->it_overrun = -1LL;

	if (event) {
		rcu_read_lock();
		new_timer->it_pid = get_pid(good_sigevent(event));
		rcu_read_unlock();
		if (!new_timer->it_pid) {
			error = -EINVAL;
			goto out;
		}
		new_timer->it_sigev_notify     = event->sigev_notify;
		new_timer->sigq->info.si_signo = event->sigev_signo;
		new_timer->sigq->info.si_value = event->sigev_value;
	} else {
		new_timer->it_sigev_notify     = SIGEV_SIGNAL;
		new_timer->sigq->info.si_signo = SIGALRM;
		memset(&new_timer->sigq->info.si_value, 0, sizeof(sigval_t));
		new_timer->sigq->info.si_value.sival_int = new_timer->it_id;
		new_timer->it_pid = get_pid(task_tgid(current));
	}

	new_timer->sigq->info.si_tid   = new_timer->it_id;
	new_timer->sigq->info.si_code  = SI_TIMER;

	if (copy_to_user(created_timer_id, &new_timer_id, sizeof(new_timer_id))) {
		error = -EFAULT;
		goto out;
	}
	/*
	 * After successful copy out, the timer ID is visible to user space
	 * now but not yet valid because new_timer::it_signal is still NULL.
	 *
	 * Complete the initialization with the clock specific create
	 * callback.
	 */
	error = kc->timer_create(new_timer);
	if (error)
		goto out;

	spin_lock_irq(&current->sighand->siglock);
	/* This makes the timer valid in the hash table */
	WRITE_ONCE(new_timer->it_signal, current->signal);
	hlist_add_head(&new_timer->list, &current->signal->posix_timers);
	spin_unlock_irq(&current->sighand->siglock);
	/*
	 * After unlocking sighand::siglock @new_timer is subject to
	 * concurrent removal and cannot be touched anymore
	 */
	return 0;
out:
	posix_timer_unhash_and_free(new_timer);
	return error;
}

SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
		struct sigevent __user *, timer_event_spec,
		timer_t __user *, created_timer_id)
{
	if (timer_event_spec) {
		sigevent_t event;

		if (copy_from_user(&event, timer_event_spec, sizeof(event)))
			return -EFAULT;
		return do_timer_create(which_clock, &event, created_timer_id);
	}
	return do_timer_create(which_clock, NULL, created_timer_id);
}
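
/*
 * Illustrative user-space sketch (not part of this file): how the libc
 * timer_create() wrapper typically reaches the syscall above. The clock
 * and signal choices are arbitrary example values.
 *
 *	#include <signal.h>
 *	#include <time.h>
 *
 *	timer_t tid;
 *	struct sigevent sev = {
 *		.sigev_notify = SIGEV_SIGNAL,
 *		.sigev_signo  = SIGALRM,
 *	};
 *
 *	if (timer_create(CLOCK_MONOTONIC, &sev, &tid) == -1)
 *		perror("timer_create");
 *
 * Passing a NULL sigevent pointer takes the "else" branch in
 * do_timer_create(): SIGEV_SIGNAL with SIGALRM directed at the process
 * and the timer ID stored in si_value.
 */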

#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE3(timer_create, clockid_t, which_clock,
		       struct compat_sigevent __user *, timer_event_spec,
		       timer_t __user *, created_timer_id)
{
	if (timer_event_spec) {
		sigevent_t event;

		if (get_compat_sigevent(&event, timer_event_spec))
			return -EFAULT;
		return do_timer_create(which_clock, &event, created_timer_id);
	}
	return do_timer_create(which_clock, NULL, created_timer_id);
}
#endif

static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags)
{
	struct k_itimer *timr;

	/*
	 * timer_t could be any type >= int and we want to make sure any
	 * @timer_id outside positive int range fails lookup.
	 */
	if ((unsigned long long)timer_id > INT_MAX)
		return NULL;

	/*
	 * The hash lookup and the timers are RCU protected.
	 *
	 * Timers are added to the hash in invalid state where
	 * timr::it_signal == NULL. timr::it_signal is only set after the
	 * rest of the initialization succeeded.
	 *
	 * Timer destruction happens in steps:
	 *  1) Set timr::it_signal to NULL with timr::it_lock held
	 *  2) Release timr::it_lock
	 *  3) Remove from the hash under hash_lock
	 *  4) Call RCU for removal after the grace period
	 *
	 * Holding rcu_read_lock() across the lookup ensures that
	 * the timer cannot be freed.
	 *
	 * The lookup validates locklessly that timr::it_signal ==
	 * current::signal and timr::it_id == @timer_id. timr::it_id
	 * can't change, but timr::it_signal becomes NULL during
	 * destruction.
	 */
	rcu_read_lock();
	timr = posix_timer_by_id(timer_id);
	if (timr) {
		spin_lock_irqsave(&timr->it_lock, *flags);
		/*
		 * Validate under timr::it_lock that timr::it_signal is
		 * still valid. Pairs with #1 above.
		 */
		if (timr->it_signal == current->signal) {
			rcu_read_unlock();
			return timr;
		}
		spin_unlock_irqrestore(&timr->it_lock, *flags);
	}
	rcu_read_unlock();

	return NULL;
}

static ktime_t common_hrtimer_remaining(struct k_itimer *timr, ktime_t now)
{
	struct hrtimer *timer = &timr->it.real.timer;

	return __hrtimer_expires_remaining_adjusted(timer, now);
}

static s64 common_hrtimer_forward(struct k_itimer *timr, ktime_t now)
{
	struct hrtimer *timer = &timr->it.real.timer;

	return hrtimer_forward(timer, now, timr->it_interval);
}

/*
 * Get the time remaining on a POSIX.1b interval timer.
 *
 * Two issues to handle here:
 *
 *  1) The timer has a requeue pending. The return value must appear as
 *     if the timer has been requeued right now.
 *
 *  2) The timer is a SIGEV_NONE timer. These timers are never enqueued
 *     into the hrtimer queue and therefore never expired. Emulate expiry
 *     here taking #1 into account.
 */
void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting)
{
	const struct k_clock *kc = timr->kclock;
	ktime_t now, remaining, iv;
	bool sig_none;

	sig_none = timr->it_sigev_notify == SIGEV_NONE;
	iv = timr->it_interval;

	/* Interval timer? */
	if (iv) {
		cur_setting->it_interval = ktime_to_timespec64(iv);
	} else if (!timr->it_active) {
		/*
		 * SIGEV_NONE oneshot timers are never queued and therefore
		 * timr->it_active is always false. The check below
		 * vs. remaining time will handle this case.
		 *
		 * For all other timers there is nothing to update here, so
		 * return.
		 */
		if (!sig_none)
			return;
	}

	now = kc->clock_get_ktime(timr->it_clock);

	/*
	 * If this is an interval timer and either has requeue pending or
	 * is a SIGEV_NONE timer move the expiry time forward by intervals,
	 * so expiry is > now.
	 */
	if (iv && (timr->it_requeue_pending & REQUEUE_PENDING || sig_none))
		timr->it_overrun += kc->timer_forward(timr, now);

	remaining = kc->timer_remaining(timr, now);
	/*
	 * As @now is retrieved before a possible timer_forward() and
	 * cannot be reevaluated by the compiler @remaining is based on the
	 * same @now value. Therefore @remaining is consistent vs. @now.
	 *
	 * Consequently all interval timers, i.e. @iv > 0, cannot have a
	 * remaining time <= 0 because timer_forward() guarantees to move
	 * them forward so that the next timer expiry is > @now.
	 */
	if (remaining <= 0) {
		/*
		 * A single shot SIGEV_NONE timer must return 0, when it is
		 * expired! Timers which have a real signal delivery mode
		 * must return a remaining time greater than 0 because the
		 * signal has not yet been delivered.
		 */
		if (!sig_none)
			cur_setting->it_value.tv_nsec = 1;
	} else {
		cur_setting->it_value = ktime_to_timespec64(remaining);
	}
}

static int do_timer_gettime(timer_t timer_id, struct itimerspec64 *setting)
{
	const struct k_clock *kc;
	struct k_itimer *timr;
	unsigned long flags;
	int ret = 0;

	timr = lock_timer(timer_id, &flags);
	if (!timr)
		return -EINVAL;

	memset(setting, 0, sizeof(*setting));
	kc = timr->kclock;
	if (WARN_ON_ONCE(!kc || !kc->timer_get))
		ret = -EINVAL;
	else
		kc->timer_get(timr, setting);

	unlock_timer(timr, flags);
	return ret;
}

/* Get the time remaining on a POSIX.1b interval timer. */
SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
		struct __kernel_itimerspec __user *, setting)
{
	struct itimerspec64 cur_setting;

	int ret = do_timer_gettime(timer_id, &cur_setting);
	if (!ret) {
		if (put_itimerspec64(&cur_setting, setting))
			ret = -EFAULT;
	}
	return ret;
}
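
/*
 * Illustrative user-space sketch (not part of this file): reading the
 * remaining time. Note the SIGEV_NONE emulation in common_timer_get():
 * an expired SIGEV_NONE oneshot timer reports 0, while a timer with real
 * signal delivery reports a 1ns remainder until the signal has actually
 * been delivered.
 *
 *	struct itimerspec cur;
 *
 *	if (timer_gettime(tid, &cur) == 0)
 *		printf("%lld.%09lds left\n",
 *		       (long long)cur.it_value.tv_sec, cur.it_value.tv_nsec);
 */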

#ifdef CONFIG_COMPAT_32BIT_TIME

SYSCALL_DEFINE2(timer_gettime32, timer_t, timer_id,
		struct old_itimerspec32 __user *, setting)
{
	struct itimerspec64 cur_setting;

	int ret = do_timer_gettime(timer_id, &cur_setting);
	if (!ret) {
		if (put_old_itimerspec32(&cur_setting, setting))
			ret = -EFAULT;
	}
	return ret;
}

#endif

/**
 * sys_timer_getoverrun - Get the number of overruns of a POSIX.1b interval timer
 * @timer_id:	The timer ID which identifies the timer
 *
 * The "overrun count" of a timer is one plus the number of expiration
 * intervals which have elapsed between the first expiry, which queues the
 * signal, and the actual signal delivery. On signal delivery the "overrun
 * count" is calculated and cached, so it can be returned directly here.
 *
 * As this is relative to the last queued signal the returned overrun count
 * is meaningless outside of the signal delivery path and even there it
 * does not accurately reflect the current state when user space evaluates
 * it.
 *
 * Returns:
 *	-EINVAL		@timer_id is invalid
 *	1..INT_MAX	The number of overruns related to the last delivered signal
 */
SYSCALL_DEFINE1(timer_getoverrun, timer_t, timer_id)
{
	struct k_itimer *timr;
	unsigned long flags;
	int overrun;

	timr = lock_timer(timer_id, &flags);
	if (!timr)
		return -EINVAL;

	overrun = timer_overrun_to_int(timr, 0);
	unlock_timer(timr, flags);

	return overrun;
}
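
/*
 * Illustrative user-space sketch (not part of this file): per the note
 * above, the cached count only relates to the last delivered signal, so
 * a program would query it from within or right after the signal handler
 * for that timer.
 *
 *	int ovr = timer_getoverrun(tid);
 *
 *	if (ovr > 0)
 *		printf("%d expiration(s) were missed\n", ovr);
 */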

static void common_hrtimer_arm(struct k_itimer *timr, ktime_t expires,
			       bool absolute, bool sigev_none)
{
	struct hrtimer *timer = &timr->it.real.timer;
	enum hrtimer_mode mode;

	mode = absolute ? HRTIMER_MODE_ABS : HRTIMER_MODE_REL;
	/*
	 * Posix magic: Relative CLOCK_REALTIME timers are not affected by
	 * clock modifications, so they become CLOCK_MONOTONIC based under the
	 * hood. See hrtimer_init(). Update timr->kclock, so the generic
	 * functions which use timr->kclock->clock_get_*() work.
	 *
	 * Note: it_clock stays unmodified, because the next timer_set() might
	 * use ABSTIME, so it needs to switch back.
	 */
	if (timr->it_clock == CLOCK_REALTIME)
		timr->kclock = absolute ? &clock_realtime : &clock_monotonic;

	hrtimer_init(&timr->it.real.timer, timr->it_clock, mode);
	timr->it.real.timer.function = posix_timer_fn;

	if (!absolute)
		expires = ktime_add_safe(expires, timer->base->get_time());
	hrtimer_set_expires(timer, expires);

	if (!sigev_none)
		hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
}
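
/*
 * Illustrative consequence of the CLOCK_REALTIME special case above
 * (user-space sketch, not part of this file): a relatively armed
 * CLOCK_REALTIME timer is immune to clock_settime(), an absolute one
 * is not.
 *
 *	struct itimerspec its = { .it_value = { .tv_sec = 5 } };
 *
 *	timer_settime(tid, 0, &its, NULL);	(flags == 0: relative)
 *
 * Stepping CLOCK_REALTIME backwards now does not delay this expiry,
 * because the timer runs CLOCK_MONOTONIC based under the hood. Armed
 * with TIMER_ABSTIME instead, the expiry tracks the clock change.
 */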

static int common_hrtimer_try_to_cancel(struct k_itimer *timr)
{
	return hrtimer_try_to_cancel(&timr->it.real.timer);
}

static void common_timer_wait_running(struct k_itimer *timer)
{
	hrtimer_cancel_wait_running(&timer->it.real.timer);
}

/*
 * On PREEMPT_RT this prevents priority inversion and a potential livelock
 * against the ksoftirqd thread in case that ksoftirqd gets preempted while
 * executing a hrtimer callback.
 *
 * See the comments in hrtimer_cancel_wait_running(). For PREEMPT_RT=n this
 * just results in a cpu_relax().
 *
 * For POSIX CPU timers with CONFIG_POSIX_CPU_TIMERS_TASK_WORK=n this is
 * just a cpu_relax(). With CONFIG_POSIX_CPU_TIMERS_TASK_WORK=y this
 * prevents spinning on an eventually scheduled out task and a livelock
 * when the task which tries to delete or disarm the timer has preempted
 * the task which runs the expiry in task work context.
 */
static struct k_itimer *timer_wait_running(struct k_itimer *timer,
					   unsigned long *flags)
{
	const struct k_clock *kc = READ_ONCE(timer->kclock);
	timer_t timer_id = READ_ONCE(timer->it_id);

	/* Prevent kfree(timer) after dropping the lock */
	rcu_read_lock();
	unlock_timer(timer, *flags);

	/*
	 * kc->timer_wait_running() might drop RCU lock. So @timer
	 * cannot be touched anymore after the function returns!
	 */
	if (!WARN_ON_ONCE(!kc->timer_wait_running))
		kc->timer_wait_running(timer);

	rcu_read_unlock();
	/* Relock the timer. It might no longer be hashed. */
	return lock_timer(timer_id, flags);
}

/*
 * Set up the new interval and reset the signal delivery data
 */
void posix_timer_set_common(struct k_itimer *timer, struct itimerspec64 *new_setting)
{
	if (new_setting->it_value.tv_sec || new_setting->it_value.tv_nsec)
		timer->it_interval = timespec64_to_ktime(new_setting->it_interval);
	else
		timer->it_interval = 0;

	/* Prevent reloading in case there is a signal pending */
	timer->it_requeue_pending = (timer->it_requeue_pending + 2) & ~REQUEUE_PENDING;
	/* Reset overrun accounting */
	timer->it_overrun_last = 0;
	timer->it_overrun = -1LL;
}

/* Set a POSIX.1b interval timer. */
int common_timer_set(struct k_itimer *timr, int flags,
		     struct itimerspec64 *new_setting,
		     struct itimerspec64 *old_setting)
{
	const struct k_clock *kc = timr->kclock;
	bool sigev_none;
	ktime_t expires;

	if (old_setting)
		common_timer_get(timr, old_setting);

	/* Prevent rearming by clearing the interval */
	timr->it_interval = 0;
	/*
	 * Careful here. On SMP systems the timer expiry function could be
	 * active and spinning on timr->it_lock.
	 */
	if (kc->timer_try_to_cancel(timr) < 0)
		return TIMER_RETRY;

	timr->it_active = 0;
	posix_timer_set_common(timr, new_setting);

	/* Keep timer disarmed when it_value is zero */
	if (!new_setting->it_value.tv_sec && !new_setting->it_value.tv_nsec)
		return 0;

	expires = timespec64_to_ktime(new_setting->it_value);
	if (flags & TIMER_ABSTIME)
		expires = timens_ktime_to_host(timr->it_clock, expires);
	sigev_none = timr->it_sigev_notify == SIGEV_NONE;

	kc->timer_arm(timr, expires, flags & TIMER_ABSTIME, sigev_none);
	timr->it_active = !sigev_none;
	return 0;
}

static int do_timer_settime(timer_t timer_id, int tmr_flags,
			    struct itimerspec64 *new_spec64,
			    struct itimerspec64 *old_spec64)
{
	const struct k_clock *kc;
	struct k_itimer *timr;
	unsigned long flags;
	int error;

	if (!timespec64_valid(&new_spec64->it_interval) ||
	    !timespec64_valid(&new_spec64->it_value))
		return -EINVAL;

	if (old_spec64)
		memset(old_spec64, 0, sizeof(*old_spec64));

	timr = lock_timer(timer_id, &flags);
retry:
	if (!timr)
		return -EINVAL;

	if (old_spec64)
		old_spec64->it_interval = ktime_to_timespec64(timr->it_interval);

	kc = timr->kclock;
	if (WARN_ON_ONCE(!kc || !kc->timer_set))
		error = -EINVAL;
	else
		error = kc->timer_set(timr, tmr_flags, new_spec64, old_spec64);

	if (error == TIMER_RETRY) {
		/* We already got the old time... */
		old_spec64 = NULL;
		/* Unlocks and relocks the timer if it still exists */
		timr = timer_wait_running(timr, &flags);
		goto retry;
	}
	unlock_timer(timr, flags);

	return error;
}

/* Set a POSIX.1b interval timer */
SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags,
		const struct __kernel_itimerspec __user *, new_setting,
		struct __kernel_itimerspec __user *, old_setting)
{
	struct itimerspec64 new_spec, old_spec, *rtn;
	int error = 0;

	if (!new_setting)
		return -EINVAL;

	if (get_itimerspec64(&new_spec, new_setting))
		return -EFAULT;

	rtn = old_setting ? &old_spec : NULL;
	error = do_timer_settime(timer_id, flags, &new_spec, rtn);
	if (!error && old_setting) {
		if (put_itimerspec64(&old_spec, old_setting))
			error = -EFAULT;
	}
	return error;
}
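
/*
 * Illustrative user-space sketch (not part of this file): arming a
 * periodic 100ms timer. A non-zero it_interval makes the timer rearm on
 * every expiry; a zero it_value keeps it disarmed (see common_timer_set()).
 *
 *	struct itimerspec its = {
 *		.it_value    = { .tv_nsec = 100 * 1000 * 1000 },
 *		.it_interval = { .tv_nsec = 100 * 1000 * 1000 },
 *	};
 *
 *	if (timer_settime(tid, 0, &its, NULL) == -1)
 *		perror("timer_settime");
 */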

#ifdef CONFIG_COMPAT_32BIT_TIME
SYSCALL_DEFINE4(timer_settime32, timer_t, timer_id, int, flags,
		struct old_itimerspec32 __user *, new,
		struct old_itimerspec32 __user *, old)
{
	struct itimerspec64 new_spec, old_spec;
	struct itimerspec64 *rtn = old ? &old_spec : NULL;
	int error = 0;

	if (!new)
		return -EINVAL;
	if (get_old_itimerspec32(&new_spec, new))
		return -EFAULT;

	error = do_timer_settime(timer_id, flags, &new_spec, rtn);
	if (!error && old) {
		if (put_old_itimerspec32(&old_spec, old))
			error = -EFAULT;
	}
	return error;
}
#endif

int common_timer_del(struct k_itimer *timer)
{
	const struct k_clock *kc = timer->kclock;

	timer->it_interval = 0;
	if (kc->timer_try_to_cancel(timer) < 0)
		return TIMER_RETRY;
	timer->it_active = 0;
	return 0;
}

static inline int timer_delete_hook(struct k_itimer *timer)
{
	const struct k_clock *kc = timer->kclock;

	if (WARN_ON_ONCE(!kc || !kc->timer_del))
		return -EINVAL;
	return kc->timer_del(timer);
}

/* Delete a POSIX.1b interval timer. */
SYSCALL_DEFINE1(timer_delete, timer_t, timer_id)
{
	struct k_itimer *timer;
	unsigned long flags;

	timer = lock_timer(timer_id, &flags);

retry_delete:
	if (!timer)
		return -EINVAL;

	if (unlikely(timer_delete_hook(timer) == TIMER_RETRY)) {
		/* Unlocks and relocks the timer if it still exists */
		timer = timer_wait_running(timer, &flags);
		goto retry_delete;
	}

	spin_lock(&current->sighand->siglock);
	hlist_del(&timer->list);
	spin_unlock(&current->sighand->siglock);
	/*
	 * A concurrent lookup could check timer::it_signal lockless. It
	 * will reevaluate with timer::it_lock held and observe the NULL.
	 */
	WRITE_ONCE(timer->it_signal, NULL);

	unlock_timer(timer, flags);
	posix_timer_unhash_and_free(timer);
	return 0;
}

/*
 * Delete a timer if it is armed, remove it from the hash and schedule it
 * for RCU freeing.
 */
static void itimer_delete(struct k_itimer *timer)
{
	unsigned long flags;

	/*
	 * irqsave is required to make timer_wait_running() work.
	 */
	spin_lock_irqsave(&timer->it_lock, flags);

retry_delete:
	/*
	 * Even if the timer is no longer accessible from other tasks
	 * it still might be armed and queued in the underlying timer
	 * mechanism. Worse, that timer mechanism might run the expiry
	 * function concurrently.
	 */
	if (timer_delete_hook(timer) == TIMER_RETRY) {
		/*
		 * Timer is expired concurrently, prevent livelocks
		 * and pointless spinning on RT.
		 *
		 * timer_wait_running() drops timer::it_lock, which opens
		 * the possibility for another task to delete the timer.
		 *
		 * That's not possible here because this is invoked from
		 * do_exit() only for the last thread of the thread group.
		 * So no other task can access and delete that timer.
		 */
		if (WARN_ON_ONCE(timer_wait_running(timer, &flags) != timer))
			return;

		goto retry_delete;
	}
	hlist_del(&timer->list);

	/*
	 * Setting timer::it_signal to NULL is technically not required
	 * here as nothing can access the timer anymore legitimately via
	 * the hash table. Set it to NULL nevertheless so that all deletion
	 * paths are consistent.
	 */
	WRITE_ONCE(timer->it_signal, NULL);

	spin_unlock_irqrestore(&timer->it_lock, flags);
	posix_timer_unhash_and_free(timer);
}

/*
 * Invoked from do_exit() when the last thread of a thread group exits.
 * At that point no other task can access the timers of the dying
 * task anymore.
 */
void exit_itimers(struct task_struct *tsk)
{
	struct hlist_head timers;

	if (hlist_empty(&tsk->signal->posix_timers))
		return;

	/* Protect against concurrent read via /proc/$PID/timers */
	spin_lock_irq(&tsk->sighand->siglock);
	hlist_move_list(&tsk->signal->posix_timers, &timers);
	spin_unlock_irq(&tsk->sighand->siglock);

	/* The timers are no longer accessible via tsk::signal */
	while (!hlist_empty(&timers))
		itimer_delete(hlist_entry(timers.first, struct k_itimer, list));
}

SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock,
		const struct __kernel_timespec __user *, tp)
{
	const struct k_clock *kc = clockid_to_kclock(which_clock);
	struct timespec64 new_tp;

	if (!kc || !kc->clock_set)
		return -EINVAL;

	if (get_timespec64(&new_tp, tp))
		return -EFAULT;

	/*
	 * Permission checks have to be done inside the clock specific
	 * setter callback.
	 */
	return kc->clock_set(which_clock, &new_tp);
}

SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock,
		struct __kernel_timespec __user *, tp)
{
	const struct k_clock *kc = clockid_to_kclock(which_clock);
	struct timespec64 kernel_tp;
	int error;

	if (!kc)
		return -EINVAL;

	error = kc->clock_get_timespec(which_clock, &kernel_tp);

	if (!error && put_timespec64(&kernel_tp, tp))
		error = -EFAULT;

	return error;
}

int do_clock_adjtime(const clockid_t which_clock, struct __kernel_timex *ktx)
{
	const struct k_clock *kc = clockid_to_kclock(which_clock);

	if (!kc)
		return -EINVAL;
	if (!kc->clock_adj)
		return -EOPNOTSUPP;

	return kc->clock_adj(which_clock, ktx);
}

SYSCALL_DEFINE2(clock_adjtime, const clockid_t, which_clock,
		struct __kernel_timex __user *, utx)
{
	struct __kernel_timex ktx;
	int err;

	if (copy_from_user(&ktx, utx, sizeof(ktx)))
		return -EFAULT;

	err = do_clock_adjtime(which_clock, &ktx);

	if (err >= 0 && copy_to_user(utx, &ktx, sizeof(ktx)))
		return -EFAULT;

	return err;
}

/**
 * sys_clock_getres - Get the resolution of a clock
 * @which_clock:	The clock to get the resolution for
 * @tp:			Pointer to a user space timespec64 for storage
 *
 * POSIX defines:
 *
 * "The clock_getres() function shall return the resolution of any
 * clock. Clock resolutions are implementation-defined and cannot be set by
 * a process. If the argument res is not NULL, the resolution of the
 * specified clock shall be stored in the location pointed to by res. If
 * res is NULL, the clock resolution is not returned. If the time argument
 * of clock_settime() is not a multiple of res, then the value is truncated
 * to a multiple of res."
 *
 * Due to the various hardware constraints the real resolution can vary
 * wildly and even change during runtime when the underlying devices are
 * replaced. The kernel also can use hardware devices with different
 * resolutions for reading the time and for arming timers.
 *
 * The kernel therefore deviates from the POSIX spec in various aspects:
 *
 * 1) The resolution returned to user space
 *
 *    For CLOCK_REALTIME, CLOCK_MONOTONIC, CLOCK_BOOTTIME, CLOCK_TAI,
 *    CLOCK_REALTIME_ALARM, CLOCK_BOOTTIME_ALARM and CLOCK_MONOTONIC_RAW
 *    the kernel differentiates only two cases:
 *
 *    I)  Low resolution mode:
 *
 *	  When high resolution timers are disabled at compile or runtime
 *	  the resolution returned is nanoseconds per tick, which represents
 *	  the precision at which timers expire.
 *
 *    II) High resolution mode:
 *
 *	  When high resolution timers are enabled the resolution returned
 *	  is always one nanosecond independent of the actual resolution of
 *	  the underlying hardware devices.
 *
 *	  For CLOCK_*_ALARM the actual resolution depends on system
 *	  state. When the system is running the resolution is the same as
 *	  the resolution of the other clocks. During suspend the actual
 *	  resolution is the resolution of the underlying RTC device which
 *	  might be way less precise than the clockevent device used during
 *	  running state.
 *
 *   For CLOCK_REALTIME_COARSE and CLOCK_MONOTONIC_COARSE the resolution
 *   returned is always nanoseconds per tick.
 *
 *   For CLOCK_PROCESS_CPUTIME and CLOCK_THREAD_CPUTIME the resolution
 *   returned is always one nanosecond under the assumption that the
 *   underlying scheduler clock has a better resolution than nanoseconds
 *   per tick.
 *
 *   For dynamic POSIX clocks (PTP devices) the resolution returned is
 *   always one nanosecond.
 *
 * 2) Effect on sys_clock_settime()
 *
 *    The kernel does not truncate the time which is handed in to
 *    sys_clock_settime(). The kernel internal timekeeping is always using
 *    nanoseconds precision independent of the clocksource device which is
 *    used to read the time from. The resolution of that device only
 *    affects the precision of the time returned by sys_clock_gettime().
 *
 * Returns:
 *	0		Success. @tp contains the resolution
 *	-EINVAL		@which_clock is not a valid clock ID
 *	-EFAULT		Copying the resolution to @tp faulted
 *	-ENODEV		Dynamic POSIX clock is not backed by a device
 *	-EOPNOTSUPP	Dynamic POSIX clock does not support getres()
 */
SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock,
		struct __kernel_timespec __user *, tp)
{
	const struct k_clock *kc = clockid_to_kclock(which_clock);
	struct timespec64 rtn_tp;
	int error;

	if (!kc)
		return -EINVAL;

	error = kc->clock_getres(which_clock, &rtn_tp);

	if (!error && tp && put_timespec64(&rtn_tp, tp))
		error = -EFAULT;

	return error;
}
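
/*
 * Illustrative user-space sketch (not part of this file): per the note
 * above, a high resolution kernel reports 1ns for the hrtimer based
 * clocks and nanoseconds-per-tick for the coarse clocks.
 *
 *	struct timespec res;
 *
 *	clock_getres(CLOCK_MONOTONIC, &res);		typically 0.000000001
 *	clock_getres(CLOCK_MONOTONIC_COARSE, &res);	e.g. 0.004000000 at HZ=250
 */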

#ifdef CONFIG_COMPAT_32BIT_TIME

SYSCALL_DEFINE2(clock_settime32, clockid_t, which_clock,
		struct old_timespec32 __user *, tp)
{
	const struct k_clock *kc = clockid_to_kclock(which_clock);
	struct timespec64 ts;

	if (!kc || !kc->clock_set)
		return -EINVAL;

	if (get_old_timespec32(&ts, tp))
		return -EFAULT;

	return kc->clock_set(which_clock, &ts);
}

SYSCALL_DEFINE2(clock_gettime32, clockid_t, which_clock,
		struct old_timespec32 __user *, tp)
{
	const struct k_clock *kc = clockid_to_kclock(which_clock);
	struct timespec64 ts;
	int err;

	if (!kc)
		return -EINVAL;

	err = kc->clock_get_timespec(which_clock, &ts);

	if (!err && put_old_timespec32(&ts, tp))
		err = -EFAULT;

	return err;
}

SYSCALL_DEFINE2(clock_adjtime32, clockid_t, which_clock,
		struct old_timex32 __user *, utp)
{
	struct __kernel_timex ktx;
	int err;

	err = get_old_timex32(&ktx, utp);
	if (err)
		return err;

	err = do_clock_adjtime(which_clock, &ktx);

	if (err >= 0 && put_old_timex32(utp, &ktx))
		return -EFAULT;

	return err;
}

SYSCALL_DEFINE2(clock_getres_time32, clockid_t, which_clock,
		struct old_timespec32 __user *, tp)
{
	const struct k_clock *kc = clockid_to_kclock(which_clock);
	struct timespec64 ts;
	int err;

	if (!kc)
		return -EINVAL;

	err = kc->clock_getres(which_clock, &ts);
	if (!err && tp && put_old_timespec32(&ts, tp))
		return -EFAULT;

	return err;
}

#endif

/*
 * sys_clock_nanosleep() for CLOCK_REALTIME and CLOCK_TAI
 */
static int common_nsleep(const clockid_t which_clock, int flags,
			 const struct timespec64 *rqtp)
{
	ktime_t texp = timespec64_to_ktime(*rqtp);

	return hrtimer_nanosleep(texp, flags & TIMER_ABSTIME ?
				 HRTIMER_MODE_ABS : HRTIMER_MODE_REL,
				 which_clock);
}

/*
 * sys_clock_nanosleep() for CLOCK_MONOTONIC and CLOCK_BOOTTIME
 *
 * Absolute nanosleeps for these clocks are time-namespace adjusted.
 */
static int common_nsleep_timens(const clockid_t which_clock, int flags,
				const struct timespec64 *rqtp)
{
	ktime_t texp = timespec64_to_ktime(*rqtp);

	if (flags & TIMER_ABSTIME)
		texp = timens_ktime_to_host(which_clock, texp);

	return hrtimer_nanosleep(texp, flags & TIMER_ABSTIME ?
				 HRTIMER_MODE_ABS : HRTIMER_MODE_REL,
				 which_clock);
}

SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags,
		const struct __kernel_timespec __user *, rqtp,
		struct __kernel_timespec __user *, rmtp)
{
	const struct k_clock *kc = clockid_to_kclock(which_clock);
	struct timespec64 t;

	if (!kc)
		return -EINVAL;
	if (!kc->nsleep)
		return -EOPNOTSUPP;

	if (get_timespec64(&t, rqtp))
		return -EFAULT;

	if (!timespec64_valid(&t))
		return -EINVAL;
	if (flags & TIMER_ABSTIME)
		rmtp = NULL;
	current->restart_block.fn = do_no_restart_syscall;
	current->restart_block.nanosleep.type = rmtp ? TT_NATIVE : TT_NONE;
	current->restart_block.nanosleep.rmtp = rmtp;

	return kc->nsleep(which_clock, flags, &t);
}
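
/*
 * Illustrative user-space sketch (not part of this file): an absolute
 * sleep against CLOCK_MONOTONIC. With TIMER_ABSTIME the remaining-time
 * pointer is ignored (rmtp is set to NULL above), so restarting after a
 * signal with the same absolute deadline cannot oversleep. Note that
 * clock_nanosleep() returns the error number directly instead of
 * setting errno.
 *
 *	struct timespec deadline;
 *
 *	clock_gettime(CLOCK_MONOTONIC, &deadline);
 *	deadline.tv_sec += 2;
 *
 *	while (clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME,
 *			       &deadline, NULL) == EINTR)
 *		;
 */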

#ifdef CONFIG_COMPAT_32BIT_TIME

SYSCALL_DEFINE4(clock_nanosleep_time32, clockid_t, which_clock, int, flags,
		struct old_timespec32 __user *, rqtp,
		struct old_timespec32 __user *, rmtp)
{
	const struct k_clock *kc = clockid_to_kclock(which_clock);
	struct timespec64 t;

	if (!kc)
		return -EINVAL;
	if (!kc->nsleep)
		return -EOPNOTSUPP;

	if (get_old_timespec32(&t, rqtp))
		return -EFAULT;

	if (!timespec64_valid(&t))
		return -EINVAL;
	if (flags & TIMER_ABSTIME)
		rmtp = NULL;
	current->restart_block.fn = do_no_restart_syscall;
	current->restart_block.nanosleep.type = rmtp ? TT_COMPAT : TT_NONE;
	current->restart_block.nanosleep.compat_rmtp = rmtp;

	return kc->nsleep(which_clock, flags, &t);
}

#endif

static const struct k_clock clock_realtime = {
	.clock_getres		= posix_get_hrtimer_res,
	.clock_get_timespec	= posix_get_realtime_timespec,
	.clock_get_ktime	= posix_get_realtime_ktime,
	.clock_set		= posix_clock_realtime_set,
	.clock_adj		= posix_clock_realtime_adj,
	.nsleep			= common_nsleep,
	.timer_create		= common_timer_create,
	.timer_set		= common_timer_set,
	.timer_get		= common_timer_get,
	.timer_del		= common_timer_del,
	.timer_rearm		= common_hrtimer_rearm,
	.timer_forward		= common_hrtimer_forward,
	.timer_remaining	= common_hrtimer_remaining,
	.timer_try_to_cancel	= common_hrtimer_try_to_cancel,
	.timer_wait_running	= common_timer_wait_running,
	.timer_arm		= common_hrtimer_arm,
};

static const struct k_clock clock_monotonic = {
	.clock_getres		= posix_get_hrtimer_res,
	.clock_get_timespec	= posix_get_monotonic_timespec,
	.clock_get_ktime	= posix_get_monotonic_ktime,
	.nsleep			= common_nsleep_timens,
	.timer_create		= common_timer_create,
	.timer_set		= common_timer_set,
	.timer_get		= common_timer_get,
	.timer_del		= common_timer_del,
	.timer_rearm		= common_hrtimer_rearm,
	.timer_forward		= common_hrtimer_forward,
	.timer_remaining	= common_hrtimer_remaining,
	.timer_try_to_cancel	= common_hrtimer_try_to_cancel,
	.timer_wait_running	= common_timer_wait_running,
	.timer_arm		= common_hrtimer_arm,
};

static const struct k_clock clock_monotonic_raw = {
	.clock_getres		= posix_get_hrtimer_res,
	.clock_get_timespec	= posix_get_monotonic_raw,
};

static const struct k_clock clock_realtime_coarse = {
	.clock_getres		= posix_get_coarse_res,
	.clock_get_timespec	= posix_get_realtime_coarse,
};

static const struct k_clock clock_monotonic_coarse = {
	.clock_getres		= posix_get_coarse_res,
	.clock_get_timespec	= posix_get_monotonic_coarse,
};

static const struct k_clock clock_tai = {
	.clock_getres		= posix_get_hrtimer_res,
	.clock_get_ktime	= posix_get_tai_ktime,
	.clock_get_timespec	= posix_get_tai_timespec,
	.nsleep			= common_nsleep,
	.timer_create		= common_timer_create,
	.timer_set		= common_timer_set,
	.timer_get		= common_timer_get,
	.timer_del		= common_timer_del,
	.timer_rearm		= common_hrtimer_rearm,
	.timer_forward		= common_hrtimer_forward,
	.timer_remaining	= common_hrtimer_remaining,
	.timer_try_to_cancel	= common_hrtimer_try_to_cancel,
	.timer_wait_running	= common_timer_wait_running,
	.timer_arm		= common_hrtimer_arm,
};

static const struct k_clock clock_boottime = {
	.clock_getres		= posix_get_hrtimer_res,
	.clock_get_ktime	= posix_get_boottime_ktime,
	.clock_get_timespec	= posix_get_boottime_timespec,
	.nsleep			= common_nsleep_timens,
	.timer_create		= common_timer_create,
	.timer_set		= common_timer_set,
	.timer_get		= common_timer_get,
	.timer_del		= common_timer_del,
	.timer_rearm		= common_hrtimer_rearm,
	.timer_forward		= common_hrtimer_forward,
	.timer_remaining	= common_hrtimer_remaining,
	.timer_try_to_cancel	= common_hrtimer_try_to_cancel,
	.timer_wait_running	= common_timer_wait_running,
	.timer_arm		= common_hrtimer_arm,
};

static const struct k_clock * const posix_clocks[] = {
	[CLOCK_REALTIME]		= &clock_realtime,
	[CLOCK_MONOTONIC]		= &clock_monotonic,
	[CLOCK_PROCESS_CPUTIME_ID]	= &clock_process,
	[CLOCK_THREAD_CPUTIME_ID]	= &clock_thread,
	[CLOCK_MONOTONIC_RAW]		= &clock_monotonic_raw,
	[CLOCK_REALTIME_COARSE]		= &clock_realtime_coarse,
	[CLOCK_MONOTONIC_COARSE]	= &clock_monotonic_coarse,
	[CLOCK_BOOTTIME]		= &clock_boottime,
	[CLOCK_REALTIME_ALARM]		= &alarm_clock,
	[CLOCK_BOOTTIME_ALARM]		= &alarm_clock,
	[CLOCK_TAI]			= &clock_tai,
};

static const struct k_clock *clockid_to_kclock(const clockid_t id)
{
	clockid_t idx = id;

	if (id < 0) {
		return (id & CLOCKFD_MASK) == CLOCKFD ?
			&clock_posix_dynamic : &clock_posix_cpu;
	}

	if (id >= ARRAY_SIZE(posix_clocks))
		return NULL;

	return posix_clocks[array_index_nospec(idx, ARRAY_SIZE(posix_clocks))];
}