/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_PTRACE_H
#define _LINUX_PTRACE_H

#include <linux/compiler.h>		/* For unlikely.  */
#include <linux/sched.h>		/* For struct task_struct.  */
#include <linux/sched/signal.h>		/* For send_sig(), same_thread_group(), etc. */
#include <linux/err.h>			/* for IS_ERR_VALUE */
#include <linux/bug.h>			/* For BUG_ON.  */
#include <linux/pid_namespace.h>	/* For task_active_pid_ns.  */
#include <uapi/linux/ptrace.h>
#include <linux/seccomp.h>

/* Add sp to seccomp_data; seccomp is user API, so we don't want to modify it. */
struct syscall_info {
	__u64			sp;
	struct seccomp_data	data;
};

extern int ptrace_access_vm(struct task_struct *tsk, unsigned long addr,
			    void *buf, int len, unsigned int gup_flags);

/*
 * Ptrace flags
 *
 * The ownership rules for task->ptrace, which holds the ptrace flags,
 * are simple.  When a task is running, it owns its task->ptrace flags.
 * When a task is stopped, the ptracer owns task->ptrace.
 */

#define PT_SEIZED	0x00010000	/* SEIZE used, enable new behavior */
#define PT_PTRACED	0x00000001

#define PT_OPT_FLAG_SHIFT	3
/* PT_TRACE_* event enable flags */
#define PT_EVENT_FLAG(event)	(1 << (PT_OPT_FLAG_SHIFT + (event)))
#define PT_TRACESYSGOOD		PT_EVENT_FLAG(0)
#define PT_TRACE_FORK		PT_EVENT_FLAG(PTRACE_EVENT_FORK)
#define PT_TRACE_VFORK		PT_EVENT_FLAG(PTRACE_EVENT_VFORK)
#define PT_TRACE_CLONE		PT_EVENT_FLAG(PTRACE_EVENT_CLONE)
#define PT_TRACE_EXEC		PT_EVENT_FLAG(PTRACE_EVENT_EXEC)
#define PT_TRACE_VFORK_DONE	PT_EVENT_FLAG(PTRACE_EVENT_VFORK_DONE)
#define PT_TRACE_EXIT		PT_EVENT_FLAG(PTRACE_EVENT_EXIT)
#define PT_TRACE_SECCOMP	PT_EVENT_FLAG(PTRACE_EVENT_SECCOMP)

#define PT_EXITKILL		(PTRACE_O_EXITKILL << PT_OPT_FLAG_SHIFT)
#define PT_SUSPEND_SECCOMP	(PTRACE_O_SUSPEND_SECCOMP << PT_OPT_FLAG_SHIFT)
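
/*
 * Illustrative sketch (an assumption about how option conversion can be
 * written, not a quote of the canonical implementation): because every
 * PT_* option bit above is the matching PTRACE_O_* bit shifted up by
 * PT_OPT_FLAG_SHIFT, converting a tracer's PTRACE_SETOPTIONS mask into
 * task->ptrace bits is a single shift.  A hypothetical helper:
 *
 *	static inline unsigned int pt_flags_from_options(unsigned long data)
 *	{
 *		return data << PT_OPT_FLAG_SHIFT;
 *	}
 *
 * e.g. PTRACE_O_TRACEFORK (bit 1) maps to PT_TRACE_FORK (bit 4).
 */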

extern long arch_ptrace(struct task_struct *child, long request,
			unsigned long addr, unsigned long data);
extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len);
extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len);
extern void ptrace_disable(struct task_struct *);
extern int ptrace_request(struct task_struct *child, long request,
			  unsigned long addr, unsigned long data);
extern int ptrace_notify(int exit_code, unsigned long message);
extern void __ptrace_link(struct task_struct *child,
			  struct task_struct *new_parent,
			  const struct cred *ptracer_cred);
extern void __ptrace_unlink(struct task_struct *child);
extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead);
#define PTRACE_MODE_READ	0x01
#define PTRACE_MODE_ATTACH	0x02
#define PTRACE_MODE_NOAUDIT	0x04
#define PTRACE_MODE_FSCREDS	0x08
#define PTRACE_MODE_REALCREDS	0x10

/* shorthands for READ/ATTACH and FSCREDS/REALCREDS combinations */
#define PTRACE_MODE_READ_FSCREDS (PTRACE_MODE_READ | PTRACE_MODE_FSCREDS)
#define PTRACE_MODE_READ_REALCREDS (PTRACE_MODE_READ | PTRACE_MODE_REALCREDS)
#define PTRACE_MODE_ATTACH_FSCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_FSCREDS)
#define PTRACE_MODE_ATTACH_REALCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_REALCREDS)

/**
 * ptrace_may_access - check whether the caller is permitted to access
 * a target task.
 * @task: target task
 * @mode: selects type of access and caller credentials
 *
 * Returns true on success, false on denial.
 *
 * One of the flags PTRACE_MODE_FSCREDS and PTRACE_MODE_REALCREDS must
 * be set in @mode to specify whether the access was requested through
 * a filesystem syscall (should use effective capabilities and fsuid
 * of the caller) or through an explicit syscall such as
 * process_vm_writev or ptrace (and should use the real credentials).
 */
extern bool ptrace_may_access(struct task_struct *task, unsigned int mode);
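
/*
 * Illustrative use (a sketch, not a verbatim caller): a /proc-style read
 * path acting on behalf of a filesystem syscall would pass the FSCREDS
 * variant:
 *
 *	if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
 *		return -EACCES;
 *
 * while an explicit ptrace-like syscall would use the REALCREDS variants.
 */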

static inline int ptrace_reparented(struct task_struct *child)
{
	return !same_thread_group(child->real_parent, child->parent);
}

static inline void ptrace_unlink(struct task_struct *child)
{
	if (unlikely(child->ptrace))
		__ptrace_unlink(child);
}

int generic_ptrace_peekdata(struct task_struct *tsk, unsigned long addr,
			    unsigned long data);
int generic_ptrace_pokedata(struct task_struct *tsk, unsigned long addr,
			    unsigned long data);

/**
 * ptrace_parent - return the task that is tracing the given task
 * @task: task to consider
 *
 * Returns %NULL if no one is tracing @task, or the &struct task_struct
 * pointer to its tracer.
 *
 * Must be called under rcu_read_lock().  The pointer returned might be
 * kept live only by RCU.  During exec, this may be called with task_lock()
 * held on @task, still held from when check_unsafe_exec() was called.
 */
static inline struct task_struct *ptrace_parent(struct task_struct *task)
{
	if (unlikely(task->ptrace))
		return rcu_dereference(task->parent);
	return NULL;
}
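
/*
 * Example (illustrative sketch): hold rcu_read_lock() across both the
 * call and any use of the returned pointer:
 *
 *	rcu_read_lock();
 *	tracer = ptrace_parent(task);
 *	if (tracer)
 *		tracer_pid = task_pid_nr(tracer);
 *	rcu_read_unlock();
 */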

/**
 * ptrace_event_enabled - test whether a ptrace event is enabled
 * @task: ptracee of interest
 * @event: %PTRACE_EVENT_* to test
 *
 * Test whether @event is enabled for ptracee @task.
 *
 * Returns %true if @event is enabled, %false otherwise.
 */
static inline bool ptrace_event_enabled(struct task_struct *task, int event)
{
	return task->ptrace & PT_EVENT_FLAG(event);
}

/**
 * ptrace_event - possibly stop for a ptrace event notification
 * @event:	%PTRACE_EVENT_* value to report
 * @message:	value for %PTRACE_GETEVENTMSG to return
 *
 * Check whether @event is enabled and, if so, report @event and @message
 * to the ptrace parent.
 *
 * Called without locks.
 */
static inline void ptrace_event(int event, unsigned long message)
{
	if (unlikely(ptrace_event_enabled(current, event))) {
		ptrace_notify((event << 8) | SIGTRAP, message);
	} else if (event == PTRACE_EVENT_EXEC) {
		/* legacy EXEC report via SIGTRAP */
		if ((current->ptrace & (PT_PTRACED|PT_SEIZED)) == PT_PTRACED)
			send_sig(SIGTRAP, current, 0);
	}
}
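
/*
 * From the tracer's side (user space, illustrative): an event stop
 * reported by ptrace_notify() above appears in the waitpid() status with
 * the event number in the byte above the signal, e.g.:
 *
 *	if (WIFSTOPPED(status) &&
 *	    status >> 8 == (SIGTRAP | (PTRACE_EVENT_EXEC << 8)))
 *		... handle the exec event ...
 */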

/**
 * ptrace_event_pid - possibly stop for a ptrace event notification
 * @event:	%PTRACE_EVENT_* value to report
 * @pid:	process identifier for %PTRACE_GETEVENTMSG to return
 *
 * Check whether @event is enabled and, if so, report @event and @pid
 * to the ptrace parent.  @pid is reported as the pid_t seen from the
 * ptrace parent's pid namespace.
 *
 * Called without locks.
 */
static inline void ptrace_event_pid(int event, struct pid *pid)
{
	/*
	 * FIXME: There's a potential race if a ptracer in a different pid
	 * namespace than parent attaches between computing message below and
	 * when we acquire tasklist_lock in ptrace_stop().  If this happens,
	 * the ptracer will get a bogus pid from PTRACE_GETEVENTMSG.
	 */
	unsigned long message = 0;
	struct pid_namespace *ns;

	rcu_read_lock();
	ns = task_active_pid_ns(rcu_dereference(current->parent));
	if (ns)
		message = pid_nr_ns(pid, ns);
	rcu_read_unlock();

	ptrace_event(event, message);
}

/**
 * ptrace_init_task - initialize ptrace state for a new child
 * @child:		new child task
 * @ptrace:		true if child should be ptrace'd by parent's tracer
 *
 * This is called immediately after adding @child to its parent's children
 * list.  @ptrace is false in the normal case, and true to ptrace @child.
 *
 * Called with current's siglock and write_lock_irq(&tasklist_lock) held.
 */
static inline void ptrace_init_task(struct task_struct *child, bool ptrace)
{
	INIT_LIST_HEAD(&child->ptrace_entry);
	INIT_LIST_HEAD(&child->ptraced);
	child->jobctl = 0;
	child->ptrace = 0;
	child->parent = child->real_parent;

	if (unlikely(ptrace) && current->ptrace) {
		child->ptrace = current->ptrace;
		__ptrace_link(child, current->parent, current->ptracer_cred);

		if (child->ptrace & PT_SEIZED)
			task_set_jobctl_pending(child, JOBCTL_TRAP_STOP);
		else
			sigaddset(&child->pending.signal, SIGSTOP);
	} else {
		child->ptracer_cred = NULL;
	}
}
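
/*
 * Illustrative call site (a simplified sketch of the fork path, not a
 * verbatim quote): copy_process() decides whether the child starts out
 * traced based on CLONE_PTRACE and on whether the fork itself is traced:
 *
 *	ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);
 */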

/**
 * ptrace_release_task - final ptrace-related cleanup of a zombie being reaped
 * @task:	task in %EXIT_DEAD state
 *
 * Called with write_lock(&tasklist_lock) held.
 */
static inline void ptrace_release_task(struct task_struct *task)
{
	BUG_ON(!list_empty(&task->ptraced));
	ptrace_unlink(task);
	BUG_ON(!list_empty(&task->ptrace_entry));
}

#ifndef force_successful_syscall_return
/*
 * System call handlers that, upon successful completion, need to return a
 * negative value should call force_successful_syscall_return() right before
 * returning.  On architectures where the syscall convention provides for a
 * separate error flag (e.g., alpha, ia64, ppc{,64}, sparc{,64}, possibly
 * others), this macro can be used to ensure that the error flag will not get
 * set.  On architectures which do not support a separate error flag, the macro
 * is a no-op and the spurious error condition needs to be filtered out by some
 * other means (e.g., in user-level, by passing an extra argument to the
 * syscall handler, or something along those lines).
 */
#define force_successful_syscall_return() do { } while (0)
#endif
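
/*
 * Illustrative use (a sketch): a handler whose successful result can look
 * like an error value, e.g. an lseek-style offset that lands in the
 * -errno range, would end with:
 *
 *	force_successful_syscall_return();
 *	return retval;
 *
 * where retval may look "negative" without actually being an error.
 */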

#ifndef is_syscall_success
/*
 * On most systems we can tell if a syscall is a success based on whether the
 * return value is an error value.  Some systems, like ia64 and powerpc, have
 * different indicators of success/failure and must define their own.
 */
#define is_syscall_success(regs) (!IS_ERR_VALUE((unsigned long)(regs_return_value(regs))))
#endif
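
/*
 * For example (an illustrative sketch): a tracing hook could classify the
 * outcome as
 *
 *	long ret = regs_return_value(regs);
 *	bool ok = is_syscall_success(regs);
 *
 * so a return of -EINVAL counts as failure, while a large positive value
 * such as an mmap() address counts as success.
 */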

/*
 * <asm/ptrace.h> should define the following things inside #ifdef __KERNEL__.
 *
 * These do-nothing inlines are used when the arch does not
 * implement single-step.  The kerneldoc comments are here
 * to document the interface for all arch definitions.
 */

#ifndef arch_has_single_step
/**
 * arch_has_single_step - does this CPU support user-mode single-step?
 *
 * If this is defined, then there must be function declarations or
 * inlines for user_enable_single_step() and user_disable_single_step().
 * arch_has_single_step() should evaluate to nonzero iff the machine
 * supports instruction single-step for user mode.
 * It can be a constant or it can test a CPU feature bit.
 */
#define arch_has_single_step()		(0)

/**
 * user_enable_single_step - single-step in user-mode task
 * @task: either current or a task stopped in %TASK_TRACED
 *
 * This can only be called when arch_has_single_step() has returned nonzero.
 * Set @task so that when it returns to user mode, it will trap after the
 * next single instruction executes.  If arch_has_block_step() is defined,
 * this must clear the effects of user_enable_block_step() too.
 */
static inline void user_enable_single_step(struct task_struct *task)
{
	BUG();			/* This can never be called.  */
}

/**
 * user_disable_single_step - cancel user-mode single-step
 * @task: either current or a task stopped in %TASK_TRACED
 *
 * Clear @task of the effects of user_enable_single_step() and
 * user_enable_block_step().  This can be called whether or not either
 * of those was ever called on @task, and even if arch_has_single_step()
 * returned zero.
 */
static inline void user_disable_single_step(struct task_struct *task)
{
}
#else
extern void user_enable_single_step(struct task_struct *);
extern void user_disable_single_step(struct task_struct *);
#endif	/* arch_has_single_step */

#ifndef arch_has_block_step
/**
 * arch_has_block_step - does this CPU support user-mode block-step?
 *
 * If this is defined, then there must be a function declaration or inline
 * for user_enable_block_step(), and arch_has_single_step() must be defined
 * too.  arch_has_block_step() should evaluate to nonzero iff the machine
 * supports step-until-branch for user mode.  It can be a constant or it
 * can test a CPU feature bit.
 */
#define arch_has_block_step()		(0)

/**
 * user_enable_block_step - step until branch in user-mode task
 * @task: either current or a task stopped in %TASK_TRACED
 *
 * This can only be called when arch_has_block_step() has returned nonzero,
 * and will never be called when single-instruction stepping is being used.
 * Set @task so that when it returns to user mode, it will trap after the
 * next branch or trap taken.
 */
static inline void user_enable_block_step(struct task_struct *task)
{
	BUG();			/* This can never be called.  */
}
#else
extern void user_enable_block_step(struct task_struct *);
#endif	/* arch_has_block_step */
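
/*
 * Illustrative resume-path sketch (loosely modeled on the generic ptrace
 * resume logic; simplified, not verbatim): the enable functions are only
 * called after the matching arch_has_*_step() predicate returns nonzero:
 *
 *	if (request == PTRACE_SINGLEBLOCK) {
 *		if (unlikely(!arch_has_block_step()))
 *			return -EIO;
 *		user_enable_block_step(child);
 *	} else if (request == PTRACE_SINGLESTEP) {
 *		if (unlikely(!arch_has_single_step()))
 *			return -EIO;
 *		user_enable_single_step(child);
 *	} else {
 *		user_disable_single_step(child);
 *	}
 */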

#ifdef ARCH_HAS_USER_SINGLE_STEP_REPORT
extern void user_single_step_report(struct pt_regs *regs);
#else
static inline void user_single_step_report(struct pt_regs *regs)
{
	kernel_siginfo_t info;
	clear_siginfo(&info);
	info.si_signo = SIGTRAP;
	info.si_errno = 0;
	info.si_code = SI_USER;
	info.si_pid = 0;
	info.si_uid = 0;
	force_sig_info(&info);
}
#endif

#ifndef arch_ptrace_stop_needed
/**
 * arch_ptrace_stop_needed - Decide whether arch_ptrace_stop() should be called
 *
 * This is called with the siglock held, to decide whether or not it's
 * necessary to release the siglock and call arch_ptrace_stop().  It can be
 * defined to a constant if arch_ptrace_stop() is never required, or always
 * is.  On machines where this makes sense, it should be defined to a quick
 * test to optimize out calling arch_ptrace_stop() when it would be
 * superfluous.  For example, if the thread has not been back to user mode
 * since the last stop, the thread state might indicate that nothing needs
 * to be done.
 *
 * This is guaranteed to be invoked once before a task stops for ptrace and
 * may include arch-specific operations necessary prior to a ptrace stop.
 */
#define arch_ptrace_stop_needed()	(0)
#endif

#ifndef arch_ptrace_stop
/**
 * arch_ptrace_stop - Do machine-specific work before stopping for ptrace
 *
 * This is called with no locks held when arch_ptrace_stop_needed() has
 * just returned nonzero.  It is allowed to block, e.g. for user memory
 * access.  The arch can have machine-specific work to be done before
 * ptrace stops.  On ia64, register backing store gets written back to user
 * memory here.  Since this can be costly (requires dropping the siglock),
 * we only do it when the arch requires it for this particular stop, as
 * indicated by arch_ptrace_stop_needed().
 */
#define arch_ptrace_stop()		do { } while (0)
#endif
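
/*
 * Illustrative caller pattern (a sketch of how ptrace_stop()-style code
 * combines the two hooks; simplified, not verbatim): the siglock is only
 * dropped when the arch actually needs the possibly-blocking work:
 *
 *	if (arch_ptrace_stop_needed()) {
 *		spin_unlock_irq(&current->sighand->siglock);
 *		arch_ptrace_stop();
 *		spin_lock_irq(&current->sighand->siglock);
 *	}
 */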

#ifndef current_pt_regs
#define current_pt_regs() task_pt_regs(current)
#endif

#ifndef current_user_stack_pointer
#define current_user_stack_pointer() user_stack_pointer(current_pt_regs())
#endif

#ifndef exception_ip
#define exception_ip(x) instruction_pointer(x)
#endif

extern int task_current_syscall(struct task_struct *target, struct syscall_info *info);

extern void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact);

/*
 * The ptrace report for syscall entry and exit looks identical.
 */
static inline int ptrace_report_syscall(unsigned long message)
{
	int ptrace = current->ptrace;
	int signr;

	if (!(ptrace & PT_PTRACED))
		return 0;

	signr = ptrace_notify(SIGTRAP | ((ptrace & PT_TRACESYSGOOD) ? 0x80 : 0),
			      message);

	/*
	 * This isn't the same as continuing with a signal, but it will do
	 * for normal use.  strace only continues with a signal if the
	 * stopping signal is not SIGTRAP.  -brl
	 */
	if (signr)
		send_sig(signr, current, 1);

	return fatal_signal_pending(current);
}
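
/*
 * Tracer side (user space, illustrative): with PTRACE_O_TRACESYSGOOD set,
 * the 0x80 bit added above lets a tracer tell syscall stops apart from
 * genuine SIGTRAPs:
 *
 *	if (WIFSTOPPED(status) && WSTOPSIG(status) == (SIGTRAP | 0x80))
 *		... this is a syscall-entry or syscall-exit stop ...
 */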

/**
 * ptrace_report_syscall_entry - task is about to attempt a system call
 * @regs:		user register state of current task
 *
 * This will be called if %SYSCALL_WORK_SYSCALL_TRACE or
 * %SYSCALL_WORK_SYSCALL_EMU have been set, when the current task has just
 * entered the kernel for a system call.  Full user register state is
 * available here.  Changing the values in @regs can affect the system
 * call number and arguments to be tried.  It is safe to block here,
 * preventing the system call from beginning.
 *
 * Returns zero normally, or nonzero if the calling arch code should abort
 * the system call.  That must prevent normal entry so no system call is
 * made.  If the task ever returns to user mode after this, its register
 * state is unspecified, but should be something harmless like an %ENOSYS
 * error return.  It should preserve enough information so that
 * syscall_rollback() can work (see asm-generic/syscall.h).
 *
 * Called without locks, just after entering kernel mode.
 */
static inline __must_check int ptrace_report_syscall_entry(
	struct pt_regs *regs)
{
	return ptrace_report_syscall(PTRACE_EVENTMSG_SYSCALL_ENTRY);
}
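
/*
 * Illustrative arch/entry usage (a simplified sketch of the generic
 * syscall-entry work, not verbatim): a nonzero return aborts the syscall:
 *
 *	if (work & SYSCALL_WORK_SYSCALL_TRACE) {
 *		ret = ptrace_report_syscall_entry(regs);
 *		if (ret || (work & SYSCALL_WORK_SYSCALL_EMU))
 *			return -1L;
 *	}
 */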

/**
 * ptrace_report_syscall_exit - task has just finished a system call
 * @regs:		user register state of current task
 * @step:		nonzero if simulating single-step or block-step
 *
 * This will be called if %SYSCALL_WORK_SYSCALL_TRACE has been set, when
 * the current task has just finished an attempted system call.  Full
 * user register state is available here.  It is safe to block here,
 * preventing signals from being processed.
 *
 * If @step is nonzero, this report is also in lieu of the normal
 * trap that would follow the system call instruction because
 * user_enable_block_step() or user_enable_single_step() was used.
 * In this case, %SYSCALL_WORK_SYSCALL_TRACE might not be set.
 *
 * Called without locks, just before checking for pending signals.
 */
static inline void ptrace_report_syscall_exit(struct pt_regs *regs, int step)
{
	if (step)
		user_single_step_report(regs);
	else
		ptrace_report_syscall(PTRACE_EVENTMSG_SYSCALL_EXIT);
}
#endif /* _LINUX_PTRACE_H */