1  // SPDX-License-Identifier: GPL-2.0-or-later
2  /*
3   *  Derived from "arch/i386/kernel/process.c"
4   *    Copyright (C) 1995  Linus Torvalds
5   *
6   *  Updated and modified by Cort Dougan (cort@cs.nmt.edu) and
7   *  Paul Mackerras (paulus@cs.anu.edu.au)
8   *
9   *  PowerPC version
10   *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
11   */
12  
13  #include <linux/errno.h>
14  #include <linux/sched.h>
15  #include <linux/sched/debug.h>
16  #include <linux/sched/task.h>
17  #include <linux/sched/task_stack.h>
18  #include <linux/kernel.h>
19  #include <linux/mm.h>
20  #include <linux/smp.h>
21  #include <linux/stddef.h>
22  #include <linux/unistd.h>
23  #include <linux/ptrace.h>
24  #include <linux/slab.h>
25  #include <linux/user.h>
26  #include <linux/elf.h>
27  #include <linux/prctl.h>
28  #include <linux/init_task.h>
29  #include <linux/export.h>
30  #include <linux/kallsyms.h>
31  #include <linux/mqueue.h>
32  #include <linux/hardirq.h>
33  #include <linux/utsname.h>
34  #include <linux/ftrace.h>
35  #include <linux/kernel_stat.h>
36  #include <linux/personality.h>
37  #include <linux/hw_breakpoint.h>
38  #include <linux/uaccess.h>
39  #include <linux/pkeys.h>
40  #include <linux/seq_buf.h>
41  
42  #include <asm/interrupt.h>
43  #include <asm/io.h>
44  #include <asm/processor.h>
45  #include <asm/mmu.h>
46  #include <asm/machdep.h>
47  #include <asm/time.h>
48  #include <asm/runlatch.h>
49  #include <asm/syscalls.h>
50  #include <asm/switch_to.h>
51  #include <asm/tm.h>
52  #include <asm/debug.h>
53  #ifdef CONFIG_PPC64
54  #include <asm/firmware.h>
55  #include <asm/hw_irq.h>
56  #endif
57  #include <asm/code-patching.h>
58  #include <asm/exec.h>
59  #include <asm/livepatch.h>
60  #include <asm/cpu_has_feature.h>
61  #include <asm/asm-prototypes.h>
62  #include <asm/stacktrace.h>
63  #include <asm/hw_breakpoint.h>
64  
65  #include <linux/kprobes.h>
66  #include <linux/kdebug.h>
67  
68  /* Transactional Memory debug */
69  #ifdef TM_DEBUG_SW
70  #define TM_DEBUG(x...) printk(KERN_INFO x)
71  #else
72  #define TM_DEBUG(x...) do { } while(0)
73  #endif
74  
75  #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
76  /*
77   * Are we running in "Suspend disabled" mode? If so we have to block any
78   * sigreturn that would get us into suspended state, and we also warn in some
79   * other paths that we should never reach with suspend disabled.
80   */
81  bool tm_suspend_disabled __ro_after_init = false;
82  
83  static void check_if_tm_restore_required(struct task_struct *tsk)
84  {
85  	/*
86  	 * If we are saving the current thread's registers, and the
87  	 * thread is in a transactional state, set the TIF_RESTORE_TM
88  	 * bit so that we know to restore the registers before
89  	 * returning to userspace.
90  	 */
91  	if (tsk == current && tsk->thread.regs &&
92  	    MSR_TM_ACTIVE(tsk->thread.regs->msr) &&
93  	    !test_thread_flag(TIF_RESTORE_TM)) {
94  		regs_set_return_msr(&tsk->thread.ckpt_regs,
95  						tsk->thread.regs->msr);
96  		set_thread_flag(TIF_RESTORE_TM);
97  	}
98  }
99  
100  #else
101  static inline void check_if_tm_restore_required(struct task_struct *tsk) { }
102  #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
103  
104  bool strict_msr_control;
105  EXPORT_SYMBOL(strict_msr_control);
106  
107  static int __init enable_strict_msr_control(char *str)
108  {
109  	strict_msr_control = true;
110  	pr_info("Enabling strict facility control\n");
111  
112  	return 0;
113  }
114  early_param("ppc_strict_facility_enable", enable_strict_msr_control);
115  
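/*
 * msr_check_and_set() / __msr_check_and_clear() turn facility bits
 * (FP/VEC/VSX/SPE) on or off in the CPU's MSR so the kernel can touch the
 * corresponding register sets. Note below that when the CPU has VSX, a
 * request for MSR_FP also sets/clears MSR_VSX.
 */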
116  /* notrace because it's called by restore_math */
117  unsigned long notrace msr_check_and_set(unsigned long bits)
118  {
119  	unsigned long oldmsr = mfmsr();
120  	unsigned long newmsr;
121  
122  	newmsr = oldmsr | bits;
123  
124  	if (cpu_has_feature(CPU_FTR_VSX) && (bits & MSR_FP))
125  		newmsr |= MSR_VSX;
126  
127  	if (oldmsr != newmsr)
128  		newmsr = mtmsr_isync_irqsafe(newmsr);
129  
130  	return newmsr;
131  }
132  EXPORT_SYMBOL_GPL(msr_check_and_set);
133  
134  /* notrace because it's called by restore_math */
135  void notrace __msr_check_and_clear(unsigned long bits)
136  {
137  	unsigned long oldmsr = mfmsr();
138  	unsigned long newmsr;
139  
140  	newmsr = oldmsr & ~bits;
141  
142  	if (cpu_has_feature(CPU_FTR_VSX) && (bits & MSR_FP))
143  		newmsr &= ~MSR_VSX;
144  
145  	if (oldmsr != newmsr)
146  		mtmsr_isync_irqsafe(newmsr);
147  }
148  EXPORT_SYMBOL(__msr_check_and_clear);
149  
150  #ifdef CONFIG_PPC_FPU
151  static void __giveup_fpu(struct task_struct *tsk)
152  {
153  	unsigned long msr;
154  
155  	save_fpu(tsk);
156  	msr = tsk->thread.regs->msr;
157  	msr &= ~(MSR_FP|MSR_FE0|MSR_FE1);
158  	if (cpu_has_feature(CPU_FTR_VSX))
159  		msr &= ~MSR_VSX;
160  	regs_set_return_msr(tsk->thread.regs, msr);
161  }
162  
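/*
 * giveup_fpu() flushes the FP state of @tsk into its thread_struct and
 * clears MSR_FP (and MSR_VSX where present) in the task's saved MSR, so
 * the next FP use from userspace will fault the state back in.
 */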
163  void giveup_fpu(struct task_struct *tsk)
164  {
165  	check_if_tm_restore_required(tsk);
166  
167  	msr_check_and_set(MSR_FP);
168  	__giveup_fpu(tsk);
169  	msr_check_and_clear(MSR_FP);
170  }
171  EXPORT_SYMBOL(giveup_fpu);
172  
173  /*
174   * Make sure the floating-point register state in the
175   * thread_struct is up to date for task tsk.
176   */
177  void flush_fp_to_thread(struct task_struct *tsk)
178  {
179  	if (tsk->thread.regs) {
180  		/*
181  		 * We need to disable preemption here because if we didn't,
182  		 * another process could get scheduled after the regs->msr
183  		 * test but before we have finished saving the FP registers
184  		 * to the thread_struct.  That process could take over the
185  		 * FPU, and then when we get scheduled again we would store
186  		 * bogus values for the remaining FP registers.
187  		 */
188  		preempt_disable();
189  		if (tsk->thread.regs->msr & MSR_FP) {
190  			/*
191  			 * This should only ever be called for current or
192  			 * for a stopped child process.  Since we save away
193  			 * the FP register state on context switch,
194  			 * there is something wrong if a stopped child appears
195  			 * to still have its FP state in the CPU registers.
196  			 */
197  			BUG_ON(tsk != current);
198  			giveup_fpu(tsk);
199  		}
200  		preempt_enable();
201  	}
202  }
203  EXPORT_SYMBOL_GPL(flush_fp_to_thread);
204  
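/*
 * enable_kernel_fp() lets kernel code use the FP/VSX registers. A typical
 * caller (sketch only, callers live elsewhere in the tree) looks like:
 *
 *	preempt_disable();
 *	enable_kernel_fp();
 *	... use FP/VSX registers ...
 *	disable_kernel_fp();
 *	preempt_enable();
 */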
205  void enable_kernel_fp(void)
206  {
207  	unsigned long cpumsr;
208  
209  	WARN_ON(preemptible());
210  
211  	cpumsr = msr_check_and_set(MSR_FP);
212  
213  	if (current->thread.regs && (current->thread.regs->msr & MSR_FP)) {
214  		check_if_tm_restore_required(current);
215  		/*
216  		 * If a thread has already been reclaimed then the
217  		 * checkpointed registers are on the CPU but have definitely
218  		 * been saved by the reclaim code. Don't need to and *cannot*
219  		 * giveup as this would save to the 'live' structure not the
220  		 * checkpointed structure.
221  		 */
222  		if (!MSR_TM_ACTIVE(cpumsr) &&
223  		     MSR_TM_ACTIVE(current->thread.regs->msr))
224  			return;
225  		__giveup_fpu(current);
226  	}
227  }
228  EXPORT_SYMBOL(enable_kernel_fp);
229  #else
230  static inline void __giveup_fpu(struct task_struct *tsk) { }
231  #endif /* CONFIG_PPC_FPU */
232  
233  #ifdef CONFIG_ALTIVEC
234  static void __giveup_altivec(struct task_struct *tsk)
235  {
236  	unsigned long msr;
237  
238  	save_altivec(tsk);
239  	msr = tsk->thread.regs->msr;
240  	msr &= ~MSR_VEC;
241  	if (cpu_has_feature(CPU_FTR_VSX))
242  		msr &= ~MSR_VSX;
243  	regs_set_return_msr(tsk->thread.regs, msr);
244  }
245  
246  void giveup_altivec(struct task_struct *tsk)
247  {
248  	check_if_tm_restore_required(tsk);
249  
250  	msr_check_and_set(MSR_VEC);
251  	__giveup_altivec(tsk);
252  	msr_check_and_clear(MSR_VEC);
253  }
254  EXPORT_SYMBOL(giveup_altivec);
255  
256  void enable_kernel_altivec(void)
257  {
258  	unsigned long cpumsr;
259  
260  	WARN_ON(preemptible());
261  
262  	cpumsr = msr_check_and_set(MSR_VEC);
263  
264  	if (current->thread.regs && (current->thread.regs->msr & MSR_VEC)) {
265  		check_if_tm_restore_required(current);
266  		/*
267  		 * If a thread has already been reclaimed then the
268  		 * checkpointed registers are on the CPU but have definitely
269  		 * been saved by the reclaim code. Don't need to and *cannot*
270  		 * giveup as this would save to the 'live' structure not the
271  		 * checkpointed structure.
272  		 */
273  		if (!MSR_TM_ACTIVE(cpumsr) &&
274  		     MSR_TM_ACTIVE(current->thread.regs->msr))
275  			return;
276  		__giveup_altivec(current);
277  	}
278  }
279  EXPORT_SYMBOL(enable_kernel_altivec);
280  
281  /*
282   * Make sure the VMX/Altivec register state in the
283   * thread_struct is up to date for task tsk.
284   */
285  void flush_altivec_to_thread(struct task_struct *tsk)
286  {
287  	if (tsk->thread.regs) {
288  		preempt_disable();
289  		if (tsk->thread.regs->msr & MSR_VEC) {
290  			BUG_ON(tsk != current);
291  			giveup_altivec(tsk);
292  		}
293  		preempt_enable();
294  	}
295  }
296  EXPORT_SYMBOL_GPL(flush_altivec_to_thread);
297  #endif /* CONFIG_ALTIVEC */
298  
299  #ifdef CONFIG_VSX
300  static void __giveup_vsx(struct task_struct *tsk)
301  {
302  	unsigned long msr = tsk->thread.regs->msr;
303  
304  	/*
305  	 * We should never be setting MSR_VSX without also setting
306  	 * MSR_FP and MSR_VEC
307  	 */
308  	WARN_ON((msr & MSR_VSX) && !((msr & MSR_FP) && (msr & MSR_VEC)));
309  
310  	/* __giveup_fpu will clear MSR_VSX */
311  	if (msr & MSR_FP)
312  		__giveup_fpu(tsk);
313  	if (msr & MSR_VEC)
314  		__giveup_altivec(tsk);
315  }
316  
317  static void giveup_vsx(struct task_struct *tsk)
318  {
319  	check_if_tm_restore_required(tsk);
320  
321  	msr_check_and_set(MSR_FP|MSR_VEC|MSR_VSX);
322  	__giveup_vsx(tsk);
323  	msr_check_and_clear(MSR_FP|MSR_VEC|MSR_VSX);
324  }
325  
326  void enable_kernel_vsx(void)
327  {
328  	unsigned long cpumsr;
329  
330  	WARN_ON(preemptible());
331  
332  	cpumsr = msr_check_and_set(MSR_FP|MSR_VEC|MSR_VSX);
333  
334  	if (current->thread.regs &&
335  	    (current->thread.regs->msr & (MSR_VSX|MSR_VEC|MSR_FP))) {
336  		check_if_tm_restore_required(current);
337  		/*
338  		 * If a thread has already been reclaimed then the
339  		 * checkpointed registers are on the CPU but have definitely
340  		 * been saved by the reclaim code. Don't need to and *cannot*
341  		 * giveup as this would save to the 'live' structure not the
342  		 * checkpointed structure.
343  		 */
344  		if (!MSR_TM_ACTIVE(cpumsr) &&
345  		     MSR_TM_ACTIVE(current->thread.regs->msr))
346  			return;
347  		__giveup_vsx(current);
348  	}
349  }
350  EXPORT_SYMBOL(enable_kernel_vsx);
351  
352  void flush_vsx_to_thread(struct task_struct *tsk)
353  {
354  	if (tsk->thread.regs) {
355  		preempt_disable();
356  		if (tsk->thread.regs->msr & (MSR_VSX|MSR_VEC|MSR_FP)) {
357  			BUG_ON(tsk != current);
358  			giveup_vsx(tsk);
359  		}
360  		preempt_enable();
361  	}
362  }
363  EXPORT_SYMBOL_GPL(flush_vsx_to_thread);
364  #endif /* CONFIG_VSX */
365  
366  #ifdef CONFIG_SPE
367  void giveup_spe(struct task_struct *tsk)
368  {
369  	check_if_tm_restore_required(tsk);
370  
371  	msr_check_and_set(MSR_SPE);
372  	__giveup_spe(tsk);
373  	msr_check_and_clear(MSR_SPE);
374  }
375  EXPORT_SYMBOL(giveup_spe);
376  
377  void enable_kernel_spe(void)
378  {
379  	WARN_ON(preemptible());
380  
381  	msr_check_and_set(MSR_SPE);
382  
383  	if (current->thread.regs && (current->thread.regs->msr & MSR_SPE)) {
384  		check_if_tm_restore_required(current);
385  		__giveup_spe(current);
386  	}
387  }
388  EXPORT_SYMBOL(enable_kernel_spe);
389  
390  void flush_spe_to_thread(struct task_struct *tsk)
391  {
392  	if (tsk->thread.regs) {
393  		preempt_disable();
394  		if (tsk->thread.regs->msr & MSR_SPE) {
395  			BUG_ON(tsk != current);
396  			tsk->thread.spefscr = mfspr(SPRN_SPEFSCR);
397  			giveup_spe(tsk);
398  		}
399  		preempt_enable();
400  	}
401  }
402  #endif /* CONFIG_SPE */
403  
404  static unsigned long msr_all_available;
405  
406  static int __init init_msr_all_available(void)
407  {
408  	if (IS_ENABLED(CONFIG_PPC_FPU))
409  		msr_all_available |= MSR_FP;
410  	if (cpu_has_feature(CPU_FTR_ALTIVEC))
411  		msr_all_available |= MSR_VEC;
412  	if (cpu_has_feature(CPU_FTR_VSX))
413  		msr_all_available |= MSR_VSX;
414  	if (cpu_has_feature(CPU_FTR_SPE))
415  		msr_all_available |= MSR_SPE;
416  
417  	return 0;
418  }
419  early_initcall(init_msr_all_available);
420  
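/*
 * giveup_all() saves whatever user facility state (FP, VMX, SPE) is live
 * for @tsk into its thread_struct, enabling and disabling all available
 * facilities with a single MSR update.
 */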
421  void giveup_all(struct task_struct *tsk)
422  {
423  	unsigned long usermsr;
424  
425  	if (!tsk->thread.regs)
426  		return;
427  
428  	check_if_tm_restore_required(tsk);
429  
430  	usermsr = tsk->thread.regs->msr;
431  
432  	if ((usermsr & msr_all_available) == 0)
433  		return;
434  
435  	msr_check_and_set(msr_all_available);
436  
437  	WARN_ON((usermsr & MSR_VSX) && !((usermsr & MSR_FP) && (usermsr & MSR_VEC)));
438  
439  	if (usermsr & MSR_FP)
440  		__giveup_fpu(tsk);
441  	if (usermsr & MSR_VEC)
442  		__giveup_altivec(tsk);
443  	if (usermsr & MSR_SPE)
444  		__giveup_spe(tsk);
445  
446  	msr_check_and_clear(msr_all_available);
447  }
448  EXPORT_SYMBOL(giveup_all);
449  
450  #ifdef CONFIG_PPC_BOOK3S_64
451  #ifdef CONFIG_PPC_FPU
452  static bool should_restore_fp(void)
453  {
454  	if (current->thread.load_fp) {
455  		current->thread.load_fp++;
456  		return true;
457  	}
458  	return false;
459  }
460  
461  static void do_restore_fp(void)
462  {
463  	load_fp_state(&current->thread.fp_state);
464  }
465  #else
466  static bool should_restore_fp(void) { return false; }
467  static void do_restore_fp(void) { }
468  #endif /* CONFIG_PPC_FPU */
469  
470  #ifdef CONFIG_ALTIVEC
471  static bool should_restore_altivec(void)
472  {
473  	if (cpu_has_feature(CPU_FTR_ALTIVEC) && (current->thread.load_vec)) {
474  		current->thread.load_vec++;
475  		return true;
476  	}
477  	return false;
478  }
479  
480  static void do_restore_altivec(void)
481  {
482  	load_vr_state(&current->thread.vr_state);
483  	current->thread.used_vr = 1;
484  }
485  #else
486  static bool should_restore_altivec(void) { return false; }
487  static void do_restore_altivec(void) { }
488  #endif /* CONFIG_ALTIVEC */
489  
490  static bool should_restore_vsx(void)
491  {
492  	if (cpu_has_feature(CPU_FTR_VSX))
493  		return true;
494  	return false;
495  }
496  #ifdef CONFIG_VSX
497  static void do_restore_vsx(void)
498  {
499  	current->thread.used_vsr = 1;
500  }
501  #else
502  static void do_restore_vsx(void) { }
503  #endif /* CONFIG_VSX */
504  
505  /*
506   * The exception exit path calls restore_math() with interrupts hard disabled
507   * but the soft irq state not "reconciled". ftrace code that calls
508   * local_irq_save/restore causes warnings.
509   *
510   * Rather than complicate the exit path, just don't trace restore_math. This
511   * could be done by having ftrace entry code check for this un-reconciled
512   * condition where MSR[EE]=0 and PACA_IRQ_HARD_DIS is not set, and
513   * temporarily fix it up for the duration of the ftrace call.
514   */
515  void notrace restore_math(struct pt_regs *regs)
516  {
517  	unsigned long msr;
518  	unsigned long new_msr = 0;
519  
520  	msr = regs->msr;
521  
522  	/*
523  	 * new_msr tracks the facilities that are to be restored. Only reload
524  	 * if the bit is not set in the user MSR (if it is set, the registers
525  	 * are live for the user thread).
526  	 */
527  	if ((!(msr & MSR_FP)) && should_restore_fp())
528  		new_msr |= MSR_FP;
529  
530  	if ((!(msr & MSR_VEC)) && should_restore_altivec())
531  		new_msr |= MSR_VEC;
532  
533  	if ((!(msr & MSR_VSX)) && should_restore_vsx()) {
534  		if (((msr | new_msr) & (MSR_FP | MSR_VEC)) == (MSR_FP | MSR_VEC))
535  			new_msr |= MSR_VSX;
536  	}
537  
538  	if (new_msr) {
539  		unsigned long fpexc_mode = 0;
540  
541  		msr_check_and_set(new_msr);
542  
543  		if (new_msr & MSR_FP) {
544  			do_restore_fp();
545  
546  			// This also covers VSX, because VSX implies FP
547  			fpexc_mode = current->thread.fpexc_mode;
548  		}
549  
550  		if (new_msr & MSR_VEC)
551  			do_restore_altivec();
552  
553  		if (new_msr & MSR_VSX)
554  			do_restore_vsx();
555  
556  		msr_check_and_clear(new_msr);
557  
558  		regs_set_return_msr(regs, regs->msr | new_msr | fpexc_mode);
559  	}
560  }
561  #endif /* CONFIG_PPC_BOOK3S_64 */
562  
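/*
 * save_all() copies any live FP/VMX state of @tsk into its thread_struct
 * without clearing the facility bits in the task's MSR (SPE is given up
 * entirely); this is the workhorse for flush_all_to_thread() below.
 */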
563  static void save_all(struct task_struct *tsk)
564  {
565  	unsigned long usermsr;
566  
567  	if (!tsk->thread.regs)
568  		return;
569  
570  	usermsr = tsk->thread.regs->msr;
571  
572  	if ((usermsr & msr_all_available) == 0)
573  		return;
574  
575  	msr_check_and_set(msr_all_available);
576  
577  	WARN_ON((usermsr & MSR_VSX) && !((usermsr & MSR_FP) && (usermsr & MSR_VEC)));
578  
579  	if (usermsr & MSR_FP)
580  		save_fpu(tsk);
581  
582  	if (usermsr & MSR_VEC)
583  		save_altivec(tsk);
584  
585  	if (usermsr & MSR_SPE)
586  		__giveup_spe(tsk);
587  
588  	msr_check_and_clear(msr_all_available);
589  }
590  
591  void flush_all_to_thread(struct task_struct *tsk)
592  {
593  	if (tsk->thread.regs) {
594  		preempt_disable();
595  		BUG_ON(tsk != current);
596  #ifdef CONFIG_SPE
597  		if (tsk->thread.regs->msr & MSR_SPE)
598  			tsk->thread.spefscr = mfspr(SPRN_SPEFSCR);
599  #endif
600  		save_all(tsk);
601  
602  		preempt_enable();
603  	}
604  }
605  EXPORT_SYMBOL(flush_all_to_thread);
606  
607  #ifdef CONFIG_PPC_ADV_DEBUG_REGS
608  void do_send_trap(struct pt_regs *regs, unsigned long address,
609  		  unsigned long error_code, int breakpt)
610  {
611  	current->thread.trap_nr = TRAP_HWBKPT;
612  	if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
613  			11, SIGSEGV) == NOTIFY_STOP)
614  		return;
615  
616  	/* Deliver the signal to userspace */
617  	force_sig_ptrace_errno_trap(breakpt, /* breakpoint or watchpoint id */
618  				    (void __user *)address);
619  }
620  #else	/* !CONFIG_PPC_ADV_DEBUG_REGS */
621  
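/*
 * Without CONFIG_HAVE_HW_BREAKPOINT, do_break_handler() works out which
 * watchpoint slot raised the exception and disables it, so the task does
 * not keep hitting the same watchpoint forever.
 */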
622  static void do_break_handler(struct pt_regs *regs)
623  {
624  	struct arch_hw_breakpoint null_brk = {0};
625  	struct arch_hw_breakpoint *info;
626  	ppc_inst_t instr = ppc_inst(0);
627  	int type = 0;
628  	int size = 0;
629  	unsigned long ea;
630  	int i;
631  
632  	/*
633  	 * If the underlying hw supports only one watchpoint, we know it
634  	 * caused the exception. 8xx also falls into this category.
635  	 */
636  	if (nr_wp_slots() == 1) {
637  		__set_breakpoint(0, &null_brk);
638  		current->thread.hw_brk[0] = null_brk;
639  		current->thread.hw_brk[0].flags |= HW_BRK_FLAG_DISABLED;
640  		return;
641  	}
642  
643  	/* Otherwise find out which DAWR caused exception and disable it. */
644  	wp_get_instr_detail(regs, &instr, &type, &size, &ea);
645  
646  	for (i = 0; i < nr_wp_slots(); i++) {
647  		info = &current->thread.hw_brk[i];
648  		if (!info->address)
649  			continue;
650  
651  		if (wp_check_constraints(regs, instr, ea, type, size, info)) {
652  			__set_breakpoint(i, &null_brk);
653  			current->thread.hw_brk[i] = null_brk;
654  			current->thread.hw_brk[i].flags |= HW_BRK_FLAG_DISABLED;
655  		}
656  	}
657  }
658  
659  DEFINE_INTERRUPT_HANDLER(do_break)
660  {
661  	current->thread.trap_nr = TRAP_HWBKPT;
662  	if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, regs->dsisr,
663  			11, SIGSEGV) == NOTIFY_STOP)
664  		return;
665  
666  	if (debugger_break_match(regs))
667  		return;
668  
669  	/*
670  	 * We reach here only when watchpoint exception is generated by ptrace
671  	 * event (or hw is buggy!). Now if CONFIG_HAVE_HW_BREAKPOINT is set,
672  	 * watchpoint is already handled by hw_breakpoint_handler() so we don't
673  	 * have to do anything. But when CONFIG_HAVE_HW_BREAKPOINT is not set,
674  	 * we need to manually handle the watchpoint here.
675  	 */
676  	if (!IS_ENABLED(CONFIG_HAVE_HW_BREAKPOINT))
677  		do_break_handler(regs);
678  
679  	/* Deliver the signal to userspace */
680  	force_sig_fault(SIGTRAP, TRAP_HWBKPT, (void __user *)regs->dar);
681  }
682  #endif	/* CONFIG_PPC_ADV_DEBUG_REGS */
683  
684  static DEFINE_PER_CPU(struct arch_hw_breakpoint, current_brk[HBP_NUM_MAX]);
685  
686  #ifdef CONFIG_PPC_ADV_DEBUG_REGS
687  /*
688   * Set the debug registers back to their default "safe" values.
689   */
690  static void set_debug_reg_defaults(struct thread_struct *thread)
691  {
692  	thread->debug.iac1 = thread->debug.iac2 = 0;
693  #if CONFIG_PPC_ADV_DEBUG_IACS > 2
694  	thread->debug.iac3 = thread->debug.iac4 = 0;
695  #endif
696  	thread->debug.dac1 = thread->debug.dac2 = 0;
697  #if CONFIG_PPC_ADV_DEBUG_DVCS > 0
698  	thread->debug.dvc1 = thread->debug.dvc2 = 0;
699  #endif
700  	thread->debug.dbcr0 = 0;
701  #ifdef CONFIG_BOOKE
702  	/*
703  	 * Force User/Supervisor bits to b11 (user-only MSR[PR]=1)
704  	 */
705  	thread->debug.dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US |
706  			DBCR1_IAC3US | DBCR1_IAC4US;
707  	/*
708  	 * Force Data Address Compare User/Supervisor bits to be User-only
709  	 * (0b11 MSR[PR]=1) and set all other bits in DBCR2 register to be 0.
710  	 */
711  	thread->debug.dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US;
712  #else
713  	thread->debug.dbcr1 = 0;
714  #endif
715  }
716  
717  static void prime_debug_regs(struct debug_reg *debug)
718  {
719  	/*
720  	 * We could have inherited MSR_DE from userspace, since
721  	 * it doesn't get cleared on exception entry.  Make sure
722  	 * MSR_DE is clear before we enable any debug events.
723  	 */
724  	mtmsr(mfmsr() & ~MSR_DE);
725  
726  	mtspr(SPRN_IAC1, debug->iac1);
727  	mtspr(SPRN_IAC2, debug->iac2);
728  #if CONFIG_PPC_ADV_DEBUG_IACS > 2
729  	mtspr(SPRN_IAC3, debug->iac3);
730  	mtspr(SPRN_IAC4, debug->iac4);
731  #endif
732  	mtspr(SPRN_DAC1, debug->dac1);
733  	mtspr(SPRN_DAC2, debug->dac2);
734  #if CONFIG_PPC_ADV_DEBUG_DVCS > 0
735  	mtspr(SPRN_DVC1, debug->dvc1);
736  	mtspr(SPRN_DVC2, debug->dvc2);
737  #endif
738  	mtspr(SPRN_DBCR0, debug->dbcr0);
739  	mtspr(SPRN_DBCR1, debug->dbcr1);
740  #ifdef CONFIG_BOOKE
741  	mtspr(SPRN_DBCR2, debug->dbcr2);
742  #endif
743  }
744  /*
745   * If either the old or the new thread is making use of the
746   * debug registers, set the debug registers from the values
747   * stored in the new thread.
748   */
749  void switch_booke_debug_regs(struct debug_reg *new_debug)
750  {
751  	if ((current->thread.debug.dbcr0 & DBCR0_IDM)
752  		|| (new_debug->dbcr0 & DBCR0_IDM))
753  			prime_debug_regs(new_debug);
754  }
755  EXPORT_SYMBOL_GPL(switch_booke_debug_regs);
756  #else	/* !CONFIG_PPC_ADV_DEBUG_REGS */
757  #ifndef CONFIG_HAVE_HW_BREAKPOINT
758  static void set_breakpoint(int i, struct arch_hw_breakpoint *brk)
759  {
760  	preempt_disable();
761  	__set_breakpoint(i, brk);
762  	preempt_enable();
763  }
764  
765  static void set_debug_reg_defaults(struct thread_struct *thread)
766  {
767  	int i;
768  	struct arch_hw_breakpoint null_brk = {0};
769  
770  	for (i = 0; i < nr_wp_slots(); i++) {
771  		thread->hw_brk[i] = null_brk;
772  		if (ppc_breakpoint_available())
773  			set_breakpoint(i, &thread->hw_brk[i]);
774  	}
775  }
776  
777  static inline bool hw_brk_match(struct arch_hw_breakpoint *a,
778  				struct arch_hw_breakpoint *b)
779  {
780  	if (a->address != b->address)
781  		return false;
782  	if (a->type != b->type)
783  		return false;
784  	if (a->len != b->len)
785  		return false;
786  	/* No need to check hw_len; it's calculated from address and len. */
787  	return true;
788  }
789  
790  static void switch_hw_breakpoint(struct task_struct *new)
791  {
792  	int i;
793  
794  	for (i = 0; i < nr_wp_slots(); i++) {
795  		if (likely(hw_brk_match(this_cpu_ptr(&current_brk[i]),
796  					&new->thread.hw_brk[i])))
797  			continue;
798  
799  		__set_breakpoint(i, &new->thread.hw_brk[i]);
800  	}
801  }
802  #endif /* !CONFIG_HAVE_HW_BREAKPOINT */
803  #endif	/* CONFIG_PPC_ADV_DEBUG_REGS */
804  
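/*
 * set_dabr() programs a legacy DABR/DAC style watchpoint; the DABRX bits
 * are derived from the breakpoint type flags below. Platforms may override
 * the whole operation via ppc_md.set_dabr().
 */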
805  static inline int set_dabr(struct arch_hw_breakpoint *brk)
806  {
807  	unsigned long dabr, dabrx;
808  
809  	dabr = brk->address | (brk->type & HW_BRK_TYPE_DABR);
810  	dabrx = ((brk->type >> 3) & 0x7);
811  
812  	if (ppc_md.set_dabr)
813  		return ppc_md.set_dabr(dabr, dabrx);
814  
815  	if (IS_ENABLED(CONFIG_PPC_ADV_DEBUG_REGS)) {
816  		mtspr(SPRN_DAC1, dabr);
817  		if (IS_ENABLED(CONFIG_PPC_47x))
818  			isync();
819  		return 0;
820  	} else if (IS_ENABLED(CONFIG_PPC_BOOK3S)) {
821  		mtspr(SPRN_DABR, dabr);
822  		if (cpu_has_feature(CPU_FTR_DABRX))
823  			mtspr(SPRN_DABRX, dabrx);
824  		return 0;
825  	} else {
826  		return -EINVAL;
827  	}
828  }
829  
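/*
 * set_breakpoint_8xx() implements a data watchpoint on the 8xx using the
 * LCTRL1/LCTRL2 comparator registers, matching the address range derived
 * from the breakpoint's address and length.
 */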
830  static inline int set_breakpoint_8xx(struct arch_hw_breakpoint *brk)
831  {
832  	unsigned long lctrl1 = LCTRL1_CTE_GT | LCTRL1_CTF_LT | LCTRL1_CRWE_RW |
833  			       LCTRL1_CRWF_RW;
834  	unsigned long lctrl2 = LCTRL2_LW0EN | LCTRL2_LW0LADC | LCTRL2_SLW0EN;
835  	unsigned long start_addr = ALIGN_DOWN(brk->address, HW_BREAKPOINT_SIZE);
836  	unsigned long end_addr = ALIGN(brk->address + brk->len, HW_BREAKPOINT_SIZE);
837  
838  	if (start_addr == 0)
839  		lctrl2 |= LCTRL2_LW0LA_F;
840  	else if (end_addr == 0)
841  		lctrl2 |= LCTRL2_LW0LA_E;
842  	else
843  		lctrl2 |= LCTRL2_LW0LA_EandF;
844  
845  	mtspr(SPRN_LCTRL2, 0);
846  
847  	if ((brk->type & HW_BRK_TYPE_RDWR) == 0)
848  		return 0;
849  
850  	if ((brk->type & HW_BRK_TYPE_RDWR) == HW_BRK_TYPE_READ)
851  		lctrl1 |= LCTRL1_CRWE_RO | LCTRL1_CRWF_RO;
852  	if ((brk->type & HW_BRK_TYPE_RDWR) == HW_BRK_TYPE_WRITE)
853  		lctrl1 |= LCTRL1_CRWE_WO | LCTRL1_CRWF_WO;
854  
855  	mtspr(SPRN_CMPE, start_addr - 1);
856  	mtspr(SPRN_CMPF, end_addr);
857  	mtspr(SPRN_LCTRL1, lctrl1);
858  	mtspr(SPRN_LCTRL2, lctrl2);
859  
860  	return 0;
861  }
862  
863  static void set_hw_breakpoint(int nr, struct arch_hw_breakpoint *brk)
864  {
865  	if (dawr_enabled())
866  		// Power8 or later
867  		set_dawr(nr, brk);
868  	else if (IS_ENABLED(CONFIG_PPC_8xx))
869  		set_breakpoint_8xx(brk);
870  	else if (!cpu_has_feature(CPU_FTR_ARCH_207S))
871  		// Power7 or earlier
872  		set_dabr(brk);
873  	else
874  		// Shouldn't happen due to higher level checks
875  		WARN_ON_ONCE(1);
876  }
877  
878  void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk)
879  {
880  	memcpy(this_cpu_ptr(&current_brk[nr]), brk, sizeof(*brk));
881  	set_hw_breakpoint(nr, brk);
882  }
883  
884  /* Check if we have DAWR or DABR hardware */
885  bool ppc_breakpoint_available(void)
886  {
887  	if (dawr_enabled())
888  		return true; /* POWER8 DAWR or POWER9 forced DAWR */
889  	if (cpu_has_feature(CPU_FTR_ARCH_207S))
890  		return false; /* POWER9 with DAWR disabled */
891  	/* DABR: Everything but POWER8 and POWER9 */
892  	return true;
893  }
894  EXPORT_SYMBOL_GPL(ppc_breakpoint_available);
895  
896  /* Disable the breakpoint in hardware without touching current_brk[] */
897  void suspend_breakpoints(void)
898  {
899  	struct arch_hw_breakpoint brk = {0};
900  	int i;
901  
902  	if (!ppc_breakpoint_available())
903  		return;
904  
905  	for (i = 0; i < nr_wp_slots(); i++)
906  		set_hw_breakpoint(i, &brk);
907  }
908  
909  /*
910   * Re-enable breakpoints suspended by suspend_breakpoints() in hardware
911   * from current_brk[]
912   */
913  void restore_breakpoints(void)
914  {
915  	int i;
916  
917  	if (!ppc_breakpoint_available())
918  		return;
919  
920  	for (i = 0; i < nr_wp_slots(); i++)
921  		set_hw_breakpoint(i, this_cpu_ptr(&current_brk[i]));
922  }
923  
924  #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
925  
926  static inline bool tm_enabled(struct task_struct *tsk)
927  {
928  	return tsk && tsk->thread.regs && (tsk->thread.regs->msr & MSR_TM);
929  }
930  
931  static void tm_reclaim_thread(struct thread_struct *thr, uint8_t cause)
932  {
933  	/*
934  	 * Use the current MSR TM suspended bit to track if we have
935  	 * checkpointed state outstanding.
936  	 * On signal delivery, we'd normally reclaim the checkpointed
937  	 * state to obtain the stack pointer (see: get_tm_stackpointer()).
938  	 * This will then directly return to userspace without going
939  	 * through __switch_to(). However, if the stack frame is bad,
940  	 * we need to exit this thread which calls __switch_to() which
941  	 * will again attempt to reclaim the already saved tm state.
942  	 * Hence we need to check that we've not already reclaimed
943  	 * this state.
944  	 * We do this using the current MSR, rather than tracking it in
945  	 * some specific thread_struct bit, as it has the additional
946  	 * benefit of checking for a potential TM bad thing exception.
947  	 */
948  	if (!MSR_TM_SUSPENDED(mfmsr()))
949  		return;
950  
951  	giveup_all(container_of(thr, struct task_struct, thread));
952  
953  	tm_reclaim(thr, cause);
954  
955  	/*
956  	 * If we are in a transaction and FP is off then we can't have
957  	 * used FP inside that transaction. Hence the checkpointed
958  	 * state is the same as the live state. We need to copy the
959  	 * live state to the checkpointed state so that when the
960  	 * transaction is restored, the checkpointed state is correct
961  	 * and the aborted transaction sees the correct state. We use
962  	 * ckpt_regs.msr here as that's what tm_reclaim will use to
963  	 * determine if it's going to write the checkpointed state or
964  	 * not. So either this will write the checkpointed registers,
965  	 * or reclaim will. Similarly for VMX.
966  	 */
967  	if ((thr->ckpt_regs.msr & MSR_FP) == 0)
968  		memcpy(&thr->ckfp_state, &thr->fp_state,
969  		       sizeof(struct thread_fp_state));
970  	if ((thr->ckpt_regs.msr & MSR_VEC) == 0)
971  		memcpy(&thr->ckvr_state, &thr->vr_state,
972  		       sizeof(struct thread_vr_state));
973  }
974  
975  void tm_reclaim_current(uint8_t cause)
976  {
977  	tm_enable();
978  	tm_reclaim_thread(&current->thread, cause);
979  }
980  
981  static inline void tm_reclaim_task(struct task_struct *tsk)
982  {
983  	/* We have to work out if we're switching from/to a task that's in the
984  	 * middle of a transaction.
985  	 *
986  	 * In switching we need to maintain a 2nd register state as
987  	 * oldtask->thread.ckpt_regs.  We tm_reclaim(oldproc); this saves the
988  	 * checkpointed (tbegin) state in ckpt_regs, ckfp_state and
989  	 * ckvr_state
990  	 *
991  	 * We also context switch (save) TFHAR/TEXASR/TFIAR in here.
992  	 */
993  	struct thread_struct *thr = &tsk->thread;
994  
995  	if (!thr->regs)
996  		return;
997  
998  	if (!MSR_TM_ACTIVE(thr->regs->msr))
999  		goto out_and_saveregs;
1000  
1001  	WARN_ON(tm_suspend_disabled);
1002  
1003  	TM_DEBUG("--- tm_reclaim on pid %d (NIP=%lx, "
1004  		 "ccr=%lx, msr=%lx, trap=%lx)\n",
1005  		 tsk->pid, thr->regs->nip,
1006  		 thr->regs->ccr, thr->regs->msr,
1007  		 thr->regs->trap);
1008  
1009  	tm_reclaim_thread(thr, TM_CAUSE_RESCHED);
1010  
1011  	TM_DEBUG("--- tm_reclaim on pid %d complete\n",
1012  		 tsk->pid);
1013  
1014  out_and_saveregs:
1015  	/* Always save the regs here, even if a transaction's not active.
1016  	 * This context-switches a thread's TM info SPRs.  We do it here to
1017  	 * be consistent with the restore path (in recheckpoint) which
1018  	 * cannot happen later in _switch().
1019  	 */
1020  	tm_save_sprs(thr);
1021  }
1022  
1023  extern void __tm_recheckpoint(struct thread_struct *thread);
1024  
1025  void tm_recheckpoint(struct thread_struct *thread)
1026  {
1027  	unsigned long flags;
1028  
1029  	if (!(thread->regs->msr & MSR_TM))
1030  		return;
1031  
1032  	/* We really can't be interrupted here as the TEXASR registers can't
1033  	 * change and later in the trecheckpoint code, we have a userspace R1.
1034  	 * So let's hard disable over this region.
1035  	 */
1036  	local_irq_save(flags);
1037  	hard_irq_disable();
1038  
1039  	/* The TM SPRs are restored here, so that TEXASR.FS can be set
1040  	 * before the trecheckpoint and no explosion occurs.
1041  	 */
1042  	tm_restore_sprs(thread);
1043  
1044  	__tm_recheckpoint(thread);
1045  
1046  	local_irq_restore(flags);
1047  }
1048  
1049  static inline void tm_recheckpoint_new_task(struct task_struct *new)
1050  {
1051  	if (!cpu_has_feature(CPU_FTR_TM))
1052  		return;
1053  
1054  	/* Recheckpoint the registers of the thread we're about to switch to.
1055  	 *
1056  	 * If the task was using FP, we non-lazily reload both the original and
1057  	 * the speculative FP register states.  This is because the kernel
1058  	 * doesn't see if/when a TM rollback occurs, so if we take an FP
1059  	 * unavailable later, we are unable to determine which set of FP regs
1060  	 * need to be restored.
1061  	 */
1062  	if (!tm_enabled(new))
1063  		return;
1064  
1065  	if (!MSR_TM_ACTIVE(new->thread.regs->msr)) {
1066  		tm_restore_sprs(&new->thread);
1067  		return;
1068  	}
1069  	/* Recheckpoint to restore original checkpointed register state. */
1070  	TM_DEBUG("*** tm_recheckpoint of pid %d (new->msr 0x%lx)\n",
1071  		 new->pid, new->thread.regs->msr);
1072  
1073  	tm_recheckpoint(&new->thread);
1074  
1075  	/*
1076  	 * The checkpointed state has been restored but the live state has
1077  	 * not, ensure all the math functionality is turned off to trigger
1078  	 * restore_math() to reload.
1079  	 */
1080  	new->thread.regs->msr &= ~(MSR_FP | MSR_VEC | MSR_VSX);
1081  
1082  	TM_DEBUG("*** tm_recheckpoint of pid %d complete "
1083  		 "(kernel msr 0x%lx)\n",
1084  		 new->pid, mfmsr());
1085  }
1086  
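/*
 * __switch_to_tm() is the TM half of a context switch: the outgoing
 * task's transactional state is reclaimed (saved) and the incoming task's
 * checkpointed state is restored via tm_recheckpoint_new_task().
 */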
1087  static inline void __switch_to_tm(struct task_struct *prev,
1088  		struct task_struct *new)
1089  {
1090  	if (cpu_has_feature(CPU_FTR_TM)) {
1091  		if (tm_enabled(prev) || tm_enabled(new))
1092  			tm_enable();
1093  
1094  		if (tm_enabled(prev)) {
1095  			prev->thread.load_tm++;
1096  			tm_reclaim_task(prev);
1097  			if (!MSR_TM_ACTIVE(prev->thread.regs->msr) && prev->thread.load_tm == 0)
1098  				prev->thread.regs->msr &= ~MSR_TM;
1099  		}
1100  
1101  		tm_recheckpoint_new_task(new);
1102  	}
1103  }
1104  
1105  /*
1106   * This is called if we are on the way out to userspace and the
1107   * TIF_RESTORE_TM flag is set.  It checks if we need to reload
1108   * FP and/or vector state and does so if necessary.
1109   * If userspace is inside a transaction (whether active or
1110   * suspended) and FP/VMX/VSX instructions have ever been enabled
1111   * inside that transaction, then we have to keep them enabled
1112   * and keep the FP/VMX/VSX state loaded for as long as the transaction
1113   * continues.  The reason is that if we didn't, and subsequently
1114   * got a FP/VMX/VSX unavailable interrupt inside a transaction,
1115   * we don't know whether it's the same transaction, and thus we
1116   * don't know which of the checkpointed state and the transactional
1117   * state to use.
1118   */
1119  void restore_tm_state(struct pt_regs *regs)
1120  {
1121  	unsigned long msr_diff;
1122  
1123  	/*
1124  	 * This is the only moment we should clear TIF_RESTORE_TM as
1125  	 * it is here that ckpt_regs.msr and pt_regs.msr become the same
1126  	 * again, anything else could lead to an incorrect ckpt_msr being
1127  	 * saved and therefore incorrect signal contexts.
1128  	 */
1129  	clear_thread_flag(TIF_RESTORE_TM);
1130  	if (!MSR_TM_ACTIVE(regs->msr))
1131  		return;
1132  
1133  	msr_diff = current->thread.ckpt_regs.msr & ~regs->msr;
1134  	msr_diff &= MSR_FP | MSR_VEC | MSR_VSX;
1135  
1136  	/* Ensure that restore_math() will restore */
1137  	if (msr_diff & MSR_FP)
1138  		current->thread.load_fp = 1;
1139  #ifdef CONFIG_ALTIVEC
1140  	if (cpu_has_feature(CPU_FTR_ALTIVEC) && msr_diff & MSR_VEC)
1141  		current->thread.load_vec = 1;
1142  #endif
1143  	restore_math(regs);
1144  
1145  	regs_set_return_msr(regs, regs->msr | msr_diff);
1146  }
1147  
1148  #else /* !CONFIG_PPC_TRANSACTIONAL_MEM */
1149  #define tm_recheckpoint_new_task(new)
1150  #define __switch_to_tm(prev, new)
1151  void tm_reclaim_current(uint8_t cause) {}
1152  #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
1153  
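/*
 * save_sprs()/restore_sprs() context switch the per-thread SPRs (VRSAVE,
 * SPEFSCR, DSCR, FSCR, EBB and TAR registers, etc.) that are not handled
 * by the low level _switch() code. restore_sprs() only writes an SPR when
 * the value actually changes, to avoid unnecessary mtspr traffic.
 */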
1154  static inline void save_sprs(struct thread_struct *t)
1155  {
1156  #ifdef CONFIG_ALTIVEC
1157  	if (cpu_has_feature(CPU_FTR_ALTIVEC))
1158  		t->vrsave = mfspr(SPRN_VRSAVE);
1159  #endif
1160  #ifdef CONFIG_SPE
1161  	if (cpu_has_feature(CPU_FTR_SPE))
1162  		t->spefscr = mfspr(SPRN_SPEFSCR);
1163  #endif
1164  #ifdef CONFIG_PPC_BOOK3S_64
1165  	if (cpu_has_feature(CPU_FTR_DSCR))
1166  		t->dscr = mfspr(SPRN_DSCR);
1167  
1168  	if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
1169  		t->bescr = mfspr(SPRN_BESCR);
1170  		t->ebbhr = mfspr(SPRN_EBBHR);
1171  		t->ebbrr = mfspr(SPRN_EBBRR);
1172  
1173  		t->fscr = mfspr(SPRN_FSCR);
1174  
1175  		/*
1176  		 * Note that the TAR is not available for use in the kernel.
1177  		 * (To provide this, the TAR should be backed up/restored on
1178  		 * exception entry/exit instead, and be in pt_regs.  FIXME,
1179  		 * this should be in pt_regs anyway (for debug).)
1180  		 */
1181  		t->tar = mfspr(SPRN_TAR);
1182  	}
1183  
1184  	if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE))
1185  		t->hashkeyr = mfspr(SPRN_HASHKEYR);
1186  
1187  	if (cpu_has_feature(CPU_FTR_ARCH_31))
1188  		t->dexcr = mfspr(SPRN_DEXCR);
1189  #endif
1190  }
1191  
1192  #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
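/*
 * Helpers exported for KVM Book3S HV: kvmppc_save_user_regs() flushes the
 * current task's user FP/VEC state and TM SPRs into the thread_struct, and
 * kvmppc_save_current_sprs() snapshots its other per-thread SPRs.
 */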
1193  void kvmppc_save_user_regs(void)
1194  {
1195  	unsigned long usermsr;
1196  
1197  	if (!current->thread.regs)
1198  		return;
1199  
1200  	usermsr = current->thread.regs->msr;
1201  
1202  	/* Caller has enabled FP/VEC/VSX/TM in MSR */
1203  	if (usermsr & MSR_FP)
1204  		__giveup_fpu(current);
1205  	if (usermsr & MSR_VEC)
1206  		__giveup_altivec(current);
1207  
1208  #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1209  	if (usermsr & MSR_TM) {
1210  		current->thread.tm_tfhar = mfspr(SPRN_TFHAR);
1211  		current->thread.tm_tfiar = mfspr(SPRN_TFIAR);
1212  		current->thread.tm_texasr = mfspr(SPRN_TEXASR);
1213  		current->thread.regs->msr &= ~MSR_TM;
1214  	}
1215  #endif
1216  }
1217  EXPORT_SYMBOL_GPL(kvmppc_save_user_regs);
1218  
1219  void kvmppc_save_current_sprs(void)
1220  {
1221  	save_sprs(&current->thread);
1222  }
1223  EXPORT_SYMBOL_GPL(kvmppc_save_current_sprs);
1224  #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
1225  
1226  static inline void restore_sprs(struct thread_struct *old_thread,
1227  				struct thread_struct *new_thread)
1228  {
1229  #ifdef CONFIG_ALTIVEC
1230  	if (cpu_has_feature(CPU_FTR_ALTIVEC) &&
1231  	    old_thread->vrsave != new_thread->vrsave)
1232  		mtspr(SPRN_VRSAVE, new_thread->vrsave);
1233  #endif
1234  #ifdef CONFIG_SPE
1235  	if (cpu_has_feature(CPU_FTR_SPE) &&
1236  	    old_thread->spefscr != new_thread->spefscr)
1237  		mtspr(SPRN_SPEFSCR, new_thread->spefscr);
1238  #endif
1239  #ifdef CONFIG_PPC_BOOK3S_64
1240  	if (cpu_has_feature(CPU_FTR_DSCR)) {
1241  		u64 dscr = get_paca()->dscr_default;
1242  		if (new_thread->dscr_inherit)
1243  			dscr = new_thread->dscr;
1244  
1245  		if (old_thread->dscr != dscr)
1246  			mtspr(SPRN_DSCR, dscr);
1247  	}
1248  
1249  	if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
1250  		if (old_thread->bescr != new_thread->bescr)
1251  			mtspr(SPRN_BESCR, new_thread->bescr);
1252  		if (old_thread->ebbhr != new_thread->ebbhr)
1253  			mtspr(SPRN_EBBHR, new_thread->ebbhr);
1254  		if (old_thread->ebbrr != new_thread->ebbrr)
1255  			mtspr(SPRN_EBBRR, new_thread->ebbrr);
1256  
1257  		if (old_thread->fscr != new_thread->fscr)
1258  			mtspr(SPRN_FSCR, new_thread->fscr);
1259  
1260  		if (old_thread->tar != new_thread->tar)
1261  			mtspr(SPRN_TAR, new_thread->tar);
1262  	}
1263  
1264  	if (cpu_has_feature(CPU_FTR_P9_TIDR) &&
1265  	    old_thread->tidr != new_thread->tidr)
1266  		mtspr(SPRN_TIDR, new_thread->tidr);
1267  
1268  	if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE) &&
1269  	    old_thread->hashkeyr != new_thread->hashkeyr)
1270  		mtspr(SPRN_HASHKEYR, new_thread->hashkeyr);
1271  
1272  	if (cpu_has_feature(CPU_FTR_ARCH_31) &&
1273  	    old_thread->dexcr != new_thread->dexcr)
1274  		mtspr(SPRN_DEXCR, new_thread->dexcr);
1275  #endif
1276  
1277  }
1278  
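/*
 * __switch_to() is the C-level context switch. It saves the outgoing
 * task's SPRs and facility state, handles TM and breakpoint switching,
 * then calls the asm _switch() to swap stacks and return as the new task.
 */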
1279  struct task_struct *__switch_to(struct task_struct *prev,
1280  	struct task_struct *new)
1281  {
1282  	struct thread_struct *new_thread, *old_thread;
1283  	struct task_struct *last;
1284  #ifdef CONFIG_PPC_64S_HASH_MMU
1285  	struct ppc64_tlb_batch *batch;
1286  #endif
1287  
1288  	new_thread = &new->thread;
1289  	old_thread = &current->thread;
1290  
1291  	WARN_ON(!irqs_disabled());
1292  
1293  #ifdef CONFIG_PPC_64S_HASH_MMU
1294  	batch = this_cpu_ptr(&ppc64_tlb_batch);
1295  	if (batch->active) {
1296  		current_thread_info()->local_flags |= _TLF_LAZY_MMU;
1297  		if (batch->index)
1298  			__flush_tlb_pending(batch);
1299  		batch->active = 0;
1300  	}
1301  
1302  	/*
1303  	 * On POWER9 the copy-paste buffer can only paste into
1304  	 * foreign real addresses, so unprivileged processes can not
1305  	 * see the data or use it in any way unless they have
1306  	 * foreign real mappings. If the new process has the foreign
1307  	 * real address mappings, we must issue a cp_abort to clear
1308  	 * any state and prevent snooping, corruption or a covert
1309  	 * channel. ISA v3.1 supports paste into local memory.
1310  	 */
1311  	if (new->mm && (cpu_has_feature(CPU_FTR_ARCH_31) ||
1312  			atomic_read(&new->mm->context.vas_windows)))
1313  		asm volatile(PPC_CP_ABORT);
1314  #endif /* CONFIG_PPC_64S_HASH_MMU */
1315  
1316  #ifdef CONFIG_PPC_ADV_DEBUG_REGS
1317  	switch_booke_debug_regs(&new->thread.debug);
1318  #else
1319  /*
1320   * For PPC_BOOK3S_64, we use the hw-breakpoint interfaces that would
1321   * schedule DABR
1322   */
1323  #ifndef CONFIG_HAVE_HW_BREAKPOINT
1324  	switch_hw_breakpoint(new);
1325  #endif /* CONFIG_HAVE_HW_BREAKPOINT */
1326  #endif
1327  
1328  	/*
1329  	 * We need to save SPRs before treclaim/trecheckpoint as these will
1330  	 * change a number of them.
1331  	 */
1332  	save_sprs(&prev->thread);
1333  
1334  	/* Save FPU, Altivec, VSX and SPE state */
1335  	giveup_all(prev);
1336  
1337  	__switch_to_tm(prev, new);
1338  
1339  	if (!radix_enabled()) {
1340  		/*
1341  		 * We can't take a PMU exception inside _switch() since there
1342  		 * is a window where the kernel stack SLB and the kernel stack
1343  		 * are out of sync. Hard disable here.
1344  		 */
1345  		hard_irq_disable();
1346  	}
1347  
1348  	/*
1349  	 * Call restore_sprs() and set_return_regs_changed() before calling
1350  	 * _switch(). If we move it after _switch() then we miss out on calling
1351  	 * it for new tasks. The reason for this is we manually create a stack
1352  	 * frame for new tasks that directly returns through ret_from_fork() or
1353  	 * ret_from_kernel_thread(). See copy_thread() for details.
1354  	 */
1355  	restore_sprs(old_thread, new_thread);
1356  
1357  	set_return_regs_changed(); /* _switch changes stack (and regs) */
1358  
1359  	if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64))
1360  		kuap_assert_locked();
1361  
1362  	last = _switch(old_thread, new_thread);
1363  
1364  	/*
1365  	 * Nothing after _switch will be run for newly created tasks,
1366  	 * because they switch directly to ret_from_fork/ret_from_kernel_thread
1367  	 * etc. Code added here should have a comment explaining why that is
1368  	 * okay.
1369  	 */
1370  
1371  #ifdef CONFIG_PPC_BOOK3S_64
1372  #ifdef CONFIG_PPC_64S_HASH_MMU
1373  	/*
1374  	 * This applies to a process that was context switched while inside
1375  	 * arch_enter_lazy_mmu_mode(), to re-activate the batch that was
1376  	 * deactivated above, before _switch(). This will never be the case
1377  	 * for new tasks.
1378  	 */
1379  	if (current_thread_info()->local_flags & _TLF_LAZY_MMU) {
1380  		current_thread_info()->local_flags &= ~_TLF_LAZY_MMU;
1381  		batch = this_cpu_ptr(&ppc64_tlb_batch);
1382  		batch->active = 1;
1383  	}
1384  #endif
1385  
1386  	/*
1387  	 * Math facilities are masked out of the child MSR in copy_thread.
1388  	 * A new task does not need to restore_math because it will
1389  	 * demand fault them.
1390  	 */
1391  	if (current->thread.regs)
1392  		restore_math(current->thread.regs);
1393  #endif /* CONFIG_PPC_BOOK3S_64 */
1394  
1395  	return last;
1396  }
1397  
1398  #define NR_INSN_TO_PRINT	16
1399  
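/*
 * Dump NR_INSN_TO_PRINT instructions around regs->nip for oops/debug
 * output, marking the faulting instruction with <...> and printing
 * XXXXXXXX for anything that cannot be read.
 */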
1400  static void show_instructions(struct pt_regs *regs)
1401  {
1402  	int i;
1403  	unsigned long nip = regs->nip;
1404  	unsigned long pc = regs->nip - (NR_INSN_TO_PRINT * 3 / 4 * sizeof(int));
1405  
1406  	printk("Code: ");
1407  
1408  	/*
1409  	 * If we were executing with the MMU off for instructions, adjust pc
1410  	 * rather than printing XXXXXXXX.
1411  	 */
1412  	if (!IS_ENABLED(CONFIG_BOOKE) && !(regs->msr & MSR_IR)) {
1413  		pc = (unsigned long)phys_to_virt(pc);
1414  		nip = (unsigned long)phys_to_virt(regs->nip);
1415  	}
1416  
1417  	for (i = 0; i < NR_INSN_TO_PRINT; i++) {
1418  		int instr;
1419  
1420  		if (get_kernel_nofault(instr, (const void *)pc)) {
1421  			pr_cont("XXXXXXXX ");
1422  		} else {
1423  			if (nip == pc)
1424  				pr_cont("<%08x> ", instr);
1425  			else
1426  				pr_cont("%08x ", instr);
1427  		}
1428  
1429  		pc += sizeof(int);
1430  	}
1431  
1432  	pr_cont("\n");
1433  }
1434  
1435  void show_user_instructions(struct pt_regs *regs)
1436  {
1437  	unsigned long pc;
1438  	int n = NR_INSN_TO_PRINT;
1439  	struct seq_buf s;
1440  	char buf[96]; /* enough for 8 times 9 + 2 chars */
1441  
1442  	pc = regs->nip - (NR_INSN_TO_PRINT * 3 / 4 * sizeof(int));
1443  
1444  	seq_buf_init(&s, buf, sizeof(buf));
1445  
1446  	while (n) {
1447  		int i;
1448  
1449  		seq_buf_clear(&s);
1450  
1451  		for (i = 0; i < 8 && n; i++, n--, pc += sizeof(int)) {
1452  			int instr;
1453  
1454  			if (copy_from_user_nofault(&instr, (void __user *)pc,
1455  					sizeof(instr))) {
1456  				seq_buf_printf(&s, "XXXXXXXX ");
1457  				continue;
1458  			}
1459  			seq_buf_printf(&s, regs->nip == pc ? "<%08x> " : "%08x ", instr);
1460  		}
1461  
1462  		if (!seq_buf_has_overflowed(&s))
1463  			pr_info("%s[%d]: code: %s\n", current->comm,
1464  				current->pid, s.buffer);
1465  	}
1466  }
1467  
1468  struct regbit {
1469  	unsigned long bit;
1470  	const char *name;
1471  };
1472  
1473  static struct regbit msr_bits[] = {
1474  #if defined(CONFIG_PPC64) && !defined(CONFIG_BOOKE)
1475  	{MSR_SF,	"SF"},
1476  	{MSR_HV,	"HV"},
1477  #endif
1478  	{MSR_VEC,	"VEC"},
1479  	{MSR_VSX,	"VSX"},
1480  #ifdef CONFIG_BOOKE
1481  	{MSR_CE,	"CE"},
1482  #endif
1483  	{MSR_EE,	"EE"},
1484  	{MSR_PR,	"PR"},
1485  	{MSR_FP,	"FP"},
1486  	{MSR_ME,	"ME"},
1487  #ifdef CONFIG_BOOKE
1488  	{MSR_DE,	"DE"},
1489  #else
1490  	{MSR_SE,	"SE"},
1491  	{MSR_BE,	"BE"},
1492  #endif
1493  	{MSR_IR,	"IR"},
1494  	{MSR_DR,	"DR"},
1495  	{MSR_PMM,	"PMM"},
1496  #ifndef CONFIG_BOOKE
1497  	{MSR_RI,	"RI"},
1498  	{MSR_LE,	"LE"},
1499  #endif
1500  	{0,		NULL}
1501  };
1502  
1503  static void print_bits(unsigned long val, struct regbit *bits, const char *sep)
1504  {
1505  	const char *s = "";
1506  
1507  	for (; bits->bit; ++bits)
1508  		if (val & bits->bit) {
1509  			pr_cont("%s%s", s, bits->name);
1510  			s = sep;
1511  		}
1512  }
1513  
1514  #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1515  static struct regbit msr_tm_bits[] = {
1516  	{MSR_TS_T,	"T"},
1517  	{MSR_TS_S,	"S"},
1518  	{MSR_TM,	"E"},
1519  	{0,		NULL}
1520  };
1521  
1522  static void print_tm_bits(unsigned long val)
1523  {
1524  /*
1525   * This only prints something if at least one of the TM bits is set.
1526   * Inside the TM[], the output means:
1527   *   E: Enabled		(bit 32)
1528   *   S: Suspended	(bit 33)
1529   *   T: Transactional	(bit 34)
1530   */
1531  	if (val & (MSR_TM | MSR_TS_S | MSR_TS_T)) {
1532  		pr_cont(",TM[");
1533  		print_bits(val, msr_tm_bits, "");
1534  		pr_cont("]");
1535  	}
1536  }
1537  #else
1538  static void print_tm_bits(unsigned long val) {}
1539  #endif
1540  
1541  static void print_msr_bits(unsigned long val)
1542  {
1543  	pr_cont("<");
1544  	print_bits(val, msr_bits, ",");
1545  	print_tm_bits(val);
1546  	pr_cont(">");
1547  }
1548  
1549  #ifdef CONFIG_PPC64
1550  #define REG		"%016lx"
1551  #define REGS_PER_LINE	4
1552  #else
1553  #define REG		"%08lx"
1554  #define REGS_PER_LINE	8
1555  #endif
1556  
1557  static void __show_regs(struct pt_regs *regs)
1558  {
1559  	int i, trap;
1560  
1561  	printk("NIP:  "REG" LR: "REG" CTR: "REG"\n",
1562  	       regs->nip, regs->link, regs->ctr);
1563  	printk("REGS: %px TRAP: %04lx   %s  (%s)\n",
1564  	       regs, regs->trap, print_tainted(), init_utsname()->release);
1565  	printk("MSR:  "REG" ", regs->msr);
1566  	print_msr_bits(regs->msr);
1567  	pr_cont("  CR: %08lx  XER: %08lx\n", regs->ccr, regs->xer);
1568  	trap = TRAP(regs);
1569  	if (!trap_is_syscall(regs) && cpu_has_feature(CPU_FTR_CFAR))
1570  		pr_cont("CFAR: "REG" ", regs->orig_gpr3);
1571  	if (trap == INTERRUPT_MACHINE_CHECK ||
1572  	    trap == INTERRUPT_DATA_STORAGE ||
1573  	    trap == INTERRUPT_ALIGNMENT) {
1574  		if (IS_ENABLED(CONFIG_BOOKE))
1575  			pr_cont("DEAR: "REG" ESR: "REG" ", regs->dear, regs->esr);
1576  		else
1577  			pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr);
1578  	}
1579  
1580  #ifdef CONFIG_PPC64
1581  	pr_cont("IRQMASK: %lx ", regs->softe);
1582  #endif
1583  #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1584  	if (MSR_TM_ACTIVE(regs->msr))
1585  		pr_cont("\nPACATMSCRATCH: %016llx ", get_paca()->tm_scratch);
1586  #endif
1587  
1588  	for (i = 0;  i < 32;  i++) {
1589  		if ((i % REGS_PER_LINE) == 0)
1590  			pr_cont("\nGPR%02d: ", i);
1591  		pr_cont(REG " ", regs->gpr[i]);
1592  	}
1593  	pr_cont("\n");
1594  	/*
1595  	 * Lookup NIP late so we have the best chance of getting the
1596  	 * above info out without failing
1597  	 */
1598  	if (IS_ENABLED(CONFIG_KALLSYMS)) {
1599  		printk("NIP ["REG"] %pS\n", regs->nip, (void *)regs->nip);
1600  		printk("LR ["REG"] %pS\n", regs->link, (void *)regs->link);
1601  	}
1602  }
1603  
1604  void show_regs(struct pt_regs *regs)
1605  {
1606  	show_regs_print_info(KERN_DEFAULT);
1607  	__show_regs(regs);
1608  	show_stack(current, (unsigned long *) regs->gpr[1], KERN_DEFAULT);
1609  	if (!user_mode(regs))
1610  		show_instructions(regs);
1611  }
1612  
1613  void flush_thread(void)
1614  {
1615  #ifdef CONFIG_HAVE_HW_BREAKPOINT
1616  	flush_ptrace_hw_breakpoint(current);
1617  #else /* CONFIG_HAVE_HW_BREAKPOINT */
1618  	set_debug_reg_defaults(&current->thread);
1619  #endif /* CONFIG_HAVE_HW_BREAKPOINT */
1620  }
1621  
1622  void arch_setup_new_exec(void)
1623  {
1624  
1625  #ifdef CONFIG_PPC_BOOK3S_64
1626  	if (!radix_enabled())
1627  		hash__setup_new_exec();
1628  #endif
1629  	/*
1630  	 * If we exec out of a kernel thread then thread.regs will not be
1631  	 * set.  Do it now.
1632  	 */
1633  	if (!current->thread.regs) {
1634  		struct pt_regs *regs = task_stack_page(current) + THREAD_SIZE;
1635  		current->thread.regs = regs - 1;
1636  	}
1637  
1638  #ifdef CONFIG_PPC_MEM_KEYS
1639  	current->thread.regs->amr  = default_amr;
1640  	current->thread.regs->iamr  = default_iamr;
1641  #endif
1642  
1643  #ifdef CONFIG_PPC_BOOK3S_64
1644  	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
1645  		current->thread.dexcr = current->thread.dexcr_onexec;
1646  		mtspr(SPRN_DEXCR, current->thread.dexcr);
1647  	}
1648  #endif /* CONFIG_PPC_BOOK3S_64 */
1649  }
1650  
1651  #ifdef CONFIG_PPC64
1652  /*
1653   * Assign a TIDR (thread ID) for task @t and set it in the thread
1654   * structure. For now, we only support setting TIDR for 'current' task.
1655   *
1656   * Since the TID value is a truncated form of its PID, it is possible
1657   * (but unlikely) for 2 threads to have the same TID. In the unlikely event
1658   * that 2 threads share the same TID and are waiting, one of the following
1659   * cases will happen:
1660   *
1661   * 1. The correct thread is running, the wrong thread is not
1662   * In this situation, the correct thread is woken and proceeds to pass its
1663   * condition check.
1664   *
1665   * 2. Neither threads are running
1666   * In this situation, neither thread will be woken. When scheduled, the waiting
1667   * threads will execute either a wait, which will return immediately, followed
1668   * by a condition check, which will pass for the correct thread and fail
1669   * for the wrong thread, or they will execute the condition check immediately.
1670   *
1671   * 3. The wrong thread is running, the correct thread is not
1672   * The wrong thread will be woken, but will fail its condition check and
1673   * re-execute wait. The correct thread, when scheduled, will execute either
1674   * its condition check (which will pass), or wait, which returns immediately
1675   * when called the first time after the thread is scheduled, followed by its
1676   * condition check (which will pass).
1677   *
1678   * 4. Both threads are running
1679   * Both threads will be woken. The wrong thread will fail its condition check
1680   * and execute another wait, while the correct thread will pass its condition
1681   * check.
1682   *
1683   * @t: the task to set the thread ID for
1684   */
1685  int set_thread_tidr(struct task_struct *t)
1686  {
1687  	if (!cpu_has_feature(CPU_FTR_P9_TIDR))
1688  		return -EINVAL;
1689  
1690  	if (t != current)
1691  		return -EINVAL;
1692  
1693  	if (t->thread.tidr)
1694  		return 0;
1695  
1696  	t->thread.tidr = (u16)task_pid_nr(t);
1697  	mtspr(SPRN_TIDR, t->thread.tidr);
1698  
1699  	return 0;
1700  }
1701  EXPORT_SYMBOL_GPL(set_thread_tidr);
1702  
1703  #endif /* CONFIG_PPC64 */
1704  
1705  /*
1706   * this gets called so that we can store coprocessor state into memory and
1707   * copy the current task into the new thread.
1708   */
1709  int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
1710  {
1711  	flush_all_to_thread(src);
1712  	/*
1713  	 * Flush TM state out so we can copy it.  __switch_to_tm() does this
1714  	 * flush but it removes the checkpointed state from the current CPU and
1715  	 * transitions the CPU out of TM mode.  Hence we need to call
1716  	 * tm_recheckpoint_new_task() (on the same task) to restore the
1717  	 * checkpointed state and the TM mode.
1718  	 *
1719  	 * Can't pass dst because it isn't ready yet. That doesn't matter, as
1720  	 * passing dst is only important for __switch_to().
1721  	 */
1722  	__switch_to_tm(src, src);
1723  
1724  	*dst = *src;
1725  
1726  	clear_task_ebb(dst);
1727  
1728  	return 0;
1729  }
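/*
 * For orientation (a rough sketch of the generic fork path, not code in this
 * file): copy_process() duplicates the task via dup_task_struct(), which
 * calls arch_dup_task_struct() above, and later calls copy_thread() below to
 * build the child's kernel stack and switch frame.
 */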
1730  
1731  static void setup_ksp_vsid(struct task_struct *p, unsigned long sp)
1732  {
1733  #ifdef CONFIG_PPC_64S_HASH_MMU
1734  	unsigned long sp_vsid;
1735  	unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp;
1736  
1737  	if (radix_enabled())
1738  		return;
1739  
1740  	if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
1741  		sp_vsid = get_kernel_vsid(sp, MMU_SEGSIZE_1T)
1742  			<< SLB_VSID_SHIFT_1T;
1743  	else
1744  		sp_vsid = get_kernel_vsid(sp, MMU_SEGSIZE_256M)
1745  			<< SLB_VSID_SHIFT;
1746  	sp_vsid |= SLB_VSID_KERNEL | llp;
1747  	p->thread.ksp_vsid = sp_vsid;
1748  #endif
1749  }
1750  
1751  /*
1752   * Copy a thread.
1753   */
1754  
1755  /*
1756   * Copy architecture-specific thread state
1757   */
1758  int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
1759  {
1760  	struct pt_regs *kregs; /* Switch frame regs */
1761  	extern void ret_from_fork(void);
1762  	extern void ret_from_fork_scv(void);
1763  	extern void ret_from_kernel_user_thread(void);
1764  	extern void start_kernel_thread(void);
1765  	void (*f)(void);
1766  	unsigned long sp = (unsigned long)task_stack_page(p) + THREAD_SIZE;
1767  #ifdef CONFIG_HAVE_HW_BREAKPOINT
1768  	int i;
1769  #endif
1770  
1771  	klp_init_thread_info(p);
1772  
1773  	if (unlikely(p->flags & PF_KTHREAD)) {
1774  		/* kernel thread */
1775  
1776  		/* Create initial minimum stack frame. */
1777  		sp -= STACK_FRAME_MIN_SIZE;
1778  		((unsigned long *)sp)[0] = 0;
1779  
1780  		f = start_kernel_thread;
1781  		p->thread.regs = NULL;	/* no user register state */
1782  		clear_tsk_compat_task(p);
1783  	} else {
1784  		/* user thread */
1785  		struct pt_regs *childregs;
1786  
1787  		/* Create initial user return stack frame. */
1788  		sp -= STACK_USER_INT_FRAME_SIZE;
1789  		*(unsigned long *)(sp + STACK_INT_FRAME_MARKER) = STACK_FRAME_REGS_MARKER;
1790  
1791  		childregs = (struct pt_regs *)(sp + STACK_INT_FRAME_REGS);
1792  
1793  		if (unlikely(args->fn)) {
1794  			/*
1795  			 * A user space thread, but it first runs a kernel
1796  			 * thread, and then returns as though it had called
1797  			 * execve rather than fork, so user regs will be
1798  			 * filled in (e.g., by kernel_execve()).
1799  			 */
1800  			((unsigned long *)sp)[0] = 0;
1801  			memset(childregs, 0, sizeof(struct pt_regs));
1802  #ifdef CONFIG_PPC64
1803  			childregs->softe = IRQS_ENABLED;
1804  #endif
1805  			f = ret_from_kernel_user_thread;
1806  		} else {
1807  			struct pt_regs *regs = current_pt_regs();
1808  			unsigned long clone_flags = args->flags;
1809  			unsigned long usp = args->stack;
1810  
1811  			/* Copy registers */
1812  			*childregs = *regs;
1813  			if (usp)
1814  				childregs->gpr[1] = usp;
1815  			((unsigned long *)sp)[0] = childregs->gpr[1];
1816  #ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
1817  			WARN_ON_ONCE(childregs->softe != IRQS_ENABLED);
1818  #endif
1819  			if (clone_flags & CLONE_SETTLS) {
1820  				unsigned long tls = args->tls;
1821  
1822  				if (!is_32bit_task())
1823  					childregs->gpr[13] = tls;
1824  				else
1825  					childregs->gpr[2] = tls;
1826  			}
1827  
1828  			if (trap_is_scv(regs))
1829  				f = ret_from_fork_scv;
1830  			else
1831  				f = ret_from_fork;
1832  		}
1833  
1834  		childregs->msr &= ~(MSR_FP|MSR_VEC|MSR_VSX);
1835  		p->thread.regs = childregs;
1836  	}
1837  
1838  	/*
1839  	 * The way this works is that at some point in the future
1840  	 * some task will call _switch to switch to the new task.
1841  	 * That will pop off the stack frame created below and start
1842  	 * the new task running at f (ret_from_fork in the plain fork/clone
1843  	 * case).  The new task will do some housekeeping and then return from
1844  	 * the fork or clone system call, using the stack frame created above.
1845  	 */
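	/*
	 * Rough sketch of the child stack built here (higher addresses at the
	 * top; the exact offsets come from the STACK_* macros, so treat this
	 * as a guide only):
	 *
	 *	+--------------------------------------------+ <- stack page top
	 *	| user interrupt frame (childregs) or the    |
	 *	| minimal kthread frame; f is stored in its  |
	 *	| LR save slot below                         |
	 *	+--------------------------------------------+
	 *	| switch frame: back chain to the frame      |
	 *	| above, kregs with nip = f, and, when       |
	 *	| args->fn is set, fn/fn_arg in r14/r15      |
	 *	+--------------------------------------------+ <- p->thread.ksp
	 */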
1846  	((unsigned long *)sp)[STACK_FRAME_LR_SAVE] = (unsigned long)f;
1847  	sp -= STACK_SWITCH_FRAME_SIZE;
1848  	((unsigned long *)sp)[0] = sp + STACK_SWITCH_FRAME_SIZE;
1849  	kregs = (struct pt_regs *)(sp + STACK_SWITCH_FRAME_REGS);
1850  	kregs->nip = ppc_function_entry(f);
1851  	if (unlikely(args->fn)) {
1852  		/*
1853  		 * Put kthread fn, arg parameters in non-volatile GPRs in the
1854  		 * switch frame so they are loaded by _switch before it returns
1855  		 * to start_kernel_thread or ret_from_kernel_user_thread.
1856  		 */
1857  		kregs->gpr[14] = ppc_function_entry((void *)args->fn);
1858  		kregs->gpr[15] = (unsigned long)args->fn_arg;
1859  	}
1860  	p->thread.ksp = sp;
1861  
1862  #ifdef CONFIG_HAVE_HW_BREAKPOINT
1863  	for (i = 0; i < nr_wp_slots(); i++)
1864  		p->thread.ptrace_bps[i] = NULL;
1865  #endif
1866  
1867  #ifdef CONFIG_PPC_FPU_REGS
1868  	p->thread.fp_save_area = NULL;
1869  #endif
1870  #ifdef CONFIG_ALTIVEC
1871  	p->thread.vr_save_area = NULL;
1872  #endif
1873  #if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP)
1874  	p->thread.kuap = KUAP_NONE;
1875  #endif
1876  #if defined(CONFIG_BOOKE) && defined(CONFIG_PPC_KUAP)
1877  	p->thread.pid = MMU_NO_CONTEXT;
1878  #endif
1879  
1880  	setup_ksp_vsid(p, sp);
1881  
1882  #ifdef CONFIG_PPC64
1883  	if (cpu_has_feature(CPU_FTR_DSCR)) {
1884  		p->thread.dscr_inherit = current->thread.dscr_inherit;
1885  		p->thread.dscr = mfspr(SPRN_DSCR);
1886  	}
1887  
1888  	p->thread.tidr = 0;
1889  #endif
1890  #ifdef CONFIG_PPC_BOOK3S_64
1891  	if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE))
1892  		p->thread.hashkeyr = current->thread.hashkeyr;
1893  
1894  	if (cpu_has_feature(CPU_FTR_ARCH_31))
1895  		p->thread.dexcr = mfspr(SPRN_DEXCR);
1896  #endif
1897  	return 0;
1898  }
1899  
1900  void preload_new_slb_context(unsigned long start, unsigned long sp);
1901  
1902  /*
1903   * Set up a thread for executing a new program
1904   */
1905  void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
1906  {
1907  #ifdef CONFIG_PPC64
1908  	unsigned long load_addr = regs->gpr[2];	/* saved by ELF_PLAT_INIT */
1909  
1910  	if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !radix_enabled())
1911  		preload_new_slb_context(start, sp);
1912  #endif
1913  
1914  #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1915  	/*
1916  	 * Clear any transactional state; we're exec()ing. The cause is
1917  	 * not important, as there will never be a recheckpoint, so it's
1918  	 * not user visible.
1919  	 */
1920  	if (MSR_TM_SUSPENDED(mfmsr()))
1921  		tm_reclaim_current(0);
1922  #endif
1923  
1924  	memset(&regs->gpr[1], 0, sizeof(regs->gpr) - sizeof(regs->gpr[0]));
1925  	regs->ctr = 0;
1926  	regs->link = 0;
1927  	regs->xer = 0;
1928  	regs->ccr = 0;
1929  	regs->gpr[1] = sp;
1930  
1931  #ifdef CONFIG_PPC32
1932  	regs->mq = 0;
1933  	regs->nip = start;
1934  	regs->msr = MSR_USER;
1935  #else
1936  	if (!is_32bit_task()) {
1937  		unsigned long entry;
1938  
1939  		if (is_elf2_task()) {
1940  			/* Look ma, no function descriptors! */
1941  			entry = start;
1942  
1943  			/*
1944  			 * Ulrich says:
1945  			 *   The latest iteration of the ABI requires that when
1946  			 *   calling a function (at its global entry point),
1947  			 *   the caller must ensure r12 holds the entry point
1948  			 *   address (so that the function can quickly
1949  			 *   establish addressability).
1950  			 */
1951  			regs->gpr[12] = start;
1952  			/* Make sure that's restored on entry to userspace. */
1953  			set_thread_flag(TIF_RESTOREALL);
1954  		} else {
1955  			unsigned long toc;
1956  
1957  			/* start is a relocated pointer to the function
1958  			 * descriptor for the ELF _start routine.  The first
1959  			 * entry in the function descriptor is the entry
1960  			 * address of _start and the second entry is the TOC
1961  			 * value we need to use.
1962  			 */
1963  			__get_user(entry, (unsigned long __user *)start);
1964  			__get_user(toc, (unsigned long __user *)start+1);
1965  
1966  			/* Check whether the e_entry function descriptor entries
1967  			 * need to be relocated before we can use them.
1968  			 */
1969  			if (load_addr != 0) {
1970  				entry += load_addr;
1971  				toc   += load_addr;
1972  			}
1973  			regs->gpr[2] = toc;
1974  		}
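		/*
		 * ABI background only (nothing beyond the two words above is
		 * read here): a 64-bit ELFv1 function descriptor is three
		 * doublewords:
		 *
		 *	 0: function entry point
		 *	 8: TOC pointer
		 *	16: environment pointer (unused by this code)
		 */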
1975  		regs_set_return_ip(regs, entry);
1976  		regs_set_return_msr(regs, MSR_USER64);
1977  	} else {
1978  		regs->gpr[2] = 0;
1979  		regs_set_return_ip(regs, start);
1980  		regs_set_return_msr(regs, MSR_USER32);
1981  	}
1982  
1983  #endif
1984  #ifdef CONFIG_VSX
1985  	current->thread.used_vsr = 0;
1986  #endif
1987  	current->thread.load_slb = 0;
1988  	current->thread.load_fp = 0;
1989  #ifdef CONFIG_PPC_FPU_REGS
1990  	memset(&current->thread.fp_state, 0, sizeof(current->thread.fp_state));
1991  	current->thread.fp_save_area = NULL;
1992  #endif
1993  #ifdef CONFIG_ALTIVEC
1994  	memset(&current->thread.vr_state, 0, sizeof(current->thread.vr_state));
1995  	current->thread.vr_state.vscr.u[3] = 0x00010000; /* Java mode disabled */
1996  	current->thread.vr_save_area = NULL;
1997  	current->thread.vrsave = 0;
1998  	current->thread.used_vr = 0;
1999  	current->thread.load_vec = 0;
2000  #endif /* CONFIG_ALTIVEC */
2001  #ifdef CONFIG_SPE
2002  	memset(current->thread.evr, 0, sizeof(current->thread.evr));
2003  	current->thread.acc = 0;
2004  	current->thread.spefscr = 0;
2005  	current->thread.used_spe = 0;
2006  #endif /* CONFIG_SPE */
2007  #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
2008  	current->thread.tm_tfhar = 0;
2009  	current->thread.tm_texasr = 0;
2010  	current->thread.tm_tfiar = 0;
2011  	current->thread.load_tm = 0;
2012  #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
2013  #ifdef CONFIG_PPC_BOOK3S_64
2014  	if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE)) {
2015  		current->thread.hashkeyr = get_random_long();
2016  		mtspr(SPRN_HASHKEYR, current->thread.hashkeyr);
2017  	}
2018  #endif /* CONFIG_PPC_BOOK3S_64 */
2019  }
2020  EXPORT_SYMBOL(start_thread);
2021  
2022  #define PR_FP_ALL_EXCEPT (PR_FP_EXC_DIV | PR_FP_EXC_OVF | PR_FP_EXC_UND \
2023  		| PR_FP_EXC_RES | PR_FP_EXC_INV)
2024  
2025  int set_fpexc_mode(struct task_struct *tsk, unsigned int val)
2026  {
2027  	struct pt_regs *regs = tsk->thread.regs;
2028  
2029  	/* This is a bit hairy.  If we are an SPE-enabled processor
2030  	 * (have embedded fp) we store the IEEE exception enable flags in
2031  	 * fpexc_mode.  fpexc_mode is also used for setting FP exception
2032  	 * mode (async, precise, disabled) for 'Classic' FP. */
2033  	if (val & PR_FP_EXC_SW_ENABLE) {
2034  		if (cpu_has_feature(CPU_FTR_SPE)) {
2035  			/*
2036  			 * When the sticky exception bits are set
2037  			 * directly by userspace, it must call prctl
2038  			 * with PR_GET_FPEXC (with PR_FP_EXC_SW_ENABLE
2039  			 * in the existing prctl settings) or
2040  			 * PR_SET_FPEXC (with PR_FP_EXC_SW_ENABLE in
2041  			 * the bits being set).  <fenv.h> functions
2042  			 * saving and restoring the whole
2043  			 * floating-point environment need to do so
2044  			 * anyway to restore the prctl settings from
2045  			 * the saved environment.
2046  			 */
2047  #ifdef CONFIG_SPE
2048  			tsk->thread.spefscr_last = mfspr(SPRN_SPEFSCR);
2049  			tsk->thread.fpexc_mode = val &
2050  				(PR_FP_EXC_SW_ENABLE | PR_FP_ALL_EXCEPT);
2051  #endif
2052  			return 0;
2053  		} else {
2054  			return -EINVAL;
2055  		}
2056  	}
2057  
2058  	/* On a CONFIG_SPE processor this does not hurt us.  The bits that
2059  	 * __pack_fe01 uses do not overlap with the bits used for
2060  	 * PR_FP_EXC_SW_ENABLE.  Additionally, the MSR[FE0,FE1] bits
2061  	 * on CONFIG_SPE implementations are reserved, so writing to
2062  	 * them does not change anything. */
2063  	if (val > PR_FP_EXC_PRECISE)
2064  		return -EINVAL;
2065  	tsk->thread.fpexc_mode = __pack_fe01(val);
2066  	if (regs != NULL && (regs->msr & MSR_FP) != 0) {
2067  		regs_set_return_msr(regs, (regs->msr & ~(MSR_FE0|MSR_FE1))
2068  						| tsk->thread.fpexc_mode);
2069  	}
2070  	return 0;
2071  }
2072  
2073  int get_fpexc_mode(struct task_struct *tsk, unsigned long adr)
2074  {
2075  	unsigned int val = 0;
2076  
2077  	if (tsk->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE) {
2078  		if (cpu_has_feature(CPU_FTR_SPE)) {
2079  			/*
2080  			 * When the sticky exception bits are set
2081  			 * directly by userspace, it must call prctl
2082  			 * with PR_GET_FPEXC (with PR_FP_EXC_SW_ENABLE
2083  			 * in the existing prctl settings) or
2084  			 * PR_SET_FPEXC (with PR_FP_EXC_SW_ENABLE in
2085  			 * the bits being set).  <fenv.h> functions
2086  			 * saving and restoring the whole
2087  			 * floating-point environment need to do so
2088  			 * anyway to restore the prctl settings from
2089  			 * the saved environment.
2090  			 */
2091  #ifdef CONFIG_SPE
2092  			tsk->thread.spefscr_last = mfspr(SPRN_SPEFSCR);
2093  			val = tsk->thread.fpexc_mode;
2094  #endif
2095  		} else
2096  			return -EINVAL;
2097  	} else {
2098  		val = __unpack_fe01(tsk->thread.fpexc_mode);
2099  	}
2100  	return put_user(val, (unsigned int __user *) adr);
2101  }
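/*
 * set_fpexc_mode()/get_fpexc_mode() back the PR_SET_FPEXC/PR_GET_FPEXC
 * prctls. An illustrative userspace call sequence (not part of this file):
 *
 *	unsigned int mode;
 *
 *	prctl(PR_SET_FPEXC, PR_FP_EXC_PRECISE);
 *	prctl(PR_GET_FPEXC, &mode);
 */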
2102  
2103  int set_endian(struct task_struct *tsk, unsigned int val)
2104  {
2105  	struct pt_regs *regs = tsk->thread.regs;
2106  
2107  	if ((val == PR_ENDIAN_LITTLE && !cpu_has_feature(CPU_FTR_REAL_LE)) ||
2108  	    (val == PR_ENDIAN_PPC_LITTLE && !cpu_has_feature(CPU_FTR_PPC_LE)))
2109  		return -EINVAL;
2110  
2111  	if (regs == NULL)
2112  		return -EINVAL;
2113  
2114  	if (val == PR_ENDIAN_BIG)
2115  		regs_set_return_msr(regs, regs->msr & ~MSR_LE);
2116  	else if (val == PR_ENDIAN_LITTLE || val == PR_ENDIAN_PPC_LITTLE)
2117  		regs_set_return_msr(regs, regs->msr | MSR_LE);
2118  	else
2119  		return -EINVAL;
2120  
2121  	return 0;
2122  }
2123  
2124  int get_endian(struct task_struct *tsk, unsigned long adr)
2125  {
2126  	struct pt_regs *regs = tsk->thread.regs;
2127  	unsigned int val;
2128  
2129  	if (!cpu_has_feature(CPU_FTR_PPC_LE) &&
2130  	    !cpu_has_feature(CPU_FTR_REAL_LE))
2131  		return -EINVAL;
2132  
2133  	if (regs == NULL)
2134  		return -EINVAL;
2135  
2136  	if (regs->msr & MSR_LE) {
2137  		if (cpu_has_feature(CPU_FTR_REAL_LE))
2138  			val = PR_ENDIAN_LITTLE;
2139  		else
2140  			val = PR_ENDIAN_PPC_LITTLE;
2141  	} else
2142  		val = PR_ENDIAN_BIG;
2143  
2144  	return put_user(val, (unsigned int __user *)adr);
2145  }
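/*
 * set_endian()/get_endian() are reached via the PR_SET_ENDIAN/PR_GET_ENDIAN
 * prctls. Illustrative userspace usage (not part of this file):
 *
 *	unsigned int e;
 *
 *	prctl(PR_GET_ENDIAN, &e);
 *	prctl(PR_SET_ENDIAN, PR_ENDIAN_BIG);
 */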
2146  
2147  int set_unalign_ctl(struct task_struct *tsk, unsigned int val)
2148  {
2149  	tsk->thread.align_ctl = val;
2150  	return 0;
2151  }
2152  
2153  int get_unalign_ctl(struct task_struct *tsk, unsigned long adr)
2154  {
2155  	return put_user(tsk->thread.align_ctl, (unsigned int __user *)adr);
2156  }
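/*
 * These two back the PR_SET_UNALIGN/PR_GET_UNALIGN prctls; align_ctl is
 * consulted when an alignment exception is handled. Illustrative usage
 * (not part of this file):
 *
 *	prctl(PR_SET_UNALIGN, PR_UNALIGN_SIGBUS);
 */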
2157  
2158  static inline int valid_irq_stack(unsigned long sp, struct task_struct *p,
2159  				  unsigned long nbytes)
2160  {
2161  	unsigned long stack_page;
2162  	unsigned long cpu = task_cpu(p);
2163  
2164  	if (!hardirq_ctx[cpu] || !softirq_ctx[cpu])
2165  		return 0;
2166  
2167  	stack_page = (unsigned long)hardirq_ctx[cpu];
2168  	if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes)
2169  		return 1;
2170  
2171  	stack_page = (unsigned long)softirq_ctx[cpu];
2172  	if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes)
2173  		return 1;
2174  
2175  	return 0;
2176  }
2177  
2178  #ifdef CONFIG_PPC64
2179  static inline int valid_emergency_stack(unsigned long sp, struct task_struct *p,
2180  					unsigned long nbytes)
2181  {
2182  	unsigned long stack_page;
2183  	unsigned long cpu = task_cpu(p);
2184  
2185  	if (!paca_ptrs)
2186  		return 0;
2187  
2188  	if (!paca_ptrs[cpu]->emergency_sp)
2189  		return 0;
2190  
2191  # ifdef CONFIG_PPC_BOOK3S_64
2192  	if (!paca_ptrs[cpu]->nmi_emergency_sp || !paca_ptrs[cpu]->mc_emergency_sp)
2193  		return 0;
2194  # endif
2195  
2196  	stack_page = (unsigned long)paca_ptrs[cpu]->emergency_sp - THREAD_SIZE;
2197  	if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes)
2198  		return 1;
2199  
2200  # ifdef CONFIG_PPC_BOOK3S_64
2201  	stack_page = (unsigned long)paca_ptrs[cpu]->nmi_emergency_sp - THREAD_SIZE;
2202  	if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes)
2203  		return 1;
2204  
2205  	stack_page = (unsigned long)paca_ptrs[cpu]->mc_emergency_sp - THREAD_SIZE;
2206  	if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes)
2207  		return 1;
2208  # endif
2209  
2210  	return 0;
2211  }
2212  #else
2213  static inline int valid_emergency_stack(unsigned long sp, struct task_struct *p,
2214  					unsigned long nbytes)
2215  {
2216  	unsigned long stack_page;
2217  	unsigned long cpu = task_cpu(p);
2218  
2219  	if (!IS_ENABLED(CONFIG_VMAP_STACK))
2220  		return 0;
2221  
2222  	stack_page = (unsigned long)emergency_ctx[cpu] - THREAD_SIZE;
2223  	if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes)
2224  		return 1;
2225  
2226  	return 0;
2227  }
2228  #endif
2229  
2230  /*
2231   * Validate the stack frame of a particular minimum size, used when we are
2232   * looking at an object on the stack beyond the minimum frame.
2233   */
2234  int validate_sp_size(unsigned long sp, struct task_struct *p,
2235  		     unsigned long nbytes)
2236  {
2237  	unsigned long stack_page = (unsigned long)task_stack_page(p);
2238  
2239  	if (sp < THREAD_SIZE)
2240  		return 0;
2241  
2242  	if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes)
2243  		return 1;
2244  
2245  	if (valid_irq_stack(sp, p, nbytes))
2246  		return 1;
2247  
2248  	return valid_emergency_stack(sp, p, nbytes);
2249  }
2250  
2251  int validate_sp(unsigned long sp, struct task_struct *p)
2252  {
2253  	return validate_sp_size(sp, p, STACK_FRAME_MIN_SIZE);
2254  }
2255  
2256  static unsigned long ___get_wchan(struct task_struct *p)
2257  {
2258  	unsigned long ip, sp;
2259  	int count = 0;
2260  
2261  	sp = p->thread.ksp;
2262  	if (!validate_sp(sp, p))
2263  		return 0;
2264  
2265  	do {
2266  		sp = READ_ONCE_NOCHECK(*(unsigned long *)sp);
2267  		if (!validate_sp(sp, p) || task_is_running(p))
2268  			return 0;
2269  		if (count > 0) {
2270  			ip = READ_ONCE_NOCHECK(((unsigned long *)sp)[STACK_FRAME_LR_SAVE]);
2271  			if (!in_sched_functions(ip))
2272  				return ip;
2273  		}
2274  	} while (count++ < 16);
2275  	return 0;
2276  }
2277  
2278  unsigned long __get_wchan(struct task_struct *p)
2279  {
2280  	unsigned long ret;
2281  
2282  	if (!try_get_task_stack(p))
2283  		return 0;
2284  
2285  	ret = ___get_wchan(p);
2286  
2287  	put_task_stack(p);
2288  
2289  	return ret;
2290  }
2291  
2292  static bool empty_user_regs(struct pt_regs *regs, struct task_struct *tsk)
2293  {
2294  	unsigned long stack_page;
2295  
2296  	// A non-empty pt_regs should never have a zero MSR or TRAP value.
2297  	if (regs->msr || regs->trap)
2298  		return false;
2299  
2300  	// Check it sits at the very base of the stack
2301  	stack_page = (unsigned long)task_stack_page(tsk);
2302  	if ((unsigned long)(regs + 1) != stack_page + THREAD_SIZE)
2303  		return false;
2304  
2305  	return true;
2306  }
2307  
2308  static int kstack_depth_to_print = CONFIG_PRINT_STACK_DEPTH;
2309  
2310  void __no_sanitize_address show_stack(struct task_struct *tsk,
2311  				      unsigned long *stack,
2312  				      const char *loglvl)
2313  {
2314  	unsigned long sp, ip, lr, newsp;
2315  	int count = 0;
2316  	int firstframe = 1;
2317  	unsigned long ret_addr;
2318  	int ftrace_idx = 0;
2319  
2320  	if (tsk == NULL)
2321  		tsk = current;
2322  
2323  	if (!try_get_task_stack(tsk))
2324  		return;
2325  
2326  	sp = (unsigned long) stack;
2327  	if (sp == 0) {
2328  		if (tsk == current)
2329  			sp = current_stack_frame();
2330  		else
2331  			sp = tsk->thread.ksp;
2332  	}
2333  
2334  	lr = 0;
2335  	printk("%sCall Trace:\n", loglvl);
2336  	do {
2337  		if (!validate_sp(sp, tsk))
2338  			break;
2339  
2340  		stack = (unsigned long *) sp;
2341  		newsp = stack[0];
2342  		ip = stack[STACK_FRAME_LR_SAVE];
2343  		if (!firstframe || ip != lr) {
2344  			printk("%s["REG"] ["REG"] %pS",
2345  				loglvl, sp, ip, (void *)ip);
2346  			ret_addr = ftrace_graph_ret_addr(current,
2347  						&ftrace_idx, ip, stack);
2348  			if (ret_addr != ip)
2349  				pr_cont(" (%pS)", (void *)ret_addr);
2350  			if (firstframe)
2351  				pr_cont(" (unreliable)");
2352  			pr_cont("\n");
2353  		}
2354  		firstframe = 0;
2355  
2356  		/*
2357  		 * See if this is an exception frame.
2358  		 * We look for the "regs" marker in the current frame.
2359  		 *
2360  		 * STACK_SWITCH_FRAME_SIZE is the smallest frame that could
2361  		 * hold a pt_regs; if the frame is not at least that big then
2362  		 * it can't have regs.
2363  		 */
2364  		if (validate_sp_size(sp, tsk, STACK_SWITCH_FRAME_SIZE)
2365  		    && stack[STACK_INT_FRAME_MARKER_LONGS] == STACK_FRAME_REGS_MARKER) {
2366  			struct pt_regs *regs = (struct pt_regs *)
2367  				(sp + STACK_INT_FRAME_REGS);
2368  
2369  			lr = regs->link;
2370  			printk("%s--- interrupt: %lx at %pS\n",
2371  			       loglvl, regs->trap, (void *)regs->nip);
2372  
2373  			// Detect the case of an empty pt_regs at the very base
2374  			// of the stack and suppress showing it in full.
2375  			if (!empty_user_regs(regs, tsk)) {
2376  				__show_regs(regs);
2377  				printk("%s--- interrupt: %lx\n", loglvl, regs->trap);
2378  			}
2379  
2380  			firstframe = 1;
2381  		}
2382  
2383  		sp = newsp;
2384  	} while (count++ < kstack_depth_to_print);
2385  
2386  	put_task_stack(tsk);
2387  }
2388  
2389  #ifdef CONFIG_PPC64
2390  /* Called with hard IRQs off */
2391  void notrace __ppc64_runlatch_on(void)
2392  {
2393  	struct thread_info *ti = current_thread_info();
2394  
2395  	if (cpu_has_feature(CPU_FTR_ARCH_206)) {
2396  		/*
2397  		 * The least significant bit (RUN) is the only writable bit of
2398  		 * the CTRL register, so we can avoid mfspr. 2.06 is not the
2399  		 * earliest ISA where this is the case, but it's convenient.
2400  		 */
2401  		mtspr(SPRN_CTRLT, CTRL_RUNLATCH);
2402  	} else {
2403  		unsigned long ctrl;
2404  
2405  		/*
2406  		 * Some architectures (e.g., Cell) have writable fields other
2407  		 * than RUN, so do the read-modify-write.
2408  		 */
2409  		ctrl = mfspr(SPRN_CTRLF);
2410  		ctrl |= CTRL_RUNLATCH;
2411  		mtspr(SPRN_CTRLT, ctrl);
2412  	}
2413  
2414  	ti->local_flags |= _TLF_RUNLATCH;
2415  }
2416  
2417  /* Called with hard IRQs off */
2418  void notrace __ppc64_runlatch_off(void)
2419  {
2420  	struct thread_info *ti = current_thread_info();
2421  
2422  	ti->local_flags &= ~_TLF_RUNLATCH;
2423  
2424  	if (cpu_has_feature(CPU_FTR_ARCH_206)) {
2425  		mtspr(SPRN_CTRLT, 0);
2426  	} else {
2427  		unsigned long ctrl;
2428  
2429  		ctrl = mfspr(SPRN_CTRLF);
2430  		ctrl &= ~CTRL_RUNLATCH;
2431  		mtspr(SPRN_CTRLT, ctrl);
2432  	}
2433  }
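/*
 * Callers normally go through the ppc64_runlatch_on()/ppc64_runlatch_off()
 * wrappers in asm/runlatch.h, which check CPU_FTR_CTRL and _TLF_RUNLATCH
 * first so the SPR write is skipped when the latch already has the
 * requested state.
 */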
2434  #endif /* CONFIG_PPC64 */
2435  
2436  unsigned long arch_align_stack(unsigned long sp)
2437  {
2438  	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
2439  		sp -= get_random_u32_below(PAGE_SIZE);
2440  	return sp & ~0xf;
2441  }
2442