1  // SPDX-License-Identifier: GPL-2.0-only
2  /*
3   * xsave/xrstor support.
4   *
5   * Author: Suresh Siddha <suresh.b.siddha@intel.com>
6   */
7  #include <linux/bitops.h>
8  #include <linux/compat.h>
9  #include <linux/cpu.h>
10  #include <linux/mman.h>
11  #include <linux/nospec.h>
12  #include <linux/pkeys.h>
13  #include <linux/seq_file.h>
14  #include <linux/proc_fs.h>
15  #include <linux/vmalloc.h>
16  #include <linux/coredump.h>
17  
18  #include <asm/fpu/api.h>
19  #include <asm/fpu/regset.h>
20  #include <asm/fpu/signal.h>
21  #include <asm/fpu/xcr.h>
22  
23  #include <asm/tlbflush.h>
24  #include <asm/prctl.h>
25  #include <asm/elf.h>
26  
27  #include <uapi/asm/elf.h>
28  
29  #include "context.h"
30  #include "internal.h"
31  #include "legacy.h"
32  #include "xstate.h"
33  
/*
 * Walk the set bits of @mask, starting the search at
 * FIRST_EXTENDED_XFEATURE so that lower-numbered xfeatures are skipped.
 *
 * @bit:  integer lvalue used as the cursor; it is overwritten on entry.
 * @mask: lvalue holding the feature bitmap. Its address is taken and
 *        reinterpreted as an unsigned long bitmap of 8 * sizeof(mask) bits.
 *
 * NOTE: this expands to an assignment statement followed by the
 * for_each_set_bit_from() construct, i.e. to two statements. Do not use
 * it as the body of an unbraced if/else/for/while.
 */
#define for_each_extended_xfeature(bit, mask)				\
	(bit) = FIRST_EXTENDED_XFEATURE;				\
	for_each_set_bit_from(bit, (unsigned long *)&(mask), 8 * sizeof(mask))
37  
/*
 * Human readable names, indexed by xfeature number. Lookups clamp the
 * index to the last entry, which therefore must stay the generic
 * "unknown xstate feature" string.
 *
 * Although we spell it out in here, the Processor Trace
 * xfeature is completely unused.  We use other mechanisms
 * to save/restore PT state in Linux.
 *
 * The table is never modified, so both the pointers and the strings are
 * const.
 */
static const char * const xfeature_names[] =
{
	"x87 floating point registers",
	"SSE registers",
	"AVX registers",
	"MPX bounds registers",
	"MPX CSR",
	"AVX-512 opmask",
	"AVX-512 Hi256",
	"AVX-512 ZMM_Hi256",
	"Processor Trace (unused)",
	"Protection Keys User registers",
	"PASID state",
	"Control-flow User registers",
	"Control-flow Kernel registers (unused)",
	"unknown xstate feature",
	"unknown xstate feature",
	"unknown xstate feature",
	"unknown xstate feature",
	"AMX Tile config",
	"AMX Tile data",
	"unknown xstate feature",
};
66  
/*
 * For each xfeature number, the X86_FEATURE_* bit that must also be
 * present for the xfeature to be kept. Consumed by
 * fpu__init_system_xstate().
 *
 * Slots without an initializer are zero. Because X86_FEATURE_FPU is
 * itself 0, the consumer special-cases XFEATURE_FP and treats a zero
 * entry at any other index as "no matching CPUID feature", which clears
 * that xfeature.
 */
static unsigned short xsave_cpuid_features[] __initdata = {
	[XFEATURE_FP]				= X86_FEATURE_FPU,
	[XFEATURE_SSE]				= X86_FEATURE_XMM,
	[XFEATURE_YMM]				= X86_FEATURE_AVX,
	[XFEATURE_BNDREGS]			= X86_FEATURE_MPX,
	[XFEATURE_BNDCSR]			= X86_FEATURE_MPX,
	[XFEATURE_OPMASK]			= X86_FEATURE_AVX512F,
	[XFEATURE_ZMM_Hi256]			= X86_FEATURE_AVX512F,
	[XFEATURE_Hi16_ZMM]			= X86_FEATURE_AVX512F,
	[XFEATURE_PT_UNIMPLEMENTED_SO_FAR]	= X86_FEATURE_INTEL_PT,
	[XFEATURE_PKRU]				= X86_FEATURE_OSPKE,
	[XFEATURE_PASID]			= X86_FEATURE_ENQCMD,
	[XFEATURE_CET_USER]			= X86_FEATURE_SHSTK,
	[XFEATURE_XTILE_CFG]			= X86_FEATURE_AMX_TILE,
	[XFEATURE_XTILE_DATA]			= X86_FEATURE_AMX_TILE,
};
83  
/*
 * Per-xfeature layout data, indexed by xfeature number and populated by
 * setup_xstate_cache():
 *
 *  xstate_offsets[]: offset of the component in the non-compacted layout
 *		      (CPUID EBX for extended features). Entries start out
 *		      as -1 and stay that way for supervisor features.
 *  xstate_sizes[]:   size of the component in bytes (CPUID EAX for
 *		      extended features). Entries start out as -1.
 *  xstate_flags[]:   raw CPUID ECX value for the component, tested with
 *		      the XSTATE_FLAG_* bits below.
 */
static unsigned int xstate_offsets[XFEATURE_MAX] __ro_after_init =
	{ [ 0 ... XFEATURE_MAX - 1] = -1};
static unsigned int xstate_sizes[XFEATURE_MAX] __ro_after_init =
	{ [ 0 ... XFEATURE_MAX - 1] = -1};
static unsigned int xstate_flags[XFEATURE_MAX] __ro_after_init;

/* Bit 0 of xstate_flags[]: the component is a supervisor state. */
#define XSTATE_FLAG_SUPERVISOR	BIT(0)
/* Bit 1 of xstate_flags[]: the component is 64-byte aligned when compacted. */
#define XSTATE_FLAG_ALIGNED64	BIT(1)
92  
93  /*
94   * Return whether the system supports a given xfeature.
95   *
96   * Also return the name of the (most advanced) feature that the caller requested:
97   */
cpu_has_xfeatures(u64 xfeatures_needed,const char ** feature_name)98  int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name)
99  {
100  	u64 xfeatures_missing = xfeatures_needed & ~fpu_kernel_cfg.max_features;
101  
102  	if (unlikely(feature_name)) {
103  		long xfeature_idx, max_idx;
104  		u64 xfeatures_print;
105  		/*
106  		 * So we use FLS here to be able to print the most advanced
107  		 * feature that was requested but is missing. So if a driver
108  		 * asks about "XFEATURE_MASK_SSE | XFEATURE_MASK_YMM" we'll print the
109  		 * missing AVX feature - this is the most informative message
110  		 * to users:
111  		 */
112  		if (xfeatures_missing)
113  			xfeatures_print = xfeatures_missing;
114  		else
115  			xfeatures_print = xfeatures_needed;
116  
117  		xfeature_idx = fls64(xfeatures_print)-1;
118  		max_idx = ARRAY_SIZE(xfeature_names)-1;
119  		xfeature_idx = min(xfeature_idx, max_idx);
120  
121  		*feature_name = xfeature_names[xfeature_idx];
122  	}
123  
124  	if (xfeatures_missing)
125  		return 0;
126  
127  	return 1;
128  }
129  EXPORT_SYMBOL_GPL(cpu_has_xfeatures);
130  
xfeature_is_aligned64(int xfeature_nr)131  static bool xfeature_is_aligned64(int xfeature_nr)
132  {
133  	return xstate_flags[xfeature_nr] & XSTATE_FLAG_ALIGNED64;
134  }
135  
xfeature_is_supervisor(int xfeature_nr)136  static bool xfeature_is_supervisor(int xfeature_nr)
137  {
138  	return xstate_flags[xfeature_nr] & XSTATE_FLAG_SUPERVISOR;
139  }
140  
/*
 * Compute the byte offset of @xfeature inside an xsave buffer.
 *
 * @xcomp_bv:	bitmap of components present in a compacted buffer
 * @xfeature:	xfeature number to locate
 *
 * Legacy features (up to XFEATURE_SSE), and every feature when the
 * compacted format is not in use, live at the cached fixed offsets.
 * Otherwise the offset is found by walking the extended components set in
 * @xcomp_bv in ascending order.
 */
static unsigned int xfeature_get_offset(u64 xcomp_bv, int xfeature)
{
	unsigned int pos, nr;

	if (xfeature <= XFEATURE_SSE ||
	    !cpu_feature_enabled(X86_FEATURE_XCOMPACTED))
		return xstate_offsets[xfeature];

	/* The compacted area starts right behind the legacy area and header. */
	pos = FXSAVE_SIZE + XSAVE_HDR_SIZE;

	for_each_extended_xfeature(nr, xcomp_bv) {
		/* Alignment applies before the component is placed. */
		if (xfeature_is_aligned64(nr))
			pos = ALIGN(pos, 64);

		if (nr == xfeature)
			return pos;

		/* Skip over a component that precedes the requested one. */
		pos += xstate_sizes[nr];
	}

	return pos;
}
168  
169  /*
170   * Enable the extended processor state save/restore feature.
171   * Called once per CPU onlining.
172   */
fpu__init_cpu_xstate(void)173  void fpu__init_cpu_xstate(void)
174  {
175  	if (!boot_cpu_has(X86_FEATURE_XSAVE) || !fpu_kernel_cfg.max_features)
176  		return;
177  
178  	cr4_set_bits(X86_CR4_OSXSAVE);
179  
180  	/*
181  	 * Must happen after CR4 setup and before xsetbv() to allow KVM
182  	 * lazy passthrough.  Write independent of the dynamic state static
183  	 * key as that does not work on the boot CPU. This also ensures
184  	 * that any stale state is wiped out from XFD. Reset the per CPU
185  	 * xfd cache too.
186  	 */
187  	if (cpu_feature_enabled(X86_FEATURE_XFD))
188  		xfd_set_state(init_fpstate.xfd);
189  
190  	/*
191  	 * XCR_XFEATURE_ENABLED_MASK (aka. XCR0) sets user features
192  	 * managed by XSAVE{C, OPT, S} and XRSTOR{S}.  Only XSAVE user
193  	 * states can be set here.
194  	 */
195  	xsetbv(XCR_XFEATURE_ENABLED_MASK, fpu_user_cfg.max_features);
196  
197  	/*
198  	 * MSR_IA32_XSS sets supervisor states managed by XSAVES.
199  	 */
200  	if (boot_cpu_has(X86_FEATURE_XSAVES)) {
201  		wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() |
202  				     xfeatures_mask_independent());
203  	}
204  }
205  
xfeature_enabled(enum xfeature xfeature)206  static bool xfeature_enabled(enum xfeature xfeature)
207  {
208  	return fpu_kernel_cfg.max_features & BIT_ULL(xfeature);
209  }
210  
211  /*
212   * Record the offsets and sizes of various xstates contained
213   * in the XSAVE state memory layout.
214   */
setup_xstate_cache(void)215  static void __init setup_xstate_cache(void)
216  {
217  	u32 eax, ebx, ecx, edx, i;
218  	/* start at the beginning of the "extended state" */
219  	unsigned int last_good_offset = offsetof(struct xregs_state,
220  						 extended_state_area);
221  	/*
222  	 * The FP xstates and SSE xstates are legacy states. They are always
223  	 * in the fixed offsets in the xsave area in either compacted form
224  	 * or standard form.
225  	 */
226  	xstate_offsets[XFEATURE_FP]	= 0;
227  	xstate_sizes[XFEATURE_FP]	= offsetof(struct fxregs_state,
228  						   xmm_space);
229  
230  	xstate_offsets[XFEATURE_SSE]	= xstate_sizes[XFEATURE_FP];
231  	xstate_sizes[XFEATURE_SSE]	= sizeof_field(struct fxregs_state,
232  						       xmm_space);
233  
234  	for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
235  		cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
236  
237  		xstate_sizes[i] = eax;
238  		xstate_flags[i] = ecx;
239  
240  		/*
241  		 * If an xfeature is supervisor state, the offset in EBX is
242  		 * invalid, leave it to -1.
243  		 */
244  		if (xfeature_is_supervisor(i))
245  			continue;
246  
247  		xstate_offsets[i] = ebx;
248  
249  		/*
250  		 * In our xstate size checks, we assume that the highest-numbered
251  		 * xstate feature has the highest offset in the buffer.  Ensure
252  		 * it does.
253  		 */
254  		WARN_ONCE(last_good_offset > xstate_offsets[i],
255  			  "x86/fpu: misordered xstate at %d\n", last_good_offset);
256  
257  		last_good_offset = xstate_offsets[i];
258  	}
259  }
260  
print_xstate_feature(u64 xstate_mask)261  static void __init print_xstate_feature(u64 xstate_mask)
262  {
263  	const char *feature_name;
264  
265  	if (cpu_has_xfeatures(xstate_mask, &feature_name))
266  		pr_info("x86/fpu: Supporting XSAVE feature 0x%03Lx: '%s'\n", xstate_mask, feature_name);
267  }
268  
269  /*
270   * Print out all the supported xstate features:
271   */
print_xstate_features(void)272  static void __init print_xstate_features(void)
273  {
274  	print_xstate_feature(XFEATURE_MASK_FP);
275  	print_xstate_feature(XFEATURE_MASK_SSE);
276  	print_xstate_feature(XFEATURE_MASK_YMM);
277  	print_xstate_feature(XFEATURE_MASK_BNDREGS);
278  	print_xstate_feature(XFEATURE_MASK_BNDCSR);
279  	print_xstate_feature(XFEATURE_MASK_OPMASK);
280  	print_xstate_feature(XFEATURE_MASK_ZMM_Hi256);
281  	print_xstate_feature(XFEATURE_MASK_Hi16_ZMM);
282  	print_xstate_feature(XFEATURE_MASK_PKRU);
283  	print_xstate_feature(XFEATURE_MASK_PASID);
284  	print_xstate_feature(XFEATURE_MASK_CET_USER);
285  	print_xstate_feature(XFEATURE_MASK_XTILE_CFG);
286  	print_xstate_feature(XFEATURE_MASK_XTILE_DATA);
287  }
288  
/*
 * Sanity check that @nr is an extended xfeature *number*, i.e. lies in
 * [FIRST_EXTENDED_XFEATURE, XFEATURE_MAX). This check is important
 * because it is easy to pass a feature mask where a feature number is
 * expected.
 *
 * @nr is parenthesized so that argument expressions using operators of
 * lower precedence than the comparisons are evaluated as a whole.
 */
#define CHECK_XFEATURE(nr) do {				\
	WARN_ON((nr) < FIRST_EXTENDED_XFEATURE);	\
	WARN_ON((nr) >= XFEATURE_MAX);			\
} while (0)
297  
298  /*
299   * Print out xstate component offsets and sizes
300   */
print_xstate_offset_size(void)301  static void __init print_xstate_offset_size(void)
302  {
303  	int i;
304  
305  	for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
306  		pr_info("x86/fpu: xstate_offset[%d]: %4d, xstate_sizes[%d]: %4d\n",
307  			i, xfeature_get_offset(fpu_kernel_cfg.max_features, i),
308  			i, xstate_sizes[i]);
309  	}
310  }
311  
/*
 * Restore the register state from @xstate during early boot.
 *
 * This function is called only during boot time when x86 caps are not set
 * up and alternative can not be used yet.
 *
 * @xstate: kernel buffer holding the image to restore
 */
static __init void os_xrstor_booting(struct xregs_state *xstate)
{
	/* Only restore enabled features that are part of XFEATURE_MASK_FPSTATE */
	u64 mask = fpu_kernel_cfg.max_features & XFEATURE_MASK_FPSTATE;
	/* The 64-bit mask is handed to the instruction as two 32-bit halves */
	u32 lmask = mask;
	u32 hmask = mask >> 32;
	int err;

	/* Use the supervisor-capable variant when XSAVES is enabled */
	if (cpu_feature_enabled(X86_FEATURE_XSAVES))
		XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
	else
		XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);

	/*
	 * We should never fault when copying from a kernel buffer, and the FPU
	 * state we set at boot time should be valid.
	 */
	WARN_ON_FPU(err);
}
334  
/*
 * All supported features have either init state all zeros or are
 * handled in setup_init_fpu_buf() individually. This is an explicit
 * feature list and does not use XFEATURE_MASK*SUPPORTED to catch
 * newly added supported features at build time and make people
 * actually look at the init state for the new feature.
 *
 * setup_init_fpu_buf() has a BUILD_BUG_ON() comparing this list against
 * the union of the user and supervisor supported masks.
 */
#define XFEATURES_INIT_FPSTATE_HANDLED		\
	(XFEATURE_MASK_FP |			\
	 XFEATURE_MASK_SSE |			\
	 XFEATURE_MASK_YMM |			\
	 XFEATURE_MASK_OPMASK |			\
	 XFEATURE_MASK_ZMM_Hi256 |		\
	 XFEATURE_MASK_Hi16_ZMM	 |		\
	 XFEATURE_MASK_PKRU |			\
	 XFEATURE_MASK_BNDREGS |		\
	 XFEATURE_MASK_BNDCSR |			\
	 XFEATURE_MASK_PASID |			\
	 XFEATURE_MASK_CET_USER |		\
	 XFEATURE_MASK_XTILE)
355  
/*
 * Set up the xstate image representing the init state (init_fpstate).
 *
 * The sequence is: log the supported features, initialize the xcomp_bv
 * header field of the image, restore the image into the registers and
 * finally capture the legacy FP/SSE part back with FXSAVE.
 * Does nothing beyond the build-time check when XSAVE is not available.
 */
static void __init setup_init_fpu_buf(void)
{
	/*
	 * Fail the build when a feature is added to the supported masks
	 * without being listed in XFEATURES_INIT_FPSTATE_HANDLED.
	 */
	BUILD_BUG_ON((XFEATURE_MASK_USER_SUPPORTED |
		      XFEATURE_MASK_SUPERVISOR_SUPPORTED) !=
		     XFEATURES_INIT_FPSTATE_HANDLED);

	if (!boot_cpu_has(X86_FEATURE_XSAVE))
		return;

	print_xstate_features();

	xstate_init_xcomp_bv(&init_fpstate.regs.xsave, init_fpstate.xfeatures);

	/*
	 * Init all the features state with header.xfeatures being 0x0
	 */
	os_xrstor_booting(&init_fpstate.regs.xsave);

	/*
	 * All components are now in init state. Read the state back so
	 * that init_fpstate contains all non-zero init state. This only
	 * works with XSAVE, but not with XSAVEOPT and XSAVEC/S because
	 * those use the init optimization which skips writing data for
	 * components in init state.
	 *
	 * XSAVE could be used, but that would require to reshuffle the
	 * data when XSAVEC/S is available because XSAVEC/S uses xstate
	 * compaction. But doing so is a pointless exercise because most
	 * components have an all zeros init state except for the legacy
	 * ones (FP and SSE). Those can be saved with FXSAVE into the
	 * legacy area. Adding new features requires to ensure that init
	 * state is all zeroes or if not to add the necessary handling
	 * here.
	 */
	fxsave(&init_fpstate.regs.fxsave);
}
395  
xfeature_size(int xfeature_nr)396  int xfeature_size(int xfeature_nr)
397  {
398  	u32 eax, ebx, ecx, edx;
399  
400  	CHECK_XFEATURE(xfeature_nr);
401  	cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
402  	return eax;
403  }
404  
405  /* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
validate_user_xstate_header(const struct xstate_header * hdr,struct fpstate * fpstate)406  static int validate_user_xstate_header(const struct xstate_header *hdr,
407  				       struct fpstate *fpstate)
408  {
409  	/* No unknown or supervisor features may be set */
410  	if (hdr->xfeatures & ~fpstate->user_xfeatures)
411  		return -EINVAL;
412  
413  	/* Userspace must use the uncompacted format */
414  	if (hdr->xcomp_bv)
415  		return -EINVAL;
416  
417  	/*
418  	 * If 'reserved' is shrunken to add a new field, make sure to validate
419  	 * that new field here!
420  	 */
421  	BUILD_BUG_ON(sizeof(hdr->reserved) != 48);
422  
423  	/* No reserved bits may be set */
424  	if (memchr_inv(hdr->reserved, 0, sizeof(hdr->reserved)))
425  		return -EINVAL;
426  
427  	return 0;
428  }
429  
__xstate_dump_leaves(void)430  static void __init __xstate_dump_leaves(void)
431  {
432  	int i;
433  	u32 eax, ebx, ecx, edx;
434  	static int should_dump = 1;
435  
436  	if (!should_dump)
437  		return;
438  	should_dump = 0;
439  	/*
440  	 * Dump out a few leaves past the ones that we support
441  	 * just in case there are some goodies up there
442  	 */
443  	for (i = 0; i < XFEATURE_MAX + 10; i++) {
444  		cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
445  		pr_warn("CPUID[%02x, %02x]: eax=%08x ebx=%08x ecx=%08x edx=%08x\n",
446  			XSTATE_CPUID, i, eax, ebx, ecx, edx);
447  	}
448  }
449  
/*
 * Warn once, with an "XSAVE consistency problem: " prefix, when @x is
 * true. If the warning condition holds, also dump the XSTATE CPUID
 * leaves to help diagnose the problem.
 */
#define XSTATE_WARN_ON(x, fmt, ...) do {					\
	if (WARN_ONCE(x, "XSAVE consistency problem: " fmt, ##__VA_ARGS__)) {	\
		__xstate_dump_leaves();						\
	}									\
} while (0)

/*
 * Compare the CPU-reported size @sz of xfeature @nr against the size of
 * the C type @__struct. A mismatch warns once and dumps the XSTATE CPUID
 * leaves. The expression always evaluates to true, i.e. a mismatch is
 * reported but not treated as a failure by the caller.
 */
#define XCHECK_SZ(sz, nr, __struct) ({					\
	if (WARN_ONCE(sz != sizeof(__struct),				\
	    "[%s]: struct is %zu bytes, cpu state %d bytes\n",		\
	    xfeature_names[nr], sizeof(__struct), sz)) {		\
		__xstate_dump_leaves();					\
	}								\
	true;								\
})
464  
465  
466  /**
467   * check_xtile_data_against_struct - Check tile data state size.
468   *
469   * Calculate the state size by multiplying the single tile size which is
470   * recorded in a C struct, and the number of tiles that the CPU informs.
471   * Compare the provided size with the calculation.
472   *
473   * @size:	The tile data state size
474   *
475   * Returns:	0 on success, -EINVAL on mismatch.
476   */
check_xtile_data_against_struct(int size)477  static int __init check_xtile_data_against_struct(int size)
478  {
479  	u32 max_palid, palid, state_size;
480  	u32 eax, ebx, ecx, edx;
481  	u16 max_tile;
482  
483  	/*
484  	 * Check the maximum palette id:
485  	 *   eax: the highest numbered palette subleaf.
486  	 */
487  	cpuid_count(TILE_CPUID, 0, &max_palid, &ebx, &ecx, &edx);
488  
489  	/*
490  	 * Cross-check each tile size and find the maximum number of
491  	 * supported tiles.
492  	 */
493  	for (palid = 1, max_tile = 0; palid <= max_palid; palid++) {
494  		u16 tile_size, max;
495  
496  		/*
497  		 * Check the tile size info:
498  		 *   eax[31:16]:  bytes per title
499  		 *   ebx[31:16]:  the max names (or max number of tiles)
500  		 */
501  		cpuid_count(TILE_CPUID, palid, &eax, &ebx, &edx, &edx);
502  		tile_size = eax >> 16;
503  		max = ebx >> 16;
504  
505  		if (tile_size != sizeof(struct xtile_data)) {
506  			pr_err("%s: struct is %zu bytes, cpu xtile %d bytes\n",
507  			       __stringify(XFEATURE_XTILE_DATA),
508  			       sizeof(struct xtile_data), tile_size);
509  			__xstate_dump_leaves();
510  			return -EINVAL;
511  		}
512  
513  		if (max > max_tile)
514  			max_tile = max;
515  	}
516  
517  	state_size = sizeof(struct xtile_data) * max_tile;
518  	if (size != state_size) {
519  		pr_err("%s: calculated size is %u bytes, cpu state %d bytes\n",
520  		       __stringify(XFEATURE_XTILE_DATA), state_size, size);
521  		__xstate_dump_leaves();
522  		return -EINVAL;
523  	}
524  	return 0;
525  }
526  
527  /*
528   * We have a C struct for each 'xstate'.  We need to ensure
529   * that our software representation matches what the CPU
530   * tells us about the state's size.
531   */
check_xstate_against_struct(int nr)532  static bool __init check_xstate_against_struct(int nr)
533  {
534  	/*
535  	 * Ask the CPU for the size of the state.
536  	 */
537  	int sz = xfeature_size(nr);
538  
539  	/*
540  	 * Match each CPU state with the corresponding software
541  	 * structure.
542  	 */
543  	switch (nr) {
544  	case XFEATURE_YMM:	  return XCHECK_SZ(sz, nr, struct ymmh_struct);
545  	case XFEATURE_BNDREGS:	  return XCHECK_SZ(sz, nr, struct mpx_bndreg_state);
546  	case XFEATURE_BNDCSR:	  return XCHECK_SZ(sz, nr, struct mpx_bndcsr_state);
547  	case XFEATURE_OPMASK:	  return XCHECK_SZ(sz, nr, struct avx_512_opmask_state);
548  	case XFEATURE_ZMM_Hi256:  return XCHECK_SZ(sz, nr, struct avx_512_zmm_uppers_state);
549  	case XFEATURE_Hi16_ZMM:	  return XCHECK_SZ(sz, nr, struct avx_512_hi16_state);
550  	case XFEATURE_PKRU:	  return XCHECK_SZ(sz, nr, struct pkru_state);
551  	case XFEATURE_PASID:	  return XCHECK_SZ(sz, nr, struct ia32_pasid_state);
552  	case XFEATURE_XTILE_CFG:  return XCHECK_SZ(sz, nr, struct xtile_cfg);
553  	case XFEATURE_CET_USER:	  return XCHECK_SZ(sz, nr, struct cet_user_state);
554  	case XFEATURE_XTILE_DATA: check_xtile_data_against_struct(sz); return true;
555  	default:
556  		XSTATE_WARN_ON(1, "No structure for xstate: %d\n", nr);
557  		return false;
558  	}
559  
560  	return true;
561  }
562  
/*
 * Calculate the buffer size needed to hold the features in @xfeatures.
 *
 * @xfeatures:	mask of xfeatures to account for
 * @compacted:	true to use the compacted layout, false for the standard
 *		layout with fixed offsets
 *
 * The size is the end of the highest-numbered feature in @xfeatures.
 * If only legacy features (or no features at all) are requested, the
 * result is sizeof(struct xregs_state).
 */
static unsigned int xstate_calculate_size(u64 xfeatures, bool compacted)
{
	/* Signed on purpose: fls64(0) is 0, so an empty mask yields -1. */
	int topmost = fls64(xfeatures) - 1;
	unsigned int offset;

	/*
	 * Decide before touching the caches so that an empty mask can
	 * never be used as an array index.
	 */
	if (topmost <= XFEATURE_SSE)
		return sizeof(struct xregs_state);

	if (compacted)
		offset = xfeature_get_offset(xfeatures, topmost);
	else
		offset = xstate_offsets[topmost];

	return offset + xstate_sizes[topmost];
}
575  
576  /*
577   * This essentially double-checks what the cpu told us about
578   * how large the XSAVE buffer needs to be.  We are recalculating
579   * it to be safe.
580   *
581   * Independent XSAVE features allocate their own buffers and are not
582   * covered by these checks. Only the size of the buffer for task->fpu
583   * is checked here.
584   */
paranoid_xstate_size_valid(unsigned int kernel_size)585  static bool __init paranoid_xstate_size_valid(unsigned int kernel_size)
586  {
587  	bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
588  	bool xsaves = cpu_feature_enabled(X86_FEATURE_XSAVES);
589  	unsigned int size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
590  	int i;
591  
592  	for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
593  		if (!check_xstate_against_struct(i))
594  			return false;
595  		/*
596  		 * Supervisor state components can be managed only by
597  		 * XSAVES.
598  		 */
599  		if (!xsaves && xfeature_is_supervisor(i)) {
600  			XSTATE_WARN_ON(1, "Got supervisor feature %d, but XSAVES not advertised\n", i);
601  			return false;
602  		}
603  	}
604  	size = xstate_calculate_size(fpu_kernel_cfg.max_features, compacted);
605  	XSTATE_WARN_ON(size != kernel_size,
606  		       "size %u != kernel_size %u\n", size, kernel_size);
607  	return size == kernel_size;
608  }
609  
610  /*
611   * Get total size of enabled xstates in XCR0 | IA32_XSS.
612   *
613   * Note the SDM's wording here.  "sub-function 0" only enumerates
614   * the size of the *user* states.  If we use it to size a buffer
615   * that we use 'XSAVES' on, we could potentially overflow the
616   * buffer because 'XSAVES' saves system states too.
617   *
618   * This also takes compaction into account. So this works for
619   * XSAVEC as well.
620   */
get_compacted_size(void)621  static unsigned int __init get_compacted_size(void)
622  {
623  	unsigned int eax, ebx, ecx, edx;
624  	/*
625  	 * - CPUID function 0DH, sub-function 1:
626  	 *    EBX enumerates the size (in bytes) required by
627  	 *    the XSAVES instruction for an XSAVE area
628  	 *    containing all the state components
629  	 *    corresponding to bits currently set in
630  	 *    XCR0 | IA32_XSS.
631  	 *
632  	 * When XSAVES is not available but XSAVEC is (virt), then there
633  	 * are no supervisor states, but XSAVEC still uses compacted
634  	 * format.
635  	 */
636  	cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
637  	return ebx;
638  }
639  
640  /*
641   * Get the total size of the enabled xstates without the independent supervisor
642   * features.
643   */
get_xsave_compacted_size(void)644  static unsigned int __init get_xsave_compacted_size(void)
645  {
646  	u64 mask = xfeatures_mask_independent();
647  	unsigned int size;
648  
649  	if (!mask)
650  		return get_compacted_size();
651  
652  	/* Disable independent features. */
653  	wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor());
654  
655  	/*
656  	 * Ask the hardware what size is required of the buffer.
657  	 * This is the size required for the task->fpu buffer.
658  	 */
659  	size = get_compacted_size();
660  
661  	/* Re-enable independent features so XSAVES will work on them again. */
662  	wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() | mask);
663  
664  	return size;
665  }
666  
get_xsave_size_user(void)667  static unsigned int __init get_xsave_size_user(void)
668  {
669  	unsigned int eax, ebx, ecx, edx;
670  	/*
671  	 * - CPUID function 0DH, sub-function 0:
672  	 *    EBX enumerates the size (in bytes) required by
673  	 *    the XSAVE instruction for an XSAVE area
674  	 *    containing all the *user* state components
675  	 *    corresponding to bits currently set in XCR0.
676  	 */
677  	cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
678  	return ebx;
679  }
680  
init_xstate_size(void)681  static int __init init_xstate_size(void)
682  {
683  	/* Recompute the context size for enabled features: */
684  	unsigned int user_size, kernel_size, kernel_default_size;
685  	bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
686  
687  	/* Uncompacted user space size */
688  	user_size = get_xsave_size_user();
689  
690  	/*
691  	 * XSAVES kernel size includes supervisor states and uses compacted
692  	 * format. XSAVEC uses compacted format, but does not save
693  	 * supervisor states.
694  	 *
695  	 * XSAVE[OPT] do not support supervisor states so kernel and user
696  	 * size is identical.
697  	 */
698  	if (compacted)
699  		kernel_size = get_xsave_compacted_size();
700  	else
701  		kernel_size = user_size;
702  
703  	kernel_default_size =
704  		xstate_calculate_size(fpu_kernel_cfg.default_features, compacted);
705  
706  	if (!paranoid_xstate_size_valid(kernel_size))
707  		return -EINVAL;
708  
709  	fpu_kernel_cfg.max_size = kernel_size;
710  	fpu_user_cfg.max_size = user_size;
711  
712  	fpu_kernel_cfg.default_size = kernel_default_size;
713  	fpu_user_cfg.default_size =
714  		xstate_calculate_size(fpu_user_cfg.default_features, false);
715  
716  	return 0;
717  }
718  
/*
 * We enabled the XSAVE hardware, but something went wrong and
 * we can not use it.  Disable it.
 *
 * @legacy_size: context size to fall back to for both the kernel and the
 *		 user configuration
 */
static void __init fpu__init_disable_system_xstate(unsigned int legacy_size)
{
	/* Drop all xfeatures and undo the CR4 / CPU capability enablement */
	fpu_kernel_cfg.max_features = 0;
	cr4_clear_bits(X86_CR4_OSXSAVE);
	setup_clear_cpu_cap(X86_FEATURE_XSAVE);

	/* Restore the legacy size.*/
	fpu_kernel_cfg.max_size = legacy_size;
	fpu_kernel_cfg.default_size = legacy_size;
	fpu_user_cfg.max_size = legacy_size;
	fpu_user_cfg.default_size = legacy_size;

	/*
	 * Prevent enabling the static branch which enables writes to the
	 * XFD MSR.
	 */
	init_fpstate.xfd = 0;

	/* Reset the current task's FPU state now that the config changed */
	fpstate_reset(&current->thread.fpu);
}
743  
/*
 * Enable and initialize the xsave feature.
 * Called once per system bootup.
 *
 * @legacy_size: context size to fall back to when XSAVE has to be
 *		 disabled again because of an inconsistency
 *
 * Enumerates the xfeatures from CPUID, filters them down to what the
 * kernel supports, derives the kernel/user max and default feature sets,
 * enables the hardware, caches the layout, validates the sizes and
 * finally builds init_fpstate. Any failure after enumeration ends up in
 * out_disable, which turns XSAVE off again.
 */
void __init fpu__init_system_xstate(unsigned int legacy_size)
{
	unsigned int eax, ebx, ecx, edx;
	u64 xfeatures;
	int err;
	int i;

	if (!boot_cpu_has(X86_FEATURE_FPU)) {
		pr_info("x86/fpu: No FPU detected\n");
		return;
	}

	if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
		pr_info("x86/fpu: x87 FPU will use %s\n",
			boot_cpu_has(X86_FEATURE_FXSR) ? "FXSAVE" : "FSAVE");
		return;
	}

	/* XSAVE is advertised, so the XSTATE CPUID leaf is expected to exist */
	if (boot_cpu_data.cpuid_level < XSTATE_CPUID) {
		WARN_ON_FPU(1);
		return;
	}

	/*
	 * Find user xstates supported by the processor.
	 */
	cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
	fpu_kernel_cfg.max_features = eax + ((u64)edx << 32);

	/*
	 * Find supervisor xstates supported by the processor.
	 */
	cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
	fpu_kernel_cfg.max_features |= ecx + ((u64)edx << 32);

	if ((fpu_kernel_cfg.max_features & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) {
		/*
		 * This indicates that something really unexpected happened
		 * with the enumeration.  Disable XSAVE and try to continue
		 * booting without it.  This is too early to BUG().
		 */
		pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n",
		       fpu_kernel_cfg.max_features);
		goto out_disable;
	}

	/* Recorded before the filtering below trims max_features */
	fpu_kernel_cfg.independent_features = fpu_kernel_cfg.max_features &
					      XFEATURE_MASK_INDEPENDENT;

	/*
	 * Clear XSAVE features that are disabled in the normal CPUID.
	 */
	for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) {
		unsigned short cid = xsave_cpuid_features[i];

		/* Careful: X86_FEATURE_FPU is 0! */
		if ((i != XFEATURE_FP && !cid) || !boot_cpu_has(cid))
			fpu_kernel_cfg.max_features &= ~BIT_ULL(i);
	}

	/* Dynamic user features are only kept when XFD is enabled */
	if (!cpu_feature_enabled(X86_FEATURE_XFD))
		fpu_kernel_cfg.max_features &= ~XFEATURE_MASK_USER_DYNAMIC;

	/* Supervisor features are only kept when XSAVES is enabled */
	if (!cpu_feature_enabled(X86_FEATURE_XSAVES))
		fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED;
	else
		fpu_kernel_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED |
					XFEATURE_MASK_SUPERVISOR_SUPPORTED;

	/* The user view is the kernel view minus everything non-user */
	fpu_user_cfg.max_features = fpu_kernel_cfg.max_features;
	fpu_user_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED;

	/* Clean out dynamic features from default */
	fpu_kernel_cfg.default_features = fpu_kernel_cfg.max_features;
	fpu_kernel_cfg.default_features &= ~XFEATURE_MASK_USER_DYNAMIC;

	fpu_user_cfg.default_features = fpu_user_cfg.max_features;
	fpu_user_cfg.default_features &= ~XFEATURE_MASK_USER_DYNAMIC;

	/* Store it for paranoia check at the end */
	xfeatures = fpu_kernel_cfg.max_features;

	/*
	 * Initialize the default XFD state in init_fpstate and enable the
	 * dynamic sizing mechanism if dynamic states are available.  The
	 * static key cannot be enabled here because this runs before
	 * jump_label_init(). This is delayed to an initcall.
	 */
	init_fpstate.xfd = fpu_user_cfg.max_features & XFEATURE_MASK_USER_DYNAMIC;

	/* Set up compaction feature bit */
	if (cpu_feature_enabled(X86_FEATURE_XSAVEC) ||
	    cpu_feature_enabled(X86_FEATURE_XSAVES))
		setup_force_cpu_cap(X86_FEATURE_XCOMPACTED);

	/* Enable xstate instructions to be able to continue with initialization: */
	fpu__init_cpu_xstate();

	/* Cache size, offset and flags for initialization */
	setup_xstate_cache();

	err = init_xstate_size();
	if (err)
		goto out_disable;

	/* Reset the state for the current task */
	fpstate_reset(&current->thread.fpu);

	/*
	 * Update info used for ptrace frames; use standard-format size and no
	 * supervisor xstates:
	 */
	update_regset_xstate_info(fpu_user_cfg.max_size,
				  fpu_user_cfg.max_features);

	/*
	 * init_fpstate excludes dynamic states as they are large but init
	 * state is zero.
	 */
	init_fpstate.size		= fpu_kernel_cfg.default_size;
	init_fpstate.xfeatures		= fpu_kernel_cfg.default_features;

	/* The default image must fit into the init_fpstate register buffer */
	if (init_fpstate.size > sizeof(init_fpstate.regs)) {
		pr_warn("x86/fpu: init_fpstate buffer too small (%zu < %d), disabling XSAVE\n",
			sizeof(init_fpstate.regs), init_fpstate.size);
		goto out_disable;
	}

	setup_init_fpu_buf();

	/*
	 * Paranoia check whether something in the setup modified the
	 * xfeatures mask.
	 */
	if (xfeatures != fpu_kernel_cfg.max_features) {
		pr_err("x86/fpu: xfeatures modified from 0x%016llx to 0x%016llx during init, disabling XSAVE\n",
		       xfeatures, fpu_kernel_cfg.max_features);
		goto out_disable;
	}

	/*
	 * CPU capabilities initialization runs before FPU init. So
	 * X86_FEATURE_OSXSAVE is not set. Now that XSAVE is completely
	 * functional, set the feature bit so depending code works.
	 */
	setup_force_cpu_cap(X86_FEATURE_OSXSAVE);

	print_xstate_offset_size();
	pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
		fpu_kernel_cfg.max_features,
		fpu_kernel_cfg.max_size,
		boot_cpu_has(X86_FEATURE_XCOMPACTED) ? "compacted" : "standard");
	return;

out_disable:
	/* something went wrong, try to boot without any XSAVE support */
	fpu__init_disable_system_xstate(legacy_size);
}
906  
907  /*
908   * Restore minimal FPU state after suspend:
909   */
fpu__resume_cpu(void)910  void fpu__resume_cpu(void)
911  {
912  	/*
913  	 * Restore XCR0 on xsave capable CPUs:
914  	 */
915  	if (cpu_feature_enabled(X86_FEATURE_XSAVE))
916  		xsetbv(XCR_XFEATURE_ENABLED_MASK, fpu_user_cfg.max_features);
917  
918  	/*
919  	 * Restore IA32_XSS. The same CPUID bit enumerates support
920  	 * of XSAVES and MSR_IA32_XSS.
921  	 */
922  	if (cpu_feature_enabled(X86_FEATURE_XSAVES)) {
923  		wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor()  |
924  				     xfeatures_mask_independent());
925  	}
926  
927  	if (fpu_state_size_dynamic())
928  		wrmsrl(MSR_IA32_XFD, current->thread.fpu.fpstate->xfd);
929  }
930  
931  /*
932   * Given an xstate feature nr, calculate where in the xsave
933   * buffer the state is.  Callers should ensure that the buffer
934   * is valid.
935   */
__raw_xsave_addr(struct xregs_state * xsave,int xfeature_nr)936  static void *__raw_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
937  {
938  	u64 xcomp_bv = xsave->header.xcomp_bv;
939  
940  	if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr)))
941  		return NULL;
942  
943  	if (cpu_feature_enabled(X86_FEATURE_XCOMPACTED)) {
944  		if (WARN_ON_ONCE(!(xcomp_bv & BIT_ULL(xfeature_nr))))
945  			return NULL;
946  	}
947  
948  	return (void *)xsave + xfeature_get_offset(xcomp_bv, xfeature_nr);
949  }
950  
951  /*
952   * Given the xsave area and a state inside, this function returns the
953   * address of the state.
954   *
955   * This is the API that is called to get xstate address in either
956   * standard format or compacted format of xsave area.
957   *
958   * Note that if there is no data for the field in the xsave buffer
959   * this will return NULL.
960   *
961   * Inputs:
962   *	xstate: the thread's storage area for all FPU data
963   *	xfeature_nr: state which is defined in xsave.h (e.g. XFEATURE_FP,
964   *	XFEATURE_SSE, etc...)
965   * Output:
966   *	address of the state in the xsave area, or NULL if the
967   *	field is not present in the xsave buffer.
968   */
get_xsave_addr(struct xregs_state * xsave,int xfeature_nr)969  void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
970  {
971  	/*
972  	 * Do we even *have* xsave state?
973  	 */
974  	if (!boot_cpu_has(X86_FEATURE_XSAVE))
975  		return NULL;
976  
977  	/*
978  	 * We should not ever be requesting features that we
979  	 * have not enabled.
980  	 */
981  	if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr)))
982  		return NULL;
983  
984  	/*
985  	 * This assumes the last 'xsave*' instruction to
986  	 * have requested that 'xfeature_nr' be saved.
987  	 * If it did not, we might be seeing and old value
988  	 * of the field in the buffer.
989  	 *
990  	 * This can happen because the last 'xsave' did not
991  	 * request that this feature be saved (unlikely)
992  	 * or because the "init optimization" caused it
993  	 * to not be saved.
994  	 */
995  	if (!(xsave->header.xfeatures & BIT_ULL(xfeature_nr)))
996  		return NULL;
997  
998  	return __raw_xsave_addr(xsave, xfeature_nr);
999  }
1000  EXPORT_SYMBOL_GPL(get_xsave_addr);
1001  
1002  /*
1003   * Given an xstate feature nr, calculate where in the xsave buffer the state is.
1004   * The xsave buffer should be in standard format, not compacted (e.g. user mode
1005   * signal frames).
1006   */
get_xsave_addr_user(struct xregs_state __user * xsave,int xfeature_nr)1007  void __user *get_xsave_addr_user(struct xregs_state __user *xsave, int xfeature_nr)
1008  {
1009  	if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr)))
1010  		return NULL;
1011  
1012  	return (void __user *)xsave + xstate_offsets[xfeature_nr];
1013  }
1014  
1015  #ifdef CONFIG_ARCH_HAS_PKEYS
1016  
1017  /*
1018   * This will go out and modify PKRU register to set the access
1019   * rights for @pkey to @init_val.
1020   */
arch_set_user_pkey_access(struct task_struct * tsk,int pkey,unsigned long init_val)1021  int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
1022  			      unsigned long init_val)
1023  {
1024  	u32 old_pkru, new_pkru_bits = 0;
1025  	int pkey_shift;
1026  
1027  	/*
1028  	 * This check implies XSAVE support.  OSPKE only gets
1029  	 * set if we enable XSAVE and we enable PKU in XCR0.
1030  	 */
1031  	if (!cpu_feature_enabled(X86_FEATURE_OSPKE))
1032  		return -EINVAL;
1033  
1034  	/*
1035  	 * This code should only be called with valid 'pkey'
1036  	 * values originating from in-kernel users.  Complain
1037  	 * if a bad value is observed.
1038  	 */
1039  	if (WARN_ON_ONCE(pkey >= arch_max_pkey()))
1040  		return -EINVAL;
1041  
1042  	/* Set the bits we need in PKRU:  */
1043  	if (init_val & PKEY_DISABLE_ACCESS)
1044  		new_pkru_bits |= PKRU_AD_BIT;
1045  	if (init_val & PKEY_DISABLE_WRITE)
1046  		new_pkru_bits |= PKRU_WD_BIT;
1047  
1048  	/* Shift the bits in to the correct place in PKRU for pkey: */
1049  	pkey_shift = pkey * PKRU_BITS_PER_PKEY;
1050  	new_pkru_bits <<= pkey_shift;
1051  
1052  	/* Get old PKRU and mask off any old bits in place: */
1053  	old_pkru = read_pkru();
1054  	old_pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift);
1055  
1056  	/* Write old part along with new part: */
1057  	write_pkru(old_pkru | new_pkru_bits);
1058  
1059  	return 0;
1060  }
#endif /* CONFIG_ARCH_HAS_PKEYS */
1062  
/*
 * Write @size bytes to @to, taken from @xstate when @from_xstate is set
 * and from @init_xstate otherwise.
 */
static void copy_feature(bool from_xstate, struct membuf *to, void *xstate,
			 void *init_xstate, unsigned int size)
{
	void *src = init_xstate;

	if (from_xstate)
		src = xstate;

	membuf_write(to, src, size);
}
1068  
/**
 * __copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer
 * @to:		membuf descriptor
 * @fpstate:	The fpstate buffer from which to copy
 * @xfeatures:	The mask of xfeatures to save (XSAVE mode only)
 * @pkru_val:	The PKRU value to store in the PKRU component
 * @copy_mode:	The requested copy mode
 *
 * Converts from kernel XSAVE or XSAVES compacted format to UABI conforming
 * format, i.e. from the kernel internal hardware dependent storage format
 * to the requested @copy_mode. UABI XSTATE is always uncompacted!
 *
 * Whatever is left of @to after the requested state has been written is
 * zeroed.
 *
 * It supports partial copy but @to.pos always starts from zero.
 */
void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
			       u64 xfeatures, u32 pkru_val,
			       enum xstate_copy_mode copy_mode)
{
	const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr);
	struct xregs_state *xinit = &init_fpstate.regs.xsave;
	struct xregs_state *xsave = &fpstate->regs.xsave;
	struct xstate_header header;
	unsigned int zerofrom;
	u64 mask;
	int i;

	/*
	 * Build the header for the user buffer from scratch: only the
	 * xfeatures field is taken over from the kernel buffer, all other
	 * header fields stay zero.
	 */
	memset(&header, 0, sizeof(header));
	header.xfeatures = xsave->header.xfeatures;

	/* Mask out the feature bits depending on copy mode */
	switch (copy_mode) {
	case XSTATE_COPY_FP:
		header.xfeatures &= XFEATURE_MASK_FP;
		break;

	case XSTATE_COPY_FX:
		header.xfeatures &= XFEATURE_MASK_FP | XFEATURE_MASK_SSE;
		break;

	case XSTATE_COPY_XSAVE:
		/* Limit to what this fpstate exposes and the caller asked for */
		header.xfeatures &= fpstate->user_xfeatures & xfeatures;
		break;
	}

	/*
	 * The legacy area is emitted piece by piece. Each piece comes from
	 * @fpstate when the owning feature bit is set in header.xfeatures
	 * and from init_fpstate otherwise.
	 */

	/* Copy FP state up to MXCSR */
	copy_feature(header.xfeatures & XFEATURE_MASK_FP, &to, &xsave->i387,
		     &xinit->i387, off_mxcsr);

	/* Copy MXCSR when SSE or YMM are set in the feature mask */
	copy_feature(header.xfeatures & (XFEATURE_MASK_SSE | XFEATURE_MASK_YMM),
		     &to, &xsave->i387.mxcsr, &xinit->i387.mxcsr,
		     MXCSR_AND_FLAGS_SIZE);

	/* Copy the remaining FP state */
	copy_feature(header.xfeatures & XFEATURE_MASK_FP,
		     &to, &xsave->i387.st_space, &xinit->i387.st_space,
		     sizeof(xsave->i387.st_space));

	/* Copy the SSE state - shared with YMM, but independently managed */
	copy_feature(header.xfeatures & XFEATURE_MASK_SSE,
		     &to, &xsave->i387.xmm_space, &xinit->i387.xmm_space,
		     sizeof(xsave->i387.xmm_space));

	/* The FP and FX copy modes end after the legacy register state */
	if (copy_mode != XSTATE_COPY_XSAVE)
		goto out;

	/* Zero the padding area */
	membuf_zero(&to, sizeof(xsave->i387.padding));

	/* Copy xsave->i387.sw_reserved */
	membuf_write(&to, xstate_fx_sw_bytes, sizeof(xsave->i387.sw_reserved));

	/* Copy the user space relevant state of @xsave->header */
	membuf_write(&to, &header, sizeof(header));

	/* Gap tracking starts at the beginning of the extended area */
	zerofrom = offsetof(struct xregs_state, extended_state_area);

	/*
	 * This 'mask' indicates which states to copy from fpstate.
	 * Those extended states that are not present in fpstate are
	 * either disabled or initialized:
	 *
	 * In non-compacted format, disabled features still occupy
	 * state space but there is no state to copy from in the
	 * compacted init_fpstate. The gap tracking will zero these
	 * states.
	 *
	 * The extended features have an all zeroes init state. Thus,
	 * remove them from 'mask' to zero those features in the user
	 * buffer instead of retrieving them from init_fpstate.
	 */
	mask = header.xfeatures;

	for_each_extended_xfeature(i, mask) {
		/*
		 * If there was a feature or alignment gap, zero the space
		 * in the destination buffer.
		 */
		if (zerofrom < xstate_offsets[i])
			membuf_zero(&to, xstate_offsets[i] - zerofrom);

		if (i == XFEATURE_PKRU) {
			struct pkru_state pkru = {0};
			/*
			 * PKRU is not necessarily up to date in the
			 * XSAVE buffer. Use the provided value.
			 */
			pkru.pkru = pkru_val;
			membuf_write(&to, &pkru, sizeof(pkru));
		} else {
			membuf_write(&to,
				     __raw_xsave_addr(xsave, i),
				     xstate_sizes[i]);
		}
		/*
		 * Keep track of the last copied state in the non-compacted
		 * target buffer for gap zeroing.
		 */
		zerofrom = xstate_offsets[i] + xstate_sizes[i];
	}

out:
	/* Zero whatever is left of the destination buffer */
	if (to.left)
		membuf_zero(&to, to.left);
}
1194  
1195  /**
1196   * copy_xstate_to_uabi_buf - Copy kernel saved xstate to a UABI buffer
1197   * @to:		membuf descriptor
1198   * @tsk:	The task from which to copy the saved xstate
1199   * @copy_mode:	The requested copy mode
1200   *
1201   * Converts from kernel XSAVE or XSAVES compacted format to UABI conforming
1202   * format, i.e. from the kernel internal hardware dependent storage format
1203   * to the requested @mode. UABI XSTATE is always uncompacted!
1204   *
1205   * It supports partial copy but @to.pos always starts from zero.
1206   */
copy_xstate_to_uabi_buf(struct membuf to,struct task_struct * tsk,enum xstate_copy_mode copy_mode)1207  void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
1208  			     enum xstate_copy_mode copy_mode)
1209  {
1210  	__copy_xstate_to_uabi_buf(to, tsk->thread.fpu.fpstate,
1211  				  tsk->thread.fpu.fpstate->user_xfeatures,
1212  				  tsk->thread.pkru, copy_mode);
1213  }
1214  
/*
 * Fetch @size bytes at @offset of the source buffer into @dst. The kernel
 * buffer @kbuf is used when it is not NULL, otherwise the data is read
 * from the user space buffer @ubuf.
 *
 * Returns 0 on success, -EFAULT when the user space copy fails.
 */
static int copy_from_buffer(void *dst, unsigned int offset, unsigned int size,
			    const void *kbuf, const void __user *ubuf)
{
	if (!kbuf)
		return copy_from_user(dst, ubuf + offset, size) ? -EFAULT : 0;

	memcpy(dst, kbuf + offset, size);
	return 0;
}
1226  
1227  
1228  /**
1229   * copy_uabi_to_xstate - Copy a UABI format buffer to the kernel xstate
1230   * @fpstate:	The fpstate buffer to copy to
1231   * @kbuf:	The UABI format buffer, if it comes from the kernel
1232   * @ubuf:	The UABI format buffer, if it comes from userspace
1233   * @pkru:	The location to write the PKRU value to
1234   *
1235   * Converts from the UABI format into the kernel internal hardware
1236   * dependent format.
1237   *
1238   * This function ultimately has three different callers with distinct PKRU
1239   * behavior.
1240   * 1.	When called from sigreturn the PKRU register will be restored from
1241   *	@fpstate via an XRSTOR. Correctly copying the UABI format buffer to
1242   *	@fpstate is sufficient to cover this case, but the caller will also
1243   *	pass a pointer to the thread_struct's pkru field in @pkru and updating
1244   *	it is harmless.
1245   * 2.	When called from ptrace the PKRU register will be restored from the
1246   *	thread_struct's pkru field. A pointer to that is passed in @pkru.
1247   *	The kernel will restore it manually, so the XRSTOR behavior that resets
1248   *	the PKRU register to the hardware init value (0) if the corresponding
1249   *	xfeatures bit is not set is emulated here.
1250   * 3.	When called from KVM the PKRU register will be restored from the vcpu's
1251   *	pkru field. A pointer to that is passed in @pkru. KVM hasn't used
1252   *	XRSTOR and hasn't had the PKRU resetting behavior described above. To
1253   *	preserve that KVM behavior, it passes NULL for @pkru if the xfeatures
1254   *	bit is not set.
1255   */
static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf,
			       const void __user *ubuf, u32 *pkru)
{
	const u64 mxcsr_users = XFEATURE_MASK_FP | XFEATURE_MASK_SSE |
				XFEATURE_MASK_YMM;
	struct xregs_state *xsave = &fpstate->regs.xsave;
	struct xstate_header hdr;
	int nr;

	/* Fetch the header of the incoming buffer and sanity check it */
	if (copy_from_buffer(&hdr, offsetof(struct xregs_state, header),
			     sizeof(hdr), kbuf, ubuf))
		return -EFAULT;

	if (validate_user_xstate_header(&hdr, fpstate))
		return -EINVAL;

	/* MXCSR needs validation when any of the features using it is set */
	if (hdr.xfeatures & mxcsr_users) {
		u32 mxcsr[2];

		if (copy_from_buffer(mxcsr, offsetof(struct fxregs_state, mxcsr),
				     sizeof(mxcsr), kbuf, ubuf))
			return -EFAULT;

		/* Reserved MXCSR bits have to be zero. */
		if (mxcsr[0] & ~mxcsr_feature_mask)
			return -EINVAL;

		/*
		 * SSE and YMM need MXCSR even when FP is not in use, so
		 * store it here in that case.
		 */
		if (!(hdr.xfeatures & XFEATURE_MASK_FP)) {
			xsave->i387.mxcsr = mxcsr[0];
			xsave->i387.mxcsr_mask = mxcsr[1];
		}
	}

	/* Copy every component which the incoming header marks as present */
	for (nr = 0; nr < XFEATURE_MAX; nr++) {
		void *dst;

		if (!(hdr.xfeatures & BIT_ULL(nr)))
			continue;

		dst = __raw_xsave_addr(xsave, nr);
		if (copy_from_buffer(dst, xstate_offsets[nr], xstate_sizes[nr],
				     kbuf, ubuf))
			return -EFAULT;
	}

	if (hdr.xfeatures & XFEATURE_MASK_PKRU) {
		struct pkru_state *xpkru = __raw_xsave_addr(xsave, XFEATURE_PKRU);

		*pkru = xpkru->pkru;
	} else if (pkru) {
		/*
		 * A NULL @pkru comes from KVM and means that PKRU must not
		 * be updated. Everybody else gets the value zeroed.
		 */
		*pkru = 0;
	}

	/*
	 * Only user state came in. Keep the supervisor bits which are
	 * already set in the kernel header and replace the user bits
	 * with the ones from the incoming header.
	 */
	xsave->header.xfeatures = (xsave->header.xfeatures &
				   XFEATURE_MASK_SUPERVISOR_ALL) | hdr.xfeatures;

	return 0;
}
1333  
1334  /*
1335   * Convert from a ptrace standard-format kernel buffer to kernel XSAVE[S]
1336   * format and copy to the target thread. Used by ptrace and KVM.
1337   */
int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru)
{
	/* The source is a kernel buffer, so there is no user space pointer. */
	const void __user *no_ubuf = NULL;

	return copy_uabi_to_xstate(fpstate, kbuf, no_ubuf, pkru);
}
1342  
1343  /*
1344   * Convert from a sigreturn standard-format user-space buffer to kernel
1345   * XSAVE[S] format and copy to the target thread. This is called from the
1346   * sigreturn() and rt_sigreturn() system calls.
1347   */
int copy_sigframe_from_user_to_xstate(struct task_struct *tsk,
				      const void __user *ubuf)
{
	struct fpstate *fps = tsk->thread.fpu.fpstate;
	u32 *pkru = &tsk->thread.pkru;

	/* The source is a user space buffer, so no kernel buffer is passed. */
	return copy_uabi_to_xstate(fps, NULL, ubuf, pkru);
}
1353  
validate_independent_components(u64 mask)1354  static bool validate_independent_components(u64 mask)
1355  {
1356  	u64 xchk;
1357  
1358  	if (WARN_ON_FPU(!cpu_feature_enabled(X86_FEATURE_XSAVES)))
1359  		return false;
1360  
1361  	xchk = ~xfeatures_mask_independent();
1362  
1363  	if (WARN_ON_ONCE(!mask || mask & xchk))
1364  		return false;
1365  
1366  	return true;
1367  }
1368  
1369  /**
1370   * xsaves - Save selected components to a kernel xstate buffer
1371   * @xstate:	Pointer to the buffer
1372   * @mask:	Feature mask to select the components to save
1373   *
1374   * The @xstate buffer must be 64 byte aligned and correctly initialized as
1375   * XSAVES does not write the full xstate header. Before first use the
1376   * buffer should be zeroed otherwise a consecutive XRSTORS from that buffer
1377   * can #GP.
1378   *
1379   * The feature mask must be a subset of the independent features.
1380   */
void xsaves(struct xregs_state *xstate, u64 mask)
{
	u32 lmask, hmask;
	int err;

	if (!validate_independent_components(mask))
		return;

	/* The instruction takes the feature mask as two 32-bit halves */
	lmask = (u32)mask;
	hmask = (u32)(mask >> 32);

	XSTATE_OP(XSAVES, xstate, lmask, hmask, err);
	WARN_ON_ONCE(err);
}
1391  
1392  /**
1393   * xrstors - Restore selected components from a kernel xstate buffer
1394   * @xstate:	Pointer to the buffer
1395   * @mask:	Feature mask to select the components to restore
1396   *
1397   * The @xstate buffer must be 64 byte aligned and correctly initialized
1398   * otherwise XRSTORS from that buffer can #GP.
1399   *
1400   * Proper usage is to restore the state which was saved with
1401   * xsaves() into @xstate.
1402   *
1403   * The feature mask must be a subset of the independent features.
1404   */
void xrstors(struct xregs_state *xstate, u64 mask)
{
	u32 lmask, hmask;
	int err;

	if (!validate_independent_components(mask))
		return;

	/* The instruction takes the feature mask as two 32-bit halves */
	lmask = (u32)mask;
	hmask = (u32)(mask >> 32);

	XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
	WARN_ON_ONCE(err);
}
1415  
1416  #if IS_ENABLED(CONFIG_KVM)
fpstate_clear_xstate_component(struct fpstate * fps,unsigned int xfeature)1417  void fpstate_clear_xstate_component(struct fpstate *fps, unsigned int xfeature)
1418  {
1419  	void *addr = get_xsave_addr(&fps->regs.xsave, xfeature);
1420  
1421  	if (addr)
1422  		memset(addr, 0, xstate_sizes[xfeature]);
1423  }
1424  EXPORT_SYMBOL_GPL(fpstate_clear_xstate_component);
1425  #endif
1426  
1427  #ifdef CONFIG_X86_64
1428  
1429  #ifdef CONFIG_X86_DEBUG_FPU
1430  /*
1431   * Ensure that a subsequent XSAVE* or XRSTOR* instruction with RFBM=@mask
1432   * can safely operate on the @fpstate buffer.
1433   */
static bool xstate_op_valid(struct fpstate *fpstate, u64 mask, bool rstor)
{
	u64 cpu_xfd = __this_cpu_read(xfd_state);
	u64 unbacked;

	/* This CPU's xfd_state agrees with the fpstate: nothing to check. */
	if (fpstate->xfd == cpu_xfd)
		return true;

	/*
	 * The XFD MSR does not match fpstate->xfd. That is invalid when
	 * the passed in fpstate is current's fpstate.
	 */
	if (fpstate->xfd == current->thread.fpu.fpstate->xfd)
		return false;

	/*
	 * XRSTOR(S) from init_fpstate is always correct as it just brings
	 * all components into init state without reading from the buffer.
	 * XSAVE(S) raises #PF after init.
	 */
	if (fpstate == &init_fpstate)
		return rstor;

	/*
	 * What remains are these users:
	 *
	 * XSAVE(S): clone(), fpu_swap_kvm_fpstate()
	 * XRSTOR(S): fpu_swap_kvm_fpstate()
	 *
	 * No XSAVE/XRSTOR instructions (except XSAVE itself) touch the
	 * buffer area of XFD-disabled state components, so those are
	 * dropped from @mask. The same goes for features which are valid
	 * in fpstate as they have space allocated.
	 */
	unbacked = mask & ~cpu_xfd & ~fpstate->xfeatures;

	/*
	 * Any component left over might be written by XSAVE/XRSTOR.
	 * Fail the validation if one is found.
	 */
	return !unbacked;
}
1479  
/* Warn once when an XSAVE or XRSTOR style operation on @fpstate is not valid. */
void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rstor)
{
	bool valid = xstate_op_valid(fpstate, mask, rstor);

	WARN_ON_ONCE(!valid);
}
1484  #endif /* CONFIG_X86_DEBUG_FPU */
1485  
xfd_update_static_branch(void)1486  static int __init xfd_update_static_branch(void)
1487  {
1488  	/*
1489  	 * If init_fpstate.xfd has bits set then dynamic features are
1490  	 * available and the dynamic sizing must be enabled.
1491  	 */
1492  	if (init_fpstate.xfd)
1493  		static_branch_enable(&__fpu_state_size_dynamic);
1494  	return 0;
1495  }
arch_initcall(xfd_update_static_branch)1496  arch_initcall(xfd_update_static_branch)
1497  
1498  void fpstate_free(struct fpu *fpu)
1499  {
1500  	if (fpu->fpstate && fpu->fpstate != &fpu->__fpstate)
1501  		vfree(fpu->fpstate);
1502  }
1503  
1504  /**
1505   * fpstate_realloc - Reallocate struct fpstate for the requested new features
1506   *
1507   * @xfeatures:	A bitmap of xstate features which extend the enabled features
1508   *		of that task
1509   * @ksize:	The required size for the kernel buffer
1510   * @usize:	The required size for user space buffers
1511   * @guest_fpu:	Pointer to a guest FPU container. NULL for host allocations
1512   *
1513   * Note vs. vmalloc(): If the task with a vzalloc()-allocated buffer
1514   * terminates quickly, vfree()-induced IPIs may be a concern, but tasks
1515   * with large states are likely to live longer.
1516   *
1517   * Returns: 0 on success, -ENOMEM on allocation error.
1518   */
static int fpstate_realloc(u64 xfeatures, unsigned int ksize,
			   unsigned int usize, struct fpu_guest *guest_fpu)
{
	struct fpu *fpu = &current->thread.fpu;
	struct fpstate *curfps, *newfps = NULL;
	unsigned int fpsize;
	bool in_use;

	/*
	 * Allocation size: the register area of @ksize bytes plus the
	 * offset of the register area inside struct fpstate, rounded up to
	 * a multiple of 64.
	 */
	fpsize = ksize + ALIGN(offsetof(struct fpstate, regs), 64);

	newfps = vzalloc(fpsize);
	if (!newfps)
		return -ENOMEM;
	newfps->size = ksize;
	newfps->user_size = usize;
	/* Marks the buffer as one which has to be released with vfree() */
	newfps->is_valloc = true;

	/*
	 * When a guest FPU is supplied, use @guest_fpu->fpstate
	 * as reference independent whether it is in use or not.
	 */
	curfps = guest_fpu ? guest_fpu->fpstate : fpu->fpstate;

	/* Determine whether @curfps is the active fpstate */
	in_use = fpu->fpstate == curfps;

	/* Carry the guest attributes over and update the guest container */
	if (guest_fpu) {
		newfps->is_guest = true;
		newfps->is_confidential = curfps->is_confidential;
		newfps->in_use = curfps->in_use;
		guest_fpu->xfeatures |= xfeatures;
		guest_fpu->uabi_size = usize;
	}

	fpregs_lock();
	/*
	 * If @curfps is in use, ensure that the current state is in the
	 * registers before swapping fpstate as that might invalidate it
	 * due to layout changes.
	 */
	if (in_use && test_thread_flag(TIF_NEED_FPU_LOAD))
		fpregs_restore_userregs();

	/*
	 * The new buffer covers the old features plus the requested ones.
	 * The requested features are removed from the XFD mask.
	 */
	newfps->xfeatures = curfps->xfeatures | xfeatures;
	newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
	newfps->xfd = curfps->xfd & ~xfeatures;

	/* Do the final updates within the locked region */
	xstate_init_xcomp_bv(&newfps->regs.xsave, newfps->xfeatures);

	if (guest_fpu) {
		guest_fpu->fpstate = newfps;
		/* If curfps is active, update the FPU fpstate pointer */
		if (in_use)
			fpu->fpstate = newfps;
	} else {
		fpu->fpstate = newfps;
	}

	/* Propagate the new XFD value when the active fpstate was replaced */
	if (in_use)
		xfd_update_state(fpu->fpstate);
	fpregs_unlock();

	/* Only free valloc'ed state */
	if (curfps && curfps->is_valloc)
		vfree(curfps);

	return 0;
}
1588  
validate_sigaltstack(unsigned int usize)1589  static int validate_sigaltstack(unsigned int usize)
1590  {
1591  	struct task_struct *thread, *leader = current->group_leader;
1592  	unsigned long framesize = get_sigframe_size();
1593  
1594  	lockdep_assert_held(&current->sighand->siglock);
1595  
1596  	/* get_sigframe_size() is based on fpu_user_cfg.max_size */
1597  	framesize -= fpu_user_cfg.max_size;
1598  	framesize += usize;
1599  	for_each_thread(leader, thread) {
1600  		if (thread->sas_ss_size && thread->sas_ss_size < framesize)
1601  			return -ENOSPC;
1602  	}
1603  	return 0;
1604  }
1605  
/*
 * Extend the permissions of the thread group of current by @requested and
 * record the resulting kernel and user state sizes. For host permissions
 * the alternate signal stacks are validated against the new user size
 * first.
 *
 * Returns 0 on success or the error from validate_sigaltstack().
 */
static int __xstate_request_perm(u64 permitted, u64 requested, bool guest)
{
	/*
	 * This deliberately does not exclude !XSAVES as we still might
	 * decide to optionally context switch XCR0 or talk the silicon
	 * vendors into extending XFD for the pre AMX states, especially
	 * AVX512.
	 */
	bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
	struct fpu *fpu = &current->group_leader->thread.fpu;
	struct fpu_state_perm *perm;
	unsigned int ksize, usize;
	u64 mask;
	int ret;

	/* Nothing to do when everything requested is permitted already */
	if ((permitted & requested) == requested)
		return 0;

	/*
	 * Kernel state size: covers old and new permissions and, on the
	 * host, the supervisor states as well.
	 */
	mask = permitted | requested;
	if (!guest)
		mask |= xfeatures_mask_supervisor();
	ksize = xstate_calculate_size(mask, compacted);

	/* User state size: user features only, never compacted */
	mask &= XFEATURE_MASK_USER_SUPPORTED;
	usize = xstate_calculate_size(mask, false);

	if (guest) {
		perm = &fpu->guest_perm;
	} else {
		ret = validate_sigaltstack(usize);
		if (ret)
			return ret;
		perm = &fpu->perm;
	}

	/* Pairs with the READ_ONCE() in xstate_get_group_perm() */
	WRITE_ONCE(perm->__state_perm, mask);
	/* Protected by sighand lock */
	perm->__state_size = ksize;
	perm->__user_state_size = usize;

	return 0;
}
1650  
/*
 * Permissions array to map facilities with more than one component.
 *
 * The array is indexed by the facility number handed to
 * xstate_request_perm(). The entry is the feature mask which is requested
 * for that facility. Slots without an initializer are zero, which
 * xstate_request_perm() rejects with -EOPNOTSUPP.
 */
static const u64 xstate_prctl_req[XFEATURE_MAX] = {
	[XFEATURE_XTILE_DATA] = XFEATURE_MASK_XTILE_DATA,
};
1657  
/*
 * Request permission for facility @idx, either for the host or, when
 * @guest is set, for guests.
 *
 * Returns 0 on success, -EINVAL for an out of range @idx, -EOPNOTSUPP for
 * an unknown or unsupported facility, -EBUSY when the guest permissions
 * are locked, or the error from __xstate_request_perm().
 */
static int xstate_request_perm(unsigned long idx, bool guest)
{
	u64 permitted, requested;
	int ret;

	if (idx >= XFEATURE_MAX)
		return -EINVAL;

	/*
	 * Look up the facility mask which can require more than
	 * one xstate component.
	 */
	idx = array_index_nospec(idx, ARRAY_SIZE(xstate_prctl_req));
	requested = xstate_prctl_req[idx];

	/* Facility without a mask or not fully supported by the system */
	if (!requested ||
	    (fpu_user_cfg.max_features & requested) != requested)
		return -EOPNOTSUPP;

	/* Lockless quick check */
	if ((xstate_get_group_perm(guest) & requested) == requested)
		return 0;

	/* Protect against concurrent modifications */
	spin_lock_irq(&current->sighand->siglock);
	permitted = xstate_get_group_perm(guest);

	/* First vCPU allocation locks the permissions. */
	if (!guest || !(permitted & FPU_GUEST_PERM_LOCKED))
		ret = __xstate_request_perm(permitted, requested, guest);
	else
		ret = -EBUSY;

	spin_unlock_irq(&current->sighand->siglock);
	return ret;
}
1695  
/*
 * Enable the dynamic features flagged in @xfd_err by reallocating the
 * fpstate, either of the current task or of @guest_fpu when that is not
 * NULL.
 *
 * Returns 0 on success or when @xfd_err contains no dynamic user feature,
 * -EPERM when the features are not permitted and -EFAULT when the
 * reallocation fails.
 */
int __xfd_enable_feature(u64 xfd_err, struct fpu_guest *guest_fpu)
{
	u64 xfd_event = xfd_err & XFEATURE_MASK_USER_DYNAMIC;
	bool guest = !!guest_fpu;
	struct fpu_state_perm *perm;
	unsigned int ksize, usize;
	struct fpu *fpu;

	if (!xfd_event) {
		/* Only complain about this on the host */
		if (!guest_fpu)
			pr_err_once("XFD: Invalid xfd error: %016llx\n", xfd_err);
		return 0;
	}

	/* Protect against concurrent modifications */
	spin_lock_irq(&current->sighand->siglock);

	/* If not permitted let it die */
	if ((xstate_get_group_perm(guest) & xfd_event) != xfd_event) {
		spin_unlock_irq(&current->sighand->siglock);
		return -EPERM;
	}

	fpu = &current->group_leader->thread.fpu;
	if (guest_fpu)
		perm = &fpu->guest_perm;
	else
		perm = &fpu->perm;
	ksize = perm->__state_size;
	usize = perm->__user_state_size;

	/*
	 * The feature is permitted and the state size is sufficient.
	 * Dropping the lock is safe here even if more features are added
	 * from another task: the retrieved buffer sizes are valid for
	 * the currently requested feature(s).
	 */
	spin_unlock_irq(&current->sighand->siglock);

	/*
	 * Try to allocate a new fpstate. If that fails there is no way
	 * out.
	 */
	return fpstate_realloc(xfd_event, ksize, usize, guest_fpu) ? -EFAULT : 0;
}
1739  
/* Host variant of __xfd_enable_feature(): no guest FPU is involved. */
int xfd_enable_feature(u64 xfd_err)
{
	struct fpu_guest *no_guest = NULL;

	return __xfd_enable_feature(xfd_err, no_guest);
}
1744  
1745  #else /* CONFIG_X86_64 */
/* Stub for !CONFIG_X86_64 builds: every permission request fails with -EPERM. */
static inline int xstate_request_perm(unsigned long idx, bool guest)
{
	return -EPERM;
}
1750  #endif  /* !CONFIG_X86_64 */
1751  
xstate_get_guest_group_perm(void)1752  u64 xstate_get_guest_group_perm(void)
1753  {
1754  	return xstate_get_group_perm(true);
1755  }
1756  EXPORT_SYMBOL_GPL(xstate_get_guest_group_perm);
1757  
1758  /**
1759   * fpu_xstate_prctl - xstate permission operations
1760   * @option:	A subfunction of arch_prctl()
1761   * @arg2:	option argument
1762   * Return:	0 if successful; otherwise, an error code
1763   *
1764   * Option arguments:
1765   *
1766   * ARCH_GET_XCOMP_SUPP: Pointer to user space u64 to store the info
1767   * ARCH_GET_XCOMP_PERM: Pointer to user space u64 to store the info
1768   * ARCH_REQ_XCOMP_PERM: Facility number requested
1769   *
1770   * For facilities which require more than one XSTATE component, the request
1771   * must be the highest state component number related to that facility,
1772   * e.g. for AMX which requires XFEATURE_XTILE_CFG(17) and
1773   * XFEATURE_XTILE_DATA(18) this would be XFEATURE_XTILE_DATA(18).
1774   */
fpu_xstate_prctl(int option,unsigned long arg2)1775  long fpu_xstate_prctl(int option, unsigned long arg2)
1776  {
1777  	u64 __user *uptr = (u64 __user *)arg2;
1778  	u64 permitted, supported;
1779  	unsigned long idx = arg2;
1780  	bool guest = false;
1781  
1782  	switch (option) {
1783  	case ARCH_GET_XCOMP_SUPP:
1784  		supported = fpu_user_cfg.max_features |	fpu_user_cfg.legacy_features;
1785  		return put_user(supported, uptr);
1786  
1787  	case ARCH_GET_XCOMP_PERM:
1788  		/*
1789  		 * Lockless snapshot as it can also change right after the
1790  		 * dropping the lock.
1791  		 */
1792  		permitted = xstate_get_host_group_perm();
1793  		permitted &= XFEATURE_MASK_USER_SUPPORTED;
1794  		return put_user(permitted, uptr);
1795  
1796  	case ARCH_GET_XCOMP_GUEST_PERM:
1797  		permitted = xstate_get_guest_group_perm();
1798  		permitted &= XFEATURE_MASK_USER_SUPPORTED;
1799  		return put_user(permitted, uptr);
1800  
1801  	case ARCH_REQ_XCOMP_GUEST_PERM:
1802  		guest = true;
1803  		fallthrough;
1804  
1805  	case ARCH_REQ_XCOMP_PERM:
1806  		if (!IS_ENABLED(CONFIG_X86_64))
1807  			return -EOPNOTSUPP;
1808  
1809  		return xstate_request_perm(idx, guest);
1810  
1811  	default:
1812  		return -EINVAL;
1813  	}
1814  }
1815  
1816  #ifdef CONFIG_PROC_PID_ARCH_STATUS
1817  /*
1818   * Report the amount of time elapsed in millisecond since last AVX512
1819   * use in the task.
1820   */
avx512_status(struct seq_file * m,struct task_struct * task)1821  static void avx512_status(struct seq_file *m, struct task_struct *task)
1822  {
1823  	unsigned long timestamp = READ_ONCE(task->thread.fpu.avx512_timestamp);
1824  	long delta;
1825  
1826  	if (!timestamp) {
1827  		/*
1828  		 * Report -1 if no AVX512 usage
1829  		 */
1830  		delta = -1;
1831  	} else {
1832  		delta = (long)(jiffies - timestamp);
1833  		/*
1834  		 * Cap to LONG_MAX if time difference > LONG_MAX
1835  		 */
1836  		if (delta < 0)
1837  			delta = LONG_MAX;
1838  		delta = jiffies_to_msecs(delta);
1839  	}
1840  
1841  	seq_put_decimal_ll(m, "AVX512_elapsed_ms:\t", delta);
1842  	seq_putc(m, '\n');
1843  }
1844  
1845  /*
1846   * Report architecture specific information
1847   */
proc_pid_arch_status(struct seq_file * m,struct pid_namespace * ns,struct pid * pid,struct task_struct * task)1848  int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
1849  			struct pid *pid, struct task_struct *task)
1850  {
1851  	/*
1852  	 * Report AVX512 state if the processor and build option supported.
1853  	 */
1854  	if (cpu_feature_enabled(X86_FEATURE_AVX512F))
1855  		avx512_status(m, task);
1856  
1857  	return 0;
1858  }
1859  #endif /* CONFIG_PROC_PID_ARCH_STATUS */
1860  
1861  #ifdef CONFIG_COREDUMP
/*
 * Name field written into the xsave layout note. Its sizeof() (which
 * includes the terminating NUL) is used as the note's n_namesz.
 */
static const char owner_name[] = "LINUX";
1863  
1864  /*
1865   * Dump type, size, offset and flag values for every xfeature that is present.
1866   */
dump_xsave_layout_desc(struct coredump_params * cprm)1867  static int dump_xsave_layout_desc(struct coredump_params *cprm)
1868  {
1869  	int num_records = 0;
1870  	int i;
1871  
1872  	for_each_extended_xfeature(i, fpu_user_cfg.max_features) {
1873  		struct x86_xfeat_component xc = {
1874  			.type   = i,
1875  			.size   = xstate_sizes[i],
1876  			.offset = xstate_offsets[i],
1877  			/* reserved for future use */
1878  			.flags  = 0,
1879  		};
1880  
1881  		if (!dump_emit(cprm, &xc, sizeof(xc)))
1882  			return 0;
1883  
1884  		num_records++;
1885  	}
1886  	return num_records;
1887  }
1888  
get_xsave_desc_size(void)1889  static u32 get_xsave_desc_size(void)
1890  {
1891  	u32 cnt = 0;
1892  	u32 i;
1893  
1894  	for_each_extended_xfeature(i, fpu_user_cfg.max_features)
1895  		cnt++;
1896  
1897  	return cnt * (sizeof(struct x86_xfeat_component));
1898  }
1899  
elf_coredump_extra_notes_write(struct coredump_params * cprm)1900  int elf_coredump_extra_notes_write(struct coredump_params *cprm)
1901  {
1902  	int num_records = 0;
1903  	struct elf_note en;
1904  
1905  	if (!fpu_user_cfg.max_features)
1906  		return 0;
1907  
1908  	en.n_namesz = sizeof(owner_name);
1909  	en.n_descsz = get_xsave_desc_size();
1910  	en.n_type = NT_X86_XSAVE_LAYOUT;
1911  
1912  	if (!dump_emit(cprm, &en, sizeof(en)))
1913  		return 1;
1914  	if (!dump_emit(cprm, owner_name, en.n_namesz))
1915  		return 1;
1916  	if (!dump_align(cprm, 4))
1917  		return 1;
1918  
1919  	num_records = dump_xsave_layout_desc(cprm);
1920  	if (!num_records)
1921  		return 1;
1922  
1923  	/* Total size should be equal to the number of records */
1924  	if ((sizeof(struct x86_xfeat_component) * num_records) != en.n_descsz)
1925  		return 1;
1926  
1927  	return 0;
1928  }
1929  
elf_coredump_extra_notes_size(void)1930  int elf_coredump_extra_notes_size(void)
1931  {
1932  	int size;
1933  
1934  	if (!fpu_user_cfg.max_features)
1935  		return 0;
1936  
1937  	/* .note header */
1938  	size  = sizeof(struct elf_note);
1939  	/*  Name plus alignment to 4 bytes */
1940  	size += roundup(sizeof(owner_name), 4);
1941  	size += get_xsave_desc_size();
1942  
1943  	return size;
1944  }
1945  #endif /* CONFIG_COREDUMP */
1946