// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2020 Google LLC
 * Author: Will Deacon <will@kernel.org>
 */

#ifndef __ARM64_KVM_PGTABLE_H__
#define __ARM64_KVM_PGTABLE_H__

#include <linux/bits.h>
#include <linux/kvm_host.h>
#include <linux/types.h>

#define KVM_PGTABLE_FIRST_LEVEL		-1
#define KVM_PGTABLE_LAST_LEVEL		3

/*
 * The largest supported block sizes for KVM (no 52-bit PA support):
 *  - 4K (level 1):	1GB
 *  - 16K (level 2):	32MB
 *  - 64K (level 2):	512MB
 */
#ifdef CONFIG_ARM64_4K_PAGES
#define KVM_PGTABLE_MIN_BLOCK_LEVEL	1
#else
#define KVM_PGTABLE_MIN_BLOCK_LEVEL	2
#endif

#define kvm_lpa2_is_enabled()		system_supports_lpa2()

static inline u64 kvm_get_parange_max(void)
{
	if (kvm_lpa2_is_enabled() ||
	   (IS_ENABLED(CONFIG_ARM64_PA_BITS_52) && PAGE_SHIFT == 16))
		return ID_AA64MMFR0_EL1_PARANGE_52;
	else
		return ID_AA64MMFR0_EL1_PARANGE_48;
}

static inline u64 kvm_get_parange(u64 mmfr0)
{
	u64 parange_max = kvm_get_parange_max();
	u64 parange = cpuid_feature_extract_unsigned_field(mmfr0,
				ID_AA64MMFR0_EL1_PARANGE_SHIFT);
	if (parange > parange_max)
		parange = parange_max;

	return parange;
}

typedef u64 kvm_pte_t;

#define KVM_PTE_VALID			BIT(0)

#define KVM_PTE_ADDR_MASK		GENMASK(47, PAGE_SHIFT)
#define KVM_PTE_ADDR_51_48		GENMASK(15, 12)
#define KVM_PTE_ADDR_MASK_LPA2		GENMASK(49, PAGE_SHIFT)
#define KVM_PTE_ADDR_51_50_LPA2		GENMASK(9, 8)

#define KVM_PHYS_INVALID		(-1ULL)

#define KVM_PTE_LEAF_ATTR_LO		GENMASK(11, 2)

#define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX	GENMASK(4, 2)
#define KVM_PTE_LEAF_ATTR_LO_S1_AP	GENMASK(7, 6)
#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RO		\
	({ cpus_have_final_cap(ARM64_KVM_HVHE) ? 2 : 3; })
#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RW		\
	({ cpus_have_final_cap(ARM64_KVM_HVHE) ? 0 : 1; })
#define KVM_PTE_LEAF_ATTR_LO_S1_SH	GENMASK(9, 8)
#define KVM_PTE_LEAF_ATTR_LO_S1_SH_IS	3
#define KVM_PTE_LEAF_ATTR_LO_S1_AF	BIT(10)

#define KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR	GENMASK(5, 2)
#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R	BIT(6)
#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W	BIT(7)
#define KVM_PTE_LEAF_ATTR_LO_S2_SH	GENMASK(9, 8)
#define KVM_PTE_LEAF_ATTR_LO_S2_SH_IS	3
#define KVM_PTE_LEAF_ATTR_LO_S2_AF	BIT(10)

#define KVM_PTE_LEAF_ATTR_HI		GENMASK(63, 50)

#define KVM_PTE_LEAF_ATTR_HI_SW		GENMASK(58, 55)

#define KVM_PTE_LEAF_ATTR_HI_S1_XN	BIT(54)

#define KVM_PTE_LEAF_ATTR_HI_S2_XN	BIT(54)

#define KVM_PTE_LEAF_ATTR_HI_S1_GP	BIT(50)

#define KVM_PTE_LEAF_ATTR_S2_PERMS	(KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | \
					 KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \
					 KVM_PTE_LEAF_ATTR_HI_S2_XN)

#define KVM_INVALID_PTE_OWNER_MASK	GENMASK(9, 2)
#define KVM_MAX_OWNER_ID		1

/*
 * Used to indicate a pte for which a 'break-before-make' sequence is in
 * progress.
 */
#define KVM_INVALID_PTE_LOCKED		BIT(10)

static inline bool kvm_pte_valid(kvm_pte_t pte)
{
	return pte & KVM_PTE_VALID;
}

static inline u64 kvm_pte_to_phys(kvm_pte_t pte)
{
	u64 pa;

	if (kvm_lpa2_is_enabled()) {
		pa = pte & KVM_PTE_ADDR_MASK_LPA2;
		pa |= FIELD_GET(KVM_PTE_ADDR_51_50_LPA2, pte) << 50;
	} else {
		pa = pte & KVM_PTE_ADDR_MASK;
		if (PAGE_SHIFT == 16)
			pa |= FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48;
	}

	return pa;
}

static inline kvm_pte_t kvm_phys_to_pte(u64 pa)
{
	kvm_pte_t pte;

	if (kvm_lpa2_is_enabled()) {
		pte = pa & KVM_PTE_ADDR_MASK_LPA2;
		pa &= GENMASK(51, 50);
		pte |= FIELD_PREP(KVM_PTE_ADDR_51_50_LPA2, pa >> 50);
	} else {
		pte = pa & KVM_PTE_ADDR_MASK;
		if (PAGE_SHIFT == 16) {
			pa &= GENMASK(51, 48);
			pte |= FIELD_PREP(KVM_PTE_ADDR_51_48, pa >> 48);
		}
	}

	return pte;
}

static inline kvm_pfn_t kvm_pte_to_pfn(kvm_pte_t pte)
{
	return __phys_to_pfn(kvm_pte_to_phys(pte));
}
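
/*
 * Illustrative sketch (not part of the API above): converting between a
 * physical address and a PTE. kvm_phys_to_pte() only encodes the output
 * address bits, so a usable mapping additionally needs KVM_PTE_VALID and
 * the relevant attribute bits, which the page-table code sets itself.
 *
 *	phys_addr_t phys = 0x40000000;
 *	kvm_pte_t pte = kvm_phys_to_pte(phys) | KVM_PTE_VALID;
 *
 *	WARN_ON(!kvm_pte_valid(pte));
 *	WARN_ON(kvm_pte_to_phys(pte) != phys);
 *	WARN_ON(kvm_pte_to_pfn(pte) != __phys_to_pfn(phys));
 */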

static inline u64 kvm_granule_shift(s8 level)
{
	/* Assumes KVM_PGTABLE_LAST_LEVEL is 3 */
	return ARM64_HW_PGTABLE_LEVEL_SHIFT(level);
}

static inline u64 kvm_granule_size(s8 level)
{
	return BIT(kvm_granule_shift(level));
}

static inline bool kvm_level_supports_block_mapping(s8 level)
{
	return level >= KVM_PGTABLE_MIN_BLOCK_LEVEL;
}

static inline u32 kvm_supported_block_sizes(void)
{
	s8 level = KVM_PGTABLE_MIN_BLOCK_LEVEL;
	u32 r = 0;

	for (; level <= KVM_PGTABLE_LAST_LEVEL; level++)
		r |= BIT(kvm_granule_shift(level));

	return r;
}

static inline bool kvm_is_block_size_supported(u64 size)
{
	bool is_power_of_two = IS_ALIGNED(size, size);

	return is_power_of_two && (size & kvm_supported_block_sizes());
}
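
/*
 * Example (illustrative only, assuming a 4K granule): the supported block
 * sizes are the leaf sizes at levels 1..3, i.e. 1GB, 2MB and 4KB, so
 * kvm_supported_block_sizes() returns BIT(30) | BIT(21) | BIT(12), and
 * kvm_is_block_size_supported(SZ_2M) is true while
 * kvm_is_block_size_supported(SZ_16M) is not.
 */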

/**
 * struct kvm_pgtable_mm_ops - Memory management callbacks.
 * @zalloc_page:		Allocate a single zeroed memory page.
 *				The @arg parameter can be used by the walker
 *				to pass a memcache. The initial refcount of
 *				the page is 1.
 * @zalloc_pages_exact:		Allocate an exact number of zeroed memory pages.
 *				The @size parameter is in bytes, and is rounded
 *				up to the next page boundary. The resulting
 *				allocation is physically contiguous.
 * @free_pages_exact:		Free an exact number of memory pages previously
 *				allocated by zalloc_pages_exact.
 * @free_unlinked_table:	Free an unlinked paging structure by unlinking and
 *				dropping references.
 * @get_page:			Increment the refcount on a page.
 * @put_page:			Decrement the refcount on a page. When the
 *				refcount reaches 0 the page is automatically
 *				freed.
 * @page_count:			Return the refcount of a page.
 * @phys_to_virt:		Convert a physical address into a virtual
 *				address mapped in the current context.
 * @virt_to_phys:		Convert a virtual address mapped in the current
 *				context into a physical address.
 * @dcache_clean_inval_poc:	Clean and invalidate the data cache to the PoC
 *				for the specified memory address range.
 * @icache_inval_pou:		Invalidate the instruction cache to the PoU
 *				for the specified memory address range.
 */
struct kvm_pgtable_mm_ops {
	void*		(*zalloc_page)(void *arg);
	void*		(*zalloc_pages_exact)(size_t size);
	void		(*free_pages_exact)(void *addr, size_t size);
	void		(*free_unlinked_table)(void *addr, s8 level);
	void		(*get_page)(void *addr);
	void		(*put_page)(void *addr);
	int		(*page_count)(void *addr);
	void*		(*phys_to_virt)(phys_addr_t phys);
	phys_addr_t	(*virt_to_phys)(void *addr);
	void		(*dcache_clean_inval_poc)(void *addr, size_t size);
	void		(*icache_inval_pou)(void *addr, size_t size);
};
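
/*
 * Minimal sketch of an mm_ops instance (illustrative only; the helpers
 * named below are hypothetical stand-ins for whatever allocator and
 * refcounting scheme the caller uses):
 *
 *	static struct kvm_pgtable_mm_ops example_mm_ops = {
 *		.zalloc_page		= example_zalloc_page,
 *		.zalloc_pages_exact	= example_zalloc_pages_exact,
 *		.free_pages_exact	= example_free_pages_exact,
 *		.free_unlinked_table	= example_free_unlinked_table,
 *		.get_page		= example_get_page,
 *		.put_page		= example_put_page,
 *		.page_count		= example_page_count,
 *		.phys_to_virt		= example_phys_to_virt,
 *		.virt_to_phys		= example_virt_to_phys,
 *		.dcache_clean_inval_poc	= example_dcache_clean_inval_poc,
 *		.icache_inval_pou	= example_icache_inval_pou,
 *	};
 */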

/**
 * enum kvm_pgtable_stage2_flags - Stage-2 page-table flags.
 * @KVM_PGTABLE_S2_NOFWB:	Don't enforce Normal-WB even if the CPUs have
 *				ARM64_HAS_STAGE2_FWB.
 * @KVM_PGTABLE_S2_IDMAP:	Only use identity mappings.
 */
enum kvm_pgtable_stage2_flags {
	KVM_PGTABLE_S2_NOFWB			= BIT(0),
	KVM_PGTABLE_S2_IDMAP			= BIT(1),
};

/**
 * enum kvm_pgtable_prot - Page-table permissions and attributes.
 * @KVM_PGTABLE_PROT_X:		Execute permission.
 * @KVM_PGTABLE_PROT_W:		Write permission.
 * @KVM_PGTABLE_PROT_R:		Read permission.
 * @KVM_PGTABLE_PROT_DEVICE:	Device attributes.
 * @KVM_PGTABLE_PROT_NORMAL_NC:	Normal noncacheable attributes.
 * @KVM_PGTABLE_PROT_SW0:	Software bit 0.
 * @KVM_PGTABLE_PROT_SW1:	Software bit 1.
 * @KVM_PGTABLE_PROT_SW2:	Software bit 2.
 * @KVM_PGTABLE_PROT_SW3:	Software bit 3.
 */
enum kvm_pgtable_prot {
	KVM_PGTABLE_PROT_X			= BIT(0),
	KVM_PGTABLE_PROT_W			= BIT(1),
	KVM_PGTABLE_PROT_R			= BIT(2),

	KVM_PGTABLE_PROT_DEVICE			= BIT(3),
	KVM_PGTABLE_PROT_NORMAL_NC		= BIT(4),

	KVM_PGTABLE_PROT_SW0			= BIT(55),
	KVM_PGTABLE_PROT_SW1			= BIT(56),
	KVM_PGTABLE_PROT_SW2			= BIT(57),
	KVM_PGTABLE_PROT_SW3			= BIT(58),
};

#define KVM_PGTABLE_PROT_RW	(KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W)
#define KVM_PGTABLE_PROT_RWX	(KVM_PGTABLE_PROT_RW | KVM_PGTABLE_PROT_X)

#define PKVM_HOST_MEM_PROT	KVM_PGTABLE_PROT_RWX
#define PKVM_HOST_MMIO_PROT	KVM_PGTABLE_PROT_RW

#define PAGE_HYP		KVM_PGTABLE_PROT_RW
#define PAGE_HYP_EXEC		(KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_X)
#define PAGE_HYP_RO		(KVM_PGTABLE_PROT_R)
#define PAGE_HYP_DEVICE		(PAGE_HYP | KVM_PGTABLE_PROT_DEVICE)

typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end,
					   enum kvm_pgtable_prot prot);

/**
 * enum kvm_pgtable_walk_flags - Flags to control a depth-first page-table walk.
 * @KVM_PGTABLE_WALK_LEAF:		Visit leaf entries, including invalid
 *					entries.
 * @KVM_PGTABLE_WALK_TABLE_PRE:		Visit table entries before their
 *					children.
 * @KVM_PGTABLE_WALK_TABLE_POST:	Visit table entries after their
 *					children.
 * @KVM_PGTABLE_WALK_SHARED:		Indicates the page-tables may be shared
 *					with other software walkers.
 * @KVM_PGTABLE_WALK_HANDLE_FAULT:	Indicates the page-table walk was
 *					invoked from a fault handler.
 * @KVM_PGTABLE_WALK_SKIP_BBM_TLBI:	Visit and update table entries
 *					without Break-before-make's
 *					TLB invalidation.
 * @KVM_PGTABLE_WALK_SKIP_CMO:		Visit and update table entries
 *					without Cache maintenance
 *					operations required.
 */
enum kvm_pgtable_walk_flags {
	KVM_PGTABLE_WALK_LEAF			= BIT(0),
	KVM_PGTABLE_WALK_TABLE_PRE		= BIT(1),
	KVM_PGTABLE_WALK_TABLE_POST		= BIT(2),
	KVM_PGTABLE_WALK_SHARED			= BIT(3),
	KVM_PGTABLE_WALK_HANDLE_FAULT		= BIT(4),
	KVM_PGTABLE_WALK_SKIP_BBM_TLBI		= BIT(5),
	KVM_PGTABLE_WALK_SKIP_CMO		= BIT(6),
};

struct kvm_pgtable_visit_ctx {
	kvm_pte_t				*ptep;
	kvm_pte_t				old;
	void					*arg;
	struct kvm_pgtable_mm_ops		*mm_ops;
	u64					start;
	u64					addr;
	u64					end;
	s8					level;
	enum kvm_pgtable_walk_flags		flags;
};

typedef int (*kvm_pgtable_visitor_fn_t)(const struct kvm_pgtable_visit_ctx *ctx,
					enum kvm_pgtable_walk_flags visit);

static inline bool kvm_pgtable_walk_shared(const struct kvm_pgtable_visit_ctx *ctx)
{
	return ctx->flags & KVM_PGTABLE_WALK_SHARED;
}

/**
 * struct kvm_pgtable_walker - Hook into a page-table walk.
 * @cb:		Callback function to invoke during the walk.
 * @arg:	Argument passed to the callback function.
 * @flags:	Bitwise-OR of flags to identify the entry types on which to
 *		invoke the callback function.
 */
struct kvm_pgtable_walker {
	const kvm_pgtable_visitor_fn_t		cb;
	void * const				arg;
	const enum kvm_pgtable_walk_flags	flags;
};
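
/*
 * Illustrative sketch (hypothetical code, not part of this header): a
 * walker that counts the valid leaf entries it visits. The callback
 * receives the entry via ctx->old and the caller's cookie via ctx->arg.
 *
 *	static int count_valid_cb(const struct kvm_pgtable_visit_ctx *ctx,
 *				  enum kvm_pgtable_walk_flags visit)
 *	{
 *		u64 *count = ctx->arg;
 *
 *		if (kvm_pte_valid(ctx->old))
 *			(*count)++;
 *
 *		return 0;
 *	}
 *
 *	u64 count = 0;
 *	struct kvm_pgtable_walker walker = {
 *		.cb	= count_valid_cb,
 *		.arg	= &count,
 *		.flags	= KVM_PGTABLE_WALK_LEAF,
 *	};
 *
 * See kvm_pgtable_walk() below for how such a walker is run over a range.
 */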

/*
 * RCU cannot be used in a non-kernel context such as the hyp. As such, page
 * table walkers used in hyp do not call into RCU and instead use other
 * synchronization mechanisms (such as a spinlock).
 */
#if defined(__KVM_NVHE_HYPERVISOR__) || defined(__KVM_VHE_HYPERVISOR__)

typedef kvm_pte_t *kvm_pteref_t;

static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walker,
						kvm_pteref_t pteref)
{
	return pteref;
}

static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker)
{
	/*
	 * Due to the lack of RCU (or a similar protection scheme), only
	 * non-shared table walkers are allowed in the hypervisor.
	 */
	if (walker->flags & KVM_PGTABLE_WALK_SHARED)
		return -EPERM;

	return 0;
}

static inline void kvm_pgtable_walk_end(struct kvm_pgtable_walker *walker) {}

static inline bool kvm_pgtable_walk_lock_held(void)
{
	return true;
}

#else

typedef kvm_pte_t __rcu *kvm_pteref_t;

static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walker,
						kvm_pteref_t pteref)
{
	return rcu_dereference_check(pteref, !(walker->flags & KVM_PGTABLE_WALK_SHARED));
}

static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker)
{
	if (walker->flags & KVM_PGTABLE_WALK_SHARED)
		rcu_read_lock();

	return 0;
}

static inline void kvm_pgtable_walk_end(struct kvm_pgtable_walker *walker)
{
	if (walker->flags & KVM_PGTABLE_WALK_SHARED)
		rcu_read_unlock();
}

static inline bool kvm_pgtable_walk_lock_held(void)
{
	return rcu_read_lock_held();
}

#endif

/**
 * struct kvm_pgtable - KVM page-table.
 * @ia_bits:		Maximum input address size, in bits.
 * @start_level:	Level at which the page-table walk starts.
 * @pgd:		Pointer to the first top-level entry of the page-table.
 * @mm_ops:		Memory management callbacks.
 * @mmu:		Stage-2 KVM MMU struct. Unused for stage-1 page-tables.
 * @flags:		Stage-2 page-table flags.
 * @force_pte_cb:	Function that returns true if page level mappings must
 *			be used instead of block mappings.
 */
struct kvm_pgtable {
	u32					ia_bits;
	s8					start_level;
	kvm_pteref_t				pgd;
	struct kvm_pgtable_mm_ops		*mm_ops;

	/* Stage-2 only */
	struct kvm_s2_mmu			*mmu;
	enum kvm_pgtable_stage2_flags		flags;
	kvm_pgtable_force_pte_cb_t		force_pte_cb;
};

/**
 * kvm_pgtable_hyp_init() - Initialise a hypervisor stage-1 page-table.
 * @pgt:	Uninitialised page-table structure to initialise.
 * @va_bits:	Maximum virtual address bits.
 * @mm_ops:	Memory management callbacks.
 *
 * Return: 0 on success, negative error code on failure.
 */
int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits,
			 struct kvm_pgtable_mm_ops *mm_ops);

/**
 * kvm_pgtable_hyp_destroy() - Destroy an unused hypervisor stage-1 page-table.
 * @pgt:	Page-table structure initialised by kvm_pgtable_hyp_init().
 *
 * The page-table is assumed to be unreachable by any hardware walkers prior
 * to freeing and therefore no TLB invalidation is performed.
 */
void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt);

/**
 * kvm_pgtable_hyp_map() - Install a mapping in a hypervisor stage-1 page-table.
 * @pgt:	Page-table structure initialised by kvm_pgtable_hyp_init().
 * @addr:	Virtual address at which to place the mapping.
 * @size:	Size of the mapping.
 * @phys:	Physical address of the memory to map.
 * @prot:	Permissions and attributes for the mapping.
 *
 * The offset of @addr within a page is ignored, @size is rounded-up to
 * the next page boundary and @phys is rounded-down to the previous page
 * boundary.
 *
 * If device attributes are not explicitly requested in @prot, then the
 * mapping will be normal, cacheable. Attempts to install a new mapping
 * for a virtual address that is already mapped will be rejected with an
 * error and a WARN().
 *
 * Return: 0 on success, negative error code on failure.
 */
int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
			enum kvm_pgtable_prot prot);

/**
 * kvm_pgtable_hyp_unmap() - Remove a mapping from a hypervisor stage-1 page-table.
 * @pgt:	Page-table structure initialised by kvm_pgtable_hyp_init().
 * @addr:	Virtual address from which to remove the mapping.
 * @size:	Size of the mapping.
 *
 * The offset of @addr within a page is ignored and @size is rounded-up to
 * the next page boundary.
 *
 * TLB invalidation is performed for each page-table entry cleared during the
 * unmapping operation and the reference count for the page-table page
 * containing the cleared entry is decremented, with unreferenced pages being
 * freed. The unmapping operation will stop early if it encounters either an
 * invalid page-table entry or a valid block mapping which maps beyond the range
 * being unmapped.
 *
 * Return: Number of bytes unmapped, which may be 0.
 */
u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size);
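
/*
 * Illustrative sketch (hypothetical names, error handling elided): bringing
 * up a hypervisor stage-1 table, mapping one page read-only and tearing the
 * whole thing down again. hyp_va_bits, va, phys and example_mm_ops stand in
 * for values owned by the caller.
 *
 *	struct kvm_pgtable pgt;
 *
 *	kvm_pgtable_hyp_init(&pgt, hyp_va_bits, &example_mm_ops);
 *	kvm_pgtable_hyp_map(&pgt, va, PAGE_SIZE, phys, PAGE_HYP_RO);
 *	...
 *	kvm_pgtable_hyp_unmap(&pgt, va, PAGE_SIZE);
 *	kvm_pgtable_hyp_destroy(&pgt);
 */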

/**
 * kvm_get_vtcr() - Helper to construct VTCR_EL2
 * @mmfr0:	Sanitized value of SYS_ID_AA64MMFR0_EL1 register.
 * @mmfr1:	Sanitized value of SYS_ID_AA64MMFR1_EL1 register.
 * @phys_shift:	Value to set in VTCR_EL2.T0SZ.
 *
 * The VTCR value is common across all the physical CPUs on the system.
 * We use system wide sanitised values to fill in different fields,
 * except for Hardware Management of Access Flags. HA Flag is set
 * unconditionally on all CPUs, as it is safe to run with or without
 * the feature and the bit is RES0 on CPUs that don't support it.
 *
 * Return: VTCR_EL2 value
 */
u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift);

/**
 * kvm_pgtable_stage2_pgd_size() - Helper to compute size of a stage-2 PGD
 * @vtcr:	Content of the VTCR register.
 *
 * Return: the size (in bytes) of the stage-2 PGD
 */
size_t kvm_pgtable_stage2_pgd_size(u64 vtcr);

/**
 * __kvm_pgtable_stage2_init() - Initialise a guest stage-2 page-table.
 * @pgt:	Uninitialised page-table structure to initialise.
 * @mmu:	S2 MMU context for this S2 translation
 * @mm_ops:	Memory management callbacks.
 * @flags:	Stage-2 configuration flags.
 * @force_pte_cb: Function that returns true if page level mappings must
 *		be used instead of block mappings.
 *
 * Return: 0 on success, negative error code on failure.
 */
int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
			      struct kvm_pgtable_mm_ops *mm_ops,
			      enum kvm_pgtable_stage2_flags flags,
			      kvm_pgtable_force_pte_cb_t force_pte_cb);

#define kvm_pgtable_stage2_init(pgt, mmu, mm_ops) \
	__kvm_pgtable_stage2_init(pgt, mmu, mm_ops, 0, NULL)

/**
 * kvm_pgtable_stage2_destroy() - Destroy an unused guest stage-2 page-table.
 * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
 *
 * The page-table is assumed to be unreachable by any hardware walkers prior
 * to freeing and therefore no TLB invalidation is performed.
 */
void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);

/**
 * kvm_pgtable_stage2_free_unlinked() - Free an unlinked stage-2 paging structure.
 * @mm_ops:	Memory management callbacks.
 * @pgtable:	Unlinked stage-2 paging structure to be freed.
 * @level:	Level of the stage-2 paging structure to be freed.
 *
 * The page-table is assumed to be unreachable by any hardware walkers prior to
 * freeing and therefore no TLB invalidation is performed.
 */
void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level);

/**
 * kvm_pgtable_stage2_create_unlinked() - Create an unlinked stage-2 paging structure.
 * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
 * @phys:	Physical address of the memory to map.
 * @level:	Starting level of the stage-2 paging structure to be created.
 * @prot:	Permissions and attributes for the mapping.
 * @mc:		Cache of pre-allocated and zeroed memory from which to allocate
 *		page-table pages.
 * @force_pte:  Force mappings to PAGE_SIZE granularity.
 *
 * Returns an unlinked page-table tree. This new page-table tree is
 * not reachable (i.e., it is unlinked) from the root pgd and it is
 * therefore unreachable by the hardware page-table walker. No TLB
 * invalidation or CMOs are performed.
 *
 * If device attributes are not explicitly requested in @prot, then the
 * mapping will be normal, cacheable.
 *
 * Return: The fully populated (unlinked) stage-2 paging structure, or
 * an ERR_PTR(error) on failure.
 */
kvm_pte_t *kvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt,
					      u64 phys, s8 level,
					      enum kvm_pgtable_prot prot,
					      void *mc, bool force_pte);

/**
 * kvm_pgtable_stage2_map() - Install a mapping in a guest stage-2 page-table.
 * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
 * @addr:	Intermediate physical address at which to place the mapping.
 * @size:	Size of the mapping.
 * @phys:	Physical address of the memory to map.
 * @prot:	Permissions and attributes for the mapping.
 * @mc:		Cache of pre-allocated and zeroed memory from which to allocate
 *		page-table pages.
 * @flags:	Flags to control the page-table walk (e.g. a shared walk)
 *
 * The offset of @addr within a page is ignored, @size is rounded-up to
 * the next page boundary and @phys is rounded-down to the previous page
 * boundary.
 *
 * If device attributes are not explicitly requested in @prot, then the
 * mapping will be normal, cacheable.
 *
 * Note that the update of a valid leaf PTE in this function will be aborted
 * if it is trying to recreate the exact same mapping or only change the
 * access permissions. Instead, the vCPU will exit from the guest one more
 * time if still needed and then go through the path of relaxing permissions.
 *
 * Note that this function will both coalesce existing table entries and split
 * existing block mappings, relying on page-faults to fault back areas outside
 * of the new mapping lazily.
 *
 * Return: 0 on success, negative error code on failure.
 */
int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
			   u64 phys, enum kvm_pgtable_prot prot,
			   void *mc, enum kvm_pgtable_walk_flags flags);
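
/*
 * Illustrative sketch (hypothetical names): mapping a 2MB block of memory
 * for a guest at IPA ipa, backed by physical address phys, with page-table
 * pages supplied through the memcache mc:
 *
 *	ret = kvm_pgtable_stage2_map(pgt, ipa, SZ_2M, phys,
 *				     KVM_PGTABLE_PROT_RWX, mc, 0);
 *
 * A caller racing with other walkers (e.g. when handling a guest fault
 * under a shared lock) would pass KVM_PGTABLE_WALK_SHARED in @flags
 * instead of 0.
 */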

/**
 * kvm_pgtable_stage2_set_owner() - Unmap and annotate pages in the IPA space to
 *				    track ownership.
 * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
 * @addr:	Base intermediate physical address to annotate.
 * @size:	Size of the annotated range.
 * @mc:		Cache of pre-allocated and zeroed memory from which to allocate
 *		page-table pages.
 * @owner_id:	Unique identifier for the owner of the page.
 *
 * By default, all page-tables are owned by identifier 0. This function can be
 * used to mark portions of the IPA space as owned by other entities. When a
 * stage 2 is used with identity-mappings, these annotations allow the
 * page-table data structure to be used as a simple rmap.
 *
 * Return: 0 on success, negative error code on failure.
 */
int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size,
				 void *mc, u8 owner_id);

/**
 * kvm_pgtable_stage2_unmap() - Remove a mapping from a guest stage-2 page-table.
 * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
 * @addr:	Intermediate physical address from which to remove the mapping.
 * @size:	Size of the mapping.
 *
 * The offset of @addr within a page is ignored and @size is rounded-up to
 * the next page boundary.
 *
 * TLB invalidation is performed for each page-table entry cleared during the
 * unmapping operation and the reference count for the page-table page
 * containing the cleared entry is decremented, with unreferenced pages being
 * freed. Unmapping a cacheable page will ensure that it is clean to the PoC if
 * FWB is not supported by the CPU.
 *
 * Return: 0 on success, negative error code on failure.
 */
int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size);

/**
 * kvm_pgtable_stage2_wrprotect() - Write-protect guest stage-2 address range
 *                                  without TLB invalidation.
 * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
 * @addr:	Intermediate physical address from which to write-protect.
 * @size:	Size of the range.
 *
 * The offset of @addr within a page is ignored and @size is rounded-up to
 * the next page boundary.
 *
 * Note that it is the caller's responsibility to invalidate the TLB after
 * calling this function to ensure that the updated permissions are visible
 * to the CPUs.
 *
 * Return: 0 on success, negative error code on failure.
 */
int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size);
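
/*
 * Illustrative sketch (hypothetical names): write-protecting an IPA range,
 * e.g. when enabling dirty logging, and then performing the TLB
 * invalidation that this function leaves to the caller, here done with
 * kvm_tlb_flush_vmid_range() declared at the end of this header:
 *
 *	kvm_pgtable_stage2_wrprotect(pgt, ipa, size);
 *	kvm_tlb_flush_vmid_range(pgt->mmu, ipa, size);
 */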

/**
 * kvm_pgtable_stage2_mkyoung() - Set the access flag in a page-table entry.
 * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
 * @addr:	Intermediate physical address to identify the page-table entry.
 *
 * The offset of @addr within a page is ignored.
 *
 * If there is a valid, leaf page-table entry used to translate @addr, then
 * set the access flag in that entry.
 *
 * Return: The old page-table entry prior to setting the flag, 0 on failure.
 */
kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr);

/**
 * kvm_pgtable_stage2_test_clear_young() - Test and optionally clear the access
 *					   flag in a page-table entry.
 * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
 * @addr:	Intermediate physical address to identify the page-table entry.
 * @size:	Size of the address range to visit.
 * @mkold:	True if the access flag should be cleared.
 *
 * The offset of @addr within a page is ignored.
 *
 * Tests and conditionally clears the access flag for every valid, leaf
 * page-table entry used to translate the range [@addr, @addr + @size).
 *
 * Note that it is the caller's responsibility to invalidate the TLB after
 * calling this function to ensure that the updated permissions are visible
 * to the CPUs.
 *
 * Return: True if any of the visited PTEs had the access flag set.
 */
bool kvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr,
					 u64 size, bool mkold);

/**
 * kvm_pgtable_stage2_relax_perms() - Relax the permissions enforced by a
 *				      page-table entry.
 * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
 * @addr:	Intermediate physical address to identify the page-table entry.
 * @prot:	Additional permissions to grant for the mapping.
 *
 * The offset of @addr within a page is ignored.
 *
 * If there is a valid, leaf page-table entry used to translate @addr, then
 * relax the permissions in that entry according to the read, write and
 * execute permissions specified by @prot. No permissions are removed, and
 * TLB invalidation is performed after updating the entry. Software bits cannot
 * be set or cleared using kvm_pgtable_stage2_relax_perms().
 *
 * Return: 0 on success, negative error code on failure.
 */
int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
				   enum kvm_pgtable_prot prot);

/**
 * kvm_pgtable_stage2_flush() - Clean and invalidate data cache to Point of
 *				Coherency for guest stage-2 address range.
 * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
 * @addr:	Intermediate physical address from which to flush.
 * @size:	Size of the range.
 *
 * The offset of @addr within a page is ignored and @size is rounded-up to
 * the next page boundary.
 *
 * Return: 0 on success, negative error code on failure.
 */
int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size);

/**
 * kvm_pgtable_stage2_split() - Split a range of huge pages into leaf PTEs pointing
 *				to PAGE_SIZE guest pages.
 * @pgt:	 Page-table structure initialised by kvm_pgtable_stage2_init().
 * @addr:	 Intermediate physical address from which to split.
 * @size:	 Size of the range.
 * @mc:		 Cache of pre-allocated and zeroed memory from which to allocate
 *		 page-table pages.
 *
 * The function tries to split any level 1 or 2 entry that overlaps
 * with the input range (given by @addr and @size).
 *
 * Return: 0 on success, negative error code on failure. Note that
 * kvm_pgtable_stage2_split() is best effort: it tries to break as many
 * blocks in the input range as the capacity of @mc allows.
 */
int kvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
			     struct kvm_mmu_memory_cache *mc);

/**
 * kvm_pgtable_walk() - Walk a page-table.
 * @pgt:	Page-table structure initialised by kvm_pgtable_*_init().
 * @addr:	Input address for the start of the walk.
 * @size:	Size of the range to walk.
 * @walker:	Walker callback description.
 *
 * The offset of @addr within a page is ignored and @size is rounded-up to
 * the next page boundary.
 *
 * The walker will walk the page-table entries corresponding to the input
 * address range specified, visiting entries according to the walker flags.
 * Invalid entries are treated as leaf entries. The visited page table entry is
 * reloaded after invoking the walker callback, allowing the walker to descend
 * into a newly installed table.
 *
 * Returning a negative error code from the walker callback function will
 * terminate the walk immediately with the same error code.
 *
 * Return: 0 on success, negative error code on failure.
 */
int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
		     struct kvm_pgtable_walker *walker);
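
/*
 * Illustrative sketch (hypothetical names): running the count_valid_cb
 * walker sketched earlier (next to struct kvm_pgtable_walker) over a 2MB
 * range of a previously initialised table:
 *
 *	struct kvm_pgtable_walker walker = {
 *		.cb	= count_valid_cb,
 *		.arg	= &count,
 *		.flags	= KVM_PGTABLE_WALK_LEAF,
 *	};
 *	int ret = kvm_pgtable_walk(pgt, addr, SZ_2M, &walker);
 */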

/**
 * kvm_pgtable_get_leaf() - Walk a page-table and retrieve the leaf entry
 *			    with its level.
 * @pgt:	Page-table structure initialised by kvm_pgtable_*_init()
 *		or a similar initialiser.
 * @addr:	Input address for the start of the walk.
 * @ptep:	Pointer to storage for the retrieved PTE.
 * @level:	Pointer to storage for the level of the retrieved PTE.
 *
 * The offset of @addr within a page is ignored.
 *
 * The walker will walk the page-table entries corresponding to the input
 * address specified, retrieving the leaf corresponding to this address.
 * Invalid entries are treated as leaf entries.
 *
 * Return: 0 on success, negative error code on failure.
 */
int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr,
			 kvm_pte_t *ptep, s8 *level);
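
/*
 * Illustrative sketch (hypothetical names): checking whether an address is
 * currently backed by a valid leaf mapping and, if so, how large that
 * mapping is:
 *
 *	kvm_pte_t pte;
 *	s8 level;
 *
 *	if (!kvm_pgtable_get_leaf(pgt, addr, &pte, &level) &&
 *	    kvm_pte_valid(pte))
 *		map_size = kvm_granule_size(level);
 */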

/**
 * kvm_pgtable_stage2_pte_prot() - Retrieve the protection attributes of a
 *				   stage-2 Page-Table Entry.
 * @pte:	Page-table entry
 *
 * Return: protection attributes of the page-table entry in the enum
 *	   kvm_pgtable_prot format.
 */
enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte);

/**
 * kvm_pgtable_hyp_pte_prot() - Retrieve the protection attributes of a stage-1
 *				Page-Table Entry.
 * @pte:	Page-table entry
 *
 * Return: protection attributes of the page-table entry in the enum
 *	   kvm_pgtable_prot format.
 */
enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte);

/**
 * kvm_tlb_flush_vmid_range() - Invalidate/flush a range of TLB entries
 *
 * @mmu:	Stage-2 KVM MMU struct
 * @addr:	The base Intermediate physical address from which to invalidate
 * @size:	Size of the range from the base to invalidate
 */
void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
				phys_addr_t addr, size_t size);
#endif	/* __ARM64_KVM_PGTABLE_H__ */