/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * vma.h
 *
 * Core VMA manipulation API implemented in vma.c.
 */
#ifndef __MM_VMA_H
#define __MM_VMA_H

/*
 * VMA lock generalization
 */
struct vma_prepare {
	struct vm_area_struct *vma;
	struct vm_area_struct *adj_next;
	struct file *file;
	struct address_space *mapping;
	struct anon_vma *anon_vma;
	struct vm_area_struct *insert;
	struct vm_area_struct *remove;
	struct vm_area_struct *remove2;
};

struct unlink_vma_file_batch {
	int count;
	struct vm_area_struct *vmas[8];
};

/*
 * vma munmap operation
 */
struct vma_munmap_struct {
	struct vma_iterator *vmi;
	struct vm_area_struct *vma;     /* The first vma to munmap */
	struct vm_area_struct *prev;    /* vma before the munmap area */
	struct vm_area_struct *next;    /* vma after the munmap area */
	struct list_head *uf;           /* Userfaultfd list_head */
	unsigned long start;            /* Aligned start addr (inclusive) */
	unsigned long end;              /* Aligned end addr (exclusive) */
	unsigned long unmap_start;      /* Unmap PTE start */
	unsigned long unmap_end;        /* Unmap PTE end */
	int vma_count;                  /* Number of vmas that will be removed */
	bool unlock;                    /* Unlock after the munmap */
	bool clear_ptes;                /* If there are outstanding PTEs to be cleared */
	/* 2 byte hole */
	unsigned long nr_pages;         /* Number of pages being removed */
	unsigned long locked_vm;        /* Number of locked pages */
	unsigned long nr_accounted;     /* Number of VM_ACCOUNT pages */
	unsigned long exec_vm;
	unsigned long stack_vm;
	unsigned long data_vm;
};

enum vma_merge_state {
	VMA_MERGE_START,
	VMA_MERGE_ERROR_NOMEM,
	VMA_MERGE_NOMERGE,
	VMA_MERGE_SUCCESS,
};

enum vma_merge_flags {
	VMG_FLAG_DEFAULT = 0,
	/*
	 * If we can expand, simply do so. We know there is nothing to merge to
	 * the right. Does not reset state upon failure to merge. The VMA
	 * iterator is assumed to be positioned at the previous VMA, rather than
	 * at the gap.
	 */
	VMG_FLAG_JUST_EXPAND = 1 << 0,
};

/* Represents a VMA merge operation. */
struct vma_merge_struct {
	struct mm_struct *mm;
	struct vma_iterator *vmi;
	pgoff_t pgoff;
	struct vm_area_struct *prev;
	struct vm_area_struct *next; /* Modified by vma_merge(). */
	struct vm_area_struct *vma; /* Either a new VMA or the one being modified. */
	unsigned long start;
	unsigned long end;
	unsigned long flags;
	struct file *file;
	struct anon_vma *anon_vma;
	struct mempolicy *policy;
	struct vm_userfaultfd_ctx uffd_ctx;
	struct anon_vma_name *anon_name;
	enum vma_merge_flags merge_flags;
	enum vma_merge_state state;
};

static inline bool vmg_nomem(struct vma_merge_struct *vmg)
{
	return vmg->state == VMA_MERGE_ERROR_NOMEM;
}

/* Assumes addr >= vma->vm_start. */
static inline pgoff_t vma_pgoff_offset(struct vm_area_struct *vma,
				       unsigned long addr)
{
	return vma->vm_pgoff + PHYS_PFN(addr - vma->vm_start);
}
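
/*
 * Example: for a hypothetical VMA with vm_start == 0x400000 and
 * vm_pgoff == 0x10, vma_pgoff_offset(vma, 0x403000) evaluates to
 * 0x10 + PHYS_PFN(0x3000) == 0x13 on a kernel with 4KiB pages.
 */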

#define VMG_STATE(name, mm_, vmi_, start_, end_, flags_, pgoff_)	\
	struct vma_merge_struct name = {				\
		.mm = mm_,						\
		.vmi = vmi_,						\
		.start = start_,					\
		.end = end_,						\
		.flags = flags_,					\
		.pgoff = pgoff_,					\
		.state = VMA_MERGE_START,				\
		.merge_flags = VMG_FLAG_DEFAULT,			\
	}

#define VMG_VMA_STATE(name, vmi_, prev_, vma_, start_, end_)	\
	struct vma_merge_struct name = {			\
		.mm = vma_->vm_mm,				\
		.vmi = vmi_,					\
		.prev = prev_,					\
		.next = NULL,					\
		.vma = vma_,					\
		.start = start_,				\
		.end = end_,					\
		.flags = vma_->vm_flags,			\
		.pgoff = vma_pgoff_offset(vma_, start_),	\
		.file = vma_->vm_file,				\
		.anon_vma = vma_->anon_vma,			\
		.policy = vma_policy(vma_),			\
		.uffd_ctx = vma_->vm_userfaultfd_ctx,		\
		.anon_name = anon_vma_name(vma_),		\
		.state = VMA_MERGE_START,			\
		.merge_flags = VMG_FLAG_DEFAULT,		\
	}

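/*
 * Illustrative sketch only (not lifted from mm/ sources): a caller trying to
 * merge a brand new range with its neighbours might, with mmap_lock held for
 * write and addr/len/vm_flags/pgoff being its own mapping parameters, do
 * roughly:
 *
 *	VMG_STATE(vmg, mm, &vmi, addr, addr + len, vm_flags, pgoff);
 *	vmg.next = vma_iter_next_rewind(&vmi, &vmg.prev);
 *	vma = vma_merge_new_range(&vmg);
 *	if (!vma && vmg_nomem(&vmg))
 *		return -ENOMEM;
 *
 * VMG_VMA_STATE() is the analogous initializer when an existing VMA is being
 * modified rather than a new range being mapped.
 */
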
#ifdef CONFIG_DEBUG_VM_MAPLE_TREE
void validate_mm(struct mm_struct *mm);
#else
#define validate_mm(mm) do { } while (0)
#endif

/* Required for expand_downwards(). */
void anon_vma_interval_tree_pre_update_vma(struct vm_area_struct *vma);

/* Required for expand_downwards(). */
void anon_vma_interval_tree_post_update_vma(struct vm_area_struct *vma);

int vma_expand(struct vma_merge_struct *vmg);
int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma,
	       unsigned long start, unsigned long end, pgoff_t pgoff);

static inline int vma_iter_store_gfp(struct vma_iterator *vmi,
			struct vm_area_struct *vma, gfp_t gfp)
{
	if (vmi->mas.status != ma_start &&
	    ((vmi->mas.index > vma->vm_start) || (vmi->mas.last < vma->vm_start)))
		vma_iter_invalidate(vmi);

	__mas_set_range(&vmi->mas, vma->vm_start, vma->vm_end - 1);
	mas_store_gfp(&vmi->mas, vma, gfp);
	if (unlikely(mas_is_err(&vmi->mas)))
		return -ENOMEM;

	return 0;
}

#ifdef CONFIG_MMU
/*
 * init_vma_munmap() - Initializer wrapper for vma_munmap_struct
 * @vms: The vma munmap struct
 * @vmi: The vma iterator
 * @vma: The first vm_area_struct to munmap
 * @start: The aligned start address to munmap
 * @end: The aligned end address to munmap
 * @uf: The userfaultfd list_head
 * @unlock: Unlock after the operation.  Only unlocked on success
 */
static inline void init_vma_munmap(struct vma_munmap_struct *vms,
		struct vma_iterator *vmi, struct vm_area_struct *vma,
		unsigned long start, unsigned long end, struct list_head *uf,
		bool unlock)
{
	vms->vmi = vmi;
	vms->vma = vma;
	if (vma) {
		vms->start = start;
		vms->end = end;
	} else {
		vms->start = vms->end = 0;
	}
	vms->unlock = unlock;
	vms->uf = uf;
	vms->vma_count = 0;
	vms->nr_pages = vms->locked_vm = vms->nr_accounted = 0;
	vms->exec_vm = vms->stack_vm = vms->data_vm = 0;
	vms->unmap_start = FIRST_USER_ADDRESS;
	vms->unmap_end = USER_PGTABLES_CEILING;
	vms->clear_ptes = false;
}
#endif

int vms_gather_munmap_vmas(struct vma_munmap_struct *vms,
		struct ma_state *mas_detach);

void vms_complete_munmap_vmas(struct vma_munmap_struct *vms,
		struct ma_state *mas_detach);

void vms_clean_up_area(struct vma_munmap_struct *vms,
		struct ma_state *mas_detach);
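
/*
 * Typical flow, as a rough sketch (mt_detach, vms and the surrounding locals
 * are hypothetical caller state): the VMAs to be removed are first gathered
 * into a private "detached" maple tree, and the destructive work is only
 * committed once nothing else can fail:
 *
 *	MA_STATE(mas_detach, &mt_detach, 0, 0);
 *
 *	init_vma_munmap(&vms, vmi, vma, start, end, uf, unlock);
 *	error = vms_gather_munmap_vmas(&vms, &mas_detach);
 *	if (error)
 *		goto gather_failed;
 *	...
 *	vms_complete_munmap_vmas(&vms, &mas_detach);
 *
 * Error handling for gather_failed, and for failures after gathering, is
 * sketched after vms_abort_munmap_vmas() below.
 */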

/*
 * reattach_vmas() - Undo any munmap work and free resources
 * @mas_detach: The maple state with the detached maple tree
 *
 * Reattach any detached vmas and free up the maple tree used to track the vmas.
 */
static inline void reattach_vmas(struct ma_state *mas_detach)
{
	struct vm_area_struct *vma;

	mas_set(mas_detach, 0);
	mas_for_each(mas_detach, vma, ULONG_MAX)
		vma_mark_detached(vma, false);

	__mt_destroy(mas_detach->tree);
}
/*
 * vms_abort_munmap_vmas() - Undo as much as possible from an aborted munmap()
 * operation.
 * @vms: The vma unmap structure
 * @mas_detach: The maple state with the detached maple tree
 *
 * Reattach any detached vmas and free up the maple tree used to track the vmas.
 * If that's not possible because the ptes are cleared (and vm_ops->close() may
 * have been called), then a NULL is written over the vmas and the vmas are
 * removed (munmap() completed).
 */
static inline void vms_abort_munmap_vmas(struct vma_munmap_struct *vms,
		struct ma_state *mas_detach)
{
	struct ma_state *mas = &vms->vmi->mas;

	if (!vms->nr_pages)
		return;

	if (vms->clear_ptes)
		return reattach_vmas(mas_detach);

	/*
	 * Aborting cannot just call the vm_ops open() because they are often
	 * not symmetrical and state data has been lost.  Resort to the old
	 * failure method of leaving a gap where the MAP_FIXED mapping failed.
	 */
	mas_set_range(mas, vms->start, vms->end - 1);
	mas_store_gfp(mas, NULL, GFP_KERNEL|__GFP_NOFAIL);
	/* Clean up the insertion of the unfortunate gap */
	vms_complete_munmap_vmas(vms, mas_detach);
}
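
/*
 * Continuing the sketch above: if vms_gather_munmap_vmas() itself failed,
 * nothing destructive has happened yet and the gathered VMAs can simply be
 * put back:
 *
 *	gather_failed:
 *		reattach_vmas(&mas_detach);
 *
 * whereas a failure in a later step, after PTEs may already have been cleared,
 * has to fall back to vms_abort_munmap_vmas(&vms, &mas_detach), which either
 * reattaches or, if that is no longer possible, leaves a gap and completes the
 * removal.
 */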

int
do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
		    struct mm_struct *mm, unsigned long start,
		    unsigned long end, struct list_head *uf, bool unlock);

int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm,
		  unsigned long start, size_t len, struct list_head *uf,
		  bool unlock);

void remove_vma(struct vm_area_struct *vma, bool unreachable);

void unmap_region(struct ma_state *mas, struct vm_area_struct *vma,
		struct vm_area_struct *prev, struct vm_area_struct *next);

/* We are about to modify the VMA's flags. */
struct vm_area_struct *vma_modify_flags(struct vma_iterator *vmi,
		struct vm_area_struct *prev, struct vm_area_struct *vma,
		unsigned long start, unsigned long end,
		unsigned long new_flags);
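
/*
 * The vma_modify_*() helpers merge and/or split VMAs so that the returned VMA
 * covers just the range being modified, or return an ERR_PTR() on failure.
 * A rough usage sketch (modelled on an mprotect-style caller, not copied from
 * one; the vm_flags_reset() follow-up is illustrative):
 *
 *	vma = vma_modify_flags(&vmi, prev, vma, start, end, new_flags);
 *	if (IS_ERR(vma))
 *		return PTR_ERR(vma);
 *	vm_flags_reset(vma, new_flags);
 */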

/* We are about to modify the VMA's flags and/or anon_name. */
struct vm_area_struct
*vma_modify_flags_name(struct vma_iterator *vmi,
		       struct vm_area_struct *prev,
		       struct vm_area_struct *vma,
		       unsigned long start,
		       unsigned long end,
		       unsigned long new_flags,
		       struct anon_vma_name *new_name);

/* We are about to modify the VMA's memory policy. */
struct vm_area_struct
*vma_modify_policy(struct vma_iterator *vmi,
		   struct vm_area_struct *prev,
		   struct vm_area_struct *vma,
		   unsigned long start, unsigned long end,
		   struct mempolicy *new_pol);

/* We are about to modify the VMA's flags and/or uffd context. */
struct vm_area_struct
*vma_modify_flags_uffd(struct vma_iterator *vmi,
		       struct vm_area_struct *prev,
		       struct vm_area_struct *vma,
		       unsigned long start, unsigned long end,
		       unsigned long new_flags,
		       struct vm_userfaultfd_ctx new_ctx);

struct vm_area_struct *vma_merge_new_range(struct vma_merge_struct *vmg);

struct vm_area_struct *vma_merge_extend(struct vma_iterator *vmi,
					struct vm_area_struct *vma,
					unsigned long delta);

void unlink_file_vma_batch_init(struct unlink_vma_file_batch *vb);

void unlink_file_vma_batch_final(struct unlink_vma_file_batch *vb);

void unlink_file_vma_batch_add(struct unlink_vma_file_batch *vb,
			       struct vm_area_struct *vma);
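
/*
 * Batching sketch: when tearing down a run of VMAs, unlinking their file
 * rmap entries can be batched (up to 8 at a time, see unlink_vma_file_batch)
 * so the mapping locks are taken less often. Roughly, with vb, vmi and vma
 * being hypothetical caller locals:
 *
 *	struct unlink_vma_file_batch vb;
 *
 *	unlink_file_vma_batch_init(&vb);
 *	for_each_vma(vmi, vma)
 *		unlink_file_vma_batch_add(&vb, vma);
 *	unlink_file_vma_batch_final(&vb);
 */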

void unlink_file_vma(struct vm_area_struct *vma);

void vma_link_file(struct vm_area_struct *vma);

int vma_link(struct mm_struct *mm, struct vm_area_struct *vma);

struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
	unsigned long addr, unsigned long len, pgoff_t pgoff,
	bool *need_rmap_locks);

struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma);

bool vma_needs_dirty_tracking(struct vm_area_struct *vma);
bool vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot);

int mm_take_all_locks(struct mm_struct *mm);
void mm_drop_all_locks(struct mm_struct *mm);

static inline bool vma_wants_manual_pte_write_upgrade(struct vm_area_struct *vma)
{
	/*
	 * We want to check manually whether we can mark individual PTEs
	 * writable if we cannot do that automatically for all PTEs in a
	 * mapping. For private mappings, that's always the case when we have
	 * write permissions as we properly have to handle COW.
	 */
	if (vma->vm_flags & VM_SHARED)
		return vma_wants_writenotify(vma, vma->vm_page_prot);
	return !!(vma->vm_flags & VM_WRITE);
}

#ifdef CONFIG_MMU
static inline pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags)
{
	return pgprot_modify(oldprot, vm_get_page_prot(vm_flags));
}
#endif

static inline struct vm_area_struct *vma_prev_limit(struct vma_iterator *vmi,
						    unsigned long min)
{
	return mas_prev(&vmi->mas, min);
}
/*
 * These three helpers classify VMAs for virtual memory accounting.
 */

/*
 * Executable code area - executable, not writable, not stack
 */
static inline bool is_exec_mapping(vm_flags_t flags)
{
	return (flags & (VM_EXEC | VM_WRITE | VM_STACK)) == VM_EXEC;
}

/*
 * Stack area (including shadow stacks)
 *
 * VM_GROWSUP / VM_GROWSDOWN VMAs are always private anonymous:
 * do_mmap() forbids all other combinations.
 */
static inline bool is_stack_mapping(vm_flags_t flags)
{
	return ((flags & VM_STACK) == VM_STACK) || (flags & VM_SHADOW_STACK);
}

/*
 * Data area - private, writable, not stack
 */
static inline bool is_data_mapping(vm_flags_t flags)
{
	return (flags & (VM_WRITE | VM_SHARED | VM_STACK)) == VM_WRITE;
}
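
/*
 * For illustration, assuming the usual mmap() protection translation: a
 * private PROT_READ|PROT_EXEC file mapping (VM_READ|VM_EXEC) counts as exec,
 * a private PROT_READ|PROT_WRITE anonymous mapping (VM_READ|VM_WRITE) counts
 * as data, and a MAP_GROWSDOWN mapping counts as stack on architectures where
 * VM_STACK is VM_GROWSDOWN.
 */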

static inline void vma_iter_config(struct vma_iterator *vmi,
		unsigned long index, unsigned long last)
{
	__mas_set_range(&vmi->mas, index, last - 1);
}

static inline void vma_iter_reset(struct vma_iterator *vmi)
{
	mas_reset(&vmi->mas);
}

static inline
struct vm_area_struct *vma_iter_prev_range_limit(struct vma_iterator *vmi, unsigned long min)
{
	return mas_prev_range(&vmi->mas, min);
}

static inline
struct vm_area_struct *vma_iter_next_range_limit(struct vma_iterator *vmi, unsigned long max)
{
	return mas_next_range(&vmi->mas, max);
}

static inline int vma_iter_area_lowest(struct vma_iterator *vmi, unsigned long min,
				       unsigned long max, unsigned long size)
{
	return mas_empty_area(&vmi->mas, min, max - 1, size);
}

static inline int vma_iter_area_highest(struct vma_iterator *vmi, unsigned long min,
					unsigned long max, unsigned long size)
{
	return mas_empty_area_rev(&vmi->mas, min, max - 1, size);
}

/*
 * VMA Iterator functions shared between nommu and mmap
 */
static inline int vma_iter_prealloc(struct vma_iterator *vmi,
		struct vm_area_struct *vma)
{
	return mas_preallocate(&vmi->mas, vma, GFP_KERNEL);
}

static inline void vma_iter_clear(struct vma_iterator *vmi)
{
	mas_store_prealloc(&vmi->mas, NULL);
}

static inline struct vm_area_struct *vma_iter_load(struct vma_iterator *vmi)
{
	return mas_walk(&vmi->mas);
}

/* Store a VMA with preallocated memory */
static inline void vma_iter_store(struct vma_iterator *vmi,
				  struct vm_area_struct *vma)
{

#if defined(CONFIG_DEBUG_VM_MAPLE_TREE)
	if (MAS_WARN_ON(&vmi->mas, vmi->mas.status != ma_start &&
			vmi->mas.index > vma->vm_start)) {
		pr_warn("%lx > %lx\nstore vma %lx-%lx\ninto slot %lx-%lx\n",
			vmi->mas.index, vma->vm_start, vma->vm_start,
			vma->vm_end, vmi->mas.index, vmi->mas.last);
	}
	if (MAS_WARN_ON(&vmi->mas, vmi->mas.status != ma_start &&
			vmi->mas.last < vma->vm_start)) {
		pr_warn("%lx < %lx\nstore vma %lx-%lx\ninto slot %lx-%lx\n",
		       vmi->mas.last, vma->vm_start, vma->vm_start, vma->vm_end,
		       vmi->mas.index, vmi->mas.last);
	}
#endif

	if (vmi->mas.status != ma_start &&
	    ((vmi->mas.index > vma->vm_start) || (vmi->mas.last < vma->vm_start)))
		vma_iter_invalidate(vmi);

	__mas_set_range(&vmi->mas, vma->vm_start, vma->vm_end - 1);
	mas_store_prealloc(&vmi->mas, vma);
}
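
/*
 * Typical write pattern (a sketch): the store is split from the allocation so
 * that the only step that can fail happens before any tree state is touched:
 *
 *	if (vma_iter_prealloc(&vmi, vma))
 *		return -ENOMEM;
 *	vma_iter_store(&vmi, vma);
 */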

static inline unsigned long vma_iter_addr(struct vma_iterator *vmi)
{
	return vmi->mas.index;
}

static inline unsigned long vma_iter_end(struct vma_iterator *vmi)
{
	return vmi->mas.last + 1;
}

static inline int vma_iter_bulk_alloc(struct vma_iterator *vmi,
				      unsigned long count)
{
	return mas_expected_entries(&vmi->mas, count);
}

static inline
struct vm_area_struct *vma_iter_prev_range(struct vma_iterator *vmi)
{
	return mas_prev_range(&vmi->mas, 0);
}

/*
 * Retrieve the next VMA and rewind the iterator to the end of the previous
 * VMA, or to index 0 if there is no previous VMA.
 */
static inline
struct vm_area_struct *vma_iter_next_rewind(struct vma_iterator *vmi,
		struct vm_area_struct **pprev)
{
	struct vm_area_struct *next = vma_next(vmi);
	struct vm_area_struct *prev = vma_prev(vmi);

	/*
	 * Consider the case where no previous VMA exists. We advance to the
	 * next VMA, skipping any gap, then rewind to the start of the range.
	 *
	 * If we were to unconditionally advance to the next range we'd wind up
	 * at the next VMA again, so we check to ensure there is a previous VMA
	 * to skip over.
	 */
	if (prev)
		vma_iter_next_range(vmi);

	if (pprev)
		*pprev = prev;

	return next;
}

#ifdef CONFIG_64BIT

static inline bool vma_is_sealed(struct vm_area_struct *vma)
{
	return (vma->vm_flags & VM_SEALED);
}

/*
 * Check whether a VMA is sealed and thus cannot be modified.
 * Returns true if modification is allowed.
 */
static inline bool can_modify_vma(struct vm_area_struct *vma)
{
	if (unlikely(vma_is_sealed(vma)))
		return false;

	return true;
}

bool can_modify_vma_madv(struct vm_area_struct *vma, int behavior);

#else

static inline bool can_modify_vma(struct vm_area_struct *vma)
{
	return true;
}

static inline bool can_modify_vma_madv(struct vm_area_struct *vma, int behavior)
{
	return true;
}

#endif

#endif	/* __MM_VMA_H */