1  // SPDX-License-Identifier: GPL-2.0+
2  /*
3   * Maple Tree implementation
4   * Copyright (c) 2018-2022 Oracle Corporation
5   * Authors: Liam R. Howlett <Liam.Howlett@oracle.com>
6   *	    Matthew Wilcox <willy@infradead.org>
7   * Copyright (c) 2023 ByteDance
8   * Author: Peng Zhang <zhangpeng.00@bytedance.com>
9   */
10  
11  /*
12   * DOC: Interesting implementation details of the Maple Tree
13   *
14   * Each node type has a number of slots for entries and a number of slots for
15   * pivots.  In the case of dense nodes, the pivots are implied by the position
16   * and are simply the slot index + the minimum of the node.
17   *
18   * In regular B-Tree terms, pivots are called keys.  The term pivot is used to
19   * indicate that the tree is specifying ranges.  Pivots may appear in the
20   * subtree with an entry attached to the value whereas keys are unique to a
21   * specific position of a B-tree.  Pivot values are inclusive of the slot with
22   * the same index.
23   *
24   *
 * The following illustrates the layout of a range64 node's slots and pivots.
26   *
27   *
28   *  Slots -> | 0 | 1 | 2 | ... | 12 | 13 | 14 | 15 |
29   *           ┬   ┬   ┬   ┬     ┬    ┬    ┬    ┬    ┬
30   *           │   │   │   │     │    │    │    │    └─ Implied maximum
31   *           │   │   │   │     │    │    │    └─ Pivot 14
32   *           │   │   │   │     │    │    └─ Pivot 13
33   *           │   │   │   │     │    └─ Pivot 12
34   *           │   │   │   │     └─ Pivot 11
35   *           │   │   │   └─ Pivot 2
36   *           │   │   └─ Pivot 1
37   *           │   └─ Pivot 0
38   *           └─  Implied minimum
39   *
40   * Slot contents:
41   *  Internal (non-leaf) nodes contain pointers to other nodes.
42   *  Leaf nodes contain entries.
43   *
44   * The location of interest is often referred to as an offset.  All offsets have
45   * a slot, but the last offset has an implied pivot from the node above (or
 * ULONG_MAX for the root node).
47   *
48   * Ranges complicate certain write activities.  When modifying any of
49   * the B-tree variants, it is known that one entry will either be added or
50   * deleted.  When modifying the Maple Tree, one store operation may overwrite
51   * the entire data set, or one half of the tree, or the middle half of the tree.
52   *
53   */
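
/*
 * Illustrative example (not part of the implementation): in a leaf range64
 * node with minimum 0 and maximum 100, storing entry A over the range 5-10
 * and entry B over the range 11-20 could lay out as:
 *
 *  slot[0] = NULL, pivot[0] = 4	(nothing stored over 0-4)
 *  slot[1] = A,    pivot[1] = 10	(A covers 5-10)
 *  slot[2] = B,    pivot[2] = 20	(B covers 11-20)
 *  slot[3] = NULL, pivot[3] = 100	(nothing stored over 21-100)
 *
 * Each pivot is the inclusive upper bound of the slot with the same index;
 * the lower bound is the previous pivot + 1, or the node minimum for slot 0.
 */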
54  
55  
56  #include <linux/maple_tree.h>
57  #include <linux/xarray.h>
58  #include <linux/types.h>
59  #include <linux/export.h>
60  #include <linux/slab.h>
61  #include <linux/limits.h>
62  #include <asm/barrier.h>
63  
64  #define CREATE_TRACE_POINTS
65  #include <trace/events/maple_tree.h>
66  
67  #define MA_ROOT_PARENT 1
68  
69  /*
70   * Maple state flags
71   * * MA_STATE_BULK		- Bulk insert mode
72   * * MA_STATE_REBALANCE		- Indicate a rebalance during bulk insert
73   * * MA_STATE_PREALLOC		- Preallocated nodes, WARN_ON allocation
74   */
75  #define MA_STATE_BULK		1
76  #define MA_STATE_REBALANCE	2
77  #define MA_STATE_PREALLOC	4
78  
79  #define ma_parent_ptr(x) ((struct maple_pnode *)(x))
80  #define mas_tree_parent(x) ((unsigned long)(x->tree) | MA_ROOT_PARENT)
81  #define ma_mnode_ptr(x) ((struct maple_node *)(x))
82  #define ma_enode_ptr(x) ((struct maple_enode *)(x))
83  static struct kmem_cache *maple_node_cache;
84  
85  #ifdef CONFIG_DEBUG_MAPLE_TREE
86  static const unsigned long mt_max[] = {
87  	[maple_dense]		= MAPLE_NODE_SLOTS,
88  	[maple_leaf_64]		= ULONG_MAX,
89  	[maple_range_64]	= ULONG_MAX,
90  	[maple_arange_64]	= ULONG_MAX,
91  };
92  #define mt_node_max(x) mt_max[mte_node_type(x)]
93  #endif
94  
95  static const unsigned char mt_slots[] = {
96  	[maple_dense]		= MAPLE_NODE_SLOTS,
97  	[maple_leaf_64]		= MAPLE_RANGE64_SLOTS,
98  	[maple_range_64]	= MAPLE_RANGE64_SLOTS,
99  	[maple_arange_64]	= MAPLE_ARANGE64_SLOTS,
100  };
101  #define mt_slot_count(x) mt_slots[mte_node_type(x)]
102  
103  static const unsigned char mt_pivots[] = {
104  	[maple_dense]		= 0,
105  	[maple_leaf_64]		= MAPLE_RANGE64_SLOTS - 1,
106  	[maple_range_64]	= MAPLE_RANGE64_SLOTS - 1,
107  	[maple_arange_64]	= MAPLE_ARANGE64_SLOTS - 1,
108  };
109  #define mt_pivot_count(x) mt_pivots[mte_node_type(x)]
110  
111  static const unsigned char mt_min_slots[] = {
112  	[maple_dense]		= MAPLE_NODE_SLOTS / 2,
113  	[maple_leaf_64]		= (MAPLE_RANGE64_SLOTS / 2) - 2,
114  	[maple_range_64]	= (MAPLE_RANGE64_SLOTS / 2) - 2,
115  	[maple_arange_64]	= (MAPLE_ARANGE64_SLOTS / 2) - 1,
116  };
117  #define mt_min_slot_count(x) mt_min_slots[mte_node_type(x)]
118  
119  #define MAPLE_BIG_NODE_SLOTS	(MAPLE_RANGE64_SLOTS * 2 + 2)
120  #define MAPLE_BIG_NODE_GAPS	(MAPLE_ARANGE64_SLOTS * 2 + 1)
121  
122  struct maple_big_node {
123  	struct maple_pnode *parent;
124  	unsigned long pivot[MAPLE_BIG_NODE_SLOTS - 1];
125  	union {
126  		struct maple_enode *slot[MAPLE_BIG_NODE_SLOTS];
127  		struct {
128  			unsigned long padding[MAPLE_BIG_NODE_GAPS];
129  			unsigned long gap[MAPLE_BIG_NODE_GAPS];
130  		};
131  	};
132  	unsigned char b_end;
133  	enum maple_type type;
134  };
135  
136  /*
137   * The maple_subtree_state is used to build a tree to replace a segment of an
138   * existing tree in a more atomic way.  Any walkers of the older tree will hit a
139   * dead node and restart on updates.
140   */
141  struct maple_subtree_state {
142  	struct ma_state *orig_l;	/* Original left side of subtree */
143  	struct ma_state *orig_r;	/* Original right side of subtree */
144  	struct ma_state *l;		/* New left side of subtree */
145  	struct ma_state *m;		/* New middle of subtree (rare) */
146  	struct ma_state *r;		/* New right side of subtree */
147  	struct ma_topiary *free;	/* nodes to be freed */
148  	struct ma_topiary *destroy;	/* Nodes to be destroyed (walked and freed) */
149  	struct maple_big_node *bn;
150  };
151  
152  #ifdef CONFIG_KASAN_STACK
153  /* Prevent mas_wr_bnode() from exceeding the stack frame limit */
154  #define noinline_for_kasan noinline_for_stack
155  #else
156  #define noinline_for_kasan inline
157  #endif
158  
159  /* Functions */
static inline struct maple_node *mt_alloc_one(gfp_t gfp)
161  {
162  	return kmem_cache_alloc(maple_node_cache, gfp);
163  }
164  
static inline int mt_alloc_bulk(gfp_t gfp, size_t size, void **nodes)
166  {
167  	return kmem_cache_alloc_bulk(maple_node_cache, gfp, size, nodes);
168  }
169  
static inline void mt_free_one(struct maple_node *node)
171  {
172  	kmem_cache_free(maple_node_cache, node);
173  }
174  
static inline void mt_free_bulk(size_t size, void __rcu **nodes)
176  {
177  	kmem_cache_free_bulk(maple_node_cache, size, (void **)nodes);
178  }
179  
static void mt_free_rcu(struct rcu_head *head)
181  {
182  	struct maple_node *node = container_of(head, struct maple_node, rcu);
183  
184  	kmem_cache_free(maple_node_cache, node);
185  }
186  
187  /*
188   * ma_free_rcu() - Use rcu callback to free a maple node
189   * @node: The node to free
190   *
191   * The maple tree uses the parent pointer to indicate this node is no longer in
192   * use and will be freed.
193   */
static void ma_free_rcu(struct maple_node *node)
195  {
196  	WARN_ON(node->parent != ma_parent_ptr(node));
197  	call_rcu(&node->rcu, mt_free_rcu);
198  }
199  
static void mas_set_height(struct ma_state *mas)
201  {
202  	unsigned int new_flags = mas->tree->ma_flags;
203  
204  	new_flags &= ~MT_FLAGS_HEIGHT_MASK;
205  	MAS_BUG_ON(mas, mas->depth > MAPLE_HEIGHT_MAX);
206  	new_flags |= mas->depth << MT_FLAGS_HEIGHT_OFFSET;
207  	mas->tree->ma_flags = new_flags;
208  }
209  
static unsigned int mas_mt_height(struct ma_state *mas)
211  {
212  	return mt_height(mas->tree);
213  }
214  
static inline unsigned int mt_attr(struct maple_tree *mt)
216  {
217  	return mt->ma_flags & ~MT_FLAGS_HEIGHT_MASK;
218  }
219  
static __always_inline enum maple_type mte_node_type(
221  		const struct maple_enode *entry)
222  {
223  	return ((unsigned long)entry >> MAPLE_NODE_TYPE_SHIFT) &
224  		MAPLE_NODE_TYPE_MASK;
225  }
226  
static __always_inline bool ma_is_dense(const enum maple_type type)
228  {
229  	return type < maple_leaf_64;
230  }
231  
static __always_inline bool ma_is_leaf(const enum maple_type type)
233  {
234  	return type < maple_range_64;
235  }
236  
static __always_inline bool mte_is_leaf(const struct maple_enode *entry)
238  {
239  	return ma_is_leaf(mte_node_type(entry));
240  }
241  
242  /*
243   * We also reserve values with the bottom two bits set to '10' which are
244   * below 4096
245   */
static __always_inline bool mt_is_reserved(const void *entry)
247  {
248  	return ((unsigned long)entry < MAPLE_RESERVED_RANGE) &&
249  		xa_is_internal(entry);
250  }
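
/*
 * For example, XA_ZERO_ENTRY (an xarray internal entry with a value below
 * MAPLE_RESERVED_RANGE) is treated as reserved, while ordinary kernel
 * pointers and xa_mk_value() entries are not.
 */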
251  
static __always_inline void mas_set_err(struct ma_state *mas, long err)
253  {
254  	mas->node = MA_ERROR(err);
255  	mas->status = ma_error;
256  }
257  
static __always_inline bool mas_is_ptr(const struct ma_state *mas)
259  {
260  	return mas->status == ma_root;
261  }
262  
static __always_inline bool mas_is_start(const struct ma_state *mas)
264  {
265  	return mas->status == ma_start;
266  }
267  
static __always_inline bool mas_is_none(const struct ma_state *mas)
269  {
270  	return mas->status == ma_none;
271  }
272  
static __always_inline bool mas_is_paused(const struct ma_state *mas)
274  {
275  	return mas->status == ma_pause;
276  }
277  
static __always_inline bool mas_is_overflow(struct ma_state *mas)
279  {
280  	return mas->status == ma_overflow;
281  }
282  
static inline bool mas_is_underflow(struct ma_state *mas)
284  {
285  	return mas->status == ma_underflow;
286  }
287  
static __always_inline struct maple_node *mte_to_node(
289  		const struct maple_enode *entry)
290  {
291  	return (struct maple_node *)((unsigned long)entry & ~MAPLE_NODE_MASK);
292  }
293  
294  /*
295   * mte_to_mat() - Convert a maple encoded node to a maple topiary node.
296   * @entry: The maple encoded node
297   *
298   * Return: a maple topiary pointer
299   */
static inline struct maple_topiary *mte_to_mat(const struct maple_enode *entry)
301  {
302  	return (struct maple_topiary *)
303  		((unsigned long)entry & ~MAPLE_NODE_MASK);
304  }
305  
306  /*
307   * mas_mn() - Get the maple state node.
308   * @mas: The maple state
309   *
310   * Return: the maple node (not encoded - bare pointer).
311   */
static inline struct maple_node *mas_mn(const struct ma_state *mas)
313  {
314  	return mte_to_node(mas->node);
315  }
316  
317  /*
318   * mte_set_node_dead() - Set a maple encoded node as dead.
319   * @mn: The maple encoded node.
320   */
static inline void mte_set_node_dead(struct maple_enode *mn)
322  {
323  	mte_to_node(mn)->parent = ma_parent_ptr(mte_to_node(mn));
324  	smp_wmb(); /* Needed for RCU */
325  }
326  
327  /* Bit 1 indicates the root is a node */
328  #define MAPLE_ROOT_NODE			0x02
329  /* maple_type stored bit 3-6 */
330  #define MAPLE_ENODE_TYPE_SHIFT		0x03
331  /* Bit 2 means a NULL somewhere below */
332  #define MAPLE_ENODE_NULL		0x04
333  
static inline struct maple_enode *mt_mk_node(const struct maple_node *node,
335  					     enum maple_type type)
336  {
337  	return (void *)((unsigned long)node |
338  			(type << MAPLE_ENODE_TYPE_SHIFT) | MAPLE_ENODE_NULL);
339  }
340  
static inline void *mte_mk_root(const struct maple_enode *node)
342  {
343  	return (void *)((unsigned long)node | MAPLE_ROOT_NODE);
344  }
345  
static inline void *mte_safe_root(const struct maple_enode *node)
347  {
348  	return (void *)((unsigned long)node & ~MAPLE_ROOT_NODE);
349  }
350  
static inline void __maybe_unused *mte_set_full(const struct maple_enode *node)
352  {
353  	return (void *)((unsigned long)node & ~MAPLE_ENODE_NULL);
354  }
355  
static inline void __maybe_unused *mte_clear_full(const struct maple_enode *node)
357  {
358  	return (void *)((unsigned long)node | MAPLE_ENODE_NULL);
359  }
360  
static inline bool __maybe_unused mte_has_null(const struct maple_enode *node)
362  {
363  	return (unsigned long)node & MAPLE_ENODE_NULL;
364  }
365  
static __always_inline bool ma_is_root(struct maple_node *node)
367  {
368  	return ((unsigned long)node->parent & MA_ROOT_PARENT);
369  }
370  
static __always_inline bool mte_is_root(const struct maple_enode *node)
372  {
373  	return ma_is_root(mte_to_node(node));
374  }
375  
static inline bool mas_is_root_limits(const struct ma_state *mas)
377  {
378  	return !mas->min && mas->max == ULONG_MAX;
379  }
380  
static __always_inline bool mt_is_alloc(struct maple_tree *mt)
382  {
383  	return (mt->ma_flags & MT_FLAGS_ALLOC_RANGE);
384  }
385  
386  /*
387   * The Parent Pointer
388   * Excluding root, the parent pointer is 256B aligned like all other tree nodes.
 * When storing 32 or 64 bit values, the offset can fit into 5 bits.  The 16
390   * bit values need an extra bit to store the offset.  This extra bit comes from
391   * a reuse of the last bit in the node type.  This is possible by using bit 1 to
392   * indicate if bit 2 is part of the type or the slot.
393   *
 * Node types:
395   *  0x??1 = Root
396   *  0x?00 = 16 bit nodes
397   *  0x010 = 32 bit nodes
398   *  0x110 = 64 bit nodes
399   *
400   * Slot size and alignment
401   *  0b??1 : Root
402   *  0b?00 : 16 bit values, type in 0-1, slot in 2-7
403   *  0b010 : 32 bit values, type in 0-2, slot in 3-7
404   *  0b110 : 64 bit values, type in 0-2, slot in 3-7
405   */
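
/*
 * Illustrative example (not part of the implementation): for a child placed
 * in slot 5 of a 64 bit range parent, mas_set_parent() below builds the
 * parent pointer roughly as:
 *
 *	val  = (unsigned long)parent & ~MAPLE_NODE_MASK;
 *	val |= (5 << MAPLE_PARENT_SLOT_SHIFT) | MAPLE_PARENT_RANGE64;
 *
 * mte_parent_slot() recovers the 5 by masking off the type bits and shifting
 * right, and mte_parent() recovers the parent node by clearing
 * MAPLE_NODE_MASK.
 */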
406  
407  #define MAPLE_PARENT_ROOT		0x01
408  
409  #define MAPLE_PARENT_SLOT_SHIFT		0x03
410  #define MAPLE_PARENT_SLOT_MASK		0xF8
411  
412  #define MAPLE_PARENT_16B_SLOT_SHIFT	0x02
413  #define MAPLE_PARENT_16B_SLOT_MASK	0xFC
414  
415  #define MAPLE_PARENT_RANGE64		0x06
416  #define MAPLE_PARENT_RANGE32		0x04
417  #define MAPLE_PARENT_NOT_RANGE16	0x02
418  
419  /*
420   * mte_parent_shift() - Get the parent shift for the slot storage.
421   * @parent: The parent pointer cast as an unsigned long
 * Return: The shift into that pointer to the start of the slot
423   */
static inline unsigned long mte_parent_shift(unsigned long parent)
425  {
426  	/* Note bit 1 == 0 means 16B */
427  	if (likely(parent & MAPLE_PARENT_NOT_RANGE16))
428  		return MAPLE_PARENT_SLOT_SHIFT;
429  
430  	return MAPLE_PARENT_16B_SLOT_SHIFT;
431  }
432  
433  /*
434   * mte_parent_slot_mask() - Get the slot mask for the parent.
435   * @parent: The parent pointer cast as an unsigned long.
436   * Return: The slot mask for that parent.
437   */
static inline unsigned long mte_parent_slot_mask(unsigned long parent)
439  {
440  	/* Note bit 1 == 0 means 16B */
441  	if (likely(parent & MAPLE_PARENT_NOT_RANGE16))
442  		return MAPLE_PARENT_SLOT_MASK;
443  
444  	return MAPLE_PARENT_16B_SLOT_MASK;
445  }
446  
447  /*
448   * mas_parent_type() - Return the maple_type of the parent from the stored
449   * parent type.
450   * @mas: The maple state
451   * @enode: The maple_enode to extract the parent's enum
452   * Return: The node->parent maple_type
453   */
454  static inline
enum maple_type mas_parent_type(struct ma_state *mas, struct maple_enode *enode)
456  {
457  	unsigned long p_type;
458  
459  	p_type = (unsigned long)mte_to_node(enode)->parent;
460  	if (WARN_ON(p_type & MAPLE_PARENT_ROOT))
461  		return 0;
462  
463  	p_type &= MAPLE_NODE_MASK;
464  	p_type &= ~mte_parent_slot_mask(p_type);
465  	switch (p_type) {
466  	case MAPLE_PARENT_RANGE64: /* or MAPLE_PARENT_ARANGE64 */
467  		if (mt_is_alloc(mas->tree))
468  			return maple_arange_64;
469  		return maple_range_64;
470  	}
471  
472  	return 0;
473  }
474  
475  /*
476   * mas_set_parent() - Set the parent node and encode the slot
477   * @mas: The maple state
478   * @enode: The encoded maple node.
479   * @parent: The encoded maple node that is the parent of @enode.
480   * @slot: The slot that @enode resides in @parent.
481   *
482   * Slot number is encoded in the enode->parent bit 3-6 or 2-6, depending on the
483   * parent type.
484   */
485  static inline
void mas_set_parent(struct ma_state *mas, struct maple_enode *enode,
487  		    const struct maple_enode *parent, unsigned char slot)
488  {
489  	unsigned long val = (unsigned long)parent;
490  	unsigned long shift;
491  	unsigned long type;
492  	enum maple_type p_type = mte_node_type(parent);
493  
494  	MAS_BUG_ON(mas, p_type == maple_dense);
495  	MAS_BUG_ON(mas, p_type == maple_leaf_64);
496  
497  	switch (p_type) {
498  	case maple_range_64:
499  	case maple_arange_64:
500  		shift = MAPLE_PARENT_SLOT_SHIFT;
501  		type = MAPLE_PARENT_RANGE64;
502  		break;
503  	default:
504  	case maple_dense:
505  	case maple_leaf_64:
506  		shift = type = 0;
507  		break;
508  	}
509  
510  	val &= ~MAPLE_NODE_MASK; /* Clear all node metadata in parent */
511  	val |= (slot << shift) | type;
512  	mte_to_node(enode)->parent = ma_parent_ptr(val);
513  }
514  
515  /*
516   * mte_parent_slot() - get the parent slot of @enode.
517   * @enode: The encoded maple node.
518   *
519   * Return: The slot in the parent node where @enode resides.
520   */
521  static __always_inline
unsigned int mte_parent_slot(const struct maple_enode *enode)
523  {
524  	unsigned long val = (unsigned long)mte_to_node(enode)->parent;
525  
526  	if (unlikely(val & MA_ROOT_PARENT))
527  		return 0;
528  
529  	/*
530  	 * Okay to use MAPLE_PARENT_16B_SLOT_MASK as the last bit will be lost
531  	 * by shift if the parent shift is MAPLE_PARENT_SLOT_SHIFT
532  	 */
533  	return (val & MAPLE_PARENT_16B_SLOT_MASK) >> mte_parent_shift(val);
534  }
535  
536  /*
537   * mte_parent() - Get the parent of @node.
538   * @enode: The encoded maple node.
539   *
540   * Return: The parent maple node.
541   */
542  static __always_inline
struct maple_node *mte_parent(const struct maple_enode *enode)
544  {
545  	return (void *)((unsigned long)
546  			(mte_to_node(enode)->parent) & ~MAPLE_NODE_MASK);
547  }
548  
549  /*
 * ma_dead_node() - check if the @node is dead.
 * @node: The maple node
552   *
553   * Return: true if dead, false otherwise.
554   */
static __always_inline bool ma_dead_node(const struct maple_node *node)
556  {
557  	struct maple_node *parent;
558  
559  	/* Do not reorder reads from the node prior to the parent check */
560  	smp_rmb();
561  	parent = (void *)((unsigned long) node->parent & ~MAPLE_NODE_MASK);
562  	return (parent == node);
563  }
564  
565  /*
566   * mte_dead_node() - check if the @enode is dead.
567   * @enode: The encoded maple node
568   *
569   * Return: true if dead, false otherwise.
570   */
static __always_inline bool mte_dead_node(const struct maple_enode *enode)
572  {
573  	struct maple_node *parent, *node;
574  
575  	node = mte_to_node(enode);
576  	/* Do not reorder reads from the node prior to the parent check */
577  	smp_rmb();
578  	parent = mte_parent(enode);
579  	return (parent == node);
580  }
581  
582  /*
583   * mas_allocated() - Get the number of nodes allocated in a maple state.
584   * @mas: The maple state
585   *
586   * The ma_state alloc member is overloaded to hold a pointer to the first
587   * allocated node or to the number of requested nodes to allocate.  If bit 0 is
588   * set, then the alloc contains the number of requested nodes.  If there is an
589   * allocated node, then the total allocated nodes is in that node.
590   *
591   * Return: The total number of nodes allocated
592   */
static inline unsigned long mas_allocated(const struct ma_state *mas)
594  {
595  	if (!mas->alloc || ((unsigned long)mas->alloc & 0x1))
596  		return 0;
597  
598  	return mas->alloc->total;
599  }
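
/*
 * Illustrative example (not part of the implementation): after requesting
 * three nodes while no allocation is present, mas->alloc holds the encoded
 * value (3 << 1) | 1, so mas_allocated() returns 0 and mas_alloc_req()
 * returns 3.  Once nodes have been allocated, mas->alloc points to the first
 * maple_alloc node and mas->alloc->total holds the running count.
 */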
600  
601  /*
602   * mas_set_alloc_req() - Set the requested number of allocations.
603   * @mas: the maple state
604   * @count: the number of allocations.
605   *
606   * The requested number of allocations is either in the first allocated node,
607   * located in @mas->alloc->request_count, or directly in @mas->alloc if there is
608   * no allocated node.  Set the request either in the node or do the necessary
609   * encoding to store in @mas->alloc directly.
610   */
static inline void mas_set_alloc_req(struct ma_state *mas, unsigned long count)
612  {
613  	if (!mas->alloc || ((unsigned long)mas->alloc & 0x1)) {
614  		if (!count)
615  			mas->alloc = NULL;
616  		else
617  			mas->alloc = (struct maple_alloc *)(((count) << 1U) | 1U);
618  		return;
619  	}
620  
621  	mas->alloc->request_count = count;
622  }
623  
624  /*
625   * mas_alloc_req() - get the requested number of allocations.
626   * @mas: The maple state
627   *
628   * The alloc count is either stored directly in @mas, or in
629   * @mas->alloc->request_count if there is at least one node allocated.  Decode
630   * the request count if it's stored directly in @mas->alloc.
631   *
632   * Return: The allocation request count.
633   */
static inline unsigned int mas_alloc_req(const struct ma_state *mas)
635  {
636  	if ((unsigned long)mas->alloc & 0x1)
637  		return (unsigned long)(mas->alloc) >> 1;
638  	else if (mas->alloc)
639  		return mas->alloc->request_count;
640  	return 0;
641  }
642  
643  /*
644   * ma_pivots() - Get a pointer to the maple node pivots.
645   * @node: the maple node
646   * @type: the node type
647   *
648   * In the event of a dead node, this array may be %NULL
649   *
650   * Return: A pointer to the maple node pivots
651   */
static inline unsigned long *ma_pivots(struct maple_node *node,
653  					   enum maple_type type)
654  {
655  	switch (type) {
656  	case maple_arange_64:
657  		return node->ma64.pivot;
658  	case maple_range_64:
659  	case maple_leaf_64:
660  		return node->mr64.pivot;
661  	case maple_dense:
662  		return NULL;
663  	}
664  	return NULL;
665  }
666  
667  /*
668   * ma_gaps() - Get a pointer to the maple node gaps.
669   * @node: the maple node
670   * @type: the node type
671   *
672   * Return: A pointer to the maple node gaps
673   */
static inline unsigned long *ma_gaps(struct maple_node *node,
675  				     enum maple_type type)
676  {
677  	switch (type) {
678  	case maple_arange_64:
679  		return node->ma64.gap;
680  	case maple_range_64:
681  	case maple_leaf_64:
682  	case maple_dense:
683  		return NULL;
684  	}
685  	return NULL;
686  }
687  
688  /*
689   * mas_safe_pivot() - get the pivot at @piv or mas->max.
690   * @mas: The maple state
691   * @pivots: The pointer to the maple node pivots
692   * @piv: The pivot to fetch
693   * @type: The maple node type
694   *
695   * Return: The pivot at @piv within the limit of the @pivots array, @mas->max
696   * otherwise.
697   */
698  static __always_inline unsigned long
mas_safe_pivot(const struct ma_state *mas, unsigned long *pivots,
700  	       unsigned char piv, enum maple_type type)
701  {
702  	if (piv >= mt_pivots[type])
703  		return mas->max;
704  
705  	return pivots[piv];
706  }
707  
708  /*
709   * mas_safe_min() - Return the minimum for a given offset.
710   * @mas: The maple state
711   * @pivots: The pointer to the maple node pivots
712   * @offset: The offset into the pivot array
713   *
714   * Return: The minimum range value that is contained in @offset.
715   */
716  static inline unsigned long
mas_safe_min(struct ma_state *mas, unsigned long *pivots, unsigned char offset)
718  {
719  	if (likely(offset))
720  		return pivots[offset - 1] + 1;
721  
722  	return mas->min;
723  }
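
/*
 * Illustrative example (not part of the implementation): with pivots
 * {9, 19, 29, ...}, offset 2 spans 20-29: mas_safe_min() returns
 * pivots[1] + 1 == 20 and mas_safe_pivot() returns pivots[2] == 29.  For the
 * last offset of a node, where no pivot is stored, mas_safe_pivot() falls
 * back to mas->max.
 */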
724  
725  /*
726   * mte_set_pivot() - Set a pivot to a value in an encoded maple node.
727   * @mn: The encoded maple node
728   * @piv: The pivot offset
729   * @val: The value of the pivot
730   */
static inline void mte_set_pivot(struct maple_enode *mn, unsigned char piv,
732  				unsigned long val)
733  {
734  	struct maple_node *node = mte_to_node(mn);
735  	enum maple_type type = mte_node_type(mn);
736  
737  	BUG_ON(piv >= mt_pivots[type]);
738  	switch (type) {
739  	case maple_range_64:
740  	case maple_leaf_64:
741  		node->mr64.pivot[piv] = val;
742  		break;
743  	case maple_arange_64:
744  		node->ma64.pivot[piv] = val;
745  		break;
746  	case maple_dense:
747  		break;
748  	}
749  
750  }
751  
752  /*
753   * ma_slots() - Get a pointer to the maple node slots.
754   * @mn: The maple node
755   * @mt: The maple node type
756   *
757   * Return: A pointer to the maple node slots
758   */
static inline void __rcu **ma_slots(struct maple_node *mn, enum maple_type mt)
760  {
761  	switch (mt) {
762  	case maple_arange_64:
763  		return mn->ma64.slot;
764  	case maple_range_64:
765  	case maple_leaf_64:
766  		return mn->mr64.slot;
767  	case maple_dense:
768  		return mn->slot;
769  	}
770  
771  	return NULL;
772  }
773  
static inline bool mt_write_locked(const struct maple_tree *mt)
775  {
776  	return mt_external_lock(mt) ? mt_write_lock_is_held(mt) :
777  		lockdep_is_held(&mt->ma_lock);
778  }
779  
static __always_inline bool mt_locked(const struct maple_tree *mt)
781  {
782  	return mt_external_lock(mt) ? mt_lock_is_held(mt) :
783  		lockdep_is_held(&mt->ma_lock);
784  }
785  
static __always_inline void *mt_slot(const struct maple_tree *mt,
787  		void __rcu **slots, unsigned char offset)
788  {
789  	return rcu_dereference_check(slots[offset], mt_locked(mt));
790  }
791  
static __always_inline void *mt_slot_locked(struct maple_tree *mt,
793  		void __rcu **slots, unsigned char offset)
794  {
795  	return rcu_dereference_protected(slots[offset], mt_write_locked(mt));
796  }
797  /*
798   * mas_slot_locked() - Get the slot value when holding the maple tree lock.
799   * @mas: The maple state
800   * @slots: The pointer to the slots
801   * @offset: The offset into the slots array to fetch
802   *
803   * Return: The entry stored in @slots at the @offset.
804   */
static __always_inline void *mas_slot_locked(struct ma_state *mas,
806  		void __rcu **slots, unsigned char offset)
807  {
808  	return mt_slot_locked(mas->tree, slots, offset);
809  }
810  
811  /*
812   * mas_slot() - Get the slot value when not holding the maple tree lock.
813   * @mas: The maple state
814   * @slots: The pointer to the slots
815   * @offset: The offset into the slots array to fetch
816   *
817   * Return: The entry stored in @slots at the @offset
818   */
static __always_inline void *mas_slot(struct ma_state *mas, void __rcu **slots,
820  		unsigned char offset)
821  {
822  	return mt_slot(mas->tree, slots, offset);
823  }
824  
825  /*
826   * mas_root() - Get the maple tree root.
827   * @mas: The maple state.
828   *
829   * Return: The pointer to the root of the tree
830   */
static __always_inline void *mas_root(struct ma_state *mas)
832  {
833  	return rcu_dereference_check(mas->tree->ma_root, mt_locked(mas->tree));
834  }
835  
static inline void *mt_root_locked(struct maple_tree *mt)
837  {
838  	return rcu_dereference_protected(mt->ma_root, mt_write_locked(mt));
839  }
840  
841  /*
842   * mas_root_locked() - Get the maple tree root when holding the maple tree lock.
843   * @mas: The maple state.
844   *
845   * Return: The pointer to the root of the tree
846   */
static inline void *mas_root_locked(struct ma_state *mas)
848  {
849  	return mt_root_locked(mas->tree);
850  }
851  
static inline struct maple_metadata *ma_meta(struct maple_node *mn,
853  					     enum maple_type mt)
854  {
855  	switch (mt) {
856  	case maple_arange_64:
857  		return &mn->ma64.meta;
858  	default:
859  		return &mn->mr64.meta;
860  	}
861  }
862  
863  /*
864   * ma_set_meta() - Set the metadata information of a node.
865   * @mn: The maple node
866   * @mt: The maple node type
867   * @offset: The offset of the highest sub-gap in this node.
868   * @end: The end of the data in this node.
869   */
static inline void ma_set_meta(struct maple_node *mn, enum maple_type mt,
871  			       unsigned char offset, unsigned char end)
872  {
873  	struct maple_metadata *meta = ma_meta(mn, mt);
874  
875  	meta->gap = offset;
876  	meta->end = end;
877  }
878  
879  /*
880   * mt_clear_meta() - clear the metadata information of a node, if it exists
881   * @mt: The maple tree
882   * @mn: The maple node
883   * @type: The maple node type
884   */
static inline void mt_clear_meta(struct maple_tree *mt, struct maple_node *mn,
886  				  enum maple_type type)
887  {
888  	struct maple_metadata *meta;
889  	unsigned long *pivots;
890  	void __rcu **slots;
891  	void *next;
892  
893  	switch (type) {
894  	case maple_range_64:
895  		pivots = mn->mr64.pivot;
896  		if (unlikely(pivots[MAPLE_RANGE64_SLOTS - 2])) {
897  			slots = mn->mr64.slot;
898  			next = mt_slot_locked(mt, slots,
899  					      MAPLE_RANGE64_SLOTS - 1);
900  			if (unlikely((mte_to_node(next) &&
901  				      mte_node_type(next))))
902  				return; /* no metadata, could be node */
903  		}
904  		fallthrough;
905  	case maple_arange_64:
906  		meta = ma_meta(mn, type);
907  		break;
908  	default:
909  		return;
910  	}
911  
912  	meta->gap = 0;
913  	meta->end = 0;
914  }
915  
916  /*
917   * ma_meta_end() - Get the data end of a node from the metadata
918   * @mn: The maple node
919   * @mt: The maple node type
920   */
static inline unsigned char ma_meta_end(struct maple_node *mn,
922  					enum maple_type mt)
923  {
924  	struct maple_metadata *meta = ma_meta(mn, mt);
925  
926  	return meta->end;
927  }
928  
929  /*
930   * ma_meta_gap() - Get the largest gap location of a node from the metadata
931   * @mn: The maple node
932   */
static inline unsigned char ma_meta_gap(struct maple_node *mn)
934  {
935  	return mn->ma64.meta.gap;
936  }
937  
938  /*
 * ma_set_meta_gap() - Set the largest gap location in a node's metadata
940   * @mn: The maple node
941   * @mt: The maple node type
942   * @offset: The location of the largest gap.
943   */
static inline void ma_set_meta_gap(struct maple_node *mn, enum maple_type mt,
945  				   unsigned char offset)
946  {
947  
948  	struct maple_metadata *meta = ma_meta(mn, mt);
949  
950  	meta->gap = offset;
951  }
952  
953  /*
954   * mat_add() - Add a @dead_enode to the ma_topiary of a list of dead nodes.
955   * @mat: the ma_topiary, a linked list of dead nodes.
956   * @dead_enode: the node to be marked as dead and added to the tail of the list
957   *
958   * Add the @dead_enode to the linked list in @mat.
959   */
static inline void mat_add(struct ma_topiary *mat,
961  			   struct maple_enode *dead_enode)
962  {
963  	mte_set_node_dead(dead_enode);
964  	mte_to_mat(dead_enode)->next = NULL;
965  	if (!mat->tail) {
966  		mat->tail = mat->head = dead_enode;
967  		return;
968  	}
969  
970  	mte_to_mat(mat->tail)->next = dead_enode;
971  	mat->tail = dead_enode;
972  }
973  
974  static void mt_free_walk(struct rcu_head *head);
975  static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt,
976  			    bool free);
977  /*
978   * mas_mat_destroy() - Free all nodes and subtrees in a dead list.
979   * @mas: the maple state
980   * @mat: the ma_topiary linked list of dead nodes to free.
981   *
982   * Destroy walk a dead list.
983   */
static void mas_mat_destroy(struct ma_state *mas, struct ma_topiary *mat)
985  {
986  	struct maple_enode *next;
987  	struct maple_node *node;
988  	bool in_rcu = mt_in_rcu(mas->tree);
989  
990  	while (mat->head) {
991  		next = mte_to_mat(mat->head)->next;
992  		node = mte_to_node(mat->head);
993  		mt_destroy_walk(mat->head, mas->tree, !in_rcu);
994  		if (in_rcu)
995  			call_rcu(&node->rcu, mt_free_walk);
996  		mat->head = next;
997  	}
998  }
999  /*
1000   * mas_descend() - Descend into the slot stored in the ma_state.
1001   * @mas: the maple state.
1002   *
1003   * Note: Not RCU safe, only use in write side or debug code.
1004   */
static inline void mas_descend(struct ma_state *mas)
1006  {
1007  	enum maple_type type;
1008  	unsigned long *pivots;
1009  	struct maple_node *node;
1010  	void __rcu **slots;
1011  
1012  	node = mas_mn(mas);
1013  	type = mte_node_type(mas->node);
1014  	pivots = ma_pivots(node, type);
1015  	slots = ma_slots(node, type);
1016  
1017  	if (mas->offset)
1018  		mas->min = pivots[mas->offset - 1] + 1;
1019  	mas->max = mas_safe_pivot(mas, pivots, mas->offset, type);
1020  	mas->node = mas_slot(mas, slots, mas->offset);
1021  }
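
/*
 * Illustrative example (not part of the implementation): descending at
 * offset 1 from a node covering 0-100 with pivots {9, 19, ...} narrows the
 * state to mas->min = pivots[0] + 1 == 10 and mas->max = pivots[1] == 19
 * before loading the child from slot 1.
 */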
1022  
1023  /*
1024   * mte_set_gap() - Set a maple node gap.
1025   * @mn: The encoded maple node
1026   * @gap: The offset of the gap to set
1027   * @val: The gap value
1028   */
static inline void mte_set_gap(const struct maple_enode *mn,
1030  				 unsigned char gap, unsigned long val)
1031  {
1032  	switch (mte_node_type(mn)) {
1033  	default:
1034  		break;
1035  	case maple_arange_64:
1036  		mte_to_node(mn)->ma64.gap[gap] = val;
1037  		break;
1038  	}
1039  }
1040  
1041  /*
1042   * mas_ascend() - Walk up a level of the tree.
1043   * @mas: The maple state
1044   *
1045   * Sets the @mas->max and @mas->min to the correct values when walking up.  This
1046   * may cause several levels of walking up to find the correct min and max.
1047   * May find a dead node which will cause a premature return.
1048   * Return: 1 on dead node, 0 otherwise
1049   */
static int mas_ascend(struct ma_state *mas)
1051  {
1052  	struct maple_enode *p_enode; /* parent enode. */
1053  	struct maple_enode *a_enode; /* ancestor enode. */
1054  	struct maple_node *a_node; /* ancestor node. */
1055  	struct maple_node *p_node; /* parent node. */
1056  	unsigned char a_slot;
1057  	enum maple_type a_type;
1058  	unsigned long min, max;
1059  	unsigned long *pivots;
1060  	bool set_max = false, set_min = false;
1061  
1062  	a_node = mas_mn(mas);
1063  	if (ma_is_root(a_node)) {
1064  		mas->offset = 0;
1065  		return 0;
1066  	}
1067  
1068  	p_node = mte_parent(mas->node);
1069  	if (unlikely(a_node == p_node))
1070  		return 1;
1071  
1072  	a_type = mas_parent_type(mas, mas->node);
1073  	mas->offset = mte_parent_slot(mas->node);
1074  	a_enode = mt_mk_node(p_node, a_type);
1075  
1076  	/* Check to make sure all parent information is still accurate */
1077  	if (p_node != mte_parent(mas->node))
1078  		return 1;
1079  
1080  	mas->node = a_enode;
1081  
1082  	if (mte_is_root(a_enode)) {
1083  		mas->max = ULONG_MAX;
1084  		mas->min = 0;
1085  		return 0;
1086  	}
1087  
1088  	min = 0;
1089  	max = ULONG_MAX;
1090  	if (!mas->offset) {
1091  		min = mas->min;
1092  		set_min = true;
1093  	}
1094  
1095  	if (mas->max == ULONG_MAX)
1096  		set_max = true;
1097  
1098  	do {
1099  		p_enode = a_enode;
1100  		a_type = mas_parent_type(mas, p_enode);
1101  		a_node = mte_parent(p_enode);
1102  		a_slot = mte_parent_slot(p_enode);
1103  		a_enode = mt_mk_node(a_node, a_type);
1104  		pivots = ma_pivots(a_node, a_type);
1105  
1106  		if (unlikely(ma_dead_node(a_node)))
1107  			return 1;
1108  
1109  		if (!set_min && a_slot) {
1110  			set_min = true;
1111  			min = pivots[a_slot - 1] + 1;
1112  		}
1113  
1114  		if (!set_max && a_slot < mt_pivots[a_type]) {
1115  			set_max = true;
1116  			max = pivots[a_slot];
1117  		}
1118  
1119  		if (unlikely(ma_dead_node(a_node)))
1120  			return 1;
1121  
1122  		if (unlikely(ma_is_root(a_node)))
1123  			break;
1124  
1125  	} while (!set_min || !set_max);
1126  
1127  	mas->max = max;
1128  	mas->min = min;
1129  	return 0;
1130  }
1131  
1132  /*
1133   * mas_pop_node() - Get a previously allocated maple node from the maple state.
1134   * @mas: The maple state
1135   *
1136   * Return: A pointer to a maple node.
1137   */
static inline struct maple_node *mas_pop_node(struct ma_state *mas)
1139  {
1140  	struct maple_alloc *ret, *node = mas->alloc;
1141  	unsigned long total = mas_allocated(mas);
1142  	unsigned int req = mas_alloc_req(mas);
1143  
1144  	/* nothing or a request pending. */
1145  	if (WARN_ON(!total))
1146  		return NULL;
1147  
1148  	if (total == 1) {
1149  		/* single allocation in this ma_state */
1150  		mas->alloc = NULL;
1151  		ret = node;
1152  		goto single_node;
1153  	}
1154  
1155  	if (node->node_count == 1) {
1156  		/* Single allocation in this node. */
1157  		mas->alloc = node->slot[0];
1158  		mas->alloc->total = node->total - 1;
1159  		ret = node;
1160  		goto new_head;
1161  	}
1162  	node->total--;
1163  	ret = node->slot[--node->node_count];
1164  	node->slot[node->node_count] = NULL;
1165  
1166  single_node:
1167  new_head:
1168  	if (req) {
1169  		req++;
1170  		mas_set_alloc_req(mas, req);
1171  	}
1172  
1173  	memset(ret, 0, sizeof(*ret));
1174  	return (struct maple_node *)ret;
1175  }
1176  
1177  /*
1178   * mas_push_node() - Push a node back on the maple state allocation.
1179   * @mas: The maple state
1180   * @used: The used maple node
1181   *
1182   * Stores the maple node back into @mas->alloc for reuse.  Updates allocated and
1183   * requested node count as necessary.
1184   */
static inline void mas_push_node(struct ma_state *mas, struct maple_node *used)
1186  {
1187  	struct maple_alloc *reuse = (struct maple_alloc *)used;
1188  	struct maple_alloc *head = mas->alloc;
1189  	unsigned long count;
1190  	unsigned int requested = mas_alloc_req(mas);
1191  
1192  	count = mas_allocated(mas);
1193  
1194  	reuse->request_count = 0;
1195  	reuse->node_count = 0;
1196  	if (count && (head->node_count < MAPLE_ALLOC_SLOTS)) {
1197  		head->slot[head->node_count++] = reuse;
1198  		head->total++;
1199  		goto done;
1200  	}
1201  
1202  	reuse->total = 1;
1203  	if ((head) && !((unsigned long)head & 0x1)) {
1204  		reuse->slot[0] = head;
1205  		reuse->node_count = 1;
1206  		reuse->total += head->total;
1207  	}
1208  
1209  	mas->alloc = reuse;
1210  done:
1211  	if (requested > 1)
1212  		mas_set_alloc_req(mas, requested - 1);
1213  }
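
/*
 * Illustrative example (not part of the implementation): pushing three nodes
 * onto an empty maple state leaves the first pushed node as the head with
 * head->total == 3 and head->node_count == 2 (the other two nodes sit in the
 * head's slots).  mas_pop_node() then hands nodes back in the reverse order.
 */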
1214  
1215  /*
1216   * mas_alloc_nodes() - Allocate nodes into a maple state
1217   * @mas: The maple state
1218   * @gfp: The GFP Flags
1219   */
static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp)
1221  {
1222  	struct maple_alloc *node;
1223  	unsigned long allocated = mas_allocated(mas);
1224  	unsigned int requested = mas_alloc_req(mas);
1225  	unsigned int count;
1226  	void **slots = NULL;
1227  	unsigned int max_req = 0;
1228  
1229  	if (!requested)
1230  		return;
1231  
1232  	mas_set_alloc_req(mas, 0);
1233  	if (mas->mas_flags & MA_STATE_PREALLOC) {
1234  		if (allocated)
1235  			return;
		WARN_ON(!allocated);
1238  	}
1239  
1240  	if (!allocated || mas->alloc->node_count == MAPLE_ALLOC_SLOTS) {
1241  		node = (struct maple_alloc *)mt_alloc_one(gfp);
1242  		if (!node)
1243  			goto nomem_one;
1244  
1245  		if (allocated) {
1246  			node->slot[0] = mas->alloc;
1247  			node->node_count = 1;
1248  		} else {
1249  			node->node_count = 0;
1250  		}
1251  
1252  		mas->alloc = node;
1253  		node->total = ++allocated;
1254  		requested--;
1255  	}
1256  
1257  	node = mas->alloc;
1258  	node->request_count = 0;
1259  	while (requested) {
1260  		max_req = MAPLE_ALLOC_SLOTS - node->node_count;
1261  		slots = (void **)&node->slot[node->node_count];
1262  		max_req = min(requested, max_req);
1263  		count = mt_alloc_bulk(gfp, max_req, slots);
1264  		if (!count)
1265  			goto nomem_bulk;
1266  
1267  		if (node->node_count == 0) {
1268  			node->slot[0]->node_count = 0;
1269  			node->slot[0]->request_count = 0;
1270  		}
1271  
1272  		node->node_count += count;
1273  		allocated += count;
1274  		node = node->slot[0];
1275  		requested -= count;
1276  	}
1277  	mas->alloc->total = allocated;
1278  	return;
1279  
1280  nomem_bulk:
1281  	/* Clean up potential freed allocations on bulk failure */
1282  	memset(slots, 0, max_req * sizeof(unsigned long));
1283  nomem_one:
1284  	mas_set_alloc_req(mas, requested);
1285  	if (mas->alloc && !(((unsigned long)mas->alloc & 0x1)))
1286  		mas->alloc->total = allocated;
1287  	mas_set_err(mas, -ENOMEM);
1288  }
1289  
1290  /*
1291   * mas_free() - Free an encoded maple node
1292   * @mas: The maple state
1293   * @used: The encoded maple node to free.
1294   *
1295   * Uses rcu free if necessary, pushes @used back on the maple state allocations
1296   * otherwise.
1297   */
static inline void mas_free(struct ma_state *mas, struct maple_enode *used)
1299  {
1300  	struct maple_node *tmp = mte_to_node(used);
1301  
1302  	if (mt_in_rcu(mas->tree))
1303  		ma_free_rcu(tmp);
1304  	else
1305  		mas_push_node(mas, tmp);
1306  }
1307  
1308  /*
1309   * mas_node_count_gfp() - Check if enough nodes are allocated and request more
 * if there are not enough nodes.
1311   * @mas: The maple state
1312   * @count: The number of nodes needed
1313   * @gfp: the gfp flags
1314   */
static void mas_node_count_gfp(struct ma_state *mas, int count, gfp_t gfp)
1316  {
1317  	unsigned long allocated = mas_allocated(mas);
1318  
1319  	if (allocated < count) {
1320  		mas_set_alloc_req(mas, count - allocated);
1321  		mas_alloc_nodes(mas, gfp);
1322  	}
1323  }
1324  
1325  /*
1326   * mas_node_count() - Check if enough nodes are allocated and request more if
 * there are not enough nodes.
1328   * @mas: The maple state
1329   * @count: The number of nodes needed
1330   *
1331   * Note: Uses GFP_NOWAIT | __GFP_NOWARN for gfp flags.
1332   */
static void mas_node_count(struct ma_state *mas, int count)
1334  {
1335  	return mas_node_count_gfp(mas, count, GFP_NOWAIT | __GFP_NOWARN);
1336  }
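
/*
 * Illustrative usage sketch (not part of the implementation): write paths
 * typically reserve nodes up front and then check the maple state for
 * -ENOMEM, along the lines of:
 *
 *	mas_node_count(mas, count);
 *	if (mas_is_err(mas))
 *		return xa_err(mas->node);
 *
 * GFP_NOWAIT is used here so callers already holding the tree lock do not
 * sleep; mas_node_count_gfp() lets a caller pass its own gfp flags instead.
 */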
1337  
1338  /*
1339   * mas_start() - Sets up maple state for operations.
1340   * @mas: The maple state.
1341   *
 * If mas->status == ma_start, then set the min, max and depth to
 * defaults.
 *
 * Return:
 * - If mas->node is an error or not ma_start, return NULL.
1347   * - If it's an empty tree:     NULL & mas->status == ma_none
1348   * - If it's a single entry:    The entry & mas->status == ma_root
1349   * - If it's a tree:            NULL & mas->status == ma_active
1350   */
static inline struct maple_enode *mas_start(struct ma_state *mas)
1352  {
1353  	if (likely(mas_is_start(mas))) {
1354  		struct maple_enode *root;
1355  
1356  		mas->min = 0;
1357  		mas->max = ULONG_MAX;
1358  
1359  retry:
1360  		mas->depth = 0;
1361  		root = mas_root(mas);
1362  		/* Tree with nodes */
1363  		if (likely(xa_is_node(root))) {
1364  			mas->depth = 1;
1365  			mas->status = ma_active;
1366  			mas->node = mte_safe_root(root);
1367  			mas->offset = 0;
1368  			if (mte_dead_node(mas->node))
1369  				goto retry;
1370  
1371  			return NULL;
1372  		}
1373  
1374  		mas->node = NULL;
1375  		/* empty tree */
1376  		if (unlikely(!root)) {
1377  			mas->status = ma_none;
1378  			mas->offset = MAPLE_NODE_SLOTS;
1379  			return NULL;
1380  		}
1381  
1382  		/* Single entry tree */
1383  		mas->status = ma_root;
1384  		mas->offset = MAPLE_NODE_SLOTS;
1385  
1386  		/* Single entry tree. */
1387  		if (mas->index > 0)
1388  			return NULL;
1389  
1390  		return root;
1391  	}
1392  
1393  	return NULL;
1394  }
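
/*
 * Illustrative example (not part of the implementation): on an empty tree
 * mas_start() returns NULL and sets ma_none; on a tree holding only a root
 * entry it sets ma_root and returns the entry when the search index is 0;
 * on a tree with nodes it sets ma_active, points mas->node at the root node
 * and returns NULL so the caller can walk down.
 */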
1395  
1396  /*
1397   * ma_data_end() - Find the end of the data in a node.
1398   * @node: The maple node
1399   * @type: The maple node type
1400   * @pivots: The array of pivots in the node
1401   * @max: The maximum value in the node
1402   *
1403   * Uses metadata to find the end of the data when possible.
1404   * Return: The zero indexed last slot with data (may be null).
1405   */
static __always_inline unsigned char ma_data_end(struct maple_node *node,
1407  		enum maple_type type, unsigned long *pivots, unsigned long max)
1408  {
1409  	unsigned char offset;
1410  
1411  	if (!pivots)
1412  		return 0;
1413  
1414  	if (type == maple_arange_64)
1415  		return ma_meta_end(node, type);
1416  
1417  	offset = mt_pivots[type] - 1;
1418  	if (likely(!pivots[offset]))
1419  		return ma_meta_end(node, type);
1420  
1421  	if (likely(pivots[offset] == max))
1422  		return offset;
1423  
1424  	return mt_pivots[type];
1425  }
1426  
1427  /*
1428   * mas_data_end() - Find the end of the data (slot).
1429   * @mas: the maple state
1430   *
1431   * This method is optimized to check the metadata of a node if the node type
1432   * supports data end metadata.
1433   *
1434   * Return: The zero indexed last slot with data (may be null).
1435   */
static inline unsigned char mas_data_end(struct ma_state *mas)
1437  {
1438  	enum maple_type type;
1439  	struct maple_node *node;
1440  	unsigned char offset;
1441  	unsigned long *pivots;
1442  
1443  	type = mte_node_type(mas->node);
1444  	node = mas_mn(mas);
1445  	if (type == maple_arange_64)
1446  		return ma_meta_end(node, type);
1447  
1448  	pivots = ma_pivots(node, type);
1449  	if (unlikely(ma_dead_node(node)))
1450  		return 0;
1451  
1452  	offset = mt_pivots[type] - 1;
1453  	if (likely(!pivots[offset]))
1454  		return ma_meta_end(node, type);
1455  
1456  	if (likely(pivots[offset] == mas->max))
1457  		return offset;
1458  
1459  	return mt_pivots[type];
1460  }
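
/*
 * Illustrative example (not part of the implementation): on a 64 bit kernel a
 * maple_range_64 node has 16 slots and 15 pivots.  If the last pivot
 * (pivots[14]) is zero the node is not full, so the metadata kept in the
 * unused last slot gives the end.  If pivots[14] equals the node maximum,
 * offset 14 is the last slot in use.  Otherwise the final slot holds data up
 * to the node maximum and the end is offset 15.
 */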
1461  
1462  /*
1463   * mas_leaf_max_gap() - Returns the largest gap in a leaf node
1464   * @mas: the maple state
1465   *
1466   * Return: The maximum gap in the leaf.
1467   */
static unsigned long mas_leaf_max_gap(struct ma_state *mas)
1469  {
1470  	enum maple_type mt;
1471  	unsigned long pstart, gap, max_gap;
1472  	struct maple_node *mn;
1473  	unsigned long *pivots;
1474  	void __rcu **slots;
1475  	unsigned char i;
1476  	unsigned char max_piv;
1477  
1478  	mt = mte_node_type(mas->node);
1479  	mn = mas_mn(mas);
1480  	slots = ma_slots(mn, mt);
1481  	max_gap = 0;
1482  	if (unlikely(ma_is_dense(mt))) {
1483  		gap = 0;
1484  		for (i = 0; i < mt_slots[mt]; i++) {
1485  			if (slots[i]) {
1486  				if (gap > max_gap)
1487  					max_gap = gap;
1488  				gap = 0;
1489  			} else {
1490  				gap++;
1491  			}
1492  		}
1493  		if (gap > max_gap)
1494  			max_gap = gap;
1495  		return max_gap;
1496  	}
1497  
1498  	/*
	 * Checking the first implied pivot optimizes the loop below, and slot 1
	 * may be skipped if there is a gap in slot 0.
1501  	 */
1502  	pivots = ma_pivots(mn, mt);
1503  	if (likely(!slots[0])) {
1504  		max_gap = pivots[0] - mas->min + 1;
1505  		i = 2;
1506  	} else {
1507  		i = 1;
1508  	}
1509  
1510  	/* reduce max_piv as the special case is checked before the loop */
1511  	max_piv = ma_data_end(mn, mt, pivots, mas->max) - 1;
1512  	/*
1513  	 * Check end implied pivot which can only be a gap on the right most
1514  	 * node.
1515  	 */
1516  	if (unlikely(mas->max == ULONG_MAX) && !slots[max_piv + 1]) {
1517  		gap = ULONG_MAX - pivots[max_piv];
1518  		if (gap > max_gap)
1519  			max_gap = gap;
1520  
1521  		if (max_gap > pivots[max_piv] - mas->min)
1522  			return max_gap;
1523  	}
1524  
1525  	for (; i <= max_piv; i++) {
1526  		/* data == no gap. */
1527  		if (likely(slots[i]))
1528  			continue;
1529  
1530  		pstart = pivots[i - 1];
1531  		gap = pivots[i] - pstart;
1532  		if (gap > max_gap)
1533  			max_gap = gap;
1534  
1535  		/* There cannot be two gaps in a row. */
1536  		i++;
1537  	}
1538  	return max_gap;
1539  }
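
/*
 * Illustrative example (not part of the implementation): a leaf covering
 * 0-100 with slots {NULL, A, NULL, B} and pivots {9, 19, 59} has gaps over
 * 0-9 (size 10) and 20-59 (size 40) while B occupies 60-100, so
 * mas_leaf_max_gap() returns 40.
 */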
1540  
1541  /*
1542   * ma_max_gap() - Get the maximum gap in a maple node (non-leaf)
1543   * @node: The maple node
1544   * @gaps: The pointer to the gaps
1545   * @mt: The maple node type
1546   * @off: Pointer to store the offset location of the gap.
1547   *
1548   * Uses the metadata data end to scan backwards across set gaps.
1549   *
1550   * Return: The maximum gap value
1551   */
1552  static inline unsigned long
ma_max_gap(struct maple_node *node, unsigned long *gaps, enum maple_type mt,
1554  	    unsigned char *off)
1555  {
1556  	unsigned char offset, i;
1557  	unsigned long max_gap = 0;
1558  
1559  	i = offset = ma_meta_end(node, mt);
1560  	do {
1561  		if (gaps[i] > max_gap) {
1562  			max_gap = gaps[i];
1563  			offset = i;
1564  		}
1565  	} while (i--);
1566  
1567  	*off = offset;
1568  	return max_gap;
1569  }
1570  
1571  /*
1572   * mas_max_gap() - find the largest gap in a non-leaf node and set the slot.
1573   * @mas: The maple state.
1574   *
1575   * Return: The gap value.
1576   */
static inline unsigned long mas_max_gap(struct ma_state *mas)
1578  {
1579  	unsigned long *gaps;
1580  	unsigned char offset;
1581  	enum maple_type mt;
1582  	struct maple_node *node;
1583  
1584  	mt = mte_node_type(mas->node);
1585  	if (ma_is_leaf(mt))
1586  		return mas_leaf_max_gap(mas);
1587  
1588  	node = mas_mn(mas);
1589  	MAS_BUG_ON(mas, mt != maple_arange_64);
1590  	offset = ma_meta_gap(node);
1591  	gaps = ma_gaps(node, mt);
1592  	return gaps[offset];
1593  }
1594  
1595  /*
1596   * mas_parent_gap() - Set the parent gap and any gaps above, as needed
1597   * @mas: The maple state
1598   * @offset: The gap offset in the parent to set
1599   * @new: The new gap value.
1600   *
1601   * Set the parent gap then continue to set the gap upwards, using the metadata
1602   * of the parent to see if it is necessary to check the node above.
1603   */
static inline void mas_parent_gap(struct ma_state *mas, unsigned char offset,
1605  		unsigned long new)
1606  {
1607  	unsigned long meta_gap = 0;
1608  	struct maple_node *pnode;
1609  	struct maple_enode *penode;
1610  	unsigned long *pgaps;
1611  	unsigned char meta_offset;
1612  	enum maple_type pmt;
1613  
1614  	pnode = mte_parent(mas->node);
1615  	pmt = mas_parent_type(mas, mas->node);
1616  	penode = mt_mk_node(pnode, pmt);
1617  	pgaps = ma_gaps(pnode, pmt);
1618  
1619  ascend:
1620  	MAS_BUG_ON(mas, pmt != maple_arange_64);
1621  	meta_offset = ma_meta_gap(pnode);
1622  	meta_gap = pgaps[meta_offset];
1623  
1624  	pgaps[offset] = new;
1625  
1626  	if (meta_gap == new)
1627  		return;
1628  
1629  	if (offset != meta_offset) {
1630  		if (meta_gap > new)
1631  			return;
1632  
1633  		ma_set_meta_gap(pnode, pmt, offset);
1634  	} else if (new < meta_gap) {
1635  		new = ma_max_gap(pnode, pgaps, pmt, &meta_offset);
1636  		ma_set_meta_gap(pnode, pmt, meta_offset);
1637  	}
1638  
1639  	if (ma_is_root(pnode))
1640  		return;
1641  
1642  	/* Go to the parent node. */
1643  	pnode = mte_parent(penode);
1644  	pmt = mas_parent_type(mas, penode);
1645  	pgaps = ma_gaps(pnode, pmt);
1646  	offset = mte_parent_slot(penode);
1647  	penode = mt_mk_node(pnode, pmt);
1648  	goto ascend;
1649  }
1650  
1651  /*
 * mas_update_gap() - Update a node's gaps and propagate up if necessary.
1653   * @mas: the maple state.
1654   */
static inline void mas_update_gap(struct ma_state *mas)
1656  {
1657  	unsigned char pslot;
1658  	unsigned long p_gap;
1659  	unsigned long max_gap;
1660  
1661  	if (!mt_is_alloc(mas->tree))
1662  		return;
1663  
1664  	if (mte_is_root(mas->node))
1665  		return;
1666  
1667  	max_gap = mas_max_gap(mas);
1668  
1669  	pslot = mte_parent_slot(mas->node);
1670  	p_gap = ma_gaps(mte_parent(mas->node),
1671  			mas_parent_type(mas, mas->node))[pslot];
1672  
1673  	if (p_gap != max_gap)
1674  		mas_parent_gap(mas, pslot, max_gap);
1675  }
1676  
1677  /*
1678   * mas_adopt_children() - Set the parent pointer of all nodes in @parent to
1679   * @parent with the slot encoded.
1680   * @mas: the maple state (for the tree)
1681   * @parent: the maple encoded node containing the children.
1682   */
static inline void mas_adopt_children(struct ma_state *mas,
1684  		struct maple_enode *parent)
1685  {
1686  	enum maple_type type = mte_node_type(parent);
1687  	struct maple_node *node = mte_to_node(parent);
1688  	void __rcu **slots = ma_slots(node, type);
1689  	unsigned long *pivots = ma_pivots(node, type);
1690  	struct maple_enode *child;
1691  	unsigned char offset;
1692  
1693  	offset = ma_data_end(node, type, pivots, mas->max);
1694  	do {
1695  		child = mas_slot_locked(mas, slots, offset);
1696  		mas_set_parent(mas, child, parent, offset);
1697  	} while (offset--);
1698  }
1699  
1700  /*
1701   * mas_put_in_tree() - Put a new node in the tree, smp_wmb(), and mark the old
1702   * node as dead.
1703   * @mas: the maple state with the new node
1704   * @old_enode: The old maple encoded node to replace.
1705   */
static inline void mas_put_in_tree(struct ma_state *mas,
1707  		struct maple_enode *old_enode)
1708  	__must_hold(mas->tree->ma_lock)
1709  {
1710  	unsigned char offset;
1711  	void __rcu **slots;
1712  
1713  	if (mte_is_root(mas->node)) {
1714  		mas_mn(mas)->parent = ma_parent_ptr(mas_tree_parent(mas));
1715  		rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node));
1716  		mas_set_height(mas);
1717  	} else {
1718  
1719  		offset = mte_parent_slot(mas->node);
1720  		slots = ma_slots(mte_parent(mas->node),
1721  				 mas_parent_type(mas, mas->node));
1722  		rcu_assign_pointer(slots[offset], mas->node);
1723  	}
1724  
1725  	mte_set_node_dead(old_enode);
1726  }
1727  
1728  /*
1729   * mas_replace_node() - Replace a node by putting it in the tree, marking it
1730   * dead, and freeing it.  The parent encoding of the new node is used to
1731   * locate the maple node in the tree.
1732   * @mas: the ma_state with @mas->node pointing to the new node.
1733   * @old_enode: The old maple encoded node.
1734   */
1735  static inline void mas_replace_node(struct ma_state *mas,
1736  		struct maple_enode *old_enode)
1737  	__must_hold(mas->tree->ma_lock)
1738  {
1739  	mas_put_in_tree(mas, old_enode);
1740  	mas_free(mas, old_enode);
1741  }
1742  
1743  /*
1744   * mas_find_child() - Find a child whose parent is @mas->node.
1745   * @mas: the maple state with the parent.
1746   * @child: the maple state to store the child.
1747   */
1748  static inline bool mas_find_child(struct ma_state *mas, struct ma_state *child)
1749  	__must_hold(mas->tree->ma_lock)
1750  {
1751  	enum maple_type mt;
1752  	unsigned char offset;
1753  	unsigned char end;
1754  	unsigned long *pivots;
1755  	struct maple_enode *entry;
1756  	struct maple_node *node;
1757  	void __rcu **slots;
1758  
1759  	mt = mte_node_type(mas->node);
1760  	node = mas_mn(mas);
1761  	slots = ma_slots(node, mt);
1762  	pivots = ma_pivots(node, mt);
1763  	end = ma_data_end(node, mt, pivots, mas->max);
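      	/*
      	 * Scan the slots from mas->offset onwards for the first child whose
      	 * parent pointer already references this node.  mas->offset is
      	 * advanced so repeated calls return each matching child in turn.
      	 */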
1764  	for (offset = mas->offset; offset <= end; offset++) {
1765  		entry = mas_slot_locked(mas, slots, offset);
1766  		if (mte_parent(entry) == node) {
1767  			*child = *mas;
1768  			mas->offset = offset + 1;
1769  			child->offset = offset;
1770  			mas_descend(child);
1771  			child->offset = 0;
1772  			return true;
1773  		}
1774  	}
1775  	return false;
1776  }
1777  
1778  /*
1779   * mab_shift_right() - Shift the data in @b_node right.  Note: this does not
1780   * clean out the old data or set b_node->b_end.
1781   * @b_node: the maple_big_node
1782   * @shift: the shift count
1783   */
1784  static inline void mab_shift_right(struct maple_big_node *b_node,
1785  				 unsigned char shift)
1786  {
1787  	unsigned long size = b_node->b_end * sizeof(unsigned long);
1788  
1789  	memmove(b_node->pivot + shift, b_node->pivot, size);
1790  	memmove(b_node->slot + shift, b_node->slot, size);
1791  	if (b_node->type == maple_arange_64)
1792  		memmove(b_node->gap + shift, b_node->gap, size);
1793  }
1794  
1795  /*
1796   * mab_middle_node() - Check if a middle node is needed (unlikely)
1797   * @b_node: the maple_big_node that contains the data.
1798   * @split: the potential split location
1799   * @slot_count: the number of slots in a single node being considered.
1800   *
1801   * Return: true if a middle node is required.
1802   */
1803  static inline bool mab_middle_node(struct maple_big_node *b_node, int split,
1804  				   unsigned char slot_count)
1805  {
1806  	unsigned char size = b_node->b_end;
1807  
1808  	if (size >= 2 * slot_count)
1809  		return true;
1810  
1811  	if (!b_node->slot[split] && (size >= 2 * slot_count - 1))
1812  		return true;
1813  
1814  	return false;
1815  }
1816  
1817  /*
1818   * mab_no_null_split() - ensure the split doesn't fall on a NULL
1819   * @b_node: the maple_big_node with the data
1820   * @split: the suggested split location
1821   * @slot_count: the number of slots in the node being considered.
1822   *
1823   * Return: the split location.
1824   */
1825  static inline int mab_no_null_split(struct maple_big_node *b_node,
1826  				    unsigned char split, unsigned char slot_count)
1827  {
1828  	if (!b_node->slot[split]) {
1829  		/*
1830  		 * If the split is less than the max slot and the right side will
1831  		 * still be sufficient, then increment the split past the NULL.
1832  		 */
1833  		if ((split < slot_count - 1) &&
1834  		    (b_node->b_end - split) > (mt_min_slots[b_node->type]))
1835  			split++;
1836  		else
1837  			split--;
1838  	}
1839  	return split;
1840  }
1841  
1842  /*
1843   * mab_calc_split() - Calculate the split location and if there needs to be two
1844   * splits.
1845   * @mas: The maple state
1846   * @bn: The maple_big_node with the data
1847   * @mid_split: The second split, if required.  0 otherwise.
       * @min: The minimum of the range covered by @bn.
1848   *
1849   * Return: The first split location.  The middle split is set in @mid_split.
1850   */
1851  static inline int mab_calc_split(struct ma_state *mas,
1852  	 struct maple_big_node *bn, unsigned char *mid_split, unsigned long min)
1853  {
1854  	unsigned char b_end = bn->b_end;
1855  	int split = b_end / 2; /* Assume equal split. */
1856  	unsigned char slot_min, slot_count = mt_slots[bn->type];
1857  
1858  	/*
1859  	 * To support gap tracking, all NULL entries are kept together and a node cannot
1860  	 * end on a NULL entry, with the exception of the left-most leaf.  The
1861  	 * limitation means that the split of a node must be checked for this condition
1862  	 * and be able to put more data in one direction or the other.
1863  	 */
1864  	if (unlikely((mas->mas_flags & MA_STATE_BULK))) {
1865  		*mid_split = 0;
1866  		split = b_end - mt_min_slots[bn->type];
1867  
1868  		if (!ma_is_leaf(bn->type))
1869  			return split;
1870  
1871  		mas->mas_flags |= MA_STATE_REBALANCE;
1872  		if (!bn->slot[split])
1873  			split--;
1874  		return split;
1875  	}
1876  
1877  	/*
1878  	 * Although extremely rare, it is possible to enter what is known as the 3-way
1879  	 * split scenario.  The 3-way split comes about by means of a store of a range
1880  	 * that overwrites the end and beginning of two full nodes.  The result is a set
1881  	 * of entries that cannot be stored in 2 nodes.  Sometimes, these two nodes can
1882  	 * also be located in different parent nodes which are also full.  This can
1883  	 * carry upwards all the way to the root in the worst case.
1884  	 */
1885  	if (unlikely(mab_middle_node(bn, split, slot_count))) {
1886  		split = b_end / 3;
1887  		*mid_split = split * 2;
1888  	} else {
1889  		slot_min = mt_min_slots[bn->type];
1890  
1891  		*mid_split = 0;
1892  		/*
1893  		 * Avoid having a range less than the slot count unless it
1894  		 * causes one node to be deficient.
1895  		 * NOTE: mt_min_slots is 1 based, b_end and split are zero based.
1896  		 */
1897  		while ((split < slot_count - 1) &&
1898  		       ((bn->pivot[split] - min) < slot_count - 1) &&
1899  		       (b_end - split > slot_min))
1900  			split++;
1901  	}
1902  
1903  	/* Avoid ending a node on a NULL entry */
1904  	split = mab_no_null_split(bn, split, slot_count);
1905  
1906  	if (unlikely(*mid_split))
1907  		*mid_split = mab_no_null_split(bn, *mid_split, slot_count);
1908  
1909  	return split;
1910  }
1911  
1912  /*
1913   * mas_mab_cp() - Copy data from a maple state inclusively to a maple_big_node
1914   * and set @b_node->b_end to the next free slot.
1915   * @mas: The maple state
1916   * @mas_start: The starting slot to copy
1917   * @mas_end: The end slot to copy (inclusively)
1918   * @b_node: The maple_big_node to place the data
1919   * @mab_start: The starting location in maple_big_node to store the data.
1920   */
1921  static inline void mas_mab_cp(struct ma_state *mas, unsigned char mas_start,
1922  			unsigned char mas_end, struct maple_big_node *b_node,
1923  			unsigned char mab_start)
1924  {
1925  	enum maple_type mt;
1926  	struct maple_node *node;
1927  	void __rcu **slots;
1928  	unsigned long *pivots, *gaps;
1929  	int i = mas_start, j = mab_start;
1930  	unsigned char piv_end;
1931  
1932  	node = mas_mn(mas);
1933  	mt = mte_node_type(mas->node);
1934  	pivots = ma_pivots(node, mt);
1935  	if (!i) {
1936  		b_node->pivot[j] = pivots[i++];
1937  		if (unlikely(i > mas_end))
1938  			goto complete;
1939  		j++;
1940  	}
1941  
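      	/*
      	 * Copy the remaining pivots.  Stop early on a zero pivot (no more
      	 * data) or when the node maximum is reached; the final pivot may be
      	 * implied and is fetched with mas_safe_pivot() below.
      	 */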
1942  	piv_end = min(mas_end, mt_pivots[mt]);
1943  	for (; i < piv_end; i++, j++) {
1944  		b_node->pivot[j] = pivots[i];
1945  		if (unlikely(!b_node->pivot[j]))
1946  			break;
1947  
1948  		if (unlikely(mas->max == b_node->pivot[j]))
1949  			goto complete;
1950  	}
1951  
1952  	if (likely(i <= mas_end))
1953  		b_node->pivot[j] = mas_safe_pivot(mas, pivots, i, mt);
1954  
1955  complete:
1956  	b_node->b_end = ++j;
1957  	j -= mab_start;
1958  	slots = ma_slots(node, mt);
1959  	memcpy(b_node->slot + mab_start, slots + mas_start, sizeof(void *) * j);
1960  	if (!ma_is_leaf(mt) && mt_is_alloc(mas->tree)) {
1961  		gaps = ma_gaps(node, mt);
1962  		memcpy(b_node->gap + mab_start, gaps + mas_start,
1963  		       sizeof(unsigned long) * j);
1964  	}
1965  }
1966  
1967  /*
1968   * mas_leaf_set_meta() - Set the metadata of a leaf if possible.
1969   * @node: The maple node
1970   * @mt: The maple type
1971   * @end: The node end
1972   */
1973  static inline void mas_leaf_set_meta(struct maple_node *node,
1974  		enum maple_type mt, unsigned char end)
1975  {
1976  	if (end < mt_slots[mt] - 1)
1977  		ma_set_meta(node, mt, 0, end);
1978  }
1979  
1980  /*
1981   * mab_mas_cp() - Copy data from maple_big_node to a maple encoded node.
1982   * @b_node: the maple_big_node that has the data
1983   * @mab_start: the start location in @b_node.
1984   * @mab_end: The end location in @b_node (inclusively)
1985   * @mas: The maple state with the maple encoded node.
1986   */
1987  static inline void mab_mas_cp(struct maple_big_node *b_node,
1988  			      unsigned char mab_start, unsigned char mab_end,
1989  			      struct ma_state *mas, bool new_max)
1990  {
1991  	int i, j = 0;
1992  	enum maple_type mt = mte_node_type(mas->node);
1993  	struct maple_node *node = mte_to_node(mas->node);
1994  	void __rcu **slots = ma_slots(node, mt);
1995  	unsigned long *pivots = ma_pivots(node, mt);
1996  	unsigned long *gaps = NULL;
1997  	unsigned char end;
1998  
1999  	if (mab_end - mab_start > mt_pivots[mt])
2000  		mab_end--;
2001  
2002  	if (!pivots[mt_pivots[mt] - 1])
2003  		slots[mt_pivots[mt]] = NULL;
2004  
2005  	i = mab_start;
2006  	do {
2007  		pivots[j++] = b_node->pivot[i++];
2008  	} while (i <= mab_end && likely(b_node->pivot[i]));
2009  
2010  	memcpy(slots, b_node->slot + mab_start,
2011  	       sizeof(void *) * (i - mab_start));
2012  
2013  	if (new_max)
2014  		mas->max = b_node->pivot[i - 1];
2015  
2016  	end = j - 1;
2017  	if (likely(!ma_is_leaf(mt) && mt_is_alloc(mas->tree))) {
2018  		unsigned long max_gap = 0;
2019  		unsigned char offset = 0;
2020  
2021  		gaps = ma_gaps(node, mt);
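      		/*
      		 * Copy the gaps in reverse while tracking the largest gap and
      		 * its offset so the metadata can record where the maximum gap
      		 * lives in this node.
      		 */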
2022  		do {
2023  			gaps[--j] = b_node->gap[--i];
2024  			if (gaps[j] > max_gap) {
2025  				offset = j;
2026  				max_gap = gaps[j];
2027  			}
2028  		} while (j);
2029  
2030  		ma_set_meta(node, mt, offset, end);
2031  	} else {
2032  		mas_leaf_set_meta(node, mt, end);
2033  	}
2034  }
2035  
2036  /*
2037   * mas_bulk_rebalance() - Rebalance the end of a tree after a bulk insert.
2038   * @mas: The maple state
2039   * @end: The maple node end
2040   * @mt: The maple node type
2041   */
2042  static inline void mas_bulk_rebalance(struct ma_state *mas, unsigned char end,
2043  				      enum maple_type mt)
2044  {
2045  	if (!(mas->mas_flags & MA_STATE_BULK))
2046  		return;
2047  
2048  	if (mte_is_root(mas->node))
2049  		return;
2050  
2051  	if (end > mt_min_slots[mt]) {
2052  		mas->mas_flags &= ~MA_STATE_REBALANCE;
2053  		return;
2054  	}
2055  }
2056  
2057  /*
2058   * mas_store_b_node() - Store an @entry into the b_node while also copying the
2059   * data from a maple encoded node.
2060   * @wr_mas: the maple write state
2061   * @b_node: the maple_big_node to fill with data
2062   * @offset_end: the offset to end copying
2063   *
2064   * Return: The actual end of the data stored in @b_node
2065   */
2066  static noinline_for_kasan void mas_store_b_node(struct ma_wr_state *wr_mas,
2067  		struct maple_big_node *b_node, unsigned char offset_end)
2068  {
2069  	unsigned char slot;
2070  	unsigned char b_end;
2071  	/* Possible underflow of piv will wrap back to 0 before use. */
2072  	unsigned long piv;
2073  	struct ma_state *mas = wr_mas->mas;
2074  
2075  	b_node->type = wr_mas->type;
2076  	b_end = 0;
2077  	slot = mas->offset;
2078  	if (slot) {
2079  		/* Copy start data up to insert. */
2080  		mas_mab_cp(mas, 0, slot - 1, b_node, 0);
2081  		b_end = b_node->b_end;
2082  		piv = b_node->pivot[b_end - 1];
2083  	} else
2084  		piv = mas->min - 1;
2085  
2086  	if (piv + 1 < mas->index) {
2087  		/* Handle range starting after old range */
2088  		b_node->slot[b_end] = wr_mas->content;
2089  		if (!wr_mas->content)
2090  			b_node->gap[b_end] = mas->index - 1 - piv;
2091  		b_node->pivot[b_end++] = mas->index - 1;
2092  	}
2093  
2094  	/* Store the new entry. */
2095  	mas->offset = b_end;
2096  	b_node->slot[b_end] = wr_mas->entry;
2097  	b_node->pivot[b_end] = mas->last;
2098  
2099  	/* Appended. */
2100  	if (mas->last >= mas->max)
2101  		goto b_end;
2102  
2103  	/* Handle new range ending before old range ends */
2104  	piv = mas_safe_pivot(mas, wr_mas->pivots, offset_end, wr_mas->type);
2105  	if (piv > mas->last) {
2106  		if (piv == ULONG_MAX)
2107  			mas_bulk_rebalance(mas, b_node->b_end, wr_mas->type);
2108  
2109  		if (offset_end != slot)
2110  			wr_mas->content = mas_slot_locked(mas, wr_mas->slots,
2111  							  offset_end);
2112  
2113  		b_node->slot[++b_end] = wr_mas->content;
2114  		if (!wr_mas->content)
2115  			b_node->gap[b_end] = piv - mas->last + 1;
2116  		b_node->pivot[b_end] = piv;
2117  	}
2118  
2119  	slot = offset_end + 1;
2120  	if (slot > mas->end)
2121  		goto b_end;
2122  
2123  	/* Copy end data to the end of the node. */
2124  	mas_mab_cp(mas, slot, mas->end + 1, b_node, ++b_end);
2125  	b_node->b_end--;
2126  	return;
2127  
2128  b_end:
2129  	b_node->b_end = b_end;
2130  }
2131  
2132  /*
2133   * mas_prev_sibling() - Find the previous node with the same parent.
2134   * @mas: the maple state
2135   *
2136   * Return: True if there is a previous sibling, false otherwise.
2137   */
2138  static inline bool mas_prev_sibling(struct ma_state *mas)
2139  {
2140  	unsigned int p_slot = mte_parent_slot(mas->node);
2141  
2142  	if (mte_is_root(mas->node))
2143  		return false;
2144  
2145  	if (!p_slot)
2146  		return false;
2147  
2148  	mas_ascend(mas);
2149  	mas->offset = p_slot - 1;
2150  	mas_descend(mas);
2151  	return true;
2152  }
2153  
2154  /*
2155   * mas_next_sibling() - Find the next node with the same parent.
2156   * @mas: the maple state
2157   *
2158   * Return: true if there is a next sibling, false otherwise.
2159   */
2160  static inline bool mas_next_sibling(struct ma_state *mas)
2161  {
2162  	MA_STATE(parent, mas->tree, mas->index, mas->last);
2163  
2164  	if (mte_is_root(mas->node))
2165  		return false;
2166  
2167  	parent = *mas;
2168  	mas_ascend(&parent);
2169  	parent.offset = mte_parent_slot(mas->node) + 1;
2170  	if (parent.offset > mas_data_end(&parent))
2171  		return false;
2172  
2173  	*mas = parent;
2174  	mas_descend(mas);
2175  	return true;
2176  }
2177  
2178  /*
2179   * mas_node_or_none() - Set the enode and state.
2180   * @mas: the maple state
2181   * @enode: The encoded maple node.
2182   *
2183   * Set @mas->node to @enode and set the status to ma_active or ma_none.
2184   */
2185  static inline void mas_node_or_none(struct ma_state *mas,
2186  		struct maple_enode *enode)
2187  {
2188  	if (enode) {
2189  		mas->node = enode;
2190  		mas->status = ma_active;
2191  	} else {
2192  		mas->node = NULL;
2193  		mas->status = ma_none;
2194  	}
2195  }
2196  
2197  /*
2198   * mas_wr_node_walk() - Find the correct offset for the index in the @mas.
2199   *                      If @mas->index cannot be found within the containing
2200   *                      node, we traverse to the last entry in the node.
2201   * @wr_mas: The maple write state
2202   *
2203   * Uses mas_slot_locked() and does not need to worry about dead nodes.
2204   */
2205  static inline void mas_wr_node_walk(struct ma_wr_state *wr_mas)
2206  {
2207  	struct ma_state *mas = wr_mas->mas;
2208  	unsigned char count, offset;
2209  
2210  	if (unlikely(ma_is_dense(wr_mas->type))) {
2211  		wr_mas->r_max = wr_mas->r_min = mas->index;
2212  		mas->offset = mas->index = mas->min;
2213  		return;
2214  	}
2215  
2216  	wr_mas->node = mas_mn(wr_mas->mas);
2217  	wr_mas->pivots = ma_pivots(wr_mas->node, wr_mas->type);
2218  	count = mas->end = ma_data_end(wr_mas->node, wr_mas->type,
2219  				       wr_mas->pivots, mas->max);
2220  	offset = mas->offset;
2221  
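      	/*
      	 * Linear scan of the pivots for the first range ending at or after
      	 * mas->index; r_min and r_max are then set to that range's bounds.
      	 */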
2222  	while (offset < count && mas->index > wr_mas->pivots[offset])
2223  		offset++;
2224  
2225  	wr_mas->r_max = offset < count ? wr_mas->pivots[offset] : mas->max;
2226  	wr_mas->r_min = mas_safe_min(mas, wr_mas->pivots, offset);
2227  	wr_mas->offset_end = mas->offset = offset;
2228  }
2229  
2230  /*
2231   * mast_rebalance_next() - Rebalance against the next node
2232   * @mast: The maple subtree state
2233   */
2234  static inline void mast_rebalance_next(struct maple_subtree_state *mast)
2235  {
2236  	unsigned char b_end = mast->bn->b_end;
2237  
2238  	mas_mab_cp(mast->orig_r, 0, mt_slot_count(mast->orig_r->node),
2239  		   mast->bn, b_end);
2240  	mast->orig_r->last = mast->orig_r->max;
2241  }
2242  
2243  /*
2244   * mast_rebalance_prev() - Rebalance against the previous node
2245   * @mast: The maple subtree state
2246   */
2247  static inline void mast_rebalance_prev(struct maple_subtree_state *mast)
2248  {
2249  	unsigned char end = mas_data_end(mast->orig_l) + 1;
2250  	unsigned char b_end = mast->bn->b_end;
2251  
2252  	mab_shift_right(mast->bn, end);
2253  	mas_mab_cp(mast->orig_l, 0, end - 1, mast->bn, 0);
2254  	mast->l->min = mast->orig_l->min;
2255  	mast->orig_l->index = mast->orig_l->min;
2256  	mast->bn->b_end = end + b_end;
2257  	mast->l->offset += end;
2258  }
2259  
2260  /*
2261   * mast_spanning_rebalance() - Rebalance nodes with the nearest neighbour,
2262   * favouring the node to the right.  Check the nodes to the right, then the
2263   * left, at each level upwards until the root is reached.
2264   * Data is copied into the @mast->bn.
2265   * @mast: The maple_subtree_state.
2266   */
2267  static inline
2268  bool mast_spanning_rebalance(struct maple_subtree_state *mast)
2269  {
2270  	struct ma_state r_tmp = *mast->orig_r;
2271  	struct ma_state l_tmp = *mast->orig_l;
2272  	unsigned char depth = 0;
2273  
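      	/*
      	 * Ascend until a level with data to the right is found (or, failing
      	 * that, data to the left), then descend back down that side to the
      	 * original level and copy its data into the big node.
      	 */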
2274  	do {
2275  		mas_ascend(mast->orig_r);
2276  		mas_ascend(mast->orig_l);
2277  		depth++;
2278  		if (mast->orig_r->offset < mas_data_end(mast->orig_r)) {
2279  			mast->orig_r->offset++;
2280  			do {
2281  				mas_descend(mast->orig_r);
2282  				mast->orig_r->offset = 0;
2283  			} while (--depth);
2284  
2285  			mast_rebalance_next(mast);
2286  			*mast->orig_l = l_tmp;
2287  			return true;
2288  		} else if (mast->orig_l->offset != 0) {
2289  			mast->orig_l->offset--;
2290  			do {
2291  				mas_descend(mast->orig_l);
2292  				mast->orig_l->offset =
2293  					mas_data_end(mast->orig_l);
2294  			} while (--depth);
2295  
2296  			mast_rebalance_prev(mast);
2297  			*mast->orig_r = r_tmp;
2298  			return true;
2299  		}
2300  	} while (!mte_is_root(mast->orig_r->node));
2301  
2302  	*mast->orig_r = r_tmp;
2303  	*mast->orig_l = l_tmp;
2304  	return false;
2305  }
2306  
2307  /*
2308   * mast_ascend() - Ascend the original left and right maple states.
2309   * @mast: the maple subtree state.
2310   *
2311   * Ascend the original left and right sides.  Set the offsets to point to the
2312   * data already in the new tree (@mast->l and @mast->r).
2313   */
2314  static inline void mast_ascend(struct maple_subtree_state *mast)
2315  {
2316  	MA_WR_STATE(wr_mas, mast->orig_r,  NULL);
2317  	mas_ascend(mast->orig_l);
2318  	mas_ascend(mast->orig_r);
2319  
2320  	mast->orig_r->offset = 0;
2321  	mast->orig_r->index = mast->r->max;
2322  	/* last should be larger than or equal to index */
2323  	if (mast->orig_r->last < mast->orig_r->index)
2324  		mast->orig_r->last = mast->orig_r->index;
2325  
2326  	wr_mas.type = mte_node_type(mast->orig_r->node);
2327  	mas_wr_node_walk(&wr_mas);
2328  	/* Set up the left side of things */
2329  	mast->orig_l->offset = 0;
2330  	mast->orig_l->index = mast->l->min;
2331  	wr_mas.mas = mast->orig_l;
2332  	wr_mas.type = mte_node_type(mast->orig_l->node);
2333  	mas_wr_node_walk(&wr_mas);
2334  
2335  	mast->bn->type = wr_mas.type;
2336  }
2337  
2338  /*
2339   * mas_new_ma_node() - Create and return a new maple node.  Helper function.
2340   * @mas: the maple state with the allocations.
2341   * @b_node: the maple_big_node with the type encoding.
2342   *
2343   * Use the node type from the maple_big_node to allocate a new node from the
2344   * ma_state.  This function exists mainly for code readability.
2345   *
2346   * Return: A new maple encoded node
2347   */
2348  static inline struct maple_enode
2349  *mas_new_ma_node(struct ma_state *mas, struct maple_big_node *b_node)
2350  {
2351  	return mt_mk_node(ma_mnode_ptr(mas_pop_node(mas)), b_node->type);
2352  }
2353  
2354  /*
2355   * mas_mab_to_node() - Set up the left, right and middle nodes.
2356   *
2357   * @mas: the maple state that contains the allocations.
2358   * @b_node: the node which contains the data.
2359   * @left: The pointer which will have the left node
2360   * @right: The pointer which may have the right node
2361   * @middle: the pointer which may have the middle node (rare)
2362   * @mid_split: the split location for the middle node
       * @min: the minimum of the range covered by @b_node
2363   *
2364   * Return: the split of left.
2365   */
2366  static inline unsigned char mas_mab_to_node(struct ma_state *mas,
2367  	struct maple_big_node *b_node, struct maple_enode **left,
2368  	struct maple_enode **right, struct maple_enode **middle,
2369  	unsigned char *mid_split, unsigned long min)
2370  {
2371  	unsigned char split = 0;
2372  	unsigned char slot_count = mt_slots[b_node->type];
2373  
2374  	*left = mas_new_ma_node(mas, b_node);
2375  	*right = NULL;
2376  	*middle = NULL;
2377  	*mid_split = 0;
2378  
2379  	if (b_node->b_end < slot_count) {
2380  		split = b_node->b_end;
2381  	} else {
2382  		split = mab_calc_split(mas, b_node, mid_split, min);
2383  		*right = mas_new_ma_node(mas, b_node);
2384  	}
2385  
2386  	if (*mid_split)
2387  		*middle = mas_new_ma_node(mas, b_node);
2388  
2389  	return split;
2390  
2391  }
2392  
2393  /*
2394   * mab_set_b_end() - Add entry to b_node at b_node->b_end and increment the end
2395   * pointer.
2396   * @b_node: the big node to add the entry
2397   * @mas: the maple state to get the pivot (mas->max)
2398   * @entry: the entry to add, if NULL nothing happens.
2399   */
2400  static inline void mab_set_b_end(struct maple_big_node *b_node,
2401  				 struct ma_state *mas,
2402  				 void *entry)
2403  {
2404  	if (!entry)
2405  		return;
2406  
2407  	b_node->slot[b_node->b_end] = entry;
2408  	if (mt_is_alloc(mas->tree))
2409  		b_node->gap[b_node->b_end] = mas_max_gap(mas);
2410  	b_node->pivot[b_node->b_end++] = mas->max;
2411  }
2412  
2413  /*
2414   * mas_set_split_parent() - combine_then_separate helper function.  Sets the parent
2415   * of @mas->node to either @left or @right, depending on @slot and @split
2416   *
2417   * @mas: the maple state with the node that needs a parent
2418   * @left: possible parent 1
2419   * @right: possible parent 2
2420   * @slot: the slot in which @mas->node was placed
2421   * @split: the split location between @left and @right
2422   */
2423  static inline void mas_set_split_parent(struct ma_state *mas,
2424  					struct maple_enode *left,
2425  					struct maple_enode *right,
2426  					unsigned char *slot, unsigned char split)
2427  {
2428  	if (mas_is_none(mas))
2429  		return;
2430  
2431  	if ((*slot) <= split)
2432  		mas_set_parent(mas, mas->node, left, *slot);
2433  	else if (right)
2434  		mas_set_parent(mas, mas->node, right, (*slot) - split - 1);
2435  
2436  	(*slot)++;
2437  }
2438  
2439  /*
2440   * mte_mid_split_check() - Check if the next node passes the mid-split
2441   * @l: Pointer to the left encoded maple node.
2442   * @r: Pointer to the right encoded maple node.
2443   * @right: The encoded maple node that @r is switched to once past the mid-split.
2444   * @slot: The offset
2445   * @split: The split location.
2446   * @mid_split: The middle split.
2447   */
2448  static inline void mte_mid_split_check(struct maple_enode **l,
2449  				       struct maple_enode **r,
2450  				       struct maple_enode *right,
2451  				       unsigned char slot,
2452  				       unsigned char *split,
2453  				       unsigned char mid_split)
2454  {
2455  	if (*r == right)
2456  		return;
2457  
2458  	if (slot < mid_split)
2459  		return;
2460  
2461  	*l = *r;
2462  	*r = right;
2463  	*split = mid_split;
2464  }
2465  
2466  /*
2467   * mast_set_split_parents() - Helper function to set three nodes' parents.  Slot
2468   * is taken from @mast->l.
2469   * @mast: the maple subtree state
2470   * @left: the left node
       * @middle: the middle node, if any
2471   * @right: the right node
2472   * @split: the split location.
       * @mid_split: the second split location, if any.
2473   */
2474  static inline void mast_set_split_parents(struct maple_subtree_state *mast,
2475  					  struct maple_enode *left,
2476  					  struct maple_enode *middle,
2477  					  struct maple_enode *right,
2478  					  unsigned char split,
2479  					  unsigned char mid_split)
2480  {
2481  	unsigned char slot;
2482  	struct maple_enode *l = left;
2483  	struct maple_enode *r = right;
2484  
2485  	if (mas_is_none(mast->l))
2486  		return;
2487  
2488  	if (middle)
2489  		r = middle;
2490  
2491  	slot = mast->l->offset;
2492  
2493  	mte_mid_split_check(&l, &r, right, slot, &split, mid_split);
2494  	mas_set_split_parent(mast->l, l, r, &slot, split);
2495  
2496  	mte_mid_split_check(&l, &r, right, slot, &split, mid_split);
2497  	mas_set_split_parent(mast->m, l, r, &slot, split);
2498  
2499  	mte_mid_split_check(&l, &r, right, slot, &split, mid_split);
2500  	mas_set_split_parent(mast->r, l, r, &slot, split);
2501  }
2502  
2503  /*
2504   * mas_topiary_node() - Dispose of a single node
2505   * @mas: The maple state for pushing nodes
       * @tmp_mas: The maple state holding the node to be disposed of
2506   * @in_rcu: If the tree is in rcu mode
2507   *
2508   * The node will either be RCU freed or pushed back on the maple state.
2509   */
2510  static inline void mas_topiary_node(struct ma_state *mas,
2511  		struct ma_state *tmp_mas, bool in_rcu)
2512  {
2513  	struct maple_node *tmp;
2514  	struct maple_enode *enode;
2515  
2516  	if (mas_is_none(tmp_mas))
2517  		return;
2518  
2519  	enode = tmp_mas->node;
2520  	tmp = mte_to_node(enode);
2521  	mte_set_node_dead(enode);
2522  	if (in_rcu)
2523  		ma_free_rcu(tmp);
2524  	else
2525  		mas_push_node(mas, tmp);
2526  }
2527  
2528  /*
2529   * mas_topiary_replace() - Replace the data with new data, then repair the
2530   * parent links within the new tree.  Iterate over the old sub-tree, collect the
2531   * dead subtrees, and topiary the nodes that are no longer of use.
2532   *
2533   * The new tree will have up to three children with the correct parent.  Keep
2534   * track of the new entries as they need to be followed to find the next level
2535   * of new entries.
2536   *
2537   * The old tree will have up to three children with the old parent.  Keep track
2538   * of the old entries as they may have more nodes below replaced.  Nodes within
2539   * [index, last] are dead subtrees, others need to be freed and followed.
2540   *
2541   * @mas: The maple state pointing at the new data
2542   * @old_enode: The maple encoded node being replaced
2543   *
2544   */
2545  static inline void mas_topiary_replace(struct ma_state *mas,
2546  		struct maple_enode *old_enode)
2547  {
2548  	struct ma_state tmp[3], tmp_next[3];
2549  	MA_TOPIARY(subtrees, mas->tree);
2550  	bool in_rcu;
2551  	int i, n;
2552  
2553  	/* Place data in tree & then mark node as old */
2554  	mas_put_in_tree(mas, old_enode);
2555  
2556  	/* Update the parent pointers in the tree */
2557  	tmp[0] = *mas;
2558  	tmp[0].offset = 0;
2559  	tmp[1].status = ma_none;
2560  	tmp[2].status = ma_none;
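      	/*
      	 * At most three new nodes exist per level.  Walk them level by level,
      	 * re-parenting each node's children and collecting the new children
      	 * to process at the next level down.
      	 */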
2561  	while (!mte_is_leaf(tmp[0].node)) {
2562  		n = 0;
2563  		for (i = 0; i < 3; i++) {
2564  			if (mas_is_none(&tmp[i]))
2565  				continue;
2566  
2567  			while (n < 3) {
2568  				if (!mas_find_child(&tmp[i], &tmp_next[n]))
2569  					break;
2570  				n++;
2571  			}
2572  
2573  			mas_adopt_children(&tmp[i], tmp[i].node);
2574  		}
2575  
2576  		if (MAS_WARN_ON(mas, n == 0))
2577  			break;
2578  
2579  		while (n < 3)
2580  			tmp_next[n++].status = ma_none;
2581  
2582  		for (i = 0; i < 3; i++)
2583  			tmp[i] = tmp_next[i];
2584  	}
2585  
2586  	/* Collect the old nodes that need to be discarded */
2587  	if (mte_is_leaf(old_enode))
2588  		return mas_free(mas, old_enode);
2589  
2590  	tmp[0] = *mas;
2591  	tmp[0].offset = 0;
2592  	tmp[0].node = old_enode;
2593  	tmp[1].status = ma_none;
2594  	tmp[2].status = ma_none;
2595  	in_rcu = mt_in_rcu(mas->tree);
2596  	do {
2597  		n = 0;
2598  		for (i = 0; i < 3; i++) {
2599  			if (mas_is_none(&tmp[i]))
2600  				continue;
2601  
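      			/*
      			 * A child fully contained in [index, last] is a dead
      			 * sub-tree and is queued for later destruction; other
      			 * children must be followed and freed individually.
      			 */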
2602  			while (n < 3) {
2603  				if (!mas_find_child(&tmp[i], &tmp_next[n]))
2604  					break;
2605  
2606  				if ((tmp_next[n].min >= tmp_next->index) &&
2607  				    (tmp_next[n].max <= tmp_next->last)) {
2608  					mat_add(&subtrees, tmp_next[n].node);
2609  					tmp_next[n].status = ma_none;
2610  				} else {
2611  					n++;
2612  				}
2613  			}
2614  		}
2615  
2616  		if (MAS_WARN_ON(mas, n == 0))
2617  			break;
2618  
2619  		while (n < 3)
2620  			tmp_next[n++].status = ma_none;
2621  
2622  		for (i = 0; i < 3; i++) {
2623  			mas_topiary_node(mas, &tmp[i], in_rcu);
2624  			tmp[i] = tmp_next[i];
2625  		}
2626  	} while (!mte_is_leaf(tmp[0].node));
2627  
2628  	for (i = 0; i < 3; i++)
2629  		mas_topiary_node(mas, &tmp[i], in_rcu);
2630  
2631  	mas_mat_destroy(mas, &subtrees);
2632  }
2633  
2634  /*
2635   * mas_wmb_replace() - Write memory barrier and replace
2636   * @mas: The maple state
2637   * @old_enode: The old maple encoded node that is being replaced.
2638   *
2639   * Updates gap as necessary.
2640   */
2641  static inline void mas_wmb_replace(struct ma_state *mas,
2642  		struct maple_enode *old_enode)
2643  {
2644  	/* Insert the new data in the tree */
2645  	mas_topiary_replace(mas, old_enode);
2646  
2647  	if (mte_is_leaf(mas->node))
2648  		return;
2649  
2650  	mas_update_gap(mas);
2651  }
2652  
2653  /*
2654   * mast_cp_to_nodes() - Copy data out to nodes.
2655   * @mast: The maple subtree state
2656   * @left: The left encoded maple node
2657   * @middle: The middle encoded maple node
2658   * @right: The right encoded maple node
2659   * @split: The location to split between left and (middle ? middle : right)
2660   * @mid_split: The location to split between middle and right.
2661   */
2662  static inline void mast_cp_to_nodes(struct maple_subtree_state *mast,
2663  	struct maple_enode *left, struct maple_enode *middle,
2664  	struct maple_enode *right, unsigned char split, unsigned char mid_split)
2665  {
2666  	bool new_lmax = true;
2667  
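      	/*
      	 * Distribute the big node: offsets [0, split] go to the left node,
      	 * (split, mid_split] to the middle node (if any), and the remainder
      	 * to the right node, updating each node's min/max to match.
      	 */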
2668  	mas_node_or_none(mast->l, left);
2669  	mas_node_or_none(mast->m, middle);
2670  	mas_node_or_none(mast->r, right);
2671  
2672  	mast->l->min = mast->orig_l->min;
2673  	if (split == mast->bn->b_end) {
2674  		mast->l->max = mast->orig_r->max;
2675  		new_lmax = false;
2676  	}
2677  
2678  	mab_mas_cp(mast->bn, 0, split, mast->l, new_lmax);
2679  
2680  	if (middle) {
2681  		mab_mas_cp(mast->bn, 1 + split, mid_split, mast->m, true);
2682  		mast->m->min = mast->bn->pivot[split] + 1;
2683  		split = mid_split;
2684  	}
2685  
2686  	mast->r->max = mast->orig_r->max;
2687  	if (right) {
2688  		mab_mas_cp(mast->bn, 1 + split, mast->bn->b_end, mast->r, false);
2689  		mast->r->min = mast->bn->pivot[split] + 1;
2690  	}
2691  }
2692  
2693  /*
2694   * mast_combine_cp_left() - Copy in the original left side of the tree into the
2695   * combined data set in the maple subtree state big node.
2696   * @mast: The maple subtree state
2697   */
2698  static inline void mast_combine_cp_left(struct maple_subtree_state *mast)
2699  {
2700  	unsigned char l_slot = mast->orig_l->offset;
2701  
2702  	if (!l_slot)
2703  		return;
2704  
2705  	mas_mab_cp(mast->orig_l, 0, l_slot - 1, mast->bn, 0);
2706  }
2707  
2708  /*
2709   * mast_combine_cp_right() - Copy in the original right side of the tree into the
2710   * combined data set in the maple subtree state big node.
2711   * @mast: The maple subtree state
2712   */
2713  static inline void mast_combine_cp_right(struct maple_subtree_state *mast)
2714  {
2715  	if (mast->bn->pivot[mast->bn->b_end - 1] >= mast->orig_r->max)
2716  		return;
2717  
2718  	mas_mab_cp(mast->orig_r, mast->orig_r->offset + 1,
2719  		   mt_slot_count(mast->orig_r->node), mast->bn,
2720  		   mast->bn->b_end);
2721  	mast->orig_r->last = mast->orig_r->max;
2722  }
2723  
2724  /*
2725   * mast_sufficient() - Check if the maple subtree state has enough data in the big
2726   * node to create at least one sufficient node
2727   * @mast: the maple subtree state
2728   */
2729  static inline bool mast_sufficient(struct maple_subtree_state *mast)
2730  {
2731  	if (mast->bn->b_end > mt_min_slot_count(mast->orig_l->node))
2732  		return true;
2733  
2734  	return false;
2735  }
2736  
2737  /*
2738   * mast_overflow() - Check if there is too much data in the subtree state for a
2739   * single node.
2740   * @mast: The maple subtree state
2741   */
2742  static inline bool mast_overflow(struct maple_subtree_state *mast)
2743  {
2744  	if (mast->bn->b_end >= mt_slot_count(mast->orig_l->node))
2745  		return true;
2746  
2747  	return false;
2748  }
2749  
2750  static inline void *mtree_range_walk(struct ma_state *mas)
2751  {
2752  	unsigned long *pivots;
2753  	unsigned char offset;
2754  	struct maple_node *node;
2755  	struct maple_enode *next, *last;
2756  	enum maple_type type;
2757  	void __rcu **slots;
2758  	unsigned char end;
2759  	unsigned long max, min;
2760  	unsigned long prev_max, prev_min;
2761  
2762  	next = mas->node;
2763  	min = mas->min;
2764  	max = mas->max;
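      	/*
      	 * Walk from the current node down to a leaf.  At each level, find the
      	 * first pivot that covers mas->index, narrow [min, max] to that
      	 * slot's range, and descend through the slot.
      	 */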
2765  	do {
2766  		last = next;
2767  		node = mte_to_node(next);
2768  		type = mte_node_type(next);
2769  		pivots = ma_pivots(node, type);
2770  		end = ma_data_end(node, type, pivots, max);
2771  		prev_min = min;
2772  		prev_max = max;
2773  		if (pivots[0] >= mas->index) {
2774  			offset = 0;
2775  			max = pivots[0];
2776  			goto next;
2777  		}
2778  
2779  		offset = 1;
2780  		while (offset < end) {
2781  			if (pivots[offset] >= mas->index) {
2782  				max = pivots[offset];
2783  				break;
2784  			}
2785  			offset++;
2786  		}
2787  
2788  		min = pivots[offset - 1] + 1;
2789  next:
2790  		slots = ma_slots(node, type);
2791  		next = mt_slot(mas->tree, slots, offset);
2792  		if (unlikely(ma_dead_node(node)))
2793  			goto dead_node;
2794  	} while (!ma_is_leaf(type));
2795  
2796  	mas->end = end;
2797  	mas->offset = offset;
2798  	mas->index = min;
2799  	mas->last = max;
2800  	mas->min = prev_min;
2801  	mas->max = prev_max;
2802  	mas->node = last;
2803  	return (void *)next;
2804  
2805  dead_node:
2806  	mas_reset(mas);
2807  	return NULL;
2808  }
2809  
2810  /*
2811   * mas_spanning_rebalance() - Rebalance across two nodes which may not be peers.
2812   * @mas: The starting maple state
2813   * @mast: The maple_subtree_state, keeps track of 4 maple states.
2814   * @count: The estimated count of iterations needed.
2815   *
2816   * Follow the tree upwards from @mast->orig_l and @mast->orig_r for @count, or
2817   * until the root is hit.  First @mast->bn is split into two entries which are
2818   * inserted into the next iteration of the loop.  @mast->bn is returned populated
2819   * with the final iteration.  @mas is used to obtain allocations.  @mast->orig_l
2820   * keeps track of the nodes that will remain active by using its index and last
2821   * to account for what has been copied into the new sub-tree.  The update of
2822   * @mast->orig_l->last is used later to find the slots that will need to be
2823   * either freed or destroyed.  @mast->orig_l->depth keeps track of the height of
2824   * the new sub-tree in case the sub-tree becomes the full tree.
2825   */
2826  static void mas_spanning_rebalance(struct ma_state *mas,
2827  		struct maple_subtree_state *mast, unsigned char count)
2828  {
2829  	unsigned char split, mid_split;
2830  	unsigned char slot = 0;
2831  	struct maple_enode *left = NULL, *middle = NULL, *right = NULL;
2832  	struct maple_enode *old_enode;
2833  
2834  	MA_STATE(l_mas, mas->tree, mas->index, mas->index);
2835  	MA_STATE(r_mas, mas->tree, mas->index, mas->last);
2836  	MA_STATE(m_mas, mas->tree, mas->index, mas->index);
2837  
2838  	/*
2839  	 * The tree needs to be rebalanced and leaves need to be kept at the same level.
2840  	 * Rebalancing is done by use of the ``struct maple_topiary``.
2841  	 */
2842  	mast->l = &l_mas;
2843  	mast->m = &m_mas;
2844  	mast->r = &r_mas;
2845  	l_mas.status = r_mas.status = m_mas.status = ma_none;
2846  
2847  	/* If this is not root and the data is insufficient, rebalance with a neighbour. */
2848  	if (((mast->orig_l->min != 0) || (mast->orig_r->max != ULONG_MAX)) &&
2849  	    unlikely(mast->bn->b_end <= mt_min_slots[mast->bn->type]))
2850  		mast_spanning_rebalance(mast);
2851  
2852  	l_mas.depth = 0;
2853  
2854  	/*
2855  	 * Each level of the tree is examined and balanced, pushing data to the left or
2856  	 * right, or rebalancing against left or right nodes is employed to avoid
2857  	 * rippling up the tree to limit the amount of churn.  Once a new sub-section of
2858  	 * the tree is created, there may be a mix of new and old nodes.  The old nodes
2859  	 * will have the incorrect parent pointers and currently be in two trees: the
2860  	 * original tree and the partially new tree.  To remedy the parent pointers in
2861  	 * the old tree, the new data is swapped into the active tree and a walk down
2862  	 * the tree is performed and the parent pointers are updated.
2863  	 * See mas_topiary_replace() for more information.
2864  	 */
2865  	while (count--) {
2866  		mast->bn->b_end--;
2867  		mast->bn->type = mte_node_type(mast->orig_l->node);
2868  		split = mas_mab_to_node(mas, mast->bn, &left, &right, &middle,
2869  					&mid_split, mast->orig_l->min);
2870  		mast_set_split_parents(mast, left, middle, right, split,
2871  				       mid_split);
2872  		mast_cp_to_nodes(mast, left, middle, right, split, mid_split);
2873  
2874  		/*
2875  		 * Copy data from next level in the tree to mast->bn from next
2876  		 * iteration
2877  		 */
2878  		memset(mast->bn, 0, sizeof(struct maple_big_node));
2879  		mast->bn->type = mte_node_type(left);
2880  		l_mas.depth++;
2881  
2882  		/* Root already stored in l->node. */
2883  		if (mas_is_root_limits(mast->l))
2884  			goto new_root;
2885  
2886  		mast_ascend(mast);
2887  		mast_combine_cp_left(mast);
2888  		l_mas.offset = mast->bn->b_end;
2889  		mab_set_b_end(mast->bn, &l_mas, left);
2890  		mab_set_b_end(mast->bn, &m_mas, middle);
2891  		mab_set_b_end(mast->bn, &r_mas, right);
2892  
2893  		/* Copy anything necessary out of the right node. */
2894  		mast_combine_cp_right(mast);
2895  		mast->orig_l->last = mast->orig_l->max;
2896  
2897  		if (mast_sufficient(mast))
2898  			continue;
2899  
2900  		if (mast_overflow(mast))
2901  			continue;
2902  
2903  		/* May be a new root stored in mast->bn */
2904  		if (mas_is_root_limits(mast->orig_l))
2905  			break;
2906  
2907  		mast_spanning_rebalance(mast);
2908  
2909  		/* rebalancing from other nodes may require another loop. */
2910  		if (!count)
2911  			count++;
2912  	}
2913  
2914  	l_mas.node = mt_mk_node(ma_mnode_ptr(mas_pop_node(mas)),
2915  				mte_node_type(mast->orig_l->node));
2916  	l_mas.depth++;
2917  	mab_mas_cp(mast->bn, 0, mt_slots[mast->bn->type] - 1, &l_mas, true);
2918  	mas_set_parent(mas, left, l_mas.node, slot);
2919  	if (middle)
2920  		mas_set_parent(mas, middle, l_mas.node, ++slot);
2921  
2922  	if (right)
2923  		mas_set_parent(mas, right, l_mas.node, ++slot);
2924  
2925  	if (mas_is_root_limits(mast->l)) {
2926  new_root:
2927  		mas_mn(mast->l)->parent = ma_parent_ptr(mas_tree_parent(mas));
2928  		while (!mte_is_root(mast->orig_l->node))
2929  			mast_ascend(mast);
2930  	} else {
2931  		mas_mn(&l_mas)->parent = mas_mn(mast->orig_l)->parent;
2932  	}
2933  
2934  	old_enode = mast->orig_l->node;
2935  	mas->depth = l_mas.depth;
2936  	mas->node = l_mas.node;
2937  	mas->min = l_mas.min;
2938  	mas->max = l_mas.max;
2939  	mas->offset = l_mas.offset;
2940  	mas_wmb_replace(mas, old_enode);
2941  	mtree_range_walk(mas);
2942  	return;
2943  }
2944  
2945  /*
2946   * mas_rebalance() - Rebalance a given node.
2947   * @mas: The maple state
2948   * @b_node: The big maple node.
2949   *
2950   * Rebalance two nodes into a single node or two new nodes that are sufficient.
2951   * Continue upwards until tree is sufficient.
2952   */
2953  static inline void mas_rebalance(struct ma_state *mas,
2954  				struct maple_big_node *b_node)
2955  {
2956  	char empty_count = mas_mt_height(mas);
2957  	struct maple_subtree_state mast;
2958  	unsigned char shift, b_end = ++b_node->b_end;
2959  
2960  	MA_STATE(l_mas, mas->tree, mas->index, mas->last);
2961  	MA_STATE(r_mas, mas->tree, mas->index, mas->last);
2962  
2963  	trace_ma_op(__func__, mas);
2964  
2965  	/*
2966  	 * Rebalancing occurs if a node is insufficient.  Data is rebalanced
2967  	 * against the node to the right if it exists, otherwise the node to the
2968  	 * left of this node is rebalanced against this node.  If rebalancing
2969  	 * causes just one node to be produced instead of two, then the parent
2970  	 * is also examined and rebalanced if it is insufficient.  Every level
2971  	 * tries to combine the data in the same way.  If one node contains the
2972  	 * entire range of the tree, then that node is used as a new root node.
2973  	 */
2974  
2975  	mast.orig_l = &l_mas;
2976  	mast.orig_r = &r_mas;
2977  	mast.bn = b_node;
2978  	mast.bn->type = mte_node_type(mas->node);
2979  
2980  	l_mas = r_mas = *mas;
2981  
2982  	if (mas_next_sibling(&r_mas)) {
2983  		mas_mab_cp(&r_mas, 0, mt_slot_count(r_mas.node), b_node, b_end);
2984  		r_mas.last = r_mas.index = r_mas.max;
2985  	} else {
2986  		mas_prev_sibling(&l_mas);
2987  		shift = mas_data_end(&l_mas) + 1;
2988  		mab_shift_right(b_node, shift);
2989  		mas->offset += shift;
2990  		mas_mab_cp(&l_mas, 0, shift - 1, b_node, 0);
2991  		b_node->b_end = shift + b_end;
2992  		l_mas.index = l_mas.last = l_mas.min;
2993  	}
2994  
2995  	return mas_spanning_rebalance(mas, &mast, empty_count);
2996  }
2997  
2998  /*
2999   * mas_destroy_rebalance() - Rebalance left-most node while destroying the maple
3000   * state.
3001   * @mas: The maple state
3002   * @end: The end of the left-most node.
3003   *
3004   * During a mass-insert event (such as forking), it may be necessary to
3005   * rebalance the left-most node when it is not sufficient.
3006   */
3007  static inline void mas_destroy_rebalance(struct ma_state *mas, unsigned char end)
3008  {
3009  	enum maple_type mt = mte_node_type(mas->node);
3010  	struct maple_node reuse, *newnode, *parent, *new_left, *left, *node;
3011  	struct maple_enode *eparent, *old_eparent;
3012  	unsigned char offset, tmp, split = mt_slots[mt] / 2;
3013  	void __rcu **l_slots, **slots;
3014  	unsigned long *l_pivs, *pivs, gap;
3015  	bool in_rcu = mt_in_rcu(mas->tree);
3016  
3017  	MA_STATE(l_mas, mas->tree, mas->index, mas->last);
3018  
3019  	l_mas = *mas;
3020  	mas_prev_sibling(&l_mas);
3021  
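      	/*
      	 * Move the upper portion of the previous sibling's data in front of
      	 * this node's data so that the node contains enough entries, fixing
      	 * up the parent pivot and gaps afterwards.
      	 */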
3022  	/* set up node. */
3023  	if (in_rcu) {
3024  		newnode = mas_pop_node(mas);
3025  	} else {
3026  		newnode = &reuse;
3027  	}
3028  
3029  	node = mas_mn(mas);
3030  	newnode->parent = node->parent;
3031  	slots = ma_slots(newnode, mt);
3032  	pivs = ma_pivots(newnode, mt);
3033  	left = mas_mn(&l_mas);
3034  	l_slots = ma_slots(left, mt);
3035  	l_pivs = ma_pivots(left, mt);
3036  	if (!l_slots[split])
3037  		split++;
3038  	tmp = mas_data_end(&l_mas) - split;
3039  
3040  	memcpy(slots, l_slots + split + 1, sizeof(void *) * tmp);
3041  	memcpy(pivs, l_pivs + split + 1, sizeof(unsigned long) * tmp);
3042  	pivs[tmp] = l_mas.max;
3043  	memcpy(slots + tmp, ma_slots(node, mt), sizeof(void *) * end);
3044  	memcpy(pivs + tmp, ma_pivots(node, mt), sizeof(unsigned long) * end);
3045  
3046  	l_mas.max = l_pivs[split];
3047  	mas->min = l_mas.max + 1;
3048  	old_eparent = mt_mk_node(mte_parent(l_mas.node),
3049  			     mas_parent_type(&l_mas, l_mas.node));
3050  	tmp += end;
3051  	if (!in_rcu) {
3052  		unsigned char max_p = mt_pivots[mt];
3053  		unsigned char max_s = mt_slots[mt];
3054  
3055  		if (tmp < max_p)
3056  			memset(pivs + tmp, 0,
3057  			       sizeof(unsigned long) * (max_p - tmp));
3058  
3059  		if (tmp < mt_slots[mt])
3060  			memset(slots + tmp, 0, sizeof(void *) * (max_s - tmp));
3061  
3062  		memcpy(node, newnode, sizeof(struct maple_node));
3063  		ma_set_meta(node, mt, 0, tmp - 1);
3064  		mte_set_pivot(old_eparent, mte_parent_slot(l_mas.node),
3065  			      l_pivs[split]);
3066  
3067  		/* Remove data from l_pivs. */
3068  		tmp = split + 1;
3069  		memset(l_pivs + tmp, 0, sizeof(unsigned long) * (max_p - tmp));
3070  		memset(l_slots + tmp, 0, sizeof(void *) * (max_s - tmp));
3071  		ma_set_meta(left, mt, 0, split);
3072  		eparent = old_eparent;
3073  
3074  		goto done;
3075  	}
3076  
3077  	/* RCU requires replacing both l_mas, mas, and parent. */
3078  	mas->node = mt_mk_node(newnode, mt);
3079  	ma_set_meta(newnode, mt, 0, tmp);
3080  
3081  	new_left = mas_pop_node(mas);
3082  	new_left->parent = left->parent;
3083  	mt = mte_node_type(l_mas.node);
3084  	slots = ma_slots(new_left, mt);
3085  	pivs = ma_pivots(new_left, mt);
3086  	memcpy(slots, l_slots, sizeof(void *) * split);
3087  	memcpy(pivs, l_pivs, sizeof(unsigned long) * split);
3088  	ma_set_meta(new_left, mt, 0, split);
3089  	l_mas.node = mt_mk_node(new_left, mt);
3090  
3091  	/* replace parent. */
3092  	offset = mte_parent_slot(mas->node);
3093  	mt = mas_parent_type(&l_mas, l_mas.node);
3094  	parent = mas_pop_node(mas);
3095  	slots = ma_slots(parent, mt);
3096  	pivs = ma_pivots(parent, mt);
3097  	memcpy(parent, mte_to_node(old_eparent), sizeof(struct maple_node));
3098  	rcu_assign_pointer(slots[offset], mas->node);
3099  	rcu_assign_pointer(slots[offset - 1], l_mas.node);
3100  	pivs[offset - 1] = l_mas.max;
3101  	eparent = mt_mk_node(parent, mt);
3102  done:
3103  	gap = mas_leaf_max_gap(mas);
3104  	mte_set_gap(eparent, mte_parent_slot(mas->node), gap);
3105  	gap = mas_leaf_max_gap(&l_mas);
3106  	mte_set_gap(eparent, mte_parent_slot(l_mas.node), gap);
3107  	mas_ascend(mas);
3108  
3109  	if (in_rcu) {
3110  		mas_replace_node(mas, old_eparent);
3111  		mas_adopt_children(mas, mas->node);
3112  	}
3113  
3114  	mas_update_gap(mas);
3115  }
3116  
3117  /*
3118   * mas_split_final_node() - Split the final node in a subtree operation.
3119   * @mast: the maple subtree state
3120   * @mas: The maple state
3121   * @height: The height of the tree in case it's a new root.
3122   */
3123  static inline void mas_split_final_node(struct maple_subtree_state *mast,
3124  					struct ma_state *mas, int height)
3125  {
3126  	struct maple_enode *ancestor;
3127  
3128  	if (mte_is_root(mas->node)) {
3129  		if (mt_is_alloc(mas->tree))
3130  			mast->bn->type = maple_arange_64;
3131  		else
3132  			mast->bn->type = maple_range_64;
3133  		mas->depth = height;
3134  	}
3135  	/*
3136  	 * Only a single node is used here; it could be the root.
3137  	 * The big node data should just fit in a single node.
3138  	 */
3139  	ancestor = mas_new_ma_node(mas, mast->bn);
3140  	mas_set_parent(mas, mast->l->node, ancestor, mast->l->offset);
3141  	mas_set_parent(mas, mast->r->node, ancestor, mast->r->offset);
3142  	mte_to_node(ancestor)->parent = mas_mn(mas)->parent;
3143  
3144  	mast->l->node = ancestor;
3145  	mab_mas_cp(mast->bn, 0, mt_slots[mast->bn->type] - 1, mast->l, true);
3146  	mas->offset = mast->bn->b_end - 1;
3147  }
3148  
3149  /*
3150   * mast_fill_bnode() - Copy data into the big node in the subtree state
3151   * @mast: The maple subtree state
3152   * @mas: the maple state
3153   * @skip: The number of entries to skip when inserting the new nodes.
3154   */
3155  static inline void mast_fill_bnode(struct maple_subtree_state *mast,
3156  					 struct ma_state *mas,
3157  					 unsigned char skip)
3158  {
3159  	bool cp = true;
3160  	unsigned char split;
3161  
3162  	memset(mast->bn->gap, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->gap));
3163  	memset(mast->bn->slot, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->slot));
3164  	memset(mast->bn->pivot, 0, sizeof(unsigned long) * ARRAY_SIZE(mast->bn->pivot));
3165  	mast->bn->b_end = 0;
3166  
3167  	if (mte_is_root(mas->node)) {
3168  		cp = false;
3169  	} else {
3170  		mas_ascend(mas);
3171  		mas->offset = mte_parent_slot(mas->node);
3172  	}
3173  
3174  	if (cp && mast->l->offset)
3175  		mas_mab_cp(mas, 0, mast->l->offset - 1, mast->bn, 0);
3176  
3177  	split = mast->bn->b_end;
3178  	mab_set_b_end(mast->bn, mast->l, mast->l->node);
3179  	mast->r->offset = mast->bn->b_end;
3180  	mab_set_b_end(mast->bn, mast->r, mast->r->node);
3181  	if (mast->bn->pivot[mast->bn->b_end - 1] == mas->max)
3182  		cp = false;
3183  
3184  	if (cp)
3185  		mas_mab_cp(mas, split + skip, mt_slot_count(mas->node) - 1,
3186  			   mast->bn, mast->bn->b_end);
3187  
3188  	mast->bn->b_end--;
3189  	mast->bn->type = mte_node_type(mas->node);
3190  }
3191  
3192  /*
3193   * mast_split_data() - Split the data in the subtree state big node into regular
3194   * nodes.
3195   * @mast: The maple subtree state
3196   * @mas: The maple state
3197   * @split: The location to split the big node
3198   */
3199  static inline void mast_split_data(struct maple_subtree_state *mast,
3200  	   struct ma_state *mas, unsigned char split)
3201  {
3202  	unsigned char p_slot;
3203  
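      	/* Offsets [0, split] go to the left node, the rest to the right node. */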
3204  	mab_mas_cp(mast->bn, 0, split, mast->l, true);
3205  	mte_set_pivot(mast->r->node, 0, mast->r->max);
3206  	mab_mas_cp(mast->bn, split + 1, mast->bn->b_end, mast->r, false);
3207  	mast->l->offset = mte_parent_slot(mas->node);
3208  	mast->l->max = mast->bn->pivot[split];
3209  	mast->r->min = mast->l->max + 1;
3210  	if (mte_is_leaf(mas->node))
3211  		return;
3212  
3213  	p_slot = mast->orig_l->offset;
3214  	mas_set_split_parent(mast->orig_l, mast->l->node, mast->r->node,
3215  			     &p_slot, split);
3216  	mas_set_split_parent(mast->orig_r, mast->l->node, mast->r->node,
3217  			     &p_slot, split);
3218  }
3219  
3220  /*
3221   * mas_push_data() - Push data to the left or right node if there is room,
3222   * instead of splitting the node.
3223   * @mas: The maple state
3224   * @height: The current height of the maple state
3225   * @mast: The maple subtree state
3226   * @left: Push left or not.
3227   *
3228   * Keeping the height of the tree low means faster lookups.
3229   *
3230   * Return: True if pushed, false otherwise.
3231   */
3232  static inline bool mas_push_data(struct ma_state *mas, int height,
3233  				 struct maple_subtree_state *mast, bool left)
3234  {
3235  	unsigned char slot_total = mast->bn->b_end;
3236  	unsigned char end, space, split;
3237  
3238  	MA_STATE(tmp_mas, mas->tree, mas->index, mas->last);
3239  	tmp_mas = *mas;
3240  	tmp_mas.depth = mast->l->depth;
3241  
3242  	if (left && !mas_prev_sibling(&tmp_mas))
3243  		return false;
3244  	else if (!left && !mas_next_sibling(&tmp_mas))
3245  		return false;
3246  
3247  	end = mas_data_end(&tmp_mas);
3248  	slot_total += end;
3249  	space = 2 * mt_slot_count(mas->node) - 2;
3250  	/* -2 instead of -1 to ensure there isn't a triple split */
3251  	if (ma_is_leaf(mast->bn->type))
3252  		space--;
3253  
3254  	if (mas->max == ULONG_MAX)
3255  		space--;
3256  
3257  	if (slot_total >= space)
3258  		return false;
3259  
3260  	/* Get the data; Fill mast->bn */
3261  	mast->bn->b_end++;
3262  	if (left) {
3263  		mab_shift_right(mast->bn, end + 1);
3264  		mas_mab_cp(&tmp_mas, 0, end, mast->bn, 0);
3265  		mast->bn->b_end = slot_total + 1;
3266  	} else {
3267  		mas_mab_cp(&tmp_mas, 0, end, mast->bn, mast->bn->b_end);
3268  	}
3269  
3270  	/* Configure mast for splitting of mast->bn */
3271  	split = mt_slots[mast->bn->type] - 2;
3272  	if (left) {
3273  		/*  Switch mas to prev node  */
3274  		*mas = tmp_mas;
3275  		/* Start using mast->l for the left side. */
3276  		tmp_mas.node = mast->l->node;
3277  		*mast->l = tmp_mas;
3278  	} else {
3279  		tmp_mas.node = mast->r->node;
3280  		*mast->r = tmp_mas;
3281  		split = slot_total - split;
3282  	}
3283  	split = mab_no_null_split(mast->bn, split, mt_slots[mast->bn->type]);
3284  	/* Update parent slot for split calculation. */
3285  	if (left)
3286  		mast->orig_l->offset += end + 1;
3287  
3288  	mast_split_data(mast, mas, split);
3289  	mast_fill_bnode(mast, mas, 2);
3290  	mas_split_final_node(mast, mas, height + 1);
3291  	return true;
3292  }
3293  
3294  /*
3295   * mas_split() - Split data that is too big for one node into two.
3296   * @mas: The maple state
3297   * @b_node: The maple big node
3298   */
3299  static void mas_split(struct ma_state *mas, struct maple_big_node *b_node)
3300  {
3301  	struct maple_subtree_state mast;
3302  	int height = 0;
3303  	unsigned char mid_split, split = 0;
3304  	struct maple_enode *old;
3305  
3306  	/*
3307  	 * Splitting is handled differently from any other B-tree; the Maple
3308  	 * Tree splits upwards.  Splitting up means that the split operation
3309  	 * occurs when the walk of the tree hits the leaves and not on the way
3310  	 * down.  The reason for splitting up is that it is impossible to know
3311  	 * how much space will be needed until the leaf is (or leaves are)
3312  	 * reached.  Since overwriting data is allowed and a range could
3313  	 * overwrite more than one range or result in changing one entry into 3
3314  	 * entries, it is impossible to know if a split is required until the
3315  	 * data is examined.
3316  	 *
3317  	 * Splitting is a balancing act between keeping allocations to a minimum
3318  	 * and avoiding a 'jitter' event where a tree is expanded to make room
3319  	 * for an entry followed by a contraction when the entry is removed.  To
3320  	 * accomplish the balance, there are empty slots remaining in both left
3321  	 * and right nodes after a split.
3322  	 */
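	/*
	 * As a rough illustration using only the public API (mt and ptr are
	 * placeholders): storing into the middle of an existing range can
	 * turn one entry into three, which is why the space required cannot
	 * be known before the leaves are examined:
	 *
	 *	mtree_store_range(mt, 0, 100, ptr, GFP_KERNEL);
	 *	mtree_store_range(mt, 40, 60, NULL, GFP_KERNEL);
	 *
	 * leaves the tree holding 0-39 => ptr, 40-60 => NULL and
	 * 61-100 => ptr.
	 */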
3323  	MA_STATE(l_mas, mas->tree, mas->index, mas->last);
3324  	MA_STATE(r_mas, mas->tree, mas->index, mas->last);
3325  	MA_STATE(prev_l_mas, mas->tree, mas->index, mas->last);
3326  	MA_STATE(prev_r_mas, mas->tree, mas->index, mas->last);
3327  
3328  	trace_ma_op(__func__, mas);
3329  	mas->depth = mas_mt_height(mas);
3330  
3331  	mast.l = &l_mas;
3332  	mast.r = &r_mas;
3333  	mast.orig_l = &prev_l_mas;
3334  	mast.orig_r = &prev_r_mas;
3335  	mast.bn = b_node;
3336  
3337  	while (height++ <= mas->depth) {
3338  		if (mt_slots[b_node->type] > b_node->b_end) {
3339  			mas_split_final_node(&mast, mas, height);
3340  			break;
3341  		}
3342  
3343  		l_mas = r_mas = *mas;
3344  		l_mas.node = mas_new_ma_node(mas, b_node);
3345  		r_mas.node = mas_new_ma_node(mas, b_node);
3346  		/*
3347  		 * Another way that 'jitter' is avoided is to terminate a split up early if the
3348  		 * left or right node has space to spare.  This is referred to as "pushing left"
3349  		 * or "pushing right" and is similar to the B* tree, except the nodes left or
3350  		 * right can rarely be reused due to RCU, but the ripple upwards is halted which
3351  		 * is a significant savings.
3352  		 */
3353  		/* Try to push left. */
3354  		if (mas_push_data(mas, height, &mast, true))
3355  			break;
3356  		/* Try to push right. */
3357  		if (mas_push_data(mas, height, &mast, false))
3358  			break;
3359  
3360  		split = mab_calc_split(mas, b_node, &mid_split, prev_l_mas.min);
3361  		mast_split_data(&mast, mas, split);
3362  		/*
3363  		 * This is usually already correct, but mab_mas_cp() in the
3364  		 * call above overwrites r->max, so set it explicitly here.
3365  		 */
3366  		mast.r->max = mas->max;
3367  		mast_fill_bnode(&mast, mas, 1);
3368  		prev_l_mas = *mast.l;
3369  		prev_r_mas = *mast.r;
3370  	}
3371  
3372  	/* Set the original node as dead */
3373  	old = mas->node;
3374  	mas->node = l_mas.node;
3375  	mas_wmb_replace(mas, old);
3376  	mtree_range_walk(mas);
3377  	return;
3378  }
3379  
3380  /*
3381   * mas_commit_b_node() - Commit the big node into the tree.
3382   * @wr_mas: The maple write state
3383   * @b_node: The maple big node
3384   */
3385  static noinline_for_kasan void mas_commit_b_node(struct ma_wr_state *wr_mas,
3386  			    struct maple_big_node *b_node)
3387  {
3388  	enum store_type type = wr_mas->mas->store_type;
3389  
3390  	WARN_ON_ONCE(type != wr_rebalance && type != wr_split_store);
3391  
3392  	if (type == wr_rebalance)
3393  		return mas_rebalance(wr_mas->mas, b_node);
3394  
3395  	return mas_split(wr_mas->mas, b_node);
3396  }
3397  
3398  /*
3399   * mas_root_expand() - Expand a root to a node
3400   * @mas: The maple state
3401   * @entry: The entry to store into the tree
3402   */
3403  static inline int mas_root_expand(struct ma_state *mas, void *entry)
3404  {
3405  	void *contents = mas_root_locked(mas);
3406  	enum maple_type type = maple_leaf_64;
3407  	struct maple_node *node;
3408  	void __rcu **slots;
3409  	unsigned long *pivots;
3410  	int slot = 0;
3411  
3412  	node = mas_pop_node(mas);
3413  	pivots = ma_pivots(node, type);
3414  	slots = ma_slots(node, type);
3415  	node->parent = ma_parent_ptr(mas_tree_parent(mas));
3416  	mas->node = mt_mk_node(node, type);
3417  	mas->status = ma_active;
3418  
3419  	if (mas->index) {
3420  		if (contents) {
3421  			rcu_assign_pointer(slots[slot], contents);
3422  			if (likely(mas->index > 1))
3423  				slot++;
3424  		}
3425  		pivots[slot++] = mas->index - 1;
3426  	}
3427  
3428  	rcu_assign_pointer(slots[slot], entry);
3429  	mas->offset = slot;
3430  	pivots[slot] = mas->last;
3431  	if (mas->last != ULONG_MAX)
3432  		pivots[++slot] = ULONG_MAX;
3433  
3434  	mas->depth = 1;
3435  	mas_set_height(mas);
3436  	ma_set_meta(node, maple_leaf_64, 0, slot);
3437  	/* swap the new root into the tree */
3438  	rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node));
3439  	return slot;
3440  }
3441  
3442  static inline void mas_store_root(struct ma_state *mas, void *entry)
3443  {
3444  	if (likely((mas->last != 0) || (mas->index != 0)))
3445  		mas_root_expand(mas, entry);
3446  	else if (((unsigned long) (entry) & 3) == 2)
3447  		mas_root_expand(mas, entry);
3448  	else {
3449  		rcu_assign_pointer(mas->tree->ma_root, entry);
3450  		mas->status = ma_start;
3451  	}
3452  }
3453  
3454  /*
3455   * mas_is_span_wr() - Check if the write needs to be treated as a write that
3456   * spans the node.
3457   * @wr_mas: The maple write state
3458   *
3459   * Spanning writes are writes that start in one node and end in another, or
3460   * writes of a %NULL that will cause the node to end with a %NULL.
3461   *
3462   * Return: True if this is a spanning write, false otherwise.
3463   */
3464  static bool mas_is_span_wr(struct ma_wr_state *wr_mas)
3465  {
3466  	unsigned long max = wr_mas->r_max;
3467  	unsigned long last = wr_mas->mas->last;
3468  	enum maple_type type = wr_mas->type;
3469  	void *entry = wr_mas->entry;
3470  
3471  	/* Contained in this pivot, fast path */
3472  	if (last < max)
3473  		return false;
3474  
3475  	if (ma_is_leaf(type)) {
3476  		max = wr_mas->mas->max;
3477  		if (last < max)
3478  			return false;
3479  	}
3480  
3481  	if (last == max) {
3482  		/*
3483  		 * The last entry of a leaf node cannot be NULL unless it is the
3484  		 * rightmost node (writing ULONG_MAX); otherwise it spans slots.
3485  		 */
3486  		if (entry || last == ULONG_MAX)
3487  			return false;
3488  	}
3489  
3490  	trace_ma_write(__func__, wr_mas->mas, wr_mas->r_max, entry);
3491  	return true;
3492  }
3493  
3494  static inline void mas_wr_walk_descend(struct ma_wr_state *wr_mas)
3495  {
3496  	wr_mas->type = mte_node_type(wr_mas->mas->node);
3497  	mas_wr_node_walk(wr_mas);
3498  	wr_mas->slots = ma_slots(wr_mas->node, wr_mas->type);
3499  }
3500  
3501  static inline void mas_wr_walk_traverse(struct ma_wr_state *wr_mas)
3502  {
3503  	wr_mas->mas->max = wr_mas->r_max;
3504  	wr_mas->mas->min = wr_mas->r_min;
3505  	wr_mas->mas->node = wr_mas->content;
3506  	wr_mas->mas->offset = 0;
3507  	wr_mas->mas->depth++;
3508  }
3509  /*
3510   * mas_wr_walk() - Walk the tree for a write.
3511   * @wr_mas: The maple write state
3512   *
3513   * Uses mas_slot_locked() and does not need to worry about dead nodes.
3514   *
3515   * Return: True if it's contained in a node, false on spanning write.
3516   */
3517  static bool mas_wr_walk(struct ma_wr_state *wr_mas)
3518  {
3519  	struct ma_state *mas = wr_mas->mas;
3520  
3521  	while (true) {
3522  		mas_wr_walk_descend(wr_mas);
3523  		if (unlikely(mas_is_span_wr(wr_mas)))
3524  			return false;
3525  
3526  		wr_mas->content = mas_slot_locked(mas, wr_mas->slots,
3527  						  mas->offset);
3528  		if (ma_is_leaf(wr_mas->type))
3529  			return true;
3530  
3531  		mas_wr_walk_traverse(wr_mas);
3532  	}
3533  
3534  	return true;
3535  }
3536  
3537  static void mas_wr_walk_index(struct ma_wr_state *wr_mas)
3538  {
3539  	struct ma_state *mas = wr_mas->mas;
3540  
3541  	while (true) {
3542  		mas_wr_walk_descend(wr_mas);
3543  		wr_mas->content = mas_slot_locked(mas, wr_mas->slots,
3544  						  mas->offset);
3545  		if (ma_is_leaf(wr_mas->type))
3546  			return;
3547  		mas_wr_walk_traverse(wr_mas);
3548  	}
3549  }
3550  /*
3551   * mas_extend_spanning_null() - Extend a store of a %NULL to include surrounding %NULLs.
3552   * @l_wr_mas: The left maple write state
3553   * @r_wr_mas: The right maple write state
3554   */
3555  static inline void mas_extend_spanning_null(struct ma_wr_state *l_wr_mas,
3556  					    struct ma_wr_state *r_wr_mas)
3557  {
3558  	struct ma_state *r_mas = r_wr_mas->mas;
3559  	struct ma_state *l_mas = l_wr_mas->mas;
3560  	unsigned char l_slot;
3561  
3562  	l_slot = l_mas->offset;
3563  	if (!l_wr_mas->content)
3564  		l_mas->index = l_wr_mas->r_min;
3565  
3566  	if ((l_mas->index == l_wr_mas->r_min) &&
3567  		 (l_slot &&
3568  		  !mas_slot_locked(l_mas, l_wr_mas->slots, l_slot - 1))) {
3569  		if (l_slot > 1)
3570  			l_mas->index = l_wr_mas->pivots[l_slot - 2] + 1;
3571  		else
3572  			l_mas->index = l_mas->min;
3573  
3574  		l_mas->offset = l_slot - 1;
3575  	}
3576  
3577  	if (!r_wr_mas->content) {
3578  		if (r_mas->last < r_wr_mas->r_max)
3579  			r_mas->last = r_wr_mas->r_max;
3580  		r_mas->offset++;
3581  	} else if ((r_mas->last == r_wr_mas->r_max) &&
3582  	    (r_mas->last < r_mas->max) &&
3583  	    !mas_slot_locked(r_mas, r_wr_mas->slots, r_mas->offset + 1)) {
3584  		r_mas->last = mas_safe_pivot(r_mas, r_wr_mas->pivots,
3585  					     r_wr_mas->type, r_mas->offset + 1);
3586  		r_mas->offset++;
3587  	}
3588  }
3589  
3590  static inline void *mas_state_walk(struct ma_state *mas)
3591  {
3592  	void *entry;
3593  
3594  	entry = mas_start(mas);
3595  	if (mas_is_none(mas))
3596  		return NULL;
3597  
3598  	if (mas_is_ptr(mas))
3599  		return entry;
3600  
3601  	return mtree_range_walk(mas);
3602  }
3603  
3604  /*
3605   * mtree_lookup_walk() - Internal quick lookup that does not keep maple state up
3606   * to date.
3607   *
3608   * @mas: The maple state.
3609   *
3610   * Note: Leaves @mas in an undesirable state.
3611   * Return: The entry for @mas->index or %NULL on dead node.
3612   */
3613  static inline void *mtree_lookup_walk(struct ma_state *mas)
3614  {
3615  	unsigned long *pivots;
3616  	unsigned char offset;
3617  	struct maple_node *node;
3618  	struct maple_enode *next;
3619  	enum maple_type type;
3620  	void __rcu **slots;
3621  	unsigned char end;
3622  
3623  	next = mas->node;
3624  	do {
3625  		node = mte_to_node(next);
3626  		type = mte_node_type(next);
3627  		pivots = ma_pivots(node, type);
3628  		end = mt_pivots[type];
3629  		offset = 0;
3630  		do {
3631  			if (pivots[offset] >= mas->index)
3632  				break;
3633  		} while (++offset < end);
3634  
3635  		slots = ma_slots(node, type);
3636  		next = mt_slot(mas->tree, slots, offset);
3637  		if (unlikely(ma_dead_node(node)))
3638  			goto dead_node;
3639  	} while (!ma_is_leaf(type));
3640  
3641  	return (void *)next;
3642  
3643  dead_node:
3644  	mas_reset(mas);
3645  	return NULL;
3646  }
3647  
3648  static void mte_destroy_walk(struct maple_enode *, struct maple_tree *);
3649  /*
3650   * mas_new_root() - Create a new root node that only contains the entry passed
3651   * in.
3652   * @mas: The maple state
3653   * @entry: The entry to store.
3654   *
3655   * Only valid when mas->index == 0 and mas->last == ULONG_MAX.
3656   */
3657  static inline void mas_new_root(struct ma_state *mas, void *entry)
3658  {
3659  	struct maple_enode *root = mas_root_locked(mas);
3660  	enum maple_type type = maple_leaf_64;
3661  	struct maple_node *node;
3662  	void __rcu **slots;
3663  	unsigned long *pivots;
3664  
3665  	if (!entry && !mas->index && mas->last == ULONG_MAX) {
3666  		mas->depth = 0;
3667  		mas_set_height(mas);
3668  		rcu_assign_pointer(mas->tree->ma_root, entry);
3669  		mas->status = ma_start;
3670  		goto done;
3671  	}
3672  
3673  	node = mas_pop_node(mas);
3674  	pivots = ma_pivots(node, type);
3675  	slots = ma_slots(node, type);
3676  	node->parent = ma_parent_ptr(mas_tree_parent(mas));
3677  	mas->node = mt_mk_node(node, type);
3678  	mas->status = ma_active;
3679  	rcu_assign_pointer(slots[0], entry);
3680  	pivots[0] = mas->last;
3681  	mas->depth = 1;
3682  	mas_set_height(mas);
3683  	rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node));
3684  
3685  done:
3686  	if (xa_is_node(root))
3687  		mte_destroy_walk(root, mas->tree);
3688  
3689  	return;
3690  }
3691  /*
3692   * mas_wr_spanning_store() - Create a subtree with the store operation completed
3693   * and new nodes where necessary, then place the sub-tree in the actual tree.
3694   * Note that mas is expected to point to the node which caused the store to
3695   * span.
3696   * @wr_mas: The maple write state
3697   */
3698  static noinline void mas_wr_spanning_store(struct ma_wr_state *wr_mas)
3699  {
3700  	struct maple_subtree_state mast;
3701  	struct maple_big_node b_node;
3702  	struct ma_state *mas;
3703  	unsigned char height;
3704  
3705  	/* Left and Right side of spanning store */
3706  	MA_STATE(l_mas, NULL, 0, 0);
3707  	MA_STATE(r_mas, NULL, 0, 0);
3708  	MA_WR_STATE(r_wr_mas, &r_mas, wr_mas->entry);
3709  	MA_WR_STATE(l_wr_mas, &l_mas, wr_mas->entry);
3710  
3711  	/*
3712  	 * A store operation that spans multiple nodes is called a spanning
3713  	 * store and is handled early in the store call stack by the function
3714  	 * mas_is_span_wr().  When a spanning store is identified, the maple
3715  	 * state is duplicated.  The first maple state walks the left tree path
3716  	 * to ``index``, the duplicate walks the right tree path to ``last``.
3717  	 * The data in the two nodes are combined into a single node, two nodes,
3718  	 * or possibly three nodes (see the 3-way split above).  A ``NULL``
3719  	 * written to the last entry of a node is considered a spanning store as
3720  	 * a rebalance is required for the operation to complete and an overflow
3721  	 * of data may happen.
3722  	 */
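	/*
	 * For example (illustrative only): with one leaf covering 0-49 and
	 * its sibling covering 50-99, a store of 40-60 starts in the first
	 * leaf and ends in the second, so it is handled here as a spanning
	 * store.
	 */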
3723  	mas = wr_mas->mas;
3724  	trace_ma_op(__func__, mas);
3725  
3726  	if (unlikely(!mas->index && mas->last == ULONG_MAX))
3727  		return mas_new_root(mas, wr_mas->entry);
3728  	/*
3729  	 * Node rebalancing may occur due to this store, so there may be three new
3730  	 * entries per level plus a new root.
3731  	 */
3732  	height = mas_mt_height(mas);
3733  
3734  	/*
3735  	 * Set up right side.  Need to get to the next offset after the spanning
3736  	 * store to ensure it's not NULL and to combine both the next node and
3737  	 * the node with the start together.
3738  	 */
3739  	r_mas = *mas;
3740  	/* Avoid overflow, walk to next slot in the tree. */
3741  	if (r_mas.last + 1)
3742  		r_mas.last++;
3743  
3744  	r_mas.index = r_mas.last;
3745  	mas_wr_walk_index(&r_wr_mas);
3746  	r_mas.last = r_mas.index = mas->last;
3747  
3748  	/* Set up left side. */
3749  	l_mas = *mas;
3750  	mas_wr_walk_index(&l_wr_mas);
3751  
3752  	if (!wr_mas->entry) {
3753  		mas_extend_spanning_null(&l_wr_mas, &r_wr_mas);
3754  		mas->offset = l_mas.offset;
3755  		mas->index = l_mas.index;
3756  		mas->last = l_mas.last = r_mas.last;
3757  	}
3758  
3759  	/* expanding NULLs may make this cover the entire range */
3760  	if (!l_mas.index && r_mas.last == ULONG_MAX) {
3761  		mas_set_range(mas, 0, ULONG_MAX);
3762  		return mas_new_root(mas, wr_mas->entry);
3763  	}
3764  
3765  	memset(&b_node, 0, sizeof(struct maple_big_node));
3766  	/* Copy l_mas and store the value in b_node. */
3767  	mas_store_b_node(&l_wr_mas, &b_node, l_mas.end);
3768  	/* Copy r_mas into b_node if there is anything to copy. */
3769  	if (r_mas.max > r_mas.last)
3770  		mas_mab_cp(&r_mas, r_mas.offset, r_mas.end,
3771  			   &b_node, b_node.b_end + 1);
3772  	else
3773  		b_node.b_end++;
3774  
3775  	/* Stop spanning searches by searching for just index. */
3776  	l_mas.index = l_mas.last = mas->index;
3777  
3778  	mast.bn = &b_node;
3779  	mast.orig_l = &l_mas;
3780  	mast.orig_r = &r_mas;
3781  	/* Combine l_mas and r_mas and split them up evenly again. */
3782  	return mas_spanning_rebalance(mas, &mast, height + 1);
3783  }
3784  
3785  /*
3786   * mas_wr_node_store() - Attempt to store the value in a node
3787   * @wr_mas: The maple write state
3788   *
3789   * Attempts to reuse the node, but may allocate.
3790   */
3791  static inline void mas_wr_node_store(struct ma_wr_state *wr_mas,
3792  				     unsigned char new_end)
3793  {
3794  	struct ma_state *mas = wr_mas->mas;
3795  	void __rcu **dst_slots;
3796  	unsigned long *dst_pivots;
3797  	unsigned char dst_offset, offset_end = wr_mas->offset_end;
3798  	struct maple_node reuse, *newnode;
3799  	unsigned char copy_size, node_pivots = mt_pivots[wr_mas->type];
3800  	bool in_rcu = mt_in_rcu(mas->tree);
3801  
3802  	if (mas->last == wr_mas->end_piv)
3803  		offset_end++; /* don't copy this offset */
3804  	else if (unlikely(wr_mas->r_max == ULONG_MAX))
3805  		mas_bulk_rebalance(mas, mas->end, wr_mas->type);
3806  
3807  	/* set up node. */
3808  	if (in_rcu) {
3809  		newnode = mas_pop_node(mas);
3810  	} else {
3811  		memset(&reuse, 0, sizeof(struct maple_node));
3812  		newnode = &reuse;
3813  	}
3814  
3815  	newnode->parent = mas_mn(mas)->parent;
3816  	dst_pivots = ma_pivots(newnode, wr_mas->type);
3817  	dst_slots = ma_slots(newnode, wr_mas->type);
3818  	/* Copy from start to insert point */
3819  	memcpy(dst_pivots, wr_mas->pivots, sizeof(unsigned long) * mas->offset);
3820  	memcpy(dst_slots, wr_mas->slots, sizeof(void *) * mas->offset);
3821  
3822  	/* Handle insert of new range starting after old range */
3823  	if (wr_mas->r_min < mas->index) {
3824  		rcu_assign_pointer(dst_slots[mas->offset], wr_mas->content);
3825  		dst_pivots[mas->offset++] = mas->index - 1;
3826  	}
3827  
3828  	/* Store the new entry and range end. */
3829  	if (mas->offset < node_pivots)
3830  		dst_pivots[mas->offset] = mas->last;
3831  	rcu_assign_pointer(dst_slots[mas->offset], wr_mas->entry);
3832  
3833  	/*
3834  	 * This range wrote to the end of the node or it overwrote the rest of
3835  	 * the data.
3836  	 */
3837  	if (offset_end > mas->end)
3838  		goto done;
3839  
3840  	dst_offset = mas->offset + 1;
3841  	/* Copy to the end of node if necessary. */
3842  	copy_size = mas->end - offset_end + 1;
3843  	memcpy(dst_slots + dst_offset, wr_mas->slots + offset_end,
3844  	       sizeof(void *) * copy_size);
3845  	memcpy(dst_pivots + dst_offset, wr_mas->pivots + offset_end,
3846  	       sizeof(unsigned long) * (copy_size - 1));
3847  
3848  	if (new_end < node_pivots)
3849  		dst_pivots[new_end] = mas->max;
3850  
3851  done:
3852  	mas_leaf_set_meta(newnode, maple_leaf_64, new_end);
3853  	if (in_rcu) {
3854  		struct maple_enode *old_enode = mas->node;
3855  
3856  		mas->node = mt_mk_node(newnode, wr_mas->type);
3857  		mas_replace_node(mas, old_enode);
3858  	} else {
3859  		memcpy(wr_mas->node, newnode, sizeof(struct maple_node));
3860  	}
3861  	trace_ma_write(__func__, mas, 0, wr_mas->entry);
3862  	mas_update_gap(mas);
3863  	mas->end = new_end;
3864  	return;
3865  }
3866  
3867  /*
3868   * mas_wr_slot_store() - Attempt to store a value in a slot.
3869   * @wr_mas: the maple write state
3870   */
3871  static inline void mas_wr_slot_store(struct ma_wr_state *wr_mas)
3872  {
3873  	struct ma_state *mas = wr_mas->mas;
3874  	unsigned char offset = mas->offset;
3875  	void __rcu **slots = wr_mas->slots;
3876  	bool gap = false;
3877  
3878  	gap |= !mt_slot_locked(mas->tree, slots, offset);
3879  	gap |= !mt_slot_locked(mas->tree, slots, offset + 1);
3880  
3881  	if (wr_mas->offset_end - offset == 1) {
3882  		if (mas->index == wr_mas->r_min) {
3883  			/* Overwriting the range and a part of the next one */
3884  			rcu_assign_pointer(slots[offset], wr_mas->entry);
3885  			wr_mas->pivots[offset] = mas->last;
3886  		} else {
3887  			/* Overwriting a part of the range and the next one */
3888  			rcu_assign_pointer(slots[offset + 1], wr_mas->entry);
3889  			wr_mas->pivots[offset] = mas->index - 1;
3890  			mas->offset++; /* Keep mas accurate. */
3891  		}
3892  	} else if (!mt_in_rcu(mas->tree)) {
3893  		/*
3894  		 * Expand the range, only partially overwriting the previous and
3895  		 * next ranges
3896  		 */
3897  		gap |= !mt_slot_locked(mas->tree, slots, offset + 2);
3898  		rcu_assign_pointer(slots[offset + 1], wr_mas->entry);
3899  		wr_mas->pivots[offset] = mas->index - 1;
3900  		wr_mas->pivots[offset + 1] = mas->last;
3901  		mas->offset++; /* Keep mas accurate. */
3902  	} else {
3903  		return;
3904  	}
3905  
3906  	trace_ma_write(__func__, mas, 0, wr_mas->entry);
3907  	/*
3908  	 * Only update gap when the new entry is empty or there is an empty
3909  	 * entry in the original two ranges.
3910  	 */
3911  	if (!wr_mas->entry || gap)
3912  		mas_update_gap(mas);
3913  
3914  	return;
3915  }
3916  
3917  static inline void mas_wr_extend_null(struct ma_wr_state *wr_mas)
3918  {
3919  	struct ma_state *mas = wr_mas->mas;
3920  
3921  	if (!wr_mas->slots[wr_mas->offset_end]) {
3922  		/* If this one is null, the next and prev are not */
3923  		mas->last = wr_mas->end_piv;
3924  	} else {
3925  		/* Check next slot(s) if we are overwriting the end */
3926  		if ((mas->last == wr_mas->end_piv) &&
3927  		    (mas->end != wr_mas->offset_end) &&
3928  		    !wr_mas->slots[wr_mas->offset_end + 1]) {
3929  			wr_mas->offset_end++;
3930  			if (wr_mas->offset_end == mas->end)
3931  				mas->last = mas->max;
3932  			else
3933  				mas->last = wr_mas->pivots[wr_mas->offset_end];
3934  			wr_mas->end_piv = mas->last;
3935  		}
3936  	}
3937  
3938  	if (!wr_mas->content) {
3939  		/* If this one is null, the next and prev are not */
3940  		mas->index = wr_mas->r_min;
3941  	} else {
3942  		/* Check prev slot if we are overwriting the start */
3943  		if (mas->index == wr_mas->r_min && mas->offset &&
3944  		    !wr_mas->slots[mas->offset - 1]) {
3945  			mas->offset--;
3946  			wr_mas->r_min = mas->index =
3947  				mas_safe_min(mas, wr_mas->pivots, mas->offset);
3948  			wr_mas->r_max = wr_mas->pivots[mas->offset];
3949  		}
3950  	}
3951  }
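
/*
 * For example (illustrative only): if a leaf holds 30-39 => NULL,
 * 40-60 => ptr and 61-70 => NULL, then storing NULL over 40-60 is extended by
 * mas_wr_extend_null() to cover 30-70 so adjacent NULL ranges are merged.
 */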
3952  
3953  static inline void mas_wr_end_piv(struct ma_wr_state *wr_mas)
3954  {
3955  	while ((wr_mas->offset_end < wr_mas->mas->end) &&
3956  	       (wr_mas->mas->last > wr_mas->pivots[wr_mas->offset_end]))
3957  		wr_mas->offset_end++;
3958  
3959  	if (wr_mas->offset_end < wr_mas->mas->end)
3960  		wr_mas->end_piv = wr_mas->pivots[wr_mas->offset_end];
3961  	else
3962  		wr_mas->end_piv = wr_mas->mas->max;
3963  }
3964  
3965  static inline unsigned char mas_wr_new_end(struct ma_wr_state *wr_mas)
3966  {
3967  	struct ma_state *mas = wr_mas->mas;
3968  	unsigned char new_end = mas->end + 2;
3969  
3970  	new_end -= wr_mas->offset_end - mas->offset;
3971  	if (wr_mas->r_min == mas->index)
3972  		new_end--;
3973  
3974  	if (wr_mas->end_piv == mas->last)
3975  		new_end--;
3976  
3977  	return new_end;
3978  }
3979  
3980  /*
3981   * mas_wr_append() - Attempt to append
3982   * @wr_mas: the maple write state
3983   * @new_end: The end of the node after the modification
3984   *
3985   * This is currently unsafe in RCU mode since the end of the node may be cached
3986   * by readers while the node contents are updated, which could result in
3987   * inaccurate information.
3988   */
3989  static inline void mas_wr_append(struct ma_wr_state *wr_mas,
3990  		unsigned char new_end)
3991  {
3992  	struct ma_state *mas = wr_mas->mas;
3993  	void __rcu **slots;
3994  	unsigned char end = mas->end;
3995  
3996  	if (new_end < mt_pivots[wr_mas->type]) {
3997  		wr_mas->pivots[new_end] = wr_mas->pivots[end];
3998  		ma_set_meta(wr_mas->node, wr_mas->type, 0, new_end);
3999  	}
4000  
4001  	slots = wr_mas->slots;
4002  	if (new_end == end + 1) {
4003  		if (mas->last == wr_mas->r_max) {
4004  			/* Append to end of range */
4005  			rcu_assign_pointer(slots[new_end], wr_mas->entry);
4006  			wr_mas->pivots[end] = mas->index - 1;
4007  			mas->offset = new_end;
4008  		} else {
4009  			/* Append to start of range */
4010  			rcu_assign_pointer(slots[new_end], wr_mas->content);
4011  			wr_mas->pivots[end] = mas->last;
4012  			rcu_assign_pointer(slots[end], wr_mas->entry);
4013  		}
4014  	} else {
4015  		/* Append to the range without touching any boundaries. */
4016  		rcu_assign_pointer(slots[new_end], wr_mas->content);
4017  		wr_mas->pivots[end + 1] = mas->last;
4018  		rcu_assign_pointer(slots[end + 1], wr_mas->entry);
4019  		wr_mas->pivots[end] = mas->index - 1;
4020  		mas->offset = end + 1;
4021  	}
4022  
4023  	if (!wr_mas->content || !wr_mas->entry)
4024  		mas_update_gap(mas);
4025  
4026  	mas->end = new_end;
4027  	trace_ma_write(__func__, mas, new_end, wr_mas->entry);
4028  	return;
4029  }
4030  
4031  /*
4032   * mas_wr_bnode() - Slow path for a modification.
4033   * @wr_mas: The write maple state
4034   *
4035   * This is where split and rebalance operations end up.
4036   */
4037  static void mas_wr_bnode(struct ma_wr_state *wr_mas)
4038  {
4039  	struct maple_big_node b_node;
4040  
4041  	trace_ma_write(__func__, wr_mas->mas, 0, wr_mas->entry);
4042  	memset(&b_node, 0, sizeof(struct maple_big_node));
4043  	mas_store_b_node(wr_mas, &b_node, wr_mas->offset_end);
4044  	mas_commit_b_node(wr_mas, &b_node);
4045  }
4046  
4047  /*
4048   * mas_wr_store_entry() - Internal call to store a value
4049   * @wr_mas: The maple write state
4050   */
4051  static inline void mas_wr_store_entry(struct ma_wr_state *wr_mas)
4052  {
4053  	struct ma_state *mas = wr_mas->mas;
4054  	unsigned char new_end = mas_wr_new_end(wr_mas);
4055  
4056  	switch (mas->store_type) {
4057  	case wr_invalid:
4058  		MT_BUG_ON(mas->tree, 1);
4059  		return;
4060  	case wr_new_root:
4061  		mas_new_root(mas, wr_mas->entry);
4062  		break;
4063  	case wr_store_root:
4064  		mas_store_root(mas, wr_mas->entry);
4065  		break;
4066  	case wr_exact_fit:
4067  		rcu_assign_pointer(wr_mas->slots[mas->offset], wr_mas->entry);
4068  		if (!!wr_mas->entry ^ !!wr_mas->content)
4069  			mas_update_gap(mas);
4070  		break;
4071  	case wr_append:
4072  		mas_wr_append(wr_mas, new_end);
4073  		break;
4074  	case wr_slot_store:
4075  		mas_wr_slot_store(wr_mas);
4076  		break;
4077  	case wr_node_store:
4078  		mas_wr_node_store(wr_mas, new_end);
4079  		break;
4080  	case wr_spanning_store:
4081  		mas_wr_spanning_store(wr_mas);
4082  		break;
4083  	case wr_split_store:
4084  	case wr_rebalance:
4085  		mas_wr_bnode(wr_mas);
4086  		break;
4087  	}
4088  
4089  	return;
4090  }
4091  
4092  static inline void mas_wr_prealloc_setup(struct ma_wr_state *wr_mas)
4093  {
4094  	struct ma_state *mas = wr_mas->mas;
4095  
4096  	if (!mas_is_active(mas)) {
4097  		if (mas_is_start(mas))
4098  			goto set_content;
4099  
4100  		if (unlikely(mas_is_paused(mas)))
4101  			goto reset;
4102  
4103  		if (unlikely(mas_is_none(mas)))
4104  			goto reset;
4105  
4106  		if (unlikely(mas_is_overflow(mas)))
4107  			goto reset;
4108  
4109  		if (unlikely(mas_is_underflow(mas)))
4110  			goto reset;
4111  	}
4112  
4113  	/*
4114  	 * A less strict version of mas_is_span_wr() where we allow spanning
4115  	 * writes within this node.  This is to stop partial walks in
4116  	 * mas_prealloc() from being reset.
4117  	 */
4118  	if (mas->last > mas->max)
4119  		goto reset;
4120  
4121  	if (wr_mas->entry)
4122  		goto set_content;
4123  
4124  	if (mte_is_leaf(mas->node) && mas->last == mas->max)
4125  		goto reset;
4126  
4127  	goto set_content;
4128  
4129  reset:
4130  	mas_reset(mas);
4131  set_content:
4132  	wr_mas->content = mas_start(mas);
4133  }
4134  
4135  /**
4136   * mas_prealloc_calc() - Calculate the number of nodes needed for a
4137   * given store operation
4138   * @mas: The maple state
4139   * @entry: The entry to store into the tree
4140   *
4141   * Return: Number of nodes required for preallocation.
4142   */
4143  static inline int mas_prealloc_calc(struct ma_state *mas, void *entry)
4144  {
4145  	int ret = mas_mt_height(mas) * 3 + 1;
4146  
4147  	switch (mas->store_type) {
4148  	case wr_invalid:
4149  		WARN_ON_ONCE(1);
4150  		break;
4151  	case wr_new_root:
4152  		ret = 1;
4153  		break;
4154  	case wr_store_root:
4155  		if (likely((mas->last != 0) || (mas->index != 0)))
4156  			ret = 1;
4157  		else if (((unsigned long) (entry) & 3) == 2)
4158  			ret = 1;
4159  		else
4160  			ret = 0;
4161  		break;
4162  	case wr_spanning_store:
4163  		ret =  mas_mt_height(mas) * 3 + 1;
4164  		break;
4165  	case wr_split_store:
4166  		ret =  mas_mt_height(mas) * 2 + 1;
4167  		break;
4168  	case wr_rebalance:
4169  		ret =  mas_mt_height(mas) * 2 - 1;
4170  		break;
4171  	case wr_node_store:
4172  		ret = mt_in_rcu(mas->tree) ? 1 : 0;
4173  		break;
4174  	case wr_append:
4175  	case wr_exact_fit:
4176  	case wr_slot_store:
4177  		ret = 0;
4178  	}
4179  
4180  	return ret;
4181  }
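
/*
 * A quick worked example of the calculation above (illustrative only): for a
 * tree of height 3, a wr_split_store preallocates 3 * 2 + 1 = 7 nodes and a
 * wr_spanning_store preallocates 3 * 3 + 1 = 10 nodes (up to three new nodes
 * per level plus a new root), while wr_exact_fit needs no allocation at all.
 */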
4182  
4183  /*
4184   * mas_wr_store_type() - Set the store type for a given
4185   * store operation.
4186   * @wr_mas: The maple write state
4187   */
4188  static inline void mas_wr_store_type(struct ma_wr_state *wr_mas)
4189  {
4190  	struct ma_state *mas = wr_mas->mas;
4191  	unsigned char new_end;
4192  
4193  	if (unlikely(mas_is_none(mas) || mas_is_ptr(mas))) {
4194  		mas->store_type = wr_store_root;
4195  		return;
4196  	}
4197  
4198  	if (unlikely(!mas_wr_walk(wr_mas))) {
4199  		mas->store_type = wr_spanning_store;
4200  		return;
4201  	}
4202  
4203  	/* At this point, we are at the leaf node that needs to be altered. */
4204  	mas_wr_end_piv(wr_mas);
4205  	if (!wr_mas->entry)
4206  		mas_wr_extend_null(wr_mas);
4207  
4208  	new_end = mas_wr_new_end(wr_mas);
4209  	if ((wr_mas->r_min == mas->index) && (wr_mas->r_max == mas->last)) {
4210  		mas->store_type = wr_exact_fit;
4211  		return;
4212  	}
4213  
4214  	if (unlikely(!mas->index && mas->last == ULONG_MAX)) {
4215  		mas->store_type = wr_new_root;
4216  		return;
4217  	}
4218  
4219  	/* Potential spanning rebalance collapsing a node */
4220  	if (new_end < mt_min_slots[wr_mas->type]) {
4221  		if (!mte_is_root(mas->node) && !(mas->mas_flags & MA_STATE_BULK)) {
4222  			mas->store_type = wr_rebalance;
4223  			return;
4224  		}
4225  		mas->store_type = wr_node_store;
4226  		return;
4227  	}
4228  
4229  	if (new_end >= mt_slots[wr_mas->type]) {
4230  		mas->store_type = wr_split_store;
4231  		return;
4232  	}
4233  
4234  	if (!mt_in_rcu(mas->tree) && (mas->offset == mas->end)) {
4235  		mas->store_type = wr_append;
4236  		return;
4237  	}
4238  
4239  	if ((new_end == mas->end) && (!mt_in_rcu(mas->tree) ||
4240  		(wr_mas->offset_end - mas->offset == 1))) {
4241  		mas->store_type = wr_slot_store;
4242  		return;
4243  	}
4244  
4245  	if (mte_is_root(mas->node) || (new_end >= mt_min_slots[wr_mas->type]) ||
4246  		(mas->mas_flags & MA_STATE_BULK)) {
4247  		mas->store_type = wr_node_store;
4248  		return;
4249  	}
4250  
4251  	mas->store_type = wr_invalid;
4252  	MAS_WARN_ON(mas, 1);
4253  }
4254  
4255  /**
4256   * mas_wr_preallocate() - Preallocate enough nodes for a store operation
4257   * @wr_mas: The maple write state
4258   * @entry: The entry that will be stored
4259   *
4260   */
4261  static inline void mas_wr_preallocate(struct ma_wr_state *wr_mas, void *entry)
4262  {
4263  	struct ma_state *mas = wr_mas->mas;
4264  	int request;
4265  
4266  	mas_wr_prealloc_setup(wr_mas);
4267  	mas_wr_store_type(wr_mas);
4268  	request = mas_prealloc_calc(mas, entry);
4269  	if (!request)
4270  		return;
4271  
4272  	mas_node_count(mas, request);
4273  }
4274  
4275  /**
4276   * mas_insert() - Internal call to insert a value
4277   * @mas: The maple state
4278   * @entry: The entry to store
4279   *
4280   * Return: %NULL on success, otherwise the contents that already exist at the
4281   * requested index.  The maple state needs to be checked for error conditions.
4282   */
4283  static inline void *mas_insert(struct ma_state *mas, void *entry)
4284  {
4285  	MA_WR_STATE(wr_mas, mas, entry);
4286  
4287  	/*
4288  	 * Inserting a new range inserts either 0, 1, or 2 pivots within the
4289  	 * tree.  If the insert fits exactly into an existing gap with a value
4290  	 * of NULL, then the slot only needs to be written with the new value.
4291  	 * If the range being inserted is adjacent to another range, then only a
4292  	 * single pivot needs to be inserted (as well as writing the entry).  If
4293  	 * the new range is within a gap but does not touch any other ranges,
4294  	 * then two pivots need to be inserted: the start - 1, and the end.  As
4295  	 * usual, the entry must be written.  Most operations require a new node
4296  	 * to be allocated and replace an existing node to ensure RCU safety,
4297  	 * when in RCU mode.  The exception to requiring a newly allocated node
4298  	 * is when inserting at the end of a node (appending).  When done
4299  	 * carefully, appending can reuse the node in place.
4300  	 */
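	/*
	 * For example (illustrative only): in a leaf where 0-20 is entirely
	 * NULL, inserting an entry at 5-10 writes two pivots (4 and 10) plus
	 * the entry; a following insert at 11-15 is adjacent to that range,
	 * so only the end pivot (15) and the entry need to be written.
	 */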
4301  	wr_mas.content = mas_start(mas);
4302  	if (wr_mas.content)
4303  		goto exists;
4304  
4305  	mas_wr_preallocate(&wr_mas, entry);
4306  	if (mas_is_err(mas))
4307  		return NULL;
4308  
4309  	/* spanning writes always overwrite something */
4310  	if (mas->store_type == wr_spanning_store)
4311  		goto exists;
4312  
4313  	/* At this point, we are at the leaf node that needs to be altered. */
4314  	if (mas->store_type != wr_new_root && mas->store_type != wr_store_root) {
4315  		wr_mas.offset_end = mas->offset;
4316  		wr_mas.end_piv = wr_mas.r_max;
4317  
4318  		if (wr_mas.content || (mas->last > wr_mas.r_max))
4319  			goto exists;
4320  	}
4321  
4322  	mas_wr_store_entry(&wr_mas);
4323  	return wr_mas.content;
4324  
4325  exists:
4326  	mas_set_err(mas, -EEXIST);
4327  	return wr_mas.content;
4328  
4329  }
4330  
4331  /**
4332   * mas_alloc_cyclic() - Internal call to find somewhere to store an entry
4333   * @mas: The maple state.
4334   * @startp: Pointer to ID.
4335   * @entry: The entry to store.
4336   * @range_lo: Lower bound of range to search.
4337   * @range_hi: Upper bound of range to search.
4338   * @next: Pointer to next ID to allocate.
4339   * @gfp: The GFP_FLAGS to use for allocations.
4340   *
4341   * Return: 0 if the allocation succeeded without wrapping, 1 if the
4342   * allocation succeeded after wrapping, or -EBUSY if there are no
4343   * free entries.
4344   */
4345  int mas_alloc_cyclic(struct ma_state *mas, unsigned long *startp,
4346  		void *entry, unsigned long range_lo, unsigned long range_hi,
4347  		unsigned long *next, gfp_t gfp)
4348  {
4349  	unsigned long min = range_lo;
4350  	int ret = 0;
4351  
4352  	range_lo = max(min, *next);
4353  	ret = mas_empty_area(mas, range_lo, range_hi, 1);
4354  	if ((mas->tree->ma_flags & MT_FLAGS_ALLOC_WRAPPED) && ret == 0) {
4355  		mas->tree->ma_flags &= ~MT_FLAGS_ALLOC_WRAPPED;
4356  		ret = 1;
4357  	}
4358  	if (ret < 0 && range_lo > min) {
4359  		ret = mas_empty_area(mas, min, range_hi, 1);
4360  		if (ret == 0)
4361  			ret = 1;
4362  	}
4363  	if (ret < 0)
4364  		return ret;
4365  
4366  	do {
4367  		mas_insert(mas, entry);
4368  	} while (mas_nomem(mas, gfp));
4369  	if (mas_is_err(mas))
4370  		return xa_err(mas->node);
4371  
4372  	*startp = mas->index;
4373  	*next = *startp + 1;
4374  	if (*next == 0)
4375  		mas->tree->ma_flags |= MT_FLAGS_ALLOC_WRAPPED;
4376  
4377  	mas_destroy(mas);
4378  	return ret;
4379  }
4380  EXPORT_SYMBOL(mas_alloc_cyclic);
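
/*
 * Usage sketch (illustrative only; mt, ptr and id_next are placeholders):
 * allocate cyclic IDs under the tree lock, much as the external
 * mtree_alloc_cyclic() wrapper drives this function:
 *
 *	MA_STATE(mas, mt, 0, 0);
 *	unsigned long id;
 *	int ret;
 *
 *	mtree_lock(mt);
 *	ret = mas_alloc_cyclic(&mas, &id, ptr, 1, 1023, &id_next, GFP_KERNEL);
 *	mtree_unlock(mt);
 *	if (ret < 0)
 *		return ret;	(-EBUSY means no free entries)
 */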
4381  
4382  static __always_inline void mas_rewalk(struct ma_state *mas, unsigned long index)
4383  {
4384  retry:
4385  	mas_set(mas, index);
4386  	mas_state_walk(mas);
4387  	if (mas_is_start(mas))
4388  		goto retry;
4389  }
4390  
4391  static __always_inline bool mas_rewalk_if_dead(struct ma_state *mas,
4392  		struct maple_node *node, const unsigned long index)
4393  {
4394  	if (unlikely(ma_dead_node(node))) {
4395  		mas_rewalk(mas, index);
4396  		return true;
4397  	}
4398  	return false;
4399  }
4400  
4401  /*
4402   * mas_prev_node() - Find the prev non-null entry at the same level in the
4403   * tree.  The prev value will be mas->node[mas->offset] or the status will be
4404   * ma_none.
4405   * @mas: The maple state
4406   * @min: The lower limit to search
4407   *
4410   * Return: 1 if the node is dead, 0 otherwise.
4411   */
4412  static int mas_prev_node(struct ma_state *mas, unsigned long min)
4413  {
4414  	enum maple_type mt;
4415  	int offset, level;
4416  	void __rcu **slots;
4417  	struct maple_node *node;
4418  	unsigned long *pivots;
4419  	unsigned long max;
4420  
4421  	node = mas_mn(mas);
4422  	if (!mas->min)
4423  		goto no_entry;
4424  
4425  	max = mas->min - 1;
4426  	if (max < min)
4427  		goto no_entry;
4428  
4429  	level = 0;
4430  	do {
4431  		if (ma_is_root(node))
4432  			goto no_entry;
4433  
4434  		/* Walk up. */
4435  		if (unlikely(mas_ascend(mas)))
4436  			return 1;
4437  		offset = mas->offset;
4438  		level++;
4439  		node = mas_mn(mas);
4440  	} while (!offset);
4441  
4442  	offset--;
4443  	mt = mte_node_type(mas->node);
4444  	while (level > 1) {
4445  		level--;
4446  		slots = ma_slots(node, mt);
4447  		mas->node = mas_slot(mas, slots, offset);
4448  		if (unlikely(ma_dead_node(node)))
4449  			return 1;
4450  
4451  		mt = mte_node_type(mas->node);
4452  		node = mas_mn(mas);
4453  		pivots = ma_pivots(node, mt);
4454  		offset = ma_data_end(node, mt, pivots, max);
4455  		if (unlikely(ma_dead_node(node)))
4456  			return 1;
4457  	}
4458  
4459  	slots = ma_slots(node, mt);
4460  	mas->node = mas_slot(mas, slots, offset);
4461  	pivots = ma_pivots(node, mt);
4462  	if (unlikely(ma_dead_node(node)))
4463  		return 1;
4464  
4465  	if (likely(offset))
4466  		mas->min = pivots[offset - 1] + 1;
4467  	mas->max = max;
4468  	mas->offset = mas_data_end(mas);
4469  	if (unlikely(mte_dead_node(mas->node)))
4470  		return 1;
4471  
4472  	mas->end = mas->offset;
4473  	return 0;
4474  
4475  no_entry:
4476  	if (unlikely(ma_dead_node(node)))
4477  		return 1;
4478  
4479  	mas->status = ma_underflow;
4480  	return 0;
4481  }
4482  
4483  /*
4484   * mas_prev_slot() - Get the entry in the previous slot
4485   *
4486   * @mas: The maple state
4487   * @min: The minimum starting range
4488   * @empty: Can be empty
4489   *
4490   * Return: The entry in the previous slot which is possibly NULL
4491   */
4492  static void *mas_prev_slot(struct ma_state *mas, unsigned long min, bool empty)
4493  {
4494  	void *entry;
4495  	void __rcu **slots;
4496  	unsigned long pivot;
4497  	enum maple_type type;
4498  	unsigned long *pivots;
4499  	struct maple_node *node;
4500  	unsigned long save_point = mas->index;
4501  
4502  retry:
4503  	node = mas_mn(mas);
4504  	type = mte_node_type(mas->node);
4505  	pivots = ma_pivots(node, type);
4506  	if (unlikely(mas_rewalk_if_dead(mas, node, save_point)))
4507  		goto retry;
4508  
4509  	if (mas->min <= min) {
4510  		pivot = mas_safe_min(mas, pivots, mas->offset);
4511  
4512  		if (unlikely(mas_rewalk_if_dead(mas, node, save_point)))
4513  			goto retry;
4514  
4515  		if (pivot <= min)
4516  			goto underflow;
4517  	}
4518  
4519  again:
4520  	if (likely(mas->offset)) {
4521  		mas->offset--;
4522  		mas->last = mas->index - 1;
4523  		mas->index = mas_safe_min(mas, pivots, mas->offset);
4524  	} else  {
4525  		if (mas->index <= min)
4526  			goto underflow;
4527  
4528  		if (mas_prev_node(mas, min)) {
4529  			mas_rewalk(mas, save_point);
4530  			goto retry;
4531  		}
4532  
4533  		if (WARN_ON_ONCE(mas_is_underflow(mas)))
4534  			return NULL;
4535  
4536  		mas->last = mas->max;
4537  		node = mas_mn(mas);
4538  		type = mte_node_type(mas->node);
4539  		pivots = ma_pivots(node, type);
4540  		mas->index = pivots[mas->offset - 1] + 1;
4541  	}
4542  
4543  	slots = ma_slots(node, type);
4544  	entry = mas_slot(mas, slots, mas->offset);
4545  	if (unlikely(mas_rewalk_if_dead(mas, node, save_point)))
4546  		goto retry;
4547  
4548  
4549  	if (likely(entry))
4550  		return entry;
4551  
4552  	if (!empty) {
4553  		if (mas->index <= min) {
4554  			mas->status = ma_underflow;
4555  			return NULL;
4556  		}
4557  
4558  		goto again;
4559  	}
4560  
4561  	return entry;
4562  
4563  underflow:
4564  	mas->status = ma_underflow;
4565  	return NULL;
4566  }
4567  
4568  /*
4569   * mas_next_node() - Get the next node at the same level in the tree.
4570   * @mas: The maple state
4571   * @node: The maple node
4572   * @max: The maximum pivot value to check.
4573   *
4574   * The next value will be mas->node[mas->offset] or the status will have
4575   * overflowed.
4576   * Return: 1 on dead node, 0 otherwise.
4577   */
4578  static int mas_next_node(struct ma_state *mas, struct maple_node *node,
4579  		unsigned long max)
4580  {
4581  	unsigned long min;
4582  	unsigned long *pivots;
4583  	struct maple_enode *enode;
4584  	struct maple_node *tmp;
4585  	int level = 0;
4586  	unsigned char node_end;
4587  	enum maple_type mt;
4588  	void __rcu **slots;
4589  
4590  	if (mas->max >= max)
4591  		goto overflow;
4592  
4593  	min = mas->max + 1;
4594  	level = 0;
4595  	do {
4596  		if (ma_is_root(node))
4597  			goto overflow;
4598  
4599  		/* Walk up. */
4600  		if (unlikely(mas_ascend(mas)))
4601  			return 1;
4602  
4603  		level++;
4604  		node = mas_mn(mas);
4605  		mt = mte_node_type(mas->node);
4606  		pivots = ma_pivots(node, mt);
4607  		node_end = ma_data_end(node, mt, pivots, mas->max);
4608  		if (unlikely(ma_dead_node(node)))
4609  			return 1;
4610  
4611  	} while (unlikely(mas->offset == node_end));
4612  
4613  	slots = ma_slots(node, mt);
4614  	mas->offset++;
4615  	enode = mas_slot(mas, slots, mas->offset);
4616  	if (unlikely(ma_dead_node(node)))
4617  		return 1;
4618  
4619  	if (level > 1)
4620  		mas->offset = 0;
4621  
4622  	while (unlikely(level > 1)) {
4623  		level--;
4624  		mas->node = enode;
4625  		node = mas_mn(mas);
4626  		mt = mte_node_type(mas->node);
4627  		slots = ma_slots(node, mt);
4628  		enode = mas_slot(mas, slots, 0);
4629  		if (unlikely(ma_dead_node(node)))
4630  			return 1;
4631  	}
4632  
4633  	if (!mas->offset)
4634  		pivots = ma_pivots(node, mt);
4635  
4636  	mas->max = mas_safe_pivot(mas, pivots, mas->offset, mt);
4637  	tmp = mte_to_node(enode);
4638  	mt = mte_node_type(enode);
4639  	pivots = ma_pivots(tmp, mt);
4640  	mas->end = ma_data_end(tmp, mt, pivots, mas->max);
4641  	if (unlikely(ma_dead_node(node)))
4642  		return 1;
4643  
4644  	mas->node = enode;
4645  	mas->min = min;
4646  	return 0;
4647  
4648  overflow:
4649  	if (unlikely(ma_dead_node(node)))
4650  		return 1;
4651  
4652  	mas->status = ma_overflow;
4653  	return 0;
4654  }
4655  
4656  /*
4657   * mas_next_slot() - Get the entry in the next slot
4658   *
4659   * @mas: The maple state
4660   * @max: The maximum starting range
4661   * @empty: Can be empty
4662   *
4663   * Return: The entry in the next slot which is possibly NULL
4664   */
4665  static void *mas_next_slot(struct ma_state *mas, unsigned long max, bool empty)
4666  {
4667  	void __rcu **slots;
4668  	unsigned long *pivots;
4669  	unsigned long pivot;
4670  	enum maple_type type;
4671  	struct maple_node *node;
4672  	unsigned long save_point = mas->last;
4673  	void *entry;
4674  
4675  retry:
4676  	node = mas_mn(mas);
4677  	type = mte_node_type(mas->node);
4678  	pivots = ma_pivots(node, type);
4679  	if (unlikely(mas_rewalk_if_dead(mas, node, save_point)))
4680  		goto retry;
4681  
4682  	if (mas->max >= max) {
4683  		if (likely(mas->offset < mas->end))
4684  			pivot = pivots[mas->offset];
4685  		else
4686  			pivot = mas->max;
4687  
4688  		if (unlikely(mas_rewalk_if_dead(mas, node, save_point)))
4689  			goto retry;
4690  
4691  		if (pivot >= max) { /* Was at the limit, next will extend beyond */
4692  			mas->status = ma_overflow;
4693  			return NULL;
4694  		}
4695  	}
4696  
4697  	if (likely(mas->offset < mas->end)) {
4698  		mas->index = pivots[mas->offset] + 1;
4699  again:
4700  		mas->offset++;
4701  		if (likely(mas->offset < mas->end))
4702  			mas->last = pivots[mas->offset];
4703  		else
4704  			mas->last = mas->max;
4705  	} else  {
4706  		if (mas->last >= max) {
4707  			mas->status = ma_overflow;
4708  			return NULL;
4709  		}
4710  
4711  		if (mas_next_node(mas, node, max)) {
4712  			mas_rewalk(mas, save_point);
4713  			goto retry;
4714  		}
4715  
4716  		if (WARN_ON_ONCE(mas_is_overflow(mas)))
4717  			return NULL;
4718  
4719  		mas->offset = 0;
4720  		mas->index = mas->min;
4721  		node = mas_mn(mas);
4722  		type = mte_node_type(mas->node);
4723  		pivots = ma_pivots(node, type);
4724  		mas->last = pivots[0];
4725  	}
4726  
4727  	slots = ma_slots(node, type);
4728  	entry = mt_slot(mas->tree, slots, mas->offset);
4729  	if (unlikely(mas_rewalk_if_dead(mas, node, save_point)))
4730  		goto retry;
4731  
4732  	if (entry)
4733  		return entry;
4734  
4735  
4736  	if (!empty) {
4737  		if (mas->last >= max) {
4738  			mas->status = ma_overflow;
4739  			return NULL;
4740  		}
4741  
4742  		mas->index = mas->last + 1;
4743  		goto again;
4744  	}
4745  
4746  	return entry;
4747  }
4748  
4749  /*
4750   * mas_next_entry() - Internal function to get the next entry.
4751   * @mas: The maple state
4752   * @limit: The maximum range start.
4753   *
4754   * Set the @mas->node to the next entry and the range_start to
4755   * the beginning value for the entry.  Does not check beyond @limit.
4756   * Sets @mas->index and @mas->last to the range.  Does not update @mas->index
4757   * and @mas->last on overflow.
4758   * Restarts on dead nodes.
4759   *
4760   * Return: the next entry or %NULL.
4761   */
4762  static inline void *mas_next_entry(struct ma_state *mas, unsigned long limit)
4763  {
4764  	if (mas->last >= limit) {
4765  		mas->status = ma_overflow;
4766  		return NULL;
4767  	}
4768  
4769  	return mas_next_slot(mas, limit, false);
4770  }
4771  
4772  /*
4773   * mas_rev_awalk() - Internal function.  Reverse allocation walk.  Find the
4774   * highest gap address of a given size in a given node and descend.
4775   * @mas: The maple state
4776   * @size: The needed size.
4777   *
4778   * Return: True if found in a leaf, false otherwise.
4779   *
4780   */
4781  static bool mas_rev_awalk(struct ma_state *mas, unsigned long size,
4782  		unsigned long *gap_min, unsigned long *gap_max)
4783  {
4784  	enum maple_type type = mte_node_type(mas->node);
4785  	struct maple_node *node = mas_mn(mas);
4786  	unsigned long *pivots, *gaps;
4787  	void __rcu **slots;
4788  	unsigned long gap = 0;
4789  	unsigned long max, min;
4790  	unsigned char offset;
4791  
4792  	if (unlikely(mas_is_err(mas)))
4793  		return true;
4794  
4795  	if (ma_is_dense(type)) {
4796  		/* dense nodes. */
4797  		mas->offset = (unsigned char)(mas->index - mas->min);
4798  		return true;
4799  	}
4800  
4801  	pivots = ma_pivots(node, type);
4802  	slots = ma_slots(node, type);
4803  	gaps = ma_gaps(node, type);
4804  	offset = mas->offset;
4805  	min = mas_safe_min(mas, pivots, offset);
4806  	/* Skip out of bounds. */
4807  	while (mas->last < min)
4808  		min = mas_safe_min(mas, pivots, --offset);
4809  
4810  	max = mas_safe_pivot(mas, pivots, offset, type);
4811  	while (mas->index <= max) {
4812  		gap = 0;
4813  		if (gaps)
4814  			gap = gaps[offset];
4815  		else if (!mas_slot(mas, slots, offset))
4816  			gap = max - min + 1;
4817  
4818  		if (gap) {
4819  			if ((size <= gap) && (size <= mas->last - min + 1))
4820  				break;
4821  
4822  			if (!gaps) {
4823  				/* Skip the next slot, it cannot be a gap. */
4824  				if (offset < 2)
4825  					goto ascend;
4826  
4827  				offset -= 2;
4828  				max = pivots[offset];
4829  				min = mas_safe_min(mas, pivots, offset);
4830  				continue;
4831  			}
4832  		}
4833  
4834  		if (!offset)
4835  			goto ascend;
4836  
4837  		offset--;
4838  		max = min - 1;
4839  		min = mas_safe_min(mas, pivots, offset);
4840  	}
4841  
4842  	if (unlikely((mas->index > max) || (size - 1 > max - mas->index)))
4843  		goto no_space;
4844  
4845  	if (unlikely(ma_is_leaf(type))) {
4846  		mas->offset = offset;
4847  		*gap_min = min;
4848  		*gap_max = min + gap - 1;
4849  		return true;
4850  	}
4851  
4852  	/* descend, only happens under lock. */
4853  	mas->node = mas_slot(mas, slots, offset);
4854  	mas->min = min;
4855  	mas->max = max;
4856  	mas->offset = mas_data_end(mas);
4857  	return false;
4858  
4859  ascend:
4860  	if (!mte_is_root(mas->node))
4861  		return false;
4862  
4863  no_space:
4864  	mas_set_err(mas, -EBUSY);
4865  	return false;
4866  }
4867  
4868  static inline bool mas_anode_descend(struct ma_state *mas, unsigned long size)
4869  {
4870  	enum maple_type type = mte_node_type(mas->node);
4871  	unsigned long pivot, min, gap = 0;
4872  	unsigned char offset, data_end;
4873  	unsigned long *gaps, *pivots;
4874  	void __rcu **slots;
4875  	struct maple_node *node;
4876  	bool found = false;
4877  
4878  	if (ma_is_dense(type)) {
4879  		mas->offset = (unsigned char)(mas->index - mas->min);
4880  		return true;
4881  	}
4882  
4883  	node = mas_mn(mas);
4884  	pivots = ma_pivots(node, type);
4885  	slots = ma_slots(node, type);
4886  	gaps = ma_gaps(node, type);
4887  	offset = mas->offset;
4888  	min = mas_safe_min(mas, pivots, offset);
4889  	data_end = ma_data_end(node, type, pivots, mas->max);
4890  	for (; offset <= data_end; offset++) {
4891  		pivot = mas_safe_pivot(mas, pivots, offset, type);
4892  
4893  		/* Not within lower bounds */
4894  		if (mas->index > pivot)
4895  			goto next_slot;
4896  
4897  		if (gaps)
4898  			gap = gaps[offset];
4899  		else if (!mas_slot(mas, slots, offset))
4900  			gap = min(pivot, mas->last) - max(mas->index, min) + 1;
4901  		else
4902  			goto next_slot;
4903  
4904  		if (gap >= size) {
4905  			if (ma_is_leaf(type)) {
4906  				found = true;
4907  				goto done;
4908  			}
4909  			if (mas->index <= pivot) {
4910  				mas->node = mas_slot(mas, slots, offset);
4911  				mas->min = min;
4912  				mas->max = pivot;
4913  				offset = 0;
4914  				break;
4915  			}
4916  		}
4917  next_slot:
4918  		min = pivot + 1;
4919  		if (mas->last <= pivot) {
4920  			mas_set_err(mas, -EBUSY);
4921  			return true;
4922  		}
4923  	}
4924  
4925  	if (mte_is_root(mas->node))
4926  		found = true;
4927  done:
4928  	mas->offset = offset;
4929  	return found;
4930  }
4931  
4932  /**
4933   * mas_walk() - Search for @mas->index in the tree.
4934   * @mas: The maple state.
4935   *
4936   * mas->index and mas->last will be set to the range if there is a value.  If
4937   * mas->status is ma_none, reset to ma_start
4938   *
4939   * Return: the entry at the location or %NULL.
4940   */
4941  void *mas_walk(struct ma_state *mas)
4942  {
4943  	void *entry;
4944  
4945  	if (!mas_is_active(mas) || !mas_is_start(mas))
4946  		mas->status = ma_start;
4947  retry:
4948  	entry = mas_state_walk(mas);
4949  	if (mas_is_start(mas)) {
4950  		goto retry;
4951  	} else if (mas_is_none(mas)) {
4952  		mas->index = 0;
4953  		mas->last = ULONG_MAX;
4954  	} else if (mas_is_ptr(mas)) {
4955  		if (!mas->index) {
4956  			mas->last = 0;
4957  			return entry;
4958  		}
4959  
4960  		mas->index = 1;
4961  		mas->last = ULONG_MAX;
4962  		mas->status = ma_none;
4963  		return NULL;
4964  	}
4965  
4966  	return entry;
4967  }
4968  EXPORT_SYMBOL_GPL(mas_walk);
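
/*
 * Usage sketch (illustrative only; mt and index are placeholders): walk to an
 * index under the RCU read lock and report the range containing it:
 *
 *	MA_STATE(mas, mt, index, index);
 *	void *entry;
 *
 *	rcu_read_lock();
 *	entry = mas_walk(&mas);
 *	if (entry)
 *		pr_debug("%lu-%lu -> %p\n", mas.index, mas.last, entry);
 *	rcu_read_unlock();
 */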
4969  
4970  static inline bool mas_rewind_node(struct ma_state *mas)
4971  {
4972  	unsigned char slot;
4973  
4974  	do {
4975  		if (mte_is_root(mas->node)) {
4976  			slot = mas->offset;
4977  			if (!slot)
4978  				return false;
4979  		} else {
4980  			mas_ascend(mas);
4981  			slot = mas->offset;
4982  		}
4983  	} while (!slot);
4984  
4985  	mas->offset = --slot;
4986  	return true;
4987  }
4988  
4989  /*
4990   * mas_skip_node() - Internal function.  Skip over a node.
4991   * @mas: The maple state.
4992   *
4993   * Return: true if there is another node, false otherwise.
4994   */
4995  static inline bool mas_skip_node(struct ma_state *mas)
4996  {
4997  	if (mas_is_err(mas))
4998  		return false;
4999  
5000  	do {
5001  		if (mte_is_root(mas->node)) {
5002  			if (mas->offset >= mas_data_end(mas)) {
5003  				mas_set_err(mas, -EBUSY);
5004  				return false;
5005  			}
5006  		} else {
5007  			mas_ascend(mas);
5008  		}
5009  	} while (mas->offset >= mas_data_end(mas));
5010  
5011  	mas->offset++;
5012  	return true;
5013  }
5014  
5015  /*
5016   * mas_awalk() - Allocation walk.  Search from low address to high for a gap of
5017   * @size.
5018   * @mas: The maple state
5019   * @size: The size of the gap required
5020   *
5021   * Search between @mas->index and @mas->last for a gap of @size.
5022   */
5023  static inline void mas_awalk(struct ma_state *mas, unsigned long size)
5024  {
5025  	struct maple_enode *last = NULL;
5026  
5027  	/*
5028  	 * There are 4 options:
5029  	 * go to child (descend)
5030  	 * go back to parent (ascend)
5031  	 * no gap found. (return, slot == MAPLE_NODE_SLOTS)
5032  	 * found the gap. (return, slot != MAPLE_NODE_SLOTS)
5033  	 */
5034  	while (!mas_is_err(mas) && !mas_anode_descend(mas, size)) {
5035  		if (last == mas->node)
5036  			mas_skip_node(mas);
5037  		else
5038  			last = mas->node;
5039  	}
5040  }
5041  
5042  /*
5043   * mas_sparse_area() - Internal function.  Return upper or lower limit when
5044   * searching for a gap in an empty tree.
5045   * @mas: The maple state
5046   * @min: the minimum range
5047   * @max: The maximum range
5048   * @size: The size of the gap
5049   * @fwd: Searching forward or back
5050   */
5051  static inline int mas_sparse_area(struct ma_state *mas, unsigned long min,
5052  				unsigned long max, unsigned long size, bool fwd)
5053  {
5054  	if (!unlikely(mas_is_none(mas)) && min == 0) {
5055  		min++;
5056  		/*
5057  		 * At this time, min is increased, we need to recheck whether
5058  		 * the size is satisfied.
5059  		 */
5060  		if (min > max || max - min + 1 < size)
5061  			return -EBUSY;
5062  	}
5063  	/* mas_is_ptr */
5064  
5065  	if (fwd) {
5066  		mas->index = min;
5067  		mas->last = min + size - 1;
5068  	} else {
5069  		mas->last = max;
5070  		mas->index = max - size + 1;
5071  	}
5072  	return 0;
5073  }
5074  
5075  /*
5076   * mas_empty_area() - Get the lowest address within the range that is
5077   * sufficient for the size requested.
5078   * @mas: The maple state
5079   * @min: The lowest value of the range
5080   * @max: The highest value of the range
5081   * @size: The size needed
5082   */
5083  int mas_empty_area(struct ma_state *mas, unsigned long min,
5084  		unsigned long max, unsigned long size)
5085  {
5086  	unsigned char offset;
5087  	unsigned long *pivots;
5088  	enum maple_type mt;
5089  	struct maple_node *node;
5090  
5091  	if (min > max)
5092  		return -EINVAL;
5093  
5094  	if (size == 0 || max - min < size - 1)
5095  		return -EINVAL;
5096  
5097  	if (mas_is_start(mas))
5098  		mas_start(mas);
5099  	else if (mas->offset >= 2)
5100  		mas->offset -= 2;
5101  	else if (!mas_skip_node(mas))
5102  		return -EBUSY;
5103  
5104  	/* Empty set */
5105  	if (mas_is_none(mas) || mas_is_ptr(mas))
5106  		return mas_sparse_area(mas, min, max, size, true);
5107  
5108  	/* The start of the window can only be within these values */
5109  	mas->index = min;
5110  	mas->last = max;
5111  	mas_awalk(mas, size);
5112  
5113  	if (unlikely(mas_is_err(mas)))
5114  		return xa_err(mas->node);
5115  
5116  	offset = mas->offset;
5117  	if (unlikely(offset == MAPLE_NODE_SLOTS))
5118  		return -EBUSY;
5119  
5120  	node = mas_mn(mas);
5121  	mt = mte_node_type(mas->node);
5122  	pivots = ma_pivots(node, mt);
5123  	min = mas_safe_min(mas, pivots, offset);
5124  	if (mas->index < min)
5125  		mas->index = min;
5126  	mas->last = mas->index + size - 1;
5127  	mas->end = ma_data_end(node, mt, pivots, mas->max);
5128  	return 0;
5129  }
5130  EXPORT_SYMBOL_GPL(mas_empty_area);
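
/*
 * Illustrative sketch (not part of the upstream source): find the lowest gap
 * of 0x1000 slots in an MT_FLAGS_ALLOC_RANGE tree and claim it while the
 * lock is still held.  "mt" and "ptr" are hypothetical.
 *
 *	MA_STATE(mas, &mt, 0, 0);
 *
 *	mtree_lock(&mt);
 *	if (!mas_empty_area(&mas, 0, ULONG_MAX, 0x1000))
 *		mas_store_gfp(&mas, ptr, GFP_KERNEL);
 *	mtree_unlock(&mt);
 */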
5131  
5132  /*
5133   * mas_empty_area_rev() - Get the highest address within the range that is
5134   * sufficient for the size requested.
5135   * @mas: The maple state
5136   * @min: The lowest value of the range
5137   * @max: The highest value of the range
5138   * @size: The size needed
5139   */
5140  int mas_empty_area_rev(struct ma_state *mas, unsigned long min,
5141  		unsigned long max, unsigned long size)
5142  {
5143  	struct maple_enode *last = mas->node;
5144  
5145  	if (min > max)
5146  		return -EINVAL;
5147  
5148  	if (size == 0 || max - min < size - 1)
5149  		return -EINVAL;
5150  
5151  	if (mas_is_start(mas))
5152  		mas_start(mas);
5153  	else if ((mas->offset < 2) && (!mas_rewind_node(mas)))
5154  		return -EBUSY;
5155  
5156  	if (unlikely(mas_is_none(mas) || mas_is_ptr(mas)))
5157  		return mas_sparse_area(mas, min, max, size, false);
5158  	else if (mas->offset >= 2)
5159  		mas->offset -= 2;
5160  	else
5161  		mas->offset = mas_data_end(mas);
5162  
5163  
5164  	/* The start of the window can only be within these values. */
5165  	mas->index = min;
5166  	mas->last = max;
5167  
5168  	while (!mas_rev_awalk(mas, size, &min, &max)) {
5169  		if (last == mas->node) {
5170  			if (!mas_rewind_node(mas))
5171  				return -EBUSY;
5172  		} else {
5173  			last = mas->node;
5174  		}
5175  	}
5176  
5177  	if (mas_is_err(mas))
5178  		return xa_err(mas->node);
5179  
5180  	if (unlikely(mas->offset == MAPLE_NODE_SLOTS))
5181  		return -EBUSY;
5182  
5183  	/* Trim the upper limit to the max. */
5184  	if (max < mas->last)
5185  		mas->last = max;
5186  
5187  	mas->index = mas->last - size + 1;
5188  	mas->end = mas_data_end(mas);
5189  	return 0;
5190  }
5191  EXPORT_SYMBOL_GPL(mas_empty_area_rev);
5192  
5193  /*
5194   * mte_dead_leaves() - Mark all leaves of a node as dead.
5195   * @enode: the encoded node
5196   * @mt: the maple tree
5197   * @slots: Pointer to the slot array
5198   *
5199   * Must hold the write lock.
5200   *
5201   * Return: The number of leaves marked as dead.
5202   */
5203  static inline
5204  unsigned char mte_dead_leaves(struct maple_enode *enode, struct maple_tree *mt,
5205  			      void __rcu **slots)
5206  {
5207  	struct maple_node *node;
5208  	enum maple_type type;
5209  	void *entry;
5210  	int offset;
5211  
5212  	for (offset = 0; offset < mt_slot_count(enode); offset++) {
5213  		entry = mt_slot(mt, slots, offset);
5214  		type = mte_node_type(entry);
5215  		node = mte_to_node(entry);
5216  		/* Use both node and type to catch LE & BE metadata */
5217  		if (!node || !type)
5218  			break;
5219  
5220  		mte_set_node_dead(entry);
5221  		node->type = type;
5222  		rcu_assign_pointer(slots[offset], node);
5223  	}
5224  
5225  	return offset;
5226  }
5227  
5228  /**
5229   * mte_dead_walk() - Walk down a dead tree to just before the leaves
5230   * @enode: The maple encoded node
5231   * @offset: The starting offset
5232   *
5233   * Note: This can only be used from the RCU callback context.
5234   */
5235  static void __rcu **mte_dead_walk(struct maple_enode **enode, unsigned char offset)
5236  {
5237  	struct maple_node *node, *next;
5238  	void __rcu **slots = NULL;
5239  
5240  	next = mte_to_node(*enode);
5241  	do {
5242  		*enode = ma_enode_ptr(next);
5243  		node = mte_to_node(*enode);
5244  		slots = ma_slots(node, node->type);
5245  		next = rcu_dereference_protected(slots[offset],
5246  					lock_is_held(&rcu_callback_map));
5247  		offset = 0;
5248  	} while (!ma_is_leaf(next->type));
5249  
5250  	return slots;
5251  }
5252  
5253  /**
5254   * mt_free_walk() - Walk & free a tree in the RCU callback context
5255   * @head: The RCU head that's within the node.
5256   *
5257   * Note: This can only be used from the RCU callback context.
5258   */
5259  static void mt_free_walk(struct rcu_head *head)
5260  {
5261  	void __rcu **slots;
5262  	struct maple_node *node, *start;
5263  	struct maple_enode *enode;
5264  	unsigned char offset;
5265  	enum maple_type type;
5266  
5267  	node = container_of(head, struct maple_node, rcu);
5268  
5269  	if (ma_is_leaf(node->type))
5270  		goto free_leaf;
5271  
5272  	start = node;
5273  	enode = mt_mk_node(node, node->type);
5274  	slots = mte_dead_walk(&enode, 0);
5275  	node = mte_to_node(enode);
5276  	do {
5277  		mt_free_bulk(node->slot_len, slots);
5278  		offset = node->parent_slot + 1;
5279  		enode = node->piv_parent;
5280  		if (mte_to_node(enode) == node)
5281  			goto free_leaf;
5282  
5283  		type = mte_node_type(enode);
5284  		slots = ma_slots(mte_to_node(enode), type);
5285  		if ((offset < mt_slots[type]) &&
5286  		    rcu_dereference_protected(slots[offset],
5287  					      lock_is_held(&rcu_callback_map)))
5288  			slots = mte_dead_walk(&enode, offset);
5289  		node = mte_to_node(enode);
5290  	} while ((node != start) || (node->slot_len < offset));
5291  
5292  	slots = ma_slots(node, node->type);
5293  	mt_free_bulk(node->slot_len, slots);
5294  
5295  free_leaf:
5296  	mt_free_rcu(&node->rcu);
5297  }
5298  
5299  static inline void __rcu **mte_destroy_descend(struct maple_enode **enode,
5300  	struct maple_tree *mt, struct maple_enode *prev, unsigned char offset)
5301  {
5302  	struct maple_node *node;
5303  	struct maple_enode *next = *enode;
5304  	void __rcu **slots = NULL;
5305  	enum maple_type type;
5306  	unsigned char next_offset = 0;
5307  
5308  	do {
5309  		*enode = next;
5310  		node = mte_to_node(*enode);
5311  		type = mte_node_type(*enode);
5312  		slots = ma_slots(node, type);
5313  		next = mt_slot_locked(mt, slots, next_offset);
5314  		if ((mte_dead_node(next)))
5315  			next = mt_slot_locked(mt, slots, ++next_offset);
5316  
5317  		mte_set_node_dead(*enode);
5318  		node->type = type;
5319  		node->piv_parent = prev;
5320  		node->parent_slot = offset;
5321  		offset = next_offset;
5322  		next_offset = 0;
5323  		prev = *enode;
5324  	} while (!mte_is_leaf(next));
5325  
5326  	return slots;
5327  }
5328  
5329  static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt,
5330  			    bool free)
5331  {
5332  	void __rcu **slots;
5333  	struct maple_node *node = mte_to_node(enode);
5334  	struct maple_enode *start;
5335  
5336  	if (mte_is_leaf(enode)) {
5337  		node->type = mte_node_type(enode);
5338  		goto free_leaf;
5339  	}
5340  
5341  	start = enode;
5342  	slots = mte_destroy_descend(&enode, mt, start, 0);
5343  	node = mte_to_node(enode); // Updated in the above call.
5344  	do {
5345  		enum maple_type type;
5346  		unsigned char offset;
5347  		struct maple_enode *parent, *tmp;
5348  
5349  		node->slot_len = mte_dead_leaves(enode, mt, slots);
5350  		if (free)
5351  			mt_free_bulk(node->slot_len, slots);
5352  		offset = node->parent_slot + 1;
5353  		enode = node->piv_parent;
5354  		if (mte_to_node(enode) == node)
5355  			goto free_leaf;
5356  
5357  		type = mte_node_type(enode);
5358  		slots = ma_slots(mte_to_node(enode), type);
5359  		if (offset >= mt_slots[type])
5360  			goto next;
5361  
5362  		tmp = mt_slot_locked(mt, slots, offset);
5363  		if (mte_node_type(tmp) && mte_to_node(tmp)) {
5364  			parent = enode;
5365  			enode = tmp;
5366  			slots = mte_destroy_descend(&enode, mt, parent, offset);
5367  		}
5368  next:
5369  		node = mte_to_node(enode);
5370  	} while (start != enode);
5371  
5372  	node = mte_to_node(enode);
5373  	node->slot_len = mte_dead_leaves(enode, mt, slots);
5374  	if (free)
5375  		mt_free_bulk(node->slot_len, slots);
5376  
5377  free_leaf:
5378  	if (free)
5379  		mt_free_rcu(&node->rcu);
5380  	else
5381  		mt_clear_meta(mt, node, node->type);
5382  }
5383  
5384  /*
5385   * mte_destroy_walk() - Free a tree or sub-tree.
5386   * @enode: the encoded maple node (maple_enode) to start
5387   * @mt: the tree to free - needed for node types.
5388   *
5389   * Must hold the write lock.
5390   */
5391  static inline void mte_destroy_walk(struct maple_enode *enode,
5392  				    struct maple_tree *mt)
5393  {
5394  	struct maple_node *node = mte_to_node(enode);
5395  
5396  	if (mt_in_rcu(mt)) {
5397  		mt_destroy_walk(enode, mt, false);
5398  		call_rcu(&node->rcu, mt_free_walk);
5399  	} else {
5400  		mt_destroy_walk(enode, mt, true);
5401  	}
5402  }
5403  /* Interface */
5404  
5405  /**
5406   * mas_store() - Store an @entry.
5407   * @mas: The maple state.
5408   * @entry: The entry to store.
5409   *
5410   * The @mas->index and @mas->last is used to set the range for the @entry.
5411   *
5412   * Return: the first entry between mas->index and mas->last or %NULL.
5413   */
5414  void *mas_store(struct ma_state *mas, void *entry)
5415  {
5416  	int request;
5417  	MA_WR_STATE(wr_mas, mas, entry);
5418  
5419  	trace_ma_write(__func__, mas, 0, entry);
5420  #ifdef CONFIG_DEBUG_MAPLE_TREE
5421  	if (MAS_WARN_ON(mas, mas->index > mas->last))
5422  		pr_err("Error %lX > %lX %p\n", mas->index, mas->last, entry);
5423  
5424  	if (mas->index > mas->last) {
5425  		mas_set_err(mas, -EINVAL);
5426  		return NULL;
5427  	}
5428  
5429  #endif
5430  
5431  	/*
5432  	 * Storing is the same operation as insert with the added caveat that it
5433  	 * can overwrite entries.  Although this seems simple enough, one may
5434  	 * want to examine what happens if a single store operation was to
5435  	 * overwrite multiple entries within a self-balancing B-Tree.
5436  	 */
5437  	mas_wr_prealloc_setup(&wr_mas);
5438  	mas_wr_store_type(&wr_mas);
5439  	if (mas->mas_flags & MA_STATE_PREALLOC) {
5440  		mas_wr_store_entry(&wr_mas);
5441  		MAS_WR_BUG_ON(&wr_mas, mas_is_err(mas));
5442  		return wr_mas.content;
5443  	}
5444  
5445  	request = mas_prealloc_calc(mas, entry);
5446  	if (!request)
5447  		goto store;
5448  
5449  	mas_node_count(mas, request);
5450  	if (mas_is_err(mas))
5451  		return NULL;
5452  
5453  store:
5454  	mas_wr_store_entry(&wr_mas);
5455  	mas_destroy(mas);
5456  	return wr_mas.content;
5457  }
5458  EXPORT_SYMBOL_GPL(mas_store);
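
/*
 * Illustrative sketch (not part of the upstream source): overwrite the range
 * 10-20 with one entry under the tree lock.  "mt" and "new_entry" are
 * hypothetical; the return value is the first entry that was overwritten, if
 * any.
 *
 *	MA_STATE(mas, &mt, 10, 20);
 *	void *old;
 *
 *	mtree_lock(&mt);
 *	old = mas_store(&mas, new_entry);
 *	mtree_unlock(&mt);
 */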
5459  
5460  /**
5461   * mas_store_gfp() - Store a value into the tree.
5462   * @mas: The maple state
5463   * @entry: The entry to store
5464   * @gfp: The GFP_FLAGS to use for allocations if necessary.
5465   *
5466   * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not
5467   * be allocated.
5468   */
5469  int mas_store_gfp(struct ma_state *mas, void *entry, gfp_t gfp)
5470  {
5471  	unsigned long index = mas->index;
5472  	unsigned long last = mas->last;
5473  	MA_WR_STATE(wr_mas, mas, entry);
5474  	int ret = 0;
5475  
5476  retry:
5477  	mas_wr_preallocate(&wr_mas, entry);
5478  	if (unlikely(mas_nomem(mas, gfp))) {
5479  		if (!entry)
5480  			__mas_set_range(mas, index, last);
5481  		goto retry;
5482  	}
5483  
5484  	if (mas_is_err(mas)) {
5485  		ret = xa_err(mas->node);
5486  		goto out;
5487  	}
5488  
5489  	mas_wr_store_entry(&wr_mas);
5490  out:
5491  	mas_destroy(mas);
5492  	return ret;
5493  }
5494  EXPORT_SYMBOL_GPL(mas_store_gfp);
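
/*
 * Illustrative sketch (not part of the upstream source): mas_store_gfp()
 * retries internally on -ENOMEM (mas_nomem() may temporarily drop the lock
 * when the gfp flags allow blocking).  "mt" and "item" are hypothetical.
 *
 *	MA_STATE(mas, &mt, 0x40, 0x4f);
 *	int err;
 *
 *	mas_lock(&mas);
 *	err = mas_store_gfp(&mas, item, GFP_KERNEL);
 *	mas_unlock(&mas);
 */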
5495  
5496  /**
5497   * mas_store_prealloc() - Store a value into the tree using memory
5498   * preallocated in the maple state.
5499   * @mas: The maple state
5500   * @entry: The entry to store.
5501   */
5502  void mas_store_prealloc(struct ma_state *mas, void *entry)
5503  {
5504  	MA_WR_STATE(wr_mas, mas, entry);
5505  
5506  	if (mas->store_type == wr_store_root) {
5507  		mas_wr_prealloc_setup(&wr_mas);
5508  		goto store;
5509  	}
5510  
5511  	mas_wr_walk_descend(&wr_mas);
5512  	if (mas->store_type != wr_spanning_store) {
5513  		/* set wr_mas->content to current slot */
5514  		wr_mas.content = mas_slot_locked(mas, wr_mas.slots, mas->offset);
5515  		mas_wr_end_piv(&wr_mas);
5516  	}
5517  
5518  store:
5519  	trace_ma_write(__func__, mas, 0, entry);
5520  	mas_wr_store_entry(&wr_mas);
5521  	MAS_WR_BUG_ON(&wr_mas, mas_is_err(mas));
5522  	mas_destroy(mas);
5523  }
5524  EXPORT_SYMBOL_GPL(mas_store_prealloc);
5525  
5526  /**
5527   * mas_preallocate() - Preallocate enough nodes for a store operation
5528   * @mas: The maple state
5529   * @entry: The entry that will be stored
5530   * @gfp: The GFP_FLAGS to use for allocations.
5531   *
5532   * Return: 0 on success, -ENOMEM if memory could not be allocated.
5533   */
5534  int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp)
5535  {
5536  	MA_WR_STATE(wr_mas, mas, entry);
5537  	int ret = 0;
5538  	int request;
5539  
5540  	mas_wr_prealloc_setup(&wr_mas);
5541  	mas_wr_store_type(&wr_mas);
5542  	request = mas_prealloc_calc(mas, entry);
5543  	if (!request)
5544  		return ret;
5545  
5546  	mas_node_count_gfp(mas, request, gfp);
5547  	if (mas_is_err(mas)) {
5548  		mas_set_alloc_req(mas, 0);
5549  		ret = xa_err(mas->node);
5550  		mas_destroy(mas);
5551  		mas_reset(mas);
5552  		return ret;
5553  	}
5554  
5555  	mas->mas_flags |= MA_STATE_PREALLOC;
5556  	return ret;
5557  }
5558  EXPORT_SYMBOL_GPL(mas_preallocate);
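
/*
 * Illustrative sketch (not part of the upstream source): reserve the nodes
 * for a single store up front, then perform the store without further
 * allocation.  mas_store_prealloc() calls mas_destroy() itself; if the store
 * is abandoned, call mas_destroy() to drop the reserved nodes.  Locking is
 * elided and the names are hypothetical.
 *
 *	MA_STATE(mas, &mt, first, last);
 *
 *	if (mas_preallocate(&mas, new_entry, GFP_KERNEL))
 *		return -ENOMEM;
 *
 *	mas_store_prealloc(&mas, new_entry);
 */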
5559  
5560  /*
5561   * mas_destroy() - destroy a maple state.
5562   * @mas: The maple state
5563   *
5564   * Upon completion, check the left-most node and rebalance against the node to
5565   * the right if necessary.  Frees any allocated nodes associated with this maple
5566   * state.
5567   */
5568  void mas_destroy(struct ma_state *mas)
5569  {
5570  	struct maple_alloc *node;
5571  	unsigned long total;
5572  
5573  	/*
5574  	 * When using mas_for_each() to insert an expected number of elements,
5575  	 * it is possible that the number inserted is less than the expected
5576  	 * number.  To fix an invalid final node, a check is performed here to
5577  	 * rebalance the previous node with the final node.
5578  	 */
5579  	if (mas->mas_flags & MA_STATE_REBALANCE) {
5580  		unsigned char end;
5581  		if (mas_is_err(mas))
5582  			mas_reset(mas);
5583  		mas_start(mas);
5584  		mtree_range_walk(mas);
5585  		end = mas->end + 1;
5586  		if (end < mt_min_slot_count(mas->node) - 1)
5587  			mas_destroy_rebalance(mas, end);
5588  
5589  		mas->mas_flags &= ~MA_STATE_REBALANCE;
5590  	}
5591  	mas->mas_flags &= ~(MA_STATE_BULK|MA_STATE_PREALLOC);
5592  
5593  	total = mas_allocated(mas);
5594  	while (total) {
5595  		node = mas->alloc;
5596  		mas->alloc = node->slot[0];
5597  		if (node->node_count > 1) {
5598  			size_t count = node->node_count - 1;
5599  
5600  			mt_free_bulk(count, (void __rcu **)&node->slot[1]);
5601  			total -= count;
5602  		}
5603  		mt_free_one(ma_mnode_ptr(node));
5604  		total--;
5605  	}
5606  
5607  	mas->alloc = NULL;
5608  }
5609  EXPORT_SYMBOL_GPL(mas_destroy);
5610  
5611  /*
5612   * mas_expected_entries() - Set the expected number of entries that will be inserted.
5613   * @mas: The maple state
5614   * @nr_entries: The number of expected entries.
5615   *
5616   * This will attempt to pre-allocate enough nodes to store the expected number
5617   * of entries.  The allocations will occur using the bulk allocator interface
5618   * for speed.  Please call mas_destroy() on the @mas after inserting the entries
5619   * to ensure any unused nodes are freed.
5620   *
5621   * Return: 0 on success, -ENOMEM if memory could not be allocated.
5622   */
5623  int mas_expected_entries(struct ma_state *mas, unsigned long nr_entries)
5624  {
5625  	int nonleaf_cap = MAPLE_ARANGE64_SLOTS - 2;
5626  	struct maple_enode *enode = mas->node;
5627  	int nr_nodes;
5628  	int ret;
5629  
5630  	/*
5631  	 * Sometimes it is necessary to duplicate a tree to a new tree, such as
5632  	 * forking a process and duplicating the VMAs from one tree to a new
5633  	 * tree.  When such a situation arises, it is known that the new tree is
5634  	 * not going to be used until the entire tree is populated.  For
5635  	 * performance reasons, it is best to use a bulk load with RCU disabled.
5636  	 * This allows for optimistic splitting that favours the left and reuse
5637  	 * of nodes during the operation.
5638  	 */
5639  
5640  	/* Optimize splitting for bulk insert in-order */
5641  	mas->mas_flags |= MA_STATE_BULK;
5642  
5643  	/*
5644  	 * Avoid overflow, assume a gap between each entry and a trailing null.
5645  	 * If this is wrong, it just means allocation can happen during
5646  	 * insertion of entries.
5647  	 */
5648  	nr_nodes = max(nr_entries, nr_entries * 2 + 1);
5649  	if (!mt_is_alloc(mas->tree))
5650  		nonleaf_cap = MAPLE_RANGE64_SLOTS - 2;
5651  
5652  	/* Leaves; reduce slots to keep space for expansion */
5653  	nr_nodes = DIV_ROUND_UP(nr_nodes, MAPLE_RANGE64_SLOTS - 2);
5654  	/* Internal nodes */
5655  	nr_nodes += DIV_ROUND_UP(nr_nodes, nonleaf_cap);
5656  	/* Add working room for split (2 nodes) + new parents */
5657  	mas_node_count_gfp(mas, nr_nodes + 3, GFP_KERNEL);
5658  
5659  	/* Detect if allocations run out */
5660  	mas->mas_flags |= MA_STATE_PREALLOC;
5661  
5662  	if (!mas_is_err(mas))
5663  		return 0;
5664  
5665  	ret = xa_err(mas->node);
5666  	mas->node = enode;
5667  	mas_destroy(mas);
5668  	return ret;
5669  
5670  }
5671  EXPORT_SYMBOL_GPL(mas_expected_entries);
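
/*
 * Illustrative sketch (not part of the upstream source): bulk loading a
 * fresh tree with a known number of in-order entries.  The ranges and
 * entries come from hypothetical arrays; mas_destroy() must run afterwards
 * to rebalance the final node and free any unused allocations.
 *
 *	MA_STATE(mas, &new_mt, 0, 0);
 *	int err, i;
 *
 *	mas_lock(&mas);
 *	err = mas_expected_entries(&mas, nr);
 *	if (!err) {
 *		for (i = 0; i < nr; i++) {
 *			mas_set_range(&mas, start[i], end[i]);
 *			mas_store(&mas, entry[i]);
 *		}
 *	}
 *	mas_destroy(&mas);
 *	mas_unlock(&mas);
 */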
5672  
5673  static bool mas_next_setup(struct ma_state *mas, unsigned long max,
5674  		void **entry)
5675  {
5676  	bool was_none = mas_is_none(mas);
5677  
5678  	if (unlikely(mas->last >= max)) {
5679  		mas->status = ma_overflow;
5680  		return true;
5681  	}
5682  
5683  	switch (mas->status) {
5684  	case ma_active:
5685  		return false;
5686  	case ma_none:
5687  		fallthrough;
5688  	case ma_pause:
5689  		mas->status = ma_start;
5690  		fallthrough;
5691  	case ma_start:
5692  		mas_walk(mas); /* Retries on dead nodes handled by mas_walk */
5693  		break;
5694  	case ma_overflow:
5695  		/* Overflowed before, but the max changed */
5696  		mas->status = ma_active;
5697  		break;
5698  	case ma_underflow:
5699  		/* The user expects the mas to be one before where it is */
5700  		mas->status = ma_active;
5701  		*entry = mas_walk(mas);
5702  		if (*entry)
5703  			return true;
5704  		break;
5705  	case ma_root:
5706  		break;
5707  	case ma_error:
5708  		return true;
5709  	}
5710  
5711  	if (likely(mas_is_active(mas))) /* Fast path */
5712  		return false;
5713  
5714  	if (mas_is_ptr(mas)) {
5715  		*entry = NULL;
5716  		if (was_none && mas->index == 0) {
5717  			mas->index = mas->last = 0;
5718  			return true;
5719  		}
5720  		mas->index = 1;
5721  		mas->last = ULONG_MAX;
5722  		mas->status = ma_none;
5723  		return true;
5724  	}
5725  
5726  	if (mas_is_none(mas))
5727  		return true;
5728  
5729  	return false;
5730  }
5731  
5732  /**
5733   * mas_next() - Get the next entry.
5734   * @mas: The maple state
5735   * @max: The maximum index to check.
5736   *
5737   * Returns the next entry after @mas->index.
5738   * Must hold rcu_read_lock or the write lock.
5739   * Can return the zero entry.
5740   *
5741   * Return: The next entry or %NULL
5742   */
5743  void *mas_next(struct ma_state *mas, unsigned long max)
5744  {
5745  	void *entry = NULL;
5746  
5747  	if (mas_next_setup(mas, max, &entry))
5748  		return entry;
5749  
5750  	/* Retries on dead nodes handled by mas_next_slot */
5751  	return mas_next_slot(mas, max, false);
5752  }
5753  EXPORT_SYMBOL_GPL(mas_next);
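
/*
 * Illustrative sketch (not part of the upstream source): iterate forward
 * over every entry.  mas_find() positions the state on the first entry and
 * mas_next() continues from there; "mt" and do_something() are hypothetical.
 *
 *	MA_STATE(mas, &mt, 0, 0);
 *	void *entry;
 *
 *	rcu_read_lock();
 *	entry = mas_find(&mas, ULONG_MAX);
 *	while (entry) {
 *		do_something(mas.index, mas.last, entry);
 *		entry = mas_next(&mas, ULONG_MAX);
 *	}
 *	rcu_read_unlock();
 */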
5754  
5755  /**
5756   * mas_next_range() - Advance the maple state to the next range
5757   * @mas: The maple state
5758   * @max: The maximum index to check.
5759   *
5760   * Sets @mas->index and @mas->last to the range.
5761   * Must hold rcu_read_lock or the write lock.
5762   * Can return the zero entry.
5763   *
5764   * Return: The next entry or %NULL
5765   */
5766  void *mas_next_range(struct ma_state *mas, unsigned long max)
5767  {
5768  	void *entry = NULL;
5769  
5770  	if (mas_next_setup(mas, max, &entry))
5771  		return entry;
5772  
5773  	/* Retries on dead nodes handled by mas_next_slot */
5774  	return mas_next_slot(mas, max, true);
5775  }
5776  EXPORT_SYMBOL_GPL(mas_next_range);
5777  
5778  /**
5779   * mt_next() - get the next value in the maple tree
5780   * @mt: The maple tree
5781   * @index: The start index
5782   * @max: The maximum index to check
5783   *
5784   * Takes RCU read lock internally to protect the search, which does not
5785   * protect the returned pointer after dropping RCU read lock.
5786   * See also: Documentation/core-api/maple_tree.rst
5787   *
5788   * Return: The entry higher than @index or %NULL if nothing is found.
5789   */
5790  void *mt_next(struct maple_tree *mt, unsigned long index, unsigned long max)
5791  {
5792  	void *entry = NULL;
5793  	MA_STATE(mas, mt, index, index);
5794  
5795  	rcu_read_lock();
5796  	entry = mas_next(&mas, max);
5797  	rcu_read_unlock();
5798  	return entry;
5799  }
5800  EXPORT_SYMBOL_GPL(mt_next);
5801  
5802  static bool mas_prev_setup(struct ma_state *mas, unsigned long min, void **entry)
5803  {
5804  	if (unlikely(mas->index <= min)) {
5805  		mas->status = ma_underflow;
5806  		return true;
5807  	}
5808  
5809  	switch (mas->status) {
5810  	case ma_active:
5811  		return false;
5812  	case ma_start:
5813  		break;
5814  	case ma_none:
5815  		fallthrough;
5816  	case ma_pause:
5817  		mas->status = ma_start;
5818  		break;
5819  	case ma_underflow:
5820  		/* underflowed before but the min changed */
5821  		mas->status = ma_active;
5822  		break;
5823  	case ma_overflow:
5824  		/* User expects mas to be one after where it is */
5825  		mas->status = ma_active;
5826  		*entry = mas_walk(mas);
5827  		if (*entry)
5828  			return true;
5829  		break;
5830  	case ma_root:
5831  		break;
5832  	case ma_error:
5833  		return true;
5834  	}
5835  
5836  	if (mas_is_start(mas))
5837  		mas_walk(mas);
5838  
5839  	if (unlikely(mas_is_ptr(mas))) {
5840  		if (!mas->index) {
5841  			mas->status = ma_none;
5842  			return true;
5843  		}
5844  		mas->index = mas->last = 0;
5845  		*entry = mas_root(mas);
5846  		return true;
5847  	}
5848  
5849  	if (mas_is_none(mas)) {
5850  		if (mas->index) {
5851  			/* Walked to out-of-range pointer? */
5852  			mas->index = mas->last = 0;
5853  			mas->status = ma_root;
5854  			*entry = mas_root(mas);
5855  			return true;
5856  		}
5857  		return true;
5858  	}
5859  
5860  	return false;
5861  }
5862  
5863  /**
5864   * mas_prev() - Get the previous entry
5865   * @mas: The maple state
5866   * @min: The minimum value to check.
5867   *
5868   * Must hold rcu_read_lock or the write lock.
5869   * Will reset mas to ma_start if the status is ma_none.  Will stop at states
5870   * that are not searchable.
5871   *
5872   * Return: the previous value or %NULL.
5873   */
5874  void *mas_prev(struct ma_state *mas, unsigned long min)
5875  {
5876  	void *entry = NULL;
5877  
5878  	if (mas_prev_setup(mas, min, &entry))
5879  		return entry;
5880  
5881  	return mas_prev_slot(mas, min, false);
5882  }
5883  EXPORT_SYMBOL_GPL(mas_prev);
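
/*
 * Illustrative sketch (not part of the upstream source): walk backwards from
 * a known index down to 0.  "mt", "start" and handle() are hypothetical.
 *
 *	MA_STATE(mas, &mt, start, start);
 *	void *entry;
 *
 *	rcu_read_lock();
 *	while ((entry = mas_prev(&mas, 0)) != NULL)
 *		handle(mas.index, mas.last, entry);
 *	rcu_read_unlock();
 */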
5884  
5885  /**
5886   * mas_prev_range() - Advance to the previous range
5887   * @mas: The maple state
5888   * @min: The minimum value to check.
5889   *
5890   * Sets @mas->index and @mas->last to the range.
5891   * Must hold rcu_read_lock or the write lock.
5892   * Will reset mas to ma_start if the status is ma_none.  Will stop at states
5893   * that are not searchable.
5894   *
5895   * Return: the previous value or %NULL.
5896   */
5897  void *mas_prev_range(struct ma_state *mas, unsigned long min)
5898  {
5899  	void *entry = NULL;
5900  
5901  	if (mas_prev_setup(mas, min, &entry))
5902  		return entry;
5903  
5904  	return mas_prev_slot(mas, min, true);
5905  }
5906  EXPORT_SYMBOL_GPL(mas_prev_range);
5907  
5908  /**
5909   * mt_prev() - get the previous value in the maple tree
5910   * @mt: The maple tree
5911   * @index: The start index
5912   * @min: The minimum index to check
5913   *
5914   * Takes RCU read lock internally to protect the search, which does not
5915   * protect the returned pointer after dropping RCU read lock.
5916   * See also: Documentation/core-api/maple_tree.rst
5917   *
5918   * Return: The entry before @index or %NULL if nothing is found.
5919   */
5920  void *mt_prev(struct maple_tree *mt, unsigned long index, unsigned long min)
5921  {
5922  	void *entry = NULL;
5923  	MA_STATE(mas, mt, index, index);
5924  
5925  	rcu_read_lock();
5926  	entry = mas_prev(&mas, min);
5927  	rcu_read_unlock();
5928  	return entry;
5929  }
5930  EXPORT_SYMBOL_GPL(mt_prev);
5931  
5932  /**
5933   * mas_pause() - Pause a mas_find/mas_for_each to drop the lock.
5934   * @mas: The maple state to pause
5935   *
5936   * Some users need to pause a walk and drop the lock they're holding in
5937   * order to yield to a higher priority thread or carry out an operation
5938   * on an entry.  Those users should call this function before they drop
5939   * the lock.  It resets the @mas to be suitable for the next iteration
5940   * of the loop after the user has reacquired the lock.  If most entries
5941   * found during a walk require you to call mas_pause(), the mt_for_each()
5942   * iterator may be more appropriate.
5943   *
5944   */
5945  void mas_pause(struct ma_state *mas)
5946  {
5947  	mas->status = ma_pause;
5948  	mas->node = NULL;
5949  }
5950  EXPORT_SYMBOL_GPL(mas_pause);
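
/*
 * Illustrative sketch (not part of the upstream source): dropping the RCU
 * read lock during a long walk.  After mas_pause() the next mas_find() in
 * the mas_for_each() loop restarts from the saved position.  "mt" and
 * process() are hypothetical.
 *
 *	MA_STATE(mas, &mt, 0, 0);
 *	void *entry;
 *
 *	rcu_read_lock();
 *	mas_for_each(&mas, entry, ULONG_MAX) {
 *		if (need_resched()) {
 *			mas_pause(&mas);
 *			rcu_read_unlock();
 *			cond_resched();
 *			rcu_read_lock();
 *		}
 *		process(entry);
 *	}
 *	rcu_read_unlock();
 */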
5951  
5952  /**
5953   * mas_find_setup() - Internal function to set up mas_find*().
5954   * @mas: The maple state
5955   * @max: The maximum index
5956   * @entry: Pointer to the entry
5957   *
5958   * Returns: True if entry is the answer, false otherwise.
5959   */
5960  static __always_inline bool mas_find_setup(struct ma_state *mas, unsigned long max, void **entry)
5961  {
5962  	switch (mas->status) {
5963  	case ma_active:
5964  		if (mas->last < max)
5965  			return false;
5966  		return true;
5967  	case ma_start:
5968  		break;
5969  	case ma_pause:
5970  		if (unlikely(mas->last >= max))
5971  			return true;
5972  
5973  		mas->index = ++mas->last;
5974  		mas->status = ma_start;
5975  		break;
5976  	case ma_none:
5977  		if (unlikely(mas->last >= max))
5978  			return true;
5979  
5980  		mas->index = mas->last;
5981  		mas->status = ma_start;
5982  		break;
5983  	case ma_underflow:
5984  		/* mas is pointing at entry before unable to go lower */
5985  		if (unlikely(mas->index >= max)) {
5986  			mas->status = ma_overflow;
5987  			return true;
5988  		}
5989  
5990  		mas->status = ma_active;
5991  		*entry = mas_walk(mas);
5992  		if (*entry)
5993  			return true;
5994  		break;
5995  	case ma_overflow:
5996  		if (unlikely(mas->last >= max))
5997  			return true;
5998  
5999  		mas->status = ma_active;
6000  		*entry = mas_walk(mas);
6001  		if (*entry)
6002  			return true;
6003  		break;
6004  	case ma_root:
6005  		break;
6006  	case ma_error:
6007  		return true;
6008  	}
6009  
6010  	if (mas_is_start(mas)) {
6011  		/* First run or continue */
6012  		if (mas->index > max)
6013  			return true;
6014  
6015  		*entry = mas_walk(mas);
6016  		if (*entry)
6017  			return true;
6018  
6019  	}
6020  
6021  	if (unlikely(mas_is_ptr(mas)))
6022  		goto ptr_out_of_range;
6023  
6024  	if (unlikely(mas_is_none(mas)))
6025  		return true;
6026  
6027  	if (mas->index == max)
6028  		return true;
6029  
6030  	return false;
6031  
6032  ptr_out_of_range:
6033  	mas->status = ma_none;
6034  	mas->index = 1;
6035  	mas->last = ULONG_MAX;
6036  	return true;
6037  }
6038  
6039  /**
6040   * mas_find() - On the first call, find the entry at or after mas->index up to
6041   * %max.  Otherwise, find the entry after mas->index.
6042   * @mas: The maple state
6043   * @max: The maximum value to check.
6044   *
6045   * Must hold rcu_read_lock or the write lock.
6046   * If an entry exists, last and index are updated accordingly.
6047   * May set @mas->status to ma_overflow.
6048   *
6049   * Return: The entry or %NULL.
6050   */
6051  void *mas_find(struct ma_state *mas, unsigned long max)
6052  {
6053  	void *entry = NULL;
6054  
6055  	if (mas_find_setup(mas, max, &entry))
6056  		return entry;
6057  
6058  	/* Retries on dead nodes handled by mas_next_slot */
6059  	entry = mas_next_slot(mas, max, false);
6060  	/* Ignore overflow */
6061  	mas->status = ma_active;
6062  	return entry;
6063  }
6064  EXPORT_SYMBOL_GPL(mas_find);
6065  
6066  /**
6067   * mas_find_range() - On the first call, find the entry at or after
6068   * mas->index up to %max.  Otherwise, advance to the next slot after mas->index.
6069   * @mas: The maple state
6070   * @max: The maximum value to check.
6071   *
6072   * Must hold rcu_read_lock or the write lock.
6073   * If an entry exists, last and index are updated accordingly.
6074   * May set @mas->status to ma_overflow.
6075   *
6076   * Return: The entry or %NULL.
6077   */
6078  void *mas_find_range(struct ma_state *mas, unsigned long max)
6079  {
6080  	void *entry = NULL;
6081  
6082  	if (mas_find_setup(mas, max, &entry))
6083  		return entry;
6084  
6085  	/* Retries on dead nodes handled by mas_next_slot */
6086  	return mas_next_slot(mas, max, true);
6087  }
6088  EXPORT_SYMBOL_GPL(mas_find_range);
6089  
6090  /**
6091   * mas_find_rev_setup() - Internal function to set up mas_find_*_rev()
6092   * @mas: The maple state
6093   * @min: The minimum index
6094   * @entry: Pointer to the entry
6095   *
6096   * Returns: True if entry is the answer, false otherwise.
6097   */
6098  static bool mas_find_rev_setup(struct ma_state *mas, unsigned long min,
6099  		void **entry)
6100  {
6101  
6102  	switch (mas->status) {
6103  	case ma_active:
6104  		goto active;
6105  	case ma_start:
6106  		break;
6107  	case ma_pause:
6108  		if (unlikely(mas->index <= min)) {
6109  			mas->status = ma_underflow;
6110  			return true;
6111  		}
6112  		mas->last = --mas->index;
6113  		mas->status = ma_start;
6114  		break;
6115  	case ma_none:
6116  		if (mas->index <= min)
6117  			goto none;
6118  
6119  		mas->last = mas->index;
6120  		mas->status = ma_start;
6121  		break;
6122  	case ma_overflow: /* user expects the mas to be one after where it is */
6123  		if (unlikely(mas->index <= min)) {
6124  			mas->status = ma_underflow;
6125  			return true;
6126  		}
6127  
6128  		mas->status = ma_active;
6129  		break;
6130  	case ma_underflow: /* user expects the mas to be one before where it is */
6131  		if (unlikely(mas->index <= min))
6132  			return true;
6133  
6134  		mas->status = ma_active;
6135  		break;
6136  	case ma_root:
6137  		break;
6138  	case ma_error:
6139  		return true;
6140  	}
6141  
6142  	if (mas_is_start(mas)) {
6143  		/* First run or continue */
6144  		if (mas->index < min)
6145  			return true;
6146  
6147  		*entry = mas_walk(mas);
6148  		if (*entry)
6149  			return true;
6150  	}
6151  
6152  	if (unlikely(mas_is_ptr(mas)))
6153  		goto none;
6154  
6155  	if (unlikely(mas_is_none(mas))) {
6156  		/*
6157  		 * Walked to the location, and there was nothing so the previous
6158  		 * location is 0.
6159  		 */
6160  		mas->last = mas->index = 0;
6161  		mas->status = ma_root;
6162  		*entry = mas_root(mas);
6163  		return true;
6164  	}
6165  
6166  active:
6167  	if (mas->index < min)
6168  		return true;
6169  
6170  	return false;
6171  
6172  none:
6173  	mas->status = ma_none;
6174  	return true;
6175  }
6176  
6177  /**
6178   * mas_find_rev() - On the first call, find the first non-null entry at or below
6179   * mas->index down to %min.  Otherwise find the first non-null entry below
6180   * mas->index down to %min.
6181   * @mas: The maple state
6182   * @min: The minimum value to check.
6183   *
6184   * Must hold rcu_read_lock or the write lock.
6185   * If an entry exists, last and index are updated accordingly.
6186   * May set @mas->status to ma_underflow.
6187   *
6188   * Return: The entry or %NULL.
6189   */
6190  void *mas_find_rev(struct ma_state *mas, unsigned long min)
6191  {
6192  	void *entry = NULL;
6193  
6194  	if (mas_find_rev_setup(mas, min, &entry))
6195  		return entry;
6196  
6197  	/* Retries on dead nodes handled by mas_prev_slot */
6198  	return mas_prev_slot(mas, min, false);
6199  
6200  }
6201  EXPORT_SYMBOL_GPL(mas_find_rev);
6202  
6203  /**
6204   * mas_find_range_rev() - On the first call, find the first non-null entry at or
6205   * below mas->index down to %min.  Otherwise advance to the previous slot after
6206   * mas->index down to %min.
6207   * @mas: The maple state
6208   * @min: The minimum value to check.
6209   *
6210   * Must hold rcu_read_lock or the write lock.
6211   * If an entry exists, last and index are updated accordingly.
6212   * May set @mas->status to ma_underflow.
6213   *
6214   * Return: The entry or %NULL.
6215   */
6216  void *mas_find_range_rev(struct ma_state *mas, unsigned long min)
6217  {
6218  	void *entry = NULL;
6219  
6220  	if (mas_find_rev_setup(mas, min, &entry))
6221  		return entry;
6222  
6223  	/* Retries on dead nodes handled by mas_prev_slot */
6224  	return mas_prev_slot(mas, min, true);
6225  }
6226  EXPORT_SYMBOL_GPL(mas_find_range_rev);
6227  
6228  /**
6229   * mas_erase() - Find the range in which index resides and erase the entire
6230   * range.
6231   * @mas: The maple state
6232   *
6233   * Must hold the write lock.
6234   * Searches for @mas->index, sets @mas->index and @mas->last to the range and
6235   * erases that range.
6236   *
6237   * Return: the entry that was erased or %NULL, @mas->index and @mas->last are updated.
6238   */
6239  void *mas_erase(struct ma_state *mas)
6240  {
6241  	void *entry;
6242  	unsigned long index = mas->index;
6243  	MA_WR_STATE(wr_mas, mas, NULL);
6244  
6245  	if (!mas_is_active(mas) || !mas_is_start(mas))
6246  		mas->status = ma_start;
6247  
6248  write_retry:
6249  	entry = mas_state_walk(mas);
6250  	if (!entry)
6251  		return NULL;
6252  
6253  	/* Must reset to ensure spanning writes of last slot are detected */
6254  	mas_reset(mas);
6255  	mas_wr_preallocate(&wr_mas, NULL);
6256  	if (mas_nomem(mas, GFP_KERNEL)) {
6257  		/* in case the range of entry changed when unlocked */
6258  		mas->index = mas->last = index;
6259  		goto write_retry;
6260  	}
6261  
6262  	if (mas_is_err(mas))
6263  		goto out;
6264  
6265  	mas_wr_store_entry(&wr_mas);
6266  out:
6267  	mas_destroy(mas);
6268  	return entry;
6269  }
6270  EXPORT_SYMBOL_GPL(mas_erase);
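
/*
 * Illustrative sketch (not part of the upstream source): erase whichever
 * range covers index 42 while holding the write lock.  "mt" is hypothetical.
 *
 *	MA_STATE(mas, &mt, 42, 42);
 *	void *old;
 *
 *	mtree_lock(&mt);
 *	old = mas_erase(&mas);
 *	mtree_unlock(&mt);
 */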
6271  
6272  /**
6273   * mas_nomem() - Check if there was an error allocating and do the allocation
6274   * if necessary.  If there are allocations, then free them.
6275   * @mas: The maple state
6276   * @gfp: The GFP_FLAGS to use for allocations
6277   * Return: true on allocation, false otherwise.
6278   */
6279  bool mas_nomem(struct ma_state *mas, gfp_t gfp)
6280  	__must_hold(mas->tree->ma_lock)
6281  {
6282  	if (likely(mas->node != MA_ERROR(-ENOMEM)))
6283  		return false;
6284  
6285  	if (gfpflags_allow_blocking(gfp) && !mt_external_lock(mas->tree)) {
6286  		mtree_unlock(mas->tree);
6287  		mas_alloc_nodes(mas, gfp);
6288  		mtree_lock(mas->tree);
6289  	} else {
6290  		mas_alloc_nodes(mas, gfp);
6291  	}
6292  
6293  	if (!mas_allocated(mas))
6294  		return false;
6295  
6296  	mas->status = ma_start;
6297  	return true;
6298  }
6299  
6300  void __init maple_tree_init(void)
6301  {
6302  	maple_node_cache = kmem_cache_create("maple_node",
6303  			sizeof(struct maple_node), sizeof(struct maple_node),
6304  			SLAB_PANIC, NULL);
6305  }
6306  
6307  /**
6308   * mtree_load() - Load a value stored in a maple tree
6309   * @mt: The maple tree
6310   * @index: The index to load
6311   *
6312   * Return: the entry or %NULL
6313   */
6314  void *mtree_load(struct maple_tree *mt, unsigned long index)
6315  {
6316  	MA_STATE(mas, mt, index, index);
6317  	void *entry;
6318  
6319  	trace_ma_read(__func__, &mas);
6320  	rcu_read_lock();
6321  retry:
6322  	entry = mas_start(&mas);
6323  	if (unlikely(mas_is_none(&mas)))
6324  		goto unlock;
6325  
6326  	if (unlikely(mas_is_ptr(&mas))) {
6327  		if (index)
6328  			entry = NULL;
6329  
6330  		goto unlock;
6331  	}
6332  
6333  	entry = mtree_lookup_walk(&mas);
6334  	if (!entry && unlikely(mas_is_start(&mas)))
6335  		goto retry;
6336  unlock:
6337  	rcu_read_unlock();
6338  	if (xa_is_zero(entry))
6339  		return NULL;
6340  
6341  	return entry;
6342  }
6343  EXPORT_SYMBOL(mtree_load);
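
/*
 * Illustrative sketch (not part of the upstream source): the simplest
 * lookup, with RCU handled internally.  "mt" is a hypothetical tree that is
 * defined elsewhere with DEFINE_MTREE() or mt_init().
 *
 *	void *entry = mtree_load(&mt, 1024);
 */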
6344  
6345  /**
6346   * mtree_store_range() - Store an entry at a given range.
6347   * @mt: The maple tree
6348   * @index: The start of the range
6349   * @last: The end of the range
6350   * @entry: The entry to store
6351   * @gfp: The GFP_FLAGS to use for allocations
6352   *
6353   * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not
6354   * be allocated.
6355   */
6356  int mtree_store_range(struct maple_tree *mt, unsigned long index,
6357  		unsigned long last, void *entry, gfp_t gfp)
6358  {
6359  	MA_STATE(mas, mt, index, last);
6360  	int ret = 0;
6361  
6362  	trace_ma_write(__func__, &mas, 0, entry);
6363  	if (WARN_ON_ONCE(xa_is_advanced(entry)))
6364  		return -EINVAL;
6365  
6366  	if (index > last)
6367  		return -EINVAL;
6368  
6369  	mtree_lock(mt);
6370  	ret = mas_store_gfp(&mas, entry, gfp);
6371  	mtree_unlock(mt);
6372  
6373  	return ret;
6374  }
6375  EXPORT_SYMBOL(mtree_store_range);
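
/*
 * Illustrative sketch (not part of the upstream source): store one entry
 * over an inclusive range with internal locking and allocation.  "mt" and
 * "item" are hypothetical.
 *
 *	int err = mtree_store_range(&mt, 0x1000, 0x1fff, item, GFP_KERNEL);
 *	if (err)
 *		return err;
 */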
6376  
6377  /**
6378   * mtree_store() - Store an entry at a given index.
6379   * @mt: The maple tree
6380   * @index: The index to store the value
6381   * @entry: The entry to store
6382   * @gfp: The GFP_FLAGS to use for allocations
6383   *
6384   * Return: 0 on success, -EINVAL on invalid request, -ENOMEM if memory could not
6385   * be allocated.
6386   */
6387  int mtree_store(struct maple_tree *mt, unsigned long index, void *entry,
6388  		 gfp_t gfp)
6389  {
6390  	return mtree_store_range(mt, index, index, entry, gfp);
6391  }
6392  EXPORT_SYMBOL(mtree_store);
6393  
6394  /**
6395   * mtree_insert_range() - Insert an entry at a given range if there is no value.
6396   * @mt: The maple tree
6397   * @first: The start of the range
6398   * @last: The end of the range
6399   * @entry: The entry to store
6400   * @gfp: The GFP_FLAGS to use for allocations.
6401   *
6402   * Return: 0 on success, -EEXIST if the range is occupied, -EINVAL on invalid
6403   * request, -ENOMEM if memory could not be allocated.
6404   */
6405  int mtree_insert_range(struct maple_tree *mt, unsigned long first,
6406  		unsigned long last, void *entry, gfp_t gfp)
6407  {
6408  	MA_STATE(ms, mt, first, last);
6409  	int ret = 0;
6410  
6411  	if (WARN_ON_ONCE(xa_is_advanced(entry)))
6412  		return -EINVAL;
6413  
6414  	if (first > last)
6415  		return -EINVAL;
6416  
6417  	mtree_lock(mt);
6418  retry:
6419  	mas_insert(&ms, entry);
6420  	if (mas_nomem(&ms, gfp))
6421  		goto retry;
6422  
6423  	mtree_unlock(mt);
6424  	if (mas_is_err(&ms))
6425  		ret = xa_err(ms.node);
6426  
6427  	mas_destroy(&ms);
6428  	return ret;
6429  }
6430  EXPORT_SYMBOL(mtree_insert_range);
6431  
6432  /**
6433   * mtree_insert() - Insert an entry at a given index if there is no value.
6434   * @mt: The maple tree
6435   * @index : The index to store the value
6436   * @entry: The entry to store
6437   * @gfp: The GFP_FLAGS to use for allocations.
6438   *
6439   * Return: 0 on success, -EEXIST if the range is occupied, -EINVAL on invalid
6440   * request, -ENOMEM if memory could not be allocated.
6441   */
6442  int mtree_insert(struct maple_tree *mt, unsigned long index, void *entry,
6443  		 gfp_t gfp)
6444  {
6445  	return mtree_insert_range(mt, index, index, entry, gfp);
6446  }
6447  EXPORT_SYMBOL(mtree_insert);
6448  
6449  int mtree_alloc_range(struct maple_tree *mt, unsigned long *startp,
6450  		void *entry, unsigned long size, unsigned long min,
6451  		unsigned long max, gfp_t gfp)
6452  {
6453  	int ret = 0;
6454  
6455  	MA_STATE(mas, mt, 0, 0);
6456  	if (!mt_is_alloc(mt))
6457  		return -EINVAL;
6458  
6459  	if (WARN_ON_ONCE(mt_is_reserved(entry)))
6460  		return -EINVAL;
6461  
6462  	mtree_lock(mt);
6463  retry:
6464  	ret = mas_empty_area(&mas, min, max, size);
6465  	if (ret)
6466  		goto unlock;
6467  
6468  	mas_insert(&mas, entry);
6469  	/*
6470  	 * mas_nomem() may release the lock, causing the allocated area
6471  	 * to be unavailable, so try to allocate a free area again.
6472  	 */
6473  	if (mas_nomem(&mas, gfp))
6474  		goto retry;
6475  
6476  	if (mas_is_err(&mas))
6477  		ret = xa_err(mas.node);
6478  	else
6479  		*startp = mas.index;
6480  
6481  unlock:
6482  	mtree_unlock(mt);
6483  	mas_destroy(&mas);
6484  	return ret;
6485  }
6486  EXPORT_SYMBOL(mtree_alloc_range);
6487  
6488  /**
6489   * mtree_alloc_cyclic() - Find somewhere to store this entry in the tree.
6490   * @mt: The maple tree.
6491   * @startp: Pointer to ID.
6492   * @range_lo: Lower bound of range to search.
6493   * @range_hi: Upper bound of range to search.
6494   * @entry: The entry to store.
6495   * @next: Pointer to next ID to allocate.
6496   * @gfp: The GFP_FLAGS to use for allocations.
6497   *
6498   * Finds an empty entry in @mt after @next, stores the new index into
6499   * the @startp pointer, stores the entry at that index, then updates @next.
6500   *
6501   * @mt must be initialized with the MT_FLAGS_ALLOC_RANGE flag.
6502   *
6503   * Context: Any context.  Takes and releases the mt.lock.  May sleep if
6504   * the @gfp flags permit.
6505   *
6506   * Return: 0 if the allocation succeeded without wrapping, 1 if the
6507   * allocation succeeded after wrapping, -ENOMEM if memory could not be
6508   * allocated, -EINVAL if @mt cannot be used, or -EBUSY if there are no
6509   * free entries.
6510   */
6511  int mtree_alloc_cyclic(struct maple_tree *mt, unsigned long *startp,
6512  		void *entry, unsigned long range_lo, unsigned long range_hi,
6513  		unsigned long *next, gfp_t gfp)
6514  {
6515  	int ret;
6516  
6517  	MA_STATE(mas, mt, 0, 0);
6518  
6519  	if (!mt_is_alloc(mt))
6520  		return -EINVAL;
6521  	if (WARN_ON_ONCE(mt_is_reserved(entry)))
6522  		return -EINVAL;
6523  	mtree_lock(mt);
6524  	ret = mas_alloc_cyclic(&mas, startp, entry, range_lo, range_hi,
6525  			       next, gfp);
6526  	mtree_unlock(mt);
6527  	return ret;
6528  }
6529  EXPORT_SYMBOL(mtree_alloc_cyclic);
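
/*
 * Illustrative sketch (not part of the upstream source): cyclic ID
 * allocation in the 1-0xffff range.  The tree must be created with
 * MT_FLAGS_ALLOC_RANGE; "next_id" keeps the cursor between calls and
 * "object" is hypothetical.
 *
 *	static struct maple_tree id_tree =
 *		MTREE_INIT(id_tree, MT_FLAGS_ALLOC_RANGE);
 *	static unsigned long next_id;
 *	unsigned long id;
 *	int ret;
 *
 *	ret = mtree_alloc_cyclic(&id_tree, &id, object, 1, 0xffff,
 *				 &next_id, GFP_KERNEL);
 */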
6530  
6531  int mtree_alloc_rrange(struct maple_tree *mt, unsigned long *startp,
6532  		void *entry, unsigned long size, unsigned long min,
6533  		unsigned long max, gfp_t gfp)
6534  {
6535  	int ret = 0;
6536  
6537  	MA_STATE(mas, mt, 0, 0);
6538  	if (!mt_is_alloc(mt))
6539  		return -EINVAL;
6540  
6541  	if (WARN_ON_ONCE(mt_is_reserved(entry)))
6542  		return -EINVAL;
6543  
6544  	mtree_lock(mt);
6545  retry:
6546  	ret = mas_empty_area_rev(&mas, min, max, size);
6547  	if (ret)
6548  		goto unlock;
6549  
6550  	mas_insert(&mas, entry);
6551  	/*
6552  	 * mas_nomem() may release the lock, causing the allocated area
6553  	 * to be unavailable, so try to allocate a free area again.
6554  	 */
6555  	if (mas_nomem(&mas, gfp))
6556  		goto retry;
6557  
6558  	if (mas_is_err(&mas))
6559  		ret = xa_err(mas.node);
6560  	else
6561  		*startp = mas.index;
6562  
6563  unlock:
6564  	mtree_unlock(mt);
6565  	mas_destroy(&mas);
6566  	return ret;
6567  }
6568  EXPORT_SYMBOL(mtree_alloc_rrange);
6569  
6570  /**
6571   * mtree_erase() - Find an index and erase the entire range.
6572   * @mt: The maple tree
6573   * @index: The index to erase
6574   *
6575   * Erasing is the same as a walk to an entry then a store of a NULL to that
6576   * ENTIRE range.  In fact, it is implemented as such using the advanced API.
6577   *
6578   * Return: The entry stored at the @index or %NULL
6579   */
6580  void *mtree_erase(struct maple_tree *mt, unsigned long index)
6581  {
6582  	void *entry = NULL;
6583  
6584  	MA_STATE(mas, mt, index, index);
6585  	trace_ma_op(__func__, &mas);
6586  
6587  	mtree_lock(mt);
6588  	entry = mas_erase(&mas);
6589  	mtree_unlock(mt);
6590  
6591  	return entry;
6592  }
6593  EXPORT_SYMBOL(mtree_erase);
6594  
6595  /*
6596   * mas_dup_free() - Free an incomplete duplication of a tree.
6597   * @mas: The maple state of an incomplete tree.
6598   *
6599   * The parameter @mas->node passed in indicates that the allocation failed on
6600   * this node. This function frees all nodes starting from @mas->node in the
6601   * reverse order of mas_dup_build(). There is no need to hold the source tree
6602   * lock at this time.
6603   */
6604  static void mas_dup_free(struct ma_state *mas)
6605  {
6606  	struct maple_node *node;
6607  	enum maple_type type;
6608  	void __rcu **slots;
6609  	unsigned char count, i;
6610  
6611  	/* Maybe the first node allocation failed. */
6612  	if (mas_is_none(mas))
6613  		return;
6614  
6615  	while (!mte_is_root(mas->node)) {
6616  		mas_ascend(mas);
6617  		if (mas->offset) {
6618  			mas->offset--;
6619  			do {
6620  				mas_descend(mas);
6621  				mas->offset = mas_data_end(mas);
6622  			} while (!mte_is_leaf(mas->node));
6623  
6624  			mas_ascend(mas);
6625  		}
6626  
6627  		node = mte_to_node(mas->node);
6628  		type = mte_node_type(mas->node);
6629  		slots = ma_slots(node, type);
6630  		count = mas_data_end(mas) + 1;
6631  		for (i = 0; i < count; i++)
6632  			((unsigned long *)slots)[i] &= ~MAPLE_NODE_MASK;
6633  		mt_free_bulk(count, slots);
6634  	}
6635  
6636  	node = mte_to_node(mas->node);
6637  	mt_free_one(node);
6638  }
6639  
6640  /*
6641   * mas_copy_node() - Copy a maple node and replace the parent.
6642   * @mas: The maple state of source tree.
6643   * @new_mas: The maple state of new tree.
6644   * @parent: The parent of the new node.
6645   *
6646   * Copy @mas->node to @new_mas->node, set @parent to be the parent of
6647   * @new_mas->node. If memory allocation fails, @mas is set to -ENOMEM.
6648   */
6649  static inline void mas_copy_node(struct ma_state *mas, struct ma_state *new_mas,
6650  		struct maple_pnode *parent)
6651  {
6652  	struct maple_node *node = mte_to_node(mas->node);
6653  	struct maple_node *new_node = mte_to_node(new_mas->node);
6654  	unsigned long val;
6655  
6656  	/* Copy the node completely. */
6657  	memcpy(new_node, node, sizeof(struct maple_node));
6658  	/* Update the parent node pointer. */
6659  	val = (unsigned long)node->parent & MAPLE_NODE_MASK;
6660  	new_node->parent = ma_parent_ptr(val | (unsigned long)parent);
6661  }
6662  
6663  /*
6664   * mas_dup_alloc() - Allocate child nodes for a maple node.
6665   * @mas: The maple state of source tree.
6666   * @new_mas: The maple state of new tree.
6667   * @gfp: The GFP_FLAGS to use for allocations.
6668   *
6669   * This function allocates child nodes for @new_mas->node during the duplication
6670   * process. If memory allocation fails, @mas is set to -ENOMEM.
6671   */
6672  static inline void mas_dup_alloc(struct ma_state *mas, struct ma_state *new_mas,
6673  		gfp_t gfp)
6674  {
6675  	struct maple_node *node = mte_to_node(mas->node);
6676  	struct maple_node *new_node = mte_to_node(new_mas->node);
6677  	enum maple_type type;
6678  	unsigned char request, count, i;
6679  	void __rcu **slots;
6680  	void __rcu **new_slots;
6681  	unsigned long val;
6682  
6683  	/* Allocate memory for child nodes. */
6684  	type = mte_node_type(mas->node);
6685  	new_slots = ma_slots(new_node, type);
6686  	request = mas_data_end(mas) + 1;
6687  	count = mt_alloc_bulk(gfp, request, (void **)new_slots);
6688  	if (unlikely(count < request)) {
6689  		memset(new_slots, 0, request * sizeof(void *));
6690  		mas_set_err(mas, -ENOMEM);
6691  		return;
6692  	}
6693  
6694  	/* Restore node type information in slots. */
6695  	slots = ma_slots(node, type);
6696  	for (i = 0; i < count; i++) {
6697  		val = (unsigned long)mt_slot_locked(mas->tree, slots, i);
6698  		val &= MAPLE_NODE_MASK;
6699  		((unsigned long *)new_slots)[i] |= val;
6700  	}
6701  }
6702  
6703  /*
6704   * mas_dup_build() - Build a new maple tree from a source tree
6705   * @mas: The maple state of source tree, need to be in MAS_START state.
6706   * @new_mas: The maple state of new tree, need to be in MAS_START state.
6707   * @gfp: The GFP_FLAGS to use for allocations.
6708   *
6709   * This function builds a new tree in DFS preorder. If the memory allocation
6710   * fails, the error code -ENOMEM will be set in @mas, and @new_mas points to the
6711   * last node. mas_dup_free() will free the incomplete duplication of a tree.
6712   *
6713   * Note that the attributes of the two trees need to be exactly the same, and the
6714   * new tree needs to be empty, otherwise -EINVAL will be set in @mas.
6715   */
6716  static inline void mas_dup_build(struct ma_state *mas, struct ma_state *new_mas,
6717  		gfp_t gfp)
6718  {
6719  	struct maple_node *node;
6720  	struct maple_pnode *parent = NULL;
6721  	struct maple_enode *root;
6722  	enum maple_type type;
6723  
6724  	if (unlikely(mt_attr(mas->tree) != mt_attr(new_mas->tree)) ||
6725  	    unlikely(!mtree_empty(new_mas->tree))) {
6726  		mas_set_err(mas, -EINVAL);
6727  		return;
6728  	}
6729  
6730  	root = mas_start(mas);
6731  	if (mas_is_ptr(mas) || mas_is_none(mas))
6732  		goto set_new_tree;
6733  
6734  	node = mt_alloc_one(gfp);
6735  	if (!node) {
6736  		new_mas->status = ma_none;
6737  		mas_set_err(mas, -ENOMEM);
6738  		return;
6739  	}
6740  
6741  	type = mte_node_type(mas->node);
6742  	root = mt_mk_node(node, type);
6743  	new_mas->node = root;
6744  	new_mas->min = 0;
6745  	new_mas->max = ULONG_MAX;
6746  	root = mte_mk_root(root);
6747  	while (1) {
6748  		mas_copy_node(mas, new_mas, parent);
6749  		if (!mte_is_leaf(mas->node)) {
6750  			/* Only allocate child nodes for non-leaf nodes. */
6751  			mas_dup_alloc(mas, new_mas, gfp);
6752  			if (unlikely(mas_is_err(mas)))
6753  				return;
6754  		} else {
6755  			/*
6756  			 * This is the last leaf node and duplication is
6757  			 * completed.
6758  			 */
6759  			if (mas->max == ULONG_MAX)
6760  				goto done;
6761  
6762  			/* This is not the last leaf node and needs to go up. */
6763  			do {
6764  				mas_ascend(mas);
6765  				mas_ascend(new_mas);
6766  			} while (mas->offset == mas_data_end(mas));
6767  
6768  			/* Move to the next subtree. */
6769  			mas->offset++;
6770  			new_mas->offset++;
6771  		}
6772  
6773  		mas_descend(mas);
6774  		parent = ma_parent_ptr(mte_to_node(new_mas->node));
6775  		mas_descend(new_mas);
6776  		mas->offset = 0;
6777  		new_mas->offset = 0;
6778  	}
6779  done:
6780  	/* Specially handle the parent of the root node. */
6781  	mte_to_node(root)->parent = ma_parent_ptr(mas_tree_parent(new_mas));
6782  set_new_tree:
6783  	/* Make them the same height */
6784  	new_mas->tree->ma_flags = mas->tree->ma_flags;
6785  	rcu_assign_pointer(new_mas->tree->ma_root, root);
6786  }
6787  
6788  /**
6789   * __mt_dup(): Duplicate an entire maple tree
6790   * @mt: The source maple tree
6791   * @new: The new maple tree
6792   * @gfp: The GFP_FLAGS to use for allocations
6793   *
6794   * This function duplicates a maple tree in Depth-First Search (DFS) pre-order
6795   * traversal. It uses memcpy() to copy nodes in the source tree and allocate
6796   * new child nodes in non-leaf nodes. The new node is exactly the same as the
6797   * source node except for all the addresses stored in it. It will be faster than
6798   * traversing all elements in the source tree and inserting them one by one into
6799   * the new tree.
6800   * The user needs to ensure that the attributes of the source tree and the new
6801   * tree are the same, and the new tree needs to be an empty tree, otherwise
6802   * -EINVAL will be returned.
6803   * Note that the user needs to manually lock the source tree and the new tree.
6804   *
6805   * Return: 0 on success, -ENOMEM if memory could not be allocated, -EINVAL if
6806   * the attributes of the two trees are different or the new tree is not an empty
6807   * tree.
6808   */
6809  int __mt_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp)
6810  {
6811  	int ret = 0;
6812  	MA_STATE(mas, mt, 0, 0);
6813  	MA_STATE(new_mas, new, 0, 0);
6814  
6815  	mas_dup_build(&mas, &new_mas, gfp);
6816  	if (unlikely(mas_is_err(&mas))) {
6817  		ret = xa_err(mas.node);
6818  		if (ret == -ENOMEM)
6819  			mas_dup_free(&new_mas);
6820  	}
6821  
6822  	return ret;
6823  }
6824  EXPORT_SYMBOL(__mt_dup);
6825  
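/*
 * Usage sketch (illustrative only, not part of this file's build): a
 * caller that already holds both tree locks duplicates @old into @new
 * and then walks the new tree to replace every copied entry.  The
 * old_to_new() helper below is hypothetical.  Because each range already
 * exists in the duplicated tree, overwriting it with an entry of the
 * exact same range is a slot store and should not require allocations.
 *
 *	MA_STATE(new_mas, new, 0, 0);
 *	void *entry;
 *	int ret;
 *
 *	ret = __mt_dup(old, new, GFP_KERNEL);
 *	if (ret)
 *		return ret;	(on -ENOMEM the partial copy is already freed)
 *
 *	mas_for_each(&new_mas, entry, ULONG_MAX)
 *		mas_store(&new_mas, old_to_new(entry));
 */
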
6826  /**
6827   * mtree_dup(): Duplicate an entire maple tree
6828   * @mt: The source maple tree
6829   * @new: The new maple tree
6830   * @gfp: The GFP_FLAGS to use for allocations
6831   *
6832   * This function duplicates a maple tree using a depth-first, pre-order
6833   * traversal. It copies each node in the source tree with memcpy() and
6834   * allocates new child nodes for the non-leaf nodes. Each new node is
6835   * identical to its source node except for the addresses stored in it.
6836   * This is faster than walking all elements of the source tree and
6837   * inserting them one by one into the new tree.
6838   * The user must ensure that the attributes of the source tree and the new
6839   * tree are the same and that the new tree is empty; otherwise -EINVAL
6840   * will be returned.
6841   *
6842   * Return: 0 on success, -ENOMEM if memory could not be allocated, or -EINVAL
6843   * if the attributes of the two trees differ or the new tree is not an empty
6844   * tree.
6845   */
6846  int mtree_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp)
6847  {
6848  	int ret = 0;
6849  	MA_STATE(mas, mt, 0, 0);
6850  	MA_STATE(new_mas, new, 0, 0);
6851  
6852  	mas_lock(&new_mas);
6853  	mas_lock_nested(&mas, SINGLE_DEPTH_NESTING);
6854  	mas_dup_build(&mas, &new_mas, gfp);
6855  	mas_unlock(&mas);
6856  	if (unlikely(mas_is_err(&mas))) {
6857  		ret = xa_err(mas.node);
6858  		if (ret == -ENOMEM)
6859  			mas_dup_free(&new_mas);
6860  	}
6861  
6862  	mas_unlock(&new_mas);
6863  	return ret;
6864  }
6865  EXPORT_SYMBOL(mtree_dup);
6866  
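/*
 * Usage sketch (illustrative only): unlike __mt_dup(), mtree_dup() takes
 * the locks of both trees itself, so an unlocked caller can duplicate a
 * tree directly.  The new tree must be empty and carry the same flags as
 * the source; old_tree, new_tree and old_flags below are hypothetical.
 *
 *	struct maple_tree new_tree;
 *	int ret;
 *
 *	mt_init_flags(&new_tree, old_flags);	(same flags as old_tree)
 *	ret = mtree_dup(&old_tree, &new_tree, GFP_KERNEL);
 *	if (ret)
 *		return ret;
 *
 * Only the nodes are copied; the entries themselves are shared with the
 * source tree.
 */
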
6867  /**
6868   * __mt_destroy() - Walk and free all nodes of a locked maple tree.
6869   * @mt: The maple tree
6870   *
6871   * Note: Does not handle locking.
6872   */
6873  void __mt_destroy(struct maple_tree *mt)
6874  {
6875  	void *root = mt_root_locked(mt);
6876  
6877  	rcu_assign_pointer(mt->ma_root, NULL);
6878  	if (xa_is_node(root))
6879  		mte_destroy_walk(root, mt);
6880  
6881  	mt->ma_flags = mt_attr(mt);
6882  }
6883  EXPORT_SYMBOL_GPL(__mt_destroy);
6884  
6885  /**
6886   * mtree_destroy() - Destroy a maple tree
6887   * @mt: The maple tree
6888   *
6889   * Frees all resources used by the tree.  Handles locking.
6890   */
6891  void mtree_destroy(struct maple_tree *mt)
6892  {
6893  	mtree_lock(mt);
6894  	__mt_destroy(mt);
6895  	mtree_unlock(mt);
6896  }
6897  EXPORT_SYMBOL(mtree_destroy);
6898  
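/*
 * Usage sketch (illustrative only): mtree_destroy() takes the tree lock
 * itself, so it is the normal way to free a tree when done with it.  Use
 * __mt_destroy() only when the lock is already held, for example in an
 * error path that is still inside an mtree_lock()/mtree_unlock() region.
 *
 *	mtree_destroy(&tree);
 */
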
6899  /**
6900   * mt_find() - Search from the start up until an entry is found.
6901   * @mt: The maple tree
6902   * @index: Pointer which contains the start location of the search
6903   * @max: The maximum value of the search range
6904   *
6905   * Takes the RCU read lock internally to protect the search; the returned
6906   * pointer is not protected once the RCU read lock is dropped.
6907   * See also: Documentation/core-api/maple_tree.rst
6908   *
6909   * If an entry is found, @index is updated to point to the next possible
6910   * entry, independent of whether the found entry occupies a single index
6911   * or a range of indices.
6912   *
6913   * Return: The entry at or after @index, or %NULL
6914   */
6915  void *mt_find(struct maple_tree *mt, unsigned long *index, unsigned long max)
6916  {
6917  	MA_STATE(mas, mt, *index, *index);
6918  	void *entry;
6919  #ifdef CONFIG_DEBUG_MAPLE_TREE
6920  	unsigned long copy = *index;
6921  #endif
6922  
6923  	trace_ma_read(__func__, &mas);
6924  
6925  	if ((*index) > max)
6926  		return NULL;
6927  
6928  	rcu_read_lock();
6929  retry:
6930  	entry = mas_state_walk(&mas);
6931  	if (mas_is_start(&mas))
6932  		goto retry;
6933  
6934  	if (unlikely(xa_is_zero(entry)))
6935  		entry = NULL;
6936  
6937  	if (entry)
6938  		goto unlock;
6939  
6940  	while (mas_is_active(&mas) && (mas.last < max)) {
6941  		entry = mas_next_entry(&mas, max);
6942  		if (likely(entry && !xa_is_zero(entry)))
6943  			break;
6944  	}
6945  
6946  	if (unlikely(xa_is_zero(entry)))
6947  		entry = NULL;
6948  unlock:
6949  	rcu_read_unlock();
6950  	if (likely(entry)) {
6951  		*index = mas.last + 1;
6952  #ifdef CONFIG_DEBUG_MAPLE_TREE
6953  		if (MT_WARN_ON(mt, (*index) && ((*index) <= copy)))
6954  			pr_err("index not increased! %lx <= %lx\n",
6955  			       *index, copy);
6956  #endif
6957  	}
6958  
6959  	return entry;
6960  }
6961  EXPORT_SYMBOL(mt_find);
6962  
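/*
 * Usage sketch (illustrative only): scanning every entry in a tree with
 * mt_find().  @index is advanced past each found range, so the loop
 * terminates once the range [index, ULONG_MAX] holds no more entries.
 * do_something() is a hypothetical callback.
 *
 *	unsigned long index = 0;
 *	void *entry;
 *
 *	entry = mt_find(&tree, &index, ULONG_MAX);
 *	while (entry) {
 *		do_something(entry);
 *		entry = mt_find_after(&tree, &index, ULONG_MAX);
 *	}
 */
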
6963  /**
6964   * mt_find_after() - Search from the start up until an entry is found.
6965   * @mt: The maple tree
6966   * @index: Pointer which contains the start location of the search
6967   * @max: The maximum value to check
6968   *
6969   * Same as mt_find() except that it checks @index for 0 before
6970   * searching. If @index == 0, the search is aborted. This covers a wrap
6971   * around of @index to 0 in an iterator loop.
6972   *
6973   * Return: The entry at or after @index, or %NULL
6974   */
6975  void *mt_find_after(struct maple_tree *mt, unsigned long *index,
6976  		    unsigned long max)
6977  {
6978  	if (!(*index))
6979  		return NULL;
6980  
6981  	return mt_find(mt, index, max);
6982  }
6983  EXPORT_SYMBOL(mt_find_after);
6984  
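/*
 * mt_find_after() only adds the @index == 0 check, which stops iteration
 * once the index has wrapped past ULONG_MAX.  The mt_for_each() iterator
 * in include/linux/maple_tree.h combines the two calls, so the loop shown
 * above can also be written as (illustrative only):
 *
 *	mt_for_each(&tree, entry, index, ULONG_MAX)
 *		do_something(entry);
 */
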
6985  #ifdef CONFIG_DEBUG_MAPLE_TREE
6986  atomic_t maple_tree_tests_run;
6987  EXPORT_SYMBOL_GPL(maple_tree_tests_run);
6988  atomic_t maple_tree_tests_passed;
6989  EXPORT_SYMBOL_GPL(maple_tree_tests_passed);
6990  
6991  #ifndef __KERNEL__
6992  extern void kmem_cache_set_non_kernel(struct kmem_cache *, unsigned int);
6993  void mt_set_non_kernel(unsigned int val)
6994  {
6995  	kmem_cache_set_non_kernel(maple_node_cache, val);
6996  }
6997  
6998  extern void kmem_cache_set_callback(struct kmem_cache *cachep,
6999  		void (*callback)(void *));
7000  void mt_set_callback(void (*callback)(void *))
7001  {
7002  	kmem_cache_set_callback(maple_node_cache, callback);
7003  }
7004  
7005  extern void kmem_cache_set_private(struct kmem_cache *cachep, void *private);
7006  void mt_set_private(void *private)
7007  {
7008  	kmem_cache_set_private(maple_node_cache, private);
7009  }
7010  
7011  extern unsigned long kmem_cache_get_alloc(struct kmem_cache *);
7012  unsigned long mt_get_alloc_size(void)
7013  {
7014  	return kmem_cache_get_alloc(maple_node_cache);
7015  }
7016  
7017  extern void kmem_cache_zero_nr_tallocated(struct kmem_cache *);
7018  void mt_zero_nr_tallocated(void)
7019  {
7020  	kmem_cache_zero_nr_tallocated(maple_node_cache);
7021  }
7022  
7023  extern unsigned int kmem_cache_nr_tallocated(struct kmem_cache *);
7024  unsigned int mt_nr_tallocated(void)
7025  {
7026  	return kmem_cache_nr_tallocated(maple_node_cache);
7027  }
7028  
7029  extern unsigned int kmem_cache_nr_allocated(struct kmem_cache *);
7030  unsigned int mt_nr_allocated(void)
7031  {
7032  	return kmem_cache_nr_allocated(maple_node_cache);
7033  }
7034  
7035  void mt_cache_shrink(void)
7036  {
7037  }
7038  #else
7039  /*
7040   * mt_cache_shrink() - For testing, don't use this.
7041   *
7042   * Certain testcases can trigger an OOM when combined with other memory
7043   * debugging configuration options.  This function is used to reduce the
7044   * possibility of an out-of-memory event due to kmem_cache objects remaining
7045   * around for longer than usual.
7046   */
7047  void mt_cache_shrink(void)
7048  {
7049  	kmem_cache_shrink(maple_node_cache);
7050  
7051  }
7052  EXPORT_SYMBOL_GPL(mt_cache_shrink);
7053  
7054  #endif /* not defined __KERNEL__ */
7055  /*
7056   * mas_get_slot() - Get the entry in the maple state node stored at @offset.
7057   * @mas: The maple state
7058   * @offset: The offset into the slot array to fetch.
7059   *
7060   * Return: The entry stored at @offset.
7061   */
7062  static inline struct maple_enode *mas_get_slot(struct ma_state *mas,
7063  		unsigned char offset)
7064  {
7065  	return mas_slot(mas, ma_slots(mas_mn(mas), mte_node_type(mas->node)),
7066  			offset);
7067  }
7068  
7069  /* Depth first search, post-order */
7070  static void mas_dfs_postorder(struct ma_state *mas, unsigned long max)
7071  {
7072  
7073  	struct maple_enode *p, *mn = mas->node;
7074  	unsigned long p_min, p_max;
7075  
7076  	mas_next_node(mas, mas_mn(mas), max);
7077  	if (!mas_is_overflow(mas))
7078  		return;
7079  
7080  	if (mte_is_root(mn))
7081  		return;
7082  
7083  	mas->node = mn;
7084  	mas_ascend(mas);
7085  	do {
7086  		p = mas->node;
7087  		p_min = mas->min;
7088  		p_max = mas->max;
7089  		mas_prev_node(mas, 0);
7090  	} while (!mas_is_underflow(mas));
7091  
7092  	mas->node = p;
7093  	mas->max = p_max;
7094  	mas->min = p_min;
7095  }
7096  
7097  /* Tree validations */
7098  static void mt_dump_node(const struct maple_tree *mt, void *entry,
7099  		unsigned long min, unsigned long max, unsigned int depth,
7100  		enum mt_dump_format format);
7101  static void mt_dump_range(unsigned long min, unsigned long max,
7102  			  unsigned int depth, enum mt_dump_format format)
7103  {
7104  	static const char spaces[] = "                                ";
7105  
7106  	switch(format) {
7107  	case mt_dump_hex:
7108  		if (min == max)
7109  			pr_info("%.*s%lx: ", depth * 2, spaces, min);
7110  		else
7111  			pr_info("%.*s%lx-%lx: ", depth * 2, spaces, min, max);
7112  		break;
7113  	case mt_dump_dec:
7114  		if (min == max)
7115  			pr_info("%.*s%lu: ", depth * 2, spaces, min);
7116  		else
7117  			pr_info("%.*s%lu-%lu: ", depth * 2, spaces, min, max);
7118  	}
7119  }
7120  
7121  static void mt_dump_entry(void *entry, unsigned long min, unsigned long max,
7122  			  unsigned int depth, enum mt_dump_format format)
7123  {
7124  	mt_dump_range(min, max, depth, format);
7125  
7126  	if (xa_is_value(entry))
7127  		pr_cont("value %ld (0x%lx) [%p]\n", xa_to_value(entry),
7128  				xa_to_value(entry), entry);
7129  	else if (xa_is_zero(entry))
7130  		pr_cont("zero (%ld)\n", xa_to_internal(entry));
7131  	else if (mt_is_reserved(entry))
7132  		pr_cont("UNKNOWN ENTRY (%p)\n", entry);
7133  	else
7134  		pr_cont("%p\n", entry);
7135  }
7136  
7137  static void mt_dump_range64(const struct maple_tree *mt, void *entry,
7138  		unsigned long min, unsigned long max, unsigned int depth,
7139  		enum mt_dump_format format)
7140  {
7141  	struct maple_range_64 *node = &mte_to_node(entry)->mr64;
7142  	bool leaf = mte_is_leaf(entry);
7143  	unsigned long first = min;
7144  	int i;
7145  
7146  	pr_cont(" contents: ");
7147  	for (i = 0; i < MAPLE_RANGE64_SLOTS - 1; i++) {
7148  		switch(format) {
7149  		case mt_dump_hex:
7150  			pr_cont("%p %lX ", node->slot[i], node->pivot[i]);
7151  			break;
7152  		case mt_dump_dec:
7153  			pr_cont("%p %lu ", node->slot[i], node->pivot[i]);
7154  		}
7155  	}
7156  	pr_cont("%p\n", node->slot[i]);
7157  	for (i = 0; i < MAPLE_RANGE64_SLOTS; i++) {
7158  		unsigned long last = max;
7159  
7160  		if (i < (MAPLE_RANGE64_SLOTS - 1))
7161  			last = node->pivot[i];
7162  		else if (!node->slot[i] && max != mt_node_max(entry))
7163  			break;
7164  		if (last == 0 && i > 0)
7165  			break;
7166  		if (leaf)
7167  			mt_dump_entry(mt_slot(mt, node->slot, i),
7168  					first, last, depth + 1, format);
7169  		else if (node->slot[i])
7170  			mt_dump_node(mt, mt_slot(mt, node->slot, i),
7171  					first, last, depth + 1, format);
7172  
7173  		if (last == max)
7174  			break;
7175  		if (last > max) {
7176  			switch(format) {
7177  			case mt_dump_hex:
7178  				pr_err("node %p last (%lx) > max (%lx) at pivot %d!\n",
7179  					node, last, max, i);
7180  				break;
7181  			case mt_dump_dec:
7182  				pr_err("node %p last (%lu) > max (%lu) at pivot %d!\n",
7183  					node, last, max, i);
7184  			}
7185  		}
7186  		first = last + 1;
7187  	}
7188  }
7189  
7190  static void mt_dump_arange64(const struct maple_tree *mt, void *entry,
7191  	unsigned long min, unsigned long max, unsigned int depth,
7192  	enum mt_dump_format format)
7193  {
7194  	struct maple_arange_64 *node = &mte_to_node(entry)->ma64;
7195  	unsigned long first = min;
7196  	int i;
7197  
7198  	pr_cont(" contents: ");
7199  	for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++) {
7200  		switch (format) {
7201  		case mt_dump_hex:
7202  			pr_cont("%lx ", node->gap[i]);
7203  			break;
7204  		case mt_dump_dec:
7205  			pr_cont("%lu ", node->gap[i]);
7206  		}
7207  	}
7208  	pr_cont("| %02X %02X| ", node->meta.end, node->meta.gap);
7209  	for (i = 0; i < MAPLE_ARANGE64_SLOTS - 1; i++) {
7210  		switch (format) {
7211  		case mt_dump_hex:
7212  			pr_cont("%p %lX ", node->slot[i], node->pivot[i]);
7213  			break;
7214  		case mt_dump_dec:
7215  			pr_cont("%p %lu ", node->slot[i], node->pivot[i]);
7216  		}
7217  	}
7218  	pr_cont("%p\n", node->slot[i]);
7219  	for (i = 0; i < MAPLE_ARANGE64_SLOTS; i++) {
7220  		unsigned long last = max;
7221  
7222  		if (i < (MAPLE_ARANGE64_SLOTS - 1))
7223  			last = node->pivot[i];
7224  		else if (!node->slot[i])
7225  			break;
7226  		if (last == 0 && i > 0)
7227  			break;
7228  		if (node->slot[i])
7229  			mt_dump_node(mt, mt_slot(mt, node->slot, i),
7230  					first, last, depth + 1, format);
7231  
7232  		if (last == max)
7233  			break;
7234  		if (last > max) {
7235  			switch(format) {
7236  			case mt_dump_hex:
7237  				pr_err("node %p last (%lx) > max (%lx) at pivot %d!\n",
7238  					node, last, max, i);
7239  				break;
7240  			case mt_dump_dec:
7241  				pr_err("node %p last (%lu) > max (%lu) at pivot %d!\n",
7242  					node, last, max, i);
7243  			}
7244  		}
7245  		first = last + 1;
7246  	}
7247  }
7248  
7249  static void mt_dump_node(const struct maple_tree *mt, void *entry,
7250  		unsigned long min, unsigned long max, unsigned int depth,
7251  		enum mt_dump_format format)
7252  {
7253  	struct maple_node *node = mte_to_node(entry);
7254  	unsigned int type = mte_node_type(entry);
7255  	unsigned int i;
7256  
7257  	mt_dump_range(min, max, depth, format);
7258  
7259  	pr_cont("node %p depth %d type %d parent %p", node, depth, type,
7260  			node ? node->parent : NULL);
7261  	switch (type) {
7262  	case maple_dense:
7263  		pr_cont("\n");
7264  		for (i = 0; i < MAPLE_NODE_SLOTS; i++) {
7265  			if (min + i > max)
7266  				pr_cont("OUT OF RANGE: ");
7267  			mt_dump_entry(mt_slot(mt, node->slot, i),
7268  					min + i, min + i, depth, format);
7269  		}
7270  		break;
7271  	case maple_leaf_64:
7272  	case maple_range_64:
7273  		mt_dump_range64(mt, entry, min, max, depth, format);
7274  		break;
7275  	case maple_arange_64:
7276  		mt_dump_arange64(mt, entry, min, max, depth, format);
7277  		break;
7278  
7279  	default:
7280  		pr_cont(" UNKNOWN TYPE\n");
7281  	}
7282  }
7283  
7284  void mt_dump(const struct maple_tree *mt, enum mt_dump_format format)
7285  {
7286  	void *entry = rcu_dereference_check(mt->ma_root, mt_locked(mt));
7287  
7288  	pr_info("maple_tree(%p) flags %X, height %u root %p\n",
7289  		 mt, mt->ma_flags, mt_height(mt), entry);
7290  	if (!xa_is_node(entry))
7291  		mt_dump_entry(entry, 0, 0, 0, format);
7292  	else if (entry)
7293  		mt_dump_node(mt, entry, 0, mt_node_max(entry), 0, format);
7294  }
7295  EXPORT_SYMBOL_GPL(mt_dump);
7296  
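/*
 * Debugging sketch (illustrative only): dump the whole tree in either
 * decimal or hexadecimal format while the tree is stable (tree lock or
 * RCU read lock held):
 *
 *	mt_dump(&tree, mt_dump_hex);
 */
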
7297  /*
7298   * Calculate the maximum gap in a node and check if that's what is reported in
7299   * the parent (unless root).
7300   */
7301  static void mas_validate_gaps(struct ma_state *mas)
7302  {
7303  	struct maple_enode *mte = mas->node;
7304  	struct maple_node *p_mn, *node = mte_to_node(mte);
7305  	enum maple_type mt = mte_node_type(mas->node);
7306  	unsigned long gap = 0, max_gap = 0;
7307  	unsigned long p_end, p_start = mas->min;
7308  	unsigned char p_slot, offset;
7309  	unsigned long *gaps = NULL;
7310  	unsigned long *pivots = ma_pivots(node, mt);
7311  	unsigned int i;
7312  
7313  	if (ma_is_dense(mt)) {
7314  		for (i = 0; i < mt_slot_count(mte); i++) {
7315  			if (mas_get_slot(mas, i)) {
7316  				if (gap > max_gap)
7317  					max_gap = gap;
7318  				gap = 0;
7319  				continue;
7320  			}
7321  			gap++;
7322  		}
7323  		goto counted;
7324  	}
7325  
7326  	gaps = ma_gaps(node, mt);
7327  	for (i = 0; i < mt_slot_count(mte); i++) {
7328  		p_end = mas_safe_pivot(mas, pivots, i, mt);
7329  
7330  		if (!gaps) {
7331  			if (!mas_get_slot(mas, i))
7332  				gap = p_end - p_start + 1;
7333  		} else {
7334  			void *entry = mas_get_slot(mas, i);
7335  
7336  			gap = gaps[i];
7337  			MT_BUG_ON(mas->tree, !entry);
7338  
7339  			if (gap > p_end - p_start + 1) {
7340  				pr_err("%p[%u] %lu > %lu - %lu + 1 (%lu)\n",
7341  				       mas_mn(mas), i, gap, p_end, p_start,
7342  				       p_end - p_start + 1);
7343  				MT_BUG_ON(mas->tree, gap > p_end - p_start + 1);
7344  			}
7345  		}
7346  
7347  		if (gap > max_gap)
7348  			max_gap = gap;
7349  
7350  		p_start = p_end + 1;
7351  		if (p_end >= mas->max)
7352  			break;
7353  	}
7354  
7355  counted:
7356  	if (mt == maple_arange_64) {
7357  		MT_BUG_ON(mas->tree, !gaps);
7358  		offset = ma_meta_gap(node);
7359  		if (offset > i) {
7360  			pr_err("gap offset %p[%u] is invalid\n", node, offset);
7361  			MT_BUG_ON(mas->tree, 1);
7362  		}
7363  
7364  		if (gaps[offset] != max_gap) {
7365  			pr_err("gap %p[%u] is not the largest gap %lu\n",
7366  			       node, offset, max_gap);
7367  			MT_BUG_ON(mas->tree, 1);
7368  		}
7369  
7370  		for (i++ ; i < mt_slot_count(mte); i++) {
7371  			if (gaps[i] != 0) {
7372  				pr_err("gap %p[%u] beyond node limit != 0\n",
7373  				       node, i);
7374  				MT_BUG_ON(mas->tree, 1);
7375  			}
7376  		}
7377  	}
7378  
7379  	if (mte_is_root(mte))
7380  		return;
7381  
7382  	p_slot = mte_parent_slot(mas->node);
7383  	p_mn = mte_parent(mte);
7384  	MT_BUG_ON(mas->tree, max_gap > mas->max);
7385  	if (ma_gaps(p_mn, mas_parent_type(mas, mte))[p_slot] != max_gap) {
7386  		pr_err("gap %p[%u] != %lu\n", p_mn, p_slot, max_gap);
7387  		mt_dump(mas->tree, mt_dump_hex);
7388  		MT_BUG_ON(mas->tree, 1);
7389  	}
7390  }
7391  
7392  static void mas_validate_parent_slot(struct ma_state *mas)
7393  {
7394  	struct maple_node *parent;
7395  	struct maple_enode *node;
7396  	enum maple_type p_type;
7397  	unsigned char p_slot;
7398  	void __rcu **slots;
7399  	int i;
7400  
7401  	if (mte_is_root(mas->node))
7402  		return;
7403  
7404  	p_slot = mte_parent_slot(mas->node);
7405  	p_type = mas_parent_type(mas, mas->node);
7406  	parent = mte_parent(mas->node);
7407  	slots = ma_slots(parent, p_type);
7408  	MT_BUG_ON(mas->tree, mas_mn(mas) == parent);
7409  
7410  	/* Check prev/next parent slot for duplicate node entry */
7411  
7412  	for (i = 0; i < mt_slots[p_type]; i++) {
7413  		node = mas_slot(mas, slots, i);
7414  		if (i == p_slot) {
7415  			if (node != mas->node)
7416  				pr_err("parent %p[%u] does not have %p\n",
7417  					parent, i, mas_mn(mas));
7418  			MT_BUG_ON(mas->tree, node != mas->node);
7419  		} else if (node == mas->node) {
7420  			pr_err("Invalid child %p at parent %p[%u] p_slot %u\n",
7421  			       mas_mn(mas), parent, i, p_slot);
7422  			MT_BUG_ON(mas->tree, node == mas->node);
7423  		}
7424  	}
7425  }
7426  
7427  static void mas_validate_child_slot(struct ma_state *mas)
7428  {
7429  	enum maple_type type = mte_node_type(mas->node);
7430  	void __rcu **slots = ma_slots(mte_to_node(mas->node), type);
7431  	unsigned long *pivots = ma_pivots(mte_to_node(mas->node), type);
7432  	struct maple_enode *child;
7433  	unsigned char i;
7434  
7435  	if (mte_is_leaf(mas->node))
7436  		return;
7437  
7438  	for (i = 0; i < mt_slots[type]; i++) {
7439  		child = mas_slot(mas, slots, i);
7440  
7441  		if (!child) {
7442  			pr_err("Non-leaf node lacks child at %p[%u]\n",
7443  			       mas_mn(mas), i);
7444  			MT_BUG_ON(mas->tree, 1);
7445  		}
7446  
7447  		if (mte_parent_slot(child) != i) {
7448  			pr_err("Slot error at %p[%u]: child %p has pslot %u\n",
7449  			       mas_mn(mas), i, mte_to_node(child),
7450  			       mte_parent_slot(child));
7451  			MT_BUG_ON(mas->tree, 1);
7452  		}
7453  
7454  		if (mte_parent(child) != mte_to_node(mas->node)) {
7455  			pr_err("child %p has parent %p not %p\n",
7456  			       mte_to_node(child), mte_parent(child),
7457  			       mte_to_node(mas->node));
7458  			MT_BUG_ON(mas->tree, 1);
7459  		}
7460  
7461  		if (i < mt_pivots[type] && pivots[i] == mas->max)
7462  			break;
7463  	}
7464  }
7465  
7466  /*
7467   * Validate that all pivots are within mas->min and mas->max, check that the
7468   * metadata ends where the maximum ends, and ensure there are no slots or
7469   * pivots set beyond the end of the data.
7470   */
7471  static void mas_validate_limits(struct ma_state *mas)
7472  {
7473  	int i;
7474  	unsigned long prev_piv = 0;
7475  	enum maple_type type = mte_node_type(mas->node);
7476  	void __rcu **slots = ma_slots(mte_to_node(mas->node), type);
7477  	unsigned long *pivots = ma_pivots(mas_mn(mas), type);
7478  
7479  	for (i = 0; i < mt_slots[type]; i++) {
7480  		unsigned long piv;
7481  
7482  		piv = mas_safe_pivot(mas, pivots, i, type);
7483  
7484  		if (!piv && (i != 0)) {
7485  			pr_err("Missing node limit pivot at %p[%u]",
7486  			       mas_mn(mas), i);
7487  			MAS_WARN_ON(mas, 1);
7488  		}
7489  
7490  		if (prev_piv > piv) {
7491  			pr_err("%p[%u] piv %lu < prev_piv %lu\n",
7492  				mas_mn(mas), i, piv, prev_piv);
7493  			MAS_WARN_ON(mas, piv < prev_piv);
7494  		}
7495  
7496  		if (piv < mas->min) {
7497  			pr_err("%p[%u] %lu < %lu\n", mas_mn(mas), i,
7498  				piv, mas->min);
7499  			MAS_WARN_ON(mas, piv < mas->min);
7500  		}
7501  		if (piv > mas->max) {
7502  			pr_err("%p[%u] %lu > %lu\n", mas_mn(mas), i,
7503  				piv, mas->max);
7504  			MAS_WARN_ON(mas, piv > mas->max);
7505  		}
7506  		prev_piv = piv;
7507  		if (piv == mas->max)
7508  			break;
7509  	}
7510  
7511  	if (mas_data_end(mas) != i) {
7512  		pr_err("node%p: data_end %u != the last slot offset %u\n",
7513  		       mas_mn(mas), mas_data_end(mas), i);
7514  		MT_BUG_ON(mas->tree, 1);
7515  	}
7516  
7517  	for (i += 1; i < mt_slots[type]; i++) {
7518  		void *entry = mas_slot(mas, slots, i);
7519  
7520  		if (entry && (i != mt_slots[type] - 1)) {
7521  			pr_err("%p[%u] should not have entry %p\n", mas_mn(mas),
7522  			       i, entry);
7523  			MT_BUG_ON(mas->tree, entry != NULL);
7524  		}
7525  
7526  		if (i < mt_pivots[type]) {
7527  			unsigned long piv = pivots[i];
7528  
7529  			if (!piv)
7530  				continue;
7531  
7532  			pr_err("%p[%u] should not have piv %lu\n",
7533  			       mas_mn(mas), i, piv);
7534  			MAS_WARN_ON(mas, i < mt_pivots[type] - 1);
7535  		}
7536  	}
7537  }
7538  
7539  static void mt_validate_nulls(struct maple_tree *mt)
7540  {
7541  	void *entry, *last = (void *)1;
7542  	unsigned char offset = 0;
7543  	void __rcu **slots;
7544  	MA_STATE(mas, mt, 0, 0);
7545  
7546  	mas_start(&mas);
7547  	if (mas_is_none(&mas) || (mas_is_ptr(&mas)))
7548  		return;
7549  
7550  	while (!mte_is_leaf(mas.node))
7551  		mas_descend(&mas);
7552  
7553  	slots = ma_slots(mte_to_node(mas.node), mte_node_type(mas.node));
7554  	do {
7555  		entry = mas_slot(&mas, slots, offset);
7556  		if (!last && !entry) {
7557  			pr_err("Sequential nulls end at %p[%u]\n",
7558  				mas_mn(&mas), offset);
7559  		}
7560  		MT_BUG_ON(mt, !last && !entry);
7561  		last = entry;
7562  		if (offset == mas_data_end(&mas)) {
7563  			mas_next_node(&mas, mas_mn(&mas), ULONG_MAX);
7564  			if (mas_is_overflow(&mas))
7565  				return;
7566  			offset = 0;
7567  			slots = ma_slots(mte_to_node(mas.node),
7568  					 mte_node_type(mas.node));
7569  		} else {
7570  			offset++;
7571  		}
7572  
7573  	} while (!mas_is_overflow(&mas));
7574  }
7575  
7576  /*
7577   * validate a maple tree by checking:
7578   * 1. The limits (pivots are within mas->min to mas->max)
7579   * 2. The gap is correctly set in the parents
7580   */
7581  void mt_validate(struct maple_tree *mt)
7582  	__must_hold(mas->tree->ma_lock)
7583  {
7584  	unsigned char end;
7585  
7586  	MA_STATE(mas, mt, 0, 0);
7587  	mas_start(&mas);
7588  	if (!mas_is_active(&mas))
7589  		return;
7590  
7591  	while (!mte_is_leaf(mas.node))
7592  		mas_descend(&mas);
7593  
7594  	while (!mas_is_overflow(&mas)) {
7595  		MAS_WARN_ON(&mas, mte_dead_node(mas.node));
7596  		end = mas_data_end(&mas);
7597  		if (MAS_WARN_ON(&mas, (end < mt_min_slot_count(mas.node)) &&
7598  				(mas.max != ULONG_MAX))) {
7599  			pr_err("Invalid size %u of %p\n", end, mas_mn(&mas));
7600  		}
7601  
7602  		mas_validate_parent_slot(&mas);
7603  		mas_validate_limits(&mas);
7604  		mas_validate_child_slot(&mas);
7605  		if (mt_is_alloc(mt))
7606  			mas_validate_gaps(&mas);
7607  		mas_dfs_postorder(&mas, ULONG_MAX);
7608  	}
7609  	mt_validate_nulls(mt);
7610  }
7611  EXPORT_SYMBOL_GPL(mt_validate);
7612  
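/*
 * Debugging sketch (illustrative only): run the validator on a tree with
 * the lock held, typically after a batch of stores in a test:
 *
 *	mtree_lock(&tree);
 *	mt_validate(&tree);
 *	mtree_unlock(&tree);
 */
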
7613  void mas_dump(const struct ma_state *mas)
7614  {
7615  	pr_err("MAS: tree=%p enode=%p ", mas->tree, mas->node);
7616  	switch (mas->status) {
7617  	case ma_active:
7618  		pr_err("(ma_active)");
7619  		break;
7620  	case ma_none:
7621  		pr_err("(ma_none)");
7622  		break;
7623  	case ma_root:
7624  		pr_err("(ma_root)");
7625  		break;
7626  	case ma_start:
7627  		pr_err("(ma_start) ");
7628  		break;
7629  	case ma_pause:
7630  		pr_err("(ma_pause) ");
7631  		break;
7632  	case ma_overflow:
7633  		pr_err("(ma_overflow) ");
7634  		break;
7635  	case ma_underflow:
7636  		pr_err("(ma_underflow) ");
7637  		break;
7638  	case ma_error:
7639  		pr_err("(ma_error) ");
7640  		break;
7641  	}
7642  
7643  	pr_err("Store Type: ");
7644  	switch (mas->store_type) {
7645  	case wr_invalid:
7646  		pr_err("invalid store type\n");
7647  		break;
7648  	case wr_new_root:
7649  		pr_err("new_root\n");
7650  		break;
7651  	case wr_store_root:
7652  		pr_err("store_root\n");
7653  		break;
7654  	case wr_exact_fit:
7655  		pr_err("exact_fit\n");
7656  		break;
7657  	case wr_split_store:
7658  		pr_err("split_store\n");
7659  		break;
7660  	case wr_slot_store:
7661  		pr_err("slot_store\n");
7662  		break;
7663  	case wr_append:
7664  		pr_err("append\n");
7665  		break;
7666  	case wr_node_store:
7667  		pr_err("node_store\n");
7668  		break;
7669  	case wr_spanning_store:
7670  		pr_err("spanning_store\n");
7671  		break;
7672  	case wr_rebalance:
7673  		pr_err("rebalance\n");
7674  		break;
7675  	}
7676  
7677  	pr_err("[%u/%u] index=%lx last=%lx\n", mas->offset, mas->end,
7678  	       mas->index, mas->last);
7679  	pr_err("     min=%lx max=%lx alloc=%p, depth=%u, flags=%x\n",
7680  	       mas->min, mas->max, mas->alloc, mas->depth, mas->mas_flags);
7681  	if (mas->index > mas->last)
7682  		pr_err("Check index & last\n");
7683  }
7684  EXPORT_SYMBOL_GPL(mas_dump);
7685  
7686  void mas_wr_dump(const struct ma_wr_state *wr_mas)
7687  {
7688  	pr_err("WR_MAS: node=%p r_min=%lx r_max=%lx\n",
7689  	       wr_mas->node, wr_mas->r_min, wr_mas->r_max);
7690  	pr_err("        type=%u off_end=%u, node_end=%u, end_piv=%lx\n",
7691  	       wr_mas->type, wr_mas->offset_end, wr_mas->mas->end,
7692  	       wr_mas->end_piv);
7693  }
7694  EXPORT_SYMBOL_GPL(mas_wr_dump);
7695  
7696  #endif /* CONFIG_DEBUG_MAPLE_TREE */
7697