/* SPDX-License-Identifier: GPL-2.0+ */
#ifndef _LINUX_MAPLE_TREE_H
#define _LINUX_MAPLE_TREE_H
/*
 * Maple Tree - An RCU-safe adaptive tree for storing ranges
 * Copyright (c) 2018-2022 Oracle
 * Authors:     Liam R. Howlett <Liam.Howlett@Oracle.com>
 *              Matthew Wilcox <willy@infradead.org>
 */

#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/spinlock.h>
/* #define CONFIG_MAPLE_RCU_DISABLED */

/*
 * Allocated nodes are mutable until they have been inserted into the tree,
 * at which time they cannot change their type until they have been removed
 * from the tree and an RCU grace period has passed.
 *
 * Removed nodes have their ->parent set to point to themselves.  RCU readers
 * check ->parent before relying on the value that they loaded from the
 * slots array.  This lets us reuse the slots array for the RCU head.
 *
 * Nodes in the tree point to their parent unless bit 0 is set.
 */
#if defined(CONFIG_64BIT) || defined(BUILD_VDSO32_64)
/* 64bit sizes */
#define MAPLE_NODE_SLOTS	31	/* 256 bytes including ->parent */
#define MAPLE_RANGE64_SLOTS	16	/* 256 bytes */
#define MAPLE_ARANGE64_SLOTS	10	/* 240 bytes */
#define MAPLE_ALLOC_SLOTS	(MAPLE_NODE_SLOTS - 1)
#else
/* 32bit sizes */
#define MAPLE_NODE_SLOTS	63	/* 256 bytes including ->parent */
#define MAPLE_RANGE64_SLOTS	32	/* 256 bytes */
#define MAPLE_ARANGE64_SLOTS	21	/* 240 bytes */
#define MAPLE_ALLOC_SLOTS	(MAPLE_NODE_SLOTS - 2)
#endif /* defined(CONFIG_64BIT) || defined(BUILD_VDSO32_64) */

#define MAPLE_NODE_MASK		255UL

/*
 * The node->parent of the root node has bit 0 set and the rest of the pointer
 * is a pointer to the tree itself.  No more bits are available in this pointer
 * (on m68k, the data structure may only be 2-byte aligned).
 *
 * Internal non-root nodes can only have maple_range_* nodes as parents.  The
 * parent pointer is 256B aligned like all other tree nodes.  When storing 32
 * or 64 bit values, the offset can fit into 4 bits.  The 16 bit values need an
 * extra bit to store the offset.  This extra bit comes from a reuse of the last
 * bit in the node type.  This is possible by using bit 1 to indicate if bit 2
 * is part of the type or the slot.
 *
 * Once the type is decided, the decision of an allocation range type or a
 * range type is done by examining the immutable tree flag for the
 * MT_FLAGS_ALLOC_RANGE flag.
 *
 *  Node types:
 *   0x??1 = Root
 *   0x?00 = 16 bit nodes
 *   0x010 = 32 bit nodes
 *   0x110 = 64 bit nodes
 *
 *  Slot size and location in the parent pointer:
 *   type  : slot location
 *   0x??1 : Root
 *   0x?00 : 16 bit values, type in 0-1, slot in 2-6
 *   0x010 : 32 bit values, type in 0-2, slot in 3-6
 *   0x110 : 64 bit values, type in 0-2, slot in 3-6
 */
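
/*
 * A minimal sketch of decoding the layout above (illustrative only; the
 * authoritative helpers live in lib/maple_tree.c):
 *
 *	unsigned long p = (unsigned long)node->parent;
 *
 *	if (p & 0x1) {
 *		// Root: p with bit 0 cleared is the struct maple_tree *
 *	} else if (!(p & 0x2)) {
 *		// 16 bit values: type is bits 0-1, slot offset is bits 2-6
 *	} else {
 *		// 32/64 bit values: type is bits 0-2, slot offset is bits 3-6
 *	}
 */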

/*
 * This metadata is used to optimize the gap updating code and in reverse
 * searching for gaps or any other code that needs to find the end of the data.
 */
struct maple_metadata {
	unsigned char end;
	unsigned char gap;
};

/*
 * Leaf nodes do not store pointers to nodes, they store user data.  Users may
 * store almost any bit pattern.  As noted above, the optimisation of storing an
 * entry at 0 in the root pointer cannot be done for data which have the bottom
 * two bits set to '10'.  We also reserve values with the bottom two bits set to
 * '10' which are below 4096 (ie 2, 6, 10 .. 4094) for internal use.  Some APIs
 * return errnos as a negative errno shifted right by two bits and the bottom
 * two bits set to '10', and while choosing to store these values in the array
 * is not an error, it may lead to confusion if you're testing for an error with
 * mas_is_err().
 *
 * Non-leaf nodes store the type of the node pointed to (enum maple_type in bits
 * 3-6), bit 2 is reserved.  That leaves bits 0-1 unused for now.
 *
 * In regular B-Tree terms, pivots are called keys.  The term pivot is used to
 * indicate that the tree is specifying ranges.  Pivots may appear in the
 * subtree with an entry attached to the value whereas keys are unique to a
 * specific position of a B-tree.  Pivot values are inclusive of the slot with
 * the same index.
 */
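
/*
 * Worked example of the pivot/slot relationship described above (values are
 * illustrative): a node holding slots { A, B, C } with pivot[] = { 10, 20 }
 * and an implied parent range of 0-100 encodes:
 *
 *	index  0-10  -> A	// pivot[0] == 10, inclusive
 *	index 11-20  -> B	// pivot[1] == 20, inclusive
 *	index 21-100 -> C	// bounded by the implied maximum
 */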

struct maple_range_64 {
	struct maple_pnode *parent;
	unsigned long pivot[MAPLE_RANGE64_SLOTS - 1];
	union {
		void __rcu *slot[MAPLE_RANGE64_SLOTS];
		struct {
			void __rcu *pad[MAPLE_RANGE64_SLOTS - 1];
			struct maple_metadata meta;
		};
	};
};

/*
 * At tree creation time, the user can specify that they're willing to trade off
 * storing fewer entries in a tree in return for storing more information in
 * each node.
 *
 * The maple tree supports recording the largest range of NULL entries available
 * in this node, also called gaps.  This optimises the tree for allocating a
 * range.
 */
struct maple_arange_64 {
	struct maple_pnode *parent;
	unsigned long pivot[MAPLE_ARANGE64_SLOTS - 1];
	void __rcu *slot[MAPLE_ARANGE64_SLOTS];
	unsigned long gap[MAPLE_ARANGE64_SLOTS];
	struct maple_metadata meta;
};

struct maple_alloc {
	unsigned long total;
	unsigned char node_count;
	unsigned int request_count;
	struct maple_alloc *slot[MAPLE_ALLOC_SLOTS];
};

struct maple_topiary {
	struct maple_pnode *parent;
	struct maple_enode *next; /* Overlaps the pivot */
};

enum maple_type {
	maple_dense,
	maple_leaf_64,
	maple_range_64,
	maple_arange_64,
};

enum store_type {
	wr_invalid,
	wr_new_root,
	wr_store_root,
	wr_exact_fit,
	wr_spanning_store,
	wr_split_store,
	wr_rebalance,
	wr_append,
	wr_node_store,
	wr_slot_store,
};

/**
 * DOC: Maple tree flags
 *
 * * MT_FLAGS_ALLOC_RANGE	- Track gaps in this tree
 * * MT_FLAGS_USE_RCU		- Operate in RCU mode
 * * MT_FLAGS_HEIGHT_OFFSET	- The position of the tree height in the flags
 * * MT_FLAGS_HEIGHT_MASK	- The mask for the maple tree height value
 * * MT_FLAGS_LOCK_MASK		- How the mt_lock is used
 * * MT_FLAGS_LOCK_IRQ		- Acquired irq-safe
 * * MT_FLAGS_LOCK_BH		- Acquired bh-safe
 * * MT_FLAGS_LOCK_EXTERN	- mt_lock is not used
 *
 * MAPLE_HEIGHT_MAX	The largest height that can be stored
 */
#define MT_FLAGS_ALLOC_RANGE	0x01
#define MT_FLAGS_USE_RCU	0x02
#define MT_FLAGS_HEIGHT_OFFSET	0x02
#define MT_FLAGS_HEIGHT_MASK	0x7C
#define MT_FLAGS_LOCK_MASK	0x300
#define MT_FLAGS_LOCK_IRQ	0x100
#define MT_FLAGS_LOCK_BH	0x200
#define MT_FLAGS_LOCK_EXTERN	0x300
#define MT_FLAGS_ALLOC_WRAPPED	0x0800

#define MAPLE_HEIGHT_MAX	31

#define MAPLE_NODE_TYPE_MASK	0x0F
#define MAPLE_NODE_TYPE_SHIFT	0x03

#define MAPLE_RESERVED_RANGE	4096

#ifdef CONFIG_LOCKDEP
typedef struct lockdep_map *lockdep_map_p;
#define mt_lock_is_held(mt)                                             \
	(!(mt)->ma_external_lock || lock_is_held((mt)->ma_external_lock))

#define mt_write_lock_is_held(mt)					\
	(!(mt)->ma_external_lock ||					\
	 lock_is_held_type((mt)->ma_external_lock, 0))

#define mt_set_external_lock(mt, lock)					\
	(mt)->ma_external_lock = &(lock)->dep_map

#define mt_on_stack(mt)			(mt).ma_external_lock = NULL
#else
typedef struct { /* nothing */ } lockdep_map_p;
#define mt_lock_is_held(mt)		1
#define mt_write_lock_is_held(mt)	1
#define mt_set_external_lock(mt, lock)	do { } while (0)
#define mt_on_stack(mt)			do { } while (0)
#endif

/*
 * If the tree contains a single entry at index 0, it is usually stored in
 * tree->ma_root.  To optimise for the page cache, an entry which ends in '00',
 * '01' or '11' is stored in the root, but an entry which ends in '10' will be
 * stored in a node.  Bits 3-6 are used to store enum maple_type.
 *
 * The flags are used both to store some immutable information about this tree
 * (set at tree creation time) and dynamic information set under the spinlock.
 *
 * Another use of flags is to indicate global states of the tree.  This is the
 * case with the MAPLE_USE_RCU flag, which indicates the tree is currently in
 * RCU mode.  This mode was added to allow the tree to reuse nodes instead of
 * re-allocating and RCU freeing nodes when there is a single user.
 */
struct maple_tree {
	union {
		spinlock_t	ma_lock;
		lockdep_map_p	ma_external_lock;
	};
	unsigned int	ma_flags;
	void __rcu      *ma_root;
};

/**
 * MTREE_INIT() - Initialize a maple tree
 * @name: The maple tree name
 * @__flags: The maple tree flags
 */
#define MTREE_INIT(name, __flags) {					\
	.ma_lock = __SPIN_LOCK_UNLOCKED((name).ma_lock),		\
	.ma_flags = __flags,						\
	.ma_root = NULL,						\
}

/**
 * MTREE_INIT_EXT() - Initialize a maple tree with an external lock.
 * @name: The tree name
 * @__flags: The maple tree flags
 * @__lock: The external lock
 */
#ifdef CONFIG_LOCKDEP
#define MTREE_INIT_EXT(name, __flags, __lock) {				\
	.ma_external_lock = &(__lock).dep_map,				\
	.ma_flags = (__flags),						\
	.ma_root = NULL,						\
}
#else
#define MTREE_INIT_EXT(name, __flags, __lock)	MTREE_INIT(name, __flags)
#endif

#define DEFINE_MTREE(name)						\
	struct maple_tree name = MTREE_INIT(name, 0)
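
/*
 * Example use of the initialisers above (a sketch; my_tree and mt2 are
 * hypothetical names):
 *
 *	static DEFINE_MTREE(my_tree);	// file-scope tree, default flags
 *
 *	struct maple_tree mt2 = MTREE_INIT(mt2, MT_FLAGS_ALLOC_RANGE);
 */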

#define mtree_lock(mt)		spin_lock((&(mt)->ma_lock))
#define mtree_lock_nested(mt, subclass) \
		spin_lock_nested((&(mt)->ma_lock), subclass)
#define mtree_unlock(mt)	spin_unlock((&(mt)->ma_lock))

/*
 * The Maple Tree squeezes various bits in at various points which aren't
 * necessarily obvious.  Usually, this is done by observing that pointers are
 * N-byte aligned and thus the bottom log_2(N) bits are available for use.  We
 * don't use the high bits of pointers to store additional information because
 * we don't know what bits are unused on any given architecture.
 *
 * Nodes are 256 bytes in size and are also aligned to 256 bytes, giving us 8
 * low bits for our own purposes.  Nodes are currently of 4 types:
 * 1. Single pointer (Range is 0-0)
 * 2. Non-leaf Allocation Range nodes
 * 3. Non-leaf Range nodes
 * 4. Leaf Range nodes
 *
 * All nodes consist of a number of node slots, pivots, and a parent pointer.
 */

struct maple_node {
	union {
		struct {
			struct maple_pnode *parent;
			void __rcu *slot[MAPLE_NODE_SLOTS];
		};
		struct {
			void *pad;
			struct rcu_head rcu;
			struct maple_enode *piv_parent;
			unsigned char parent_slot;
			enum maple_type type;
			unsigned char slot_len;
			unsigned int ma_flags;
		};
		struct maple_range_64 mr64;
		struct maple_arange_64 ma64;
		struct maple_alloc alloc;
	};
};

/*
 * More complicated stores can cause two nodes to become one or three and
 * potentially alter the height of the tree.  Either half of the tree may need
 * to be rebalanced against the other.  The ma_topiary struct is used to track
 * which nodes have been 'cut' from the tree so that the change can be done
 * safely at a later date.  This is done to support RCU.
 */
struct ma_topiary {
	struct maple_enode *head;
	struct maple_enode *tail;
	struct maple_tree *mtree;
};

void *mtree_load(struct maple_tree *mt, unsigned long index);

int mtree_insert(struct maple_tree *mt, unsigned long index,
		void *entry, gfp_t gfp);
int mtree_insert_range(struct maple_tree *mt, unsigned long first,
		unsigned long last, void *entry, gfp_t gfp);
int mtree_alloc_range(struct maple_tree *mt, unsigned long *startp,
		void *entry, unsigned long size, unsigned long min,
		unsigned long max, gfp_t gfp);
int mtree_alloc_cyclic(struct maple_tree *mt, unsigned long *startp,
		void *entry, unsigned long range_lo, unsigned long range_hi,
		unsigned long *next, gfp_t gfp);
int mtree_alloc_rrange(struct maple_tree *mt, unsigned long *startp,
		void *entry, unsigned long size, unsigned long min,
		unsigned long max, gfp_t gfp);

int mtree_store_range(struct maple_tree *mt, unsigned long first,
		      unsigned long last, void *entry, gfp_t gfp);
int mtree_store(struct maple_tree *mt, unsigned long index,
		void *entry, gfp_t gfp);
void *mtree_erase(struct maple_tree *mt, unsigned long index);

int mtree_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp);
int __mt_dup(struct maple_tree *mt, struct maple_tree *new, gfp_t gfp);

void mtree_destroy(struct maple_tree *mt);
void __mt_destroy(struct maple_tree *mt);

/**
 * mtree_empty() - Determine if a tree has any present entries.
 * @mt: Maple Tree.
 *
 * Context: Any context.
 * Return: %true if the tree contains only NULL pointers.
 */
static inline bool mtree_empty(const struct maple_tree *mt)
{
	return mt->ma_root == NULL;
}
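
/*
 * A minimal sketch of the normal API declared above (illustrative only;
 * error handling is elided and ptr_a/ptr_b are hypothetical pointers):
 *
 *	DEFINE_MTREE(tree);
 *
 *	mtree_store(&tree, 1, ptr_a, GFP_KERNEL);	     // single index
 *	mtree_store_range(&tree, 5, 15, ptr_b, GFP_KERNEL); // inclusive range
 *	WARN_ON(mtree_load(&tree, 10) != ptr_b);	     // any index in 5-15
 *	mtree_erase(&tree, 1);				     // returns ptr_a
 *	mtree_destroy(&tree);
 */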

/* Advanced API */

/*
 * Maple State Status
 * ma_active means the maple state is pointing to a node and offset and can
 * continue operating on the tree.
 * ma_start means we have not searched the tree.
 * ma_root means we have searched the tree and the entry we found lives in
 * the root of the tree (ie it has index 0, length 1 and is the only entry in
 * the tree).
 * ma_none means we have searched the tree and there is no node in the
 * tree for this entry.  For example, we searched for index 1 in an empty
 * tree.  Or we have a tree which points to a full leaf node and we
 * searched for an entry which is larger than can be contained in that
 * leaf node.
 * ma_pause means the data within the maple state may be stale; restart the
 * operation.
 * ma_overflow means the search has reached the upper limit of the search.
 * ma_underflow means the search has reached the lower limit of the search.
 * ma_error means there was an error, check the node for the error number.
 */
enum maple_status {
	ma_active,
	ma_start,
	ma_root,
	ma_none,
	ma_pause,
	ma_overflow,
	ma_underflow,
	ma_error,
};

/*
 * The maple state is defined in the struct ma_state and is used to keep track
 * of information during operations, and even between operations when using the
 * advanced API.
 *
 * If state->node has bit 0 set then it references a tree location which is not
 * a node (eg the root).  If bit 1 is set, the rest of the bits are a negative
 * errno.  Bit 2 (the 'unallocated slots' bit) is clear.  Bits 3-6 indicate the
 * node type.
 *
 * state->alloc either has a request number of nodes or an allocated node.  If
 * state->alloc has a requested number of nodes, the first bit will be set (0x1)
 * and the remaining bits are the value.  If state->alloc is a node, then the
 * node will be of type maple_alloc.  maple_alloc has MAPLE_NODE_SLOTS - 1 for
 * storing more allocated nodes, a total number of nodes allocated, and the
 * node_count in this node.  node_count is the number of allocated nodes in this
 * node.  The scaling beyond MAPLE_NODE_SLOTS - 1 is handled by storing further
 * nodes into state->alloc->slot[0]'s node.  Nodes are taken from state->alloc
 * by removing a node from the state->alloc node until state->alloc->node_count
 * is 1, when state->alloc is returned and the state->alloc->slot[0] is promoted
 * to state->alloc.  Nodes are pushed onto state->alloc by putting the current
 * state->alloc into the pushed node's slot[0].
 *
 * The state also contains the implied min/max of the state->node, the depth of
 * this search, and the offset. The implied min/max are either from the parent
 * node or are 0-oo for the root node.  The depth is incremented or decremented
 * every time a node is walked down or up.  The offset is the slot/pivot of
 * interest in the node - either for reading or writing.
 *
 * When returning a value the maple state index and last respectively contain
 * the start and end of the range for the entry.  Ranges are inclusive in the
 * Maple Tree.
 *
 * The status of the state is used to determine how the next action should treat
 * the state.  For instance, if the status is ma_start then the next action
 * should start at the root of the tree and walk down.  If the status is
 * ma_pause then the node may be stale data and should be discarded.  If the
 * status is ma_overflow, then the last action hit the upper limit.
 */
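
/*
 * Illustrative sketch of the request encoding described above (internal
 * detail; the real accessors live in lib/maple_tree.c):
 *
 *	// record a request for 3 nodes: bit 0 tags the value as a count
 *	mas->alloc = (struct maple_alloc *)((3UL << 1) | 1);
 *
 *	// read it back
 *	if ((unsigned long)mas->alloc & 1)
 *		requested = (unsigned long)mas->alloc >> 1;
 */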
struct ma_state {
	struct maple_tree *tree;	/* The tree we're operating in */
	unsigned long index;		/* The index we're operating on - range start */
	unsigned long last;		/* The last index we're operating on - range end */
	struct maple_enode *node;	/* The node containing this entry */
	unsigned long min;		/* The minimum index of this node - implied pivot min */
	unsigned long max;		/* The maximum index of this node - implied pivot max */
	struct maple_alloc *alloc;	/* Allocated nodes for this operation */
	enum maple_status status;	/* The status of the state (active, start, none, etc) */
	unsigned char depth;		/* depth of tree descent during write */
	unsigned char offset;
	unsigned char mas_flags;
	unsigned char end;		/* The end of the node */
	enum store_type store_type;	/* The type of store needed for this operation */
};

struct ma_wr_state {
	struct ma_state *mas;
	struct maple_node *node;	/* Decoded mas->node */
	unsigned long r_min;		/* range min */
	unsigned long r_max;		/* range max */
	enum maple_type type;		/* mas->node type */
	unsigned char offset_end;	/* The offset where the write ends */
	unsigned long *pivots;		/* mas->node->pivots pointer */
	unsigned long end_piv;		/* The pivot at the offset end */
	void __rcu **slots;		/* mas->node->slots pointer */
	void *entry;			/* The entry to write */
	void *content;			/* The existing entry that is being overwritten */
};

#define mas_lock(mas)           spin_lock(&((mas)->tree->ma_lock))
#define mas_lock_nested(mas, subclass) \
		spin_lock_nested(&((mas)->tree->ma_lock), subclass)
#define mas_unlock(mas)         spin_unlock(&((mas)->tree->ma_lock))

/*
 * Special values for ma_state.node.
 * MA_ERROR represents an errno.  After dropping the lock and attempting
 * to resolve the error, the walk would have to be restarted from the
 * top of the tree as the tree may have been modified.
 */
#define MA_ERROR(err) \
		((struct maple_enode *)(((unsigned long)err << 2) | 2UL))
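
/*
 * Sketch of how an error is recorded and detected (illustrative; the real
 * code uses mas_set_err() in lib/maple_tree.c and xa_err() from
 * <linux/xarray.h>, which shares this encoding):
 *
 *	mas->node = MA_ERROR(-ENOMEM);	// encode the errno in the node
 *	mas->status = ma_error;
 *
 *	if (mas_is_err(mas))
 *		err = xa_err(mas->node);	// recovers -ENOMEM
 */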

#define MA_STATE(name, mt, first, end)					\
	struct ma_state name = {					\
		.tree = mt,						\
		.index = first,						\
		.last = end,						\
		.node = NULL,						\
		.status = ma_start,					\
		.min = 0,						\
		.max = ULONG_MAX,					\
		.alloc = NULL,						\
		.mas_flags = 0,						\
		.store_type = wr_invalid,				\
	}

#define MA_WR_STATE(name, ma_state, wr_entry)				\
	struct ma_wr_state name = {					\
		.mas = ma_state,					\
		.content = NULL,					\
		.entry = wr_entry,					\
	}

#define MA_TOPIARY(name, tree)						\
	struct ma_topiary name = {					\
		.head = NULL,						\
		.tail = NULL,						\
		.mtree = tree,						\
	}

void *mas_walk(struct ma_state *mas);
void *mas_store(struct ma_state *mas, void *entry);
void *mas_erase(struct ma_state *mas);
int mas_store_gfp(struct ma_state *mas, void *entry, gfp_t gfp);
void mas_store_prealloc(struct ma_state *mas, void *entry);
void *mas_find(struct ma_state *mas, unsigned long max);
void *mas_find_range(struct ma_state *mas, unsigned long max);
void *mas_find_rev(struct ma_state *mas, unsigned long min);
void *mas_find_range_rev(struct ma_state *mas, unsigned long min);
int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp);
int mas_alloc_cyclic(struct ma_state *mas, unsigned long *startp,
		void *entry, unsigned long range_lo, unsigned long range_hi,
		unsigned long *next, gfp_t gfp);

bool mas_nomem(struct ma_state *mas, gfp_t gfp);
void mas_pause(struct ma_state *mas);
void maple_tree_init(void);
void mas_destroy(struct ma_state *mas);
int mas_expected_entries(struct ma_state *mas, unsigned long nr_entries);

void *mas_prev(struct ma_state *mas, unsigned long min);
void *mas_prev_range(struct ma_state *mas, unsigned long min);
void *mas_next(struct ma_state *mas, unsigned long max);
void *mas_next_range(struct ma_state *mas, unsigned long max);

int mas_empty_area(struct ma_state *mas, unsigned long min, unsigned long max,
		   unsigned long size);
/*
 * This finds an empty area from the highest address to the lowest.
 * AKA the "topdown" version.
 */
int mas_empty_area_rev(struct ma_state *mas, unsigned long min,
		       unsigned long max, unsigned long size);
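
/*
 * Sketch of gap allocation with the advanced API, assuming a tree
 * initialised with MT_FLAGS_ALLOC_RANGE (tree and entry are hypothetical;
 * this mirrors the usual mas_empty_area()/mas_store_gfp() pattern):
 *
 *	MA_STATE(mas, &tree, 0, 0);
 *	int ret;
 *
 *	mas_lock(&mas);
 *	ret = mas_empty_area(&mas, 0, ULONG_MAX, 16);	// find a 16-index gap
 *	if (!ret) {
 *		mas.last = mas.index + 16 - 1;		// store over the gap
 *		ret = mas_store_gfp(&mas, entry, GFP_KERNEL);
 *	}
 *	mas_unlock(&mas);
 */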

static inline void mas_init(struct ma_state *mas, struct maple_tree *tree,
			    unsigned long addr)
{
	memset(mas, 0, sizeof(struct ma_state));
	mas->tree = tree;
	mas->index = mas->last = addr;
	mas->max = ULONG_MAX;
	mas->status = ma_start;
	mas->node = NULL;
}

static inline bool mas_is_active(struct ma_state *mas)
{
	return mas->status == ma_active;
}

static inline bool mas_is_err(struct ma_state *mas)
{
	return mas->status == ma_error;
}

/**
 * mas_reset() - Reset a Maple Tree operation state.
 * @mas: Maple Tree operation state.
 *
 * Resets the error or walk state of the @mas so future walks of the
 * array will start from the root.  Use this if you have dropped the
 * lock and want to reuse the ma_state.
 *
 * Context: Any context.
 */
static __always_inline void mas_reset(struct ma_state *mas)
{
	mas->status = ma_start;
	mas->node = NULL;
}

/**
 * mas_for_each() - Iterate over a range of the maple tree.
 * @__mas: Maple Tree operation state (maple_state)
 * @__entry: Entry retrieved from the tree
 * @__max: maximum index to retrieve from the tree
 *
 * When returned, mas->index and mas->last will hold the entire range for the
 * entry.
 *
 * Note: may return the zero entry.
 */
#define mas_for_each(__mas, __entry, __max) \
	while (((__entry) = mas_find((__mas), (__max))) != NULL)
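
/*
 * Typical iteration with mas_for_each() (a sketch; my_tree is hypothetical
 * and entry handling is elided):
 *
 *	MA_STATE(mas, &my_tree, 0, 0);
 *	void *entry;
 *
 *	rcu_read_lock();
 *	mas_for_each(&mas, entry, ULONG_MAX) {
 *		// mas.index and mas.last span the range holding entry
 *	}
 *	rcu_read_unlock();
 */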

#ifdef CONFIG_DEBUG_MAPLE_TREE
enum mt_dump_format {
	mt_dump_dec,
	mt_dump_hex,
};

extern atomic_t maple_tree_tests_run;
extern atomic_t maple_tree_tests_passed;

void mt_dump(const struct maple_tree *mt, enum mt_dump_format format);
void mas_dump(const struct ma_state *mas);
void mas_wr_dump(const struct ma_wr_state *wr_mas);
void mt_validate(struct maple_tree *mt);
void mt_cache_shrink(void);
#define MT_BUG_ON(__tree, __x) do {					\
	atomic_inc(&maple_tree_tests_run);				\
	if (__x) {							\
		pr_info("BUG at %s:%d (%u)\n",				\
		__func__, __LINE__, __x);				\
		mt_dump(__tree, mt_dump_hex);				\
		pr_info("Pass: %u Run:%u\n",				\
			atomic_read(&maple_tree_tests_passed),		\
			atomic_read(&maple_tree_tests_run));		\
		dump_stack();						\
	} else {							\
		atomic_inc(&maple_tree_tests_passed);			\
	}								\
} while (0)

#define MAS_BUG_ON(__mas, __x) do {					\
	atomic_inc(&maple_tree_tests_run);				\
	if (__x) {							\
		pr_info("BUG at %s:%d (%u)\n",				\
		__func__, __LINE__, __x);				\
		mas_dump(__mas);					\
		mt_dump((__mas)->tree, mt_dump_hex);			\
		pr_info("Pass: %u Run:%u\n",				\
			atomic_read(&maple_tree_tests_passed),		\
			atomic_read(&maple_tree_tests_run));		\
		dump_stack();						\
	} else {							\
		atomic_inc(&maple_tree_tests_passed);			\
	}								\
} while (0)

#define MAS_WR_BUG_ON(__wrmas, __x) do {				\
	atomic_inc(&maple_tree_tests_run);				\
	if (__x) {							\
		pr_info("BUG at %s:%d (%u)\n",				\
		__func__, __LINE__, __x);				\
		mas_wr_dump(__wrmas);					\
		mas_dump((__wrmas)->mas);				\
		mt_dump((__wrmas)->mas->tree, mt_dump_hex);		\
		pr_info("Pass: %u Run:%u\n",				\
			atomic_read(&maple_tree_tests_passed),		\
			atomic_read(&maple_tree_tests_run));		\
		dump_stack();						\
	} else {							\
		atomic_inc(&maple_tree_tests_passed);			\
	}								\
} while (0)

#define MT_WARN_ON(__tree, __x)  ({					\
	int ret = !!(__x);						\
	atomic_inc(&maple_tree_tests_run);				\
	if (ret) {							\
		pr_info("WARN at %s:%d (%u)\n",				\
		__func__, __LINE__, __x);				\
		mt_dump(__tree, mt_dump_hex);				\
		pr_info("Pass: %u Run:%u\n",				\
			atomic_read(&maple_tree_tests_passed),		\
			atomic_read(&maple_tree_tests_run));		\
		dump_stack();						\
	} else {							\
		atomic_inc(&maple_tree_tests_passed);			\
	}								\
	unlikely(ret);							\
})

#define MAS_WARN_ON(__mas, __x) ({					\
	int ret = !!(__x);						\
	atomic_inc(&maple_tree_tests_run);				\
	if (ret) {							\
		pr_info("WARN at %s:%d (%u)\n",				\
		__func__, __LINE__, __x);				\
		mas_dump(__mas);					\
		mt_dump((__mas)->tree, mt_dump_hex);			\
		pr_info("Pass: %u Run:%u\n",				\
			atomic_read(&maple_tree_tests_passed),		\
			atomic_read(&maple_tree_tests_run));		\
		dump_stack();						\
	} else {							\
		atomic_inc(&maple_tree_tests_passed);			\
	}								\
	unlikely(ret);							\
})

#define MAS_WR_WARN_ON(__wrmas, __x) ({					\
	int ret = !!(__x);						\
	atomic_inc(&maple_tree_tests_run);				\
	if (ret) {							\
		pr_info("WARN at %s:%d (%u)\n",				\
		__func__, __LINE__, __x);				\
		mas_wr_dump(__wrmas);					\
		mas_dump((__wrmas)->mas);				\
		mt_dump((__wrmas)->mas->tree, mt_dump_hex);		\
		pr_info("Pass: %u Run:%u\n",				\
			atomic_read(&maple_tree_tests_passed),		\
			atomic_read(&maple_tree_tests_run));		\
		dump_stack();						\
	} else {							\
		atomic_inc(&maple_tree_tests_passed);			\
	}								\
	unlikely(ret);							\
})
#else
#define MT_BUG_ON(__tree, __x)		BUG_ON(__x)
#define MAS_BUG_ON(__mas, __x)		BUG_ON(__x)
#define MAS_WR_BUG_ON(__mas, __x)	BUG_ON(__x)
#define MT_WARN_ON(__tree, __x)		WARN_ON(__x)
#define MAS_WARN_ON(__mas, __x)		WARN_ON(__x)
#define MAS_WR_WARN_ON(__mas, __x)	WARN_ON(__x)
#endif /* CONFIG_DEBUG_MAPLE_TREE */

/**
 * __mas_set_range() - Set up Maple Tree operation state to a sub-range of the
 * current location.
 * @mas: Maple Tree operation state.
 * @start: New start of range in the Maple Tree.
 * @last: New end of range in the Maple Tree.
 *
 * Set the internal maple state values to a sub-range.
 * Please use mas_set_range() if you do not know where you are in the tree.
 */
static inline void __mas_set_range(struct ma_state *mas, unsigned long start,
		unsigned long last)
{
	/* Ensure the range starts within the current slot */
	MAS_WARN_ON(mas, mas_is_active(mas) &&
		   (mas->index > start || mas->last < start));
	mas->index = start;
	mas->last = last;
}

/**
 * mas_set_range() - Set up Maple Tree operation state for a different index.
 * @mas: Maple Tree operation state.
 * @start: New start of range in the Maple Tree.
 * @last: New end of range in the Maple Tree.
 *
 * Move the operation state to refer to a different range.  This will
 * have the effect of starting a walk from the top; see mas_next()
 * to move to an adjacent index.
 */
static inline
void mas_set_range(struct ma_state *mas, unsigned long start, unsigned long last)
{
	mas_reset(mas);
	__mas_set_range(mas, start, last);
}

/**
 * mas_set() - Set up Maple Tree operation state for a different index.
 * @mas: Maple Tree operation state.
 * @index: New index into the Maple Tree.
 *
 * Move the operation state to refer to a different index.  This will
 * have the effect of starting a walk from the top; see mas_next()
 * to move to an adjacent index.
 */
static inline void mas_set(struct ma_state *mas, unsigned long index)
{
	mas_set_range(mas, index, index);
}
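
/*
 * Sketch of re-aiming an existing state and walking to the new index
 * (illustrative; the caller must hold the RCU read lock or the tree lock):
 *
 *	mas_set(&mas, 0x2000);		// next walk restarts from the root
 *	entry = mas_walk(&mas);		// mas.index/mas.last now cover entry
 */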

static inline bool mt_external_lock(const struct maple_tree *mt)
{
	return (mt->ma_flags & MT_FLAGS_LOCK_MASK) == MT_FLAGS_LOCK_EXTERN;
}

/**
 * mt_init_flags() - Initialise an empty maple tree with flags.
 * @mt: Maple Tree
 * @flags: maple tree flags.
 *
 * If you need to initialise a Maple Tree with special flags (eg, an
 * allocation tree), use this function.
 *
 * Context: Any context.
 */
static inline void mt_init_flags(struct maple_tree *mt, unsigned int flags)
{
	mt->ma_flags = flags;
	if (!mt_external_lock(mt))
		spin_lock_init(&mt->ma_lock);
	rcu_assign_pointer(mt->ma_root, NULL);
}

/**
 * mt_init() - Initialise an empty maple tree.
 * @mt: Maple Tree
 *
 * Initialise an empty Maple Tree with the default flags.
 *
 * Context: Any context.
 */
static inline void mt_init(struct maple_tree *mt)
{
	mt_init_flags(mt, 0);
}

static inline bool mt_in_rcu(struct maple_tree *mt)
{
#ifdef CONFIG_MAPLE_RCU_DISABLED
	return false;
#endif
	return mt->ma_flags & MT_FLAGS_USE_RCU;
}

/**
 * mt_clear_in_rcu() - Switch the tree to non-RCU mode.
 * @mt: The Maple Tree
 */
static inline void mt_clear_in_rcu(struct maple_tree *mt)
{
	if (!mt_in_rcu(mt))
		return;

	if (mt_external_lock(mt)) {
		WARN_ON(!mt_lock_is_held(mt));
		mt->ma_flags &= ~MT_FLAGS_USE_RCU;
	} else {
		mtree_lock(mt);
		mt->ma_flags &= ~MT_FLAGS_USE_RCU;
		mtree_unlock(mt);
	}
}

/**
 * mt_set_in_rcu() - Switch the tree to RCU safe mode.
 * @mt: The Maple Tree
 */
static inline void mt_set_in_rcu(struct maple_tree *mt)
{
	if (mt_in_rcu(mt))
		return;

	if (mt_external_lock(mt)) {
		WARN_ON(!mt_lock_is_held(mt));
		mt->ma_flags |= MT_FLAGS_USE_RCU;
	} else {
		mtree_lock(mt);
		mt->ma_flags |= MT_FLAGS_USE_RCU;
		mtree_unlock(mt);
	}
}
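
/*
 * Sketch of switching a tree to RCU mode and reading under RCU (writers
 * still serialise on the tree lock; only the reader side is shown):
 *
 *	mt_set_in_rcu(&tree);
 *
 *	rcu_read_lock();
 *	entry = mtree_load(&tree, index);
 *	rcu_read_unlock();
 */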

static inline unsigned int mt_height(const struct maple_tree *mt)
{
	return (mt->ma_flags & MT_FLAGS_HEIGHT_MASK) >> MT_FLAGS_HEIGHT_OFFSET;
}

void *mt_find(struct maple_tree *mt, unsigned long *index, unsigned long max);
void *mt_find_after(struct maple_tree *mt, unsigned long *index,
		    unsigned long max);
void *mt_prev(struct maple_tree *mt, unsigned long index, unsigned long min);
void *mt_next(struct maple_tree *mt, unsigned long index, unsigned long max);

/**
 * mt_for_each - Iterate over each entry starting at index until max.
 * @__tree: The Maple Tree
 * @__entry: The current entry
 * @__index: The index to start the search from. Subsequently used as iterator.
 * @__max: The maximum limit for @index
 *
 * This iterator skips all entries which resolve to a NULL pointer,
 * e.g. entries which have been reserved with XA_ZERO_ENTRY.
 */
#define mt_for_each(__tree, __entry, __index, __max) \
	for (__entry = mt_find(__tree, &(__index), __max); \
		__entry; __entry = mt_find_after(__tree, &(__index), __max))
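
/*
 * Example traversal with mt_for_each() (a sketch; @__index must be an
 * unsigned long variable since the iterator advances it past each entry):
 *
 *	unsigned long index = 0;
 *	void *entry;
 *
 *	mt_for_each(&tree, entry, index, ULONG_MAX) {
 *		// index has already been advanced past entry's range here
 *	}
 */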

#endif /*_LINUX_MAPLE_TREE_H */