/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_BTREE_TYPES_H
#define _BCACHEFS_BTREE_TYPES_H

#include <linux/list.h>
#include <linux/rhashtable.h>

#include "bbpos_types.h"
#include "btree_key_cache_types.h"
#include "buckets_types.h"
#include "darray.h"
#include "errcode.h"
#include "journal_types.h"
#include "replicas_types.h"
#include "six.h"

struct open_bucket;
struct btree_update;
struct btree_trans;

#define MAX_BSETS		3U

struct btree_nr_keys {

	/*
	 * Amount of live metadata (i.e. size of node after a compaction) in
	 * units of u64s
	 */
	u16			live_u64s;
	u16			bset_u64s[MAX_BSETS];

	/* live keys only: */
	u16			packed_keys;
	u16			unpacked_keys;
};
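
/*
 * Informal invariants of the counters above (a sketch based on the accounting
 * in bset.c): live_u64s equals the sum of bset_u64s[], and
 * packed_keys + unpacked_keys equals the number of live keys in the node.
 */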

struct bset_tree {
	/*
	 * We construct a binary tree in an array as if the array
	 * started at 1, so that things line up on the same cachelines
	 * better: see comments in bset.c at cacheline_to_bkey() for
	 * details
	 */

	/* size of the binary tree and prev array */
	u16			size;

	/* function of size - precalculated for to_inorder() */
	u16			extra;

	u16			data_offset;
	u16			aux_data_offset;
	u16			end_offset;
};
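
/*
 * Illustrative sketch of the 1-based layout mentioned above (the real lookup
 * code lives in bset.c): with the root at index 1, node j has children at
 * 2*j and 2*j + 1 and its parent at j/2, so the tree is stored as a flat
 * array and the nodes near the top of a search path tend to share
 * cachelines.
 */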

struct btree_write {
	struct journal_entry_pin	journal;
};

struct btree_alloc {
	struct open_buckets	ob;
	__BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX);
};

struct btree_bkey_cached_common {
	struct six_lock		lock;
	u8			level;
	u8			btree_id;
	bool			cached;
};

struct btree {
	struct btree_bkey_cached_common c;

	struct rhash_head	hash;
	u64			hash_val;

	unsigned long		flags;
	u16			written;
	u8			nsets;
	u8			nr_key_bits;
	u16			version_ondisk;

	struct bkey_format	format;

	struct btree_node	*data;
	void			*aux_data;

	/*
	 * Sets of sorted keys - the real btree node - plus a binary search tree
	 *
	 * set[0] is special; set[0]->tree, set[0]->prev and set[0]->data point
	 * to the memory we have allocated for this btree node. Additionally,
	 * set[0]->data points to the entire btree node as it exists on disk.
	 */
	struct bset_tree	set[MAX_BSETS];

	struct btree_nr_keys	nr;
	u16			sib_u64s[2];
	u16			whiteout_u64s;
	u8			byte_order;
	u8			unpack_fn_len;

	struct btree_write	writes[2];

	/* Key/pointer for this btree node */
	__BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX);

	/*
	 * XXX: add a delete sequence number, so when bch2_btree_node_relock()
	 * fails because the lock sequence number has changed - i.e. the
	 * contents were modified - we can still relock the node if it's still
	 * the one we want, without redoing the traversal
	 */

	/*
	 * For asynchronous splits/interior node updates:
	 * When we do a split, we allocate new child nodes and update the parent
	 * node to point to them: we update the parent in memory immediately,
	 * but then we must wait until the children have been written out before
	 * the update to the parent can be written - this is a list of the
	 * btree_updates that are blocking this node from being
	 * written:
	 */
	struct list_head	write_blocked;

	/*
	 * Also for asynchronous splits/interior node updates:
	 * If a btree node isn't reachable yet, we don't want to kick off
	 * another write - because that write also won't yet be reachable and
	 * marking it as completed before it's reachable would be incorrect:
	 */
	unsigned long		will_make_reachable;

	struct open_buckets	ob;

	/* lru list */
	struct list_head	list;
};

#define BCH_BTREE_CACHE_NOT_FREED_REASONS()	\
	x(lock_intent)				\
	x(lock_write)				\
	x(dirty)				\
	x(read_in_flight)			\
	x(write_in_flight)			\
	x(noevict)				\
	x(write_blocked)			\
	x(will_make_reachable)			\
	x(access_bit)

enum bch_btree_cache_not_freed_reasons {
#define x(n) BCH_BTREE_CACHE_NOT_FREED_##n,
	BCH_BTREE_CACHE_NOT_FREED_REASONS()
#undef x
	BCH_BTREE_CACHE_NOT_FREED_REASONS_NR,
};
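
/*
 * Roughly, the x-macro above expands to one counter index per reason:
 *
 *	BCH_BTREE_CACHE_NOT_FREED_lock_intent	= 0,
 *	BCH_BTREE_CACHE_NOT_FREED_lock_write	= 1,
 *	...
 *	BCH_BTREE_CACHE_NOT_FREED_access_bit	= 8,
 *	BCH_BTREE_CACHE_NOT_FREED_REASONS_NR	= 9,
 *
 * which index the not_freed[] shrinker stats in struct btree_cache below.
 */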

struct btree_cache_list {
	unsigned		idx;
	struct shrinker		*shrink;
	struct list_head	list;
	size_t			nr;
};

struct btree_cache {
	struct rhashtable	table;
	bool			table_init_done;
	/*
	 * We never free a struct btree, except on shutdown - we just put it on
	 * the btree_cache_freed list and reuse it later. This simplifies the
	 * code, and it doesn't cost us much memory as the memory usage is
	 * dominated by buffers that hold the actual btree node data and those
	 * can be freed - and the number of struct btrees allocated is
	 * effectively bounded.
	 *
	 * btree_cache_freeable effectively is a small cache - we use it because
	 * high order page allocations can be rather expensive, and it's quite
	 * common to delete and allocate btree nodes in quick succession. It
	 * should never grow past ~2-3 nodes in practice.
	 */
	struct mutex		lock;
	struct list_head	freeable;
	struct list_head	freed_pcpu;
	struct list_head	freed_nonpcpu;
	struct btree_cache_list	live[2];

	size_t			nr_freeable;
	size_t			nr_reserve;
	size_t			nr_by_btree[BTREE_ID_NR];
	atomic_long_t		nr_dirty;

	/* shrinker stats */
	size_t			nr_freed;
	u64			not_freed[BCH_BTREE_CACHE_NOT_FREED_REASONS_NR];

	/*
	 * If we need to allocate memory for a new btree node and that
	 * allocation fails, we can cannibalize another node in the btree cache
	 * to satisfy the allocation - lock to guarantee only one thread does
	 * this at a time:
	 */
	struct task_struct	*alloc_lock;
	struct closure_waitlist	alloc_wait;

	struct bbpos		pinned_nodes_start;
	struct bbpos		pinned_nodes_end;
	/* btree id masks: [0] for leaf nodes, [1] for interior nodes */
	u64			pinned_nodes_mask[2];
};
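
/*
 * Rough sketch of the cannibalize protocol guarded by alloc_lock/alloc_wait
 * above (illustrative only - see btree_cache.c for the real helpers): a
 * thread claims the lock by atomically swinging alloc_lock from NULL to
 * current, and losers wait on alloc_wait, e.g.
 *
 *	if (!cmpxchg(&bc->alloc_lock, NULL, current))
 *		... evict/reuse a cached node for our allocation ...
 *	else
 *		closure_wait(&bc->alloc_wait, cl);
 */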

struct btree_node_iter {
	struct btree_node_iter_set {
		u16	k, end;
	} data[MAX_BSETS];
};

#define BTREE_ITER_FLAGS()			\
	x(slots)				\
	x(intent)				\
	x(prefetch)				\
	x(is_extents)				\
	x(not_extents)				\
	x(cached)				\
	x(with_key_cache)			\
	x(with_updates)				\
	x(with_journal)				\
	x(snapshot_field)			\
	x(all_snapshots)			\
	x(filter_snapshots)			\
	x(nopreserve)				\
	x(cached_nofill)			\
	x(key_cache_fill)			\

#define STR_HASH_FLAGS()			\
	x(must_create)				\
	x(must_replace)

#define BTREE_UPDATE_FLAGS()			\
	x(internal_snapshot_node)		\
	x(nojournal)				\
	x(key_cache_reclaim)


/*
 * BTREE_TRIGGER_norun - don't run triggers at all
 *
 * BTREE_TRIGGER_transactional - we're running transactional triggers as part of
 * a transaction commit: triggers may generate new updates
 *
 * BTREE_TRIGGER_atomic - we're running atomic triggers during a transaction
 * commit: we have our journal reservation, we're holding btree node write
 * locks, and we know the transaction is going to commit (returning an error
 * here is a fatal error, causing us to go emergency read-only)
 *
 * BTREE_TRIGGER_gc - we're in gc/fsck: running triggers to recalculate e.g. disk usage
 *
 * BTREE_TRIGGER_insert - @new is entering the btree
 * BTREE_TRIGGER_overwrite - @old is leaving the btree
 *
 * BTREE_TRIGGER_bucket_invalidate - signal from bucket invalidate path to alloc
 * trigger
 */
#define BTREE_TRIGGER_FLAGS()			\
	x(norun)				\
	x(transactional)			\
	x(atomic)				\
	x(check_repair)				\
	x(gc)					\
	x(insert)				\
	x(overwrite)				\
	x(is_root)				\
	x(bucket_invalidate)

enum {
#define x(n) BTREE_ITER_FLAG_BIT_##n,
	BTREE_ITER_FLAGS()
	STR_HASH_FLAGS()
	BTREE_UPDATE_FLAGS()
	BTREE_TRIGGER_FLAGS()
#undef x
};

/* iter flags must fit in a u16: */
//BUILD_BUG_ON(BTREE_ITER_FLAG_BIT_key_cache_fill > 15);

enum btree_iter_update_trigger_flags {
#define x(n) BTREE_ITER_##n	= 1U << BTREE_ITER_FLAG_BIT_##n,
	BTREE_ITER_FLAGS()
#undef x
#define x(n) STR_HASH_##n	= 1U << BTREE_ITER_FLAG_BIT_##n,
	STR_HASH_FLAGS()
#undef x
#define x(n) BTREE_UPDATE_##n	= 1U << BTREE_ITER_FLAG_BIT_##n,
	BTREE_UPDATE_FLAGS()
#undef x
#define x(n) BTREE_TRIGGER_##n	= 1U << BTREE_ITER_FLAG_BIT_##n,
	BTREE_TRIGGER_FLAGS()
#undef x
};
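
/*
 * Example of how the two enums above expand: BTREE_ITER_slots == 1U << 0,
 * BTREE_ITER_intent == 1U << 1, ..., BTREE_ITER_key_cache_fill == 1U << 14,
 * then STR_HASH_must_create == 1U << 15, BTREE_UPDATE_internal_snapshot_node
 * == 1U << 17, and the BTREE_TRIGGER_* flags take the bits after that - all
 * four groups share one bit namespace, which is why only the iter flags have
 * to fit in a u16.
 */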

enum btree_path_uptodate {
	BTREE_ITER_UPTODATE		= 0,
	BTREE_ITER_NEED_RELOCK		= 1,
	BTREE_ITER_NEED_TRAVERSE	= 2,
};

#if defined(CONFIG_BCACHEFS_LOCK_TIME_STATS) || defined(CONFIG_BCACHEFS_DEBUG)
#define TRACK_PATH_ALLOCATED
#endif

typedef u16 btree_path_idx_t;

struct btree_path {
	btree_path_idx_t	sorted_idx;
	u8			ref;
	u8			intent_ref;

	/* btree_iter_copy starts here: */
	struct bpos		pos;

	enum btree_id		btree_id:5;
	bool			cached:1;
	bool			preserve:1;
	enum btree_path_uptodate uptodate:2;
	/*
	 * When true, failing to relock this path will cause the transaction to
	 * restart:
	 */
	bool			should_be_locked:1;
	unsigned		level:3,
				locks_want:3;
	u8			nodes_locked;

	struct btree_path_level {
		struct btree	*b;
		struct btree_node_iter iter;
		u32		lock_seq;
#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS
		u64             lock_taken_time;
#endif
	}			l[BTREE_MAX_DEPTH];
#ifdef TRACK_PATH_ALLOCATED
	unsigned long		ip_allocated;
#endif
};

static inline struct btree_path_level *path_l(struct btree_path *path)
{
	return path->l + path->level;
}

static inline unsigned long btree_path_ip_allocated(struct btree_path *path)
{
#ifdef TRACK_PATH_ALLOCATED
	return path->ip_allocated;
#else
	return _THIS_IP_;
#endif
}

/*
 * @pos			- iterator's current position
 * @level		- current btree depth
 * @locks_want		- btree level below which we start taking intent locks
 * @nodes_locked	- bitmask indicating which nodes in @nodes are locked
 * @nodes_intent_locked	- bitmask indicating which locks are intent locks
 */
struct btree_iter {
	struct btree_trans	*trans;
	btree_path_idx_t	path;
	btree_path_idx_t	update_path;
	btree_path_idx_t	key_cache_path;

	enum btree_id		btree_id:8;
	u8			min_depth;

	/* btree_iter_copy starts here: */
	u16			flags;

	/* When we're filtering by snapshot, the snapshot ID we're looking for: */
	unsigned		snapshot;

	struct bpos		pos;
	/*
	 * Current unpacked key - so that bch2_btree_iter_next()/
	 * bch2_btree_iter_next_slot() can correctly advance pos.
	 */
	struct bkey		k;

	/* BTREE_ITER_with_journal: */
	size_t			journal_idx;
#ifdef TRACK_PATH_ALLOCATED
	unsigned long		ip_allocated;
#endif
};

#define BKEY_CACHED_ACCESSED		0
#define BKEY_CACHED_DIRTY		1

struct bkey_cached {
	struct btree_bkey_cached_common c;

	unsigned long		flags;
	u16			u64s;
	struct bkey_cached_key	key;

	struct rhash_head	hash;

	struct journal_entry_pin journal;
	u64			seq;

	struct bkey_i		*k;
	struct rcu_head		rcu;
};

static inline struct bpos btree_node_pos(struct btree_bkey_cached_common *b)
{
	return !b->cached
		? container_of(b, struct btree, c)->key.k.p
		: container_of(b, struct bkey_cached, c)->key.pos;
}

struct btree_insert_entry {
	unsigned		flags;
	u8			bkey_type;
	enum btree_id		btree_id:8;
	u8			level:4;
	bool			cached:1;
	bool			insert_trigger_run:1;
	bool			overwrite_trigger_run:1;
	bool			key_cache_already_flushed:1;
	/*
	 * @old_k may be a key from the journal; @old_btree_u64s always refers
	 * to the size of the key being overwritten in the btree:
	 */
	u8			old_btree_u64s;
	btree_path_idx_t	path;
	struct bkey_i		*k;
	/* key being overwritten: */
	struct bkey		old_k;
	const struct bch_val	*old_v;
	unsigned long		ip_allocated;
};
/* Number of btree paths we preallocate, usually enough */
#define BTREE_ITER_INITIAL		64
/*
 * Limit for btree_trans_too_many_iters(); this is enough that almost all code
 * paths should run inside this limit, and if they don't it usually indicates a
 * bug (leaking/duplicated btree paths).
 *
 * exception: some fsck paths
 *
 * bugs with excessive path usage seem to have possibly been eliminated now, so
 * we might consider eliminating this (and btree_trans_too_many_iters()) at some
 * point.
 */
#define BTREE_ITER_NORMAL_LIMIT		256
/* never exceed limit */
#define BTREE_ITER_MAX			(1U << 10)

struct btree_trans_commit_hook;
typedef int (btree_trans_commit_hook_fn)(struct btree_trans *, struct btree_trans_commit_hook *);

struct btree_trans_commit_hook {
	btree_trans_commit_hook_fn	*fn;
	struct btree_trans_commit_hook	*next;
};

#define BTREE_TRANS_MEM_MAX	(1U << 16)

#define BTREE_TRANS_MAX_LOCK_HOLD_TIME_NS	10000

struct btree_trans_paths {
	unsigned long		nr_paths;
	struct btree_path	paths[];
};

struct btree_trans {
	struct bch_fs		*c;

	unsigned long		*paths_allocated;
	struct btree_path	*paths;
	btree_path_idx_t	*sorted;
	struct btree_insert_entry *updates;

	void			*mem;
	unsigned		mem_top;
	unsigned		mem_bytes;

	btree_path_idx_t	nr_sorted;
	btree_path_idx_t	nr_paths;
	btree_path_idx_t	nr_paths_max;
	btree_path_idx_t	nr_updates;
	u8			fn_idx;
	u8			lock_must_abort;
	bool			lock_may_not_fail:1;
	bool			srcu_held:1;
	bool			locked:1;
	bool			pf_memalloc_nofs:1;
	bool			write_locked:1;
	bool			used_mempool:1;
	bool			in_traverse_all:1;
	bool			paths_sorted:1;
	bool			memory_allocation_failure:1;
	bool			journal_transaction_names:1;
	bool			journal_replay_not_finished:1;
	bool			notrace_relock_fail:1;
	enum bch_errcode	restarted:16;
	u32			restart_count;

	u64			last_begin_time;
	unsigned long		last_begin_ip;
	unsigned long		last_restarted_ip;
	unsigned long		last_unlock_ip;
	unsigned long		srcu_lock_time;

	const char		*fn;
	struct btree_bkey_cached_common *locking;
	struct six_lock_waiter	locking_wait;
	int			srcu_idx;

	/* update path: */
	u16			journal_entries_u64s;
	u16			journal_entries_size;
	struct jset_entry	*journal_entries;

	struct btree_trans_commit_hook *hooks;
	struct journal_entry_pin *journal_pin;

	struct journal_res	journal_res;
	u64			*journal_seq;
	struct disk_reservation *disk_res;

	struct bch_fs_usage_base fs_usage_delta;

	unsigned		journal_u64s;
	unsigned		extra_disk_res; /* XXX kill */

#ifdef CONFIG_DEBUG_LOCK_ALLOC
	struct lockdep_map	dep_map;
#endif
	/* Entries before this are zeroed out on every bch2_trans_get() call */

	struct list_head	list;
	struct closure		ref;

	unsigned long		_paths_allocated[BITS_TO_LONGS(BTREE_ITER_INITIAL)];
	struct btree_trans_paths trans_paths;
	struct btree_path	_paths[BTREE_ITER_INITIAL];
	btree_path_idx_t	_sorted[BTREE_ITER_INITIAL + 4];
	struct btree_insert_entry _updates[BTREE_ITER_INITIAL];
};
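
/*
 * Layout note (an observation, not a guarantee): _paths[] is placed
 * immediately after trans_paths, so trans_paths' flexible paths[] member
 * aliases the preallocated array; nr_paths can then be recovered from a
 * struct btree_path * via container_of() whether paths points at _paths[]
 * or at a larger, reallocated array.
 */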

static inline struct btree_path *btree_iter_path(struct btree_trans *trans, struct btree_iter *iter)
{
	return trans->paths + iter->path;
}

static inline struct btree_path *btree_iter_key_cache_path(struct btree_trans *trans, struct btree_iter *iter)
{
	return iter->key_cache_path
		? trans->paths + iter->key_cache_path
		: NULL;
}

#define BCH_BTREE_WRITE_TYPES()						\
	x(initial,		0)					\
	x(init_next_bset,	1)					\
	x(cache_reclaim,	2)					\
	x(journal_reclaim,	3)					\
	x(interior,		4)

enum btree_write_type {
#define x(t, n) BTREE_WRITE_##t,
	BCH_BTREE_WRITE_TYPES()
#undef x
	BTREE_WRITE_TYPE_NR,
};

#define BTREE_WRITE_TYPE_MASK	(roundup_pow_of_two(BTREE_WRITE_TYPE_NR) - 1)
#define BTREE_WRITE_TYPE_BITS	ilog2(roundup_pow_of_two(BTREE_WRITE_TYPE_NR))
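
/*
 * Worked example: with BTREE_WRITE_TYPE_NR == 5, roundup_pow_of_two(5) == 8,
 * so BTREE_WRITE_TYPE_MASK == 0x7 and BTREE_WRITE_TYPE_BITS == 3 - the write
 * type occupies the low three bits of btree->flags, below the flags defined
 * next.
 */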

#define BTREE_FLAGS()							\
	x(read_in_flight)						\
	x(read_error)							\
	x(dirty)							\
	x(need_write)							\
	x(write_blocked)						\
	x(will_make_reachable)						\
	x(noevict)							\
	x(write_idx)							\
	x(accessed)							\
	x(write_in_flight)						\
	x(write_in_flight_inner)					\
	x(just_written)							\
	x(dying)							\
	x(fake)								\
	x(need_rewrite)							\
	x(never_write)							\
	x(pinned)

enum btree_flags {
	/* First bits for btree node write type */
	BTREE_NODE_FLAGS_START = BTREE_WRITE_TYPE_BITS - 1,
#define x(flag)	BTREE_NODE_##flag,
	BTREE_FLAGS()
#undef x
};

#define x(flag)								\
static inline bool btree_node_ ## flag(struct btree *b)			\
{	return test_bit(BTREE_NODE_ ## flag, &b->flags); }		\
									\
static inline void set_btree_node_ ## flag(struct btree *b)		\
{	set_bit(BTREE_NODE_ ## flag, &b->flags); }			\
									\
static inline void clear_btree_node_ ## flag(struct btree *b)		\
{	clear_bit(BTREE_NODE_ ## flag, &b->flags); }

BTREE_FLAGS()
#undef x
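
/*
 * The macro above generates three accessors per flag, e.g. for dirty:
 *
 *	bool btree_node_dirty(struct btree *b);
 *	void set_btree_node_dirty(struct btree *b);
 *	void clear_btree_node_dirty(struct btree *b);
 *
 * each operating on the BTREE_NODE_dirty bit of b->flags.
 */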

static inline struct btree_write *btree_current_write(struct btree *b)
{
	return b->writes + btree_node_write_idx(b);
}

static inline struct btree_write *btree_prev_write(struct btree *b)
{
	return b->writes + (btree_node_write_idx(b) ^ 1);
}

static inline struct bset_tree *bset_tree_last(struct btree *b)
{
	EBUG_ON(!b->nsets);
	return b->set + b->nsets - 1;
}

static inline void *
__btree_node_offset_to_ptr(const struct btree *b, u16 offset)
{
	return (void *) ((u64 *) b->data + 1 + offset);
}

static inline u16
__btree_node_ptr_to_offset(const struct btree *b, const void *p)
{
	u16 ret = (u64 *) p - 1 - (u64 *) b->data;

	EBUG_ON(__btree_node_offset_to_ptr(b, ret) != p);
	return ret;
}
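
/*
 * Example of the offset convention used by the two helpers above: offsets are
 * in units of u64s counted from one u64 past the start of the node, so
 * __btree_node_offset_to_ptr(b, 0) == (void *) ((u64 *) b->data + 1), and the
 * two functions are exact inverses.
 */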

static inline struct bset *bset(const struct btree *b,
				const struct bset_tree *t)
{
	return __btree_node_offset_to_ptr(b, t->data_offset);
}

static inline void set_btree_bset_end(struct btree *b, struct bset_tree *t)
{
	t->end_offset =
		__btree_node_ptr_to_offset(b, vstruct_last(bset(b, t)));
}

static inline void set_btree_bset(struct btree *b, struct bset_tree *t,
				  const struct bset *i)
{
	t->data_offset = __btree_node_ptr_to_offset(b, i);
	set_btree_bset_end(b, t);
}

static inline struct bset *btree_bset_first(struct btree *b)
{
	return bset(b, b->set);
}

static inline struct bset *btree_bset_last(struct btree *b)
{
	return bset(b, bset_tree_last(b));
}

static inline u16
__btree_node_key_to_offset(const struct btree *b, const struct bkey_packed *k)
{
	return __btree_node_ptr_to_offset(b, k);
}

static inline struct bkey_packed *
__btree_node_offset_to_key(const struct btree *b, u16 k)
{
	return __btree_node_offset_to_ptr(b, k);
}

static inline unsigned btree_bkey_first_offset(const struct bset_tree *t)
{
	return t->data_offset + offsetof(struct bset, _data) / sizeof(u64);
}

#define btree_bkey_first(_b, _t)					\
({									\
	EBUG_ON(bset(_b, _t)->start !=					\
		__btree_node_offset_to_key(_b, btree_bkey_first_offset(_t)));\
									\
	bset(_b, _t)->start;						\
})

#define btree_bkey_last(_b, _t)						\
({									\
	EBUG_ON(__btree_node_offset_to_key(_b, (_t)->end_offset) !=	\
		vstruct_last(bset(_b, _t)));				\
									\
	__btree_node_offset_to_key(_b, (_t)->end_offset);		\
})

static inline unsigned bset_u64s(struct bset_tree *t)
{
	return t->end_offset - t->data_offset -
		sizeof(struct bset) / sizeof(u64);
}

static inline unsigned bset_dead_u64s(struct btree *b, struct bset_tree *t)
{
	return bset_u64s(t) - b->nr.bset_u64s[t - b->set];
}

static inline unsigned bset_byte_offset(struct btree *b, void *i)
{
	return i - (void *) b->data;
}

enum btree_node_type {
	BKEY_TYPE_btree,
#define x(kwd, val, ...) BKEY_TYPE_##kwd = val + 1,
	BCH_BTREE_IDS()
#undef x
	BKEY_TYPE_NR
};

/* Type of a key in btree @id at level @level: */
static inline enum btree_node_type __btree_node_type(unsigned level, enum btree_id id)
{
	return level ? BKEY_TYPE_btree : (unsigned) id + 1;
}
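
/*
 * E.g. __btree_node_type(0, BTREE_ID_extents) == BKEY_TYPE_extents, while any
 * interior node (level > 0) holds keys of type BKEY_TYPE_btree regardless of
 * which btree it belongs to.
 */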

/* Type of keys @b contains: */
static inline enum btree_node_type btree_node_type(struct btree *b)
{
	return __btree_node_type(b->c.level, b->c.btree_id);
}

const char *bch2_btree_node_type_str(enum btree_node_type);

#define BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS		\
	(BIT_ULL(BKEY_TYPE_extents)|			\
	 BIT_ULL(BKEY_TYPE_alloc)|			\
	 BIT_ULL(BKEY_TYPE_inodes)|			\
	 BIT_ULL(BKEY_TYPE_stripes)|			\
	 BIT_ULL(BKEY_TYPE_reflink)|			\
	 BIT_ULL(BKEY_TYPE_subvolumes)|			\
	 BIT_ULL(BKEY_TYPE_btree))

#define BTREE_NODE_TYPE_HAS_ATOMIC_TRIGGERS		\
	(BIT_ULL(BKEY_TYPE_alloc)|			\
	 BIT_ULL(BKEY_TYPE_inodes)|			\
	 BIT_ULL(BKEY_TYPE_stripes)|			\
	 BIT_ULL(BKEY_TYPE_snapshots))

#define BTREE_NODE_TYPE_HAS_TRIGGERS			\
	(BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS|		\
	 BTREE_NODE_TYPE_HAS_ATOMIC_TRIGGERS)

static inline bool btree_node_type_has_trans_triggers(enum btree_node_type type)
{
	return BIT_ULL(type) & BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS;
}

static inline bool btree_node_type_has_atomic_triggers(enum btree_node_type type)
{
	return BIT_ULL(type) & BTREE_NODE_TYPE_HAS_ATOMIC_TRIGGERS;
}

static inline bool btree_node_type_has_triggers(enum btree_node_type type)
{
	return BIT_ULL(type) & BTREE_NODE_TYPE_HAS_TRIGGERS;
}

static inline bool btree_node_type_is_extents(enum btree_node_type type)
{
	const u64 mask = 0
#define x(name, nr, flags, ...)	|((!!((flags) & BTREE_ID_EXTENTS)) << (nr + 1))
	BCH_BTREE_IDS()
#undef x
	;

	return BIT_ULL(type) & mask;
}
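
/*
 * Note the "nr + 1" shift above: this mask is indexed by btree_node_type, not
 * btree_id, and BKEY_TYPE_##kwd == val + 1 because BKEY_TYPE_btree occupies
 * value 0.
 */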

static inline bool btree_id_is_extents(enum btree_id btree)
{
	return btree_node_type_is_extents(__btree_node_type(0, btree));
}

static inline bool btree_type_has_snapshots(enum btree_id id)
{
	const u64 mask = 0
#define x(name, nr, flags, ...)	|((!!((flags) & BTREE_ID_SNAPSHOTS)) << nr)
	BCH_BTREE_IDS()
#undef x
	;

	return BIT_ULL(id) & mask;
}

static inline bool btree_type_has_snapshot_field(enum btree_id id)
{
	const u64 mask = 0
#define x(name, nr, flags, ...)	|((!!((flags) & (BTREE_ID_SNAPSHOT_FIELD|BTREE_ID_SNAPSHOTS))) << nr)
	BCH_BTREE_IDS()
#undef x
	;

	return BIT_ULL(id) & mask;
}

static inline bool btree_type_has_ptrs(enum btree_id id)
{
	const u64 mask = 0
#define x(name, nr, flags, ...)	|((!!((flags) & BTREE_ID_DATA)) << nr)
	BCH_BTREE_IDS()
#undef x
	;

	return BIT_ULL(id) & mask;
}

struct btree_root {
	struct btree		*b;

	/* On disk root - see async splits: */
	__BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX);
	u8			level;
	u8			alive;
	s16			error;
};

enum btree_gc_coalesce_fail_reason {
	BTREE_GC_COALESCE_FAIL_RESERVE_GET,
	BTREE_GC_COALESCE_FAIL_KEYLIST_REALLOC,
	BTREE_GC_COALESCE_FAIL_FORMAT_FITS,
};

enum btree_node_sibling {
	btree_prev_sib,
	btree_next_sib,
};

struct get_locks_fail {
	unsigned	l;
	struct btree	*b;
};

#endif /* _BCACHEFS_BTREE_TYPES_H */