1  /* SPDX-License-Identifier: GPL-2.0-only */
2  /*
3   * Copyright (C) 2011 Red Hat, Inc.
4   *
5   * This file is released under the GPL.
6   */
7  #ifndef _LINUX_DM_BTREE_H
8  #define _LINUX_DM_BTREE_H
9  
10  #include "dm-block-manager.h"
11  
12  struct dm_transaction_manager;
13  
14  /*----------------------------------------------------------------*/
15  
16  /*
17   * Annotations used to check on-disk metadata is handled as little-endian.
18   */
19  #ifdef __CHECKER__
20  #  define __dm_written_to_disk(x) __releases(x)
21  #  define __dm_reads_from_disk(x) __acquires(x)
22  #  define __dm_bless_for_disk(x) __acquire(x)
23  #  define __dm_unbless_for_disk(x) __release(x)
24  #else
25  #  define __dm_written_to_disk(x)
26  #  define __dm_reads_from_disk(x)
27  #  define __dm_bless_for_disk(x)
28  #  define __dm_unbless_for_disk(x)
29  #endif
30  
31  /*----------------------------------------------------------------*/
32  
33  /*
34   * Manipulates hierarchical B+ trees with 64-bit keys and arbitrary-sized
35   * values.
36   */
37  
38  /*
39   * Information about the values stored within the btree.
40   */
41  struct dm_btree_value_type {
42  	void *context;
43  
44  	/*
45  	 * The size in bytes of each value.
46  	 */
47  	uint32_t size;
48  
49  	/*
50  	 * Any of these methods can be safely set to NULL if you do not
51  	 * need the corresponding feature.
52  	 */
53  
54  	/*
55  	 * The btree is making a duplicate of a run of values, for instance
56  	 * because previously-shared btree nodes have now diverged.
57  	 * @value argument is the new copy that the copy function may modify.
58  	 * (Probably it just wants to increment a reference count
59  	 * somewhere.) This method is _not_ called for insertion of a new
60  	 * value: It is assumed the ref count is already 1.
61  	 */
62  	void (*inc)(void *context, const void *value, unsigned int count);
63  
64  	/*
65  	 * These values are being deleted.  The btree takes care of freeing
66  	 * the memory pointed to by @value.  Often the del function just
67  	 * needs to decrement a reference counts somewhere.
68  	 */
69  	void (*dec)(void *context, const void *value, unsigned int count);
70  
71  	/*
72  	 * A test for equality between two values.  When a value is
73  	 * overwritten with a new one, the old one has the dec method
74  	 * called _unless_ the new and old value are deemed equal.
75  	 */
76  	int (*equal)(void *context, const void *value1, const void *value2);
77  };
78  
79  /*
80   * The shape and contents of a btree.
81   */
82  struct dm_btree_info {
83  	struct dm_transaction_manager *tm;
84  
85  	/*
86  	 * Number of nested btrees. (Not the depth of a single tree.)
87  	 */
88  	unsigned int levels;
89  	struct dm_btree_value_type value_type;
90  };
91  
92  /*
93   * Set up an empty tree.  O(1).
94   */
95  int dm_btree_empty(struct dm_btree_info *info, dm_block_t *root);
96  
97  /*
98   * Delete a tree.  O(n) - this is the slow one!  It can also block, so
99   * please don't call it on an IO path.
100   */
101  int dm_btree_del(struct dm_btree_info *info, dm_block_t root);
102  
103  /*
104   * All the lookup functions return -ENODATA if the key cannot be found.
105   */
106  
107  /*
108   * Tries to find a key that matches exactly.  O(ln(n))
109   */
110  int dm_btree_lookup(struct dm_btree_info *info, dm_block_t root,
111  		    uint64_t *keys, void *value_le);
112  
113  /*
114   * Tries to find the first key where the bottom level key is >= to that
115   * given.  Useful for skipping empty sections of the btree.
116   */
117  int dm_btree_lookup_next(struct dm_btree_info *info, dm_block_t root,
118  			 uint64_t *keys, uint64_t *rkey, void *value_le);
119  
120  /*
121   * Insertion (or overwrite an existing value).  O(ln(n))
122   */
123  int dm_btree_insert(struct dm_btree_info *info, dm_block_t root,
124  		    uint64_t *keys, void *value, dm_block_t *new_root)
125  	__dm_written_to_disk(value);
126  
127  /*
128   * A variant of insert that indicates whether it actually inserted or just
129   * overwrote.  Useful if you're keeping track of the number of entries in a
130   * tree.
131   */
132  int dm_btree_insert_notify(struct dm_btree_info *info, dm_block_t root,
133  			   uint64_t *keys, void *value, dm_block_t *new_root,
134  			   int *inserted)
135  			   __dm_written_to_disk(value);
136  
137  /*
138   * Remove a key if present.  This doesn't remove empty sub trees.  Normally
139   * subtrees represent a separate entity, like a snapshot map, so this is
140   * correct behaviour.  O(ln(n)).
141   */
142  int dm_btree_remove(struct dm_btree_info *info, dm_block_t root,
143  		    uint64_t *keys, dm_block_t *new_root);
144  
145  /*
146   * Removes a _contiguous_ run of values starting from 'keys' and not
147   * reaching keys2 (where keys2 is keys with the final key replaced with
148   * 'end_key').  'end_key' is the one-past-the-end value.  'keys' may be
149   * altered.
150   */
151  int dm_btree_remove_leaves(struct dm_btree_info *info, dm_block_t root,
152  			   uint64_t *keys, uint64_t end_key,
153  			   dm_block_t *new_root, unsigned int *nr_removed);
154  
155  /*
156   * Returns < 0 on failure.  Otherwise the number of key entries that have
157   * been filled out.  Remember trees can have zero entries, and as such have
158   * no lowest key.
159   */
160  int dm_btree_find_lowest_key(struct dm_btree_info *info, dm_block_t root,
161  			     uint64_t *result_keys);
162  
163  /*
164   * Returns < 0 on failure.  Otherwise the number of key entries that have
165   * been filled out.  Remember trees can have zero entries, and as such have
166   * no highest key.
167   */
168  int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root,
169  			      uint64_t *result_keys);
170  
171  /*
172   * Iterate through the a btree, calling fn() on each entry.
173   * It only works for single level trees and is internally recursive, so
174   * monitor stack usage carefully.
175   */
176  int dm_btree_walk(struct dm_btree_info *info, dm_block_t root,
177  		  int (*fn)(void *context, uint64_t *keys, void *leaf),
178  		  void *context);
179  
180  
181  /*----------------------------------------------------------------*/
182  
183  /*
184   * Cursor API.  This does not follow the rolling lock convention.  Since we
185   * know the order that values are required we can issue prefetches to speed
186   * up iteration.  Use on a single level btree only.
187   */
188  #define DM_BTREE_CURSOR_MAX_DEPTH 16
189  
190  struct cursor_node {
191  	struct dm_block *b;
192  	unsigned int index;
193  };
194  
195  struct dm_btree_cursor {
196  	struct dm_btree_info *info;
197  	dm_block_t root;
198  
199  	bool prefetch_leaves;
200  	unsigned int depth;
201  	struct cursor_node nodes[DM_BTREE_CURSOR_MAX_DEPTH];
202  };
203  
204  /*
205   * Creates a fresh cursor.  If prefetch_leaves is set then it is assumed
206   * the btree contains block indexes that will be prefetched.  The cursor is
207   * quite large, so you probably don't want to put it on the stack.
208   */
209  int dm_btree_cursor_begin(struct dm_btree_info *info, dm_block_t root,
210  			  bool prefetch_leaves, struct dm_btree_cursor *c);
211  void dm_btree_cursor_end(struct dm_btree_cursor *c);
212  int dm_btree_cursor_next(struct dm_btree_cursor *c);
213  int dm_btree_cursor_skip(struct dm_btree_cursor *c, uint32_t count);
214  int dm_btree_cursor_get_value(struct dm_btree_cursor *c, uint64_t *key, void *value_le);
215  
216  #endif	/* _LINUX_DM_BTREE_H */
217