/* SPDX-License-Identifier: GPL-2.0 */

#ifndef _LINUX_SIX_H
#define _LINUX_SIX_H

/**
 * DOC: SIX locks overview
 *
 * Shared/intent/exclusive locks: sleepable read/write locks, like rw semaphores
 * but with an additional state: read/shared, intent, exclusive/write.
 *
 * The purpose of the intent state is to allow for greater concurrency on tree
 * structures without deadlocking. In general, a read lock can't be upgraded to a
 * write lock without deadlocking, so an operation that updates multiple nodes
 * would otherwise have to take write locks for the full duration of the operation.
 *
 * But by adding an intent state, which is exclusive with other intent locks but
 * not with readers, we can take intent locks at the start of the operation,
 * and then take write locks only for the actual update to each individual
 * node, without deadlocking.
 *
 * Example usage:
 *   six_lock_read(&foo->lock);
 *   six_unlock_read(&foo->lock);
 *
 * An intent lock must be held before taking a write lock:
 *   six_lock_intent(&foo->lock);
 *   six_lock_write(&foo->lock);
 *   six_unlock_write(&foo->lock);
 *   six_unlock_intent(&foo->lock);
 *
 * Other operations:
 *   six_trylock_read()
 *   six_trylock_intent()
 *   six_trylock_write()
 *
 *   six_lock_downgrade()	convert from intent to read
 *   six_lock_tryupgrade()	attempt to convert from read to intent, may fail
 *
 * There are also interfaces that take the lock type as an enum:
 *
 *   six_lock_type(&foo->lock, SIX_LOCK_read);
 *   six_trylock_convert(&foo->lock, SIX_LOCK_read, SIX_LOCK_intent);
 *   six_lock_type(&foo->lock, SIX_LOCK_write);
 *   six_unlock_type(&foo->lock, SIX_LOCK_write);
 *   six_unlock_type(&foo->lock, SIX_LOCK_intent);
 *
 * Lock sequence numbers - unlock(), relock():
 *
 *   Locks embed sequence numbers, which are incremented on write lock/unlock.
 *   This allows locks to be dropped and then retaken iff the state they protect
 *   hasn't changed; this makes it much easier to avoid holding locks while e.g.
 *   doing IO or allocating memory.
 *
 *   Example usage:
 *     six_lock_read(&foo->lock);
 *     u32 seq = six_lock_seq(&foo->lock);
 *     six_unlock_read(&foo->lock);
 *
 *     some_operation_that_may_block();
 *
 *     if (six_relock_read(&foo->lock, seq)) { ... }
 *
 *   If the relock operation succeeds, it is as if the lock was never unlocked.
 *
 * Reentrancy:
 *
 *   Six locks are not by themselves reentrant, but have counters for both the
 *   read and intent states that can be used to provide reentrancy by an upper
 *   layer that tracks held locks. If a lock is known to already be held in the
 *   read or intent state, six_lock_increment() can be used to bump the "lock
 *   held in this state" counter, increasing the number of unlock calls that
 *   will be required to fully unlock it.
 *
 *   Example usage:
 *     six_lock_read(&foo->lock);
 *     six_lock_increment(&foo->lock, SIX_LOCK_read);
 *     six_unlock_read(&foo->lock);
 *     six_unlock_read(&foo->lock);
 *   foo->lock is now fully unlocked.
 *
 *   Since the intent state supersedes read, it's legal to increment the read
 *   counter when holding an intent lock, but not the reverse.
 *
 *   A lock may only be held once for write: six_lock_increment(.., SIX_LOCK_write)
 *   is not legal.
 *
 * should_sleep_fn:
 *
 *   There is a six_lock() variant that takes a function pointer that is called
 *   immediately prior to schedule() when blocking, and may return an error to
 *   abort.
 *
 *   One possible use for this feature is when objects being locked are part of
 *   a cache and may be reused, and lock ordering is based on a property of the
 *   object that will change when the object is reused - i.e. logical key order.
 *
 *   If looking up an object in the cache may race with object reuse, and lock
 *   ordering is required to prevent deadlock, object reuse may change the
 *   correct lock order for that object and cause a deadlock. should_sleep_fn
 *   can be used to check if the object is still the object we want and avoid
 *   this deadlock.
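 *
 *   A minimal sketch of such a callback - struct foo, foo_matches_key() and
 *   the use of -EINTR are hypothetical, not part of this API:
 *
 *     static int foo_should_sleep(struct six_lock *lock, void *p)
 *     {
 *             struct foo *foo = container_of(lock, struct foo, lock);
 *
 *             return foo_matches_key(foo, p) ? 0 : -EINTR;
 *     }
 *
 *   A nonzero return aborts the lock attempt and is passed back to the caller
 *   of six_lock_ip() / six_lock_ip_waiter().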
 *
 * Wait list entry interface:
 *
 *   There is a six_lock() variant, six_lock_waiter(), that takes a pointer to a
 *   wait list entry. By embedding six_lock_waiter into another object, and by
 *   traversing lock waitlists, it is then possible for an upper layer to
 *   implement full cycle detection for deadlock avoidance.
 *
 *   should_sleep_fn should be used for invoking the cycle detector, walking the
 *   graph of held locks to check for a deadlock. The upper layer must track
 *   held locks for each thread, and each thread's held locks must be reachable
 *   from its six_lock_waiter object.
 *
 *   six_lock_waiter() will add the wait object to the waitlist before retrying
 *   to take the lock and before calling should_sleep_fn, and the wait object
 *   will not be removed from the waitlist until either the lock has been
 *   successfully acquired, or we aborted because should_sleep_fn returned an
 *   error.
 *
 *   Also, six_lock_waiter contains a timestamp, and waiters on a waitlist will
 *   have timestamps in strictly ascending order - this is so the timestamp can
 *   be used as a cursor for lock graph traversal.
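 *
 *   A sketch of the embedding - struct trans and its held-locks list are
 *   hypothetical stand-ins for whatever the upper layer uses:
 *
 *     struct trans {
 *             struct six_lock_waiter  wait;
 *             struct list_head        locks_held;
 *     };
 *
 *   A cycle detector called from should_sleep_fn can then walk a lock's
 *   wait_list (under wait_lock), use container_of() to get from each
 *   six_lock_waiter back to its struct trans, and from there follow that
 *   thread's held locks to their waiters, stopping when the traversal
 *   completes or a cycle is found.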
 */

#include <linux/lockdep.h>
#include <linux/sched.h>
#include <linux/types.h>

enum six_lock_type {
	SIX_LOCK_read,
	SIX_LOCK_intent,
	SIX_LOCK_write,
};

struct six_lock {
	atomic_t		state;
	u32			seq;
	unsigned		intent_lock_recurse;
	struct task_struct	*owner;
	unsigned __percpu	*readers;
	raw_spinlock_t		wait_lock;
	struct list_head	wait_list;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	struct lockdep_map	dep_map;
#endif
};

struct six_lock_waiter {
	struct list_head	list;
	struct task_struct	*task;
	enum six_lock_type	lock_want;
	bool			lock_acquired;
	u64			start_time;
};

typedef int (*six_lock_should_sleep_fn)(struct six_lock *lock, void *);

void six_lock_exit(struct six_lock *lock);

enum six_lock_init_flags {
	SIX_LOCK_INIT_PCPU	= 1U << 0,
};

void __six_lock_init(struct six_lock *lock, const char *name,
		     struct lock_class_key *key, enum six_lock_init_flags flags);

/**
 * six_lock_init - initialize a six lock
 * @lock:	lock to initialize
 * @flags:	optional flags, i.e. SIX_LOCK_INIT_PCPU
 */
#define six_lock_init(lock, flags)					\
do {									\
	static struct lock_class_key __key;				\
									\
	__six_lock_init((lock), #lock, &__key, flags);			\
} while (0)
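
/*
 * A minimal usage sketch - struct foo is hypothetical, and 0 simply means no
 * init flags:
 *
 *	struct foo {
 *		struct six_lock	lock;
 *	};
 *
 *	six_lock_init(&foo->lock, 0);
 *	...
 *	six_lock_exit(&foo->lock);
 *
 * six_lock_exit() pairs with six_lock_init() and releases any per-cpu reader
 * state allocated when SIX_LOCK_INIT_PCPU was used.
 */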

/**
 * six_lock_seq - obtain current lock sequence number
 * @lock:	six_lock to obtain sequence number for
 *
 * @lock should be held for read or intent, and not write
 *
 * By saving the lock sequence number, we can unlock @lock and then (typically
 * after some blocking operation) attempt to relock it: the relock will succeed
 * if the sequence number hasn't changed, meaning no write locks have been taken
 * and state corresponding to what @lock protects is still valid.
 */
static inline u32 six_lock_seq(const struct six_lock *lock)
{
	return lock->seq;
}

bool six_trylock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip);

/**
 * six_trylock_type - attempt to take a six lock without blocking
 * @lock:	lock to take
 * @type:	SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
 *
 * Return: true on success, false on failure.
 */
static inline bool six_trylock_type(struct six_lock *lock, enum six_lock_type type)
{
	return six_trylock_ip(lock, type, _THIS_IP_);
}

int six_lock_ip_waiter(struct six_lock *lock, enum six_lock_type type,
		       struct six_lock_waiter *wait,
		       six_lock_should_sleep_fn should_sleep_fn, void *p,
		       unsigned long ip);

/**
 * six_lock_waiter - take a lock, with full waitlist interface
 * @lock:	lock to take
 * @type:	SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
 * @wait:	pointer to wait object, which will be added to lock's waitlist
 * @should_sleep_fn: callback run after adding to waitlist, immediately prior
 *		to scheduling
 * @p:		passed through to @should_sleep_fn
 *
 * This is a convenience wrapper around six_lock_ip_waiter(), see that function
 * for full documentation.
 *
 * Return: 0 on success, or the return code from @should_sleep_fn on failure.
 */
static inline int six_lock_waiter(struct six_lock *lock, enum six_lock_type type,
				  struct six_lock_waiter *wait,
				  six_lock_should_sleep_fn should_sleep_fn, void *p)
{
	return six_lock_ip_waiter(lock, type, wait, should_sleep_fn, p, _THIS_IP_);
}

/**
 * six_lock_ip - take a six lock
 * @lock:	lock to take
 * @type:	SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
 * @should_sleep_fn: callback run after adding to waitlist, immediately prior
 *		to scheduling
 * @p:		passed through to @should_sleep_fn
 * @ip:		ip parameter for lockdep/lockstat, i.e. _THIS_IP_
 *
 * Return: 0 on success, or the return code from @should_sleep_fn on failure.
 */
static inline int six_lock_ip(struct six_lock *lock, enum six_lock_type type,
			      six_lock_should_sleep_fn should_sleep_fn, void *p,
			      unsigned long ip)
{
	struct six_lock_waiter wait;

	return six_lock_ip_waiter(lock, type, &wait, should_sleep_fn, p, ip);
}

/**
 * six_lock_type - take a six lock
 * @lock:	lock to take
 * @type:	SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
 * @should_sleep_fn: callback run after adding to waitlist, immediately prior
 *		to scheduling
 * @p:		passed through to @should_sleep_fn
 *
 * Return: 0 on success, or the return code from @should_sleep_fn on failure.
 */
static inline int six_lock_type(struct six_lock *lock, enum six_lock_type type,
				six_lock_should_sleep_fn should_sleep_fn, void *p)
{
	struct six_lock_waiter wait;

	return six_lock_ip_waiter(lock, type, &wait, should_sleep_fn, p, _THIS_IP_);
}
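
/*
 * A usage sketch for the enum-typed interface - foo is hypothetical, and a
 * NULL @should_sleep_fn is assumed to mean "no abort check":
 *
 *	six_lock_type(&foo->lock, SIX_LOCK_intent, NULL, NULL);
 *	...modify foo, taking the write lock only around the actual update...
 *	six_unlock_type(&foo->lock, SIX_LOCK_intent);
 */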

bool six_relock_ip(struct six_lock *lock, enum six_lock_type type,
		   unsigned seq, unsigned long ip);

/**
 * six_relock_type - attempt to re-take a lock that was held previously
 * @lock:	lock to take
 * @type:	SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
 * @seq:	lock sequence number obtained from six_lock_seq() while lock was
 *		held previously
 *
 * Return: true on success, false on failure.
 */
static inline bool six_relock_type(struct six_lock *lock, enum six_lock_type type,
				   unsigned seq)
{
	return six_relock_ip(lock, type, seq, _THIS_IP_);
}

void six_unlock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip);

/**
 * six_unlock_type - drop a six lock
 * @lock:	lock to unlock
 * @type:	SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
 *
 * When a lock is held multiple times (because six_lock_increment() was used),
 * this decrements the 'lock held' counter by one.
 *
 * For example:
 * six_lock_read(&foo->lock);				read count 1
 * six_lock_increment(&foo->lock, SIX_LOCK_read);	read count 2
 * six_unlock_type(&foo->lock, SIX_LOCK_read);		read count 1
 * six_unlock_type(&foo->lock, SIX_LOCK_read);		read count 0
 */
static inline void six_unlock_type(struct six_lock *lock, enum six_lock_type type)
{
	six_unlock_ip(lock, type, _THIS_IP_);
}

#define __SIX_LOCK(type)						\
static inline bool six_trylock_ip_##type(struct six_lock *lock, unsigned long ip)\
{									\
	return six_trylock_ip(lock, SIX_LOCK_##type, ip);		\
}									\
									\
static inline bool six_trylock_##type(struct six_lock *lock)		\
{									\
	return six_trylock_ip(lock, SIX_LOCK_##type, _THIS_IP_);	\
}									\
									\
static inline int six_lock_ip_waiter_##type(struct six_lock *lock,	\
			   struct six_lock_waiter *wait,		\
			   six_lock_should_sleep_fn should_sleep_fn, void *p,\
			   unsigned long ip)				\
{									\
	return six_lock_ip_waiter(lock, SIX_LOCK_##type, wait, should_sleep_fn, p, ip);\
}									\
									\
static inline int six_lock_ip_##type(struct six_lock *lock,		\
		    six_lock_should_sleep_fn should_sleep_fn, void *p,	\
		    unsigned long ip)					\
{									\
	return six_lock_ip(lock, SIX_LOCK_##type, should_sleep_fn, p, ip);\
}									\
									\
static inline bool six_relock_ip_##type(struct six_lock *lock, u32 seq, unsigned long ip)\
{									\
	return six_relock_ip(lock, SIX_LOCK_##type, seq, ip);		\
}									\
									\
static inline bool six_relock_##type(struct six_lock *lock, u32 seq)	\
{									\
	return six_relock_ip(lock, SIX_LOCK_##type, seq, _THIS_IP_);	\
}									\
									\
static inline int six_lock_##type(struct six_lock *lock,		\
				  six_lock_should_sleep_fn fn, void *p)\
{									\
	return six_lock_ip_##type(lock, fn, p, _THIS_IP_);		\
}									\
									\
static inline void six_unlock_ip_##type(struct six_lock *lock, unsigned long ip)	\
{									\
	six_unlock_ip(lock, SIX_LOCK_##type, ip);			\
}									\
									\
static inline void six_unlock_##type(struct six_lock *lock)		\
{									\
	six_unlock_ip(lock, SIX_LOCK_##type, _THIS_IP_);		\
}

__SIX_LOCK(read)
__SIX_LOCK(intent)
__SIX_LOCK(write)
#undef __SIX_LOCK
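
/*
 * __SIX_LOCK() above generates the per-type helpers used in the examples in
 * this header: six_trylock_read(), six_lock_read(), six_relock_read(),
 * six_unlock_read() and their _ip variants, and likewise for intent and write.
 *
 * For example (foo hypothetical):
 *
 *	if (six_trylock_intent(&foo->lock)) {
 *		...
 *		six_unlock_intent(&foo->lock);
 *	}
 */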

void six_lock_downgrade(struct six_lock *);
bool six_lock_tryupgrade(struct six_lock *);
bool six_trylock_convert(struct six_lock *, enum six_lock_type,
			 enum six_lock_type);

void six_lock_increment(struct six_lock *, enum six_lock_type);

void six_lock_wakeup_all(struct six_lock *);

struct six_lock_count {
	unsigned n[3];
};

struct six_lock_count six_lock_counts(struct six_lock *);
void six_lock_readers_add(struct six_lock *, int);
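
/*
 * A sketch of reading the held counts - foo is hypothetical, and n[] is
 * assumed to be indexed by enum six_lock_type:
 *
 *	struct six_lock_count c = six_lock_counts(&foo->lock);
 *	unsigned nr_read   = c.n[SIX_LOCK_read];
 *	unsigned nr_intent = c.n[SIX_LOCK_intent];
 */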

#endif /* _LINUX_SIX_H */