1 // SPDX-License-Identifier: GPL-2.0-only
2
3 /*
4 * rcuref - A scalable reference count implementation for RCU managed objects
5 *
6 * rcuref is provided to replace open coded reference count implementations
 * based on atomic_t. It protects explicitly RCU managed objects which can
8 * be visible even after the last reference has been dropped and the object
9 * is heading towards destruction.
10 *
11 * A common usage pattern is:
12 *
 *	get()
 *		rcu_read_lock();
 *		p = get_ptr();
 *		if (p && !atomic_inc_not_zero(&p->refcnt))
 *			p = NULL;
 *		rcu_read_unlock();
 *		return p;
 *
 *	put()
 *		if (!atomic_dec_return(&p->refcnt)) {
 *			remove_ptr(p);
 *			kfree_rcu(p, rcu);
 *		}
26 *
27 * atomic_inc_not_zero() is implemented with a try_cmpxchg() loop which has
28 * O(N^2) behaviour under contention with N concurrent operations.
29 *
30 * rcuref uses atomic_add_negative_relaxed() for the fast path, which scales
31 * better under contention.
32 *
33 * Why not refcount?
34 * =================
35 *
36 * In principle it should be possible to make refcount use the rcuref
37 * scheme, but the destruction race described below cannot be prevented
38 * unless the protected object is RCU managed.
39 *
40 * Theory of operation
41 * ===================
42 *
43 * rcuref uses an unsigned integer reference counter. As long as the
44 * counter value is greater than or equal to RCUREF_ONEREF and not larger
45 * than RCUREF_MAXREF the reference is alive:
46 *
 *   ONEREF   MAXREF                SATURATED             RELEASED   DEAD       NOREF
 *   0        0x7FFFFFFF 0x80000000 0xA0000000 0xBFFFFFFF 0xC0000000 0xE0000000 0xFFFFFFFF
 *   <---valid --------> <-------saturation zone--------> <----------dead zone----------->
50 *
51 * The get() and put() operations do unconditional increments and
52 * decrements. The result is checked after the operation. This optimizes
53 * for the fast path.
54 *
55 * If the reference count is saturated or dead, then the increments and
56 * decrements are not harmful as the reference count still stays in the
 * respective zones and is always set back to SATURATED resp. DEAD. The
58 * zones have room for 2^28 racing operations in each direction, which
59 * makes it practically impossible to escape the zones.
60 *
61 * Once the last reference is dropped the reference count becomes
62 * RCUREF_NOREF which forces rcuref_put() into the slowpath operation. The
63 * slowpath then tries to set the reference count from RCUREF_NOREF to
64 * RCUREF_DEAD via a cmpxchg(). This opens a small window where a
65 * concurrent rcuref_get() can acquire the reference count and bring it
66 * back to RCUREF_ONEREF or even drop the reference again and mark it DEAD.
67 *
68 * If the cmpxchg() succeeds then a concurrent rcuref_get() will result in
69 * DEAD + 1, which is inside the dead zone. If that happens the reference
70 * count is put back to DEAD.
71 *
72 * The actual race is possible due to the unconditional increment and
73 * decrements in rcuref_get() and rcuref_put():
74 *
 *	T1				T2
 *	get()				put()
 *					if (atomic_add_negative(-1, &ref->refcnt))
 *		succeeds->			atomic_cmpxchg(&ref->refcnt, NOREF, DEAD);
 *
 *	atomic_add_negative(1, &ref->refcnt);	<- Elevates refcount to DEAD + 1
81 *
82 * As the result of T1's add is negative, the get() goes into the slow path
83 * and observes refcnt being in the dead zone which makes the operation fail.
84 *
85 * Possible critical states:
86 *
 *	Context	Counter	References	Operation
 *	T1	0	1		init()
 *	T2	1	2		get()
 *	T1	0	1		put()
 *	T2	-1	0		put() tries to mark dead
 *	T1	0	1		get()
 *	T2	0	1		put() mark dead fails
 *	T1	-1	0		put() tries to mark dead
 *	T1	DEAD	0		put() mark dead succeeds
 *	T2	DEAD+1	0		get() fails and puts it back to DEAD
97 *
98 * Of course there are more complex scenarios, but the above illustrates
99 * the working principle. The rest is left to the imagination of the
100 * reader.
101 *
102 * Deconstruction race
103 * ===================
104 *
105 * The release operation must be protected by prohibiting a grace period in
106 * order to prevent a possible use after free:
107 *
 *	T1				T2
 *	put()				get()
 *	// ref->refcnt = ONEREF
 *	if (!atomic_add_negative(-1, &ref->refcnt))
 *		return false;				<- Not taken
 *
 *	// ref->refcnt == NOREF
 *	--> preemption
 *					// Elevates ref->refcnt to ONEREF
 *					if (!atomic_add_negative(1, &ref->refcnt))
 *						return true;	<- taken
 *
 *					if (put(&p->ref)) {	<-- Succeeds
 *						remove_pointer(p);
 *						kfree_rcu(p, rcu);
 *					}
 *
 *		RCU grace period ends, object is freed
 *
 *	atomic_cmpxchg(&ref->refcnt, NOREF, DEAD);	<- UAF
128 *
129 * This is prevented by disabling preemption around the put() operation as
130 * that's in most kernel configurations cheaper than a rcu_read_lock() /
131 * rcu_read_unlock() pair and in many cases even a NOOP. In any case it
132 * prevents the grace period which keeps the object alive until all put()
133 * operations complete.
134 *
135 * Saturation protection
136 * =====================
137 *
138 * The reference count has a saturation limit RCUREF_MAXREF (INT_MAX).
 * Once this is exceeded the reference count becomes stale by setting it
140 * to RCUREF_SATURATED, which will cause a memory leak, but it prevents
141 * wrap arounds which obviously cause worse problems than a memory
142 * leak. When saturation is reached a warning is emitted.
143 *
144 * Race conditions
145 * ===============
146 *
147 * All reference count increment/decrement operations are unconditional and
148 * only verified after the fact. This optimizes for the good case and takes
149 * the occasional race vs. a dead or already saturated refcount into
 * account. The saturation and dead zones are large enough to accommodate
 * that.
152 *
153 * Memory ordering
154 * ===============
155 *
156 * Memory ordering rules are slightly relaxed wrt regular atomic_t functions
157 * and provide only what is strictly required for refcounts.
158 *
159 * The increments are fully relaxed; these will not provide ordering. The
160 * rationale is that whatever is used to obtain the object to increase the
161 * reference count on will provide the ordering. For locked data
 * structures, it's the lock acquire, for RCU/lockless data structures it's
163 * the dependent load.
164 *
165 * rcuref_get() provides a control dependency ordering future stores which
166 * ensures that the object is not modified when acquiring a reference
167 * fails.
168 *
169 * rcuref_put() provides release order, i.e. all prior loads and stores
170 * will be issued before. It also provides a control dependency ordering
171 * against the subsequent destruction of the object.
172 *
173 * If rcuref_put() successfully dropped the last reference and marked the
174 * object DEAD it also provides acquire ordering.
175 */
176
177 #include <linux/export.h>
178 #include <linux/rcuref.h>
179
180 /**
181 * rcuref_get_slowpath - Slowpath of rcuref_get()
182 * @ref: Pointer to the reference count
183 *
184 * Invoked when the reference count is outside of the valid zone.
185 *
186 * Return:
187 * False if the reference count was already marked dead
188 *
189 * True if the reference count is saturated, which prevents the
190 * object from being deconstructed ever.
191 */
rcuref_get_slowpath(rcuref_t * ref)192 bool rcuref_get_slowpath(rcuref_t *ref)
193 {
194 unsigned int cnt = atomic_read(&ref->refcnt);
195
196 /*
197 * If the reference count was already marked dead, undo the
198 * increment so it stays in the middle of the dead zone and return
199 * fail.
200 */
201 if (cnt >= RCUREF_RELEASED) {
202 atomic_set(&ref->refcnt, RCUREF_DEAD);
203 return false;
204 }
205
206 /*
207 * If it was saturated, warn and mark it so. In case the increment
208 * was already on a saturated value restore the saturation
209 * marker. This keeps it in the middle of the saturation zone and
210 * prevents the reference count from overflowing. This leaks the
211 * object memory, but prevents the obvious reference count overflow
212 * damage.
213 */
214 if (WARN_ONCE(cnt > RCUREF_MAXREF, "rcuref saturated - leaking memory"))
215 atomic_set(&ref->refcnt, RCUREF_SATURATED);
216 return true;
217 }
218 EXPORT_SYMBOL_GPL(rcuref_get_slowpath);
219
220 /**
221 * rcuref_put_slowpath - Slowpath of __rcuref_put()
222 * @ref: Pointer to the reference count
223 *
224 * Invoked when the reference count is outside of the valid zone.
225 *
226 * Return:
227 * True if this was the last reference with no future references
228 * possible. This signals the caller that it can safely schedule the
229 * object, which is protected by the reference counter, for
230 * deconstruction.
231 *
232 * False if there are still active references or the put() raced
233 * with a concurrent get()/put() pair. Caller is not allowed to
234 * deconstruct the protected object.
235 */
rcuref_put_slowpath(rcuref_t * ref)236 bool rcuref_put_slowpath(rcuref_t *ref)
237 {
238 unsigned int cnt = atomic_read(&ref->refcnt);
239
240 /* Did this drop the last reference? */
241 if (likely(cnt == RCUREF_NOREF)) {
242 /*
243 * Carefully try to set the reference count to RCUREF_DEAD.
244 *
245 * This can fail if a concurrent get() operation has
246 * elevated it again or the corresponding put() even marked
247 * it dead already. Both are valid situations and do not
248 * require a retry. If this fails the caller is not
249 * allowed to deconstruct the object.
250 */
251 if (!atomic_try_cmpxchg_release(&ref->refcnt, &cnt, RCUREF_DEAD))
252 return false;
253
254 /*
255 * The caller can safely schedule the object for
256 * deconstruction. Provide acquire ordering.
257 */
258 smp_acquire__after_ctrl_dep();
259 return true;
260 }
261
262 /*
263 * If the reference count was already in the dead zone, then this
264 * put() operation is imbalanced. Warn, put the reference count back to
265 * DEAD and tell the caller to not deconstruct the object.
266 */
267 if (WARN_ONCE(cnt >= RCUREF_RELEASED, "rcuref - imbalanced put()")) {
268 atomic_set(&ref->refcnt, RCUREF_DEAD);
269 return false;
270 }
271
272 /*
273 * This is a put() operation on a saturated refcount. Restore the
274 * mean saturation value and tell the caller to not deconstruct the
275 * object.
276 */
277 if (cnt > RCUREF_MAXREF)
278 atomic_set(&ref->refcnt, RCUREF_SATURATED);
279 return false;
280 }
281 EXPORT_SYMBOL_GPL(rcuref_put_slowpath);
282