Lines Matching +full:- +full:b

1 // SPDX-License-Identifier: GPL-2.0
21 #include "super-io.h"
30 (unsigned) BTREE_NODE_LEVEL(bn), bn->keys.seq); in bch2_btree_node_header_to_text()
32 bch2_bpos_to_text(out, bn->min_key); in bch2_btree_node_header_to_text()
35 bch2_bpos_to_text(out, bn->max_key); in bch2_btree_node_header_to_text()
38 void bch2_btree_node_io_unlock(struct btree *b) in bch2_btree_node_io_unlock() argument
40 EBUG_ON(!btree_node_write_in_flight(b)); in bch2_btree_node_io_unlock()
42 clear_btree_node_write_in_flight_inner(b); in bch2_btree_node_io_unlock()
43 clear_btree_node_write_in_flight(b); in bch2_btree_node_io_unlock()
44 wake_up_bit(&b->flags, BTREE_NODE_write_in_flight); in bch2_btree_node_io_unlock()
47 void bch2_btree_node_io_lock(struct btree *b) in bch2_btree_node_io_lock() argument
49 wait_on_bit_lock_io(&b->flags, BTREE_NODE_write_in_flight, in bch2_btree_node_io_lock()
53 void __bch2_btree_node_wait_on_read(struct btree *b) in __bch2_btree_node_wait_on_read() argument
55 wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight, in __bch2_btree_node_wait_on_read()
59 void __bch2_btree_node_wait_on_write(struct btree *b) in __bch2_btree_node_wait_on_write() argument
61 wait_on_bit_io(&b->flags, BTREE_NODE_write_in_flight, in __bch2_btree_node_wait_on_write()
65 void bch2_btree_node_wait_on_read(struct btree *b) in bch2_btree_node_wait_on_read() argument
67 wait_on_bit_io(&b->flags, BTREE_NODE_read_in_flight, in bch2_btree_node_wait_on_read()
71 void bch2_btree_node_wait_on_write(struct btree *b) in bch2_btree_node_wait_on_write() argument
73 wait_on_bit_io(&b->flags, BTREE_NODE_write_in_flight, in bch2_btree_node_wait_on_write()
77 static void verify_no_dups(struct btree *b, in verify_no_dups() argument
90 struct bkey l = bkey_unpack_key(b, p); in verify_no_dups()
91 struct bkey r = bkey_unpack_key(b, k); in verify_no_dups()
102 for (k = i->start; k != vstruct_last(i); k = bkey_p_next(k)) in set_needs_whiteout()
103 k->needs_whiteout = v; in set_needs_whiteout()
110 mempool_free(p, &c->btree_bounce_pool); in btree_bounce_free()
121 BUG_ON(size > c->opts.btree_node_size); in btree_bounce_alloc()
127 p = mempool_alloc(&c->btree_bounce_pool, GFP_NOFS); in btree_bounce_alloc()
136 unsigned n = nr, a = nr / 2, b, c, d; in sort_bkey_ptrs() local
144 a--; in sort_bkey_ptrs()
145 else if (--n) in sort_bkey_ptrs()
150 for (b = a; c = 2 * b + 1, (d = c + 1) < n;) in sort_bkey_ptrs()
151 b = bch2_bkey_cmp_packed(bt, in sort_bkey_ptrs()
155 b = c; in sort_bkey_ptrs()
157 while (b != a && in sort_bkey_ptrs()
160 ptrs[b]) >= 0) in sort_bkey_ptrs()
161 b = (b - 1) / 2; in sort_bkey_ptrs()
162 c = b; in sort_bkey_ptrs()
163 while (b != a) { in sort_bkey_ptrs()
164 b = (b - 1) / 2; in sort_bkey_ptrs()
165 swap(ptrs[b], ptrs[c]); in sort_bkey_ptrs()
170 static void bch2_sort_whiteouts(struct bch_fs *c, struct btree *b) in bch2_sort_whiteouts() argument
174 size_t bytes = b->whiteout_u64s * sizeof(u64); in bch2_sort_whiteouts()
176 if (!b->whiteout_u64s) in bch2_sort_whiteouts()
183 for (k = unwritten_whiteouts_start(b); in bch2_sort_whiteouts()
184 k != unwritten_whiteouts_end(b); in bch2_sort_whiteouts()
186 *--ptrs = k; in bch2_sort_whiteouts()
188 sort_bkey_ptrs(b, ptrs, ptrs_end - ptrs); in bch2_sort_whiteouts()
198 verify_no_dups(b, new_whiteouts, in bch2_sort_whiteouts()
199 (void *) ((u64 *) new_whiteouts + b->whiteout_u64s)); in bch2_sort_whiteouts()
201 memcpy_u64s(unwritten_whiteouts_start(b), in bch2_sort_whiteouts()
202 new_whiteouts, b->whiteout_u64s); in bch2_sort_whiteouts()
207 static bool should_compact_bset(struct btree *b, struct bset_tree *t, in should_compact_bset() argument
210 if (!bset_dead_u64s(b, t)) in should_compact_bset()
215 return should_compact_bset_lazy(b, t) || in should_compact_bset()
216 (compacting && !bset_written(b, bset(b, t))); in should_compact_bset()
224 static bool bch2_drop_whiteouts(struct btree *b, enum compact_mode mode) in bch2_drop_whiteouts() argument
228 for_each_bset(b, t) { in bch2_drop_whiteouts()
229 struct bset *i = bset(b, t); in bch2_drop_whiteouts()
233 if (t != b->set && !bset_written(b, i)) { in bch2_drop_whiteouts()
235 dst = max(write_block(b), in bch2_drop_whiteouts()
236 (void *) btree_bkey_last(b, t - 1)); in bch2_drop_whiteouts()
242 if (!should_compact_bset(b, t, ret, mode)) { in bch2_drop_whiteouts()
245 le16_to_cpu(src->keys.u64s) * in bch2_drop_whiteouts()
247 i = &dst->keys; in bch2_drop_whiteouts()
248 set_btree_bset(b, t, i); in bch2_drop_whiteouts()
253 start = btree_bkey_first(b, t); in bch2_drop_whiteouts()
254 end = btree_bkey_last(b, t); in bch2_drop_whiteouts()
258 i = &dst->keys; in bch2_drop_whiteouts()
259 set_btree_bset(b, t, i); in bch2_drop_whiteouts()
262 out = i->start; in bch2_drop_whiteouts()
271 BUG_ON(k->needs_whiteout); in bch2_drop_whiteouts()
275 i->u64s = cpu_to_le16((u64 *) out - i->_data); in bch2_drop_whiteouts()
276 set_btree_bset_end(b, t); in bch2_drop_whiteouts()
277 bch2_bset_set_no_aux_tree(b, t); in bch2_drop_whiteouts()
281 bch2_verify_btree_nr_keys(b); in bch2_drop_whiteouts()
283 bch2_btree_build_aux_trees(b); in bch2_drop_whiteouts()
288 bool bch2_compact_whiteouts(struct bch_fs *c, struct btree *b, in bch2_compact_whiteouts() argument
291 return bch2_drop_whiteouts(b, mode); in bch2_compact_whiteouts()
294 static void btree_node_sort(struct bch_fs *c, struct btree *b, in btree_node_sort() argument
301 struct bset *start_bset = bset(b, &b->set[start_idx]); in btree_node_sort()
304 unsigned i, u64s = 0, bytes, shift = end_idx - start_idx - 1; in btree_node_sort()
306 end_idx == b->nsets; in btree_node_sort()
308 sort_iter_stack_init(&sort_iter, b); in btree_node_sort()
310 for (t = b->set + start_idx; in btree_node_sort()
311 t < b->set + end_idx; in btree_node_sort()
313 u64s += le16_to_cpu(bset(b, t)->u64s); in btree_node_sort()
315 btree_bkey_first(b, t), in btree_node_sort()
316 btree_bkey_last(b, t)); in btree_node_sort()
320 ? btree_buf_bytes(b) in btree_node_sort()
327 u64s = bch2_sort_keys(out->keys.start, &sort_iter.iter); in btree_node_sort()
329 out->keys.u64s = cpu_to_le16(u64s); in btree_node_sort()
331 BUG_ON(vstruct_end(&out->keys) > (void *) out + bytes); in btree_node_sort()
334 bch2_time_stats_update(&c->times[BCH_TIME_btree_node_sort], in btree_node_sort()
338 for (t = b->set + start_idx; t < b->set + end_idx; t++) in btree_node_sort()
339 seq = max(seq, le64_to_cpu(bset(b, t)->journal_seq)); in btree_node_sort()
340 start_bset->journal_seq = cpu_to_le64(seq); in btree_node_sort()
343 u64s = le16_to_cpu(out->keys.u64s); in btree_node_sort()
345 BUG_ON(bytes != btree_buf_bytes(b)); in btree_node_sort()
352 *out = *b->data; in btree_node_sort()
353 out->keys.u64s = cpu_to_le16(u64s); in btree_node_sort()
354 swap(out, b->data); in btree_node_sort()
355 set_btree_bset(b, b->set, &b->data->keys); in btree_node_sort()
357 start_bset->u64s = out->keys.u64s; in btree_node_sort()
358 memcpy_u64s(start_bset->start, in btree_node_sort()
359 out->keys.start, in btree_node_sort()
360 le16_to_cpu(out->keys.u64s)); in btree_node_sort()
364 b->nr.bset_u64s[start_idx] += in btree_node_sort()
365 b->nr.bset_u64s[i]; in btree_node_sort()
367 b->nsets -= shift; in btree_node_sort()
369 for (i = start_idx + 1; i < b->nsets; i++) { in btree_node_sort()
370 b->nr.bset_u64s[i] = b->nr.bset_u64s[i + shift]; in btree_node_sort()
371 b->set[i] = b->set[i + shift]; in btree_node_sort()
374 for (i = b->nsets; i < MAX_BSETS; i++) in btree_node_sort()
375 b->nr.bset_u64s[i] = 0; in btree_node_sort()
377 set_btree_bset_end(b, &b->set[start_idx]); in btree_node_sort()
378 bch2_bset_set_no_aux_tree(b, &b->set[start_idx]); in btree_node_sort()
382 bch2_verify_btree_nr_keys(b); in btree_node_sort()
393 BUG_ON(dst->nsets != 1); in bch2_btree_sort_into()
395 bch2_bset_set_no_aux_tree(dst, dst->set); in bch2_btree_sort_into()
401 &dst->format, in bch2_btree_sort_into()
404 bch2_time_stats_update(&c->times[BCH_TIME_btree_node_sort], in bch2_btree_sort_into()
407 set_btree_bset_end(dst, dst->set); in bch2_btree_sort_into()
409 dst->nr.live_u64s += nr.live_u64s; in bch2_btree_sort_into()
410 dst->nr.bset_u64s[0] += nr.bset_u64s[0]; in bch2_btree_sort_into()
411 dst->nr.packed_keys += nr.packed_keys; in bch2_btree_sort_into()
412 dst->nr.unpacked_keys += nr.unpacked_keys; in bch2_btree_sort_into()
419 * too many bsets - sort some of them together:
421 static bool btree_node_compact(struct bch_fs *c, struct btree *b) in btree_node_compact() argument
427 unwritten_idx < b->nsets; in btree_node_compact()
429 if (!bset_written(b, bset(b, &b->set[unwritten_idx]))) in btree_node_compact()
432 if (b->nsets - unwritten_idx > 1) { in btree_node_compact()
433 btree_node_sort(c, b, unwritten_idx, b->nsets); in btree_node_compact()
438 btree_node_sort(c, b, 0, unwritten_idx); in btree_node_compact()
445 void bch2_btree_build_aux_trees(struct btree *b) in bch2_btree_build_aux_trees() argument
447 for_each_bset(b, t) in bch2_btree_build_aux_trees()
448 bch2_bset_build_aux_tree(b, t, in bch2_btree_build_aux_trees()
449 !bset_written(b, bset(b, t)) && in bch2_btree_build_aux_trees()
450 t == bset_tree_last(b)); in bch2_btree_build_aux_trees()
463 static inline bool should_compact_all(struct bch_fs *c, struct btree *b) in should_compact_all() argument
468 return bset_u64s(&b->set[1]) > 1U << mid_u64s_bits; in should_compact_all()
472 * @bch_btree_init_next - initialize a new (unwritten) bset that can then be
475 * Safe to call if there already is an unwritten bset - will only add a new bset
476 * if @b doesn't already have one.
480 void bch2_btree_init_next(struct btree_trans *trans, struct btree *b) in bch2_btree_init_next() argument
482 struct bch_fs *c = trans->c; in bch2_btree_init_next()
486 EBUG_ON(!six_lock_counts(&b->c.lock).n[SIX_LOCK_write]); in bch2_btree_init_next()
487 BUG_ON(bset_written(b, bset(b, &b->set[1]))); in bch2_btree_init_next()
488 BUG_ON(btree_node_just_written(b)); in bch2_btree_init_next()
490 if (b->nsets == MAX_BSETS && in bch2_btree_init_next()
491 !btree_node_write_in_flight(b) && in bch2_btree_init_next()
492 should_compact_all(c, b)) { in bch2_btree_init_next()
493 bch2_btree_node_write(c, b, SIX_LOCK_write, in bch2_btree_init_next()
498 if (b->nsets == MAX_BSETS && in bch2_btree_init_next()
499 btree_node_compact(c, b)) in bch2_btree_init_next()
502 BUG_ON(b->nsets >= MAX_BSETS); in bch2_btree_init_next()
504 bne = want_new_bset(c, b); in bch2_btree_init_next()
506 bch2_bset_init_next(b, bne); in bch2_btree_init_next()
508 bch2_btree_build_aux_trees(b); in bch2_btree_init_next()
511 bch2_trans_node_reinit_iter(trans, b); in bch2_btree_init_next()
516 struct btree *b, struct bset *i, struct bkey_packed *k, in btree_err_msg() argument
524 prt_printf(out, "on %s ", ca->name); in btree_err_msg()
526 bch2_btree_pos_to_text(out, c, b); in btree_err_msg()
531 b->written, btree_ptr_sectors_written(bkey_i_to_s_c(&b->key))); in btree_err_msg()
533 prt_printf(out, " bset u64s %u", le16_to_cpu(i->u64s)); in btree_err_msg()
536 (unsigned long)(void *)k - in btree_err_msg()
545 struct btree *b, in __btree_err() argument
554 bool silent = c->curr_recovery_pass == BCH_RECOVERY_PASS_scan_for_btree_nodes; in __btree_err()
557 btree_err_msg(&out, c, ca, b, i, k, b->written, write); in __btree_err()
565 ret = c->opts.errors == BCH_ON_ERROR_continue in __btree_err()
567 : -BCH_ERR_fsck_errors_not_fixed; in __btree_err()
571 if (!have_retry && ret == -BCH_ERR_btree_node_read_err_want_retry) in __btree_err()
572 ret = -BCH_ERR_btree_node_read_err_fixable; in __btree_err()
573 if (!have_retry && ret == -BCH_ERR_btree_node_read_err_must_retry) in __btree_err()
574 ret = -BCH_ERR_btree_node_read_err_bad_node; in __btree_err()
576 if (!silent && ret != -BCH_ERR_btree_node_read_err_fixable) in __btree_err()
580 case -BCH_ERR_btree_node_read_err_fixable: in __btree_err()
583 : -BCH_ERR_fsck_fix; in __btree_err()
584 if (ret != -BCH_ERR_fsck_fix && in __btree_err()
585 ret != -BCH_ERR_fsck_ignore) in __btree_err()
587 ret = -BCH_ERR_fsck_fix; in __btree_err()
589 case -BCH_ERR_btree_node_read_err_want_retry: in __btree_err()
590 case -BCH_ERR_btree_node_read_err_must_retry: in __btree_err()
594 case -BCH_ERR_btree_node_read_err_bad_node: in __btree_err()
599 case -BCH_ERR_btree_node_read_err_incompatible: in __btree_err()
602 ret = -BCH_ERR_fsck_errors_not_fixed; in __btree_err()
613 #define btree_err(type, c, ca, b, i, k, _err_type, msg, ...) \ argument
615 int _ret = __btree_err(type, c, ca, b, i, k, write, have_retry, \
619 if (_ret != -BCH_ERR_fsck_fix) { \
634 void bch2_btree_node_drop_keys_outside_node(struct btree *b) in bch2_btree_node_drop_keys_outside_node() argument
636 for_each_bset(b, t) { in bch2_btree_node_drop_keys_outside_node()
637 struct bset *i = bset(b, t); in bch2_btree_node_drop_keys_outside_node()
640 for (k = i->start; k != vstruct_last(i); k = bkey_p_next(k)) in bch2_btree_node_drop_keys_outside_node()
641 if (bkey_cmp_left_packed(b, k, &b->data->min_key) >= 0) in bch2_btree_node_drop_keys_outside_node()
644 if (k != i->start) { in bch2_btree_node_drop_keys_outside_node()
645 unsigned shift = (u64 *) k - (u64 *) i->start; in bch2_btree_node_drop_keys_outside_node()
647 memmove_u64s_down(i->start, k, in bch2_btree_node_drop_keys_outside_node()
648 (u64 *) vstruct_end(i) - (u64 *) k); in bch2_btree_node_drop_keys_outside_node()
649 i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - shift); in bch2_btree_node_drop_keys_outside_node()
650 set_btree_bset_end(b, t); in bch2_btree_node_drop_keys_outside_node()
653 for (k = i->start; k != vstruct_last(i); k = bkey_p_next(k)) in bch2_btree_node_drop_keys_outside_node()
654 if (bkey_cmp_left_packed(b, k, &b->data->max_key) > 0) in bch2_btree_node_drop_keys_outside_node()
658 i->u64s = cpu_to_le16((u64 *) k - (u64 *) i->start); in bch2_btree_node_drop_keys_outside_node()
659 set_btree_bset_end(b, t); in bch2_btree_node_drop_keys_outside_node()
667 bch2_bset_set_no_aux_tree(b, b->set); in bch2_btree_node_drop_keys_outside_node()
668 bch2_btree_build_aux_trees(b); in bch2_btree_node_drop_keys_outside_node()
669 b->nr = bch2_btree_node_count_keys(b); in bch2_btree_node_drop_keys_outside_node()
674 for_each_btree_node_key_unpack(b, k, &iter, &unpacked) { in bch2_btree_node_drop_keys_outside_node()
675 BUG_ON(bpos_lt(k.k->p, b->data->min_key)); in bch2_btree_node_drop_keys_outside_node()
676 BUG_ON(bpos_gt(k.k->p, b->data->max_key)); in bch2_btree_node_drop_keys_outside_node()
681 struct btree *b, struct bset *i, in validate_bset() argument
685 unsigned version = le16_to_cpu(i->version); in validate_bset()
686 unsigned ptr_written = btree_ptr_sectors_written(bkey_i_to_s_c(&b->key)); in validate_bset()
692 -BCH_ERR_btree_node_read_err_incompatible, in validate_bset()
693 c, ca, b, i, NULL, in validate_bset()
699 if (btree_err_on(version < c->sb.version_min, in validate_bset()
700 -BCH_ERR_btree_node_read_err_fixable, in validate_bset()
701 c, NULL, b, i, NULL, in validate_bset()
704 version, c->sb.version_min)) { in validate_bset()
705 mutex_lock(&c->sb_lock); in validate_bset()
706 c->disk_sb.sb->version_min = cpu_to_le16(version); in validate_bset()
708 mutex_unlock(&c->sb_lock); in validate_bset()
712 BCH_VERSION_MAJOR(c->sb.version), in validate_bset()
713 -BCH_ERR_btree_node_read_err_fixable, in validate_bset()
714 c, NULL, b, i, NULL, in validate_bset()
717 version, c->sb.version)) { in validate_bset()
718 mutex_lock(&c->sb_lock); in validate_bset()
719 c->disk_sb.sb->version = cpu_to_le16(version); in validate_bset()
721 mutex_unlock(&c->sb_lock); in validate_bset()
725 -BCH_ERR_btree_node_read_err_incompatible, in validate_bset()
726 c, ca, b, i, NULL, in validate_bset()
732 -BCH_ERR_btree_node_read_err_fixable, in validate_bset()
733 c, ca, b, i, NULL, in validate_bset()
737 i->u64s = 0; in validate_bset()
739 btree_err_on(offset && !i->u64s, in validate_bset()
740 -BCH_ERR_btree_node_read_err_fixable, in validate_bset()
741 c, ca, b, i, NULL, in validate_bset()
746 -BCH_ERR_btree_node_read_err_want_retry, in validate_bset()
747 c, ca, b, i, NULL, in validate_bset()
756 if (b->key.k.type == KEY_TYPE_btree_ptr_v2) { in validate_bset()
758 &bkey_i_to_btree_ptr_v2(&b->key)->v; in validate_bset()
761 btree_err_on(bp->seq != bn->keys.seq, in validate_bset()
762 -BCH_ERR_btree_node_read_err_must_retry, in validate_bset()
763 c, ca, b, NULL, NULL, in validate_bset()
768 btree_err_on(BTREE_NODE_ID(bn) != b->c.btree_id, in validate_bset()
769 -BCH_ERR_btree_node_read_err_must_retry, in validate_bset()
770 c, ca, b, i, NULL, in validate_bset()
774 btree_err_on(BTREE_NODE_LEVEL(bn) != b->c.level, in validate_bset()
775 -BCH_ERR_btree_node_read_err_must_retry, in validate_bset()
776 c, ca, b, i, NULL, in validate_bset()
781 compat_btree_node(b->c.level, b->c.btree_id, version, in validate_bset()
784 if (b->key.k.type == KEY_TYPE_btree_ptr_v2) { in validate_bset()
786 &bkey_i_to_btree_ptr_v2(&b->key)->v; in validate_bset()
789 b->data->min_key = bp->min_key; in validate_bset()
790 b->data->max_key = b->key.k.p; in validate_bset()
793 btree_err_on(!bpos_eq(b->data->min_key, bp->min_key), in validate_bset()
794 -BCH_ERR_btree_node_read_err_must_retry, in validate_bset()
795 c, ca, b, NULL, NULL, in validate_bset()
799 bch2_bpos_to_text(&buf1, bn->min_key), buf1.buf), in validate_bset()
801 bch2_bpos_to_text(&buf2, bp->min_key), buf2.buf)); in validate_bset()
804 btree_err_on(!bpos_eq(bn->max_key, b->key.k.p), in validate_bset()
805 -BCH_ERR_btree_node_read_err_must_retry, in validate_bset()
806 c, ca, b, i, NULL, in validate_bset()
810 bch2_bpos_to_text(&buf1, bn->max_key), buf1.buf)); in validate_bset()
813 compat_btree_node(b->c.level, b->c.btree_id, version, in validate_bset()
816 btree_err_on(bch2_bkey_format_invalid(c, &bn->format, write, &buf1), in validate_bset()
817 -BCH_ERR_btree_node_read_err_bad_node, in validate_bset()
818 c, ca, b, i, NULL, in validate_bset()
822 bch2_bkey_format_to_text(&buf2, &bn->format), buf2.buf)); in validate_bset()
825 compat_bformat(b->c.level, b->c.btree_id, version, in validate_bset()
827 &bn->format); in validate_bset()
835 static int bset_key_validate(struct bch_fs *c, struct btree *b, in bset_key_validate() argument
839 return __bch2_bkey_validate(c, k, btree_node_type(b), 0) ?: in bset_key_validate()
840 (!updated_range ? bch2_bkey_in_btree_node(c, b, k, 0) : 0) ?: in bset_key_validate()
844 static bool bkey_packed_valid(struct bch_fs *c, struct btree *b, in bkey_packed_valid() argument
850 if (k->format > KEY_FORMAT_CURRENT) in bkey_packed_valid()
853 if (!bkeyp_u64s_valid(&b->format, k)) in bkey_packed_valid()
857 struct bkey_s u = __bkey_disassemble(b, k, &tmp); in bkey_packed_valid()
858 return !__bch2_bkey_validate(c, u.s_c, btree_node_type(b), BCH_VALIDATE_silent); in bkey_packed_valid()
861 static int validate_bset_keys(struct bch_fs *c, struct btree *b, in validate_bset_keys() argument
865 unsigned version = le16_to_cpu(i->version); in validate_bset_keys()
868 bool updated_range = b->key.k.type == KEY_TYPE_btree_ptr_v2 && in validate_bset_keys()
869 BTREE_PTR_RANGE_UPDATED(&bkey_i_to_btree_ptr_v2(&b->key)->v); in validate_bset_keys()
872 for (k = i->start; in validate_bset_keys()
879 -BCH_ERR_btree_node_read_err_fixable, in validate_bset_keys()
880 c, NULL, b, i, k, in validate_bset_keys()
883 i->u64s = cpu_to_le16((u64 *) k - i->_data); in validate_bset_keys()
887 if (btree_err_on(k->format > KEY_FORMAT_CURRENT, in validate_bset_keys()
888 -BCH_ERR_btree_node_read_err_fixable, in validate_bset_keys()
889 c, NULL, b, i, k, in validate_bset_keys()
891 "invalid bkey format %u", k->format)) in validate_bset_keys()
894 if (btree_err_on(!bkeyp_u64s_valid(&b->format, k), in validate_bset_keys()
895 -BCH_ERR_btree_node_read_err_fixable, in validate_bset_keys()
896 c, NULL, b, i, k, in validate_bset_keys()
898 "bad k->u64s %u (min %u max %zu)", k->u64s, in validate_bset_keys()
899 bkeyp_key_u64s(&b->format, k), in validate_bset_keys()
900 U8_MAX - BKEY_U64s + bkeyp_key_u64s(&b->format, k))) in validate_bset_keys()
904 bch2_bkey_compat(b->c.level, b->c.btree_id, version, in validate_bset_keys()
906 &b->format, k); in validate_bset_keys()
908 u = __bkey_disassemble(b, k, &tmp); in validate_bset_keys()
910 ret = bset_key_validate(c, b, u.s_c, updated_range, write); in validate_bset_keys()
911 if (ret == -BCH_ERR_fsck_delete_bkey) in validate_bset_keys()
917 bch2_bkey_compat(b->c.level, b->c.btree_id, version, in validate_bset_keys()
919 &b->format, k); in validate_bset_keys()
921 if (prev && bkey_iter_cmp(b, prev, k) > 0) { in validate_bset_keys()
922 struct bkey up = bkey_unpack_key(b, prev); in validate_bset_keys()
930 if (btree_err(-BCH_ERR_btree_node_read_err_fixable, in validate_bset_keys()
931 c, NULL, b, i, k, in validate_bset_keys()
941 next_good_key = k->u64s; in validate_bset_keys()
951 if (!bkey_packed_valid(c, b, i, (void *) ((u64 *) k + next_good_key))) { in validate_bset_keys()
953 next_good_key < (u64 *) vstruct_last(i) - (u64 *) k; in validate_bset_keys()
955 if (bkey_packed_valid(c, b, i, (void *) ((u64 *) k + next_good_key))) in validate_bset_keys()
963 next_good_key = (u64 *) vstruct_last(i) - (u64 *) k; in validate_bset_keys()
966 le16_add_cpu(&i->u64s, -next_good_key); in validate_bset_keys()
967 memmove_u64s_down(k, bkey_p_next(k), (u64 *) vstruct_end(i) - (u64 *) k); in validate_bset_keys()
975 struct btree *b, bool have_retry, bool *saw_error) in bch2_btree_node_read_done() argument
983 bool updated_range = b->key.k.type == KEY_TYPE_btree_ptr_v2 && in bch2_btree_node_read_done()
984 BTREE_PTR_RANGE_UPDATED(&bkey_i_to_btree_ptr_v2(&b->key)->v); in bch2_btree_node_read_done()
986 unsigned ptr_written = btree_ptr_sectors_written(bkey_i_to_s_c(&b->key)); in bch2_btree_node_read_done()
992 b->version_ondisk = U16_MAX; in bch2_btree_node_read_done()
994 b->written = 0; in bch2_btree_node_read_done()
996 iter = mempool_alloc(&c->fill_iter, GFP_NOFS); in bch2_btree_node_read_done()
997 sort_iter_init(iter, b, (btree_blocks(c) + 1) * 2); in bch2_btree_node_read_done()
1000 btree_err(-BCH_ERR_btree_node_read_err_must_retry, in bch2_btree_node_read_done()
1001 c, ca, b, NULL, NULL, in bch2_btree_node_read_done()
1005 btree_err_on(le64_to_cpu(b->data->magic) != bset_magic(c), in bch2_btree_node_read_done()
1006 -BCH_ERR_btree_node_read_err_must_retry, in bch2_btree_node_read_done()
1007 c, ca, b, NULL, NULL, in bch2_btree_node_read_done()
1010 bset_magic(c), le64_to_cpu(b->data->magic)); in bch2_btree_node_read_done()
1012 if (b->key.k.type == KEY_TYPE_btree_ptr_v2) { in bch2_btree_node_read_done()
1014 &bkey_i_to_btree_ptr_v2(&b->key)->v; in bch2_btree_node_read_done()
1016 bch2_bpos_to_text(&buf, b->data->min_key); in bch2_btree_node_read_done()
1017 prt_str(&buf, "-"); in bch2_btree_node_read_done()
1018 bch2_bpos_to_text(&buf, b->data->max_key); in bch2_btree_node_read_done()
1020 btree_err_on(b->data->keys.seq != bp->seq, in bch2_btree_node_read_done()
1021 -BCH_ERR_btree_node_read_err_must_retry, in bch2_btree_node_read_done()
1022 c, ca, b, NULL, NULL, in bch2_btree_node_read_done()
1026 bch2_btree_node_header_to_text(&buf, b->data), in bch2_btree_node_read_done()
1029 btree_err_on(!b->data->keys.seq, in bch2_btree_node_read_done()
1030 -BCH_ERR_btree_node_read_err_must_retry, in bch2_btree_node_read_done()
1031 c, ca, b, NULL, NULL, in bch2_btree_node_read_done()
1035 bch2_btree_node_header_to_text(&buf, b->data), in bch2_btree_node_read_done()
1039 while (b->written < (ptr_written ?: btree_sectors(c))) { in bch2_btree_node_read_done()
1042 bool first = !b->written; in bch2_btree_node_read_done()
1045 if (!b->written) { in bch2_btree_node_read_done()
1046 i = &b->data->keys; in bch2_btree_node_read_done()
1049 -BCH_ERR_btree_node_read_err_want_retry, in bch2_btree_node_read_done()
1050 c, ca, b, i, NULL, in bch2_btree_node_read_done()
1054 nonce = btree_nonce(i, b->written << 9); in bch2_btree_node_read_done()
1056 struct bch_csum csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data); in bch2_btree_node_read_done()
1057 csum_bad = bch2_crc_cmp(b->data->csum, csum); in bch2_btree_node_read_done()
1062 -BCH_ERR_btree_node_read_err_want_retry, in bch2_btree_node_read_done()
1063 c, ca, b, i, NULL, in bch2_btree_node_read_done()
1067 bch2_csum_err_msg(&buf, BSET_CSUM_TYPE(i), b->data->csum, csum), in bch2_btree_node_read_done()
1070 ret = bset_encrypt(c, i, b->written << 9); in bch2_btree_node_read_done()
1075 btree_err_on(btree_node_type_is_extents(btree_node_type(b)) && in bch2_btree_node_read_done()
1076 !BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data), in bch2_btree_node_read_done()
1077 -BCH_ERR_btree_node_read_err_incompatible, in bch2_btree_node_read_done()
1078 c, NULL, b, NULL, NULL, in bch2_btree_node_read_done()
1082 sectors = vstruct_sectors(b->data, c->block_bits); in bch2_btree_node_read_done()
1084 bne = write_block(b); in bch2_btree_node_read_done()
1085 i = &bne->keys; in bch2_btree_node_read_done()
1087 if (i->seq != b->data->keys.seq) in bch2_btree_node_read_done()
1091 -BCH_ERR_btree_node_read_err_want_retry, in bch2_btree_node_read_done()
1092 c, ca, b, i, NULL, in bch2_btree_node_read_done()
1096 nonce = btree_nonce(i, b->written << 9); in bch2_btree_node_read_done()
1098 csum_bad = bch2_crc_cmp(bne->csum, csum); in bch2_btree_node_read_done()
1103 -BCH_ERR_btree_node_read_err_want_retry, in bch2_btree_node_read_done()
1104 c, ca, b, i, NULL, in bch2_btree_node_read_done()
1108 bch2_csum_err_msg(&buf, BSET_CSUM_TYPE(i), bne->csum, csum), in bch2_btree_node_read_done()
1111 ret = bset_encrypt(c, i, b->written << 9); in bch2_btree_node_read_done()
1116 sectors = vstruct_sectors(bne, c->block_bits); in bch2_btree_node_read_done()
1119 b->version_ondisk = min(b->version_ondisk, in bch2_btree_node_read_done()
1120 le16_to_cpu(i->version)); in bch2_btree_node_read_done()
1122 ret = validate_bset(c, ca, b, i, b->written, sectors, in bch2_btree_node_read_done()
1127 if (!b->written) in bch2_btree_node_read_done()
1128 btree_node_set_format(b, b->data->format); in bch2_btree_node_read_done()
1130 ret = validate_bset_keys(c, b, i, READ, have_retry, saw_error); in bch2_btree_node_read_done()
1137 le64_to_cpu(i->journal_seq), in bch2_btree_node_read_done()
1141 -BCH_ERR_btree_node_read_err_fixable, in bch2_btree_node_read_done()
1142 c, ca, b, i, NULL, in bch2_btree_node_read_done()
1145 le64_to_cpu(i->journal_seq)); in bch2_btree_node_read_done()
1148 -BCH_ERR_btree_node_read_err_fixable, in bch2_btree_node_read_done()
1149 c, ca, b, i, NULL, in bch2_btree_node_read_done()
1151 "found blacklisted bset (journal seq %llu) in btree node at offset %u-%u/%u", in bch2_btree_node_read_done()
1152 le64_to_cpu(i->journal_seq), in bch2_btree_node_read_done()
1153 b->written, b->written + sectors, ptr_written); in bch2_btree_node_read_done()
1155 b->written += sectors; in bch2_btree_node_read_done()
1164 max_journal_seq = max(max_journal_seq, le64_to_cpu(i->journal_seq)); in bch2_btree_node_read_done()
1168 btree_err_on(b->written < ptr_written, in bch2_btree_node_read_done()
1169 -BCH_ERR_btree_node_read_err_want_retry, in bch2_btree_node_read_done()
1170 c, ca, b, NULL, NULL, in bch2_btree_node_read_done()
1173 ptr_written, b->written); in bch2_btree_node_read_done()
1175 for (bne = write_block(b); in bch2_btree_node_read_done()
1176 bset_byte_offset(b, bne) < btree_buf_bytes(b); in bch2_btree_node_read_done()
1178 btree_err_on(bne->keys.seq == b->data->keys.seq && in bch2_btree_node_read_done()
1180 le64_to_cpu(bne->keys.journal_seq), in bch2_btree_node_read_done()
1182 -BCH_ERR_btree_node_read_err_want_retry, in bch2_btree_node_read_done()
1183 c, ca, b, NULL, NULL, in bch2_btree_node_read_done()
1188 sorted = btree_bounce_alloc(c, btree_buf_bytes(b), &used_mempool); in bch2_btree_node_read_done()
1189 sorted->keys.u64s = 0; in bch2_btree_node_read_done()
1191 set_btree_bset(b, b->set, &b->data->keys); in bch2_btree_node_read_done()
1193 b->nr = bch2_key_sort_fix_overlapping(c, &sorted->keys, iter); in bch2_btree_node_read_done()
1194 memset((uint8_t *)(sorted + 1) + b->nr.live_u64s * sizeof(u64), 0, in bch2_btree_node_read_done()
1195 btree_buf_bytes(b) - in bch2_btree_node_read_done()
1196 sizeof(struct btree_node) - in bch2_btree_node_read_done()
1197 b->nr.live_u64s * sizeof(u64)); in bch2_btree_node_read_done()
1199 u64s = le16_to_cpu(sorted->keys.u64s); in bch2_btree_node_read_done()
1200 *sorted = *b->data; in bch2_btree_node_read_done()
1201 sorted->keys.u64s = cpu_to_le16(u64s); in bch2_btree_node_read_done()
1202 swap(sorted, b->data); in bch2_btree_node_read_done()
1203 set_btree_bset(b, b->set, &b->data->keys); in bch2_btree_node_read_done()
1204 b->nsets = 1; in bch2_btree_node_read_done()
1205 b->data->keys.journal_seq = cpu_to_le64(max_journal_seq); in bch2_btree_node_read_done()
1207 BUG_ON(b->nr.live_u64s != u64s); in bch2_btree_node_read_done()
1209 btree_bounce_free(c, btree_buf_bytes(b), used_mempool, sorted); in bch2_btree_node_read_done()
1212 bch2_btree_node_drop_keys_outside_node(b); in bch2_btree_node_read_done()
1214 i = &b->data->keys; in bch2_btree_node_read_done()
1215 for (k = i->start; k != vstruct_last(i);) { in bch2_btree_node_read_done()
1217 struct bkey_s u = __bkey_disassemble(b, k, &tmp); in bch2_btree_node_read_done()
1220 if (ret == -BCH_ERR_fsck_delete_bkey || in bch2_btree_node_read_done()
1222 !bversion_cmp(u.k->bversion, MAX_VERSION))) { in bch2_btree_node_read_done()
1223 btree_keys_account_key_drop(&b->nr, 0, k); in bch2_btree_node_read_done()
1225 i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s); in bch2_btree_node_read_done()
1227 (u64 *) vstruct_end(i) - (u64 *) k); in bch2_btree_node_read_done()
1228 set_btree_bset_end(b, b->set); in bch2_btree_node_read_done()
1234 if (u.k->type == KEY_TYPE_btree_ptr_v2) { in bch2_btree_node_read_done()
1237 bp.v->mem_ptr = 0; in bch2_btree_node_read_done()
1243 bch2_bset_build_aux_tree(b, b->set, false); in bch2_btree_node_read_done()
1245 set_needs_whiteout(btree_bset_first(b), true); in bch2_btree_node_read_done()
1247 btree_node_reset_sib_u64s(b); in bch2_btree_node_read_done()
1250 bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&b->key)), ptr) { in bch2_btree_node_read_done()
1251 struct bch_dev *ca2 = bch2_dev_rcu(c, ptr->dev); in bch2_btree_node_read_done()
1253 if (!ca2 || ca2->mi.state != BCH_MEMBER_STATE_rw) in bch2_btree_node_read_done()
1254 set_btree_node_need_rewrite(b); in bch2_btree_node_read_done()
1259 set_btree_node_need_rewrite(b); in bch2_btree_node_read_done()
1261 mempool_free(iter, &c->fill_iter); in bch2_btree_node_read_done()
1263 bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read_done], start_time); in bch2_btree_node_read_done()
1266 if (ret == -BCH_ERR_btree_node_read_err_want_retry || in bch2_btree_node_read_done()
1267 ret == -BCH_ERR_btree_node_read_err_must_retry) { in bch2_btree_node_read_done()
1270 set_btree_node_read_error(b); in bch2_btree_node_read_done()
1271 bch2_btree_lost_data(c, b->c.btree_id); in bch2_btree_node_read_done()
1280 struct bch_fs *c = rb->c; in btree_node_read_work()
1281 struct bch_dev *ca = rb->have_ioref ? bch2_dev_have_ref(c, rb->pick.ptr.dev) : NULL; in btree_node_read_work()
1282 struct btree *b = rb->b; in btree_node_read_work() local
1283 struct bio *bio = &rb->bio; in btree_node_read_work()
1294 ca = bch2_dev_get_ioref(c, rb->pick.ptr.dev, READ); in btree_node_read_work()
1295 rb->have_ioref = ca != NULL; in btree_node_read_work()
1297 bio->bi_iter.bi_sector = rb->pick.ptr.offset; in btree_node_read_work()
1298 bio->bi_iter.bi_size = btree_buf_bytes(b); in btree_node_read_work()
1300 if (rb->have_ioref) { in btree_node_read_work()
1301 bio_set_dev(bio, ca->disk_sb.bdev); in btree_node_read_work()
1304 bio->bi_status = BLK_STS_REMOVED; in btree_node_read_work()
1308 bch2_btree_pos_to_text(&buf, c, b); in btree_node_read_work()
1309 bch2_dev_io_err_on(ca && bio->bi_status, ca, BCH_MEMBER_ERROR_read, in btree_node_read_work()
1311 bch2_blk_status_to_str(bio->bi_status), buf.buf); in btree_node_read_work()
1312 if (rb->have_ioref) in btree_node_read_work()
1313 percpu_ref_put(&ca->io_ref); in btree_node_read_work()
1314 rb->have_ioref = false; in btree_node_read_work()
1316 bch2_mark_io_failure(&failed, &rb->pick); in btree_node_read_work()
1319 bkey_i_to_s_c(&b->key), in btree_node_read_work()
1320 &failed, &rb->pick) > 0; in btree_node_read_work()
1322 if (!bio->bi_status && in btree_node_read_work()
1323 !bch2_btree_node_read_done(c, ca, b, can_retry, &saw_error)) { in btree_node_read_work()
1332 set_btree_node_read_error(b); in btree_node_read_work()
1333 bch2_btree_lost_data(c, b->c.btree_id); in btree_node_read_work()
1338 bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read], in btree_node_read_work()
1339 rb->start_time); in btree_node_read_work()
1340 bio_put(&rb->bio); in btree_node_read_work()
1343 !btree_node_read_error(b) && in btree_node_read_work()
1344 c->curr_recovery_pass != BCH_RECOVERY_PASS_scan_for_btree_nodes) { in btree_node_read_work()
1346 bch2_bpos_to_text(&buf, b->key.k.p); in btree_node_read_work()
1348 __func__, bch2_btree_id_str(b->c.btree_id), b->c.level, buf.buf); in btree_node_read_work()
1350 bch2_btree_node_rewrite_async(c, b); in btree_node_read_work()
1354 clear_btree_node_read_in_flight(b); in btree_node_read_work()
1355 wake_up_bit(&b->flags, BTREE_NODE_read_in_flight); in btree_node_read_work()
1362 struct bch_fs *c = rb->c; in btree_node_read_endio()
1364 if (rb->have_ioref) { in btree_node_read_endio()
1365 struct bch_dev *ca = bch2_dev_have_ref(c, rb->pick.ptr.dev); in btree_node_read_endio()
1367 bch2_latency_acct(ca, rb->start_time, READ); in btree_node_read_endio()
1370 queue_work(c->btree_read_complete_wq, &rb->work); in btree_node_read_endio()
1376 struct btree *b; member
1389 if (le64_to_cpu(bn->magic) != bset_magic(c)) in btree_node_sectors_written()
1394 offset += vstruct_sectors(bn, c->block_bits); in btree_node_sectors_written()
1397 if (bne->keys.seq != bn->keys.seq) in btree_node_sectors_written()
1399 offset += vstruct_sectors(bne, c->block_bits); in btree_node_sectors_written()
1416 if (bne->keys.seq == bn->keys.seq) in btree_node_has_extra_bsets()
1428 struct bch_fs *c = ra->c; in CLOSURE_CALLBACK()
1429 struct btree *b = ra->b; in CLOSURE_CALLBACK() local
1433 int ret = 0, best = -1, write = READ; in CLOSURE_CALLBACK()
1435 __le64 seq = b->key.k.type == KEY_TYPE_btree_ptr_v2 in CLOSURE_CALLBACK()
1436 ? bkey_i_to_btree_ptr_v2(&b->key)->v.seq : 0; in CLOSURE_CALLBACK()
1439 for (i = 0; i < ra->nr; i++) { in CLOSURE_CALLBACK()
1440 struct btree_node *bn = ra->buf[i]; in CLOSURE_CALLBACK()
1442 if (ra->err[i]) in CLOSURE_CALLBACK()
1445 if (le64_to_cpu(bn->magic) != bset_magic(c) || in CLOSURE_CALLBACK()
1446 (seq && seq != bn->keys.seq)) in CLOSURE_CALLBACK()
1455 written2 = btree_node_sectors_written(c, ra->buf[i]); in CLOSURE_CALLBACK()
1456 if (btree_err_on(written2 != written, -BCH_ERR_btree_node_read_err_fixable, in CLOSURE_CALLBACK()
1457 c, NULL, b, NULL, NULL, in CLOSURE_CALLBACK()
1461 btree_err_on(btree_node_has_extra_bsets(c, written2, ra->buf[i]), in CLOSURE_CALLBACK()
1462 -BCH_ERR_btree_node_read_err_fixable, in CLOSURE_CALLBACK()
1463 c, NULL, b, NULL, NULL, in CLOSURE_CALLBACK()
1466 btree_err_on(memcmp(ra->buf[best], ra->buf[i], written << 9), in CLOSURE_CALLBACK()
1467 -BCH_ERR_btree_node_read_err_fixable, in CLOSURE_CALLBACK()
1468 c, NULL, b, NULL, NULL, in CLOSURE_CALLBACK()
1480 for (i = 0; i < ra->nr; i++) { in CLOSURE_CALLBACK()
1481 struct btree_node *bn = ra->buf[i]; in CLOSURE_CALLBACK()
1486 if (ra->err[i]) in CLOSURE_CALLBACK()
1493 sectors = vstruct_sectors(bn, c->block_bits); in CLOSURE_CALLBACK()
1495 bne = ra->buf[i] + (offset << 9); in CLOSURE_CALLBACK()
1496 if (bne->keys.seq != bn->keys.seq) in CLOSURE_CALLBACK()
1498 sectors = vstruct_sectors(bne, c->block_bits); in CLOSURE_CALLBACK()
1501 prt_printf(&buf, " %u-%u", offset, offset + sectors); in CLOSURE_CALLBACK()
1503 le64_to_cpu(bne->keys.journal_seq), false)) in CLOSURE_CALLBACK()
1509 bne = ra->buf[i] + (offset << 9); in CLOSURE_CALLBACK()
1510 if (bne->keys.seq == bn->keys.seq) { in CLOSURE_CALLBACK()
1515 sectors = vstruct_sectors(bne, c->block_bits); in CLOSURE_CALLBACK()
1516 prt_printf(&buf, " %u-%u", offset, offset + sectors); in CLOSURE_CALLBACK()
1518 le64_to_cpu(bne->keys.journal_seq), false)) in CLOSURE_CALLBACK()
1529 memcpy(b->data, ra->buf[best], btree_buf_bytes(b)); in CLOSURE_CALLBACK()
1530 ret = bch2_btree_node_read_done(c, NULL, b, false, saw_error); in CLOSURE_CALLBACK()
1532 ret = -1; in CLOSURE_CALLBACK()
1536 set_btree_node_read_error(b); in CLOSURE_CALLBACK()
1537 bch2_btree_lost_data(c, b->c.btree_id); in CLOSURE_CALLBACK()
1539 bch2_btree_node_rewrite_async(c, b); in CLOSURE_CALLBACK()
1541 for (i = 0; i < ra->nr; i++) { in CLOSURE_CALLBACK()
1542 mempool_free(ra->buf[i], &c->btree_bounce_pool); in CLOSURE_CALLBACK()
1543 bio_put(ra->bio[i]); in CLOSURE_CALLBACK()
1546 closure_debug_destroy(&ra->cl); in CLOSURE_CALLBACK()
1550 clear_btree_node_read_in_flight(b); in CLOSURE_CALLBACK()
1551 wake_up_bit(&b->flags, BTREE_NODE_read_in_flight); in CLOSURE_CALLBACK()
1558 struct bch_fs *c = rb->c; in btree_node_read_all_replicas_endio()
1559 struct btree_node_read_all *ra = rb->ra; in btree_node_read_all_replicas_endio()
1561 if (rb->have_ioref) { in btree_node_read_all_replicas_endio()
1562 struct bch_dev *ca = bch2_dev_have_ref(c, rb->pick.ptr.dev); in btree_node_read_all_replicas_endio()
1564 bch2_latency_acct(ca, rb->start_time, READ); in btree_node_read_all_replicas_endio()
1567 ra->err[rb->idx] = bio->bi_status; in btree_node_read_all_replicas_endio()
1568 closure_put(&ra->cl); in btree_node_read_all_replicas_endio()
1575 static int btree_node_read_all_replicas(struct bch_fs *c, struct btree *b, bool sync) in btree_node_read_all_replicas() argument
1577 struct bkey_s_c k = bkey_i_to_s_c(&b->key); in btree_node_read_all_replicas()
1586 return -BCH_ERR_ENOMEM_btree_node_read_all_replicas; in btree_node_read_all_replicas()
1588 closure_init(&ra->cl, NULL); in btree_node_read_all_replicas()
1589 ra->c = c; in btree_node_read_all_replicas()
1590 ra->b = b; in btree_node_read_all_replicas()
1591 ra->nr = bch2_bkey_nr_ptrs(k); in btree_node_read_all_replicas()
1593 for (i = 0; i < ra->nr; i++) { in btree_node_read_all_replicas()
1594 ra->buf[i] = mempool_alloc(&c->btree_bounce_pool, GFP_NOFS); in btree_node_read_all_replicas()
1595 ra->bio[i] = bio_alloc_bioset(NULL, in btree_node_read_all_replicas()
1596 buf_pages(ra->buf[i], btree_buf_bytes(b)), in btree_node_read_all_replicas()
1599 &c->btree_bio); in btree_node_read_all_replicas()
1606 container_of(ra->bio[i], struct btree_read_bio, bio); in btree_node_read_all_replicas()
1607 rb->c = c; in btree_node_read_all_replicas()
1608 rb->b = b; in btree_node_read_all_replicas()
1609 rb->ra = ra; in btree_node_read_all_replicas()
1610 rb->start_time = local_clock(); in btree_node_read_all_replicas()
1611 rb->have_ioref = ca != NULL; in btree_node_read_all_replicas()
1612 rb->idx = i; in btree_node_read_all_replicas()
1613 rb->pick = pick; in btree_node_read_all_replicas()
1614 rb->bio.bi_iter.bi_sector = pick.ptr.offset; in btree_node_read_all_replicas()
1615 rb->bio.bi_end_io = btree_node_read_all_replicas_endio; in btree_node_read_all_replicas()
1616 bch2_bio_map(&rb->bio, ra->buf[i], btree_buf_bytes(b)); in btree_node_read_all_replicas()
1618 if (rb->have_ioref) { in btree_node_read_all_replicas()
1619 this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_btree], in btree_node_read_all_replicas()
1620 bio_sectors(&rb->bio)); in btree_node_read_all_replicas()
1621 bio_set_dev(&rb->bio, ca->disk_sb.bdev); in btree_node_read_all_replicas()
1623 closure_get(&ra->cl); in btree_node_read_all_replicas()
1624 submit_bio(&rb->bio); in btree_node_read_all_replicas()
1626 ra->err[i] = BLK_STS_REMOVED; in btree_node_read_all_replicas()
1633 closure_sync(&ra->cl); in btree_node_read_all_replicas()
1634 btree_node_read_all_replicas_done(&ra->cl.work); in btree_node_read_all_replicas()
1636 continue_at(&ra->cl, btree_node_read_all_replicas_done, in btree_node_read_all_replicas()
1637 c->btree_read_complete_wq); in btree_node_read_all_replicas()
1643 void bch2_btree_node_read(struct btree_trans *trans, struct btree *b, in bch2_btree_node_read() argument
1646 struct bch_fs *c = trans->c; in bch2_btree_node_read()
1653 trace_and_count(c, btree_node_read, trans, b); in bch2_btree_node_read()
1656 !btree_node_read_all_replicas(c, b, sync)) in bch2_btree_node_read()
1659 ret = bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key), in bch2_btree_node_read()
1666 bch2_btree_pos_to_text(&buf, c, b); in bch2_btree_node_read()
1669 if (c->opts.recovery_passes & BIT_ULL(BCH_RECOVERY_PASS_check_topology) && in bch2_btree_node_read()
1670 c->curr_recovery_pass > BCH_RECOVERY_PASS_check_topology) in bch2_btree_node_read()
1673 set_btree_node_read_error(b); in bch2_btree_node_read()
1674 bch2_btree_lost_data(c, b->c.btree_id); in bch2_btree_node_read()
1675 clear_btree_node_read_in_flight(b); in bch2_btree_node_read()
1676 wake_up_bit(&b->flags, BTREE_NODE_read_in_flight); in bch2_btree_node_read()
1684 buf_pages(b->data, btree_buf_bytes(b)), in bch2_btree_node_read()
1687 &c->btree_bio); in bch2_btree_node_read()
1689 rb->c = c; in bch2_btree_node_read()
1690 rb->b = b; in bch2_btree_node_read()
1691 rb->ra = NULL; in bch2_btree_node_read()
1692 rb->start_time = local_clock(); in bch2_btree_node_read()
1693 rb->have_ioref = ca != NULL; in bch2_btree_node_read()
1694 rb->pick = pick; in bch2_btree_node_read()
1695 INIT_WORK(&rb->work, btree_node_read_work); in bch2_btree_node_read()
1696 bio->bi_iter.bi_sector = pick.ptr.offset; in bch2_btree_node_read()
1697 bio->bi_end_io = btree_node_read_endio; in bch2_btree_node_read()
1698 bch2_bio_map(bio, b->data, btree_buf_bytes(b)); in bch2_btree_node_read()
1700 if (rb->have_ioref) { in bch2_btree_node_read()
1701 this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_btree], in bch2_btree_node_read()
1703 bio_set_dev(bio, ca->disk_sb.bdev); in bch2_btree_node_read()
1707 bch2_latency_acct(ca, rb->start_time, READ); in bch2_btree_node_read()
1708 btree_node_read_work(&rb->work); in bch2_btree_node_read()
1713 bio->bi_status = BLK_STS_REMOVED; in bch2_btree_node_read()
1716 btree_node_read_work(&rb->work); in bch2_btree_node_read()
1718 queue_work(c->btree_read_complete_wq, &rb->work); in bch2_btree_node_read()
1725 struct bch_fs *c = trans->c; in __bch2_btree_root_read()
1727 struct btree *b; in __bch2_btree_root_read() local
1737 b = bch2_btree_node_mem_alloc(trans, level != 0); in __bch2_btree_root_read()
1740 BUG_ON(IS_ERR(b)); in __bch2_btree_root_read()
1742 bkey_copy(&b->key, k); in __bch2_btree_root_read()
1743 BUG_ON(bch2_btree_node_hash_insert(&c->btree_cache, b, level, id)); in __bch2_btree_root_read()
1745 set_btree_node_read_in_flight(b); in __bch2_btree_root_read()
1749 bch2_btree_node_read(trans, b, true); in __bch2_btree_root_read()
1751 if (btree_node_read_error(b)) { in __bch2_btree_root_read()
1752 mutex_lock(&c->btree_cache.lock); in __bch2_btree_root_read()
1753 bch2_btree_node_hash_remove(&c->btree_cache, b); in __bch2_btree_root_read()
1754 mutex_unlock(&c->btree_cache.lock); in __bch2_btree_root_read()
1756 ret = -BCH_ERR_btree_node_read_error; in __bch2_btree_root_read()
1760 bch2_btree_set_root_for_read(c, b); in __bch2_btree_root_read()
1762 six_unlock_write(&b->c.lock); in __bch2_btree_root_read()
1763 six_unlock_intent(&b->c.lock); in __bch2_btree_root_read()
1774 static void bch2_btree_complete_write(struct bch_fs *c, struct btree *b, in bch2_btree_complete_write() argument
1779 old = READ_ONCE(b->will_make_reachable); in bch2_btree_complete_write()
1786 } while (!try_cmpxchg(&b->will_make_reachable, &old, new)); in bch2_btree_complete_write()
1789 closure_put(&((struct btree_update *) new)->cl); in bch2_btree_complete_write()
1791 bch2_journal_pin_drop(&c->journal, &w->journal); in bch2_btree_complete_write()
1794 static void __btree_node_write_done(struct bch_fs *c, struct btree *b) in __btree_node_write_done() argument
1796 struct btree_write *w = btree_prev_write(b); in __btree_node_write_done()
1800 bch2_btree_complete_write(c, b, w); in __btree_node_write_done()
1802 old = READ_ONCE(b->flags); in __btree_node_write_done()
1824 } while (!try_cmpxchg(&b->flags, &old, new)); in __btree_node_write_done()
1827 __bch2_btree_node_write(c, b, BTREE_WRITE_ALREADY_STARTED|type); in __btree_node_write_done()
1829 wake_up_bit(&b->flags, BTREE_NODE_write_in_flight); in __btree_node_write_done()
1832 static void btree_node_write_done(struct bch_fs *c, struct btree *b) in btree_node_write_done() argument
1836 btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read); in btree_node_write_done()
1840 __btree_node_write_done(c, b); in btree_node_write_done()
1841 six_unlock_read(&b->c.lock); in btree_node_write_done()
1848 struct bch_fs *c = wbio->wbio.c; in btree_node_write_work()
1849 struct btree *b = wbio->wbio.bio.bi_private; in btree_node_write_work() local
1853 wbio->data_bytes, in btree_node_write_work()
1854 wbio->wbio.used_mempool, in btree_node_write_work()
1855 wbio->data); in btree_node_write_work()
1857 bch2_bkey_drop_ptrs(bkey_i_to_s(&wbio->key), ptr, in btree_node_write_work()
1858 bch2_dev_list_has_dev(wbio->wbio.failed, ptr->dev)); in btree_node_write_work()
1860 if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(&wbio->key))) { in btree_node_write_work()
1861 ret = -BCH_ERR_btree_node_write_all_failed; in btree_node_write_work()
1865 if (wbio->wbio.first_btree_write) { in btree_node_write_work()
1866 if (wbio->wbio.failed.nr) { in btree_node_write_work()
1871 bch2_btree_node_update_key_get_iter(trans, b, &wbio->key, in btree_node_write_work()
1876 !wbio->wbio.failed.nr)); in btree_node_write_work()
1881 bio_put(&wbio->wbio.bio); in btree_node_write_work()
1882 btree_node_write_done(c, b); in btree_node_write_work()
1885 set_btree_node_noevict(b); in btree_node_write_work()
1894 struct bch_write_bio *parent = wbio->split ? wbio->parent : NULL; in btree_node_write_endio()
1897 struct bch_fs *c = wbio->c; in btree_node_write_endio()
1898 struct btree *b = wbio->bio.bi_private; in btree_node_write_endio() local
1899 struct bch_dev *ca = wbio->have_ioref ? bch2_dev_have_ref(c, wbio->dev) : NULL; in btree_node_write_endio()
1902 if (wbio->have_ioref) in btree_node_write_endio()
1903 bch2_latency_acct(ca, wbio->submit_time, WRITE); in btree_node_write_endio()
1906 bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_write, in btree_node_write_endio()
1908 bch2_blk_status_to_str(bio->bi_status)) || in btree_node_write_endio()
1910 spin_lock_irqsave(&c->btree_write_error_lock, flags); in btree_node_write_endio()
1911 bch2_dev_list_add_dev(&orig->failed, wbio->dev); in btree_node_write_endio()
1912 spin_unlock_irqrestore(&c->btree_write_error_lock, flags); in btree_node_write_endio()
1915 if (wbio->have_ioref) in btree_node_write_endio()
1916 percpu_ref_put(&ca->io_ref); in btree_node_write_endio()
1920 bio_endio(&parent->bio); in btree_node_write_endio()
1924 clear_btree_node_write_in_flight_inner(b); in btree_node_write_endio()
1925 wake_up_bit(&b->flags, BTREE_NODE_write_in_flight_inner); in btree_node_write_endio()
1926 INIT_WORK(&wb->work, btree_node_write_work); in btree_node_write_endio()
1927 queue_work(c->btree_io_complete_wq, &wb->work); in btree_node_write_endio()
1930 static int validate_bset_for_write(struct bch_fs *c, struct btree *b, in validate_bset_for_write() argument
1935 int ret = bch2_bkey_validate(c, bkey_i_to_s_c(&b->key), in validate_bset_for_write()
1942 ret = validate_bset_keys(c, b, i, WRITE, false, &saw_error) ?: in validate_bset_for_write()
1943 validate_bset(c, NULL, b, i, b->written, sectors, WRITE, false, &saw_error); in validate_bset_for_write()
1957 bkey_copy(&tmp.k, &wbio->key); in btree_write_submit()
1960 ptr->offset += wbio->sector_offset; in btree_write_submit()
1962 bch2_submit_wbio_replicas(&wbio->wbio, wbio->wbio.c, BCH_DATA_btree, in btree_write_submit()
1966 void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags) in __bch2_btree_node_write() argument
1987 * We may only have a read lock on the btree node - the dirty bit is our in __bch2_btree_node_write()
1993 old = READ_ONCE(b->flags); in __bch2_btree_node_write()
2009 if (b->written && in __bch2_btree_node_write()
2026 } while (!try_cmpxchg_acquire(&b->flags, &old, new)); in __bch2_btree_node_write()
2031 BUG_ON((type == BTREE_WRITE_initial) != (b->written == 0)); in __bch2_btree_node_write()
2033 atomic_long_dec(&c->btree_cache.nr_dirty); in __bch2_btree_node_write()
2035 BUG_ON(btree_node_fake(b)); in __bch2_btree_node_write()
2036 BUG_ON((b->will_make_reachable != 0) != !b->written); in __bch2_btree_node_write()
2038 BUG_ON(b->written >= btree_sectors(c)); in __bch2_btree_node_write()
2039 BUG_ON(b->written & (block_sectors(c) - 1)); in __bch2_btree_node_write()
2040 BUG_ON(bset_written(b, btree_bset_last(b))); in __bch2_btree_node_write()
2041 BUG_ON(le64_to_cpu(b->data->magic) != bset_magic(c)); in __bch2_btree_node_write()
2042 BUG_ON(memcmp(&b->data->format, &b->format, sizeof(b->format))); in __bch2_btree_node_write()
2044 bch2_sort_whiteouts(c, b); in __bch2_btree_node_write()
2046 sort_iter_stack_init(&sort_iter, b); in __bch2_btree_node_write()
2048 bytes = !b->written in __bch2_btree_node_write()
2052 bytes += b->whiteout_u64s * sizeof(u64); in __bch2_btree_node_write()
2054 for_each_bset(b, t) { in __bch2_btree_node_write()
2055 i = bset(b, t); in __bch2_btree_node_write()
2057 if (bset_written(b, i)) in __bch2_btree_node_write()
2060 bytes += le16_to_cpu(i->u64s) * sizeof(u64); in __bch2_btree_node_write()
2062 btree_bkey_first(b, t), in __bch2_btree_node_write()
2063 btree_bkey_last(b, t)); in __bch2_btree_node_write()
2064 seq = max(seq, le64_to_cpu(i->journal_seq)); in __bch2_btree_node_write()
2067 BUG_ON(b->written && !seq); in __bch2_btree_node_write()
2077 if (!b->written) { in __bch2_btree_node_write()
2079 *bn = *b->data; in __bch2_btree_node_write()
2080 i = &bn->keys; in __bch2_btree_node_write()
2083 bne->keys = b->data->keys; in __bch2_btree_node_write()
2084 i = &bne->keys; in __bch2_btree_node_write()
2087 i->journal_seq = cpu_to_le64(seq); in __bch2_btree_node_write()
2088 i->u64s = 0; in __bch2_btree_node_write()
2091 unwritten_whiteouts_start(b), in __bch2_btree_node_write()
2092 unwritten_whiteouts_end(b)); in __bch2_btree_node_write()
2095 u64s = bch2_sort_keys_keep_unwritten_whiteouts(i->start, &sort_iter.iter); in __bch2_btree_node_write()
2096 le16_add_cpu(&i->u64s, u64s); in __bch2_btree_node_write()
2098 b->whiteout_u64s = 0; in __bch2_btree_node_write()
2100 BUG_ON(!b->written && i->u64s != b->data->keys.u64s); in __bch2_btree_node_write()
2105 if (b->written && !i->u64s) in __bch2_btree_node_write()
2108 bytes_to_write = vstruct_end(i) - data; in __bch2_btree_node_write()
2111 if (!b->written && in __bch2_btree_node_write()
2112 b->key.k.type == KEY_TYPE_btree_ptr_v2) in __bch2_btree_node_write()
2113 BUG_ON(btree_ptr_sectors_written(bkey_i_to_s_c(&b->key)) != sectors_to_write); in __bch2_btree_node_write()
2116 (sectors_to_write << 9) - bytes_to_write); in __bch2_btree_node_write()
2118 BUG_ON(b->written + sectors_to_write > btree_sectors(c)); in __bch2_btree_node_write()
2120 BUG_ON(i->seq != b->data->keys.seq); in __bch2_btree_node_write()
2122 i->version = cpu_to_le16(c->sb.version); in __bch2_btree_node_write()
2123 SET_BSET_OFFSET(i, b->written); in __bch2_btree_node_write()
2130 if (le16_to_cpu(i->version) < bcachefs_metadata_version_current) in __bch2_btree_node_write()
2135 validate_bset_for_write(c, b, i, sectors_to_write)) in __bch2_btree_node_write()
2138 ret = bset_encrypt(c, i, b->written << 9); in __bch2_btree_node_write()
2143 nonce = btree_nonce(i, b->written << 9); in __bch2_btree_node_write()
2146 bn->csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bn); in __bch2_btree_node_write()
2148 bne->csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne); in __bch2_btree_node_write()
2152 validate_bset_for_write(c, b, i, sectors_to_write)) in __bch2_btree_node_write()
2156 * We handle btree write errors by immediately halting the journal - in __bch2_btree_node_write()
2166 * never journalled (interior nodes, see btree_update_nodes_written()) - in __bch2_btree_node_write()
2170 * Make sure to update b->written so bch2_btree_init_next() doesn't in __bch2_btree_node_write()
2173 if (bch2_journal_error(&c->journal) || in __bch2_btree_node_write()
2174 c->opts.nochanges) in __bch2_btree_node_write()
2177 trace_and_count(c, btree_node_write, b, bytes_to_write, sectors_to_write); in __bch2_btree_node_write()
2183 &c->btree_bio), in __bch2_btree_node_write()
2185 wbio_init(&wbio->wbio.bio); in __bch2_btree_node_write()
2186 wbio->data = data; in __bch2_btree_node_write()
2187 wbio->data_bytes = bytes; in __bch2_btree_node_write()
2188 wbio->sector_offset = b->written; in __bch2_btree_node_write()
2189 wbio->wbio.c = c; in __bch2_btree_node_write()
2190 wbio->wbio.used_mempool = used_mempool; in __bch2_btree_node_write()
2191 wbio->wbio.first_btree_write = !b->written; in __bch2_btree_node_write()
2192 wbio->wbio.bio.bi_end_io = btree_node_write_endio; in __bch2_btree_node_write()
2193 wbio->wbio.bio.bi_private = b; in __bch2_btree_node_write()
2195 bch2_bio_map(&wbio->wbio.bio, data, sectors_to_write << 9); in __bch2_btree_node_write()
2197 bkey_copy(&wbio->key, &b->key); in __bch2_btree_node_write()
2199 b->written += sectors_to_write; in __bch2_btree_node_write()
2201 if (wbio->key.k.type == KEY_TYPE_btree_ptr_v2) in __bch2_btree_node_write()
2202 bkey_i_to_btree_ptr_v2(&wbio->key)->v.sectors_written = in __bch2_btree_node_write()
2203 cpu_to_le16(b->written); in __bch2_btree_node_write()
2205 atomic64_inc(&c->btree_write_stats[type].nr); in __bch2_btree_node_write()
2206 atomic64_add(bytes_to_write, &c->btree_write_stats[type].bytes); in __bch2_btree_node_write()
2208 INIT_WORK(&wbio->work, btree_write_submit); in __bch2_btree_node_write()
2209 queue_work(c->btree_write_submit_wq, &wbio->work); in __bch2_btree_node_write()
2212 set_btree_node_noevict(b); in __bch2_btree_node_write()
2213 b->written += sectors_to_write; in __bch2_btree_node_write()
2216 __btree_node_write_done(c, b); in __bch2_btree_node_write()
2222 bool bch2_btree_post_write_cleanup(struct bch_fs *c, struct btree *b) in bch2_btree_post_write_cleanup() argument
2227 if (!btree_node_just_written(b)) in bch2_btree_post_write_cleanup()
2230 BUG_ON(b->whiteout_u64s); in bch2_btree_post_write_cleanup()
2232 clear_btree_node_just_written(b); in bch2_btree_post_write_cleanup()
2235 * Note: immediately after write, bset_written() doesn't work - the in bch2_btree_post_write_cleanup()
2247 if (b->nsets > 1) { in bch2_btree_post_write_cleanup()
2248 btree_node_sort(c, b, 0, b->nsets); in bch2_btree_post_write_cleanup()
2251 invalidated_iter = bch2_drop_whiteouts(b, COMPACT_ALL); in bch2_btree_post_write_cleanup()
2254 for_each_bset(b, t) in bch2_btree_post_write_cleanup()
2255 set_needs_whiteout(bset(b, t), true); in bch2_btree_post_write_cleanup()
2257 bch2_btree_verify(c, b); in bch2_btree_post_write_cleanup()
2263 BUG_ON((void *) btree_bkey_last(b, bset_tree_last(b)) > write_block(b)); in bch2_btree_post_write_cleanup()
2265 bne = want_new_bset(c, b); in bch2_btree_post_write_cleanup()
2267 bch2_bset_init_next(b, bne); in bch2_btree_post_write_cleanup()
2269 bch2_btree_build_aux_trees(b); in bch2_btree_post_write_cleanup()
2277 void bch2_btree_node_write(struct bch_fs *c, struct btree *b, in bch2_btree_node_write() argument
2283 six_lock_tryupgrade(&b->c.lock))) { in bch2_btree_node_write()
2284 __bch2_btree_node_write(c, b, flags); in bch2_btree_node_write()
2287 if (btree_node_just_written(b) && in bch2_btree_node_write()
2288 six_trylock_write(&b->c.lock)) { in bch2_btree_node_write()
2289 bch2_btree_post_write_cleanup(c, b); in bch2_btree_node_write()
2290 six_unlock_write(&b->c.lock); in bch2_btree_node_write()
2294 six_lock_downgrade(&b->c.lock); in bch2_btree_node_write()
2296 __bch2_btree_node_write(c, b, flags); in bch2_btree_node_write()
2298 btree_node_just_written(b)) in bch2_btree_node_write()
2299 bch2_btree_post_write_cleanup(c, b); in bch2_btree_node_write()
2307 struct btree *b; in __bch2_btree_flush_all() local
2312 for_each_cached_btree(b, c, tbl, i, pos) in __bch2_btree_flush_all()
2313 if (test_bit(flag, &b->flags)) { in __bch2_btree_flush_all()
2315 wait_on_bit_io(&b->flags, flag, TASK_UNINTERRUPTIBLE); in __bch2_btree_flush_all()
2348 u64 nr = atomic64_read(&c->btree_write_stats[i].nr); in bch2_btree_write_stats_to_text()
2349 u64 bytes = atomic64_read(&c->btree_write_stats[i].bytes); in bch2_btree_write_stats_to_text()