Lines Matching +full:j +full:- +full:to +full:- +full:k

1 // SPDX-License-Identifier: GPL-2.0
17 #include "sb-clean.h"
22 lockdep_assert_held(&c->sb_lock); in bch2_journal_pos_from_member_info_set()
25 struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); in bch2_journal_pos_from_member_info_set()
27 m->last_journal_bucket = cpu_to_le32(ca->journal.cur_idx); in bch2_journal_pos_from_member_info_set()
28 m->last_journal_bucket_offset = cpu_to_le32(ca->mi.bucket_size - ca->journal.sectors_free); in bch2_journal_pos_from_member_info_set()
34 mutex_lock(&c->sb_lock); in bch2_journal_pos_from_member_info_resume()
36 struct bch_member m = bch2_sb_member_get(c->disk_sb.sb, ca->dev_idx); in bch2_journal_pos_from_member_info_resume()
39 if (idx < ca->journal.nr) in bch2_journal_pos_from_member_info_resume()
40 ca->journal.cur_idx = idx; in bch2_journal_pos_from_member_info_resume()
42 if (offset <= ca->mi.bucket_size) in bch2_journal_pos_from_member_info_resume()
43 ca->journal.sectors_free = ca->mi.bucket_size - offset; in bch2_journal_pos_from_member_info_resume()
45 mutex_unlock(&c->sb_lock); in bch2_journal_pos_from_member_info_resume()
49 struct journal_replay *j) in bch2_journal_ptrs_to_text() argument
51 darray_for_each(j->ptrs, i) { in bch2_journal_ptrs_to_text()
52 if (i != j->ptrs.data) in bch2_journal_ptrs_to_text()
55 i->dev, i->bucket, i->bucket_offset, i->sector); in bch2_journal_ptrs_to_text()
60 struct journal_replay *j) in bch2_journal_replay_to_text() argument
62 prt_printf(out, "seq %llu ", le64_to_cpu(j->j.seq)); in bch2_journal_replay_to_text()
64 bch2_journal_ptrs_to_text(out, c, j); in bch2_journal_replay_to_text()
66 for_each_jset_entry_type(entry, &j->j, BCH_JSET_ENTRY_datetime) { in bch2_journal_replay_to_text()
69 bch2_prt_datetime(out, le64_to_cpu(datetime->seconds)); in bch2_journal_replay_to_text()
78 [1] = ((__le32 *) &jset->seq)[0], in journal_nonce()
79 [2] = ((__le32 *) &jset->seq)[1], in journal_nonce()
84 static bool jset_csum_good(struct bch_fs *c, struct jset *j, struct bch_csum *csum) in jset_csum_good() argument
86 if (!bch2_checksum_type_valid(c, JSET_CSUM_TYPE(j))) { in jset_csum_good()
91 *csum = csum_vstruct(c, JSET_CSUM_TYPE(j), journal_nonce(j), j); in jset_csum_good()
92 return !bch2_crc_cmp(j->csum, *csum); in jset_csum_good()
97 return (seq - c->journal_entries_base_seq) & (~0U >> 1); in journal_entry_radix_idx()
104 genradix_ptr(&c->journal_entries, in __journal_replay_free()
105 journal_entry_radix_idx(c, le64_to_cpu(i->j.seq))); in __journal_replay_free()
115 i->ignore_blacklisted = true; in journal_replay_free()
117 i->ignore_not_dirty = true; in journal_replay_free()
119 if (!c->opts.read_entire_journal) in journal_replay_free()
134 * Given a journal entry we just read, add it to the list of journal entries to
139 struct journal_list *jlist, struct jset *j) in journal_entry_add() argument
143 size_t bytes = vstruct_bytes(j); in journal_entry_add()
144 u64 last_seq = !JSET_NO_FLUSH(j) ? le64_to_cpu(j->last_seq) : 0; in journal_entry_add()
148 if (!c->journal.oldest_seq_found_ondisk || in journal_entry_add()
149 le64_to_cpu(j->seq) < c->journal.oldest_seq_found_ondisk) in journal_entry_add()
150 c->journal.oldest_seq_found_ondisk = le64_to_cpu(j->seq); in journal_entry_add()
153 if (!c->opts.read_entire_journal && in journal_entry_add()
154 le64_to_cpu(j->seq) < jlist->last_seq) in journal_entry_add()
160 * within the range of +-2billion of the first one we find. in journal_entry_add()
162 if (!c->journal_entries_base_seq) in journal_entry_add()
163 c->journal_entries_base_seq = max_t(s64, 1, le64_to_cpu(j->seq) - S32_MAX); in journal_entry_add()
166 if (last_seq > jlist->last_seq && !c->opts.read_entire_journal) { in journal_entry_add()
167 genradix_for_each_from(&c->journal_entries, iter, _i, in journal_entry_add()
168 journal_entry_radix_idx(c, jlist->last_seq)) { in journal_entry_add()
174 if (le64_to_cpu(i->j.seq) >= last_seq) in journal_entry_add()
181 jlist->last_seq = max(jlist->last_seq, last_seq); in journal_entry_add()
183 _i = genradix_ptr_alloc(&c->journal_entries, in journal_entry_add()
184 journal_entry_radix_idx(c, le64_to_cpu(j->seq)), in journal_entry_add()
187 return -BCH_ERR_ENOMEM_journal_entry_add; in journal_entry_add()
195 bool identical = bytes == vstruct_bytes(&dup->j) && in journal_entry_add()
196 !memcmp(j, &dup->j, bytes); in journal_entry_add()
199 dup->csum_good; in journal_entry_add()
202 darray_for_each(dup->ptrs, ptr) in journal_entry_add()
203 if (ptr->dev == ca->dev_idx) in journal_entry_add()
206 ret = darray_push(&dup->ptrs, entry_ptr); in journal_entry_add()
228 i = kvmalloc(offsetof(struct journal_replay, j) + bytes, GFP_KERNEL); in journal_entry_add()
230 return -BCH_ERR_ENOMEM_journal_entry_add; in journal_entry_add()
232 darray_init(&i->ptrs); in journal_entry_add()
233 i->csum_good = entry_ptr.csum_good; in journal_entry_add()
234 i->ignore_blacklisted = false; in journal_entry_add()
235 i->ignore_not_dirty = false; in journal_entry_add()
236 unsafe_memcpy(&i->j, j, bytes, "embedded variable length struct"); in journal_entry_add()
240 darray_for_each(dup->ptrs, ptr) in journal_entry_add()
241 darray_push(&i->ptrs, *ptr); in journal_entry_add()
244 darray_push(&i->ptrs, entry_ptr); in journal_entry_add()
277 bch2_prt_jset_entry_type(out, entry->type); in journal_entry_err_msg()
284 prt_printf(out, " seq=%llu", le64_to_cpu(jset->seq)); in journal_entry_err_msg()
288 (u64 *) entry - jset->_data, in journal_entry_err_msg()
289 le32_to_cpu(jset->u64s)); in journal_entry_err_msg()
310 ret = -BCH_ERR_fsck_errors_not_fixed; \
329 struct bkey_i *k, in journal_validate_key() argument
337 if (journal_entry_err_on(!k->k.u64s, in journal_validate_key()
340 "k->u64s 0")) { in journal_validate_key()
341 entry->u64s = cpu_to_le16((u64 *) k - entry->_data); in journal_validate_key()
346 if (journal_entry_err_on((void *) bkey_next(k) > in journal_validate_key()
351 entry->u64s = cpu_to_le16((u64 *) k - entry->_data); in journal_validate_key()
356 if (journal_entry_err_on(k->k.format != KEY_FORMAT_CURRENT, in journal_validate_key()
359 "bad format %u", k->k.format)) { in journal_validate_key()
360 le16_add_cpu(&entry->u64s, -((u16) k->k.u64s)); in journal_validate_key()
361 memmove(k, bkey_next(k), next - (void *) bkey_next(k)); in journal_validate_key()
368 write, NULL, bkey_to_packed(k)); in journal_validate_key()
370 ret = bch2_bkey_validate(c, bkey_i_to_s_c(k), in journal_validate_key()
372 if (ret == -BCH_ERR_fsck_delete_bkey) { in journal_validate_key()
373 le16_add_cpu(&entry->u64s, -((u16) k->k.u64s)); in journal_validate_key()
374 memmove(k, bkey_next(k), next - (void *) bkey_next(k)); in journal_validate_key()
383 write, NULL, bkey_to_packed(k)); in journal_validate_key()
394 struct bkey_i *k = entry->start; in journal_entry_btree_keys_validate() local
396 while (k != vstruct_last(entry)) { in journal_entry_btree_keys_validate()
398 entry->level, in journal_entry_btree_keys_validate()
399 entry->btree_id, in journal_entry_btree_keys_validate()
400 k, version, big_endian, in journal_entry_btree_keys_validate()
407 k = bkey_next(k); in journal_entry_btree_keys_validate()
418 jset_entry_for_each_key(entry, k) { in journal_entry_btree_keys_to_text()
421 bch2_prt_jset_entry_type(out, entry->type); in journal_entry_btree_keys_to_text()
424 prt_printf(out, "btree=%s l=%u ", bch2_btree_id_str(entry->btree_id), entry->level); in journal_entry_btree_keys_to_text()
425 bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(k)); in journal_entry_btree_keys_to_text()
436 struct bkey_i *k = entry->start; in journal_entry_btree_root_validate() local
439 if (journal_entry_err_on(!entry->u64s || in journal_entry_btree_root_validate()
440 le16_to_cpu(entry->u64s) != k->k.u64s, in journal_entry_btree_root_validate()
446 * we don't want to null out this jset_entry, in journal_entry_btree_root_validate()
448 * we were _supposed_ to have a btree root in journal_entry_btree_root_validate()
450 entry->u64s = 0; in journal_entry_btree_root_validate()
455 ret = journal_validate_key(c, jset, entry, 1, entry->btree_id, k, in journal_entry_btree_root_validate()
492 if (journal_entry_err_on(le16_to_cpu(entry->u64s) != 1, in journal_entry_blacklist_validate()
508 prt_printf(out, "seq=%llu", le64_to_cpu(bl->seq)); in journal_entry_blacklist_to_text()
520 if (journal_entry_err_on(le16_to_cpu(entry->u64s) != 2, in journal_entry_blacklist_v2_validate()
530 if (journal_entry_err_on(le64_to_cpu(bl_entry->start) > in journal_entry_blacklist_v2_validate()
531 le64_to_cpu(bl_entry->end), in journal_entry_blacklist_v2_validate()
549 le64_to_cpu(bl->start), in journal_entry_blacklist_v2_to_text()
550 le64_to_cpu(bl->end)); in journal_entry_blacklist_v2_to_text()
561 unsigned bytes = jset_u64s(le16_to_cpu(entry->u64s)) * sizeof(u64); in journal_entry_usage_validate()
583 bch2_prt_fs_usage_type(out, u->entry.btree_id); in journal_entry_usage_to_text()
584 prt_printf(out, " v=%llu", le64_to_cpu(u->v)); in journal_entry_usage_to_text()
595 unsigned bytes = jset_u64s(le16_to_cpu(entry->u64s)) * sizeof(u64); in journal_entry_data_usage_validate()
600 bytes < sizeof(*u) + u->r.nr_devs, in journal_entry_data_usage_validate()
608 if (journal_entry_err_on(bch2_replicas_entry_validate(&u->r, c, &err), in journal_entry_data_usage_validate()
627 bch2_replicas_entry_to_text(out, &u->r); in journal_entry_data_usage_to_text()
628 prt_printf(out, "=%llu", le64_to_cpu(u->v)); in journal_entry_data_usage_to_text()
639 unsigned bytes = jset_u64s(le16_to_cpu(entry->u64s)) * sizeof(u64); in journal_entry_clock_validate()
650 if (journal_entry_err_on(clock->rw > 1, in journal_entry_clock_validate()
668 prt_printf(out, "%s=%llu", clock->rw ? "write" : "read", le64_to_cpu(clock->time)); in journal_entry_clock_to_text()
679 unsigned bytes = jset_u64s(le16_to_cpu(entry->u64s)) * sizeof(u64); in journal_entry_dev_usage_validate()
692 if (journal_entry_err_on(u->pad, in journal_entry_dev_usage_validate()
714 prt_printf(out, "dev=%u", le32_to_cpu(u->dev)); in journal_entry_dev_usage_to_text()
721 le64_to_cpu(u->d[i].buckets), in journal_entry_dev_usage_to_text()
722 le64_to_cpu(u->d[i].sectors), in journal_entry_dev_usage_to_text()
723 le64_to_cpu(u->d[i].fragmented)); in journal_entry_dev_usage_to_text()
741 unsigned bytes = vstruct_bytes(entry) - offsetof(struct jset_entry_log, d); in journal_entry_log_to_text()
743 prt_printf(out, "%.*s", bytes, l->d); in journal_entry_log_to_text()
806 bch2_prt_datetime(out, le64_to_cpu(datetime->seconds)); in journal_entry_datetime_to_text()
832 return entry->type < BCH_JSET_ENTRY_NR in bch2_journal_entry_validate()
833 ? bch2_jset_entry_ops[entry->type].validate(c, jset, entry, in bch2_journal_entry_validate()
841 bch2_prt_jset_entry_type(out, entry->type); in bch2_journal_entry_to_text()
843 if (entry->type < BCH_JSET_ENTRY_NR) { in bch2_journal_entry_to_text()
845 bch2_jset_entry_ops[entry->type].to_text(out, c, entry); in bch2_journal_entry_to_text()
852 unsigned version = le32_to_cpu(jset->version); in jset_validate_entries()
860 jset->u64s = cpu_to_le32((u64 *) entry - jset->_data); in jset_validate_entries()
881 if (le64_to_cpu(jset->magic) != jset_magic(c)) in jset_validate()
884 version = le32_to_cpu(jset->version); in jset_validate()
889 ca ? ca->name : c->name, in jset_validate()
890 sector, le64_to_cpu(jset->seq), in jset_validate()
893 /* don't try to continue: */ in jset_validate()
894 return -EINVAL; in jset_validate()
901 ca ? ca->name : c->name, in jset_validate()
902 sector, le64_to_cpu(jset->seq), in jset_validate()
908 le64_to_cpu(jset->last_seq) > le64_to_cpu(jset->seq), in jset_validate()
912 le64_to_cpu(jset->last_seq), in jset_validate()
913 le64_to_cpu(jset->seq))) { in jset_validate()
914 jset->last_seq = jset->seq; in jset_validate()
934 if (le64_to_cpu(jset->magic) != jset_magic(c)) in jset_validate_early()
937 version = le32_to_cpu(jset->version); in jset_validate_early()
942 ca ? ca->name : c->name, in jset_validate_early()
943 sector, le64_to_cpu(jset->seq), in jset_validate_early()
946 /* don't try to continue: */ in jset_validate_early()
947 return -EINVAL; in jset_validate_early()
958 ca ? ca->name : c->name, in jset_validate_early()
959 sector, le64_to_cpu(jset->seq), bytes)) in jset_validate_early()
960 le32_add_cpu(&jset->u64s, in jset_validate_early()
961 -((bytes - (bucket_sectors_left << 9)) / 8)); in jset_validate_early()
978 return -BCH_ERR_ENOMEM_journal_read_buf_realloc; in journal_read_buf_realloc()
983 return -BCH_ERR_ENOMEM_journal_read_buf_realloc; in journal_read_buf_realloc()
985 kvfree(b->data); in journal_read_buf_realloc()
986 b->data = n; in journal_read_buf_realloc()
987 b->size = new_size; in journal_read_buf_realloc()
996 struct bch_fs *c = ca->fs; in journal_read_bucket()
997 struct journal_device *ja = &ca->journal; in journal_read_bucket()
998 struct jset *j = NULL; in journal_read_bucket() local
1000 u64 offset = bucket_to_sector(ca, ja->buckets[bucket]), in journal_read_bucket()
1001 end = offset + ca->mi.bucket_size; in journal_read_bucket()
1014 end - offset, buf->size >> 9); in journal_read_bucket()
1015 nr_bvecs = buf_pages(buf->data, sectors_read << 9); in journal_read_bucket()
1019 return -BCH_ERR_ENOMEM_journal_read_bucket; in journal_read_bucket()
1020 bio_init(bio, ca->disk_sb.bdev, bio->bi_inline_vecs, nr_bvecs, REQ_OP_READ); in journal_read_bucket()
1022 bio->bi_iter.bi_sector = offset; in journal_read_bucket()
1023 bch2_bio_map(bio, buf->data, sectors_read << 9); in journal_read_bucket()
1041 j = buf->data; in journal_read_bucket()
1044 ret = jset_validate_early(c, ca, j, offset, in journal_read_bucket()
1045 end - offset, sectors_read); in journal_read_bucket()
1048 sectors = vstruct_sectors(j, c->block_bits); in journal_read_bucket()
1051 if (vstruct_bytes(j) > buf->size) { in journal_read_bucket()
1053 vstruct_bytes(j)); in journal_read_bucket()
1072 if (le64_to_cpu(j->seq) > ja->highest_seq_found) { in journal_read_bucket()
1073 ja->highest_seq_found = le64_to_cpu(j->seq); in journal_read_bucket()
1074 ja->cur_idx = bucket; in journal_read_bucket()
1075 ja->sectors_free = ca->mi.bucket_size - in journal_read_bucket()
1076 bucket_remainder(ca, offset) - sectors; in journal_read_bucket()
1080 * This happens sometimes if we don't have discards on - in journal_read_bucket()
1085 if (le64_to_cpu(j->seq) < ja->bucket_seq[bucket]) in journal_read_bucket()
1088 ja->bucket_seq[bucket] = le64_to_cpu(j->seq); in journal_read_bucket()
1090 enum bch_csum_type csum_type = JSET_CSUM_TYPE(j); in journal_read_bucket()
1092 csum_good = jset_csum_good(c, j, &csum); in journal_read_bucket()
1098 bch2_csum_err_msg(&err, csum_type, j->csum, csum), in journal_read_bucket()
1102 ret = bch2_encrypt(c, JSET_CSUM_TYPE(j), journal_nonce(j), in journal_read_bucket()
1103 j->encrypted_start, in journal_read_bucket()
1104 vstruct_end(j) - (void *) j->encrypted_start); in journal_read_bucket()
1107 mutex_lock(&jlist->lock); in journal_read_bucket()
1110 .dev = ca->dev_idx, in journal_read_bucket()
1112 .bucket_offset = offset - in journal_read_bucket()
1113 bucket_to_sector(ca, ja->buckets[bucket]), in journal_read_bucket()
1115 }, jlist, j); in journal_read_bucket()
1116 mutex_unlock(&jlist->lock); in journal_read_bucket()
1129 sectors_read -= sectors; in journal_read_bucket()
1130 j = ((void *) j) + (sectors << 9); in journal_read_bucket()
1144 struct bch_fs *c = ca->fs; in CLOSURE_CALLBACK()
1146 container_of(cl->parent, struct journal_list, cl); in CLOSURE_CALLBACK()
1151 if (!ja->nr) in CLOSURE_CALLBACK()
1158 pr_debug("%u journal buckets", ja->nr); in CLOSURE_CALLBACK()
1160 for (i = 0; i < ja->nr; i++) { in CLOSURE_CALLBACK()
1167 * Set dirty_idx to indicate the entire journal is full and needs to be in CLOSURE_CALLBACK()
1168 * reclaimed - journal reclaim will immediately reclaim whatever isn't in CLOSURE_CALLBACK()
1171 ja->discard_idx = ja->dirty_idx_ondisk = in CLOSURE_CALLBACK()
1172 ja->dirty_idx = (ja->cur_idx + 1) % ja->nr; in CLOSURE_CALLBACK()
1174 bch_verbose(c, "journal read done on device %s, ret %i", ca->name, ret); in CLOSURE_CALLBACK()
1176 percpu_ref_put(&ca->io_ref); in CLOSURE_CALLBACK()
1180 mutex_lock(&jlist->lock); in CLOSURE_CALLBACK()
1181 jlist->ret = ret; in CLOSURE_CALLBACK()
1182 mutex_unlock(&jlist->lock); in CLOSURE_CALLBACK()
1205 if (!c->opts.fsck && in bch2_journal_read()
1209 if ((ca->mi.state == BCH_MEMBER_STATE_rw || in bch2_journal_read()
1210 ca->mi.state == BCH_MEMBER_STATE_ro) && in bch2_journal_read()
1211 percpu_ref_tryget(&ca->io_ref)) in bch2_journal_read()
1212 closure_call(&ca->journal.read, in bch2_journal_read()
1230 * Find most recent flush entry, and ignore newer non flush entries - in bch2_journal_read()
1233 genradix_for_each_reverse(&c->journal_entries, radix_iter, _i) { in bch2_journal_read()
1242 *blacklist_seq = *start_seq = le64_to_cpu(i->j.seq) + 1; in bch2_journal_read()
1244 if (JSET_NO_FLUSH(&i->j)) { in bch2_journal_read()
1245 i->ignore_blacklisted = true; in bch2_journal_read()
1249 if (!last_write_torn && !i->csum_good) { in bch2_journal_read()
1251 i->ignore_blacklisted = true; in bch2_journal_read()
1255 if (journal_entry_err_on(le64_to_cpu(i->j.last_seq) > le64_to_cpu(i->j.seq), in bch2_journal_read()
1256 c, le32_to_cpu(i->j.version), &i->j, NULL, in bch2_journal_read()
1259 le64_to_cpu(i->j.last_seq), in bch2_journal_read()
1260 le64_to_cpu(i->j.seq))) in bch2_journal_read()
1261 i->j.last_seq = i->j.seq; in bch2_journal_read()
1263 *last_seq = le64_to_cpu(i->j.last_seq); in bch2_journal_read()
1264 *blacklist_seq = le64_to_cpu(i->j.seq) + 1; in bch2_journal_read()
1275 "journal read done, but no entries found after dropping non-flushes"); in bch2_journal_read()
1279 bch_info(c, "journal read done, replaying entries %llu-%llu", in bch2_journal_read()
1280 *last_seq, *blacklist_seq - 1); in bch2_journal_read()
1283 bch_info(c, "dropped unflushed entries %llu-%llu", in bch2_journal_read()
1284 *blacklist_seq, *start_seq - 1); in bch2_journal_read()
1287 genradix_for_each(&c->journal_entries, radix_iter, _i) { in bch2_journal_read()
1293 seq = le64_to_cpu(i->j.seq); in bch2_journal_read()
1300 fsck_err_on(!JSET_NO_FLUSH(&i->j), c, in bch2_journal_read()
1303 i->ignore_blacklisted = true; in bch2_journal_read()
1309 genradix_for_each(&c->journal_entries, radix_iter, _i) { in bch2_journal_read()
1315 BUG_ON(seq > le64_to_cpu(i->j.seq)); in bch2_journal_read()
1317 while (seq < le64_to_cpu(i->j.seq)) { in bch2_journal_read()
1321 while (seq < le64_to_cpu(i->j.seq) && in bch2_journal_read()
1325 if (seq == le64_to_cpu(i->j.seq)) in bch2_journal_read()
1330 while (seq < le64_to_cpu(i->j.seq) && in bch2_journal_read()
1336 prt_printf(&buf1, " size %zu", vstruct_sectors(&prev->j, c->block_bits)); in bch2_journal_read()
1341 missing_end = seq - 1; in bch2_journal_read()
1343 "journal entries %llu-%llu missing! (replaying %llu-%llu)\n" in bch2_journal_read()
1347 *last_seq, *blacklist_seq - 1, in bch2_journal_read()
1358 genradix_for_each(&c->journal_entries, radix_iter, _i) { in bch2_journal_read()
1369 darray_for_each(i->ptrs, ptr) { in bch2_journal_read()
1370 struct bch_dev *ca = bch2_dev_have_ref(c, ptr->dev); in bch2_journal_read()
1372 if (!ptr->csum_good) in bch2_journal_read()
1373 bch_err_dev_offset(ca, ptr->sector, in bch2_journal_read()
1375 le64_to_cpu(i->j.seq), in bch2_journal_read()
1376 i->csum_good ? " (had good copy on another device)" : ""); in bch2_journal_read()
1380 bch2_dev_have_ref(c, i->ptrs.data[0].dev), in bch2_journal_read()
1381 &i->j, in bch2_journal_read()
1382 i->ptrs.data[0].sector, in bch2_journal_read()
1387 darray_for_each(i->ptrs, ptr) in bch2_journal_read()
1388 replicas_entry_add_dev(&replicas.e, ptr->dev); in bch2_journal_read()
1397 (le64_to_cpu(i->j.seq) == *last_seq || in bch2_journal_read()
1400 le64_to_cpu(i->j.seq), buf.buf))) { in bch2_journal_read()
1414 static void __journal_write_alloc(struct journal *j, in __journal_write_alloc() argument
1421 struct bch_fs *c = container_of(j, struct bch_fs, journal); in __journal_write_alloc()
1429 for (i = 0; i < devs_sorted->nr; i++) { in __journal_write_alloc()
1430 ca = rcu_dereference(c->devs[devs_sorted->devs[i]]); in __journal_write_alloc()
1434 ja = &ca->journal; in __journal_write_alloc()
1440 if (!ca->mi.durability || in __journal_write_alloc()
1441 ca->mi.state != BCH_MEMBER_STATE_rw || in __journal_write_alloc()
1442 !ja->nr || in __journal_write_alloc()
1443 bch2_bkey_has_device_c(bkey_i_to_s_c(&w->key), ca->dev_idx) || in __journal_write_alloc()
1444 sectors > ja->sectors_free) in __journal_write_alloc()
1447 bch2_dev_stripe_increment(ca, &j->wp.stripe); in __journal_write_alloc()
1449 bch2_bkey_append_ptr(&w->key, in __journal_write_alloc()
1452 ja->buckets[ja->cur_idx]) + in __journal_write_alloc()
1453 ca->mi.bucket_size - in __journal_write_alloc()
1454 ja->sectors_free, in __journal_write_alloc()
1455 .dev = ca->dev_idx, in __journal_write_alloc()
1458 ja->sectors_free -= sectors; in __journal_write_alloc()
1459 ja->bucket_seq[ja->cur_idx] = le64_to_cpu(w->data->seq); in __journal_write_alloc()
1461 *replicas += ca->mi.durability; in __journal_write_alloc()
1469 * journal_write_alloc - decide where to write next journal entry
1471 * @j: journal object
1472 * @w: journal buf (entry to be written)
1474 * Returns: 0 on success, or -EROFS on failure
1476 static int journal_write_alloc(struct journal *j, struct journal_buf *w) in journal_write_alloc() argument
1478 struct bch_fs *c = container_of(j, struct bch_fs, journal); in journal_write_alloc()
1483 unsigned sectors = vstruct_sectors(w->data, c->block_bits); in journal_write_alloc()
1484 unsigned target = c->opts.metadata_target ?: in journal_write_alloc()
1485 c->opts.foreground_target; in journal_write_alloc()
1487 READ_ONCE(c->opts.metadata_replicas); in journal_write_alloc()
1489 READ_ONCE(c->opts.metadata_replicas_required)); in journal_write_alloc()
1495 devs_sorted = bch2_dev_alloc_list(c, &j->wp.stripe, &devs); in journal_write_alloc()
1497 __journal_write_alloc(j, w, &devs_sorted, in journal_write_alloc()
1504 ca = rcu_dereference(c->devs[devs_sorted.devs[i]]); in journal_write_alloc()
1508 ja = &ca->journal; in journal_write_alloc()
1510 if (sectors > ja->sectors_free && in journal_write_alloc()
1511 sectors <= ca->mi.bucket_size && in journal_write_alloc()
1512 bch2_journal_dev_buckets_available(j, ja, in journal_write_alloc()
1514 ja->cur_idx = (ja->cur_idx + 1) % ja->nr; in journal_write_alloc()
1515 ja->sectors_free = ca->mi.bucket_size; in journal_write_alloc()
1518 * ja->bucket_seq[ja->cur_idx] must always have in journal_write_alloc()
1521 ja->bucket_seq[ja->cur_idx] = le64_to_cpu(w->data->seq); in journal_write_alloc()
1525 __journal_write_alloc(j, w, &devs_sorted, in journal_write_alloc()
1536 BUG_ON(bkey_val_u64s(&w->key.k) > BCH_REPLICAS_MAX); in journal_write_alloc()
1538 return replicas >= replicas_need ? 0 : -EROFS; in journal_write_alloc()
1541 static void journal_buf_realloc(struct journal *j, struct journal_buf *buf) in journal_buf_realloc() argument
1543 struct bch_fs *c = container_of(j, struct bch_fs, journal); in journal_buf_realloc()
1545 /* we aren't holding j->lock: */ in journal_buf_realloc()
1546 unsigned new_size = READ_ONCE(j->buf_size_want); in journal_buf_realloc()
1549 if (buf->buf_size >= new_size) in journal_buf_realloc()
1561 memcpy(new_buf, buf->data, buf->buf_size); in journal_buf_realloc()
1563 spin_lock(&j->lock); in journal_buf_realloc()
1564 swap(buf->data, new_buf); in journal_buf_realloc()
1565 swap(buf->buf_size, new_size); in journal_buf_realloc()
1566 spin_unlock(&j->lock); in journal_buf_realloc()
1571 static inline struct journal_buf *journal_last_unwritten_buf(struct journal *j) in journal_last_unwritten_buf() argument
1573 return j->buf + (journal_last_unwritten_seq(j) & JOURNAL_BUF_MASK); in journal_last_unwritten_buf()
1579 struct journal *j = container_of(w, struct journal, buf[w->idx]); in CLOSURE_CALLBACK() local
1580 struct bch_fs *c = container_of(j, struct bch_fs, journal); in CLOSURE_CALLBACK()
1583 u64 seq = le64_to_cpu(w->data->seq); in CLOSURE_CALLBACK()
1586 bch2_time_stats_update(!JSET_NO_FLUSH(w->data) in CLOSURE_CALLBACK()
1587 ? j->flush_write_time in CLOSURE_CALLBACK()
1588 : j->noflush_write_time, j->write_start_time); in CLOSURE_CALLBACK()
1590 if (!w->devs_written.nr) { in CLOSURE_CALLBACK()
1591 bch_err(c, "unable to write journal to sufficient devices"); in CLOSURE_CALLBACK()
1592 err = -EIO; in CLOSURE_CALLBACK()
1595 w->devs_written); in CLOSURE_CALLBACK()
1597 err = -EIO; in CLOSURE_CALLBACK()
1605 spin_lock(&j->lock); in CLOSURE_CALLBACK()
1606 if (seq >= j->pin.front) in CLOSURE_CALLBACK()
1607 journal_seq_pin(j, seq)->devs = w->devs_written; in CLOSURE_CALLBACK()
1608 if (err && (!j->err_seq || seq < j->err_seq)) in CLOSURE_CALLBACK()
1609 j->err_seq = seq; in CLOSURE_CALLBACK()
1610 w->write_done = true; in CLOSURE_CALLBACK()
1614 for (seq = journal_last_unwritten_seq(j); in CLOSURE_CALLBACK()
1615 seq <= journal_cur_seq(j); in CLOSURE_CALLBACK()
1617 w = j->buf + (seq & JOURNAL_BUF_MASK); in CLOSURE_CALLBACK()
1618 if (!w->write_done) in CLOSURE_CALLBACK()
1621 if (!j->err_seq && !JSET_NO_FLUSH(w->data)) { in CLOSURE_CALLBACK()
1622 j->flushed_seq_ondisk = seq; in CLOSURE_CALLBACK()
1623 j->last_seq_ondisk = w->last_seq; in CLOSURE_CALLBACK()
1626 closure_wake_up(&c->freelist_wait); in CLOSURE_CALLBACK()
1630 j->seq_ondisk = seq; in CLOSURE_CALLBACK()
1639 if (j->watermark != BCH_WATERMARK_stripe) in CLOSURE_CALLBACK()
1640 journal_reclaim_kick(&c->journal); in CLOSURE_CALLBACK()
1642 old.v = atomic64_read(&j->reservations.counter); in CLOSURE_CALLBACK()
1649 } while (!atomic64_try_cmpxchg(&j->reservations.counter, in CLOSURE_CALLBACK()
1652 closure_wake_up(&w->wait); in CLOSURE_CALLBACK()
1657 bch2_journal_reclaim_fast(j); in CLOSURE_CALLBACK()
1658 bch2_journal_space_available(j); in CLOSURE_CALLBACK()
1660 track_event_change(&c->times[BCH_TIME_blocked_journal_max_in_flight], false); in CLOSURE_CALLBACK()
1662 journal_wake(j); in CLOSURE_CALLBACK()
1665 if (journal_last_unwritten_seq(j) == journal_cur_seq(j) && in CLOSURE_CALLBACK()
1667 struct journal_buf *buf = journal_cur_buf(j); in CLOSURE_CALLBACK()
1668 long delta = buf->expires - jiffies; in CLOSURE_CALLBACK()
1671 * We don't close a journal entry to write it while there's in CLOSURE_CALLBACK()
1672 * previous entries still in flight - the current journal entry in CLOSURE_CALLBACK()
1673 * might want to be written now: in CLOSURE_CALLBACK()
1675 mod_delayed_work(j->wq, &j->write_work, max(0L, delta)); in CLOSURE_CALLBACK()
1679 * We don't typically trigger journal writes from here - the next journal in CLOSURE_CALLBACK()
1681 * allocated, in bch2_journal_write() - but the journal write error path in CLOSURE_CALLBACK()
1684 bch2_journal_do_writes(j); in CLOSURE_CALLBACK()
1685 spin_unlock(&j->lock); in CLOSURE_CALLBACK()
1691 struct bch_dev *ca = jbio->ca; in journal_write_endio()
1692 struct journal *j = &ca->fs->journal; in journal_write_endio() local
1693 struct journal_buf *w = j->buf + jbio->buf_idx; in journal_write_endio()
1695 if (bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_write, in journal_write_endio()
1697 le64_to_cpu(w->data->seq), in journal_write_endio()
1698 bch2_blk_status_to_str(bio->bi_status)) || in journal_write_endio()
1702 spin_lock_irqsave(&j->err_lock, flags); in journal_write_endio()
1703 bch2_dev_list_drop_dev(&w->devs_written, ca->dev_idx); in journal_write_endio()
1704 spin_unlock_irqrestore(&j->err_lock, flags); in journal_write_endio()
1707 closure_put(&w->io); in journal_write_endio()
1708 percpu_ref_put(&ca->io_ref); in journal_write_endio()
1714 struct journal *j = container_of(w, struct journal, buf[w->idx]); in CLOSURE_CALLBACK() local
1715 struct bch_fs *c = container_of(j, struct bch_fs, journal); in CLOSURE_CALLBACK()
1716 unsigned sectors = vstruct_sectors(w->data, c->block_bits); in CLOSURE_CALLBACK()
1718 extent_for_each_ptr(bkey_i_to_s_extent(&w->key), ptr) { in CLOSURE_CALLBACK()
1719 struct bch_dev *ca = bch2_dev_get_ioref(c, ptr->dev, WRITE); in CLOSURE_CALLBACK()
1726 this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_journal], in CLOSURE_CALLBACK()
1729 struct journal_device *ja = &ca->journal; in CLOSURE_CALLBACK()
1730 struct bio *bio = &ja->bio[w->idx]->bio; in CLOSURE_CALLBACK()
1731 bio_reset(bio, ca->disk_sb.bdev, REQ_OP_WRITE|REQ_SYNC|REQ_META); in CLOSURE_CALLBACK()
1732 bio->bi_iter.bi_sector = ptr->offset; in CLOSURE_CALLBACK()
1733 bio->bi_end_io = journal_write_endio; in CLOSURE_CALLBACK()
1734 bio->bi_private = ca; in CLOSURE_CALLBACK()
1736 BUG_ON(bio->bi_iter.bi_sector == ca->prev_journal_sector); in CLOSURE_CALLBACK()
1737 ca->prev_journal_sector = bio->bi_iter.bi_sector; in CLOSURE_CALLBACK()
1739 if (!JSET_NO_FLUSH(w->data)) in CLOSURE_CALLBACK()
1740 bio->bi_opf |= REQ_FUA; in CLOSURE_CALLBACK()
1741 if (!JSET_NO_FLUSH(w->data) && !w->separate_flush) in CLOSURE_CALLBACK()
1742 bio->bi_opf |= REQ_PREFLUSH; in CLOSURE_CALLBACK()
1744 bch2_bio_map(bio, w->data, sectors << 9); in CLOSURE_CALLBACK()
1749 ja->bucket_seq[ja->cur_idx] = le64_to_cpu(w->data->seq); in CLOSURE_CALLBACK()
1752 continue_at(cl, journal_write_done, j->wq); in CLOSURE_CALLBACK()
1758 struct journal *j = container_of(w, struct journal, buf[w->idx]); in CLOSURE_CALLBACK() local
1759 struct bch_fs *c = container_of(j, struct bch_fs, journal); in CLOSURE_CALLBACK()
1761 if (j->seq_ondisk + 1 != le64_to_cpu(w->data->seq)) { in CLOSURE_CALLBACK()
1762 spin_lock(&j->lock); in CLOSURE_CALLBACK()
1763 if (j->seq_ondisk + 1 != le64_to_cpu(w->data->seq)) { in CLOSURE_CALLBACK()
1764 closure_wait(&j->async_wait, cl); in CLOSURE_CALLBACK()
1765 spin_unlock(&j->lock); in CLOSURE_CALLBACK()
1766 continue_at(cl, journal_write_preflush, j->wq); in CLOSURE_CALLBACK()
1769 spin_unlock(&j->lock); in CLOSURE_CALLBACK()
1772 if (w->separate_flush) { in CLOSURE_CALLBACK()
1774 percpu_ref_get(&ca->io_ref); in CLOSURE_CALLBACK()
1776 struct journal_device *ja = &ca->journal; in CLOSURE_CALLBACK()
1777 struct bio *bio = &ja->bio[w->idx]->bio; in CLOSURE_CALLBACK()
1778 bio_reset(bio, ca->disk_sb.bdev, in CLOSURE_CALLBACK()
1780 bio->bi_end_io = journal_write_endio; in CLOSURE_CALLBACK()
1781 bio->bi_private = ca; in CLOSURE_CALLBACK()
1785 continue_at(cl, journal_write_submit, j->wq); in CLOSURE_CALLBACK()
1788 * no need to punt to another work item if we're not waiting on in CLOSURE_CALLBACK()
1791 journal_write_submit(&cl->work); in CLOSURE_CALLBACK()
1795 static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w) in bch2_journal_write_prep() argument
1797 struct bch_fs *c = container_of(j, struct bch_fs, journal); in bch2_journal_write_prep()
1799 struct jset *jset = w->data; in bch2_journal_write_prep()
1804 u64 seq = le64_to_cpu(jset->seq); in bch2_journal_write_prep()
1812 * If we wanted to be really fancy here, we could sort all the keys in in bch2_journal_write_prep()
1813 * the jset and drop keys that were overwritten - probably not worth it: in bch2_journal_write_prep()
1816 unsigned u64s = le16_to_cpu(i->u64s); in bch2_journal_write_prep()
1824 * entry gets written we have to propagate them to in bch2_journal_write_prep()
1825 * c->btree_roots in bch2_journal_write_prep()
1827 * But, every journal entry we write has to contain all the in bch2_journal_write_prep()
1829 * to c->btree_roots we have to get any missing btree roots and in bch2_journal_write_prep()
1830 * add them to this journal entry: in bch2_journal_write_prep()
1832 switch (i->type) { in bch2_journal_write_prep()
1835 __set_bit(i->btree_id, &btree_roots_have); in bch2_journal_write_prep()
1838 EBUG_ON(!w->need_flush_to_write_buffer); in bch2_journal_write_prep()
1843 jset_entry_for_each_key(i, k) { in bch2_journal_write_prep()
1844 ret = bch2_journal_key_to_wb(c, &wb, i->btree_id, k); in bch2_journal_write_prep()
1846 bch2_fs_fatal_error(c, "flushing journal keys to btree write buffer: %s", in bch2_journal_write_prep()
1852 i->type = BCH_JSET_ENTRY_btree_keys; in bch2_journal_write_prep()
1860 bch2_fs_fatal_error(c, "error flushing journal keys to btree write buffer: %s", in bch2_journal_write_prep()
1866 spin_lock(&c->journal.lock); in bch2_journal_write_prep()
1867 w->need_flush_to_write_buffer = false; in bch2_journal_write_prep()
1868 spin_unlock(&c->journal.lock); in bch2_journal_write_prep()
1876 d->entry.type = BCH_JSET_ENTRY_datetime; in bch2_journal_write_prep()
1877 d->seconds = cpu_to_le64(ktime_get_real_seconds()); in bch2_journal_write_prep()
1880 u64s = (u64 *) end - (u64 *) start; in bch2_journal_write_prep()
1882 WARN_ON(u64s > j->entry_u64s_reserved); in bch2_journal_write_prep()
1884 le32_add_cpu(&jset->u64s, u64s); in bch2_journal_write_prep()
1886 sectors = vstruct_sectors(jset, c->block_bits); in bch2_journal_write_prep()
1889 if (sectors > w->sectors) { in bch2_journal_write_prep()
1891 vstruct_bytes(jset), w->sectors << 9, in bch2_journal_write_prep()
1892 u64s, w->u64s_reserved, j->entry_u64s_reserved); in bch2_journal_write_prep()
1893 return -EINVAL; in bch2_journal_write_prep()
1896 jset->magic = cpu_to_le64(jset_magic(c)); in bch2_journal_write_prep()
1897 jset->version = cpu_to_le32(c->sb.version); in bch2_journal_write_prep()
1903 j->last_empty_seq = seq; in bch2_journal_write_prep()
1908 if (le32_to_cpu(jset->version) < bcachefs_metadata_version_current) in bch2_journal_write_prep()
1916 jset->encrypted_start, in bch2_journal_write_prep()
1917 vstruct_end(jset) - (void *) jset->encrypted_start); in bch2_journal_write_prep()
1921 jset->csum = csum_vstruct(c, JSET_CSUM_TYPE(jset), in bch2_journal_write_prep()
1928 memset((void *) jset + bytes, 0, (sectors << 9) - bytes); in bch2_journal_write_prep()
1932 static int bch2_journal_write_pick_flush(struct journal *j, struct journal_buf *w) in bch2_journal_write_pick_flush() argument
1934 struct bch_fs *c = container_of(j, struct bch_fs, journal); in bch2_journal_write_pick_flush()
1935 int error = bch2_journal_error(j); in bch2_journal_write_pick_flush()
1938 * If the journal is in an error state - we did an emergency shutdown - in bch2_journal_write_pick_flush()
1939 * we prefer to continue doing journal writes. We just mark them as in bch2_journal_write_pick_flush()
1941 * list_journal tool - this helps in debugging. in bch2_journal_write_pick_flush()
1947 * previously - we can't leave the journal without any flush writes in in bch2_journal_write_pick_flush()
1953 if (error && test_bit(JOURNAL_need_flush_write, &j->flags)) in bch2_journal_write_pick_flush()
1954 return -EIO; in bch2_journal_write_pick_flush()
1957 w->noflush || in bch2_journal_write_pick_flush()
1958 (!w->must_flush && in bch2_journal_write_pick_flush()
1959 time_before(jiffies, j->last_flush_write + in bch2_journal_write_pick_flush()
1960 msecs_to_jiffies(c->opts.journal_flush_delay)) && in bch2_journal_write_pick_flush()
1961 test_bit(JOURNAL_may_skip_flush, &j->flags))) { in bch2_journal_write_pick_flush()
1962 w->noflush = true; in bch2_journal_write_pick_flush()
1963 SET_JSET_NO_FLUSH(w->data, true); in bch2_journal_write_pick_flush()
1964 w->data->last_seq = 0; in bch2_journal_write_pick_flush()
1965 w->last_seq = 0; in bch2_journal_write_pick_flush()
1967 j->nr_noflush_writes++; in bch2_journal_write_pick_flush()
1969 w->must_flush = true; in bch2_journal_write_pick_flush()
1970 j->last_flush_write = jiffies; in bch2_journal_write_pick_flush()
1971 j->nr_flush_writes++; in bch2_journal_write_pick_flush()
1972 clear_bit(JOURNAL_need_flush_write, &j->flags); in bch2_journal_write_pick_flush()
1981 struct journal *j = container_of(w, struct journal, buf[w->idx]); in CLOSURE_CALLBACK() local
1982 struct bch_fs *c = container_of(j, struct bch_fs, journal); in CLOSURE_CALLBACK()
1990 BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb)); in CLOSURE_CALLBACK()
1991 BUG_ON(!w->write_started); in CLOSURE_CALLBACK()
1992 BUG_ON(w->write_allocated); in CLOSURE_CALLBACK()
1993 BUG_ON(w->write_done); in CLOSURE_CALLBACK()
1995 j->write_start_time = local_clock(); in CLOSURE_CALLBACK()
1997 spin_lock(&j->lock); in CLOSURE_CALLBACK()
1999 w->separate_flush = true; in CLOSURE_CALLBACK()
2001 ret = bch2_journal_write_pick_flush(j, w); in CLOSURE_CALLBACK()
2002 spin_unlock(&j->lock); in CLOSURE_CALLBACK()
2006 mutex_lock(&j->buf_lock); in CLOSURE_CALLBACK()
2007 journal_buf_realloc(j, w); in CLOSURE_CALLBACK()
2009 ret = bch2_journal_write_prep(j, w); in CLOSURE_CALLBACK()
2010 mutex_unlock(&j->buf_lock); in CLOSURE_CALLBACK()
2014 j->entry_bytes_written += vstruct_bytes(w->data); in CLOSURE_CALLBACK()
2017 spin_lock(&j->lock); in CLOSURE_CALLBACK()
2018 ret = journal_write_alloc(j, w); in CLOSURE_CALLBACK()
2019 if (!ret || !j->can_discard) in CLOSURE_CALLBACK()
2022 spin_unlock(&j->lock); in CLOSURE_CALLBACK()
2023 bch2_journal_do_discards(j); in CLOSURE_CALLBACK()
2030 prt_printf(&buf, bch2_fmt(c, "Unable to allocate journal write at seq %llu: %s"), in CLOSURE_CALLBACK()
2031 le64_to_cpu(w->data->seq), in CLOSURE_CALLBACK()
2033 __bch2_journal_debug_to_text(&buf, j); in CLOSURE_CALLBACK()
2034 spin_unlock(&j->lock); in CLOSURE_CALLBACK()
2041 * write is allocated, no longer need to account for it in in CLOSURE_CALLBACK()
2044 w->sectors = 0; in CLOSURE_CALLBACK()
2045 w->write_allocated = true; in CLOSURE_CALLBACK()
2051 bch2_journal_space_available(j); in CLOSURE_CALLBACK()
2052 bch2_journal_do_writes(j); in CLOSURE_CALLBACK()
2053 spin_unlock(&j->lock); in CLOSURE_CALLBACK()
2055 w->devs_written = bch2_bkey_devs(bkey_i_to_s_c(&w->key)); in CLOSURE_CALLBACK()
2057 if (c->opts.nochanges) in CLOSURE_CALLBACK()
2061 * Mark journal replicas before we submit the write to guarantee in CLOSURE_CALLBACK()
2065 w->devs_written); in CLOSURE_CALLBACK()
2070 if (!JSET_NO_FLUSH(w->data)) in CLOSURE_CALLBACK()
2071 continue_at(cl, journal_write_preflush, j->wq); in CLOSURE_CALLBACK()
2073 continue_at(cl, journal_write_submit, j->wq); in CLOSURE_CALLBACK()
2076 continue_at(cl, journal_write_done, j->wq); in CLOSURE_CALLBACK()
2080 continue_at(cl, journal_write_done, j->wq); in CLOSURE_CALLBACK()