Lines Matching +full:k +full:- +full:to +full:- +full:j
1 // SPDX-License-Identifier: GPL-2.0
20 * entries (same as garbage collection would), then we replay them - reinserting
30 struct closure *cl = bio->bi_private; in journal_read_endio()
38 struct journal_device *ja = &ca->journal; in journal_read_bucket()
39 struct bio *bio = &ja->bio; in journal_read_bucket()
42 struct jset *j, *data = ca->set->journal.w[0].data; in journal_read_bucket() local
46 sector_t bucket = bucket_to_sector(ca->set, ca->sb.d[bucket_index]); in journal_read_bucket()
52 while (offset < ca->sb.bucket_size) { in journal_read_bucket()
53 reread: left = ca->sb.bucket_size - offset; in journal_read_bucket()
56 bio_reset(bio, ca->bdev, REQ_OP_READ); in journal_read_bucket()
57 bio->bi_iter.bi_sector = bucket + offset; in journal_read_bucket()
58 bio->bi_iter.bi_size = len << 9; in journal_read_bucket()
60 bio->bi_end_io = journal_read_endio; in journal_read_bucket()
61 bio->bi_private = &cl; in journal_read_bucket()
64 closure_bio_submit(ca->set, bio, &cl); in journal_read_bucket()
73 j = data; in journal_read_bucket()
76 size_t blocks, bytes = set_bytes(j); in journal_read_bucket()
78 if (j->magic != jset_magic(&ca->sb)) { in journal_read_bucket()
93 if (j->csum != csum_set(j)) { in journal_read_bucket()
99 blocks = set_blocks(j, block_bytes(ca)); in journal_read_bucket()
103 * i->j.seq, the node on head has the smallest (oldest) in journal_read_bucket()
110 * i->j.seq < j->last_seq, it means the oldest jset in journal_read_bucket()
112 * this list. Otherwise, j is a candidate jset for in journal_read_bucket()
118 if (i->j.seq >= j->last_seq) in journal_read_bucket()
120 list_del(&i->list); in journal_read_bucket()
126 if (j->seq == i->j.seq) in journal_read_bucket()
130 * if j->seq is less than any i->j.last_seq in journal_read_bucket()
131 * in list, j is an expired and useless jset. in journal_read_bucket()
133 if (j->seq < i->j.last_seq) in journal_read_bucket()
137 * 'where' points to first jset in list which in journal_read_bucket()
138 * is elder then j. in journal_read_bucket()
140 if (j->seq > i->j.seq) { in journal_read_bucket()
141 where = &i->list; in journal_read_bucket()
148 i = kmalloc(offsetof(struct journal_replay, j) + in journal_read_bucket()
151 return -ENOMEM; in journal_read_bucket()
152 unsafe_memcpy(&i->j, j, bytes, in journal_read_bucket()
154 /* Add to the location after 'where' points to */ in journal_read_bucket()
155 list_add(&i->list, where); in journal_read_bucket()
158 if (j->seq > ja->seq[bucket_index]) in journal_read_bucket()
159 ja->seq[bucket_index] = j->seq; in journal_read_bucket()
161 offset += blocks * ca->sb.block_size; in journal_read_bucket()
162 len -= blocks * ca->sb.block_size; in journal_read_bucket()
163 j = ((void *) j) + blocks * block_bytes(ca); in journal_read_bucket()
181 struct cache *ca = c->cache; in bch_journal_read()
183 struct journal_device *ja = &ca->journal; in bch_journal_read()
189 pr_debug("%u journal buckets\n", ca->sb.njournal_buckets); in bch_journal_read()
192 * Read journal buckets ordered by golden ratio hash to quickly in bch_journal_read()
195 for (i = 0; i < ca->sb.njournal_buckets; i++) { in bch_journal_read()
198 * correctness due to the scenario that the journal in bch_journal_read()
201 l = (i * 2654435769U) % ca->sb.njournal_buckets; in bch_journal_read()
214 pr_debug("falling back to linear search\n"); in bch_journal_read()
216 for_each_clear_bit(l, bitmap, ca->sb.njournal_buckets) in bch_journal_read()
221 if (l == ca->sb.njournal_buckets) in bch_journal_read()
228 r = find_next_bit(bitmap, ca->sb.njournal_buckets, l + 1); in bch_journal_read()
232 seq = list_entry(list->prev, struct journal_replay, in bch_journal_read()
233 list)->j.seq; in bch_journal_read()
238 if (seq != list_entry(list->prev, struct journal_replay, in bch_journal_read()
239 list)->j.seq) in bch_journal_read()
250 m, ca->sb.njournal_buckets); in bch_journal_read()
254 if (!l--) in bch_journal_read()
255 l = ca->sb.njournal_buckets - 1; in bch_journal_read()
269 for (i = 0; i < ca->sb.njournal_buckets; i++) in bch_journal_read()
270 if (ja->seq[i] > seq) { in bch_journal_read()
271 seq = ja->seq[i]; in bch_journal_read()
273 * When journal_reclaim() goes to allocate for in bch_journal_read()
275 * ja->cur_idx in bch_journal_read()
277 ja->cur_idx = i; in bch_journal_read()
278 ja->last_idx = ja->discard_idx = (i + 1) % in bch_journal_read()
279 ca->sb.njournal_buckets; in bch_journal_read()
285 c->journal.seq = list_entry(list->prev, in bch_journal_read()
287 list)->j.seq; in bch_journal_read()
296 struct bkey *k; in bch_journal_mark() local
298 struct journal *j = &c->journal; in bch_journal_mark() local
299 uint64_t last = j->seq; in bch_journal_mark()
302 * journal.pin should never fill up - we never write a journal in bch_journal_mark()
309 BUG_ON(last < i->j.seq); in bch_journal_mark()
310 i->pin = NULL; in bch_journal_mark()
312 while (last-- != i->j.seq) in bch_journal_mark()
313 if (fifo_free(&j->pin) > 1) { in bch_journal_mark()
314 fifo_push_front(&j->pin, p); in bch_journal_mark()
315 atomic_set(&fifo_front(&j->pin), 0); in bch_journal_mark()
318 if (fifo_free(&j->pin) > 1) { in bch_journal_mark()
319 fifo_push_front(&j->pin, p); in bch_journal_mark()
320 i->pin = &fifo_front(&j->pin); in bch_journal_mark()
321 atomic_set(i->pin, 1); in bch_journal_mark()
324 for (k = i->j.start; in bch_journal_mark()
325 k < bset_bkey_last(&i->j); in bch_journal_mark()
326 k = bkey_next(k)) in bch_journal_mark()
327 if (!__bch_extent_invalid(c, k)) { in bch_journal_mark()
328 unsigned int j; in bch_journal_mark() local
330 for (j = 0; j < KEY_PTRS(k); j++) in bch_journal_mark()
331 if (ptr_available(c, k, j)) in bch_journal_mark()
332 atomic_inc(&PTR_BUCKET(c, k, j)->pin); in bch_journal_mark()
334 bch_initial_mark_key(c, 0, k); in bch_journal_mark()
341 struct cache *ca = s->cache; in is_discard_enabled()
343 if (ca->discard) in is_discard_enabled()
352 struct bkey *k; in bch_journal_replay() local
354 list_entry(list->prev, struct journal_replay, list); in bch_journal_replay()
356 uint64_t start = i->j.last_seq, end = i->j.seq, n = start; in bch_journal_replay()
360 BUG_ON(i->pin && atomic_read(i->pin) != 1); in bch_journal_replay()
362 if (n != i->j.seq) { in bch_journal_replay()
364 pr_info("journal entries %llu-%llu may be discarded! (replaying %llu-%llu)\n", in bch_journal_replay()
365 n, i->j.seq - 1, start, end); in bch_journal_replay()
367 pr_err("journal entries %llu-%llu missing! (replaying %llu-%llu)\n", in bch_journal_replay()
368 n, i->j.seq - 1, start, end); in bch_journal_replay()
369 ret = -EIO; in bch_journal_replay()
374 for (k = i->j.start; in bch_journal_replay()
375 k < bset_bkey_last(&i->j); in bch_journal_replay()
376 k = bkey_next(k)) { in bch_journal_replay()
377 trace_bcache_journal_replay_key(k); in bch_journal_replay()
379 bch_keylist_init_single(&keylist, k); in bch_journal_replay()
381 ret = bch_btree_insert(s, &keylist, i->pin, NULL); in bch_journal_replay()
391 if (i->pin) in bch_journal_replay()
392 atomic_dec(i->pin); in bch_journal_replay()
393 n = i->j.seq + 1; in bch_journal_replay()
402 list_del(&i->list); in bch_journal_replay()
409 void bch_journal_space_reserve(struct journal *j) in bch_journal_space_reserve() argument
411 j->do_reserve = true; in bch_journal_space_reserve()
424 if (c->journal.btree_flushing) in btree_flush_write()
427 spin_lock(&c->journal.flush_write_lock); in btree_flush_write()
428 if (c->journal.btree_flushing) { in btree_flush_write()
429 spin_unlock(&c->journal.flush_write_lock); in btree_flush_write()
432 c->journal.btree_flushing = true; in btree_flush_write()
433 spin_unlock(&c->journal.flush_write_lock); in btree_flush_write()
436 spin_lock(&c->journal.lock); in btree_flush_write()
437 fifo_front_p = &fifo_front(&c->journal.pin); in btree_flush_write()
444 spin_unlock(&c->journal.lock); in btree_flush_write()
447 spin_unlock(&c->journal.lock); in btree_flush_write()
449 mask = c->journal.pin.mask; in btree_flush_write()
451 atomic_long_inc(&c->flush_write); in btree_flush_write()
454 mutex_lock(&c->bucket_lock); in btree_flush_write()
455 list_for_each_entry_safe_reverse(b, t, &c->btree_cache, list) { in btree_flush_write()
457 * It is safe to get now_fifo_front_p without holding in btree_flush_write()
458 * c->journal.lock here, because we don't need to know in btree_flush_write()
460 * front pointer of c->journal.pin is changed. in btree_flush_write()
462 now_fifo_front_p = &fifo_front(&c->journal.pin); in btree_flush_write()
465 * pointer of c->journal.pin changes, it is unnecessary in btree_flush_write()
466 * to scan c->btree_cache anymore, just quit the loop and in btree_flush_write()
482 mutex_lock(&b->write_lock); in btree_flush_write()
485 mutex_unlock(&b->write_lock); in btree_flush_write()
489 if (!btree_current_write(b)->journal) { in btree_flush_write()
490 mutex_unlock(&b->write_lock); in btree_flush_write()
500 * - the list_for_each_xxx loop will quit when checking in btree_flush_write()
502 * - If there are matched nodes recorded in btree_nodes[], in btree_flush_write()
505 * will be ignored and skipped in the following for-loop. in btree_flush_write()
507 if (((btree_current_write(b)->journal - fifo_front_p) & in btree_flush_write()
509 mutex_unlock(&b->write_lock); in btree_flush_write()
515 mutex_unlock(&b->write_lock); in btree_flush_write()
519 * To avoid holding c->bucket_lock too long time, in btree_flush_write()
522 * the oldest journal entry, try to flush them next in btree_flush_write()
528 mutex_unlock(&c->bucket_lock); in btree_flush_write()
537 /* safe to check without holding b->write_lock */ in btree_flush_write()
543 mutex_lock(&b->write_lock); in btree_flush_write()
544 if (!btree_current_write(b)->journal) { in btree_flush_write()
545 clear_bit(BTREE_NODE_journal_flush, &b->flags); in btree_flush_write()
546 mutex_unlock(&b->write_lock); in btree_flush_write()
552 clear_bit(BTREE_NODE_journal_flush, &b->flags); in btree_flush_write()
553 mutex_unlock(&b->write_lock); in btree_flush_write()
559 clear_bit(BTREE_NODE_journal_flush, &b->flags); in btree_flush_write()
560 mutex_unlock(&b->write_lock); in btree_flush_write()
564 spin_lock(&c->journal.flush_write_lock); in btree_flush_write()
565 c->journal.btree_flushing = false; in btree_flush_write()
566 spin_unlock(&c->journal.flush_write_lock); in btree_flush_write()
569 #define last_seq(j) ((j)->seq - fifo_used(&(j)->pin) + 1) argument
577 atomic_set(&ja->discard_in_flight, DISCARD_DONE); in journal_discard_endio()
579 closure_wake_up(&ca->set->journal.wait); in journal_discard_endio()
580 closure_put(&ca->set->cl); in journal_discard_endio()
588 submit_bio(&ja->discard_bio); in journal_discard_work()
593 struct journal_device *ja = &ca->journal; in do_journal_discard()
594 struct bio *bio = &ja->discard_bio; in do_journal_discard()
596 if (!ca->discard) { in do_journal_discard()
597 ja->discard_idx = ja->last_idx; in do_journal_discard()
601 switch (atomic_read(&ja->discard_in_flight)) { in do_journal_discard()
606 ja->discard_idx = (ja->discard_idx + 1) % in do_journal_discard()
607 ca->sb.njournal_buckets; in do_journal_discard()
609 atomic_set(&ja->discard_in_flight, DISCARD_READY); in do_journal_discard()
613 if (ja->discard_idx == ja->last_idx) in do_journal_discard()
616 atomic_set(&ja->discard_in_flight, DISCARD_IN_FLIGHT); in do_journal_discard()
618 bio_init(bio, ca->bdev, bio->bi_inline_vecs, 1, REQ_OP_DISCARD); in do_journal_discard()
619 bio->bi_iter.bi_sector = bucket_to_sector(ca->set, in do_journal_discard()
620 ca->sb.d[ja->discard_idx]); in do_journal_discard()
621 bio->bi_iter.bi_size = bucket_bytes(ca); in do_journal_discard()
622 bio->bi_end_io = journal_discard_endio; in do_journal_discard()
624 closure_get(&ca->set->cl); in do_journal_discard()
625 INIT_WORK(&ja->discard_work, journal_discard_work); in do_journal_discard()
626 queue_work(bch_journal_wq, &ja->discard_work); in do_journal_discard()
632 struct journal *j = &c->journal; in free_journal_buckets() local
633 struct cache *ca = c->cache; in free_journal_buckets()
634 struct journal_device *ja = &c->cache->journal; in free_journal_buckets()
638 if (ja->cur_idx >= ja->discard_idx) in free_journal_buckets()
639 n = ca->sb.njournal_buckets + ja->discard_idx - ja->cur_idx; in free_journal_buckets()
641 n = ja->discard_idx - ja->cur_idx; in free_journal_buckets()
643 if (n > (1 + j->do_reserve)) in free_journal_buckets()
644 return n - (1 + j->do_reserve); in free_journal_buckets()
651 struct bkey *k = &c->journal.key; in journal_reclaim() local
652 struct cache *ca = c->cache; in journal_reclaim()
654 struct journal_device *ja = &ca->journal; in journal_reclaim()
657 atomic_long_inc(&c->reclaim); in journal_reclaim()
659 while (!atomic_read(&fifo_front(&c->journal.pin))) in journal_reclaim()
660 fifo_pop(&c->journal.pin, p); in journal_reclaim()
662 last_seq = last_seq(&c->journal); in journal_reclaim()
666 while (ja->last_idx != ja->cur_idx && in journal_reclaim()
667 ja->seq[ja->last_idx] < last_seq) in journal_reclaim()
668 ja->last_idx = (ja->last_idx + 1) % in journal_reclaim()
669 ca->sb.njournal_buckets; in journal_reclaim()
673 if (c->journal.blocks_free) in journal_reclaim()
679 ja->cur_idx = (ja->cur_idx + 1) % ca->sb.njournal_buckets; in journal_reclaim()
680 k->ptr[0] = MAKE_PTR(0, in journal_reclaim()
681 bucket_to_sector(c, ca->sb.d[ja->cur_idx]), in journal_reclaim()
682 ca->sb.nr_this_dev); in journal_reclaim()
683 atomic_long_inc(&c->reclaimed_journal_buckets); in journal_reclaim()
685 bkey_init(k); in journal_reclaim()
686 SET_KEY_PTRS(k, 1); in journal_reclaim()
687 c->journal.blocks_free = ca->sb.bucket_size >> c->block_bits; in journal_reclaim()
690 if (!journal_full(&c->journal)) in journal_reclaim()
691 __closure_wake_up(&c->journal.wait); in journal_reclaim()
694 void bch_journal_next(struct journal *j) in bch_journal_next() argument
698 j->cur = (j->cur == j->w) in bch_journal_next()
699 ? &j->w[1] in bch_journal_next()
700 : &j->w[0]; in bch_journal_next()
703 * The fifo_push() needs to happen at the same time as j->seq is in bch_journal_next()
704 * incremented for last_seq() to be calculated correctly in bch_journal_next()
706 BUG_ON(!fifo_push(&j->pin, p)); in bch_journal_next()
707 atomic_set(&fifo_back(&j->pin), 1); in bch_journal_next()
709 j->cur->data->seq = ++j->seq; in bch_journal_next()
710 j->cur->dirty = false; in bch_journal_next()
711 j->cur->need_write = false; in bch_journal_next()
712 j->cur->data->keys = 0; in bch_journal_next()
714 if (fifo_full(&j->pin)) in bch_journal_next()
715 pr_debug("journal_pin full (%zu)\n", fifo_used(&j->pin)); in bch_journal_next()
720 struct journal_write *w = bio->bi_private; in journal_write_endio()
722 cache_set_err_on(bio->bi_status, w->c, "journal io error"); in journal_write_endio()
723 closure_put(&w->c->journal.io); in journal_write_endio()
730 closure_type(j, struct journal, io); in CLOSURE_CALLBACK()
731 struct journal_write *w = (j->cur == j->w) in CLOSURE_CALLBACK()
732 ? &j->w[1] in CLOSURE_CALLBACK()
733 : &j->w[0]; in CLOSURE_CALLBACK()
735 __closure_wake_up(&w->wait); in CLOSURE_CALLBACK()
740 __releases(&c->journal.lock)
744 c->journal.io_in_flight = 0;
745 spin_unlock(&c->journal.lock);
749 __releases(c->journal.lock)
752 struct cache *ca = c->cache;
753 struct journal_write *w = c->journal.cur;
754 struct bkey *k = &c->journal.key; variable
755 unsigned int i, sectors = set_blocks(w->data, block_bytes(ca)) *
756 ca->sb.block_size;
763 if (!w->need_write) {
766 } else if (journal_full(&c->journal)) {
768 spin_unlock(&c->journal.lock);
775 c->journal.blocks_free -= set_blocks(w->data, block_bytes(ca));
777 w->data->btree_level = c->root->level;
779 bkey_copy(&w->data->btree_root, &c->root->key);
780 bkey_copy(&w->data->uuid_bucket, &c->uuid_bucket);
782 w->data->prio_bucket[ca->sb.nr_this_dev] = ca->prio_buckets[0];
783 w->data->magic = jset_magic(&ca->sb);
784 w->data->version = BCACHE_JSET_VERSION;
785 w->data->last_seq = last_seq(&c->journal);
786 w->data->csum = csum_set(w->data);
788 for (i = 0; i < KEY_PTRS(k); i++) {
789 ca = c->cache;
790 bio = &ca->journal.bio;
792 atomic_long_add(sectors, &ca->meta_sectors_written);
794 bio_reset(bio, ca->bdev, REQ_OP_WRITE |
796 bio->bi_iter.bi_sector = PTR_OFFSET(k, i);
797 bio->bi_iter.bi_size = sectors << 9;
799 bio->bi_end_io = journal_write_endio;
800 bio->bi_private = w;
801 bch_bio_map(bio, w->data);
803 trace_bcache_journal_write(bio, w->data->keys);
806 SET_PTR_OFFSET(k, i, PTR_OFFSET(k, i) + sectors);
808 ca->journal.seq[ca->journal.cur_idx] = w->data->seq;
811 /* If KEY_PTRS(k) == 0, this jset gets lost in air */
814 atomic_dec_bug(&fifo_back(&c->journal.pin));
815 bch_journal_next(&c->journal);
818 spin_unlock(&c->journal.lock);
830 spin_lock(&c->journal.lock); in CLOSURE_CALLBACK()
831 journal_write_unlocked(&cl->work); in CLOSURE_CALLBACK()
835 __releases(c->journal.lock) in journal_try_write()
837 struct closure *cl = &c->journal.io; in journal_try_write()
838 struct journal_write *w = c->journal.cur; in journal_try_write()
840 w->need_write = true; in journal_try_write()
842 if (!c->journal.io_in_flight) { in journal_try_write()
843 c->journal.io_in_flight = 1; in journal_try_write()
844 closure_call(cl, journal_write_unlocked, NULL, &c->cl); in journal_try_write()
846 spin_unlock(&c->journal.lock); in journal_try_write()
852 __acquires(&c->journal.lock) in journal_wait_for_write()
857 struct cache *ca = c->cache; in journal_wait_for_write()
861 spin_lock(&c->journal.lock); in journal_wait_for_write()
864 struct journal_write *w = c->journal.cur; in journal_wait_for_write()
866 sectors = __set_blocks(w->data, w->data->keys + nkeys, in journal_wait_for_write()
867 block_bytes(ca)) * ca->sb.block_size; in journal_wait_for_write()
870 c->journal.blocks_free * ca->sb.block_size, in journal_wait_for_write()
875 closure_wait(&c->journal.wait, &cl); in journal_wait_for_write()
877 if (!journal_full(&c->journal)) { in journal_wait_for_write()
885 * bch_keylist_realloc() - but something to think about. in journal_wait_for_write()
887 BUG_ON(!w->data->keys); in journal_wait_for_write()
895 spin_unlock(&c->journal.lock); in journal_wait_for_write()
901 spin_lock(&c->journal.lock); in journal_wait_for_write()
911 spin_lock(&c->journal.lock); in journal_write_work()
912 if (c->journal.cur->dirty) in journal_write_work()
915 spin_unlock(&c->journal.lock); in journal_write_work()
919 * Entry point to the journalling code - bio_insert() and btree_invalidate()
920 * pass bch_journal() a list of keys to be journalled, and then
921 * bch_journal() hands those same keys off to btree_insert_async()
932 if (unlikely(test_bit(CACHE_SET_IO_DISABLE, &c->flags))) in bch_journal()
935 if (!CACHE_SYNC(&c->cache->sb)) in bch_journal()
940 memcpy(bset_bkey_last(w->data), keys->keys, bch_keylist_bytes(keys)); in bch_journal()
941 w->data->keys += bch_keylist_nkeys(keys); in bch_journal()
943 ret = &fifo_back(&c->journal.pin); in bch_journal()
947 closure_wait(&w->wait, parent); in bch_journal()
949 } else if (!w->dirty) { in bch_journal()
950 w->dirty = true; in bch_journal()
951 queue_delayed_work(bch_flush_wq, &c->journal.work, in bch_journal()
952 msecs_to_jiffies(c->journal_delay_ms)); in bch_journal()
953 spin_unlock(&c->journal.lock); in bch_journal()
955 spin_unlock(&c->journal.lock); in bch_journal()
976 free_pages((unsigned long) c->journal.w[1].data, JSET_BITS); in bch_journal_free()
977 free_pages((unsigned long) c->journal.w[0].data, JSET_BITS); in bch_journal_free()
978 free_fifo(&c->journal.pin); in bch_journal_free()
983 struct journal *j = &c->journal; in bch_journal_alloc() local
985 spin_lock_init(&j->lock); in bch_journal_alloc()
986 spin_lock_init(&j->flush_write_lock); in bch_journal_alloc()
987 INIT_DELAYED_WORK(&j->work, journal_write_work); in bch_journal_alloc()
989 c->journal_delay_ms = 100; in bch_journal_alloc()
991 j->w[0].c = c; in bch_journal_alloc()
992 j->w[1].c = c; in bch_journal_alloc()
994 if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)) || in bch_journal_alloc()
995 !(j->w[0].data = (void *) __get_free_pages(GFP_KERNEL|__GFP_COMP, JSET_BITS)) || in bch_journal_alloc()
996 !(j->w[1].data = (void *) __get_free_pages(GFP_KERNEL|__GFP_COMP, JSET_BITS))) in bch_journal_alloc()
997 return -ENOMEM; in bch_journal_alloc()