
1 // SPDX-License-Identifier: GPL-2.0-only
3 * fs/fs-writeback.c
14 * Additions for address_space-based writeback
28 #include <linux/backing-dev.h>
37 #define MIN_WRITEBACK_PAGES (4096UL >> (PAGE_SHIFT - 10))
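
The shift in MIN_WRITEBACK_PAGES converts a size expressed in kilobytes into pages: PAGE_SHIFT - 10 is log2 of the number of kilobytes per page, so 4096 KB becomes 1024 pages on a 4 KB-page machine. A quick user-space check of that arithmetic (the 4 KB page size is an assumption for the example):

#include <stdio.h>

int main(void)
{
	unsigned long page_shift = 12;                  /* assumption: 4 KB pages */
	unsigned long min_wb_pages = 4096UL >> (page_shift - 10);

	/* 4096 KB >> 2 == 1024 pages == 4 MB minimum writeback chunk */
	printf("%lu pages (%lu KB)\n",
	       min_wb_pages, min_wb_pages << (page_shift - 10));
	return 0;
}
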
90 set_bit(WB_has_dirty_io, &wb->state); in wb_io_lists_populated()
91 WARN_ON_ONCE(!wb->avg_write_bandwidth); in wb_io_lists_populated()
92 atomic_long_add(wb->avg_write_bandwidth, in wb_io_lists_populated()
93 &wb->bdi->tot_write_bandwidth); in wb_io_lists_populated()
100 if (wb_has_dirty_io(wb) && list_empty(&wb->b_dirty) && in wb_io_lists_depopulated()
101 list_empty(&wb->b_io) && list_empty(&wb->b_more_io)) { in wb_io_lists_depopulated()
102 clear_bit(WB_has_dirty_io, &wb->state); in wb_io_lists_depopulated()
103 WARN_ON_ONCE(atomic_long_sub_return(wb->avg_write_bandwidth, in wb_io_lists_depopulated()
104 &wb->bdi->tot_write_bandwidth) < 0); in wb_io_lists_depopulated()
109 * inode_io_list_move_locked - move an inode onto a bdi_writeback IO list
112 * @head: one of @wb->b_{dirty|io|more_io|dirty_time}
114 * Move @inode->i_io_list to @list of @wb and set %WB_has_dirty_io.
122 assert_spin_locked(&wb->list_lock); in inode_io_list_move_locked()
123 assert_spin_locked(&inode->i_lock); in inode_io_list_move_locked()
124 WARN_ON_ONCE(inode->i_state & I_FREEING); in inode_io_list_move_locked()
126 list_move(&inode->i_io_list, head); in inode_io_list_move_locked()
129 if (head != &wb->b_dirty_time) in inode_io_list_move_locked()
138 spin_lock_irq(&wb->work_lock); in wb_wakeup()
139 if (test_bit(WB_registered, &wb->state)) in wb_wakeup()
140 mod_delayed_work(bdi_wq, &wb->dwork, 0); in wb_wakeup()
141 spin_unlock_irq(&wb->work_lock); in wb_wakeup()
146 * wakes up the corresponding bdi thread which should then take care of the
147 * periodic background write-out of dirty inodes. Since the write-out would
152 * fast-path (used by '__mark_inode_dirty()'), so we save a few context switches
153 * by delaying the wake-up.
163 spin_lock_irq(&wb->work_lock); in wb_wakeup_delayed()
164 if (test_bit(WB_registered, &wb->state)) in wb_wakeup_delayed()
165 queue_delayed_work(bdi_wq, &wb->dwork, timeout); in wb_wakeup_delayed()
166 spin_unlock_irq(&wb->work_lock); in wb_wakeup_delayed()
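
wb_wakeup_delayed() arms the flusher's delayed work rather than waking it immediately, and queue_delayed_work() is a no-op for work that is already pending, so a burst of dirtying on the __mark_inode_dirty() fast path collapses into a single deferred wakeup. A small user-space model of that coalescing behaviour (the names, tick counter and timeout value are illustrative assumptions, not kernel code):

#include <stdbool.h>
#include <stdio.h>

/* Model of a coalescing delayed wakeup: arming an already-armed timer is a
 * no-op, so many dirtying events inside the window produce one wakeup. */
struct delayed_wakeup {
	bool          armed;
	unsigned long deadline;   /* tick at which the flusher would run */
};

static bool wakeup_delayed(struct delayed_wakeup *dw, unsigned long now,
			   unsigned long timeout)
{
	if (dw->armed)
		return false;     /* like queue_delayed_work(): keep the pending timer */
	dw->armed = true;
	dw->deadline = now + timeout;
	return true;
}

int main(void)
{
	struct delayed_wakeup dw = { false, 0 };
	int armed = 0;

	for (unsigned long now = 0; now < 5; now++)   /* five dirtyings in a burst */
		armed += wakeup_delayed(&dw, now, 500);
	printf("timers armed: %d (deadline %lu)\n", armed, dw.deadline);
	return 0;
}
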
171 struct wb_completion *done = work->done; in finish_writeback_work()
173 if (work->auto_free) in finish_writeback_work()
176 wait_queue_head_t *waitq = done->waitq; in finish_writeback_work()
179 if (atomic_dec_and_test(&done->cnt)) in finish_writeback_work()
189 if (work->done) in wb_queue_work()
190 atomic_inc(&work->done->cnt); in wb_queue_work()
192 spin_lock_irq(&wb->work_lock); in wb_queue_work()
194 if (test_bit(WB_registered, &wb->state)) { in wb_queue_work()
195 list_add_tail(&work->list, &wb->work_list); in wb_queue_work()
196 mod_delayed_work(bdi_wq, &wb->dwork, 0); in wb_queue_work()
200 spin_unlock_irq(&wb->work_lock); in wb_queue_work()
204 * wb_wait_for_completion - wait for completion of bdi_writeback_works
207 * Wait for one or more work items issued to @bdi with their ->done field
215 atomic_dec(&done->cnt); /* put down the initial count */ in wb_wait_for_completion()
216 wait_event(*done->waitq, !atomic_read(&done->cnt)); in wb_wait_for_completion()
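
wb_wait_for_completion() pairs an atomic count with a wait queue: every queued work takes a reference on the wb_completion, finish_writeback_work() drops one, and the waiter first puts down the initial count and then sleeps until the count hits zero. A portable sketch of the same pattern with POSIX threads (structure and names are illustrative, not the kernel implementation):

#include <pthread.h>
#include <stdio.h>

/* Outstanding works plus the initial reference held by the waiter. */
struct completion_count {
	pthread_mutex_t lock;
	pthread_cond_t  waitq;
	int             cnt;
};

static void work_finished(struct completion_count *done)
{
	pthread_mutex_lock(&done->lock);
	if (--done->cnt == 0)                  /* last reference wakes the waiter */
		pthread_cond_broadcast(&done->waitq);
	pthread_mutex_unlock(&done->lock);
}

static void wait_for_all(struct completion_count *done)
{
	work_finished(done);                   /* put down the initial count */
	pthread_mutex_lock(&done->lock);
	while (done->cnt)
		pthread_cond_wait(&done->waitq, &done->lock);
	pthread_mutex_unlock(&done->lock);
}

static void *worker(void *done)
{
	work_finished(done);                   /* the actual writeback is elided */
	return NULL;
}

int main(void)
{
	struct completion_count done = {
		PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 1
	};
	pthread_t t;

	done.cnt++;                            /* queueing one work takes a reference */
	pthread_create(&t, NULL, worker, &done);
	wait_for_all(&done);
	pthread_join(&t, NULL);
	puts("all works done");
	return 0;
}
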
233 * avoiding too aggressive flip-flops from occasional foreign writes.
245 #define WB_FRN_HIST_SLOTS 16 /* inode->i_wb_frn_history is 16bit */
258 #define WB_MAX_INODES_PER_ISW ((1024UL - sizeof(struct inode_switch_wbs_context)) \
284 wb = &bdi->wb; in __inode_attach_wb()
290 if (unlikely(cmpxchg(&inode->i_wb, NULL, wb))) in __inode_attach_wb()
296 * inode_cgwb_move_to_attached - put the inode onto wb->b_attached list
306 assert_spin_locked(&wb->list_lock); in inode_cgwb_move_to_attached()
307 assert_spin_locked(&inode->i_lock); in inode_cgwb_move_to_attached()
308 WARN_ON_ONCE(inode->i_state & I_FREEING); in inode_cgwb_move_to_attached()
310 inode->i_state &= ~I_SYNC_QUEUED; in inode_cgwb_move_to_attached()
311 if (wb != &wb->bdi->wb) in inode_cgwb_move_to_attached()
312 list_move(&inode->i_io_list, &wb->b_attached); in inode_cgwb_move_to_attached()
314 list_del_init(&inode->i_io_list); in inode_cgwb_move_to_attached()
319 * locked_inode_to_wb_and_lock_list - determine a locked inode's wb and lock it
322 * Returns @inode's wb with its list_lock held. @inode->i_lock must be
328 __releases(&inode->i_lock) in locked_inode_to_wb_and_lock_list()
329 __acquires(&wb->list_lock) in locked_inode_to_wb_and_lock_list()
336 * @inode->i_lock and @wb->list_lock but list_lock nests in locked_inode_to_wb_and_lock_list()
341 spin_unlock(&inode->i_lock); in locked_inode_to_wb_and_lock_list()
342 spin_lock(&wb->list_lock); in locked_inode_to_wb_and_lock_list()
345 if (likely(wb == inode->i_wb)) { in locked_inode_to_wb_and_lock_list()
350 spin_unlock(&wb->list_lock); in locked_inode_to_wb_and_lock_list()
353 spin_lock(&inode->i_lock); in locked_inode_to_wb_and_lock_list()
358 * inode_to_wb_and_lock_list - determine an inode's wb and lock it
361 * Same as locked_inode_to_wb_and_lock_list() but @inode->i_lock isn't held
365 __acquires(&wb->list_lock) in inode_to_wb_and_lock_list()
367 spin_lock(&inode->i_lock); in inode_to_wb_and_lock_list()
378 * the first part, all inode pointers are placed into a NULL-terminated
380 * an inode could be left in a non-consistent state.
388 down_write(&bdi->wb_switch_rwsem); in bdi_down_write_wb_switch_rwsem()
393 up_write(&bdi->wb_switch_rwsem); in bdi_up_write_wb_switch_rwsem()
400 struct address_space *mapping = inode->i_mapping; in inode_do_switch_wbs()
401 XA_STATE(xas, &mapping->i_pages, 0); in inode_do_switch_wbs()
405 spin_lock(&inode->i_lock); in inode_do_switch_wbs()
406 xa_lock_irq(&mapping->i_pages); in inode_do_switch_wbs()
410 * path owns the inode and we shouldn't modify ->i_io_list. in inode_do_switch_wbs()
412 if (unlikely(inode->i_state & (I_FREEING | I_WILL_FREE))) in inode_do_switch_wbs()
425 wb_stat_mod(old_wb, WB_RECLAIMABLE, -nr); in inode_do_switch_wbs()
434 wb_stat_mod(old_wb, WB_WRITEBACK, -nr); in inode_do_switch_wbs()
439 atomic_dec(&old_wb->writeback_inodes); in inode_do_switch_wbs()
440 atomic_inc(&new_wb->writeback_inodes); in inode_do_switch_wbs()
448 * ->b_dirty which is always correct including from ->b_dirty_time. in inode_do_switch_wbs()
449 * The transfer preserves @inode->dirtied_when ordering. If the @inode in inode_do_switch_wbs()
453 if (!list_empty(&inode->i_io_list)) { in inode_do_switch_wbs()
454 inode->i_wb = new_wb; in inode_do_switch_wbs()
456 if (inode->i_state & I_DIRTY_ALL) { in inode_do_switch_wbs()
459 list_for_each_entry(pos, &new_wb->b_dirty, i_io_list) in inode_do_switch_wbs()
460 if (time_after_eq(inode->dirtied_when, in inode_do_switch_wbs()
461 pos->dirtied_when)) in inode_do_switch_wbs()
464 pos->i_io_list.prev); in inode_do_switch_wbs()
469 inode->i_wb = new_wb; in inode_do_switch_wbs()
472 /* ->i_wb_frn updates may race wbc_detach_inode() but doesn't matter */ in inode_do_switch_wbs()
473 inode->i_wb_frn_winner = 0; in inode_do_switch_wbs()
474 inode->i_wb_frn_avg_time = 0; in inode_do_switch_wbs()
475 inode->i_wb_frn_history = 0; in inode_do_switch_wbs()
482 smp_store_release(&inode->i_state, inode->i_state & ~I_WB_SWITCH); in inode_do_switch_wbs()
484 xa_unlock_irq(&mapping->i_pages); in inode_do_switch_wbs()
485 spin_unlock(&inode->i_lock); in inode_do_switch_wbs()
494 struct backing_dev_info *bdi = inode_to_bdi(isw->inodes[0]); in inode_switch_wbs_work_fn()
495 struct bdi_writeback *old_wb = isw->inodes[0]->i_wb; in inode_switch_wbs_work_fn()
496 struct bdi_writeback *new_wb = isw->new_wb; in inode_switch_wbs_work_fn()
504 down_read(&bdi->wb_switch_rwsem); in inode_switch_wbs_work_fn()
512 * Grabbing old_wb->list_lock, inode->i_lock and the i_pages lock in inode_switch_wbs_work_fn()
517 spin_lock(&old_wb->list_lock); in inode_switch_wbs_work_fn()
518 spin_lock_nested(&new_wb->list_lock, SINGLE_DEPTH_NESTING); in inode_switch_wbs_work_fn()
520 spin_lock(&new_wb->list_lock); in inode_switch_wbs_work_fn()
521 spin_lock_nested(&old_wb->list_lock, SINGLE_DEPTH_NESTING); in inode_switch_wbs_work_fn()
524 for (inodep = isw->inodes; *inodep; inodep++) { in inode_switch_wbs_work_fn()
525 WARN_ON_ONCE((*inodep)->i_wb != old_wb); in inode_switch_wbs_work_fn()
530 spin_unlock(&new_wb->list_lock); in inode_switch_wbs_work_fn()
531 spin_unlock(&old_wb->list_lock); in inode_switch_wbs_work_fn()
533 up_read(&bdi->wb_switch_rwsem); in inode_switch_wbs_work_fn()
540 for (inodep = isw->inodes; *inodep; inodep++) in inode_switch_wbs_work_fn()
562 spin_lock(&inode->i_lock); in inode_prepare_wbs_switch()
563 if (!(inode->i_sb->s_flags & SB_ACTIVE) || in inode_prepare_wbs_switch()
564 inode->i_state & (I_WB_SWITCH | I_FREEING | I_WILL_FREE) || in inode_prepare_wbs_switch()
566 spin_unlock(&inode->i_lock); in inode_prepare_wbs_switch()
569 inode->i_state |= I_WB_SWITCH; in inode_prepare_wbs_switch()
571 spin_unlock(&inode->i_lock); in inode_prepare_wbs_switch()
577 * inode_switch_wbs - change the wb association of an inode
591 if (inode->i_state & I_WB_SWITCH) in inode_switch_wbs()
613 isw->new_wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC); in inode_switch_wbs()
615 if (!isw->new_wb) in inode_switch_wbs()
618 if (!inode_prepare_wbs_switch(inode, isw->new_wb)) in inode_switch_wbs()
621 isw->inodes[0] = inode; in inode_switch_wbs()
629 INIT_RCU_WORK(&isw->work, inode_switch_wbs_work_fn); in inode_switch_wbs()
630 queue_rcu_work(isw_wq, &isw->work); in inode_switch_wbs()
635 if (isw->new_wb) in inode_switch_wbs()
636 wb_put(isw->new_wb); in inode_switch_wbs()
646 if (!inode_prepare_wbs_switch(inode, isw->new_wb)) in isw_prepare_wbs_switch()
649 isw->inodes[*nr] = inode; in isw_prepare_wbs_switch()
652 if (*nr >= WB_MAX_INODES_PER_ISW - 1) in isw_prepare_wbs_switch()
659 * cleanup_offline_cgwb - detach associated inodes
680 for (memcg_css = wb->memcg_css->parent; memcg_css; in cleanup_offline_cgwb()
681 memcg_css = memcg_css->parent) { in cleanup_offline_cgwb()
682 isw->new_wb = wb_get_create(wb->bdi, memcg_css, GFP_KERNEL); in cleanup_offline_cgwb()
683 if (isw->new_wb) in cleanup_offline_cgwb()
686 if (unlikely(!isw->new_wb)) in cleanup_offline_cgwb()
687 isw->new_wb = &wb->bdi->wb; /* wb_get() is a no-op for bdi's wb */ in cleanup_offline_cgwb()
690 spin_lock(&wb->list_lock); in cleanup_offline_cgwb()
699 restart = isw_prepare_wbs_switch(isw, &wb->b_attached, &nr); in cleanup_offline_cgwb()
701 restart = isw_prepare_wbs_switch(isw, &wb->b_dirty_time, &nr); in cleanup_offline_cgwb()
702 spin_unlock(&wb->list_lock); in cleanup_offline_cgwb()
707 wb_put(isw->new_wb); in cleanup_offline_cgwb()
718 INIT_RCU_WORK(&isw->work, inode_switch_wbs_work_fn); in cleanup_offline_cgwb()
719 queue_rcu_work(isw_wq, &isw->work); in cleanup_offline_cgwb()
725 * wbc_attach_and_unlock_inode - associate wbc with target inode and unlock it
738 spin_unlock(&inode->i_lock); in wbc_attach_and_unlock_inode()
742 wbc->wb = inode_to_wb(inode); in wbc_attach_and_unlock_inode()
743 wbc->inode = inode; in wbc_attach_and_unlock_inode()
745 wbc->wb_id = wbc->wb->memcg_css->id; in wbc_attach_and_unlock_inode()
746 wbc->wb_lcand_id = inode->i_wb_frn_winner; in wbc_attach_and_unlock_inode()
747 wbc->wb_tcand_id = 0; in wbc_attach_and_unlock_inode()
748 wbc->wb_bytes = 0; in wbc_attach_and_unlock_inode()
749 wbc->wb_lcand_bytes = 0; in wbc_attach_and_unlock_inode()
750 wbc->wb_tcand_bytes = 0; in wbc_attach_and_unlock_inode()
752 wb_get(wbc->wb); in wbc_attach_and_unlock_inode()
753 spin_unlock(&inode->i_lock); in wbc_attach_and_unlock_inode()
762 if (unlikely(wb_dying(wbc->wb) && !css_is_dying(wbc->wb->memcg_css))) in wbc_attach_and_unlock_inode()
763 inode_switch_wbs(inode, wbc->wb_id); in wbc_attach_and_unlock_inode()
768 * wbc_detach_inode - disassociate wbc from inode and perform foreign detection
775 * memcg only tracks page ownership on a first-use basis, severely confining
777 * per-inode. While the support for concurrent write sharing of an inode
780 * charging only by first-use can too readily lead to grossly incorrect
791 * using the Boyer-Moore majority vote algorithm. In addition to the byte
795 * candidate). Keeping track of the historical winner helps the algorithm
796 * to semi-reliably detect the most active writer even when it's not the
801 * inode->i_wb_frn_history. If the amount of recorded foreign IO time is
806 struct bdi_writeback *wb = wbc->wb; in wbc_detach_inode()
807 struct inode *inode = wbc->inode; in wbc_detach_inode()
815 history = inode->i_wb_frn_history; in wbc_detach_inode()
816 avg_time = inode->i_wb_frn_avg_time; in wbc_detach_inode()
819 if (wbc->wb_bytes >= wbc->wb_lcand_bytes && in wbc_detach_inode()
820 wbc->wb_bytes >= wbc->wb_tcand_bytes) { in wbc_detach_inode()
821 max_id = wbc->wb_id; in wbc_detach_inode()
822 max_bytes = wbc->wb_bytes; in wbc_detach_inode()
823 } else if (wbc->wb_lcand_bytes >= wbc->wb_tcand_bytes) { in wbc_detach_inode()
824 max_id = wbc->wb_lcand_id; in wbc_detach_inode()
825 max_bytes = wbc->wb_lcand_bytes; in wbc_detach_inode()
827 max_id = wbc->wb_tcand_id; in wbc_detach_inode()
828 max_bytes = wbc->wb_tcand_bytes; in wbc_detach_inode()
835 * deciding whether to switch or not. This is to prevent one-off in wbc_detach_inode()
839 wb->avg_write_bandwidth); in wbc_detach_inode()
841 avg_time += (max_time >> WB_FRN_TIME_AVG_SHIFT) - in wbc_detach_inode()
860 if (wbc->wb_id != max_id) in wbc_detach_inode()
861 history |= (1U << slots) - 1; in wbc_detach_inode()
881 inode->i_wb_frn_winner = max_id; in wbc_detach_inode()
882 inode->i_wb_frn_avg_time = min(avg_time, (unsigned long)U16_MAX); in wbc_detach_inode()
883 inode->i_wb_frn_history = history; in wbc_detach_inode()
885 wb_put(wbc->wb); in wbc_detach_inode()
886 wbc->wb = NULL; in wbc_detach_inode()
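
The history handling above behaves like a 16-slot shift register (WB_FRN_HIST_SLOTS): on each detach, a run of bits proportional to the foreign share of recent IO time is shifted in whenever the current wb was not the winner. A standalone sketch of that bookkeeping (user-space C; the "switch once foreign writers own more than half of the history" policy is an assumption for the example, since the actual threshold check is not part of this excerpt):

#include <stdint.h>
#include <stdio.h>

#define HIST_SLOTS 16   /* mirrors WB_FRN_HIST_SLOTS: a 16-bit history */

/* Shift 'slots' new entries into the history; they are set when the current
 * wb was not the majority writer for this round. */
static uint16_t history_update(uint16_t history, unsigned int slots, int foreign_won)
{
	history <<= slots;
	if (foreign_won)
		history |= (1U << slots) - 1;
	return history;
}

/* Assumed policy for the sketch: switch once foreign writers own more than
 * half of the recorded history. */
static int should_switch(uint16_t history)
{
	return __builtin_popcount(history) > HIST_SLOTS / 2;
}

int main(void)
{
	uint16_t h = 0;

	for (int round = 0; round < 6; round++)
		h = history_update(h, 2, /*foreign_won=*/1);
	printf("history=%#x switch=%d\n", (unsigned int)h, should_switch(h));
	return 0;
}
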
891 * wbc_account_cgroup_owner - account writeback to update inode cgroup ownership
913 if (!wbc->wb || wbc->no_cgroup_owner) in wbc_account_cgroup_owner()
919 if (!(css->flags & CSS_ONLINE)) in wbc_account_cgroup_owner()
922 id = css->id; in wbc_account_cgroup_owner()
924 if (id == wbc->wb_id) { in wbc_account_cgroup_owner()
925 wbc->wb_bytes += bytes; in wbc_account_cgroup_owner()
929 if (id == wbc->wb_lcand_id) in wbc_account_cgroup_owner()
930 wbc->wb_lcand_bytes += bytes; in wbc_account_cgroup_owner()
932 /* Boyer-Moore majority vote algorithm */ in wbc_account_cgroup_owner()
933 if (!wbc->wb_tcand_bytes) in wbc_account_cgroup_owner()
934 wbc->wb_tcand_id = id; in wbc_account_cgroup_owner()
935 if (id == wbc->wb_tcand_id) in wbc_account_cgroup_owner()
936 wbc->wb_tcand_bytes += bytes; in wbc_account_cgroup_owner()
938 wbc->wb_tcand_bytes -= min(bytes, wbc->wb_tcand_bytes); in wbc_account_cgroup_owner()
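
The tcand accounting above is one step of the Boyer-Moore majority vote: bytes written by the current candidate strengthen it, bytes from anyone else drain it, and a new candidate is adopted once the count reaches zero, so only a writer responsible for a majority of the bytes can survive the pass. A minimal standalone version of the same vote (plain user-space C over an array of writer IDs; the driver loop is purely illustrative):

#include <stdio.h>

/* One pass of the Boyer-Moore majority vote over an array of writer IDs.
 * It returns the only value that *could* hold a majority; a second pass
 * would be needed to confirm that it actually does. */
static int majority_candidate(const int *ids, int n)
{
	int cand = -1;
	long votes = 0;

	for (int i = 0; i < n; i++) {
		if (votes == 0)
			cand = ids[i];    /* adopt a new candidate */
		if (ids[i] == cand)
			votes++;          /* same writer: strengthen */
		else
			votes--;          /* different writer: drain */
	}
	return cand;
}

int main(void)
{
	int ids[] = { 3, 3, 7, 3, 9, 3, 3 };

	printf("candidate: %d\n", majority_candidate(ids, 7));   /* prints 3 */
	return 0;
}
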
943 * wb_split_bdi_pages - split nr_pages to write according to bandwidth
949 * @wb->bdi.
953 unsigned long this_bw = wb->avg_write_bandwidth; in wb_split_bdi_pages()
954 unsigned long tot_bw = atomic_long_read(&wb->bdi->tot_write_bandwidth); in wb_split_bdi_pages()
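
wb_split_bdi_pages() hands each writeback context a share of the page budget proportional to this_bw / tot_bw. A standalone sketch of that proportional split (plain C; the guards for a LONG_MAX budget and for a zero or dominating bandwidth are assumptions based on the function's description, since the rest of its body is not shown here):

#include <limits.h>
#include <stdio.h>

/* Give one writeback context a share of nr_pages proportional to
 * this_bw / tot_bw, mirroring the bandwidth-based split described above. */
static long split_pages(long nr_pages, unsigned long this_bw, unsigned long tot_bw)
{
	if (nr_pages == LONG_MAX)
		return LONG_MAX;          /* "write everything" passes through */
	if (!tot_bw || this_bw >= tot_bw)
		return nr_pages;          /* sole or dominating writer gets it all */
	return (long)(((unsigned long long)nr_pages * this_bw) / tot_bw);
}

int main(void)
{
	/* two cgroup writebacks at 30 and 90 (arbitrary units) sharing 1200 pages */
	printf("%ld %ld\n",
	       split_pages(1200, 30, 120),    /* -> 300 */
	       split_pages(1200, 90, 120));   /* -> 900 */
	return 0;
}
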
971 * bdi_split_work_to_wbs - split a wb_writeback_work to all wb's of a bdi
977 * have dirty inodes. If @base_work->nr_pages isn't %LONG_MAX, it's
986 struct bdi_writeback *wb = list_entry(&bdi->wb_list, in bdi_split_work_to_wbs()
992 list_for_each_entry_continue_rcu(wb, &bdi->wb_list, bdi_node) { in bdi_split_work_to_wbs()
1005 (base_work->sync_mode == WB_SYNC_NONE || in bdi_split_work_to_wbs()
1006 list_empty(&wb->b_dirty_time))) in bdi_split_work_to_wbs()
1011 nr_pages = wb_split_bdi_pages(wb, base_work->nr_pages); in bdi_split_work_to_wbs()
1016 work->nr_pages = nr_pages; in bdi_split_work_to_wbs()
1017 work->auto_free = 1; in bdi_split_work_to_wbs()
1025 * Pin @wb so that it stays on @bdi->wb_list. This allows in bdi_split_work_to_wbs()
1032 /* alloc failed, execute synchronously using on-stack fallback */ in bdi_split_work_to_wbs()
1035 work->nr_pages = nr_pages; in bdi_split_work_to_wbs()
1036 work->auto_free = 0; in bdi_split_work_to_wbs()
1037 work->done = &fallback_work_done; in bdi_split_work_to_wbs()
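
When cloning the work item fails, bdi_split_work_to_wbs() falls back to an on-stack copy with auto_free cleared and a local completion, trading parallelism for guaranteed forward progress under memory pressure. A compact model of that allocate-or-fallback pattern (plain C; dispatch() is a stand-in for queueing the work to a per-wb worker, and the synchronous wait on the fallback completion is only described in comments):

#include <stdio.h>
#include <stdlib.h>

struct wb_work {
	long nr_pages;
	int  auto_free;
};

/* Stand-in for handing the work to a per-wb worker. */
static void dispatch(struct wb_work *w)
{
	printf("writing %ld pages (auto_free=%d)\n", w->nr_pages, w->auto_free);
	if (w->auto_free)
		free(w);                  /* heap copies free themselves when done */
}

static void issue_work(const struct wb_work *base, long nr_pages)
{
	struct wb_work *w = malloc(sizeof(*w));

	if (w) {
		*w = *base;
		w->nr_pages = nr_pages;
		w->auto_free = 1;
		dispatch(w);
		return;
	}
	/* Allocation failed: fall back to an on-stack copy; the real code then
	 * waits for this one work to complete before continuing the loop. */
	struct wb_work fallback = *base;
	fallback.nr_pages = nr_pages;
	fallback.auto_free = 0;
	dispatch(&fallback);
}

int main(void)
{
	struct wb_work base = { .nr_pages = 0, .auto_free = 0 };

	issue_work(&base, 4096);
	return 0;
}
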
1053 * cgroup_writeback_by_id - initiate cgroup writeback from bdi and memcg IDs
1075 return -ENOENT; in cgroup_writeback_by_id()
1083 ret = -ENOENT; in cgroup_writeback_by_id()
1093 ret = -ENOENT; in cgroup_writeback_by_id()
1104 * BTW the memcg stats are flushed periodically and this is best-effort in cgroup_writeback_by_id()
1113 work->nr_pages = dirty; in cgroup_writeback_by_id()
1114 work->sync_mode = WB_SYNC_NONE; in cgroup_writeback_by_id()
1115 work->range_cyclic = 1; in cgroup_writeback_by_id()
1116 work->reason = reason; in cgroup_writeback_by_id()
1117 work->done = done; in cgroup_writeback_by_id()
1118 work->auto_free = 1; in cgroup_writeback_by_id()
1122 ret = -ENOMEM; in cgroup_writeback_by_id()
1134 * cgroup_writeback_umount - flush inode wb switches for umount
1138 * flushes in-flight inode wb switches. An inode wb switch goes through
1147 if (!(sb->s_bdi->capabilities & BDI_CAP_WRITEBACK)) in cgroup_writeback_umount()
1159 * ensure that all in-flight wb switches are in the workqueue. in cgroup_writeback_umount()
1170 return -ENOMEM; in cgroup_writeback_init()
1183 assert_spin_locked(&wb->list_lock); in inode_cgwb_move_to_attached()
1184 assert_spin_locked(&inode->i_lock); in inode_cgwb_move_to_attached()
1185 WARN_ON_ONCE(inode->i_state & I_FREEING); in inode_cgwb_move_to_attached()
1187 inode->i_state &= ~I_SYNC_QUEUED; in inode_cgwb_move_to_attached()
1188 list_del_init(&inode->i_io_list); in inode_cgwb_move_to_attached()
1194 __releases(&inode->i_lock) in locked_inode_to_wb_and_lock_list()
1195 __acquires(&wb->list_lock) in locked_inode_to_wb_and_lock_list()
1199 spin_unlock(&inode->i_lock); in locked_inode_to_wb_and_lock_list()
1200 spin_lock(&wb->list_lock); in locked_inode_to_wb_and_lock_list()
1205 __acquires(&wb->list_lock) in inode_to_wb_and_lock_list()
1209 spin_lock(&wb->list_lock); in inode_to_wb_and_lock_list()
1224 if (!skip_if_busy || !writeback_in_progress(&bdi->wb)) { in bdi_split_work_to_wbs()
1225 base_work->auto_free = 0; in bdi_split_work_to_wbs()
1226 wb_queue_work(&bdi->wb, base_work); in bdi_split_work_to_wbs()
1255 if (test_bit(WB_start_all, &wb->state) || in wb_start_writeback()
1256 test_and_set_bit(WB_start_all, &wb->state)) in wb_start_writeback()
1259 wb->start_all_reason = reason; in wb_start_writeback()
1264 * wb_start_background_writeback - start background writeback
1291 spin_lock(&inode->i_lock); in inode_io_list_del()
1293 inode->i_state &= ~I_SYNC_QUEUED; in inode_io_list_del()
1294 list_del_init(&inode->i_io_list); in inode_io_list_del()
1297 spin_unlock(&inode->i_lock); in inode_io_list_del()
1298 spin_unlock(&wb->list_lock); in inode_io_list_del()
1307 struct super_block *sb = inode->i_sb; in sb_mark_inode_writeback()
1310 if (list_empty(&inode->i_wb_list)) { in sb_mark_inode_writeback()
1311 spin_lock_irqsave(&sb->s_inode_wblist_lock, flags); in sb_mark_inode_writeback()
1312 if (list_empty(&inode->i_wb_list)) { in sb_mark_inode_writeback()
1313 list_add_tail(&inode->i_wb_list, &sb->s_inodes_wb); in sb_mark_inode_writeback()
1316 spin_unlock_irqrestore(&sb->s_inode_wblist_lock, flags); in sb_mark_inode_writeback()
1325 struct super_block *sb = inode->i_sb; in sb_clear_inode_writeback()
1328 if (!list_empty(&inode->i_wb_list)) { in sb_clear_inode_writeback()
1329 spin_lock_irqsave(&sb->s_inode_wblist_lock, flags); in sb_clear_inode_writeback()
1330 if (!list_empty(&inode->i_wb_list)) { in sb_clear_inode_writeback()
1331 list_del_init(&inode->i_wb_list); in sb_clear_inode_writeback()
1334 spin_unlock_irqrestore(&sb->s_inode_wblist_lock, flags); in sb_clear_inode_writeback()
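
Both helpers above use the same cheap double check: an unlocked list_empty() peek skips the common case, and the state is re-tested under s_inode_wblist_lock before the list is actually modified, because another CPU may have raced in between. A minimal sketch of that check/lock/re-check idiom with pthreads (a boolean flag stands in for the i_wb_list emptiness test; illustrative only):

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t wblist_lock = PTHREAD_MUTEX_INITIALIZER;
static atomic_bool on_list;        /* stands in for !list_empty(&inode->i_wb_list) */
static int nr_tracked;

static void mark_writeback(void)
{
	if (atomic_load(&on_list))                 /* unlocked fast path: already tracked */
		return;
	pthread_mutex_lock(&wblist_lock);
	if (!atomic_load(&on_list)) {              /* re-check: someone may have raced us */
		atomic_store(&on_list, true);
		nr_tracked++;
	}
	pthread_mutex_unlock(&wblist_lock);
}

int main(void)
{
	mark_writeback();
	mark_writeback();                          /* second call takes the fast path */
	printf("tracked %d time(s)\n", nr_tracked);
	return 0;
}
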
1339 * Redirty an inode: set its when-it-was-dirtied timestamp and move it to the
1340 * furthest end of its superblock's dirty-inode list.
1342 * Before stamping the inode's ->dirtied_when, we check to see whether it is
1343 * already the most-recently-dirtied inode on the b_dirty list. If that is
1349 assert_spin_locked(&inode->i_lock); in redirty_tail_locked()
1351 inode->i_state &= ~I_SYNC_QUEUED; in redirty_tail_locked()
1357 if (inode->i_state & I_FREEING) { in redirty_tail_locked()
1358 list_del_init(&inode->i_io_list); in redirty_tail_locked()
1362 if (!list_empty(&wb->b_dirty)) { in redirty_tail_locked()
1365 tail = wb_inode(wb->b_dirty.next); in redirty_tail_locked()
1366 if (time_before(inode->dirtied_when, tail->dirtied_when)) in redirty_tail_locked()
1367 inode->dirtied_when = jiffies; in redirty_tail_locked()
1369 inode_io_list_move_locked(inode, wb, &wb->b_dirty); in redirty_tail_locked()
1374 spin_lock(&inode->i_lock); in redirty_tail()
1376 spin_unlock(&inode->i_lock); in redirty_tail()
1380 * requeue inode for re-scanning after bdi->b_io list is exhausted.
1384 inode_io_list_move_locked(inode, wb, &wb->b_more_io); in requeue_io()
1389 assert_spin_locked(&inode->i_lock); in inode_sync_complete()
1391 inode->i_state &= ~I_SYNC; in inode_sync_complete()
1394 /* Called with inode->i_lock which ensures memory ordering. */ in inode_sync_complete()
1400 bool ret = time_after(inode->dirtied_when, t); in inode_dirtied_after()
1405 * This test is necessary to prevent such wrapped-around relative times in inode_dirtied_after()
1408 ret = ret && time_before_eq(inode->dirtied_when, jiffies); in inode_dirtied_after()
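
The time_after()/time_before_eq() pair makes the dirtied_when test safe across a jiffies wrap: both reduce to a signed difference, so a timestamp taken just before the counter overflows still compares correctly afterwards. A user-space sketch of the same idiom (the macros mirror the <linux/jiffies.h> definitions but are redefined locally for the example):

#include <stdio.h>

typedef unsigned long jiffies_t;

/* Wrap-safe comparisons via signed difference, as in <linux/jiffies.h>. */
#define TIME_AFTER(a, b)      ((long)((b) - (a)) < 0)
#define TIME_BEFORE_EQ(a, b)  (!TIME_AFTER(a, b))

int main(void)
{
	jiffies_t now = (jiffies_t)-5;     /* five ticks before the counter wraps */
	jiffies_t dirtied = now + 3;       /* dirtied "later", still pre-wrap */
	jiffies_t later = now + 10;        /* current time, after the wrap */

	/* inode_dirtied_after()-style check: dirtied after 'now', yet not
	 * absurdly far in the future relative to 'later'. */
	printf("%d %d\n", TIME_AFTER(dirtied, now), TIME_BEFORE_EQ(dirtied, later));
	return 0;
}
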
1429 inode = wb_inode(delaying_queue->prev); in move_expired_inodes()
1432 spin_lock(&inode->i_lock); in move_expired_inodes()
1433 list_move(&inode->i_io_list, &tmp); in move_expired_inodes()
1435 inode->i_state |= I_SYNC_QUEUED; in move_expired_inodes()
1436 spin_unlock(&inode->i_lock); in move_expired_inodes()
1437 if (sb_is_blkdev_sb(inode->i_sb)) in move_expired_inodes()
1439 if (sb && sb != inode->i_sb) in move_expired_inodes()
1441 sb = inode->i_sb; in move_expired_inodes()
1452 * we don't take inode->i_lock here because it is just a pointless overhead. in move_expired_inodes()
1457 sb = wb_inode(tmp.prev)->i_sb; in move_expired_inodes()
1460 if (inode->i_sb == sb) in move_expired_inodes()
1461 list_move(&inode->i_io_list, dispatch_queue); in move_expired_inodes()
1477 * +--> dequeue for IO
1485 assert_spin_locked(&wb->list_lock); in queue_io()
1486 list_splice_init(&wb->b_more_io, &wb->b_io); in queue_io()
1487 moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, dirtied_before); in queue_io()
1488 if (!work->for_sync) in queue_io()
1489 time_expire_jif = jiffies - dirtytime_expire_interval * HZ; in queue_io()
1490 moved += move_expired_inodes(&wb->b_dirty_time, &wb->b_io, in queue_io()
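
queue_io() refills b_io by splicing b_more_io back and then pulling every inode whose dirtied_when falls at or before the cutoff out of b_dirty (and, with a longer cutoff, out of b_dirty_time). A compact sketch of that expiry scan over a timestamp-ordered queue (plain arrays instead of list_heads, and no wraparound handling, which the previous example covers):

#include <stdio.h>

/* Move every entry dirtied at or before 'cutoff' from 'dirty' to 'io'.
 * 'dirty' is kept oldest-first, matching the oldest-end expiry scan above. */
static int move_expired(unsigned long *dirty, int *ndirty,
			unsigned long *io, int nio, unsigned long cutoff)
{
	int moved = 0;

	while (*ndirty && dirty[0] <= cutoff) {
		io[nio + moved++] = dirty[0];
		for (int i = 1; i < *ndirty; i++)     /* shift the remainder up */
			dirty[i - 1] = dirty[i];
		(*ndirty)--;
	}
	return moved;
}

int main(void)
{
	unsigned long dirty[] = { 100, 180, 260, 900 };   /* dirtied_when, oldest first */
	unsigned long io[8];
	int ndirty = 4;
	int moved = move_expired(dirty, &ndirty, io, 0, /*cutoff=*/300);

	printf("moved %d, %d still dirty\n", moved, ndirty);   /* moved 3, 1 still dirty */
	return 0;
}
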
1501 if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode)) { in write_inode()
1503 ret = inode->i_sb->s_op->write_inode(inode, wbc); in write_inode()
1519 assert_spin_locked(&inode->i_lock); in inode_wait_for_writeback()
1521 if (!(inode->i_state & I_SYNC)) in inode_wait_for_writeback()
1527 /* Checking I_SYNC with inode->i_lock guarantees memory ordering. */ in inode_wait_for_writeback()
1528 if (!(inode->i_state & I_SYNC)) in inode_wait_for_writeback()
1530 spin_unlock(&inode->i_lock); in inode_wait_for_writeback()
1532 spin_lock(&inode->i_lock); in inode_wait_for_writeback()
1543 __releases(inode->i_lock) in inode_sleep_on_writeback()
1549 assert_spin_locked(&inode->i_lock); in inode_sleep_on_writeback()
1553 /* Checking I_SYNC with inode->i_lock guarantees memory ordering. */ in inode_sleep_on_writeback()
1554 sleep = !!(inode->i_state & I_SYNC); in inode_sleep_on_writeback()
1555 spin_unlock(&inode->i_lock); in inode_sleep_on_writeback()
1565 * inodes. This function can be called only by the flusher thread - no one else
1573 if (inode->i_state & I_FREEING) in requeue_inode()
1581 if ((inode->i_state & I_DIRTY) && in requeue_inode()
1582 (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)) in requeue_inode()
1583 inode->dirtied_when = jiffies; in requeue_inode()
1585 if (wbc->pages_skipped) { in requeue_inode()
1592 if (inode->i_state & I_DIRTY_ALL) in requeue_inode()
1599 if (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) { in requeue_inode()
1604 if (wbc->nr_to_write <= 0 && in requeue_inode()
1618 } else if (inode->i_state & I_DIRTY) { in requeue_inode()
1625 } else if (inode->i_state & I_DIRTY_TIME) { in requeue_inode()
1626 inode->dirtied_when = jiffies; in requeue_inode()
1627 inode_io_list_move_locked(inode, wb, &wb->b_dirty_time); in requeue_inode()
1628 inode->i_state &= ~I_SYNC_QUEUED; in requeue_inode()
1637 * on @wbc->nr_to_write), and clear the relevant dirty flags from i_state.
1649 struct address_space *mapping = inode->i_mapping; in __writeback_single_inode()
1650 long nr_to_write = wbc->nr_to_write; in __writeback_single_inode()
1654 WARN_ON(!(inode->i_state & I_SYNC)); in __writeback_single_inode()
1664 * separate, external IO completion path and ->sync_fs for guaranteeing in __writeback_single_inode()
1667 if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync) { in __writeback_single_inode()
1678 if ((inode->i_state & I_DIRTY_TIME) && in __writeback_single_inode()
1679 (wbc->sync_mode == WB_SYNC_ALL || in __writeback_single_inode()
1680 time_after(jiffies, inode->dirtied_time_when + in __writeback_single_inode()
1692 spin_lock(&inode->i_lock); in __writeback_single_inode()
1693 dirty = inode->i_state & I_DIRTY; in __writeback_single_inode()
1694 inode->i_state &= ~dirty; in __writeback_single_inode()
1698 * __mark_inode_dirty() to test i_state without grabbing i_lock - in __writeback_single_inode()
1710 inode->i_state |= I_DIRTY_PAGES; in __writeback_single_inode()
1711 else if (unlikely(inode->i_state & I_PINNING_NETFS_WB)) { in __writeback_single_inode()
1712 if (!(inode->i_state & I_DIRTY_PAGES)) { in __writeback_single_inode()
1713 inode->i_state &= ~I_PINNING_NETFS_WB; in __writeback_single_inode()
1714 wbc->unpinned_netfs_wb = true; in __writeback_single_inode()
1719 spin_unlock(&inode->i_lock); in __writeback_single_inode()
1727 wbc->unpinned_netfs_wb = false; in __writeback_single_inode()
1733 * Write out an inode's dirty data and metadata on-demand, i.e. separately from
1736 * whether it is a data-integrity sync (%WB_SYNC_ALL) or not (%WB_SYNC_NONE).
1747 spin_lock(&inode->i_lock); in writeback_single_inode()
1748 if (!atomic_read(&inode->i_count)) in writeback_single_inode()
1749 WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING))); in writeback_single_inode()
1751 WARN_ON(inode->i_state & I_WILL_FREE); in writeback_single_inode()
1753 if (inode->i_state & I_SYNC) { in writeback_single_inode()
1760 if (wbc->sync_mode != WB_SYNC_ALL) in writeback_single_inode()
1764 WARN_ON(inode->i_state & I_SYNC); in writeback_single_inode()
1768 * For data-integrity syncs we also need to check whether any pages are in writeback_single_inode()
1772 if (!(inode->i_state & I_DIRTY_ALL) && in writeback_single_inode()
1773 (wbc->sync_mode != WB_SYNC_ALL || in writeback_single_inode()
1774 !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK))) in writeback_single_inode()
1776 inode->i_state |= I_SYNC; in writeback_single_inode()
1784 spin_lock(&inode->i_lock); in writeback_single_inode()
1789 if (!(inode->i_state & I_FREEING)) { in writeback_single_inode()
1795 if (!(inode->i_state & I_DIRTY_ALL)) in writeback_single_inode()
1797 else if (!(inode->i_state & I_SYNC_QUEUED)) { in writeback_single_inode()
1798 if ((inode->i_state & I_DIRTY)) in writeback_single_inode()
1800 else if (inode->i_state & I_DIRTY_TIME) { in writeback_single_inode()
1801 inode->dirtied_when = jiffies; in writeback_single_inode()
1804 &wb->b_dirty_time); in writeback_single_inode()
1809 spin_unlock(&wb->list_lock); in writeback_single_inode()
1812 spin_unlock(&inode->i_lock); in writeback_single_inode()
1834 if (work->sync_mode == WB_SYNC_ALL || work->tagged_writepages) in writeback_chunk_size()
1837 pages = min(wb->avg_write_bandwidth / 2, in writeback_chunk_size()
1839 pages = min(pages, work->nr_pages); in writeback_chunk_size()
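
writeback_chunk_size() bounds how many pages a single inode may consume per pass: data-integrity and tagged writeback get an effectively unlimited chunk, while background and kupdate work is limited by roughly half the measured write bandwidth and by the remaining work budget. A sketch of that shaping (plain C; the LONG_MAX case, the second bound on the bandwidth term, and the rounding to a large minimum chunk are assumptions, since those lines are elided from the excerpt):

#include <limits.h>
#include <stdio.h>

#define MIN_CHUNK_PAGES 1024UL    /* roughly what MIN_WRITEBACK_PAGES yields with 4K pages */

static long chunk_size(int integrity_sync, unsigned long bw_pages,
		       unsigned long dirty_limit_share, long budget)
{
	unsigned long pages;

	if (integrity_sync)
		return LONG_MAX;                  /* don't fragment data-integrity writeback */

	pages = bw_pages / 2;                     /* half of the measured write bandwidth */
	if (pages > dirty_limit_share)
		pages = dirty_limit_share;
	if ((long)pages > budget)
		pages = (unsigned long)budget;
	/* keep chunks reasonably large so per-inode overhead amortizes */
	pages = (pages + MIN_CHUNK_PAGES) / MIN_CHUNK_PAGES * MIN_CHUNK_PAGES;
	return (long)pages;
}

int main(void)
{
	printf("%ld %ld\n",
	       chunk_size(1, 0, 0, 0),                    /* integrity: LONG_MAX */
	       chunk_size(0, 25600, 65536, 100000));      /* background: 12800 -> 13312 */
	return 0;
}
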
1852 * NOTE! This is called with wb->list_lock held, and will
1861 .sync_mode = work->sync_mode, in writeback_sb_inodes()
1862 .tagged_writepages = work->tagged_writepages, in writeback_sb_inodes()
1863 .for_kupdate = work->for_kupdate, in writeback_sb_inodes()
1864 .for_background = work->for_background, in writeback_sb_inodes()
1865 .for_sync = work->for_sync, in writeback_sb_inodes()
1866 .range_cyclic = work->range_cyclic, in writeback_sb_inodes()
1875 if (work->for_kupdate) in writeback_sb_inodes()
1876 dirtied_before = jiffies - in writeback_sb_inodes()
1879 while (!list_empty(&wb->b_io)) { in writeback_sb_inodes()
1880 struct inode *inode = wb_inode(wb->b_io.prev); in writeback_sb_inodes()
1884 if (inode->i_sb != sb) { in writeback_sb_inodes()
1885 if (work->sb) { in writeback_sb_inodes()
1908 spin_lock(&inode->i_lock); in writeback_sb_inodes()
1909 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { in writeback_sb_inodes()
1911 spin_unlock(&inode->i_lock); in writeback_sb_inodes()
1914 if ((inode->i_state & I_SYNC) && wbc.sync_mode != WB_SYNC_ALL) { in writeback_sb_inodes()
1917 * doing writeback-for-data-integrity, move it to in writeback_sb_inodes()
1925 spin_unlock(&inode->i_lock); in writeback_sb_inodes()
1929 spin_unlock(&wb->list_lock); in writeback_sb_inodes()
1936 if (inode->i_state & I_SYNC) { in writeback_sb_inodes()
1940 spin_lock(&wb->list_lock); in writeback_sb_inodes()
1943 inode->i_state |= I_SYNC; in writeback_sb_inodes()
1957 work->nr_pages -= write_chunk - wbc.nr_to_write; in writeback_sb_inodes()
1958 wrote = write_chunk - wbc.nr_to_write - wbc.pages_skipped; in writeback_sb_inodes()
1971 blk_flush_plug(current->plug, false); in writeback_sb_inodes()
1980 spin_lock(&inode->i_lock); in writeback_sb_inodes()
1981 if (!(inode->i_state & I_DIRTY_ALL)) in writeback_sb_inodes()
1985 spin_unlock(&inode->i_lock); in writeback_sb_inodes()
1988 spin_unlock(&tmp_wb->list_lock); in writeback_sb_inodes()
1989 spin_lock(&wb->list_lock); in writeback_sb_inodes()
1999 if (work->nr_pages <= 0) in writeback_sb_inodes()
2012 while (!list_empty(&wb->b_io)) { in __writeback_inodes_wb()
2013 struct inode *inode = wb_inode(wb->b_io.prev); in __writeback_inodes_wb()
2014 struct super_block *sb = inode->i_sb; in __writeback_inodes_wb()
2026 up_read(&sb->s_umount); in __writeback_inodes_wb()
2032 if (work->nr_pages <= 0) in __writeback_inodes_wb()
2052 spin_lock(&wb->list_lock); in writeback_inodes_wb()
2053 if (list_empty(&wb->b_io)) in writeback_inodes_wb()
2056 spin_unlock(&wb->list_lock); in writeback_inodes_wb()
2059 return nr_pages - work.nr_pages; in writeback_inodes_wb()
2066 * dirtying-time in the inode's address_space. So this periodic writeback code
2072 * one-second gap.
2080 long nr_pages = work->nr_pages; in wb_writeback()
2092 if (work->nr_pages <= 0) in wb_writeback()
2096 * Background writeout and kupdate-style writeback may in wb_writeback()
2101 if ((work->for_background || work->for_kupdate) && in wb_writeback()
2102 !list_empty(&wb->work_list)) in wb_writeback()
2109 if (work->for_background && !wb_over_bg_thresh(wb)) in wb_writeback()
2113 spin_lock(&wb->list_lock); in wb_writeback()
2116 if (list_empty(&wb->b_io)) { in wb_writeback()
2123 if (work->for_kupdate) { in wb_writeback()
2124 dirtied_before = jiffies - in wb_writeback()
2127 } else if (work->for_background) in wb_writeback()
2133 if (work->sb) in wb_writeback()
2134 progress = writeback_sb_inodes(work->sb, wb, work); in wb_writeback()
2148 spin_unlock(&wb->list_lock); in wb_writeback()
2155 if (list_empty(&wb->b_more_io)) { in wb_writeback()
2156 spin_unlock(&wb->list_lock); in wb_writeback()
2166 inode = wb_inode(wb->b_more_io.prev); in wb_writeback()
2167 spin_lock(&inode->i_lock); in wb_writeback()
2168 spin_unlock(&wb->list_lock); in wb_writeback()
2174 return nr_pages - work->nr_pages; in wb_writeback()
2184 spin_lock_irq(&wb->work_lock); in get_next_work_item()
2185 if (!list_empty(&wb->work_list)) { in get_next_work_item()
2186 work = list_entry(wb->work_list.next, in get_next_work_item()
2188 list_del_init(&work->list); in get_next_work_item()
2190 spin_unlock_irq(&wb->work_lock); in get_next_work_item()
2223 expired = wb->last_old_flush + in wb_check_old_data_flush()
2228 wb->last_old_flush = jiffies; in wb_check_old_data_flush()
2250 if (!test_bit(WB_start_all, &wb->state)) in wb_check_start_all()
2259 .reason = wb->start_all_reason, in wb_check_start_all()
2265 clear_bit(WB_start_all, &wb->state); in wb_check_start_all()
2278 set_bit(WB_writeback_running, &wb->state); in wb_do_writeback()
2286 * Check for a flush-everything request in wb_do_writeback()
2295 clear_bit(WB_writeback_running, &wb->state); in wb_do_writeback()
2310 set_worker_desc("flush-%s", bdi_dev_name(wb->bdi)); in wb_workfn()
2313 !test_bit(WB_registered, &wb->state))) { in wb_workfn()
2323 } while (!list_empty(&wb->work_list)); in wb_workfn()
2335 if (!list_empty(&wb->work_list)) in wb_workfn()
2352 list_for_each_entry_rcu(wb, &bdi->wb_list, bdi_node) in __wakeup_flusher_threads_bdi()
2374 blk_flush_plug(current->plug, true); in wakeup_flusher_threads()
2408 list_for_each_entry_rcu(wb, &bdi->wb_list, bdi_node) in wakeup_dirtytime_writeback()
2409 if (!list_empty(&wb->b_dirty_time)) in wakeup_dirtytime_writeback()
2435 * __mark_inode_dirty - internal function to mark an inode dirty
2454 * Note that for blockdevs, inode->dirtied_when represents the dirtying time of
2455 * the block-special inode (/dev/hda1) itself. And the ->dirtied_when field of
2456 * the kernel-internal blockdev inode represents the dirtying time of the
2458 * page->mapping->host, so the page-dirtying time is recorded in the internal
2463 struct super_block *sb = inode->i_sb; in __mark_inode_dirty()
2472 * We tell ->dirty_inode callback that timestamps need to in __mark_inode_dirty()
2475 if (inode->i_state & I_DIRTY_TIME) { in __mark_inode_dirty()
2476 spin_lock(&inode->i_lock); in __mark_inode_dirty()
2477 if (inode->i_state & I_DIRTY_TIME) { in __mark_inode_dirty()
2478 inode->i_state &= ~I_DIRTY_TIME; in __mark_inode_dirty()
2481 spin_unlock(&inode->i_lock); in __mark_inode_dirty()
2486 * (if needed) it can update on-disk fields and journal the in __mark_inode_dirty()
2492 if (sb->s_op->dirty_inode) in __mark_inode_dirty()
2493 sb->s_op->dirty_inode(inode, in __mark_inode_dirty()
2515 if ((inode->i_state & flags) == flags) in __mark_inode_dirty()
2518 spin_lock(&inode->i_lock); in __mark_inode_dirty()
2519 if ((inode->i_state & flags) != flags) { in __mark_inode_dirty()
2520 const int was_dirty = inode->i_state & I_DIRTY; in __mark_inode_dirty()
2524 inode->i_state |= flags; in __mark_inode_dirty()
2534 spin_lock(&inode->i_lock); in __mark_inode_dirty()
2543 if (inode->i_state & I_SYNC_QUEUED) in __mark_inode_dirty()
2550 if (!S_ISBLK(inode->i_mode)) { in __mark_inode_dirty()
2554 if (inode->i_state & I_FREEING) in __mark_inode_dirty()
2559 * reposition it (that would break b_dirty time-ordering). in __mark_inode_dirty()
2565 inode->dirtied_when = jiffies; in __mark_inode_dirty()
2567 inode->dirtied_time_when = jiffies; in __mark_inode_dirty()
2569 if (inode->i_state & I_DIRTY) in __mark_inode_dirty()
2570 dirty_list = &wb->b_dirty; in __mark_inode_dirty()
2572 dirty_list = &wb->b_dirty_time; in __mark_inode_dirty()
2577 spin_unlock(&wb->list_lock); in __mark_inode_dirty()
2578 spin_unlock(&inode->i_lock); in __mark_inode_dirty()
2583 * we have to wake up the corresponding bdi thread in __mark_inode_dirty()
2584 * to make sure background write-back happens in __mark_inode_dirty()
2588 (wb->bdi->capabilities & BDI_CAP_WRITEBACK)) in __mark_inode_dirty()
2595 spin_unlock(&wb->list_lock); in __mark_inode_dirty()
2596 spin_unlock(&inode->i_lock); in __mark_inode_dirty()
2617 WARN_ON(!rwsem_is_locked(&sb->s_umount)); in wait_sb_inodes()
2619 mutex_lock(&sb->s_sync_lock); in wait_sb_inodes()
2626 * reference. s_inode_wblist_lock protects sb->s_inodes_wb as well as in wait_sb_inodes()
2631 spin_lock_irq(&sb->s_inode_wblist_lock); in wait_sb_inodes()
2632 list_splice_init(&sb->s_inodes_wb, &sync_list); in wait_sb_inodes()
2644 struct address_space *mapping = inode->i_mapping; in wait_sb_inodes()
2652 list_move_tail(&inode->i_wb_list, &sb->s_inodes_wb); in wait_sb_inodes()
2655 * The mapping can appear untagged while still on-list since we in wait_sb_inodes()
2662 spin_unlock_irq(&sb->s_inode_wblist_lock); in wait_sb_inodes()
2664 spin_lock(&inode->i_lock); in wait_sb_inodes()
2665 if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) { in wait_sb_inodes()
2666 spin_unlock(&inode->i_lock); in wait_sb_inodes()
2668 spin_lock_irq(&sb->s_inode_wblist_lock); in wait_sb_inodes()
2672 spin_unlock(&inode->i_lock); in wait_sb_inodes()
2687 spin_lock_irq(&sb->s_inode_wblist_lock); in wait_sb_inodes()
2689 spin_unlock_irq(&sb->s_inode_wblist_lock); in wait_sb_inodes()
2691 mutex_unlock(&sb->s_sync_lock); in wait_sb_inodes()
2697 struct backing_dev_info *bdi = sb->s_bdi; in __writeback_inodes_sb_nr()
2710 WARN_ON(!rwsem_is_locked(&sb->s_umount)); in __writeback_inodes_sb_nr()
2712 bdi_split_work_to_wbs(sb->s_bdi, &work, skip_if_busy); in __writeback_inodes_sb_nr()
2717 * writeback_inodes_sb_nr - writeback dirty inodes from given super_block
2735 * writeback_inodes_sb - writeback dirty inodes from given super_block
2750 * try_to_writeback_inodes_sb - try to start writeback if none underway
2758 if (!down_read_trylock(&sb->s_umount)) in try_to_writeback_inodes_sb()
2762 up_read(&sb->s_umount); in try_to_writeback_inodes_sb()
2767 * sync_inodes_sb - sync sb inode pages
2775 struct backing_dev_info *bdi = sb->s_bdi; in sync_inodes_sb()
2794 WARN_ON(!rwsem_is_locked(&sb->s_umount)); in sync_inodes_sb()
2807 * write_inode_now - write an inode to disk
2825 if (!mapping_can_writeback(inode->i_mapping)) in write_inode_now()
2834 * sync_inode_metadata - write an inode to disk
2846 .nr_to_write = 0, /* metadata-only */ in sync_inode_metadata()