Lines Matching +full:step +full:- +full:up
1 // SPDX-License-Identifier: GPL-2.0
6 * - Monitor latencies in a defined window of time.
7 * - If the minimum latency in the above window exceeds some target, increment
8 * scaling step and scale down queue depth by a factor of 2x. The monitoring
9 * window is then shrunk to 100 / sqrt(scaling step + 1).
10 * - For any window where we don't have solid data on what the latencies
12 * - If latencies look good, decrement scaling step.
13 * - If we're only doing writes, allow the scaling step to go negative. This
15 * scaling step of 0 if reads show up or the heavy writers finish. Unlike
17 * scaling step retains the default step==0 window size.
25 #include <linux/backing-dev.h>
28 #include "blk-stat.h"
29 #include "blk-wbt.h"
30 #include "blk-rq-qos.h"
76 * information to make a firm scale up/down decision.
88 unsigned long last_issue; /* last non-throttled issue */
89 unsigned long last_comp; /* last non-throttled comp */
103 rq->wbt_flags = 0; in wbt_clear_state()
108 return rq->wbt_flags; in wbt_flags()
113 return rq->wbt_flags & WBT_TRACKED; in wbt_is_tracked()
118 return rq->wbt_flags & WBT_READ; in wbt_is_read()
123 * Default setting, we'll scale up (to 75% of QD max) or down (min 1)
140 * information to scale up or down, scale up.
147 return rwb && rwb->enable_state != WBT_STATE_OFF_DEFAULT && in rwb_enabled()
148 rwb->enable_state != WBT_STATE_OFF_MANUAL; in rwb_enabled()
167 struct backing_dev_info *bdi = rwb->rqos.disk->bdi; in wb_recent_wait()
169 return time_before(jiffies, bdi->last_bdp_sleep + HZ); in wb_recent_wait()
176 return &rwb->rq_wait[WBT_RWQ_SWAP]; in get_rq_wait()
178 return &rwb->rq_wait[WBT_RWQ_DISCARD]; in get_rq_wait()
180 return &rwb->rq_wait[WBT_RWQ_BG]; in get_rq_wait()
188 struct rq_wait *rqw = &rwb->rq_wait[i]; in rwb_wake_all()
190 if (wq_has_sleeper(&rqw->wait)) in rwb_wake_all()
191 wake_up_all(&rqw->wait); in rwb_wake_all()
200 inflight = atomic_dec_return(&rqw->inflight); in wbt_rqw_done()
205 * wake people up. in wbt_rqw_done()
208 limit = rwb->wb_background; in wbt_rqw_done()
209 else if (blk_queue_write_cache(rwb->rqos.disk->queue) && in wbt_rqw_done()
213 limit = rwb->wb_normal; in wbt_rqw_done()
216 * Don't wake anyone up if we are above the normal limit. in wbt_rqw_done()
221 if (wq_has_sleeper(&rqw->wait)) { in wbt_rqw_done()
222 int diff = limit - inflight; in wbt_rqw_done()
224 if (!inflight || diff >= rwb->wb_background / 2) in wbt_rqw_done()
225 wake_up_all(&rqw->wait); in wbt_rqw_done()
250 if (rwb->sync_cookie == rq) { in wbt_done()
251 rwb->sync_issue = 0; in wbt_done()
252 rwb->sync_cookie = NULL; in wbt_done()
256 wb_timestamp(rwb, &rwb->last_comp); in wbt_done()
258 WARN_ON_ONCE(rq == rwb->sync_cookie); in wbt_done()
278 u64 issue = READ_ONCE(rwb->sync_issue); in rwb_sync_issue_lat()
280 if (!issue || !rwb->sync_cookie) in rwb_sync_issue_lat()
283 return blk_time_get_ns() - issue; in rwb_sync_issue_lat()
291 ret += atomic_read(&rwb->rq_wait[i].inflight); in wbt_inflight()
305 struct backing_dev_info *bdi = rwb->rqos.disk->bdi; in latency_exceeded()
306 struct rq_depth *rqd = &rwb->rq_depth; in latency_exceeded()
319 if (thislat > rwb->cur_win_nsec || in latency_exceeded()
320 (thislat > rwb->min_lat_nsec && !stat[READ].nr_samples)) { in latency_exceeded()
342 * If the 'min' latency exceeds our target, step down. in latency_exceeded()
344 if (stat[READ].min > rwb->min_lat_nsec) { in latency_exceeded()
350 if (rqd->scale_step) in latency_exceeded()
358 struct backing_dev_info *bdi = rwb->rqos.disk->bdi; in rwb_trace_step()
359 struct rq_depth *rqd = &rwb->rq_depth; in rwb_trace_step()
361 trace_wbt_step(bdi, msg, rqd->scale_step, rwb->cur_win_nsec, in rwb_trace_step()
362 rwb->wb_background, rwb->wb_normal, rqd->max_depth); in rwb_trace_step()
367 if (rwb->min_lat_nsec == 0) { in calc_wb_limits()
368 rwb->wb_normal = rwb->wb_background = 0; in calc_wb_limits()
369 } else if (rwb->rq_depth.max_depth <= 2) { in calc_wb_limits()
370 rwb->wb_normal = rwb->rq_depth.max_depth; in calc_wb_limits()
371 rwb->wb_background = 1; in calc_wb_limits()
373 rwb->wb_normal = (rwb->rq_depth.max_depth + 1) / 2; in calc_wb_limits()
374 rwb->wb_background = (rwb->rq_depth.max_depth + 3) / 4; in calc_wb_limits()
380 if (!rq_depth_scale_up(&rwb->rq_depth)) in scale_up()
383 rwb->unknown_cnt = 0; in scale_up()
385 rwb_trace_step(rwb, tracepoint_string("scale up")); in scale_up()
390 if (!rq_depth_scale_down(&rwb->rq_depth, hard_throttle)) in scale_down()
393 rwb->unknown_cnt = 0; in scale_down()
399 struct rq_depth *rqd = &rwb->rq_depth; in rwb_arm_timer()
401 if (rqd->scale_step > 0) { in rwb_arm_timer()
403 * We should speed this up, using some variant of a fast in rwb_arm_timer()
408 rwb->cur_win_nsec = div_u64(rwb->win_nsec << 4, in rwb_arm_timer()
409 int_sqrt((rqd->scale_step + 1) << 8)); in rwb_arm_timer()
412 * For step < 0, we don't want to increase/decrease the in rwb_arm_timer()
415 rwb->cur_win_nsec = rwb->win_nsec; in rwb_arm_timer()
418 blk_stat_activate_nsecs(rwb->cb, rwb->cur_win_nsec); in rwb_arm_timer()
423 struct rq_wb *rwb = cb->data; in wb_timer_fn()
424 struct rq_depth *rqd = &rwb->rq_depth; in wb_timer_fn()
428 if (!rwb->rqos.disk) in wb_timer_fn()
431 status = latency_exceeded(rwb, cb->stat); in wb_timer_fn()
433 trace_wbt_timer(rwb->rqos.disk->bdi, status, rqd->scale_step, inflight); in wb_timer_fn()
436 * If we exceeded the latency target, step down. If we did not, in wb_timer_fn()
437 * step one level up. If we don't know enough to say either exceeded in wb_timer_fn()
449 * We started at the center step, but don't have a valid in wb_timer_fn()
451 * Allow step to go negative, to increase write perf. in wb_timer_fn()
456 if (++rwb->unknown_cnt < RWB_UNKNOWN_BUMP) in wb_timer_fn()
461 * case, slowly return to center state (step == 0). in wb_timer_fn()
463 if (rqd->scale_step > 0) in wb_timer_fn()
465 else if (rqd->scale_step < 0) in wb_timer_fn()
473 * Re-arm timer, if we have IO in flight in wb_timer_fn()
475 if (rqd->scale_step || inflight) in wb_timer_fn()
481 struct rq_depth *rqd = &rwb->rq_depth; in wbt_update_limits()
483 rqd->scale_step = 0; in wbt_update_limits()
484 rqd->scaled_max = false; in wbt_update_limits()
504 return RQWB(rqos)->min_lat_nsec; in wbt_get_min_lat()
513 RQWB(rqos)->min_lat_nsec = val; in wbt_set_min_lat()
515 RQWB(rqos)->enable_state = WBT_STATE_ON_MANUAL; in wbt_set_min_lat()
517 RQWB(rqos)->enable_state = WBT_STATE_OFF_MANUAL; in wbt_set_min_lat()
527 return time_before(now, rwb->last_issue + HZ / 10) || in close_io()
528 time_before(now, rwb->last_comp + HZ / 10); in close_io()
538 return rwb->wb_background; in get_limit()
549 limit = rwb->rq_depth.max_depth; in get_limit()
555 limit = rwb->wb_background; in get_limit()
557 limit = rwb->wb_normal; in get_limit()
571 return rq_wait_inc_below(rqw, get_limit(data->rwb, data->opf)); in wbt_inflight_cb()
577 wbt_rqw_done(data->rwb, rqw, data->wb_acct); in wbt_cleanup_cb()
604 if ((bio->bi_opf & (REQ_SYNC | REQ_IDLE)) == in wbt_should_throttle()
625 if (bio->bi_opf & REQ_SWAP) in bio_to_wbt_flags()
644 * If we do sleep, we'll release and re-grab it.
654 wb_timestamp(rwb, &rwb->last_issue); in wbt_wait()
658 __wbt_wait(rwb, flags, bio->bi_opf); in wbt_wait()
660 if (!blk_stat_is_active(rwb->cb)) in wbt_wait()
667 rq->wbt_flags |= bio_to_wbt_flags(rwb, bio); in wbt_track()
684 if (wbt_is_read(rq) && !rwb->sync_issue) { in wbt_issue()
685 rwb->sync_cookie = rq; in wbt_issue()
686 rwb->sync_issue = rq->io_start_time_ns; in wbt_issue()
695 if (rq == rwb->sync_cookie) { in wbt_requeue()
696 rwb->sync_issue = 0; in wbt_requeue()
697 rwb->sync_cookie = NULL; in wbt_requeue()
706 struct request_queue *q = disk->queue; in wbt_enable_default()
710 if (q->elevator && in wbt_enable_default()
711 test_bit(ELEVATOR_FLAG_DISABLE_WBT, &q->elevator->flags)) in wbt_enable_default()
717 if (enable && RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT) in wbt_enable_default()
718 RQWB(rqos)->enable_state = WBT_STATE_ON_DEFAULT; in wbt_enable_default()
734 * We default to 2msec for non-rotational storage, and 75msec in wbt_default_latency_nsec()
753 return -1; in wbt_data_dir()
758 RQWB(rqos)->rq_depth.queue_depth = blk_queue_depth(rqos->disk->queue); in wbt_queue_depth_changed()
766 blk_stat_remove_callback(rqos->disk->queue, rwb->cb); in wbt_exit()
767 blk_stat_free_callback(rwb->cb); in wbt_exit()
776 struct rq_qos *rqos = wbt_rq_qos(disk->queue); in wbt_disable_default()
781 if (rwb->enable_state == WBT_STATE_ON_DEFAULT) { in wbt_disable_default()
782 blk_stat_deactivate(rwb->cb); in wbt_disable_default()
783 rwb->enable_state = WBT_STATE_OFF_DEFAULT; in wbt_disable_default()
794 seq_printf(m, "%llu\n", rwb->cur_win_nsec); in wbt_curr_win_nsec_show()
803 seq_printf(m, "%d\n", rwb->enable_state); in wbt_enabled_show()
811 seq_printf(m, "%u\n", rqos->id); in wbt_id_show()
823 atomic_read(&rwb->rq_wait[i].inflight)); in wbt_inflight_show()
832 seq_printf(m, "%lu\n", rwb->min_lat_nsec); in wbt_min_lat_nsec_show()
841 seq_printf(m, "%u\n", rwb->unknown_cnt); in wbt_unknown_cnt_show()
850 seq_printf(m, "%u\n", rwb->wb_normal); in wbt_normal_show()
859 seq_printf(m, "%u\n", rwb->wb_background); in wbt_background_show()
892 struct request_queue *q = disk->queue; in wbt_init()
899 return -ENOMEM; in wbt_init()
901 rwb->cb = blk_stat_alloc_callback(wb_timer_fn, wbt_data_dir, 2, rwb); in wbt_init()
902 if (!rwb->cb) { in wbt_init()
904 return -ENOMEM; in wbt_init()
908 rq_wait_init(&rwb->rq_wait[i]); in wbt_init()
910 rwb->last_comp = rwb->last_issue = jiffies; in wbt_init()
911 rwb->win_nsec = RWB_WINDOW_NSEC; in wbt_init()
912 rwb->enable_state = WBT_STATE_ON_DEFAULT; in wbt_init()
913 rwb->rq_depth.default_depth = RWB_DEF_DEPTH; in wbt_init()
914 rwb->min_lat_nsec = wbt_default_latency_nsec(q); in wbt_init()
915 rwb->rq_depth.queue_depth = blk_queue_depth(q); in wbt_init()
921 mutex_lock(&q->rq_qos_mutex); in wbt_init()
922 ret = rq_qos_add(&rwb->rqos, disk, RQ_QOS_WBT, &wbt_rqos_ops); in wbt_init()
923 mutex_unlock(&q->rq_qos_mutex); in wbt_init()
927 blk_stat_add_callback(q, rwb->cb); in wbt_init()
932 blk_stat_free_callback(rwb->cb); in wbt_init()