mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-11-11 21:38:32 +08:00
block: hook up writeback throttling
Enable throttling of buffered writeback to make it a lot smoother and have way less impact on other system activity. Background writeback should be, by definition, background activity. The fact that we flush huge bundles of it at a time means that it potentially has heavy impacts on foreground workloads, which isn't ideal. We can't easily limit the sizes of writes that we do, since that would impact file system layout in the presence of delayed allocation. So just throttle back buffered writeback, unless someone is waiting for it. The algorithm for when to throttle takes its inspiration from the CoDel networking scheduling algorithm. Like CoDel, blk-wb monitors the minimum latencies of requests over a window of time. In that window of time, if the minimum latency of any request exceeds a given target, then a scale count is incremented and the queue depth is shrunk. The next monitoring window is shrunk accordingly. Unlike CoDel, if we hit a window that exhibits good behavior, then we simply increment the scale count and re-calculate the limits for that scale value. This prevents us from oscillating between a close-to-ideal value and max all the time, instead remaining in the windows where we get good behavior. Unlike CoDel, blk-wb allows the scale count to go negative. This happens if we primarily have writes going on. Unlike positive scale counts, this doesn't change the size of the monitoring window. When the heavy writers finish, blk-wb quickly snaps back to its stable state of a zero scale count. The patch registers a sysfs entry, 'wbt_lat_usec'. This sets the latency target to be met. It defaults to 2 msec for non-rotational storage, and 75 msec for rotational storage. Setting this value to '0' disables blk-wb. Generally, a user would not have to touch this setting. We don't enable WBT on devices that are managed with CFQ, and have a non-root block cgroup attached.
If we have a proportional share setup on this particular disk, then the wbt throttling will interfere with that. We don't have a strong need for wbt for that case, since we will rely on CFQ doing that for us. Signed-off-by: Jens Axboe <axboe@fb.com>
This commit is contained in:
parent
e34cbd3074
commit
87760e5eef
@ -169,5 +169,12 @@ This is the number of bytes the device can write in a single write-same
|
|||||||
command. A value of '0' means write-same is not supported by this
|
command. A value of '0' means write-same is not supported by this
|
||||||
device.
|
device.
|
||||||
|
|
||||||
|
wbt_lat_usec (RW)
|
||||||
|
----------------
|
||||||
|
If the device is registered for writeback throttling, then this file shows
|
||||||
|
the target minimum read latency. If this latency is exceeded in a given
|
||||||
|
window of time (see wb_window_usec), then the writeback throttling will start
|
||||||
|
scaling back writes.
|
||||||
|
|
||||||
|
|
||||||
Jens Axboe <jens.axboe@oracle.com>, February 2009
|
Jens Axboe <jens.axboe@oracle.com>, February 2009
|
||||||
|
@ -121,6 +121,32 @@ config BLK_CMDLINE_PARSER
|
|||||||
|
|
||||||
See Documentation/block/cmdline-partition.txt for more information.
|
See Documentation/block/cmdline-partition.txt for more information.
|
||||||
|
|
||||||
|
config BLK_WBT
|
||||||
|
bool "Enable support for block device writeback throttling"
|
||||||
|
default n
|
||||||
|
---help---
|
||||||
|
Enabling this option enables the block layer to throttle buffered
|
||||||
|
background writeback from the VM, making it more smooth and having
|
||||||
|
less impact on foreground operations. The throttling is done
|
||||||
|
dynamically on an algorithm loosely based on CoDel, factoring in
|
||||||
|
the realtime performance of the disk.
|
||||||
|
|
||||||
|
config BLK_WBT_SQ
|
||||||
|
bool "Single queue writeback throttling"
|
||||||
|
default n
|
||||||
|
depends on BLK_WBT
|
||||||
|
---help---
|
||||||
|
Enable writeback throttling by default on legacy single queue devices
|
||||||
|
|
||||||
|
config BLK_WBT_MQ
|
||||||
|
bool "Multiqueue writeback throttling"
|
||||||
|
default y
|
||||||
|
depends on BLK_WBT
|
||||||
|
---help---
|
||||||
|
Enable writeback throttling by default on multiqueue devices.
|
||||||
|
Multiqueue currently doesn't have support for IO scheduling,
|
||||||
|
enabling this option is recommended.
|
||||||
|
|
||||||
menu "Partition Types"
|
menu "Partition Types"
|
||||||
|
|
||||||
source "block/partitions/Kconfig"
|
source "block/partitions/Kconfig"
|
||||||
|
@ -39,6 +39,7 @@
|
|||||||
|
|
||||||
#include "blk.h"
|
#include "blk.h"
|
||||||
#include "blk-mq.h"
|
#include "blk-mq.h"
|
||||||
|
#include "blk-wbt.h"
|
||||||
|
|
||||||
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
|
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
|
||||||
EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
|
EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
|
||||||
@ -882,6 +883,7 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
|
|||||||
|
|
||||||
fail:
|
fail:
|
||||||
blk_free_flush_queue(q->fq);
|
blk_free_flush_queue(q->fq);
|
||||||
|
wbt_exit(q);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(blk_init_allocated_queue);
|
EXPORT_SYMBOL(blk_init_allocated_queue);
|
||||||
@ -1344,6 +1346,7 @@ void blk_requeue_request(struct request_queue *q, struct request *rq)
|
|||||||
blk_delete_timer(rq);
|
blk_delete_timer(rq);
|
||||||
blk_clear_rq_complete(rq);
|
blk_clear_rq_complete(rq);
|
||||||
trace_block_rq_requeue(q, rq);
|
trace_block_rq_requeue(q, rq);
|
||||||
|
wbt_requeue(q->rq_wb, &rq->issue_stat);
|
||||||
|
|
||||||
if (rq->rq_flags & RQF_QUEUED)
|
if (rq->rq_flags & RQF_QUEUED)
|
||||||
blk_queue_end_tag(q, rq);
|
blk_queue_end_tag(q, rq);
|
||||||
@ -1436,6 +1439,8 @@ void __blk_put_request(struct request_queue *q, struct request *req)
|
|||||||
/* this is a bio leak */
|
/* this is a bio leak */
|
||||||
WARN_ON(req->bio != NULL);
|
WARN_ON(req->bio != NULL);
|
||||||
|
|
||||||
|
wbt_done(q->rq_wb, &req->issue_stat);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Request may not have originated from ll_rw_blk. if not,
|
* Request may not have originated from ll_rw_blk. if not,
|
||||||
* it didn't come out of our reserved rq pools
|
* it didn't come out of our reserved rq pools
|
||||||
@ -1663,6 +1668,7 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
|
|||||||
int el_ret, where = ELEVATOR_INSERT_SORT;
|
int el_ret, where = ELEVATOR_INSERT_SORT;
|
||||||
struct request *req;
|
struct request *req;
|
||||||
unsigned int request_count = 0;
|
unsigned int request_count = 0;
|
||||||
|
unsigned int wb_acct;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* low level driver can indicate that it wants pages above a
|
* low level driver can indicate that it wants pages above a
|
||||||
@ -1715,17 +1721,22 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
|
|||||||
}
|
}
|
||||||
|
|
||||||
get_rq:
|
get_rq:
|
||||||
|
wb_acct = wbt_wait(q->rq_wb, bio, q->queue_lock);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Grab a free request. This is might sleep but can not fail.
|
* Grab a free request. This is might sleep but can not fail.
|
||||||
* Returns with the queue unlocked.
|
* Returns with the queue unlocked.
|
||||||
*/
|
*/
|
||||||
req = get_request(q, bio->bi_opf, bio, GFP_NOIO);
|
req = get_request(q, bio->bi_opf, bio, GFP_NOIO);
|
||||||
if (IS_ERR(req)) {
|
if (IS_ERR(req)) {
|
||||||
|
__wbt_done(q->rq_wb, wb_acct);
|
||||||
bio->bi_error = PTR_ERR(req);
|
bio->bi_error = PTR_ERR(req);
|
||||||
bio_endio(bio);
|
bio_endio(bio);
|
||||||
goto out_unlock;
|
goto out_unlock;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
wbt_track(&req->issue_stat, wb_acct);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* After dropping the lock and possibly sleeping here, our request
|
* After dropping the lock and possibly sleeping here, our request
|
||||||
* may now be mergeable after it had proven unmergeable (above).
|
* may now be mergeable after it had proven unmergeable (above).
|
||||||
@ -2467,6 +2478,7 @@ void blk_start_request(struct request *req)
|
|||||||
if (test_bit(QUEUE_FLAG_STATS, &req->q->queue_flags)) {
|
if (test_bit(QUEUE_FLAG_STATS, &req->q->queue_flags)) {
|
||||||
blk_stat_set_issue_time(&req->issue_stat);
|
blk_stat_set_issue_time(&req->issue_stat);
|
||||||
req->rq_flags |= RQF_STATS;
|
req->rq_flags |= RQF_STATS;
|
||||||
|
wbt_issue(req->q->rq_wb, &req->issue_stat);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -2708,9 +2720,10 @@ void blk_finish_request(struct request *req, int error)
|
|||||||
|
|
||||||
blk_account_io_done(req);
|
blk_account_io_done(req);
|
||||||
|
|
||||||
if (req->end_io)
|
if (req->end_io) {
|
||||||
|
wbt_done(req->q->rq_wb, &req->issue_stat);
|
||||||
req->end_io(req, error);
|
req->end_io(req, error);
|
||||||
else {
|
} else {
|
||||||
if (blk_bidi_rq(req))
|
if (blk_bidi_rq(req))
|
||||||
__blk_put_request(req->next_rq->q, req->next_rq);
|
__blk_put_request(req->next_rq->q, req->next_rq);
|
||||||
|
|
||||||
|
@ -31,6 +31,7 @@
|
|||||||
#include "blk-mq.h"
|
#include "blk-mq.h"
|
||||||
#include "blk-mq-tag.h"
|
#include "blk-mq-tag.h"
|
||||||
#include "blk-stat.h"
|
#include "blk-stat.h"
|
||||||
|
#include "blk-wbt.h"
|
||||||
|
|
||||||
static DEFINE_MUTEX(all_q_mutex);
|
static DEFINE_MUTEX(all_q_mutex);
|
||||||
static LIST_HEAD(all_q_list);
|
static LIST_HEAD(all_q_list);
|
||||||
@ -326,6 +327,8 @@ static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx,
|
|||||||
|
|
||||||
if (rq->rq_flags & RQF_MQ_INFLIGHT)
|
if (rq->rq_flags & RQF_MQ_INFLIGHT)
|
||||||
atomic_dec(&hctx->nr_active);
|
atomic_dec(&hctx->nr_active);
|
||||||
|
|
||||||
|
wbt_done(q->rq_wb, &rq->issue_stat);
|
||||||
rq->rq_flags = 0;
|
rq->rq_flags = 0;
|
||||||
|
|
||||||
clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
|
clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
|
||||||
@ -354,6 +357,7 @@ inline void __blk_mq_end_request(struct request *rq, int error)
|
|||||||
blk_account_io_done(rq);
|
blk_account_io_done(rq);
|
||||||
|
|
||||||
if (rq->end_io) {
|
if (rq->end_io) {
|
||||||
|
wbt_done(rq->q->rq_wb, &rq->issue_stat);
|
||||||
rq->end_io(rq, error);
|
rq->end_io(rq, error);
|
||||||
} else {
|
} else {
|
||||||
if (unlikely(blk_bidi_rq(rq)))
|
if (unlikely(blk_bidi_rq(rq)))
|
||||||
@ -471,6 +475,7 @@ void blk_mq_start_request(struct request *rq)
|
|||||||
if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
|
if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
|
||||||
blk_stat_set_issue_time(&rq->issue_stat);
|
blk_stat_set_issue_time(&rq->issue_stat);
|
||||||
rq->rq_flags |= RQF_STATS;
|
rq->rq_flags |= RQF_STATS;
|
||||||
|
wbt_issue(q->rq_wb, &rq->issue_stat);
|
||||||
}
|
}
|
||||||
|
|
||||||
blk_add_timer(rq);
|
blk_add_timer(rq);
|
||||||
@ -508,6 +513,7 @@ static void __blk_mq_requeue_request(struct request *rq)
|
|||||||
struct request_queue *q = rq->q;
|
struct request_queue *q = rq->q;
|
||||||
|
|
||||||
trace_block_rq_requeue(q, rq);
|
trace_block_rq_requeue(q, rq);
|
||||||
|
wbt_requeue(q->rq_wb, &rq->issue_stat);
|
||||||
|
|
||||||
if (test_and_clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) {
|
if (test_and_clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) {
|
||||||
if (q->dma_drain_size && blk_rq_bytes(rq))
|
if (q->dma_drain_size && blk_rq_bytes(rq))
|
||||||
@ -1339,6 +1345,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
|
|||||||
struct blk_plug *plug;
|
struct blk_plug *plug;
|
||||||
struct request *same_queue_rq = NULL;
|
struct request *same_queue_rq = NULL;
|
||||||
blk_qc_t cookie;
|
blk_qc_t cookie;
|
||||||
|
unsigned int wb_acct;
|
||||||
|
|
||||||
blk_queue_bounce(q, &bio);
|
blk_queue_bounce(q, &bio);
|
||||||
|
|
||||||
@ -1353,9 +1360,15 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
|
|||||||
blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq))
|
blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq))
|
||||||
return BLK_QC_T_NONE;
|
return BLK_QC_T_NONE;
|
||||||
|
|
||||||
|
wb_acct = wbt_wait(q->rq_wb, bio, NULL);
|
||||||
|
|
||||||
rq = blk_mq_map_request(q, bio, &data);
|
rq = blk_mq_map_request(q, bio, &data);
|
||||||
if (unlikely(!rq))
|
if (unlikely(!rq)) {
|
||||||
|
__wbt_done(q->rq_wb, wb_acct);
|
||||||
return BLK_QC_T_NONE;
|
return BLK_QC_T_NONE;
|
||||||
|
}
|
||||||
|
|
||||||
|
wbt_track(&rq->issue_stat, wb_acct);
|
||||||
|
|
||||||
cookie = blk_tag_to_qc_t(rq->tag, data.hctx->queue_num);
|
cookie = blk_tag_to_qc_t(rq->tag, data.hctx->queue_num);
|
||||||
|
|
||||||
@ -1439,6 +1452,7 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
|
|||||||
struct blk_mq_alloc_data data;
|
struct blk_mq_alloc_data data;
|
||||||
struct request *rq;
|
struct request *rq;
|
||||||
blk_qc_t cookie;
|
blk_qc_t cookie;
|
||||||
|
unsigned int wb_acct;
|
||||||
|
|
||||||
blk_queue_bounce(q, &bio);
|
blk_queue_bounce(q, &bio);
|
||||||
|
|
||||||
@ -1455,9 +1469,15 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
|
|||||||
} else
|
} else
|
||||||
request_count = blk_plug_queued_count(q);
|
request_count = blk_plug_queued_count(q);
|
||||||
|
|
||||||
|
wb_acct = wbt_wait(q->rq_wb, bio, NULL);
|
||||||
|
|
||||||
rq = blk_mq_map_request(q, bio, &data);
|
rq = blk_mq_map_request(q, bio, &data);
|
||||||
if (unlikely(!rq))
|
if (unlikely(!rq)) {
|
||||||
|
__wbt_done(q->rq_wb, wb_acct);
|
||||||
return BLK_QC_T_NONE;
|
return BLK_QC_T_NONE;
|
||||||
|
}
|
||||||
|
|
||||||
|
wbt_track(&rq->issue_stat, wb_acct);
|
||||||
|
|
||||||
cookie = blk_tag_to_qc_t(rq->tag, data.hctx->queue_num);
|
cookie = blk_tag_to_qc_t(rq->tag, data.hctx->queue_num);
|
||||||
|
|
||||||
@ -2139,6 +2159,8 @@ void blk_mq_free_queue(struct request_queue *q)
|
|||||||
list_del_init(&q->all_q_node);
|
list_del_init(&q->all_q_node);
|
||||||
mutex_unlock(&all_q_mutex);
|
mutex_unlock(&all_q_mutex);
|
||||||
|
|
||||||
|
wbt_exit(q);
|
||||||
|
|
||||||
blk_mq_del_queue_tag_set(q);
|
blk_mq_del_queue_tag_set(q);
|
||||||
|
|
||||||
blk_mq_exit_hw_queues(q, set, set->nr_hw_queues);
|
blk_mq_exit_hw_queues(q, set, set->nr_hw_queues);
|
||||||
|
@ -13,6 +13,7 @@
|
|||||||
#include <linux/gfp.h>
|
#include <linux/gfp.h>
|
||||||
|
|
||||||
#include "blk.h"
|
#include "blk.h"
|
||||||
|
#include "blk-wbt.h"
|
||||||
|
|
||||||
unsigned long blk_max_low_pfn;
|
unsigned long blk_max_low_pfn;
|
||||||
EXPORT_SYMBOL(blk_max_low_pfn);
|
EXPORT_SYMBOL(blk_max_low_pfn);
|
||||||
@ -845,6 +846,7 @@ EXPORT_SYMBOL_GPL(blk_queue_flush_queueable);
|
|||||||
void blk_set_queue_depth(struct request_queue *q, unsigned int depth)
|
void blk_set_queue_depth(struct request_queue *q, unsigned int depth)
|
||||||
{
|
{
|
||||||
q->queue_depth = depth;
|
q->queue_depth = depth;
|
||||||
|
wbt_set_queue_depth(q->rq_wb, depth);
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(blk_set_queue_depth);
|
EXPORT_SYMBOL(blk_set_queue_depth);
|
||||||
|
|
||||||
@ -868,6 +870,8 @@ void blk_queue_write_cache(struct request_queue *q, bool wc, bool fua)
|
|||||||
else
|
else
|
||||||
queue_flag_clear(QUEUE_FLAG_FUA, q);
|
queue_flag_clear(QUEUE_FLAG_FUA, q);
|
||||||
spin_unlock_irq(q->queue_lock);
|
spin_unlock_irq(q->queue_lock);
|
||||||
|
|
||||||
|
wbt_set_write_cache(q->rq_wb, test_bit(QUEUE_FLAG_WC, &q->queue_flags));
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(blk_queue_write_cache);
|
EXPORT_SYMBOL_GPL(blk_queue_write_cache);
|
||||||
|
|
||||||
|
@ -13,6 +13,7 @@
|
|||||||
|
|
||||||
#include "blk.h"
|
#include "blk.h"
|
||||||
#include "blk-mq.h"
|
#include "blk-mq.h"
|
||||||
|
#include "blk-wbt.h"
|
||||||
|
|
||||||
struct queue_sysfs_entry {
|
struct queue_sysfs_entry {
|
||||||
struct attribute attr;
|
struct attribute attr;
|
||||||
@ -41,6 +42,19 @@ queue_var_store(unsigned long *var, const char *page, size_t count)
|
|||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static ssize_t queue_var_store64(u64 *var, const char *page)
|
||||||
|
{
|
||||||
|
int err;
|
||||||
|
u64 v;
|
||||||
|
|
||||||
|
err = kstrtou64(page, 10, &v);
|
||||||
|
if (err < 0)
|
||||||
|
return err;
|
||||||
|
|
||||||
|
*var = v;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static ssize_t queue_requests_show(struct request_queue *q, char *page)
|
static ssize_t queue_requests_show(struct request_queue *q, char *page)
|
||||||
{
|
{
|
||||||
return queue_var_show(q->nr_requests, (page));
|
return queue_var_show(q->nr_requests, (page));
|
||||||
@ -364,6 +378,32 @@ static ssize_t queue_poll_store(struct request_queue *q, const char *page,
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static ssize_t queue_wb_lat_show(struct request_queue *q, char *page)
|
||||||
|
{
|
||||||
|
if (!q->rq_wb)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
return sprintf(page, "%llu\n", div_u64(q->rq_wb->min_lat_nsec, 1000));
|
||||||
|
}
|
||||||
|
|
||||||
|
static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
|
||||||
|
size_t count)
|
||||||
|
{
|
||||||
|
ssize_t ret;
|
||||||
|
u64 val;
|
||||||
|
|
||||||
|
if (!q->rq_wb)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
ret = queue_var_store64(&val, page);
|
||||||
|
if (ret < 0)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
q->rq_wb->min_lat_nsec = val * 1000ULL;
|
||||||
|
wbt_update_limits(q->rq_wb);
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
static ssize_t queue_wc_show(struct request_queue *q, char *page)
|
static ssize_t queue_wc_show(struct request_queue *q, char *page)
|
||||||
{
|
{
|
||||||
if (test_bit(QUEUE_FLAG_WC, &q->queue_flags))
|
if (test_bit(QUEUE_FLAG_WC, &q->queue_flags))
|
||||||
@ -578,6 +618,12 @@ static struct queue_sysfs_entry queue_stats_entry = {
|
|||||||
.show = queue_stats_show,
|
.show = queue_stats_show,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static struct queue_sysfs_entry queue_wb_lat_entry = {
|
||||||
|
.attr = {.name = "wbt_lat_usec", .mode = S_IRUGO | S_IWUSR },
|
||||||
|
.show = queue_wb_lat_show,
|
||||||
|
.store = queue_wb_lat_store,
|
||||||
|
};
|
||||||
|
|
||||||
static struct attribute *default_attrs[] = {
|
static struct attribute *default_attrs[] = {
|
||||||
&queue_requests_entry.attr,
|
&queue_requests_entry.attr,
|
||||||
&queue_ra_entry.attr,
|
&queue_ra_entry.attr,
|
||||||
@ -608,6 +654,7 @@ static struct attribute *default_attrs[] = {
|
|||||||
&queue_wc_entry.attr,
|
&queue_wc_entry.attr,
|
||||||
&queue_dax_entry.attr,
|
&queue_dax_entry.attr,
|
||||||
&queue_stats_entry.attr,
|
&queue_stats_entry.attr,
|
||||||
|
&queue_wb_lat_entry.attr,
|
||||||
NULL,
|
NULL,
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -682,6 +729,7 @@ static void blk_release_queue(struct kobject *kobj)
|
|||||||
struct request_queue *q =
|
struct request_queue *q =
|
||||||
container_of(kobj, struct request_queue, kobj);
|
container_of(kobj, struct request_queue, kobj);
|
||||||
|
|
||||||
|
wbt_exit(q);
|
||||||
bdi_exit(&q->backing_dev_info);
|
bdi_exit(&q->backing_dev_info);
|
||||||
blkcg_exit_queue(q);
|
blkcg_exit_queue(q);
|
||||||
|
|
||||||
@ -722,6 +770,44 @@ struct kobj_type blk_queue_ktype = {
|
|||||||
.release = blk_release_queue,
|
.release = blk_release_queue,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static void blk_wb_stat_get(void *data, struct blk_rq_stat *stat)
|
||||||
|
{
|
||||||
|
blk_queue_stat_get(data, stat);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void blk_wb_stat_clear(void *data)
|
||||||
|
{
|
||||||
|
blk_stat_clear(data);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool blk_wb_stat_is_current(struct blk_rq_stat *stat)
|
||||||
|
{
|
||||||
|
return blk_stat_is_current(stat);
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct wb_stat_ops wb_stat_ops = {
|
||||||
|
.get = blk_wb_stat_get,
|
||||||
|
.is_current = blk_wb_stat_is_current,
|
||||||
|
.clear = blk_wb_stat_clear,
|
||||||
|
};
|
||||||
|
|
||||||
|
static void blk_wb_init(struct request_queue *q)
|
||||||
|
{
|
||||||
|
#ifndef CONFIG_BLK_WBT_MQ
|
||||||
|
if (q->mq_ops)
|
||||||
|
return;
|
||||||
|
#endif
|
||||||
|
#ifndef CONFIG_BLK_WBT_SQ
|
||||||
|
if (q->request_fn)
|
||||||
|
return;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If this fails, we don't get throttling
|
||||||
|
*/
|
||||||
|
wbt_init(q, &wb_stat_ops);
|
||||||
|
}
|
||||||
|
|
||||||
int blk_register_queue(struct gendisk *disk)
|
int blk_register_queue(struct gendisk *disk)
|
||||||
{
|
{
|
||||||
int ret;
|
int ret;
|
||||||
@ -761,6 +847,8 @@ int blk_register_queue(struct gendisk *disk)
|
|||||||
if (q->mq_ops)
|
if (q->mq_ops)
|
||||||
blk_mq_register_dev(dev, q);
|
blk_mq_register_dev(dev, q);
|
||||||
|
|
||||||
|
blk_wb_init(q);
|
||||||
|
|
||||||
if (!q->request_fn)
|
if (!q->request_fn)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
@ -16,6 +16,7 @@
|
|||||||
#include <linux/blktrace_api.h>
|
#include <linux/blktrace_api.h>
|
||||||
#include <linux/blk-cgroup.h>
|
#include <linux/blk-cgroup.h>
|
||||||
#include "blk.h"
|
#include "blk.h"
|
||||||
|
#include "blk-wbt.h"
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* tunables
|
* tunables
|
||||||
@ -3762,9 +3763,11 @@ static void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
|
|||||||
struct cfq_data *cfqd = cic_to_cfqd(cic);
|
struct cfq_data *cfqd = cic_to_cfqd(cic);
|
||||||
struct cfq_queue *cfqq;
|
struct cfq_queue *cfqq;
|
||||||
uint64_t serial_nr;
|
uint64_t serial_nr;
|
||||||
|
bool nonroot_cg;
|
||||||
|
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
serial_nr = bio_blkcg(bio)->css.serial_nr;
|
serial_nr = bio_blkcg(bio)->css.serial_nr;
|
||||||
|
nonroot_cg = bio_blkcg(bio) != &blkcg_root;
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -3774,6 +3777,17 @@ static void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
|
|||||||
if (unlikely(!cfqd) || likely(cic->blkcg_serial_nr == serial_nr))
|
if (unlikely(!cfqd) || likely(cic->blkcg_serial_nr == serial_nr))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If we have a non-root cgroup, we can depend on that to
|
||||||
|
* do proper throttling of writes. Turn off wbt for that
|
||||||
|
* case.
|
||||||
|
*/
|
||||||
|
if (nonroot_cg) {
|
||||||
|
struct request_queue *q = cfqd->queue;
|
||||||
|
|
||||||
|
wbt_disable(q->rq_wb);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Drop reference to queues. New queues will be assigned in new
|
* Drop reference to queues. New queues will be assigned in new
|
||||||
* group upon arrival of fresh requests.
|
* group upon arrival of fresh requests.
|
||||||
|
@ -38,6 +38,7 @@ struct bsg_job;
|
|||||||
struct blkcg_gq;
|
struct blkcg_gq;
|
||||||
struct blk_flush_queue;
|
struct blk_flush_queue;
|
||||||
struct pr_ops;
|
struct pr_ops;
|
||||||
|
struct rq_wb;
|
||||||
|
|
||||||
#define BLKDEV_MIN_RQ 4
|
#define BLKDEV_MIN_RQ 4
|
||||||
#define BLKDEV_MAX_RQ 128 /* Default maximum */
|
#define BLKDEV_MAX_RQ 128 /* Default maximum */
|
||||||
@ -383,6 +384,8 @@ struct request_queue {
|
|||||||
int nr_rqs[2]; /* # allocated [a]sync rqs */
|
int nr_rqs[2]; /* # allocated [a]sync rqs */
|
||||||
int nr_rqs_elvpriv; /* # allocated rqs w/ elvpriv */
|
int nr_rqs_elvpriv; /* # allocated rqs w/ elvpriv */
|
||||||
|
|
||||||
|
struct rq_wb *rq_wb;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If blkcg is not used, @q->root_rl serves all requests. If blkcg
|
* If blkcg is not used, @q->root_rl serves all requests. If blkcg
|
||||||
* is used, root blkg allocates from @q->root_rl and all other
|
* is used, root blkg allocates from @q->root_rl and all other
|
||||||
|
Loading…
Reference in New Issue
Block a user