linux/block/blk-throttle.h

#ifndef BLK_THROTTLE_H
#define BLK_THROTTLE_H

#include "blk-cgroup-rwstat.h"

/*
 * To implement hierarchical throttling, throtl_grps form a tree and bios
 * are dispatched upwards level by level until they reach the top and get
 * issued.  When dispatching bios from the children and local group at each
 * level, if the bios are dispatched into a single bio_list, there's a risk
 * of a local or child group which can queue many bios at once filling up
 * the list starving others.
 *
 * To avoid such starvation, dispatched bios are queued separately
 * according to where they came from.  When they are again dispatched to
 * the parent, they're popped in round-robin order so that no single source
 * hogs the dispatch window.
 *
 * throtl_qnode is used to keep the queued bios separated by their sources.
 * Bios are queued to throtl_qnode which in turn is queued to
 * throtl_service_queue and then dispatched in round-robin order.
 *
 * It's also used to track the reference counts on blkg's.  A qnode always
 * belongs to a throtl_grp and gets queued on itself or the parent, so
 * incrementing the reference of the associated throtl_grp when a qnode is
 * queued and decrementing when dequeued is enough to keep the whole blkg
 * tree pinned while bios are in flight.
 */
struct throtl_qnode {
	struct list_head	node;		/* link on service_queue->queued[] */
	struct bio_list		bios;		/* bios queued from this source */
	struct throtl_grp	*tg;		/* tg this qnode belongs to */
};

/*
 * Dispatch state for one level of the hierarchy: per-direction lists of
 * queued throtl_qnodes, plus the tree and timer that track the child
 * groups waiting to dispatch.
 */
struct throtl_service_queue {
	struct throtl_service_queue *parent_sq;	/* the parent service_queue */

	/*
	 * Bios queued directly to this service_queue or dispatched from
	 * children throtl_grp's.  Both arrays hold one slot per direction
	 * (READ/WRITE).
	 */
	struct list_head	queued[2];	/* throtl_qnode [READ/WRITE] */
	unsigned int		nr_queued[2];	/* number of queued bios */

	/*
	 * RB tree of active children throtl_grp's, which are sorted by
	 * their ->disptime.
	 */
	struct rb_root_cached	pending_tree;	/* RB tree of active tgs */
	unsigned int		nr_pending;	/* # queued in the tree */
	unsigned long		first_pending_disptime;	/* disptime of the first tg */
	struct timer_list	pending_timer;	/* fires on first_pending_disptime */
};

/*
 * Single-bit state flags for a throtl_grp.
 * NOTE(review): presumably OR-ed into throtl_grp->flags -- confirm against
 * the users in blk-throttle.c.
 */
enum tg_state_flags {
	THROTL_TG_PENDING	= 1 << 0,	/* on parent's pending tree */
	THROTL_TG_WAS_EMPTY	= 1 << 1,	/* bio_lists[] became non-empty */
	THROTL_TG_CANCELING	= 1 << 2,	/* starts to cancel bio */
};

/*
 * Limit kinds.  LIMIT_CNT is the number of kinds and sizes the second
 * dimension of the bps/iops (and *_conf) arrays in struct throtl_grp;
 * LIMIT_LOW and LIMIT_MAX are the indices into that dimension.
 */
enum {
	LIMIT_LOW,
	LIMIT_MAX,
	LIMIT_CNT,
};

/*
 * Throttling state of one group.  It embeds its blkg_policy_data as ->pd,
 * which is what pd_to_tg() uses to map policy data back to the group.
 *
 * has_rules_bps[] and has_rules_iops[] are indexed by bio_data_dir() in
 * blk_should_throtl().  NOTE(review): the other two-element arrays
 * presumably follow the same per-direction split -- confirm.
 */
struct throtl_grp {
	/* must be the first member */
	struct blkg_policy_data pd;

	/* active throtl group service_queue member */
	struct rb_node rb_node;

	/* throtl_data this group belongs to */
	struct throtl_data *td;

	/* this group's service queue */
	struct throtl_service_queue service_queue;

	/*
	 * qnode_on_self is used when bios are directly queued to this
	 * throtl_grp so that local bios compete fairly with bios
	 * dispatched from children.  qnode_on_parent is used when bios are
	 * dispatched from this throtl_grp into its parent and will compete
	 * with the sibling qnode_on_parents and the parent's
	 * qnode_on_self.
	 */
	struct throtl_qnode qnode_on_self[2];
	struct throtl_qnode qnode_on_parent[2];

	/*
	 * Dispatch time in jiffies. This is the estimated time when group
	 * will unthrottle and is ready to dispatch more bio. It is used as
	 * key to sort active groups in service tree.
	 */
	unsigned long disptime;

	/* NOTE(review): presumably THROTL_TG_* bits (enum tg_state_flags) */
	unsigned int flags;

	/*
	 * are there any throtl rules between this group and td?
	 * Kept separately for bps and iops; blk_should_throtl() reads both.
	 */
	bool has_rules_bps[2];
	bool has_rules_iops[2];

	/* internally used bytes per second rate limits */
	uint64_t bps[2][LIMIT_CNT];
	/* user configured bps limits */
	uint64_t bps_conf[2][LIMIT_CNT];

	/* internally used IOPS limits */
	unsigned int iops[2][LIMIT_CNT];
	/* user configured IOPS limits */
	unsigned int iops_conf[2][LIMIT_CNT];

	/* Number of bytes dispatched in current slice */
	uint64_t bytes_disp[2];
	/* Number of bio's dispatched in current slice */
	unsigned int io_disp[2];

	unsigned long last_low_overflow_time[2];

	uint64_t last_bytes_disp[2];
	unsigned int last_io_disp[2];

	/*
	 * The following two fields are updated when new configuration is
	 * submitted while some bios are still throttled, they record how many
	 * bytes/ios are waited already in previous configuration, and they will
	 * be used to calculate wait time under new configuration.
	 */
	uint64_t carryover_bytes[2];
	unsigned int carryover_ios[2];

	unsigned long last_check_time;

	unsigned long latency_target; /* us */
	unsigned long latency_target_conf; /* us */
	/* When did we start a new slice */
	unsigned long slice_start[2];
	unsigned long slice_end[2];

	unsigned long last_finish_time; /* ns / 1024 */
	unsigned long checked_last_finish_time; /* ns / 1024 */
	unsigned long avg_idletime; /* ns / 1024 */
	unsigned long idletime_threshold; /* us */
	unsigned long idletime_threshold_conf; /* us */

	unsigned int bio_cnt; /* total bios */
	unsigned int bad_bio_cnt; /* bios exceeding latency threshold */
	unsigned long bio_cnt_reset_time;

	/*
	 * Byte and bio counters.  blk_should_throtl() feeds them only when
	 * the io controller is not on the default hierarchy: stat_bytes
	 * gets the bio size once per bio (guarded by BIO_CGROUP_ACCT),
	 * stat_ios gets 1 on every call.
	 */
	struct blkg_rwstat stat_bytes;
	struct blkg_rwstat stat_ios;
};

/* Policy handle that blkg_to_tg() passes to blkg_to_pd(); defined elsewhere. */
extern struct blkcg_policy blkcg_policy_throtl;

/*
 * Map a blkg_policy_data to the throtl_grp that embeds it as ->pd.
 * A NULL @pd yields NULL.
 */
static inline struct throtl_grp *pd_to_tg(struct blkg_policy_data *pd)
{
	if (!pd)
		return NULL;

	return container_of(pd, struct throtl_grp, pd);
}

static inline struct throtl_grp *blkg_to_tg(struct blkcg_gq *blkg)
{
	return pd_to_tg(blkg_to_pd(blkg, &blkcg_policy_throtl));
}

/*
 * Internal throttling interface
 */
#ifndef CONFIG_BLK_DEV_THROTTLING
/*
 * Stubs used when CONFIG_BLK_DEV_THROTTLING is not set: init reports
 * success, no bio is ever throttled, and the rest do nothing.
 */
static inline int blk_throtl_init(struct gendisk *disk)
{
	return 0;
}

static inline void blk_throtl_exit(struct gendisk *disk)
{
}

static inline void blk_throtl_register(struct gendisk *disk)
{
}

static inline bool blk_throtl_bio(struct bio *bio)
{
	return false;
}

static inline void blk_throtl_cancel_bios(struct gendisk *disk)
{
}
#else /* CONFIG_BLK_DEV_THROTTLING */
/*
 * Out-of-line throttling entry points, declared when
 * CONFIG_BLK_DEV_THROTTLING is set.  Their definitions are not in this
 * header.
 */
int blk_throtl_init(struct gendisk *disk);
void blk_throtl_exit(struct gendisk *disk);
void blk_throtl_register(struct gendisk *disk);
/* slow path: blk_throtl_bio() calls this once blk_should_throtl() is true */
bool __blk_throtl_bio(struct bio *bio);
void blk_throtl_cancel_bios(struct gendisk *disk);

/*
 * Inline fast-path check: does @bio need to go through __blk_throtl_bio()?
 *
 * Side effect: when the io controller is not on the default hierarchy,
 * the group's statistics are updated here.  Bytes are added once per bio
 * (BIO_CGROUP_ACCT marks a bio as already counted); the bio count is
 * bumped on every call.
 *
 * Returns true if an iops rule exists for the bio's direction, or if a bps
 * rule exists for it and the bio is not flagged BIO_BPS_THROTTLED.
 */
static inline bool blk_should_throtl(struct bio *bio)
{
	struct throtl_grp *tg = blkg_to_tg(bio->bi_blkg);
	int dir = bio_data_dir(bio);

	if (!cgroup_subsys_on_dfl(io_cgrp_subsys)) {
		/* account the payload only the first time this bio is seen */
		if (!bio_flagged(bio, BIO_CGROUP_ACCT)) {
			bio_set_flag(bio, BIO_CGROUP_ACCT);
			blkg_rwstat_add(&tg->stat_bytes, bio->bi_opf,
					bio->bi_iter.bi_size);
		}
		blkg_rwstat_add(&tg->stat_ios, bio->bi_opf, 1);
	}

	/*
	 * iops limit is always counted; a bps limit only matters while the
	 * bio has not been marked BIO_BPS_THROTTLED.
	 */
	return tg->has_rules_iops[dir] ||
	       (tg->has_rules_bps[dir] && !bio_flagged(bio, BIO_BPS_THROTTLED));
}

/*
 * Throttle entry point for a bio.  The inline check runs first;
 * __blk_throtl_bio() is called, and its result returned, only when
 * blk_should_throtl() says the bio may need throttling.  Otherwise the
 * result is false.
 */
static inline bool blk_throtl_bio(struct bio *bio)
{
	return blk_should_throtl(bio) && __blk_throtl_bio(bio);
}
#endif /* CONFIG_BLK_DEV_THROTTLING */

#endif
block: move blk-throtl fast path inline Even if no policies are defined, we spend ~2% of the total IO time checking. Move the fast path inline. Acked-by: Tejun Heo <tj@kernel.org> Signed-off-by: Jens Axboe <axboe@kernel.dk> 2021-10-05 23:11:56 +08:00			`#ifndef BLK_THROTTLE_H`
			`#define BLK_THROTTLE_H`

			`#include "blk-cgroup-rwstat.h"`

			`/*`
			`* To implement hierarchical throttling, throtl_grps form a tree and bios`
			`* are dispatched upwards level by level until they reach the top and get`
			`* issued. When dispatching bios from the children and local group at each`
			`* level, if the bios are dispatched into a single bio_list, there's a risk`
			`* of a local or child group which can queue many bios at once filling up`
			`* the list starving others.`
			`*`
			`* To avoid such starvation, dispatched bios are queued separately`
			`* according to where they came from. When they are again dispatched to`
			`* the parent, they're popped in round-robin order so that no single source`
			`* hogs the dispatch window.`
			`*`
			`* throtl_qnode is used to keep the queued bios separated by their sources.`
			`* Bios are queued to throtl_qnode which in turn is queued to`
			`* throtl_service_queue and then dispatched in round-robin order.`
			`*`
			`* It's also used to track the reference counts on blkg's. A qnode always`
			`* belongs to a throtl_grp and gets queued on itself or the parent, so`
			`* incrementing the reference of the associated throtl_grp when a qnode is`
			`* queued and decrementing when dequeued is enough to keep the whole blkg`
			`* tree pinned while bios are in flight.`
			`*/`
			`struct throtl_qnode {`
			`struct list_head node; /* service_queue->queued[] */`
			`struct bio_list bios; /* queued bios */`
			`struct throtl_grp *tg; /* tg this qnode belongs to */`
			`};`

			`struct throtl_service_queue {`
			`struct throtl_service_queue *parent_sq; /* the parent service_queue */`

			`/*`
			`* Bios queued directly to this service_queue or dispatched from`
			`* children throtl_grp's.`
			`*/`
			`struct list_head queued[2]; /* throtl_qnode [READ/WRITE] */`
			`unsigned int nr_queued[2]; /* number of queued bios */`

			`/*`
			`* RB tree of active children throtl_grp's, which are sorted by`
			`* their ->disptime.`
			`*/`
			`struct rb_root_cached pending_tree; /* RB tree of active tgs */`
			`unsigned int nr_pending; /* # queued in the tree */`
			`unsigned long first_pending_disptime; /* disptime of the first tg */`
			`struct timer_list pending_timer; /* fires on first_pending_disptime */`
			`};`

block: don't try to throttle split bio if iops limit isn't set We need to throttle split bio in case of IOPS limit even though the split bio has been marked as BIO_THROTTLED since block layer accounts split bio actually. If only throughput throttle is setup, no need to throttle any more if BIO_THROTTLED is set since we have accounted & considered the whole bio bytes already. Add one flag of THROTL_TG_HAS_IOPS_LIMIT for serving this purpose. Acked-by: Tejun Heo <tj@kernel.org> Signed-off-by: Ming Lei <ming.lei@redhat.com> Link: https://lore.kernel.org/r/20220216044514.2903784-8-ming.lei@redhat.com Signed-off-by: Jens Axboe <axboe@kernel.dk> 2022-02-16 12:45:13 +08:00			`enum tg_state_flags {`
			`THROTL_TG_PENDING = 1 << 0, /* on parent's pending tree */`
			`THROTL_TG_WAS_EMPTY = 1 << 1, /* bio_lists[] became non-empty */`
blk-throttle: remove THROTL_TG_HAS_IOPS_LIMIT Currently, "tg->has_rules" and "tg->flags & THROTL_TG_HAS_IOPS_LIMIT" both try to bypass bios that don't need to be throttled, however, they are a little redundant and both not perfect: 1) "tg->has_rules" only distinguish read and write, but not iops and bps limit. 2) "tg->flags & THROTL_TG_HAS_IOPS_LIMIT" only check if iops limit exist, read and write is not distinguished, and bps limit is not checked. tg->has_rules will extended to distinguish bps and iops in the following patch. There is no need to keep the flag. Signed-off-by: Yu Kuai <yukuai3@huawei.com> Acked-by: Tejun Heo <tj@kernel.org> Link: https://lore.kernel.org/r/20220921095309.1481289-2-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe <axboe@kernel.dk> 2022-09-21 17:53:08 +08:00			`THROTL_TG_CANCELING = 1 << 2, /* starts to cancel bio */`
block: don't try to throttle split bio if iops limit isn't set We need to throttle split bio in case of IOPS limit even though the split bio has been marked as BIO_THROTTLED since block layer accounts split bio actually. If only throughput throttle is setup, no need to throttle any more if BIO_THROTTLED is set since we have accounted & considered the whole bio bytes already. Add one flag of THROTL_TG_HAS_IOPS_LIMIT for serving this purpose. Acked-by: Tejun Heo <tj@kernel.org> Signed-off-by: Ming Lei <ming.lei@redhat.com> Link: https://lore.kernel.org/r/20220216044514.2903784-8-ming.lei@redhat.com Signed-off-by: Jens Axboe <axboe@kernel.dk> 2022-02-16 12:45:13 +08:00			`};`

block: move blk-throtl fast path inline Even if no policies are defined, we spend ~2% of the total IO time checking. Move the fast path inline. Acked-by: Tejun Heo <tj@kernel.org> Signed-off-by: Jens Axboe <axboe@kernel.dk> 2021-10-05 23:11:56 +08:00			`enum {`
			`LIMIT_LOW,`
			`LIMIT_MAX,`
			`LIMIT_CNT,`
			`};`

			`struct throtl_grp {`
			`/* must be the first member */`
			`struct blkg_policy_data pd;`

			`/* active throtl group service_queue member */`
			`struct rb_node rb_node;`

			`/* throtl_data this group belongs to */`
			`struct throtl_data *td;`

			`/* this group's service queue */`
			`struct throtl_service_queue service_queue;`

			`/*`
			`* qnode_on_self is used when bios are directly queued to this`
			`* throtl_grp so that local bios compete fairly with bios`
			`* dispatched from children. qnode_on_parent is used when bios are`
			`* dispatched from this throtl_grp into its parent and will compete`
			`* with the sibling qnode_on_parents and the parent's`
			`* qnode_on_self.`
			`*/`
			`struct throtl_qnode qnode_on_self[2];`
			`struct throtl_qnode qnode_on_parent[2];`

			`/*`
			`* Dispatch time in jiffies. This is the estimated time when group`
			`* will unthrottle and is ready to dispatch more bio. It is used as`
			`* key to sort active groups in service tree.`
			`*/`
			`unsigned long disptime;`

			`unsigned int flags;`

			`/* are there any throtl rules between this group and td? */`
blk-throttle: improve bypassing bios checkings "tg->has_rules" is extended to "tg->has_rules_iops/bps", thus bios that don't need to be throttled can be checked accurately. With this patch, bio will be throttled if: 1) Bio is read/write, and corresponding read/write iops limit exist. 2) If corresponding doesn't exist, corresponding bps limit exist and bio is not throttled before. Signed-off-by: Yu Kuai <yukuai3@huawei.com> Acked-by: Tejun Heo <tj@kernel.org> Link: https://lore.kernel.org/r/20220921095309.1481289-3-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe <axboe@kernel.dk> 2022-09-21 17:53:09 +08:00			`bool has_rules_bps[2];`
			`bool has_rules_iops[2];`
block: move blk-throtl fast path inline Even if no policies are defined, we spend ~2% of the total IO time checking. Move the fast path inline. Acked-by: Tejun Heo <tj@kernel.org> Signed-off-by: Jens Axboe <axboe@kernel.dk> 2021-10-05 23:11:56 +08:00
			`/* internally used bytes per second rate limits */`
			`uint64_t bps[2][LIMIT_CNT];`
			`/* user configured bps limits */`
			`uint64_t bps_conf[2][LIMIT_CNT];`

			`/* internally used IOPS limits */`
			`unsigned int iops[2][LIMIT_CNT];`
			`/* user configured IOPS limits */`
			`unsigned int iops_conf[2][LIMIT_CNT];`

			`/* Number of bytes dispatched in current slice */`
			`uint64_t bytes_disp[2];`
			`/* Number of bio's dispatched in current slice */`
			`unsigned int io_disp[2];`

			`unsigned long last_low_overflow_time[2];`

			`uint64_t last_bytes_disp[2];`
			`unsigned int last_io_disp[2];`

blk-throttle: fix io hung due to configuration updates If new configuration is submitted while a bio is throttled, then new waiting time is recalculated regardless that the bio might already wait for some time: tg_conf_updated throtl_start_new_slice tg_update_disptime throtl_schedule_next_dispatch Then io hung can be triggered by always submmiting new configuration before the throttled bio is dispatched. Fix the problem by respecting the time that throttled bio already waited. In order to do that, add new fields to record how many bytes/io are waited, and use it to calculate wait time for throttled bio under new configuration. Some simple test: 1) cd /sys/fs/cgroup/blkio/ echo $$ > cgroup.procs echo "8:0 2048" > blkio.throttle.write_bps_device { sleep 2 echo "8:0 1024" > blkio.throttle.write_bps_device } & dd if=/dev/zero of=/dev/sda bs=8k count=1 oflag=direct 2) cd /sys/fs/cgroup/blkio/ echo $$ > cgroup.procs echo "8:0 1024" > blkio.throttle.write_bps_device { sleep 4 echo "8:0 2048" > blkio.throttle.write_bps_device } & dd if=/dev/zero of=/dev/sda bs=8k count=1 oflag=direct test results: io finish time before this patch with this patch 1) 10s 6s 2) 8s 6s Signed-off-by: Yu Kuai <yukuai3@huawei.com> Reviewed-by: Michal Koutný <mkoutny@suse.com> Acked-by: Tejun Heo <tj@kernel.org> Link: https://lore.kernel.org/r/20220829022240.3348319-5-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe <axboe@kernel.dk> 2022-08-29 10:22:40 +08:00			`/*`
			`* The following two fields are updated when new configuration is`
			`* submitted while some bios are still throttled, they record how many`
			`* bytes/ios are waited already in previous configuration, and they will`
			`* be used to calculate wait time under new configuration.`
			`*/`
			`uint64_t carryover_bytes[2];`
			`unsigned int carryover_ios[2];`

block: move blk-throtl fast path inline Even if no policies are defined, we spend ~2% of the total IO time checking. Move the fast path inline. Acked-by: Tejun Heo <tj@kernel.org> Signed-off-by: Jens Axboe <axboe@kernel.dk> 2021-10-05 23:11:56 +08:00			`unsigned long last_check_time;`

			`unsigned long latency_target; /* us */`
			`unsigned long latency_target_conf; /* us */`
			`/* When did we start a new slice */`
			`unsigned long slice_start[2];`
			`unsigned long slice_end[2];`

			`unsigned long last_finish_time; /* ns / 1024 */`
			`unsigned long checked_last_finish_time; /* ns / 1024 */`
			`unsigned long avg_idletime; /* ns / 1024 */`
			`unsigned long idletime_threshold; /* us */`
			`unsigned long idletime_threshold_conf; /* us */`

			`unsigned int bio_cnt; /* total bios */`
			`unsigned int bad_bio_cnt; /* bios exceeding latency threshold */`
			`unsigned long bio_cnt_reset_time;`

			`struct blkg_rwstat stat_bytes;`
			`struct blkg_rwstat stat_ios;`
			`};`

			`extern struct blkcg_policy blkcg_policy_throtl;`

			`static inline struct throtl_grp *pd_to_tg(struct blkg_policy_data *pd)`
			`{`
			`return pd ? container_of(pd, struct throtl_grp, pd) : NULL;`
			`}`

			`static inline struct throtl_grp *blkg_to_tg(struct blkcg_gq *blkg)`
			`{`
			`return pd_to_tg(blkg_to_pd(blkg, &blkcg_policy_throtl));`
			`}`

			`/*`
			`* Internal throttling interface`
			`*/`
			`#ifndef CONFIG_BLK_DEV_THROTTLING`
blk-throttle: pass a gendisk to blk_throtl_init and blk_throtl_exit Pass the gendisk to blk_throtl_init and blk_throtl_exit as part of moving the blk-cgroup infrastructure to be gendisk based. Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Andreas Herrmann <aherrmann@suse.de> Acked-by: Tejun Heo <tj@kernel.org> Link: https://lore.kernel.org/r/20220921180501.1539876-13-hch@lst.de Signed-off-by: Jens Axboe <axboe@kernel.dk> 2022-09-22 02:04:56 +08:00			`static inline int blk_throtl_init(struct gendisk *disk) { return 0; }`
			`static inline void blk_throtl_exit(struct gendisk *disk) { }`
blk-throttle: pass a gendisk to blk_throtl_register_queue Pass the gendisk to blk_throtl_register_queue as part of moving the blk-cgroup infrastructure to be gendisk based. Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Andreas Herrmann <aherrmann@suse.de> Acked-by: Tejun Heo <tj@kernel.org> Link: https://lore.kernel.org/r/20220921180501.1539876-14-hch@lst.de Signed-off-by: Jens Axboe <axboe@kernel.dk> 2022-09-22 02:04:57 +08:00			`static inline void blk_throtl_register(struct gendisk *disk) { }`
block: move blk-throtl fast path inline Even if no policies are defined, we spend ~2% of the total IO time checking. Move the fast path inline. Acked-by: Tejun Heo <tj@kernel.org> Signed-off-by: Jens Axboe <axboe@kernel.dk> 2021-10-05 23:11:56 +08:00			`static inline bool blk_throtl_bio(struct bio *bio) { return false; }`
blk-throttle: pass a gendisk to blk_throtl_cancel_bios Pass the gendisk to blk_throtl_cancel_bios as part of moving the blk-cgroup infrastructure to be gendisk based. Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Andreas Herrmann <aherrmann@suse.de> Acked-by: Tejun Heo <tj@kernel.org> Link: https://lore.kernel.org/r/20220921180501.1539876-15-hch@lst.de Signed-off-by: Jens Axboe <axboe@kernel.dk> 2022-09-22 02:04:58 +08:00			`static inline void blk_throtl_cancel_bios(struct gendisk *disk) { }`
block: move blk-throtl fast path inline Even if no policies are defined, we spend ~2% of the total IO time checking. Move the fast path inline. Acked-by: Tejun Heo <tj@kernel.org> Signed-off-by: Jens Axboe <axboe@kernel.dk> 2021-10-05 23:11:56 +08:00			`#else /* CONFIG_BLK_DEV_THROTTLING */`
blk-throttle: pass a gendisk to blk_throtl_init and blk_throtl_exit Pass the gendisk to blk_throtl_init and blk_throtl_exit as part of moving the blk-cgroup infrastructure to be gendisk based. Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Andreas Herrmann <aherrmann@suse.de> Acked-by: Tejun Heo <tj@kernel.org> Link: https://lore.kernel.org/r/20220921180501.1539876-13-hch@lst.de Signed-off-by: Jens Axboe <axboe@kernel.dk> 2022-09-22 02:04:56 +08:00			`int blk_throtl_init(struct gendisk *disk);`
			`void blk_throtl_exit(struct gendisk *disk);`
blk-throttle: pass a gendisk to blk_throtl_register_queue Pass the gendisk to blk_throtl_register_queue as part of moving the blk-cgroup infrastructure to be gendisk based. Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Andreas Herrmann <aherrmann@suse.de> Acked-by: Tejun Heo <tj@kernel.org> Link: https://lore.kernel.org/r/20220921180501.1539876-14-hch@lst.de Signed-off-by: Jens Axboe <axboe@kernel.dk> 2022-09-22 02:04:57 +08:00			`void blk_throtl_register(struct gendisk *disk);`
block: move blk-throtl fast path inline Even if no policies are defined, we spend ~2% of the total IO time checking. Move the fast path inline. Acked-by: Tejun Heo <tj@kernel.org> Signed-off-by: Jens Axboe <axboe@kernel.dk> 2021-10-05 23:11:56 +08:00			`bool __blk_throtl_bio(struct bio *bio);`
blk-throttle: pass a gendisk to blk_throtl_cancel_bios Pass the gendisk to blk_throtl_cancel_bios as part of moving the blk-cgroup infrastructure to be gendisk based. Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Andreas Herrmann <aherrmann@suse.de> Acked-by: Tejun Heo <tj@kernel.org> Link: https://lore.kernel.org/r/20220921180501.1539876-15-hch@lst.de Signed-off-by: Jens Axboe <axboe@kernel.dk> 2022-09-22 02:04:58 +08:00			`void blk_throtl_cancel_bios(struct gendisk *disk);`
blk-throttle: improve bypassing bios checkings "tg->has_rules" is extended to "tg->has_rules_iops/bps", thus bios that don't need to be throttled can be checked accurately. With this patch, bio will be throttled if: 1) Bio is read/write, and corresponding read/write iops limit exist. 2) If corresponding doesn't exist, corresponding bps limit exist and bio is not throttled before. Signed-off-by: Yu Kuai <yukuai3@huawei.com> Acked-by: Tejun Heo <tj@kernel.org> Link: https://lore.kernel.org/r/20220921095309.1481289-3-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe <axboe@kernel.dk> 2022-09-21 17:53:09 +08:00
			`static inline bool blk_should_throtl(struct bio *bio)`
block: move blk-throtl fast path inline Even if no policies are defined, we spend ~2% of the total IO time checking. Move the fast path inline. Acked-by: Tejun Heo <tj@kernel.org> Signed-off-by: Jens Axboe <axboe@kernel.dk> 2021-10-05 23:11:56 +08:00			`{`
			`struct throtl_grp *tg = blkg_to_tg(bio->bi_blkg);`
blk-throttle: improve bypassing bios checkings "tg->has_rules" is extended to "tg->has_rules_iops/bps", thus bios that don't need to be throttled can be checked accurately. With this patch, bio will be throttled if: 1) Bio is read/write, and corresponding read/write iops limit exist. 2) If corresponding doesn't exist, corresponding bps limit exist and bio is not throttled before. Signed-off-by: Yu Kuai <yukuai3@huawei.com> Acked-by: Tejun Heo <tj@kernel.org> Link: https://lore.kernel.org/r/20220921095309.1481289-3-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe <axboe@kernel.dk> 2022-09-21 17:53:09 +08:00			`int rw = bio_data_dir(bio);`

blk-throttle: Fix io statistics for cgroup v1 After commit f382fb0bcef4 ("block: remove legacy IO schedulers"), blkio.throttle.io_serviced and blkio.throttle.io_service_bytes become the only stable io stats interface of cgroup v1, and these statistics are done in the blk-throttle code. But the current code only counts the bios that are actually throttled. When the user does not add the throttle limit, the io stats for cgroup v1 has nothing. I fix it according to the statistical method of v2, and made it count all ios accurately. Fixes: a7b36ee6ba29 ("block: move blk-throtl fast path inline") Tested-by: Andrea Righi <andrea.righi@canonical.com> Signed-off-by: Jinke Han <hanjinke.666@bytedance.com> Acked-by: Muchun Song <songmuchun@bytedance.com> Acked-by: Tejun Heo <tj@kernel.org> Link: https://lore.kernel.org/r/20230507170631.89607-1-hanjinke.666@bytedance.com Signed-off-by: Jens Axboe <axboe@kernel.dk> 2023-05-08 01:06:31 +08:00			`if (!cgroup_subsys_on_dfl(io_cgrp_subsys)) {`
			`if (!bio_flagged(bio, BIO_CGROUP_ACCT)) {`
			`bio_set_flag(bio, BIO_CGROUP_ACCT);`
			`blkg_rwstat_add(&tg->stat_bytes, bio->bi_opf,`
			`bio->bi_iter.bi_size);`
			`}`
			`blkg_rwstat_add(&tg->stat_ios, bio->bi_opf, 1);`
			`}`

blk-throttle: improve bypassing bios checkings "tg->has_rules" is extended to "tg->has_rules_iops/bps", thus bios that don't need to be throttled can be checked accurately. With this patch, bio will be throttled if: 1) Bio is read/write, and corresponding read/write iops limit exist. 2) If corresponding doesn't exist, corresponding bps limit exist and bio is not throttled before. Signed-off-by: Yu Kuai <yukuai3@huawei.com> Acked-by: Tejun Heo <tj@kernel.org> Link: https://lore.kernel.org/r/20220921095309.1481289-3-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe <axboe@kernel.dk> 2022-09-21 17:53:09 +08:00			`/* iops limit is always counted */`
			`if (tg->has_rules_iops[rw])`
			`return true;`

			`if (tg->has_rules_bps[rw] && !bio_flagged(bio, BIO_BPS_THROTTLED))`
			`return true;`

			`return false;`
			`}`

			`static inline bool blk_throtl_bio(struct bio *bio)`
			`{`
block: move blk-throtl fast path inline Even if no policies are defined, we spend ~2% of the total IO time checking. Move the fast path inline. Acked-by: Tejun Heo <tj@kernel.org> Signed-off-by: Jens Axboe <axboe@kernel.dk> 2021-10-05 23:11:56 +08:00
blk-throttle: improve bypassing bios checkings "tg->has_rules" is extended to "tg->has_rules_iops/bps", thus bios that don't need to be throttled can be checked accurately. With this patch, bio will be throttled if: 1) Bio is read/write, and corresponding read/write iops limit exist. 2) If corresponding doesn't exist, corresponding bps limit exist and bio is not throttled before. Signed-off-by: Yu Kuai <yukuai3@huawei.com> Acked-by: Tejun Heo <tj@kernel.org> Link: https://lore.kernel.org/r/20220921095309.1481289-3-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe <axboe@kernel.dk> 2022-09-21 17:53:09 +08:00			`if (!blk_should_throtl(bio))`
block: move blk-throtl fast path inline Even if no policies are defined, we spend ~2% of the total IO time checking. Move the fast path inline. Acked-by: Tejun Heo <tj@kernel.org> Signed-off-by: Jens Axboe <axboe@kernel.dk> 2021-10-05 23:11:56 +08:00			`return false;`

			`return __blk_throtl_bio(bio);`
			`}`
			`#endif /* CONFIG_BLK_DEV_THROTTLING */`

			`#endif`