md: improve io stats accounting
Use generic io accounting functions to manage io stats. There was an attempt to do this earlier in commit 18c0b223cf
("md: use generic io stats accounting functions to simplify io stat accounting"), but it did not include a call to generic_end_io_acct() and caused issues with tracking in-flight IOs, so it was later removed in commit 74672d069b
("md: fix md io stats accounting broken"). This patch attempts to fix this by using both disk_start_io_acct() and disk_end_io_acct(). To make it possible, a struct md_io is allocated for every new md bio, which includes the io start_time. A new mempool is introduced for this purpose. We override bio->bi_end_io with our own callback and call disk_start_io_acct() before passing the bio to md_handle_request(). When it completes, we call disk_end_io_acct() and the original bi_end_io callback. This adds correct statistics about in-flight IOs and IO processing time, interpreted e.g. in iostat as await, svctm, aqu-sz and %util. It also fixes a situation where too many IOs were reported if a bio was re-submitted to the mddev, because io accounting is now performed only on newly arriving bios. Acked-by: Guoqing Jiang <guoqing.jiang@cloud.ionos.com> Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com> Signed-off-by: Song Liu <songliubraving@fb.com>
This commit is contained in:
parent
9a5a85972c
commit
41d2d848e5
2 changed files with 46 additions and 12 deletions
|
@ -463,12 +463,33 @@ check_suspended:
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(md_handle_request);
|
EXPORT_SYMBOL(md_handle_request);
|
||||||
|
|
||||||
|
struct md_io {
|
||||||
|
struct mddev *mddev;
|
||||||
|
bio_end_io_t *orig_bi_end_io;
|
||||||
|
void *orig_bi_private;
|
||||||
|
unsigned long start_time;
|
||||||
|
};
|
||||||
|
|
||||||
|
static void md_end_io(struct bio *bio)
|
||||||
|
{
|
||||||
|
struct md_io *md_io = bio->bi_private;
|
||||||
|
struct mddev *mddev = md_io->mddev;
|
||||||
|
|
||||||
|
disk_end_io_acct(mddev->gendisk, bio_op(bio), md_io->start_time);
|
||||||
|
|
||||||
|
bio->bi_end_io = md_io->orig_bi_end_io;
|
||||||
|
bio->bi_private = md_io->orig_bi_private;
|
||||||
|
|
||||||
|
mempool_free(md_io, &mddev->md_io_pool);
|
||||||
|
|
||||||
|
if (bio->bi_end_io)
|
||||||
|
bio->bi_end_io(bio);
|
||||||
|
}
|
||||||
|
|
||||||
static blk_qc_t md_submit_bio(struct bio *bio)
|
static blk_qc_t md_submit_bio(struct bio *bio)
|
||||||
{
|
{
|
||||||
const int rw = bio_data_dir(bio);
|
const int rw = bio_data_dir(bio);
|
||||||
const int sgrp = op_stat_group(bio_op(bio));
|
|
||||||
struct mddev *mddev = bio->bi_disk->private_data;
|
struct mddev *mddev = bio->bi_disk->private_data;
|
||||||
unsigned int sectors;
|
|
||||||
|
|
||||||
if (mddev == NULL || mddev->pers == NULL) {
|
if (mddev == NULL || mddev->pers == NULL) {
|
||||||
bio_io_error(bio);
|
bio_io_error(bio);
|
||||||
|
@ -489,21 +510,27 @@ static blk_qc_t md_submit_bio(struct bio *bio)
|
||||||
return BLK_QC_T_NONE;
|
return BLK_QC_T_NONE;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
if (bio->bi_end_io != md_end_io) {
|
||||||
* save the sectors now since our bio can
|
struct md_io *md_io;
|
||||||
* go away inside make_request
|
|
||||||
*/
|
md_io = mempool_alloc(&mddev->md_io_pool, GFP_NOIO);
|
||||||
sectors = bio_sectors(bio);
|
md_io->mddev = mddev;
|
||||||
|
md_io->orig_bi_end_io = bio->bi_end_io;
|
||||||
|
md_io->orig_bi_private = bio->bi_private;
|
||||||
|
|
||||||
|
bio->bi_end_io = md_end_io;
|
||||||
|
bio->bi_private = md_io;
|
||||||
|
|
||||||
|
md_io->start_time = disk_start_io_acct(mddev->gendisk,
|
||||||
|
bio_sectors(bio),
|
||||||
|
bio_op(bio));
|
||||||
|
}
|
||||||
|
|
||||||
/* bio could be mergeable after passing to underlayer */
|
/* bio could be mergeable after passing to underlayer */
|
||||||
bio->bi_opf &= ~REQ_NOMERGE;
|
bio->bi_opf &= ~REQ_NOMERGE;
|
||||||
|
|
||||||
md_handle_request(mddev, bio);
|
md_handle_request(mddev, bio);
|
||||||
|
|
||||||
part_stat_lock();
|
|
||||||
part_stat_inc(&mddev->gendisk->part0, ios[sgrp]);
|
|
||||||
part_stat_add(&mddev->gendisk->part0, sectors[sgrp], sectors);
|
|
||||||
part_stat_unlock();
|
|
||||||
|
|
||||||
return BLK_QC_T_NONE;
|
return BLK_QC_T_NONE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5546,6 +5573,7 @@ static void md_free(struct kobject *ko)
|
||||||
|
|
||||||
bioset_exit(&mddev->bio_set);
|
bioset_exit(&mddev->bio_set);
|
||||||
bioset_exit(&mddev->sync_set);
|
bioset_exit(&mddev->sync_set);
|
||||||
|
mempool_exit(&mddev->md_io_pool);
|
||||||
kfree(mddev);
|
kfree(mddev);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5641,6 +5669,11 @@ static int md_alloc(dev_t dev, char *name)
|
||||||
*/
|
*/
|
||||||
mddev->hold_active = UNTIL_STOP;
|
mddev->hold_active = UNTIL_STOP;
|
||||||
|
|
||||||
|
error = mempool_init_kmalloc_pool(&mddev->md_io_pool, BIO_POOL_SIZE,
|
||||||
|
sizeof(struct md_io));
|
||||||
|
if (error)
|
||||||
|
goto abort;
|
||||||
|
|
||||||
error = -ENOMEM;
|
error = -ENOMEM;
|
||||||
mddev->queue = blk_alloc_queue(NUMA_NO_NODE);
|
mddev->queue = blk_alloc_queue(NUMA_NO_NODE);
|
||||||
if (!mddev->queue)
|
if (!mddev->queue)
|
||||||
|
|
|
@ -481,6 +481,7 @@ struct mddev {
|
||||||
struct bio_set sync_set; /* for sync operations like
|
struct bio_set sync_set; /* for sync operations like
|
||||||
* metadata and bitmap writes
|
* metadata and bitmap writes
|
||||||
*/
|
*/
|
||||||
|
mempool_t md_io_pool;
|
||||||
|
|
||||||
/* Generic flush handling.
|
/* Generic flush handling.
|
||||||
* The last to finish preflush schedules a worker to submit
|
* The last to finish preflush schedules a worker to submit
|
||||||
|
|
Loading…
Add table
Reference in a new issue