1
0
Fork 0
mirror of synced 2025-03-06 20:59:54 +01:00

block: remove bio_add_pc_page

Lift bio_split_rw_at into blk_rq_append_bio so that it validates the
hardware limits.  With this all passthrough callers can simply add
bio_add_page to build the bio and delay checking for exceeding of limits
to this point instead of doing it for each page.

While this looks like adding a new expensive loop over all bio_vecs,
blk_rq_append_bio is already doing that just to counter the number of
segments.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Link: https://lore.kernel.org/r/20250103073417.459715-2-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Christoph Hellwig 2025-01-03 08:33:57 +01:00 committed by Jens Axboe
parent c2398e6d5f
commit 6aeb4f8364
7 changed files with 48 additions and 214 deletions

View file

@ -946,8 +946,11 @@ static bool bvec_try_merge_page(struct bio_vec *bv, struct page *page,
/*
* Try to merge a page into a segment, while obeying the hardware segment
* size limit. This is not for normal read/write bios, but for passthrough
* or Zone Append operations that we can't split.
* size limit.
*
* This is kept around for the integrity metadata, which is still tries
* to build the initial bio to the hardware limit and doesn't have proper
* helpers to split. Hopefully this will go away soon.
*/
bool bvec_try_merge_hw_page(struct request_queue *q, struct bio_vec *bv,
struct page *page, unsigned len, unsigned offset,
@ -964,106 +967,6 @@ bool bvec_try_merge_hw_page(struct request_queue *q, struct bio_vec *bv,
return bvec_try_merge_page(bv, page, len, offset, same_page);
}
/**
* bio_add_hw_page - attempt to add a page to a bio with hw constraints
* @q: the target queue
* @bio: destination bio
* @page: page to add
* @len: vec entry length
* @offset: vec entry offset
* @max_sectors: maximum number of sectors that can be added
* @same_page: return if the segment has been merged inside the same page
*
* Add a page to a bio while respecting the hardware max_sectors, max_segment
* and gap limitations.
*/
int bio_add_hw_page(struct request_queue *q, struct bio *bio,
struct page *page, unsigned int len, unsigned int offset,
unsigned int max_sectors, bool *same_page)
{
unsigned int max_size = max_sectors << SECTOR_SHIFT;
if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
return 0;
len = min3(len, max_size, queue_max_segment_size(q));
if (len > max_size - bio->bi_iter.bi_size)
return 0;
if (bio->bi_vcnt > 0) {
struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
if (bvec_try_merge_hw_page(q, bv, page, len, offset,
same_page)) {
bio->bi_iter.bi_size += len;
return len;
}
if (bio->bi_vcnt >=
min(bio->bi_max_vecs, queue_max_segments(q)))
return 0;
/*
* If the queue doesn't support SG gaps and adding this segment
* would create a gap, disallow it.
*/
if (bvec_gap_to_prev(&q->limits, bv, offset))
return 0;
}
bvec_set_page(&bio->bi_io_vec[bio->bi_vcnt], page, len, offset);
bio->bi_vcnt++;
bio->bi_iter.bi_size += len;
return len;
}
/**
* bio_add_hw_folio - attempt to add a folio to a bio with hw constraints
* @q: the target queue
* @bio: destination bio
* @folio: folio to add
* @len: vec entry length
* @offset: vec entry offset in the folio
* @max_sectors: maximum number of sectors that can be added
* @same_page: return if the segment has been merged inside the same folio
*
* Add a folio to a bio while respecting the hardware max_sectors, max_segment
* and gap limitations.
*/
int bio_add_hw_folio(struct request_queue *q, struct bio *bio,
struct folio *folio, size_t len, size_t offset,
unsigned int max_sectors, bool *same_page)
{
if (len > UINT_MAX || offset > UINT_MAX)
return 0;
return bio_add_hw_page(q, bio, folio_page(folio, 0), len, offset,
max_sectors, same_page);
}
/**
* bio_add_pc_page - attempt to add page to passthrough bio
* @q: the target queue
* @bio: destination bio
* @page: page to add
* @len: vec entry length
* @offset: vec entry offset
*
* Attempt to add a page to the bio_vec maplist. This can fail for a
* number of reasons, such as the bio being full or target block device
* limitations. The target block device must allow bio's up to PAGE_SIZE,
* so it is always possible to add a single page to an empty bio.
*
* This should only be used by passthrough bios.
*/
int bio_add_pc_page(struct request_queue *q, struct bio *bio,
struct page *page, unsigned int len, unsigned int offset)
{
bool same_page = false;
return bio_add_hw_page(q, bio, page, len, offset,
queue_max_hw_sectors(q), &same_page);
}
EXPORT_SYMBOL(bio_add_pc_page);
/**
* __bio_add_page - add page(s) to a bio in a new segment
* @bio: destination bio

View file

@ -189,7 +189,7 @@ static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data,
}
}
if (bio_add_pc_page(rq->q, bio, page, bytes, offset) < bytes) {
if (bio_add_page(bio, page, bytes, offset) < bytes) {
if (!map_data)
__free_page(page);
break;
@ -272,86 +272,27 @@ static struct bio *blk_rq_map_bio_alloc(struct request *rq,
static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
gfp_t gfp_mask)
{
iov_iter_extraction_t extraction_flags = 0;
unsigned int max_sectors = queue_max_hw_sectors(rq->q);
unsigned int nr_vecs = iov_iter_npages(iter, BIO_MAX_VECS);
struct bio *bio;
int ret;
int j;
if (!iov_iter_count(iter))
return -EINVAL;
bio = blk_rq_map_bio_alloc(rq, nr_vecs, gfp_mask);
if (bio == NULL)
if (!bio)
return -ENOMEM;
if (blk_queue_pci_p2pdma(rq->q))
extraction_flags |= ITER_ALLOW_P2PDMA;
if (iov_iter_extract_will_pin(iter))
bio_set_flag(bio, BIO_PAGE_PINNED);
while (iov_iter_count(iter)) {
struct page *stack_pages[UIO_FASTIOV];
struct page **pages = stack_pages;
ssize_t bytes;
size_t offs;
int npages;
if (nr_vecs > ARRAY_SIZE(stack_pages))
pages = NULL;
bytes = iov_iter_extract_pages(iter, &pages, LONG_MAX,
nr_vecs, extraction_flags, &offs);
if (unlikely(bytes <= 0)) {
ret = bytes ? bytes : -EFAULT;
goto out_unmap;
}
npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE);
if (unlikely(offs & queue_dma_alignment(rq->q)))
j = 0;
else {
for (j = 0; j < npages; j++) {
struct page *page = pages[j];
unsigned int n = PAGE_SIZE - offs;
bool same_page = false;
if (n > bytes)
n = bytes;
if (!bio_add_hw_page(rq->q, bio, page, n, offs,
max_sectors, &same_page))
break;
if (same_page)
bio_release_page(bio, page);
bytes -= n;
offs = 0;
}
}
/*
* release the pages we didn't map into the bio, if any
*/
while (j < npages)
bio_release_page(bio, pages[j++]);
if (pages != stack_pages)
kvfree(pages);
/* couldn't stuff something into bio? */
if (bytes) {
iov_iter_revert(iter, bytes);
break;
}
}
ret = bio_iov_iter_get_pages(bio, iter);
if (ret)
goto out_put;
ret = blk_rq_append_bio(rq, bio);
if (ret)
goto out_unmap;
goto out_release;
return 0;
out_unmap:
out_release:
bio_release_pages(bio, false);
out_put:
blk_mq_map_bio_put(bio);
return ret;
}
@ -422,8 +363,7 @@ static struct bio *bio_map_kern(struct request_queue *q, void *data,
page = virt_to_page(data);
else
page = vmalloc_to_page(data);
if (bio_add_pc_page(q, bio, page, bytes,
offset) < bytes) {
if (bio_add_page(bio, page, bytes, offset) < bytes) {
/* we don't support partial mappings */
bio_uninit(bio);
kfree(bio);
@ -507,7 +447,7 @@ static struct bio *bio_copy_kern(struct request_queue *q, void *data,
if (!reading)
memcpy(page_address(page), p, bytes);
if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes)
if (bio_add_page(bio, page, bytes, 0) < bytes)
break;
len -= bytes;
@ -536,12 +476,19 @@ cleanup:
*/
int blk_rq_append_bio(struct request *rq, struct bio *bio)
{
struct bvec_iter iter;
struct bio_vec bv;
const struct queue_limits *lim = &rq->q->limits;
unsigned int max_bytes = lim->max_hw_sectors << SECTOR_SHIFT;
unsigned int nr_segs = 0;
int ret;
bio_for_each_bvec(bv, bio, iter)
nr_segs++;
/* check that the data layout matches the hardware restrictions */
ret = bio_split_rw_at(bio, lim, &nr_segs, max_bytes);
if (ret) {
/* if we would have to split the bio, copy instead */
if (ret > 0)
ret = -EREMOTEIO;
return ret;
}
if (!rq->bio) {
blk_rq_bio_prep(rq, bio, nr_segs);
@ -561,9 +508,7 @@ EXPORT_SYMBOL(blk_rq_append_bio);
/* Prepare bio for passthrough IO given ITER_BVEC iter */
static int blk_rq_map_user_bvec(struct request *rq, const struct iov_iter *iter)
{
const struct queue_limits *lim = &rq->q->limits;
unsigned int max_bytes = lim->max_hw_sectors << SECTOR_SHIFT;
unsigned int nsegs;
unsigned int max_bytes = rq->q->limits.max_hw_sectors << SECTOR_SHIFT;
struct bio *bio;
int ret;
@ -576,18 +521,10 @@ static int blk_rq_map_user_bvec(struct request *rq, const struct iov_iter *iter)
return -ENOMEM;
bio_iov_bvec_set(bio, iter);
/* check that the data layout matches the hardware restrictions */
ret = bio_split_rw_at(bio, lim, &nsegs, max_bytes);
if (ret) {
/* if we would have to split the bio, copy instead */
if (ret > 0)
ret = -EREMOTEIO;
ret = blk_rq_append_bio(rq, bio);
if (ret)
blk_mq_map_bio_put(bio);
return ret;
}
blk_rq_bio_prep(rq, bio, nsegs);
return 0;
return ret;
}
/**
@ -644,8 +581,11 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
ret = bio_copy_user_iov(rq, map_data, &i, gfp_mask);
else
ret = bio_map_user_iov(rq, &i, gfp_mask);
if (ret)
if (ret) {
if (ret == -EREMOTEIO)
ret = -EINVAL;
goto unmap_rq;
}
if (!bio)
bio = rq->bio;
} while (iov_iter_count(&i));

View file

@ -556,14 +556,6 @@ void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors);
struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id,
struct lock_class_key *lkclass);
int bio_add_hw_page(struct request_queue *q, struct bio *bio,
struct page *page, unsigned int len, unsigned int offset,
unsigned int max_sectors, bool *same_page);
int bio_add_hw_folio(struct request_queue *q, struct bio *bio,
struct folio *folio, size_t len, size_t offset,
unsigned int max_sectors, bool *same_page);
/*
* Clean up a page appropriately, where the page may be pinned, may have a
* ref taken on it or neither.

View file

@ -261,6 +261,7 @@ static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq)
{
struct scatterlist *sg;
struct bio *bio;
int ret = -EINVAL;
int i;
if (req->sg_cnt > BIO_MAX_VECS)
@ -277,16 +278,19 @@ static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq)
}
for_each_sg(req->sg, sg, req->sg_cnt, i) {
if (bio_add_pc_page(rq->q, bio, sg_page(sg), sg->length,
sg->offset) < sg->length) {
nvmet_req_bio_put(req, bio);
return -EINVAL;
}
if (bio_add_page(bio, sg_page(sg), sg->length, sg->offset) <
sg->length)
goto out_bio_put;
}
blk_rq_bio_prep(rq, bio, req->sg_cnt);
ret = blk_rq_append_bio(rq, bio);
if (ret)
goto out_bio_put;
return 0;
out_bio_put:
nvmet_req_bio_put(req, bio);
return ret;
}
static void nvmet_passthru_execute_cmd(struct nvmet_req *req)

View file

@ -586,8 +586,7 @@ void nvmet_bdev_execute_zone_append(struct nvmet_req *req)
for_each_sg(req->sg, sg, req->sg_cnt, sg_cnt) {
unsigned int len = sg->length;
if (bio_add_pc_page(bdev_get_queue(bio->bi_bdev), bio,
sg_page(sg), len, sg->offset) != len) {
if (bio_add_page(bio, sg_page(sg), len, sg->offset) != len) {
status = NVME_SC_INTERNAL;
goto out_put_bio;
}

View file

@ -823,7 +823,6 @@ static sense_reason_t
pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
struct request *req)
{
struct pscsi_dev_virt *pdv = PSCSI_DEV(cmd->se_dev);
struct bio *bio = NULL;
struct page *page;
struct scatterlist *sg;
@ -871,12 +870,11 @@ new_bio:
(rw) ? "rw" : "r", nr_vecs);
}
pr_debug("PSCSI: Calling bio_add_pc_page() i: %d"
pr_debug("PSCSI: Calling bio_add_page() i: %d"
" bio: %p page: %p len: %d off: %d\n", i, bio,
page, len, off);
rc = bio_add_pc_page(pdv->pdv_sd->request_queue,
bio, page, bytes, off);
rc = bio_add_page(bio, page, bytes, off);
pr_debug("PSCSI: bio->bi_vcnt: %d nr_vecs: %d\n",
bio_segments(bio), nr_vecs);
if (rc != bytes) {

View file

@ -413,8 +413,6 @@ int __must_check bio_add_page(struct bio *bio, struct page *page, unsigned len,
unsigned off);
bool __must_check bio_add_folio(struct bio *bio, struct folio *folio,
size_t len, size_t off);
extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *,
unsigned int, unsigned int);
void __bio_add_page(struct bio *bio, struct page *page,
unsigned int len, unsigned int off);
void bio_add_folio_nofail(struct bio *bio, struct folio *folio, size_t len,