erofs: support I/O submission for sub-page compressed blocks
Add a basic I/O submission path first to support sub-page blocks:

 - Temporary short-lived pages will be used entirely;

 - In-place I/O pages can be used partially, but compressed pages
   need to be able to be mapped in contiguous virtual memory.

As a start, currently cache decompression is explicitly disabled
for sub-page blocks, which will be supported in the future.

Reviewed-by: Yue Hu <huyue2@coolpad.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Link: https://lore.kernel.org/r/20231206091057.87027-2-hsiangkao@linux.alibaba.com
commit 192351616a
parent 3c12466b6b

1 changed file with 74 additions and 82 deletions:

fs/erofs/zdata.c
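Before the diff itself, here is a minimal standalone sketch of the sub-page
bio_vec computation that the new z_erofs_fill_bio_vec() performs for in-place
(file-backed) pages, i.e. the "used partially" case from the commit message.
This is illustrative user-space C rather than kernel code: fill_subpage_bvec()
and round_up_pow2() are hypothetical helpers mirroring the round_up() calls in
the first hunk below, where zbv->offset is negative for in-place pages and bs
comes from i_blocksize().

#include <stdio.h>

/* round x up to the next multiple of the power-of-two align */
static unsigned int round_up_pow2(unsigned int x, unsigned int align)
{
	return (x + align - 1) & ~(align - 1);
}

/*
 * Mirrors the "mapping && mapping != mc" branch added below:
 * offset/end come from the pcluster's z_erofs_bvec, bs is the
 * filesystem block size; both ends are rounded to whole blocks.
 */
static void fill_subpage_bvec(int offset, unsigned int end, unsigned int bs,
			      unsigned int *bv_offset, unsigned int *bv_len)
{
	*bv_offset = 0;			/* whole page by default */
	if (offset < 0)			/* in-place page */
		*bv_offset = round_up_pow2((unsigned int)-offset, bs);
	*bv_len = round_up_pow2(end, bs) - *bv_offset;
}

int main(void)
{
	unsigned int off, len;

	/* 2KiB blocks; compressed data sits in the second half of a page */
	fill_subpage_bvec(-2048, 4096, 2048, &off, &len);
	printf("bv_offset=%u bv_len=%u\n", off, len);	/* 2048 2048 */
	return 0;
}

Rounding both the start and the length to the block size keeps the submitted
region aligned to whole compressed blocks inside the page, which is what lets
a partially-used in-place page be described by a single bio_vec.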
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -1435,86 +1435,85 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
 		z_erofs_decompressqueue_work(&io->u.work);
 }
 
-static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
-					       unsigned int nr,
-					       struct page **pagepool,
-					       struct address_space *mc)
+static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
+				 struct z_erofs_decompress_frontend *f,
+				 struct z_erofs_pcluster *pcl,
+				 unsigned int nr,
+				 struct address_space *mc)
 {
-	const pgoff_t index = pcl->obj.index;
 	gfp_t gfp = mapping_gfp_mask(mc);
 	bool tocache = false;
+	struct z_erofs_bvec *zbv = pcl->compressed_bvecs + nr;
 	struct address_space *mapping;
-	struct page *oldpage, *page;
-	int justfound;
+	struct page *page, *oldpage;
+	int justfound, bs = i_blocksize(f->inode);
 
+	/* Except for inplace pages, the entire page can be used for I/Os */
+	bvec->bv_offset = 0;
+	bvec->bv_len = PAGE_SIZE;
 repeat:
-	page = READ_ONCE(pcl->compressed_bvecs[nr].page);
-	oldpage = page;
-
-	if (!page)
+	oldpage = READ_ONCE(zbv->page);
+	if (!oldpage)
 		goto out_allocpage;
 
-	justfound = (unsigned long)page & 1UL;
-	page = (struct page *)((unsigned long)page & ~1UL);
+	justfound = (unsigned long)oldpage & 1UL;
+	page = (struct page *)((unsigned long)oldpage & ~1UL);
+	bvec->bv_page = page;
 
+	DBG_BUGON(z_erofs_is_shortlived_page(page));
 	/*
-	 * preallocated cached pages, which is used to avoid direct reclaim
-	 * otherwise, it will go inplace I/O path instead.
+	 * Handle preallocated cached pages.  We tried to allocate such pages
+	 * without triggering direct reclaim.  If allocation failed, inplace
+	 * file-backed pages will be used instead.
 	 */
 	if (page->private == Z_EROFS_PREALLOCATED_PAGE) {
-		WRITE_ONCE(pcl->compressed_bvecs[nr].page, page);
 		set_page_private(page, 0);
+		WRITE_ONCE(zbv->page, page);
 		tocache = true;
 		goto out_tocache;
 	}
 
 	mapping = READ_ONCE(page->mapping);
 
 	/*
-	 * file-backed online pages in plcuster are all locked steady,
-	 * therefore it is impossible for `mapping' to be NULL.
+	 * File-backed pages for inplace I/Os are all locked steady,
+	 * therefore it is impossible for `mapping` to be NULL.
 	 */
-	if (mapping && mapping != mc)
-		/* ought to be unmanaged pages */
-		goto out;
-
-	/* directly return for shortlived page as well */
-	if (z_erofs_is_shortlived_page(page))
-		goto out;
+	if (mapping && mapping != mc) {
+		if (zbv->offset < 0)
+			bvec->bv_offset = round_up(-zbv->offset, bs);
+		bvec->bv_len = round_up(zbv->end, bs) - bvec->bv_offset;
+		return;
+	}
 
 	lock_page(page);
 
 	/* only true if page reclaim goes wrong, should never happen */
 	DBG_BUGON(justfound && PagePrivate(page));
 
-	/* the page is still in manage cache */
+	/* the cached page is still in managed cache */
 	if (page->mapping == mc) {
-		WRITE_ONCE(pcl->compressed_bvecs[nr].page, page);
+		WRITE_ONCE(zbv->page, page);
 
-		if (!PagePrivate(page)) {
-			/*
-			 * impossible to be !PagePrivate(page) for
-			 * the current restriction as well if
-			 * the page is already in compressed_bvecs[].
-			 */
+		/*
+		 * The cached page is still available but without a valid
+		 * `->private` pcluster hint.  Let's reconnect them.
+		 */
+		if (!PagePrivate(page)) {
 			DBG_BUGON(!justfound);
-
-			justfound = 0;
-			set_page_private(page, (unsigned long)pcl);
-			SetPagePrivate(page);
+			/* compressed_bvecs[] already takes a ref */
+			attach_page_private(page, pcl);
+			put_page(page);
 		}
 
-		/* no need to submit io if it is already up-to-date */
+		/* no need to submit if it is already up-to-date */
 		if (PageUptodate(page)) {
 			unlock_page(page);
-			page = NULL;
+			bvec->bv_page = NULL;
 		}
-		goto out;
+		return;
 	}
 
 	/*
-	 * the managed page has been truncated, it's unsafe to
-	 * reuse this one, let's allocate a new cache-managed page.
+	 * It has been truncated, so it's unsafe to reuse this one. Let's
+	 * allocate a new page for compressed data.
	 */
 	DBG_BUGON(page->mapping);
 	DBG_BUGON(!justfound);
@@ -1523,25 +1522,23 @@ repeat:
 	unlock_page(page);
 	put_page(page);
 out_allocpage:
-	page = erofs_allocpage(pagepool, gfp | __GFP_NOFAIL);
-	if (oldpage != cmpxchg(&pcl->compressed_bvecs[nr].page,
-			       oldpage, page)) {
-		erofs_pagepool_add(pagepool, page);
+	page = erofs_allocpage(&f->pagepool, gfp | __GFP_NOFAIL);
+	if (oldpage != cmpxchg(&zbv->page, oldpage, page)) {
+		erofs_pagepool_add(&f->pagepool, page);
 		cond_resched();
 		goto repeat;
 	}
+	bvec->bv_page = page;
 out_tocache:
-	if (!tocache || add_to_page_cache_lru(page, mc, index + nr, gfp)) {
-		/* turn into temporary page if fails (1 ref) */
+	if (!tocache || bs != PAGE_SIZE ||
+	    add_to_page_cache_lru(page, mc, pcl->obj.index + nr, gfp)) {
+		/* turn into a temporary shortlived page (1 ref) */
 		set_page_private(page, Z_EROFS_SHORTLIVED_PAGE);
-		goto out;
+		return;
 	}
 	attach_page_private(page, pcl);
-	/* drop a refcount added by allocpage (then we have 2 refs here) */
+	/* drop a refcount added by allocpage (then 2 refs in total here) */
 	put_page(page);
-
-out: /* the only exit (for tracing and debugging) */
-	return page;
 }
 
 static struct z_erofs_decompressqueue *jobqueue_init(struct super_block *sb,
@@ -1596,7 +1593,7 @@ static void move_to_bypass_jobqueue(struct z_erofs_pcluster *pcl,
 	qtail[JQ_BYPASS] = &pcl->next;
 }
 
-static void z_erofs_decompressqueue_endio(struct bio *bio)
+static void z_erofs_submissionqueue_endio(struct bio *bio)
 {
 	struct z_erofs_decompressqueue *q = bio->bi_private;
 	blk_status_t err = bio->bi_status;
@@ -1608,7 +1605,6 @@ static void z_erofs_decompressqueue_endio(struct bio *bio)
 
 		DBG_BUGON(PageUptodate(page));
 		DBG_BUGON(z_erofs_page_is_invalidated(page));
-
 		if (erofs_page_is_managed(EROFS_SB(q->sb), page)) {
 			if (!err)
 				SetPageUptodate(page);
@@ -1631,17 +1627,14 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
 	struct z_erofs_decompressqueue *q[NR_JOBQUEUES];
 	z_erofs_next_pcluster_t owned_head = f->owned_head;
 	/* bio is NULL initially, so no need to initialize last_{index,bdev} */
-	pgoff_t last_index;
+	erofs_off_t last_pa;
 	struct block_device *last_bdev;
 	unsigned int nr_bios = 0;
 	struct bio *bio = NULL;
 	unsigned long pflags;
 	int memstall = 0;
 
-	/*
-	 * if managed cache is enabled, bypass jobqueue is needed,
-	 * no need to read from device for all pclusters in this queue.
-	 */
+	/* No need to read from device for pclusters in the bypass queue. */
 	q[JQ_BYPASS] = jobqueue_init(sb, fgq + JQ_BYPASS, NULL);
 	q[JQ_SUBMIT] = jobqueue_init(sb, fgq + JQ_SUBMIT, force_fg);
 
@@ -1654,7 +1647,8 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
 	do {
 		struct erofs_map_dev mdev;
 		struct z_erofs_pcluster *pcl;
-		pgoff_t cur, end;
+		erofs_off_t cur, end;
+		struct bio_vec bvec;
 		unsigned int i = 0;
 		bool bypass = true;
 
@@ -1673,18 +1667,14 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
 		};
 		(void)erofs_map_dev(sb, &mdev);
 
-		cur = erofs_blknr(sb, mdev.m_pa);
-		end = cur + pcl->pclusterpages;
+		cur = mdev.m_pa;
+		end = cur + (pcl->pclusterpages << PAGE_SHIFT);
 
 		do {
-			struct page *page;
-
-			page = pickup_page_for_submission(pcl, i++,
-							  &f->pagepool, mc);
-			if (!page)
+			z_erofs_fill_bio_vec(&bvec, f, pcl, i++, mc);
+			if (!bvec.bv_page)
 				continue;
 
-			if (bio && (cur != last_index + 1 ||
+			if (bio && (cur != last_pa ||
 				    last_bdev != mdev.m_bdev)) {
 submit_bio_retry:
 				submit_bio(bio);
@@ -1695,7 +1685,8 @@ submit_bio_retry:
 				bio = NULL;
 			}
 
-			if (unlikely(PageWorkingset(page)) && !memstall) {
+			if (unlikely(PageWorkingset(bvec.bv_page)) &&
+			    !memstall) {
 				psi_memstall_enter(&pflags);
 				memstall = 1;
 			}
@@ -1703,23 +1694,24 @@ submit_bio_retry:
 			if (!bio) {
 				bio = bio_alloc(mdev.m_bdev, BIO_MAX_VECS,
 						REQ_OP_READ, GFP_NOIO);
-				bio->bi_end_io = z_erofs_decompressqueue_endio;
-
-				last_bdev = mdev.m_bdev;
-				bio->bi_iter.bi_sector = (sector_t)cur <<
-					(sb->s_blocksize_bits - 9);
+				bio->bi_end_io = z_erofs_submissionqueue_endio;
+				bio->bi_iter.bi_sector = cur >> 9;
 				bio->bi_private = q[JQ_SUBMIT];
 				if (readahead)
 					bio->bi_opf |= REQ_RAHEAD;
 				++nr_bios;
+				last_bdev = mdev.m_bdev;
 			}
 
-			if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE)
+			if (cur + bvec.bv_len > end)
+				bvec.bv_len = end - cur;
+			if (!bio_add_page(bio, bvec.bv_page, bvec.bv_len,
+					  bvec.bv_offset))
 				goto submit_bio_retry;
 
-			last_index = cur;
+			last_pa = cur + bvec.bv_len;
 			bypass = false;
-		} while (++cur < end);
+		} while ((cur += bvec.bv_len) < end);
 
 		if (!bypass)
 			qtail[JQ_SUBMIT] = &pcl->next;
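Two details of the reworked submission loop above are worth noting. First,
bios are now batched by byte-granular physical address: last_pa records where
the previous bio_vec ends, any discontinuity (cur != last_pa) forces a bio
submission, and bi_sector is derived directly from the byte address (cur >> 9)
instead of from a block index. A minimal sketch of this merging rule, in
illustrative user-space C with hypothetical names:

#include <stdbool.h>
#include <stdio.h>

struct seg {			/* one compressed extent to read */
	unsigned long long pa;	/* physical byte address */
	unsigned int len;	/* bytes, possibly sub-page */
};

int main(void)
{
	/* the second extent is physically contiguous with the first,
	 * the third is not */
	struct seg segs[] = {
		{ 8192, 2048 }, { 10240, 4096 }, { 65536, 2048 },
	};
	unsigned long long last_pa = 0;
	bool have_bio = false;
	int nr_bios = 0;

	for (int i = 0; i < 3; i++) {
		/* flush the open bio on any physical discontinuity */
		if (have_bio && segs[i].pa != last_pa) {
			printf("submit bio #%d\n", nr_bios);
			have_bio = false;
		}
		if (!have_bio) {
			have_bio = true;
			nr_bios++;
		}
		last_pa = segs[i].pa + segs[i].len; /* end of last bio_vec */
	}
	if (have_bio)
		printf("submit bio #%d\n", nr_bios);
	printf("%d bios total\n", nr_bios);	/* prints 2 */
	return 0;
}

Second, the out_tocache path now falls back to a short-lived page whenever
bs != PAGE_SIZE, which is how cache decompression is explicitly disabled for
sub-page blocks, as noted in the commit message.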