btrfs: convert btrfs_buffered_write() to use folios
The buffered write path still relies heavily on the page interface. Since we have converted it to do page-by-page copying, it's much easier to convert all involved functions to the folio interface. This involves: - btrfs_copy_from_user() - btrfs_drop_page() - prepare_uptodate_page() - prepare_one_page() - lock_and_cleanup_extent_if_need() - btrfs_dirty_page() All functions are changed to accept a folio parameter, and if the word "page" is in the function name, it is changed to "folio" too. The function btrfs_dirty_page() is exported for the v1 space cache, so the v1 cache call site now converts its page to a folio for the new interface. There is also a small enhancement for prepare_one_folio(): instead of manually waiting for the page writeback, let __filemap_get_folio() handle that by using FGP_WRITEBEGIN, which implies (FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE). Signed-off-by: Qu Wenruo <wqu@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
parent
c87c299776
commit
e820dbeb6a
3 changed files with 66 additions and 80 deletions
138
fs/btrfs/file.c
138
fs/btrfs/file.c
|
@ -42,7 +42,7 @@
|
||||||
* calls into generic code.
|
* calls into generic code.
|
||||||
*/
|
*/
|
||||||
static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes,
|
static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes,
|
||||||
struct page *page, struct iov_iter *i)
|
struct folio *folio, struct iov_iter *i)
|
||||||
{
|
{
|
||||||
size_t copied = 0;
|
size_t copied = 0;
|
||||||
size_t total_copied = 0;
|
size_t total_copied = 0;
|
||||||
|
@ -53,10 +53,10 @@ static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes,
|
||||||
/*
|
/*
|
||||||
* Copy data from userspace to the current page
|
* Copy data from userspace to the current page
|
||||||
*/
|
*/
|
||||||
copied = copy_page_from_iter_atomic(page, offset, count, i);
|
copied = copy_folio_from_iter_atomic(folio, offset, count, i);
|
||||||
|
|
||||||
/* Flush processor's dcache for this page */
|
/* Flush processor's dcache for this page */
|
||||||
flush_dcache_page(page);
|
flush_dcache_folio(folio);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* if we get a partial write, we can end up with
|
* if we get a partial write, we can end up with
|
||||||
|
@ -68,7 +68,7 @@ static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes,
|
||||||
* back to page at a time copies after we return 0.
|
* back to page at a time copies after we return 0.
|
||||||
*/
|
*/
|
||||||
if (unlikely(copied < count)) {
|
if (unlikely(copied < count)) {
|
||||||
if (!PageUptodate(page)) {
|
if (!folio_test_uptodate(folio)) {
|
||||||
iov_iter_revert(i, copied);
|
iov_iter_revert(i, copied);
|
||||||
copied = 0;
|
copied = 0;
|
||||||
}
|
}
|
||||||
|
@ -84,37 +84,36 @@ static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes,
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* unlocks pages after btrfs_file_write is done with them
|
* Unlock folio after btrfs_file_write() is done with it.
|
||||||
*/
|
*/
|
||||||
static void btrfs_drop_page(struct btrfs_fs_info *fs_info, struct page *page,
|
static void btrfs_drop_folio(struct btrfs_fs_info *fs_info, struct folio *folio,
|
||||||
u64 pos, u64 copied)
|
u64 pos, u64 copied)
|
||||||
{
|
{
|
||||||
u64 block_start = round_down(pos, fs_info->sectorsize);
|
u64 block_start = round_down(pos, fs_info->sectorsize);
|
||||||
u64 block_len = round_up(pos + copied, fs_info->sectorsize) - block_start;
|
u64 block_len = round_up(pos + copied, fs_info->sectorsize) - block_start;
|
||||||
|
|
||||||
ASSERT(block_len <= U32_MAX);
|
ASSERT(block_len <= U32_MAX);
|
||||||
/*
|
/*
|
||||||
* Page checked is some magic around finding pages that have been
|
* Folio checked is some magic around finding folios that have been
|
||||||
* modified without going through btrfs_set_page_dirty clear it here.
|
* modified without going through btrfs_dirty_folio(). Clear it here.
|
||||||
* There should be no need to mark the pages accessed as
|
* There should be no need to mark the pages accessed as
|
||||||
* prepare_one_page() should have marked them accessed in
|
* prepare_one_folio() should have marked them accessed in
|
||||||
* prepare_one_page() via find_or_create_page()
|
* prepare_one_folio() via find_or_create_page()
|
||||||
*/
|
*/
|
||||||
btrfs_folio_clamp_clear_checked(fs_info, page_folio(page), block_start,
|
btrfs_folio_clamp_clear_checked(fs_info, folio, block_start, block_len);
|
||||||
block_len);
|
folio_unlock(folio);
|
||||||
unlock_page(page);
|
folio_put(folio);
|
||||||
put_page(page);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* After btrfs_copy_from_user(), update the following things for delalloc:
|
* After btrfs_copy_from_user(), update the following things for delalloc:
|
||||||
* - Mark newly dirtied pages as DELALLOC in the io tree.
|
* - Mark newly dirtied folio as DELALLOC in the io tree.
|
||||||
* Used to advise which range is to be written back.
|
* Used to advise which range is to be written back.
|
||||||
* - Mark modified pages as Uptodate/Dirty and not needing COW fixup
|
* - Mark modified folio as Uptodate/Dirty and not needing COW fixup
|
||||||
* - Update inode size for past EOF write
|
* - Update inode size for past EOF write
|
||||||
*/
|
*/
|
||||||
int btrfs_dirty_page(struct btrfs_inode *inode, struct page *page, loff_t pos,
|
int btrfs_dirty_folio(struct btrfs_inode *inode, struct folio *folio, loff_t pos,
|
||||||
size_t write_bytes, struct extent_state **cached, bool noreserve)
|
size_t write_bytes, struct extent_state **cached, bool noreserve)
|
||||||
{
|
{
|
||||||
struct btrfs_fs_info *fs_info = inode->root->fs_info;
|
struct btrfs_fs_info *fs_info = inode->root->fs_info;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
@ -122,7 +121,6 @@ int btrfs_dirty_page(struct btrfs_inode *inode, struct page *page, loff_t pos,
|
||||||
u64 start_pos;
|
u64 start_pos;
|
||||||
u64 end_of_last_block;
|
u64 end_of_last_block;
|
||||||
u64 end_pos = pos + write_bytes;
|
u64 end_pos = pos + write_bytes;
|
||||||
struct folio *folio = page_folio(page);
|
|
||||||
loff_t isize = i_size_read(&inode->vfs_inode);
|
loff_t isize = i_size_read(&inode->vfs_inode);
|
||||||
unsigned int extra_bits = 0;
|
unsigned int extra_bits = 0;
|
||||||
|
|
||||||
|
@ -835,14 +833,12 @@ out:
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* on error we return an unlocked page and the error value
|
* On error return an unlocked folio and the error value
|
||||||
* on success we return a locked page and 0
|
* On success return a locked folio and 0
|
||||||
*/
|
*/
|
||||||
static int prepare_uptodate_page(struct inode *inode,
|
static int prepare_uptodate_folio(struct inode *inode, struct folio *folio, u64 pos,
|
||||||
struct page *page, u64 pos,
|
u64 len, bool force_uptodate)
|
||||||
u64 len, bool force_uptodate)
|
|
||||||
{
|
{
|
||||||
struct folio *folio = page_folio(page);
|
|
||||||
u64 clamp_start = max_t(u64, pos, folio_pos(folio));
|
u64 clamp_start = max_t(u64, pos, folio_pos(folio));
|
||||||
u64 clamp_end = min_t(u64, pos + len, folio_pos(folio) + folio_size(folio));
|
u64 clamp_end = min_t(u64, pos + len, folio_pos(folio) + folio_size(folio));
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
@ -873,23 +869,13 @@ static int prepare_uptodate_page(struct inode *inode,
|
||||||
* The private flag check is essential for subpage as we need to store
|
* The private flag check is essential for subpage as we need to store
|
||||||
* extra bitmap using folio private.
|
* extra bitmap using folio private.
|
||||||
*/
|
*/
|
||||||
if (page->mapping != inode->i_mapping || !folio_test_private(folio)) {
|
if (folio->mapping != inode->i_mapping || !folio_test_private(folio)) {
|
||||||
folio_unlock(folio);
|
folio_unlock(folio);
|
||||||
return -EAGAIN;
|
return -EAGAIN;
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static fgf_t get_prepare_fgp_flags(bool nowait)
|
|
||||||
{
|
|
||||||
fgf_t fgp_flags = FGP_LOCK | FGP_ACCESSED | FGP_CREAT;
|
|
||||||
|
|
||||||
if (nowait)
|
|
||||||
fgp_flags |= FGP_NOWAIT;
|
|
||||||
|
|
||||||
return fgp_flags;
|
|
||||||
}
|
|
||||||
|
|
||||||
static gfp_t get_prepare_gfp_flags(struct inode *inode, bool nowait)
|
static gfp_t get_prepare_gfp_flags(struct inode *inode, bool nowait)
|
||||||
{
|
{
|
||||||
gfp_t gfp;
|
gfp_t gfp;
|
||||||
|
@ -904,60 +890,60 @@ static gfp_t get_prepare_gfp_flags(struct inode *inode, bool nowait)
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* this just gets page into the page cache and locks them down.
|
* Get folio into the page cache and lock it.
|
||||||
*/
|
*/
|
||||||
static noinline int prepare_one_page(struct inode *inode, struct page **page_ret,
|
static noinline int prepare_one_folio(struct inode *inode, struct folio **folio_ret,
|
||||||
loff_t pos, size_t write_bytes,
|
loff_t pos, size_t write_bytes,
|
||||||
bool force_uptodate, bool nowait)
|
bool force_uptodate, bool nowait)
|
||||||
{
|
{
|
||||||
unsigned long index = pos >> PAGE_SHIFT;
|
unsigned long index = pos >> PAGE_SHIFT;
|
||||||
gfp_t mask = get_prepare_gfp_flags(inode, nowait);
|
gfp_t mask = get_prepare_gfp_flags(inode, nowait);
|
||||||
fgf_t fgp_flags = get_prepare_fgp_flags(nowait);
|
fgf_t fgp_flags = (nowait ? FGP_WRITEBEGIN | FGP_NOWAIT : FGP_WRITEBEGIN);
|
||||||
struct page *page;
|
struct folio *folio;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
||||||
again:
|
again:
|
||||||
page = pagecache_get_page(inode->i_mapping, index, fgp_flags,
|
folio = __filemap_get_folio(inode->i_mapping, index, fgp_flags, mask);
|
||||||
mask | __GFP_WRITE);
|
if (IS_ERR(folio)) {
|
||||||
if (!page) {
|
|
||||||
if (nowait)
|
if (nowait)
|
||||||
ret = -EAGAIN;
|
ret = -EAGAIN;
|
||||||
else
|
else
|
||||||
ret = -ENOMEM;
|
ret = PTR_ERR(folio);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
ret = set_page_extent_mapped(page);
|
/* Only support page sized folio yet. */
|
||||||
|
ASSERT(folio_order(folio) == 0);
|
||||||
|
ret = set_folio_extent_mapped(folio);
|
||||||
if (ret < 0) {
|
if (ret < 0) {
|
||||||
unlock_page(page);
|
folio_unlock(folio);
|
||||||
put_page(page);
|
folio_put(folio);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
ret = prepare_uptodate_page(inode, page, pos, write_bytes, force_uptodate);
|
ret = prepare_uptodate_folio(inode, folio, pos, write_bytes, force_uptodate);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
/* The page is already unlocked. */
|
/* The folio is already unlocked. */
|
||||||
put_page(page);
|
folio_put(folio);
|
||||||
if (!nowait && ret == -EAGAIN) {
|
if (!nowait && ret == -EAGAIN) {
|
||||||
ret = 0;
|
ret = 0;
|
||||||
goto again;
|
goto again;
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
wait_on_page_writeback(page);
|
*folio_ret = folio;
|
||||||
*page_ret = page;
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This function locks the extent and properly waits for data=ordered extents
|
* Locks the extent and properly waits for data=ordered extents to finish
|
||||||
* to finish before allowing the pages to be modified if need.
|
* before allowing the folios to be modified if need.
|
||||||
*
|
*
|
||||||
* The return value:
|
* Return:
|
||||||
* 1 - the extent is locked
|
* 1 - the extent is locked
|
||||||
* 0 - the extent is not locked, and everything is OK
|
* 0 - the extent is not locked, and everything is OK
|
||||||
* -EAGAIN - need re-prepare the pages
|
* -EAGAIN - need to prepare the folios again
|
||||||
*/
|
*/
|
||||||
static noinline int
|
static noinline int
|
||||||
lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page *page,
|
lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct folio *folio,
|
||||||
loff_t pos, size_t write_bytes,
|
loff_t pos, size_t write_bytes,
|
||||||
u64 *lockstart, u64 *lockend, bool nowait,
|
u64 *lockstart, u64 *lockend, bool nowait,
|
||||||
struct extent_state **cached_state)
|
struct extent_state **cached_state)
|
||||||
|
@ -976,8 +962,8 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page *page,
|
||||||
if (nowait) {
|
if (nowait) {
|
||||||
if (!try_lock_extent(&inode->io_tree, start_pos, last_pos,
|
if (!try_lock_extent(&inode->io_tree, start_pos, last_pos,
|
||||||
cached_state)) {
|
cached_state)) {
|
||||||
unlock_page(page);
|
folio_unlock(folio);
|
||||||
put_page(page);
|
folio_put(folio);
|
||||||
return -EAGAIN;
|
return -EAGAIN;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -991,8 +977,8 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page *page,
|
||||||
ordered->file_offset <= last_pos) {
|
ordered->file_offset <= last_pos) {
|
||||||
unlock_extent(&inode->io_tree, start_pos, last_pos,
|
unlock_extent(&inode->io_tree, start_pos, last_pos,
|
||||||
cached_state);
|
cached_state);
|
||||||
unlock_page(page);
|
folio_unlock(folio);
|
||||||
put_page(page);
|
folio_put(folio);
|
||||||
btrfs_start_ordered_extent(ordered);
|
btrfs_start_ordered_extent(ordered);
|
||||||
btrfs_put_ordered_extent(ordered);
|
btrfs_put_ordered_extent(ordered);
|
||||||
return -EAGAIN;
|
return -EAGAIN;
|
||||||
|
@ -1006,10 +992,10 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page *page,
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We should be called after prepare_one_page() which should have locked
|
* We should be called after prepare_one_folio() which should have locked
|
||||||
* all pages in the range.
|
* all pages in the range.
|
||||||
*/
|
*/
|
||||||
WARN_ON(!PageLocked(page));
|
WARN_ON(!folio_test_locked(folio));
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
@ -1190,12 +1176,12 @@ ssize_t btrfs_buffered_write(struct kiocb *iocb, struct iov_iter *i)
|
||||||
size_t copied;
|
size_t copied;
|
||||||
size_t dirty_sectors;
|
size_t dirty_sectors;
|
||||||
size_t num_sectors;
|
size_t num_sectors;
|
||||||
struct page *page = NULL;
|
struct folio *folio = NULL;
|
||||||
int extents_locked;
|
int extents_locked;
|
||||||
bool force_page_uptodate = false;
|
bool force_page_uptodate = false;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Fault pages before locking them in prepare_one_page()
|
* Fault pages before locking them in prepare_one_folio()
|
||||||
* to avoid recursive lock
|
* to avoid recursive lock
|
||||||
*/
|
*/
|
||||||
if (unlikely(fault_in_iov_iter_readable(i, write_bytes))) {
|
if (unlikely(fault_in_iov_iter_readable(i, write_bytes))) {
|
||||||
|
@ -1261,8 +1247,8 @@ again:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = prepare_one_page(inode, &page, pos, write_bytes,
|
ret = prepare_one_folio(inode, &folio, pos, write_bytes,
|
||||||
force_page_uptodate, false);
|
force_page_uptodate, false);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
btrfs_delalloc_release_extents(BTRFS_I(inode),
|
btrfs_delalloc_release_extents(BTRFS_I(inode),
|
||||||
reserve_bytes);
|
reserve_bytes);
|
||||||
|
@ -1270,7 +1256,7 @@ again:
|
||||||
}
|
}
|
||||||
|
|
||||||
extents_locked = lock_and_cleanup_extent_if_need(BTRFS_I(inode),
|
extents_locked = lock_and_cleanup_extent_if_need(BTRFS_I(inode),
|
||||||
page, pos, write_bytes, &lockstart,
|
folio, pos, write_bytes, &lockstart,
|
||||||
&lockend, nowait, &cached_state);
|
&lockend, nowait, &cached_state);
|
||||||
if (extents_locked < 0) {
|
if (extents_locked < 0) {
|
||||||
if (!nowait && extents_locked == -EAGAIN)
|
if (!nowait && extents_locked == -EAGAIN)
|
||||||
|
@ -1282,7 +1268,7 @@ again:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
copied = btrfs_copy_from_user(pos, write_bytes, page, i);
|
copied = btrfs_copy_from_user(pos, write_bytes, folio, i);
|
||||||
|
|
||||||
num_sectors = BTRFS_BYTES_TO_BLKS(fs_info, reserve_bytes);
|
num_sectors = BTRFS_BYTES_TO_BLKS(fs_info, reserve_bytes);
|
||||||
dirty_sectors = round_up(copied + sector_offset,
|
dirty_sectors = round_up(copied + sector_offset,
|
||||||
|
@ -1314,8 +1300,8 @@ again:
|
||||||
release_bytes = round_up(copied + sector_offset,
|
release_bytes = round_up(copied + sector_offset,
|
||||||
fs_info->sectorsize);
|
fs_info->sectorsize);
|
||||||
|
|
||||||
ret = btrfs_dirty_page(BTRFS_I(inode), page, pos, copied,
|
ret = btrfs_dirty_folio(BTRFS_I(inode), folio, pos, copied,
|
||||||
&cached_state, only_release_metadata);
|
&cached_state, only_release_metadata);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If we have not locked the extent range, because the range's
|
* If we have not locked the extent range, because the range's
|
||||||
|
@ -1332,7 +1318,7 @@ again:
|
||||||
|
|
||||||
btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes);
|
btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
btrfs_drop_page(fs_info, page, pos, copied);
|
btrfs_drop_folio(fs_info, folio, pos, copied);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1340,7 +1326,7 @@ again:
|
||||||
if (only_release_metadata)
|
if (only_release_metadata)
|
||||||
btrfs_check_nocow_unlock(BTRFS_I(inode));
|
btrfs_check_nocow_unlock(BTRFS_I(inode));
|
||||||
|
|
||||||
btrfs_drop_page(fs_info, page, pos, copied);
|
btrfs_drop_folio(fs_info, folio, pos, copied);
|
||||||
|
|
||||||
cond_resched();
|
cond_resched();
|
||||||
|
|
||||||
|
|
|
@ -34,8 +34,8 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
|
||||||
ssize_t btrfs_do_write_iter(struct kiocb *iocb, struct iov_iter *from,
|
ssize_t btrfs_do_write_iter(struct kiocb *iocb, struct iov_iter *from,
|
||||||
const struct btrfs_ioctl_encoded_io_args *encoded);
|
const struct btrfs_ioctl_encoded_io_args *encoded);
|
||||||
int btrfs_release_file(struct inode *inode, struct file *file);
|
int btrfs_release_file(struct inode *inode, struct file *file);
|
||||||
int btrfs_dirty_page(struct btrfs_inode *inode, struct page *page, loff_t pos,
|
int btrfs_dirty_folio(struct btrfs_inode *inode, struct folio *folio, loff_t pos,
|
||||||
size_t write_bytes, struct extent_state **cached, bool noreserve);
|
size_t write_bytes, struct extent_state **cached, bool noreserve);
|
||||||
int btrfs_fdatawrite_range(struct btrfs_inode *inode, loff_t start, loff_t end);
|
int btrfs_fdatawrite_range(struct btrfs_inode *inode, loff_t start, loff_t end);
|
||||||
int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos,
|
int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos,
|
||||||
size_t *write_bytes, bool nowait);
|
size_t *write_bytes, bool nowait);
|
||||||
|
|
|
@ -1464,8 +1464,8 @@ static int __btrfs_write_out_cache(struct inode *inode,
|
||||||
u64 dirty_start = i * PAGE_SIZE;
|
u64 dirty_start = i * PAGE_SIZE;
|
||||||
u64 dirty_len = min_t(u64, dirty_start + PAGE_SIZE, i_size) - dirty_start;
|
u64 dirty_len = min_t(u64, dirty_start + PAGE_SIZE, i_size) - dirty_start;
|
||||||
|
|
||||||
ret = btrfs_dirty_page(BTRFS_I(inode), io_ctl->pages[i],
|
ret = btrfs_dirty_folio(BTRFS_I(inode), page_folio(io_ctl->pages[i]),
|
||||||
dirty_start, dirty_len, &cached_state, false);
|
dirty_start, dirty_len, &cached_state, false);
|
||||||
if (ret < 0)
|
if (ret < 0)
|
||||||
goto out_nospc;
|
goto out_nospc;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Reference in a new issue