ext4: Add delayed allocation support in data=writeback mode
Updated with fixes from Mingming Cao <cmm@us.ibm.com> to unlock and release the page from the page cache if the delalloc write_begin fails, and to properly handle preallocated blocks. Also added a fix to clear buffer_delay in block_write_full_page() after allocating a delayed buffer.

Updated with fixes from Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> to update i_disksize properly and to add bmap support for delayed allocation.

Updated with a fix from Valerie Clement <valerie.clement@bull.net> to avoid filesystem corruption when the filesystem is mounted with the delalloc option and blocksize < pagesize.

Signed-off-by: Alex Tomas <alex@clusterfs.com>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Dave Kleikamp <shaggy@linux.vnet.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
This commit is contained in:
parent
29a814d2ee
commit
64769240bd
3 changed files with 700 additions and 6 deletions
|
@ -536,6 +536,7 @@ do { \
|
||||||
#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
|
#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
|
||||||
#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */
|
#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */
|
||||||
#define EXT4_MOUNT_MBALLOC 0x4000000 /* Buddy allocation support */
|
#define EXT4_MOUNT_MBALLOC 0x4000000 /* Buddy allocation support */
|
||||||
|
#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */
|
||||||
/* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */
|
/* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */
|
||||||
#ifndef _LINUX_EXT2_FS_H
|
#ifndef _LINUX_EXT2_FS_H
|
||||||
#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt
|
#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt
|
||||||
|
|
699
fs/ext4/inode.c
699
fs/ext4/inode.c
|
@ -32,6 +32,7 @@
|
||||||
#include <linux/string.h>
|
#include <linux/string.h>
|
||||||
#include <linux/buffer_head.h>
|
#include <linux/buffer_head.h>
|
||||||
#include <linux/writeback.h>
|
#include <linux/writeback.h>
|
||||||
|
#include <linux/pagevec.h>
|
||||||
#include <linux/mpage.h>
|
#include <linux/mpage.h>
|
||||||
#include <linux/uio.h>
|
#include <linux/uio.h>
|
||||||
#include <linux/bio.h>
|
#include <linux/bio.h>
|
||||||
|
@ -46,6 +47,8 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode,
|
||||||
new_size);
|
new_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void ext4_invalidatepage(struct page *page, unsigned long offset);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Test whether an inode is a fast symlink.
|
* Test whether an inode is a fast symlink.
|
||||||
*/
|
*/
|
||||||
|
@ -1407,6 +1410,669 @@ static int ext4_journalled_write_end(struct file *file,
|
||||||
return ret ? ret : copied;
|
return ret ? ret : copied;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Delayed allocation stuff
|
||||||
|
*/
|
||||||
|
|
||||||
|
struct mpage_da_data {
|
||||||
|
struct inode *inode;
|
||||||
|
struct buffer_head lbh; /* extent of blocks */
|
||||||
|
unsigned long first_page, next_page; /* extent of pages */
|
||||||
|
get_block_t *get_block;
|
||||||
|
struct writeback_control *wbc;
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* mpage_da_submit_io - walks through extent of pages and try to write
|
||||||
|
* them with __mpage_writepage()
|
||||||
|
*
|
||||||
|
* @mpd->inode: inode
|
||||||
|
* @mpd->first_page: first page of the extent
|
||||||
|
* @mpd->next_page: page after the last page of the extent
|
||||||
|
* @mpd->get_block: the filesystem's block mapper function
|
||||||
|
*
|
||||||
|
* By the time mpage_da_submit_io() is called we expect all blocks
|
||||||
|
* to be allocated. this may be wrong if allocation failed.
|
||||||
|
*
|
||||||
|
* As pages are already locked by write_cache_pages(), we can't use it
|
||||||
|
*/
|
||||||
|
static int mpage_da_submit_io(struct mpage_da_data *mpd)
|
||||||
|
{
|
||||||
|
struct address_space *mapping = mpd->inode->i_mapping;
|
||||||
|
struct mpage_data mpd_pp = {
|
||||||
|
.bio = NULL,
|
||||||
|
.last_block_in_bio = 0,
|
||||||
|
.get_block = mpd->get_block,
|
||||||
|
.use_writepage = 1,
|
||||||
|
};
|
||||||
|
int ret = 0, err, nr_pages, i;
|
||||||
|
unsigned long index, end;
|
||||||
|
struct pagevec pvec;
|
||||||
|
|
||||||
|
BUG_ON(mpd->next_page <= mpd->first_page);
|
||||||
|
|
||||||
|
pagevec_init(&pvec, 0);
|
||||||
|
index = mpd->first_page;
|
||||||
|
end = mpd->next_page - 1;
|
||||||
|
|
||||||
|
while (index <= end) {
|
||||||
|
/* XXX: optimize tail */
|
||||||
|
nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
|
||||||
|
if (nr_pages == 0)
|
||||||
|
break;
|
||||||
|
for (i = 0; i < nr_pages; i++) {
|
||||||
|
struct page *page = pvec.pages[i];
|
||||||
|
|
||||||
|
index = page->index;
|
||||||
|
if (index > end)
|
||||||
|
break;
|
||||||
|
index++;
|
||||||
|
|
||||||
|
err = __mpage_writepage(page, mpd->wbc, &mpd_pp);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* In error case, we have to continue because
|
||||||
|
* remaining pages are still locked
|
||||||
|
* XXX: unlock and re-dirty them?
|
||||||
|
*/
|
||||||
|
if (ret == 0)
|
||||||
|
ret = err;
|
||||||
|
}
|
||||||
|
pagevec_release(&pvec);
|
||||||
|
}
|
||||||
|
if (mpd_pp.bio)
|
||||||
|
mpage_bio_submit(WRITE, mpd_pp.bio);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* mpage_put_bnr_to_bhs - walk blocks and assign them actual numbers
|
||||||
|
*
|
||||||
|
* @mpd->inode - inode to walk through
|
||||||
|
* @exbh->b_blocknr - first block on a disk
|
||||||
|
* @exbh->b_size - amount of space in bytes
|
||||||
|
* @logical - first logical block to start assignment with
|
||||||
|
*
|
||||||
|
* the function goes through all passed space and put actual disk
|
||||||
|
* block numbers into buffer heads, dropping BH_Delay
|
||||||
|
*/
|
||||||
|
static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
|
||||||
|
struct buffer_head *exbh)
|
||||||
|
{
|
||||||
|
struct inode *inode = mpd->inode;
|
||||||
|
struct address_space *mapping = inode->i_mapping;
|
||||||
|
int blocks = exbh->b_size >> inode->i_blkbits;
|
||||||
|
sector_t pblock = exbh->b_blocknr, cur_logical;
|
||||||
|
struct buffer_head *head, *bh;
|
||||||
|
unsigned long index, end;
|
||||||
|
struct pagevec pvec;
|
||||||
|
int nr_pages, i;
|
||||||
|
|
||||||
|
index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
|
||||||
|
end = (logical + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
|
||||||
|
cur_logical = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
|
||||||
|
|
||||||
|
pagevec_init(&pvec, 0);
|
||||||
|
|
||||||
|
while (index <= end) {
|
||||||
|
/* XXX: optimize tail */
|
||||||
|
nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
|
||||||
|
if (nr_pages == 0)
|
||||||
|
break;
|
||||||
|
for (i = 0; i < nr_pages; i++) {
|
||||||
|
struct page *page = pvec.pages[i];
|
||||||
|
|
||||||
|
index = page->index;
|
||||||
|
if (index > end)
|
||||||
|
break;
|
||||||
|
index++;
|
||||||
|
|
||||||
|
BUG_ON(!PageLocked(page));
|
||||||
|
BUG_ON(PageWriteback(page));
|
||||||
|
BUG_ON(!page_has_buffers(page));
|
||||||
|
|
||||||
|
bh = page_buffers(page);
|
||||||
|
head = bh;
|
||||||
|
|
||||||
|
/* skip blocks out of the range */
|
||||||
|
do {
|
||||||
|
if (cur_logical >= logical)
|
||||||
|
break;
|
||||||
|
cur_logical++;
|
||||||
|
} while ((bh = bh->b_this_page) != head);
|
||||||
|
|
||||||
|
do {
|
||||||
|
if (cur_logical >= logical + blocks)
|
||||||
|
break;
|
||||||
|
|
||||||
|
if (buffer_delay(bh)) {
|
||||||
|
bh->b_blocknr = pblock;
|
||||||
|
clear_buffer_delay(bh);
|
||||||
|
} else if (buffer_mapped(bh)) {
|
||||||
|
BUG_ON(bh->b_blocknr != pblock);
|
||||||
|
}
|
||||||
|
|
||||||
|
cur_logical++;
|
||||||
|
pblock++;
|
||||||
|
} while ((bh = bh->b_this_page) != head);
|
||||||
|
}
|
||||||
|
pagevec_release(&pvec);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* __unmap_underlying_blocks - just a helper function to unmap
|
||||||
|
* set of blocks described by @bh
|
||||||
|
*/
|
||||||
|
static inline void __unmap_underlying_blocks(struct inode *inode,
|
||||||
|
struct buffer_head *bh)
|
||||||
|
{
|
||||||
|
struct block_device *bdev = inode->i_sb->s_bdev;
|
||||||
|
int blocks, i;
|
||||||
|
|
||||||
|
blocks = bh->b_size >> inode->i_blkbits;
|
||||||
|
for (i = 0; i < blocks; i++)
|
||||||
|
unmap_underlying_metadata(bdev, bh->b_blocknr + i);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* mpage_da_map_blocks - go through given space
|
||||||
|
*
|
||||||
|
* @mpd->lbh - bh describing space
|
||||||
|
* @mpd->get_block - the filesystem's block mapper function
|
||||||
|
*
|
||||||
|
* The function skips space we know is already mapped to disk blocks.
|
||||||
|
*
|
||||||
|
* The function ignores errors ->get_block() returns, thus real
|
||||||
|
* error handling is postponed to __mpage_writepage()
|
||||||
|
*/
|
||||||
|
static void mpage_da_map_blocks(struct mpage_da_data *mpd)
|
||||||
|
{
|
||||||
|
struct buffer_head *lbh = &mpd->lbh;
|
||||||
|
int err = 0, remain = lbh->b_size;
|
||||||
|
sector_t next = lbh->b_blocknr;
|
||||||
|
struct buffer_head new;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We consider only non-mapped and non-allocated blocks
|
||||||
|
*/
|
||||||
|
if (buffer_mapped(lbh) && !buffer_delay(lbh))
|
||||||
|
return;
|
||||||
|
|
||||||
|
while (remain) {
|
||||||
|
new.b_state = lbh->b_state;
|
||||||
|
new.b_blocknr = 0;
|
||||||
|
new.b_size = remain;
|
||||||
|
err = mpd->get_block(mpd->inode, next, &new, 1);
|
||||||
|
if (err) {
|
||||||
|
/*
|
||||||
|
* Rather than implement own error handling
|
||||||
|
* here, we just leave remaining blocks
|
||||||
|
* unallocated and try again with ->writepage()
|
||||||
|
*/
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
BUG_ON(new.b_size == 0);
|
||||||
|
|
||||||
|
if (buffer_new(&new))
|
||||||
|
__unmap_underlying_blocks(mpd->inode, &new);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If blocks are delayed marked, we need to
|
||||||
|
* put actual blocknr and drop delayed bit
|
||||||
|
*/
|
||||||
|
if (buffer_delay(lbh))
|
||||||
|
mpage_put_bnr_to_bhs(mpd, next, &new);
|
||||||
|
|
||||||
|
/* go for the remaining blocks */
|
||||||
|
next += new.b_size >> mpd->inode->i_blkbits;
|
||||||
|
remain -= new.b_size;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | (1 << BH_Delay))
|
||||||
|
|
||||||
|
/*
|
||||||
|
* mpage_add_bh_to_extent - try to add one more block to extent of blocks
|
||||||
|
*
|
||||||
|
* @mpd->lbh - extent of blocks
|
||||||
|
* @logical - logical number of the block in the file
|
||||||
|
* @bh - bh of the block (used to access block's state)
|
||||||
|
*
|
||||||
|
* the function is used to collect contig. blocks in same state
|
||||||
|
*/
|
||||||
|
static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
|
||||||
|
sector_t logical, struct buffer_head *bh)
|
||||||
|
{
|
||||||
|
struct buffer_head *lbh = &mpd->lbh;
|
||||||
|
sector_t next;
|
||||||
|
|
||||||
|
next = lbh->b_blocknr + (lbh->b_size >> mpd->inode->i_blkbits);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* First block in the extent
|
||||||
|
*/
|
||||||
|
if (lbh->b_size == 0) {
|
||||||
|
lbh->b_blocknr = logical;
|
||||||
|
lbh->b_size = bh->b_size;
|
||||||
|
lbh->b_state = bh->b_state & BH_FLAGS;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Can we merge the block to our big extent?
|
||||||
|
*/
|
||||||
|
if (logical == next && (bh->b_state & BH_FLAGS) == lbh->b_state) {
|
||||||
|
lbh->b_size += bh->b_size;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We couldn't merge the block to our extent, so we
|
||||||
|
* need to flush current extent and start new one
|
||||||
|
*/
|
||||||
|
mpage_da_map_blocks(mpd);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Now start a new extent
|
||||||
|
*/
|
||||||
|
lbh->b_size = bh->b_size;
|
||||||
|
lbh->b_state = bh->b_state & BH_FLAGS;
|
||||||
|
lbh->b_blocknr = logical;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* __mpage_da_writepage - finds extent of pages and blocks
|
||||||
|
*
|
||||||
|
* @page: page to consider
|
||||||
|
* @wbc: not used, we just follow rules
|
||||||
|
* @data: context
|
||||||
|
*
|
||||||
|
* The function finds extents of pages and scan them for all blocks.
|
||||||
|
*/
|
||||||
|
static int __mpage_da_writepage(struct page *page,
|
||||||
|
struct writeback_control *wbc, void *data)
|
||||||
|
{
|
||||||
|
struct mpage_da_data *mpd = data;
|
||||||
|
struct inode *inode = mpd->inode;
|
||||||
|
struct buffer_head *bh, *head, fake;
|
||||||
|
sector_t logical;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Can we merge this page to current extent?
|
||||||
|
*/
|
||||||
|
if (mpd->next_page != page->index) {
|
||||||
|
/*
|
||||||
|
* Nope, we can't. So, we map non-allocated blocks
|
||||||
|
* and start IO on them using __mpage_writepage()
|
||||||
|
*/
|
||||||
|
if (mpd->next_page != mpd->first_page) {
|
||||||
|
mpage_da_map_blocks(mpd);
|
||||||
|
mpage_da_submit_io(mpd);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Start next extent of pages ...
|
||||||
|
*/
|
||||||
|
mpd->first_page = page->index;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* ... and blocks
|
||||||
|
*/
|
||||||
|
mpd->lbh.b_size = 0;
|
||||||
|
mpd->lbh.b_state = 0;
|
||||||
|
mpd->lbh.b_blocknr = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
mpd->next_page = page->index + 1;
|
||||||
|
logical = (sector_t) page->index <<
|
||||||
|
(PAGE_CACHE_SHIFT - inode->i_blkbits);
|
||||||
|
|
||||||
|
if (!page_has_buffers(page)) {
|
||||||
|
/*
|
||||||
|
* There is no attached buffer heads yet (mmap?)
|
||||||
|
* we treat the page asfull of dirty blocks
|
||||||
|
*/
|
||||||
|
bh = &fake;
|
||||||
|
bh->b_size = PAGE_CACHE_SIZE;
|
||||||
|
bh->b_state = 0;
|
||||||
|
set_buffer_dirty(bh);
|
||||||
|
set_buffer_uptodate(bh);
|
||||||
|
mpage_add_bh_to_extent(mpd, logical, bh);
|
||||||
|
} else {
|
||||||
|
/*
|
||||||
|
* Page with regular buffer heads, just add all dirty ones
|
||||||
|
*/
|
||||||
|
head = page_buffers(page);
|
||||||
|
bh = head;
|
||||||
|
do {
|
||||||
|
BUG_ON(buffer_locked(bh));
|
||||||
|
if (buffer_dirty(bh))
|
||||||
|
mpage_add_bh_to_extent(mpd, logical, bh);
|
||||||
|
logical++;
|
||||||
|
} while ((bh = bh->b_this_page) != head);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* mpage_da_writepages - walk the list of dirty pages of the given
|
||||||
|
* address space, allocates non-allocated blocks, maps newly-allocated
|
||||||
|
* blocks to existing bhs and issue IO them
|
||||||
|
*
|
||||||
|
* @mapping: address space structure to write
|
||||||
|
* @wbc: subtract the number of written pages from *@wbc->nr_to_write
|
||||||
|
* @get_block: the filesystem's block mapper function.
|
||||||
|
*
|
||||||
|
* This is a library function, which implements the writepages()
|
||||||
|
* address_space_operation.
|
||||||
|
*
|
||||||
|
* In order to avoid duplication of logic that deals with partial pages,
|
||||||
|
* multiple bio per page, etc, we find non-allocated blocks, allocate
|
||||||
|
* them with minimal calls to ->get_block() and re-use __mpage_writepage()
|
||||||
|
*
|
||||||
|
* It's important that we call __mpage_writepage() only once for each
|
||||||
|
* involved page, otherwise we'd have to implement more complicated logic
|
||||||
|
* to deal with pages w/o PG_lock or w/ PG_writeback and so on.
|
||||||
|
*
|
||||||
|
* See comments to mpage_writepages()
|
||||||
|
*/
|
||||||
|
static int mpage_da_writepages(struct address_space *mapping,
|
||||||
|
struct writeback_control *wbc,
|
||||||
|
get_block_t get_block)
|
||||||
|
{
|
||||||
|
struct mpage_da_data mpd;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
if (!get_block)
|
||||||
|
return generic_writepages(mapping, wbc);
|
||||||
|
|
||||||
|
mpd.wbc = wbc;
|
||||||
|
mpd.inode = mapping->host;
|
||||||
|
mpd.lbh.b_size = 0;
|
||||||
|
mpd.lbh.b_state = 0;
|
||||||
|
mpd.lbh.b_blocknr = 0;
|
||||||
|
mpd.first_page = 0;
|
||||||
|
mpd.next_page = 0;
|
||||||
|
mpd.get_block = get_block;
|
||||||
|
|
||||||
|
ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Handle last extent of pages
|
||||||
|
*/
|
||||||
|
if (mpd.next_page != mpd.first_page) {
|
||||||
|
mpage_da_map_blocks(&mpd);
|
||||||
|
mpage_da_submit_io(&mpd);
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* this is a special callback for ->write_begin() only
|
||||||
|
* it's intention is to return mapped block or reserve space
|
||||||
|
*/
|
||||||
|
static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
|
||||||
|
struct buffer_head *bh_result, int create)
|
||||||
|
{
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
|
BUG_ON(create == 0);
|
||||||
|
BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* first, we need to know whether the block is allocated already
|
||||||
|
* preallocated blocks are unmapped but should treated
|
||||||
|
* the same as allocated blocks.
|
||||||
|
*/
|
||||||
|
ret = ext4_get_blocks_wrap(NULL, inode, iblock, 1, bh_result, 0, 0);
|
||||||
|
if (ret == 0) {
|
||||||
|
/* the block isn't allocated yet, let's reserve space */
|
||||||
|
/* XXX: call reservation here */
|
||||||
|
/*
|
||||||
|
* XXX: __block_prepare_write() unmaps passed block,
|
||||||
|
* is it OK?
|
||||||
|
*/
|
||||||
|
map_bh(bh_result, inode->i_sb, 0);
|
||||||
|
set_buffer_new(bh_result);
|
||||||
|
set_buffer_delay(bh_result);
|
||||||
|
} else if (ret > 0) {
|
||||||
|
bh_result->b_size = (ret << inode->i_blkbits);
|
||||||
|
ret = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Block mapper handed to mpage_da_writepages() by ext4_da_writepages().
 *
 * Maps up to @bh_result->b_size worth of blocks starting at @iblock via
 * ext4_get_blocks_wrap().  When @create is set the call runs under a
 * journal handle started here.  On a successful mapping i_disksize is
 * advanced (capped at i_size) and the inode is marked dirty.
 *
 * Returns 0 when blocks were mapped, otherwise the value returned by
 * ext4_get_blocks_wrap() or the error from starting the handle.
 */
static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
				   struct buffer_head *bh_result, int create)
{
	int ret, needed_blocks = ext4_writepage_trans_blocks(inode);
	unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
	handle_t *handle = NULL;
	loff_t disksize;

	if (create) {
		handle = ext4_journal_start(inode, needed_blocks);
		if (IS_ERR(handle))
			return PTR_ERR(handle);
	}

	ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
				   bh_result, create, 0);
	if (ret <= 0)
		goto out;

	bh_result->b_size = (ret << inode->i_blkbits);

	/*
	 * Update on-disk size along with block allocation
	 * we don't use 'extend_disksize' as size may change
	 * within already allocated block -bzzz
	 */
	disksize = ((loff_t) iblock + ret) << inode->i_blkbits;
	if (disksize > i_size_read(inode))
		disksize = i_size_read(inode);

	if (disksize > EXT4_I(inode)->i_disksize) {
		/*
		 * XXX: replace with spinlock if seen contended -bzzz
		 */
		down_write(&EXT4_I(inode)->i_data_sem);
		if (disksize > EXT4_I(inode)->i_disksize)
			EXT4_I(inode)->i_disksize = disksize;
		up_write(&EXT4_I(inode)->i_data_sem);

		if (EXT4_I(inode)->i_disksize == disksize) {
			/* no handle yet when called with create == 0 */
			if (handle == NULL)
				handle = ext4_journal_start(inode, 1);
			if (!IS_ERR(handle))
				ext4_mark_inode_dirty(handle, inode);
		}
	}

	ret = 0;

out:
	if (handle && !IS_ERR(handle))
		ext4_journal_stop(handle);

	return ret;
}
|
||||||
|
/* FIXME!! only support data=writeback mode */
|
||||||
|
static int ext4_da_writepage(struct page *page,
|
||||||
|
struct writeback_control *wbc)
|
||||||
|
{
|
||||||
|
struct inode *inode = page->mapping->host;
|
||||||
|
handle_t *handle = NULL;
|
||||||
|
int ret = 0;
|
||||||
|
int err;
|
||||||
|
|
||||||
|
if (ext4_journal_current_handle())
|
||||||
|
goto out_fail;
|
||||||
|
|
||||||
|
handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode));
|
||||||
|
if (IS_ERR(handle)) {
|
||||||
|
ret = PTR_ERR(handle);
|
||||||
|
goto out_fail;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
|
||||||
|
ret = nobh_writepage(page, ext4_get_block, wbc);
|
||||||
|
else
|
||||||
|
ret = block_write_full_page(page, ext4_get_block, wbc);
|
||||||
|
|
||||||
|
if (!ret && inode->i_size > EXT4_I(inode)->i_disksize) {
|
||||||
|
EXT4_I(inode)->i_disksize = inode->i_size;
|
||||||
|
ext4_mark_inode_dirty(handle, inode);
|
||||||
|
}
|
||||||
|
|
||||||
|
err = ext4_journal_stop(handle);
|
||||||
|
if (!ret)
|
||||||
|
ret = err;
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
out_fail:
|
||||||
|
redirty_page_for_writepage(wbc, page);
|
||||||
|
unlock_page(page);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int ext4_da_writepages(struct address_space *mapping,
|
||||||
|
struct writeback_control *wbc)
|
||||||
|
{
|
||||||
|
return mpage_da_writepages(mapping, wbc, ext4_da_get_block_write);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
|
||||||
|
loff_t pos, unsigned len, unsigned flags,
|
||||||
|
struct page **pagep, void **fsdata)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
struct page *page;
|
||||||
|
pgoff_t index;
|
||||||
|
unsigned from, to;
|
||||||
|
struct inode *inode = mapping->host;
|
||||||
|
handle_t *handle;
|
||||||
|
|
||||||
|
index = pos >> PAGE_CACHE_SHIFT;
|
||||||
|
from = pos & (PAGE_CACHE_SIZE - 1);
|
||||||
|
to = from + len;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* With delayed allocation, we don't log the i_disksize update
|
||||||
|
* if there is delayed block allocation. But we still need
|
||||||
|
* to journalling the i_disksize update if writes to the end
|
||||||
|
* of file which has an already mapped buffer.
|
||||||
|
*/
|
||||||
|
handle = ext4_journal_start(inode, 1);
|
||||||
|
if (IS_ERR(handle)) {
|
||||||
|
ret = PTR_ERR(handle);
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
page = __grab_cache_page(mapping, index);
|
||||||
|
if (!page)
|
||||||
|
return -ENOMEM;
|
||||||
|
*pagep = page;
|
||||||
|
|
||||||
|
ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
|
||||||
|
ext4_da_get_block_prep);
|
||||||
|
if (ret < 0) {
|
||||||
|
unlock_page(page);
|
||||||
|
ext4_journal_stop(handle);
|
||||||
|
page_cache_release(page);
|
||||||
|
}
|
||||||
|
|
||||||
|
out:
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * walk_page_buffers() callback: returns non-zero for a buffer that is
 * either not mapped or marked delayed.  @handle is unused.
 */
static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh)
{
	/* i.e. anything but "mapped and not delayed" */
	return !(buffer_mapped(bh) && !buffer_delay(bh));
}
|
||||||
|
|
||||||
|
static int ext4_da_write_end(struct file *file,
|
||||||
|
struct address_space *mapping,
|
||||||
|
loff_t pos, unsigned len, unsigned copied,
|
||||||
|
struct page *page, void *fsdata)
|
||||||
|
{
|
||||||
|
struct inode *inode = mapping->host;
|
||||||
|
int ret = 0, ret2;
|
||||||
|
handle_t *handle = ext4_journal_current_handle();
|
||||||
|
loff_t new_i_size;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* generic_write_end() will run mark_inode_dirty() if i_size
|
||||||
|
* changes. So let's piggyback the i_disksize mark_inode_dirty
|
||||||
|
* into that.
|
||||||
|
*/
|
||||||
|
|
||||||
|
new_i_size = pos + copied;
|
||||||
|
if (new_i_size > EXT4_I(inode)->i_disksize)
|
||||||
|
if (!walk_page_buffers(NULL, page_buffers(page),
|
||||||
|
0, len, NULL, ext4_bh_unmapped_or_delay)){
|
||||||
|
/*
|
||||||
|
* Updating i_disksize when extending file without
|
||||||
|
* needing block allocation
|
||||||
|
*/
|
||||||
|
if (ext4_should_order_data(inode))
|
||||||
|
ret = ext4_jbd2_file_inode(handle, inode);
|
||||||
|
|
||||||
|
EXT4_I(inode)->i_disksize = new_i_size;
|
||||||
|
}
|
||||||
|
ret2 = generic_write_end(file, mapping, pos, len, copied,
|
||||||
|
page, fsdata);
|
||||||
|
copied = ret2;
|
||||||
|
if (ret2 < 0)
|
||||||
|
ret = ret2;
|
||||||
|
ret2 = ext4_journal_stop(handle);
|
||||||
|
if (!ret)
|
||||||
|
ret = ret2;
|
||||||
|
|
||||||
|
return ret ? ret : copied;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void ext4_da_invalidatepage(struct page *page, unsigned long offset)
|
||||||
|
{
|
||||||
|
struct buffer_head *head, *bh;
|
||||||
|
unsigned int curr_off = 0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Drop reserved blocks
|
||||||
|
*/
|
||||||
|
BUG_ON(!PageLocked(page));
|
||||||
|
if (!page_has_buffers(page))
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
head = page_buffers(page);
|
||||||
|
bh = head;
|
||||||
|
do {
|
||||||
|
unsigned int next_off = curr_off + bh->b_size;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* is this block fully invalidated?
|
||||||
|
*/
|
||||||
|
if (offset <= curr_off && buffer_delay(bh)) {
|
||||||
|
clear_buffer_delay(bh);
|
||||||
|
/* XXX: add real stuff here */
|
||||||
|
}
|
||||||
|
curr_off = next_off;
|
||||||
|
bh = bh->b_this_page;
|
||||||
|
} while (bh != head);
|
||||||
|
|
||||||
|
out:
|
||||||
|
ext4_invalidatepage(page, offset);
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* bmap() is special. It gets used by applications such as lilo and by
|
* bmap() is special. It gets used by applications such as lilo and by
|
||||||
* the swapper to find the on-disk block of a specific piece of data.
|
* the swapper to find the on-disk block of a specific piece of data.
|
||||||
|
@ -1427,6 +2093,16 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
|
||||||
journal_t *journal;
|
journal_t *journal;
|
||||||
int err;
|
int err;
|
||||||
|
|
||||||
|
if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) &&
|
||||||
|
test_opt(inode->i_sb, DELALLOC)) {
|
||||||
|
/*
|
||||||
|
* With delalloc we want to sync the file
|
||||||
|
* so that we can make sure we allocate
|
||||||
|
* blocks for file
|
||||||
|
*/
|
||||||
|
filemap_write_and_wait(mapping);
|
||||||
|
}
|
||||||
|
|
||||||
if (EXT4_I(inode)->i_state & EXT4_STATE_JDATA) {
|
if (EXT4_I(inode)->i_state & EXT4_STATE_JDATA) {
|
||||||
/*
|
/*
|
||||||
* This is a REALLY heavyweight approach, but the use of
|
* This is a REALLY heavyweight approach, but the use of
|
||||||
|
@ -1471,11 +2147,6 @@ static int bput_one(handle_t *handle, struct buffer_head *bh)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh)
|
|
||||||
{
|
|
||||||
return !buffer_mapped(bh) || buffer_delay(bh);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Note that we don't need to start a transaction unless we're journaling data
|
* Note that we don't need to start a transaction unless we're journaling data
|
||||||
* because we should have holes filled from ext4_page_mkwrite(). We even don't
|
* because we should have holes filled from ext4_page_mkwrite(). We even don't
|
||||||
|
@ -1832,10 +2503,28 @@ static const struct address_space_operations ext4_journalled_aops = {
|
||||||
.releasepage = ext4_releasepage,
|
.releasepage = ext4_releasepage,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static const struct address_space_operations ext4_da_aops = {
|
||||||
|
.readpage = ext4_readpage,
|
||||||
|
.readpages = ext4_readpages,
|
||||||
|
.writepage = ext4_da_writepage,
|
||||||
|
.writepages = ext4_da_writepages,
|
||||||
|
.sync_page = block_sync_page,
|
||||||
|
.write_begin = ext4_da_write_begin,
|
||||||
|
.write_end = ext4_da_write_end,
|
||||||
|
.bmap = ext4_bmap,
|
||||||
|
.invalidatepage = ext4_da_invalidatepage,
|
||||||
|
.releasepage = ext4_releasepage,
|
||||||
|
.direct_IO = ext4_direct_IO,
|
||||||
|
.migratepage = buffer_migrate_page,
|
||||||
|
};
|
||||||
|
|
||||||
void ext4_set_aops(struct inode *inode)
|
void ext4_set_aops(struct inode *inode)
|
||||||
{
|
{
|
||||||
if (ext4_should_order_data(inode))
|
if (ext4_should_order_data(inode))
|
||||||
inode->i_mapping->a_ops = &ext4_ordered_aops;
|
inode->i_mapping->a_ops = &ext4_ordered_aops;
|
||||||
|
else if (ext4_should_writeback_data(inode) &&
|
||||||
|
test_opt(inode->i_sb, DELALLOC))
|
||||||
|
inode->i_mapping->a_ops = &ext4_da_aops;
|
||||||
else if (ext4_should_writeback_data(inode))
|
else if (ext4_should_writeback_data(inode))
|
||||||
inode->i_mapping->a_ops = &ext4_writeback_aops;
|
inode->i_mapping->a_ops = &ext4_writeback_aops;
|
||||||
else
|
else
|
||||||
|
|
|
@ -898,7 +898,7 @@ enum {
|
||||||
Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
|
Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
|
||||||
Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
|
Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
|
||||||
Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version,
|
Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version,
|
||||||
Opt_mballoc, Opt_nomballoc, Opt_stripe,
|
Opt_mballoc, Opt_nomballoc, Opt_stripe, Opt_delalloc,
|
||||||
};
|
};
|
||||||
|
|
||||||
static match_table_t tokens = {
|
static match_table_t tokens = {
|
||||||
|
@ -957,6 +957,7 @@ static match_table_t tokens = {
|
||||||
{Opt_nomballoc, "nomballoc"},
|
{Opt_nomballoc, "nomballoc"},
|
||||||
{Opt_stripe, "stripe=%u"},
|
{Opt_stripe, "stripe=%u"},
|
||||||
{Opt_resize, "resize"},
|
{Opt_resize, "resize"},
|
||||||
|
{Opt_delalloc, "delalloc"},
|
||||||
{Opt_err, NULL},
|
{Opt_err, NULL},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -1335,6 +1336,9 @@ set_qf_format:
|
||||||
return 0;
|
return 0;
|
||||||
sbi->s_stripe = option;
|
sbi->s_stripe = option;
|
||||||
break;
|
break;
|
||||||
|
case Opt_delalloc:
|
||||||
|
set_opt(sbi->s_mount_opt, DELALLOC);
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
printk (KERN_ERR
|
printk (KERN_ERR
|
||||||
"EXT4-fs: Unrecognized mount option \"%s\" "
|
"EXT4-fs: Unrecognized mount option \"%s\" "
|
||||||
|
|
Loading…
Add table
Reference in a new issue