Many cleanups and bug fixes in ext4, especially for the fast commit
feature.

Also some performance improvements; in particular, improving IOPS and
throughput on fast devices running Async Direct I/O by up to 20% by
optimizing jbd2_transaction_committed().

-----BEGIN PGP SIGNATURE-----

iQEzBAABCAAdFiEEK2m5VNv+CHkogTfJ8vlZVpUNgaMFAmaYiqsACgkQ8vlZVpUN
gaOWpQf/d6Y9WGyjeC1jOc+vIBxLgL+X0kbzYkkjGTSIZ7mZJS9X4NMMEtqayJ4f
1zGobcGENc05l4LVxf3uMbDj1aGlHeI9X4GLGaP5s5NcaAl4HKjQ3aFs3MuiJHPj
Ol2CebXJx+NKt1lkD8PSPGgaTb5zg+SeZifI+OZ1RpkcKmGnkSNa5NkUNAaBh6dl
5LLXTc2p9NcCwAwDAQSiAJCV35bAZpcp6fwLLaPQ6Eok9HxGcJuYXW2Fict4rbtV
mXeogXVIo2bkMcfh6tDchDBrFvORYIA7uBVmaG1LgAMrtEnYxnxnEntD0h6j/bzF
Fl4jjQfd8o2uYto/4eo+iY6Z0haxyQ==
=rcOo
-----END PGP SIGNATURE-----

Merge tag 'ext4_for_linus-6.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
 "Many cleanups and bug fixes in ext4, especially for the fast commit
  feature.

  Also some performance improvements; in particular, improving IOPS and
  throughput on fast devices running Async Direct I/O by up to 20% by
  optimizing jbd2_transaction_committed()"

* tag 'ext4_for_linus-6.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (40 commits)
  ext4: make sure the first directory block is not a hole
  ext4: check dot and dotdot of dx_root before making dir indexed
  ext4: sanity check for NULL pointer after ext4_force_shutdown
  jbd2: increase maximum transaction size
  jbd2: drop pointless shrinker batch initialization
  jbd2: avoid infinite transaction commit loop
  jbd2: precompute number of transaction descriptor blocks
  jbd2: make jbd2_journal_get_max_txn_bufs() internal
  jbd2: avoid mount failed when commit block is partial submitted
  ext4: avoid writing unitialized memory to disk in EA inodes
  ext4: don't track ranges in fast_commit if inode has inlined data
  ext4: fix possible tid_t sequence overflows
  ext4: use ext4_update_inode_fsync_trans() helper in inode creation
  ext4: add missing MODULE_DESCRIPTION()
  jbd2: add missing MODULE_DESCRIPTION()
  ext4: use memtostr_pad() for s_volume_name
  jbd2: speed up jbd2_transaction_committed()
  ext4: make ext4_da_map_blocks() buffer_head unaware
  ext4: make ext4_insert_delayed_block() insert multi-blocks
  ext4: factor out a helper to check the cluster allocation state
  ...
This commit is contained in:
commit
51ed42a8a1
20 changed files with 458 additions and 254 deletions
|
@ -2184,6 +2184,8 @@ static void __block_commit_write(struct folio *folio, size_t from, size_t to)
|
||||||
struct buffer_head *bh, *head;
|
struct buffer_head *bh, *head;
|
||||||
|
|
||||||
bh = head = folio_buffers(folio);
|
bh = head = folio_buffers(folio);
|
||||||
|
if (!bh)
|
||||||
|
return;
|
||||||
blocksize = bh->b_size;
|
blocksize = bh->b_size;
|
||||||
|
|
||||||
block_start = 0;
|
block_start = 0;
|
||||||
|
|
|
@ -72,7 +72,7 @@ static int add_system_zone(struct ext4_system_blocks *system_blks,
|
||||||
{
|
{
|
||||||
struct ext4_system_zone *new_entry, *entry;
|
struct ext4_system_zone *new_entry, *entry;
|
||||||
struct rb_node **n = &system_blks->root.rb_node, *node;
|
struct rb_node **n = &system_blks->root.rb_node, *node;
|
||||||
struct rb_node *parent = NULL, *new_node = NULL;
|
struct rb_node *parent = NULL, *new_node;
|
||||||
|
|
||||||
while (*n) {
|
while (*n) {
|
||||||
parent = *n;
|
parent = *n;
|
||||||
|
|
|
@ -1347,7 +1347,7 @@ struct ext4_super_block {
|
||||||
/*60*/ __le32 s_feature_incompat; /* incompatible feature set */
|
/*60*/ __le32 s_feature_incompat; /* incompatible feature set */
|
||||||
__le32 s_feature_ro_compat; /* readonly-compatible feature set */
|
__le32 s_feature_ro_compat; /* readonly-compatible feature set */
|
||||||
/*68*/ __u8 s_uuid[16]; /* 128-bit uuid for volume */
|
/*68*/ __u8 s_uuid[16]; /* 128-bit uuid for volume */
|
||||||
/*78*/ char s_volume_name[EXT4_LABEL_MAX]; /* volume name */
|
/*78*/ char s_volume_name[EXT4_LABEL_MAX] __nonstring; /* volume name */
|
||||||
/*88*/ char s_last_mounted[64] __nonstring; /* directory where last mounted */
|
/*88*/ char s_last_mounted[64] __nonstring; /* directory where last mounted */
|
||||||
/*C8*/ __le32 s_algorithm_usage_bitmap; /* For compression */
|
/*C8*/ __le32 s_algorithm_usage_bitmap; /* For compression */
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -310,6 +310,8 @@ void ext4_es_find_extent_range(struct inode *inode,
|
||||||
ext4_lblk_t lblk, ext4_lblk_t end,
|
ext4_lblk_t lblk, ext4_lblk_t end,
|
||||||
struct extent_status *es)
|
struct extent_status *es)
|
||||||
{
|
{
|
||||||
|
es->es_lblk = es->es_len = es->es_pblk = 0;
|
||||||
|
|
||||||
if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
|
if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
@ -2052,34 +2054,49 @@ bool ext4_is_pending(struct inode *inode, ext4_lblk_t lblk)
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* ext4_es_insert_delayed_block - adds a delayed block to the extents status
|
* ext4_es_insert_delayed_extent - adds some delayed blocks to the extents
|
||||||
* tree, adding a pending reservation where
|
* status tree, adding a pending reservation
|
||||||
* needed
|
* where needed
|
||||||
*
|
*
|
||||||
* @inode - file containing the newly added block
|
* @inode - file containing the newly added block
|
||||||
* @lblk - logical block to be added
|
* @lblk - start logical block to be added
|
||||||
* @allocated - indicates whether a physical cluster has been allocated for
|
* @len - length of blocks to be added
|
||||||
* the logical cluster that contains the block
|
* @lclu_allocated/end_allocated - indicates whether a physical cluster has
|
||||||
|
* been allocated for the logical cluster
|
||||||
|
* that contains the start/end block. Note that
|
||||||
|
* end_allocated should always be set to false
|
||||||
|
* if the start and the end block are in the
|
||||||
|
* same cluster
|
||||||
*/
|
*/
|
||||||
void ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk,
|
void ext4_es_insert_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
|
||||||
bool allocated)
|
ext4_lblk_t len, bool lclu_allocated,
|
||||||
|
bool end_allocated)
|
||||||
{
|
{
|
||||||
|
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
|
||||||
struct extent_status newes;
|
struct extent_status newes;
|
||||||
|
ext4_lblk_t end = lblk + len - 1;
|
||||||
int err1 = 0, err2 = 0, err3 = 0;
|
int err1 = 0, err2 = 0, err3 = 0;
|
||||||
struct extent_status *es1 = NULL;
|
struct extent_status *es1 = NULL;
|
||||||
struct extent_status *es2 = NULL;
|
struct extent_status *es2 = NULL;
|
||||||
struct pending_reservation *pr = NULL;
|
struct pending_reservation *pr1 = NULL;
|
||||||
|
struct pending_reservation *pr2 = NULL;
|
||||||
|
|
||||||
if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
|
if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
es_debug("add [%u/1) delayed to extent status tree of inode %lu\n",
|
es_debug("add [%u/%u) delayed to extent status tree of inode %lu\n",
|
||||||
lblk, inode->i_ino);
|
lblk, len, inode->i_ino);
|
||||||
|
if (!len)
|
||||||
|
return;
|
||||||
|
|
||||||
|
WARN_ON_ONCE((EXT4_B2C(sbi, lblk) == EXT4_B2C(sbi, end)) &&
|
||||||
|
end_allocated);
|
||||||
|
|
||||||
newes.es_lblk = lblk;
|
newes.es_lblk = lblk;
|
||||||
newes.es_len = 1;
|
newes.es_len = len;
|
||||||
ext4_es_store_pblock_status(&newes, ~0, EXTENT_STATUS_DELAYED);
|
ext4_es_store_pblock_status(&newes, ~0, EXTENT_STATUS_DELAYED);
|
||||||
trace_ext4_es_insert_delayed_block(inode, &newes, allocated);
|
trace_ext4_es_insert_delayed_extent(inode, &newes, lclu_allocated,
|
||||||
|
end_allocated);
|
||||||
|
|
||||||
ext4_es_insert_extent_check(inode, &newes);
|
ext4_es_insert_extent_check(inode, &newes);
|
||||||
|
|
||||||
|
@ -2088,11 +2105,15 @@ retry:
|
||||||
es1 = __es_alloc_extent(true);
|
es1 = __es_alloc_extent(true);
|
||||||
if ((err1 || err2) && !es2)
|
if ((err1 || err2) && !es2)
|
||||||
es2 = __es_alloc_extent(true);
|
es2 = __es_alloc_extent(true);
|
||||||
if ((err1 || err2 || err3) && allocated && !pr)
|
if (err1 || err2 || err3) {
|
||||||
pr = __alloc_pending(true);
|
if (lclu_allocated && !pr1)
|
||||||
|
pr1 = __alloc_pending(true);
|
||||||
|
if (end_allocated && !pr2)
|
||||||
|
pr2 = __alloc_pending(true);
|
||||||
|
}
|
||||||
write_lock(&EXT4_I(inode)->i_es_lock);
|
write_lock(&EXT4_I(inode)->i_es_lock);
|
||||||
|
|
||||||
err1 = __es_remove_extent(inode, lblk, lblk, NULL, es1);
|
err1 = __es_remove_extent(inode, lblk, end, NULL, es1);
|
||||||
if (err1 != 0)
|
if (err1 != 0)
|
||||||
goto error;
|
goto error;
|
||||||
/* Free preallocated extent if it didn't get used. */
|
/* Free preallocated extent if it didn't get used. */
|
||||||
|
@ -2112,13 +2133,22 @@ retry:
|
||||||
es2 = NULL;
|
es2 = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (allocated) {
|
if (lclu_allocated) {
|
||||||
err3 = __insert_pending(inode, lblk, &pr);
|
err3 = __insert_pending(inode, lblk, &pr1);
|
||||||
if (err3 != 0)
|
if (err3 != 0)
|
||||||
goto error;
|
goto error;
|
||||||
if (pr) {
|
if (pr1) {
|
||||||
__free_pending(pr);
|
__free_pending(pr1);
|
||||||
pr = NULL;
|
pr1 = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (end_allocated) {
|
||||||
|
err3 = __insert_pending(inode, end, &pr2);
|
||||||
|
if (err3 != 0)
|
||||||
|
goto error;
|
||||||
|
if (pr2) {
|
||||||
|
__free_pending(pr2);
|
||||||
|
pr2 = NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
error:
|
error:
|
||||||
|
|
|
@ -249,8 +249,9 @@ extern void ext4_exit_pending(void);
|
||||||
extern void ext4_init_pending_tree(struct ext4_pending_tree *tree);
|
extern void ext4_init_pending_tree(struct ext4_pending_tree *tree);
|
||||||
extern void ext4_remove_pending(struct inode *inode, ext4_lblk_t lblk);
|
extern void ext4_remove_pending(struct inode *inode, ext4_lblk_t lblk);
|
||||||
extern bool ext4_is_pending(struct inode *inode, ext4_lblk_t lblk);
|
extern bool ext4_is_pending(struct inode *inode, ext4_lblk_t lblk);
|
||||||
extern void ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk,
|
extern void ext4_es_insert_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
|
||||||
bool allocated);
|
ext4_lblk_t len, bool lclu_allocated,
|
||||||
|
bool end_allocated);
|
||||||
extern unsigned int ext4_es_delayed_clu(struct inode *inode, ext4_lblk_t lblk,
|
extern unsigned int ext4_es_delayed_clu(struct inode *inode, ext4_lblk_t lblk,
|
||||||
ext4_lblk_t len);
|
ext4_lblk_t len);
|
||||||
extern void ext4_clear_inode_es(struct inode *inode);
|
extern void ext4_clear_inode_es(struct inode *inode);
|
||||||
|
|
|
@ -353,7 +353,7 @@ void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handl
|
||||||
read_unlock(&sbi->s_journal->j_state_lock);
|
read_unlock(&sbi->s_journal->j_state_lock);
|
||||||
}
|
}
|
||||||
spin_lock(&sbi->s_fc_lock);
|
spin_lock(&sbi->s_fc_lock);
|
||||||
if (sbi->s_fc_ineligible_tid < tid)
|
if (tid_gt(tid, sbi->s_fc_ineligible_tid))
|
||||||
sbi->s_fc_ineligible_tid = tid;
|
sbi->s_fc_ineligible_tid = tid;
|
||||||
spin_unlock(&sbi->s_fc_lock);
|
spin_unlock(&sbi->s_fc_lock);
|
||||||
WARN_ON(reason >= EXT4_FC_REASON_MAX);
|
WARN_ON(reason >= EXT4_FC_REASON_MAX);
|
||||||
|
@ -649,6 +649,12 @@ void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t star
|
||||||
if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
|
if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
if (ext4_has_inline_data(inode)) {
|
||||||
|
ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR,
|
||||||
|
handle);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
args.start = start;
|
args.start = start;
|
||||||
args.end = end;
|
args.end = end;
|
||||||
|
|
||||||
|
@ -1207,7 +1213,7 @@ restart_fc:
|
||||||
if (ret == -EALREADY) {
|
if (ret == -EALREADY) {
|
||||||
/* There was an ongoing commit, check if we need to restart */
|
/* There was an ongoing commit, check if we need to restart */
|
||||||
if (atomic_read(&sbi->s_fc_subtid) <= subtid &&
|
if (atomic_read(&sbi->s_fc_subtid) <= subtid &&
|
||||||
commit_tid > journal->j_commit_sequence)
|
tid_gt(commit_tid, journal->j_commit_sequence))
|
||||||
goto restart_fc;
|
goto restart_fc;
|
||||||
ext4_fc_update_stats(sb, EXT4_FC_STATUS_SKIPPED, 0, 0,
|
ext4_fc_update_stats(sb, EXT4_FC_STATUS_SKIPPED, 0, 0,
|
||||||
commit_tid);
|
commit_tid);
|
||||||
|
@ -1282,7 +1288,7 @@ static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid)
|
||||||
list_del_init(&iter->i_fc_list);
|
list_del_init(&iter->i_fc_list);
|
||||||
ext4_clear_inode_state(&iter->vfs_inode,
|
ext4_clear_inode_state(&iter->vfs_inode,
|
||||||
EXT4_STATE_FC_COMMITTING);
|
EXT4_STATE_FC_COMMITTING);
|
||||||
if (iter->i_sync_tid <= tid)
|
if (tid_geq(tid, iter->i_sync_tid))
|
||||||
ext4_fc_reset_inode(&iter->vfs_inode);
|
ext4_fc_reset_inode(&iter->vfs_inode);
|
||||||
/* Make sure EXT4_STATE_FC_COMMITTING bit is clear */
|
/* Make sure EXT4_STATE_FC_COMMITTING bit is clear */
|
||||||
smp_mb();
|
smp_mb();
|
||||||
|
@ -1313,7 +1319,7 @@ static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid)
|
||||||
list_splice_init(&sbi->s_fc_q[FC_Q_STAGING],
|
list_splice_init(&sbi->s_fc_q[FC_Q_STAGING],
|
||||||
&sbi->s_fc_q[FC_Q_MAIN]);
|
&sbi->s_fc_q[FC_Q_MAIN]);
|
||||||
|
|
||||||
if (tid >= sbi->s_fc_ineligible_tid) {
|
if (tid_geq(tid, sbi->s_fc_ineligible_tid)) {
|
||||||
sbi->s_fc_ineligible_tid = 0;
|
sbi->s_fc_ineligible_tid = 0;
|
||||||
ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
|
ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1336,10 +1336,7 @@ got:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ext4_handle_valid(handle)) {
|
ext4_update_inode_fsync_trans(handle, inode, 1);
|
||||||
ei->i_sync_tid = handle->h_transaction->t_tid;
|
|
||||||
ei->i_datasync_tid = handle->h_transaction->t_tid;
|
|
||||||
}
|
|
||||||
|
|
||||||
err = ext4_mark_inode_dirty(handle, inode);
|
err = ext4_mark_inode_dirty(handle, inode);
|
||||||
if (err) {
|
if (err) {
|
||||||
|
|
|
@ -1410,7 +1410,11 @@ int ext4_inlinedir_to_tree(struct file *dir_file,
|
||||||
hinfo->hash = EXT4_DIRENT_HASH(de);
|
hinfo->hash = EXT4_DIRENT_HASH(de);
|
||||||
hinfo->minor_hash = EXT4_DIRENT_MINOR_HASH(de);
|
hinfo->minor_hash = EXT4_DIRENT_MINOR_HASH(de);
|
||||||
} else {
|
} else {
|
||||||
ext4fs_dirhash(dir, de->name, de->name_len, hinfo);
|
err = ext4fs_dirhash(dir, de->name, de->name_len, hinfo);
|
||||||
|
if (err) {
|
||||||
|
ret = err;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if ((hinfo->hash < start_hash) ||
|
if ((hinfo->hash < start_hash) ||
|
||||||
((hinfo->hash == start_hash) &&
|
((hinfo->hash == start_hash) &&
|
||||||
|
|
|
@ -279,4 +279,5 @@ static struct kunit_suite ext4_inode_test_suite = {
|
||||||
|
|
||||||
kunit_test_suites(&ext4_inode_test_suite);
|
kunit_test_suites(&ext4_inode_test_suite);
|
||||||
|
|
||||||
|
MODULE_DESCRIPTION("KUnit test of ext4 inode timestamp decoding");
|
||||||
MODULE_LICENSE("GPL v2");
|
MODULE_LICENSE("GPL v2");
|
||||||
|
|
263
fs/ext4/inode.c
263
fs/ext4/inode.c
|
@ -453,6 +453,35 @@ static void ext4_map_blocks_es_recheck(handle_t *handle,
|
||||||
}
|
}
|
||||||
#endif /* ES_AGGRESSIVE_TEST */
|
#endif /* ES_AGGRESSIVE_TEST */
|
||||||
|
|
||||||
|
static int ext4_map_query_blocks(handle_t *handle, struct inode *inode,
|
||||||
|
struct ext4_map_blocks *map)
|
||||||
|
{
|
||||||
|
unsigned int status;
|
||||||
|
int retval;
|
||||||
|
|
||||||
|
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
|
||||||
|
retval = ext4_ext_map_blocks(handle, inode, map, 0);
|
||||||
|
else
|
||||||
|
retval = ext4_ind_map_blocks(handle, inode, map, 0);
|
||||||
|
|
||||||
|
if (retval <= 0)
|
||||||
|
return retval;
|
||||||
|
|
||||||
|
if (unlikely(retval != map->m_len)) {
|
||||||
|
ext4_warning(inode->i_sb,
|
||||||
|
"ES len assertion failed for inode "
|
||||||
|
"%lu: retval %d != map->m_len %d",
|
||||||
|
inode->i_ino, retval, map->m_len);
|
||||||
|
WARN_ON(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
status = map->m_flags & EXT4_MAP_UNWRITTEN ?
|
||||||
|
EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
|
||||||
|
ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
|
||||||
|
map->m_pblk, status);
|
||||||
|
return retval;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The ext4_map_blocks() function tries to look up the requested blocks,
|
* The ext4_map_blocks() function tries to look up the requested blocks,
|
||||||
* and returns if the blocks are already mapped.
|
* and returns if the blocks are already mapped.
|
||||||
|
@ -1450,9 +1479,9 @@ static int ext4_journalled_write_end(struct file *file,
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Reserve space for a single cluster
|
* Reserve space for 'nr_resv' clusters
|
||||||
*/
|
*/
|
||||||
static int ext4_da_reserve_space(struct inode *inode)
|
static int ext4_da_reserve_space(struct inode *inode, int nr_resv)
|
||||||
{
|
{
|
||||||
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
|
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
|
||||||
struct ext4_inode_info *ei = EXT4_I(inode);
|
struct ext4_inode_info *ei = EXT4_I(inode);
|
||||||
|
@ -1463,18 +1492,18 @@ static int ext4_da_reserve_space(struct inode *inode)
|
||||||
* us from metadata over-estimation, though we may go over by
|
* us from metadata over-estimation, though we may go over by
|
||||||
* a small amount in the end. Here we just reserve for data.
|
* a small amount in the end. Here we just reserve for data.
|
||||||
*/
|
*/
|
||||||
ret = dquot_reserve_block(inode, EXT4_C2B(sbi, 1));
|
ret = dquot_reserve_block(inode, EXT4_C2B(sbi, nr_resv));
|
||||||
if (ret)
|
if (ret)
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
spin_lock(&ei->i_block_reservation_lock);
|
spin_lock(&ei->i_block_reservation_lock);
|
||||||
if (ext4_claim_free_clusters(sbi, 1, 0)) {
|
if (ext4_claim_free_clusters(sbi, nr_resv, 0)) {
|
||||||
spin_unlock(&ei->i_block_reservation_lock);
|
spin_unlock(&ei->i_block_reservation_lock);
|
||||||
dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
|
dquot_release_reservation_block(inode, EXT4_C2B(sbi, nr_resv));
|
||||||
return -ENOSPC;
|
return -ENOSPC;
|
||||||
}
|
}
|
||||||
ei->i_reserved_data_blocks++;
|
ei->i_reserved_data_blocks += nr_resv;
|
||||||
trace_ext4_da_reserve_space(inode);
|
trace_ext4_da_reserve_space(inode, nr_resv);
|
||||||
spin_unlock(&ei->i_block_reservation_lock);
|
spin_unlock(&ei->i_block_reservation_lock);
|
||||||
|
|
||||||
return 0; /* success */
|
return 0; /* success */
|
||||||
|
@ -1621,24 +1650,58 @@ static void ext4_print_free_blocks(struct inode *inode)
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* ext4_insert_delayed_block - adds a delayed block to the extents status
|
* Check whether the cluster containing lblk has been allocated or has
|
||||||
* tree, incrementing the reserved cluster/block
|
* delalloc reservation.
|
||||||
* count or making a pending reservation
|
|
||||||
* where needed
|
|
||||||
*
|
*
|
||||||
* @inode - file containing the newly added block
|
* Returns 0 if the cluster doesn't have either, 1 if it has delalloc
|
||||||
* @lblk - logical block to be added
|
* reservation, 2 if it's already been allocated, negative error code on
|
||||||
*
|
* failure.
|
||||||
* Returns 0 on success, negative error code on failure.
|
|
||||||
*/
|
*/
|
||||||
static int ext4_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk)
|
static int ext4_clu_alloc_state(struct inode *inode, ext4_lblk_t lblk)
|
||||||
{
|
{
|
||||||
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
|
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
|
||||||
int ret;
|
int ret;
|
||||||
bool allocated = false;
|
|
||||||
|
/* Has delalloc reservation? */
|
||||||
|
if (ext4_es_scan_clu(inode, &ext4_es_is_delonly, lblk))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
/* Already been allocated? */
|
||||||
|
if (ext4_es_scan_clu(inode, &ext4_es_is_mapped, lblk))
|
||||||
|
return 2;
|
||||||
|
ret = ext4_clu_mapped(inode, EXT4_B2C(sbi, lblk));
|
||||||
|
if (ret < 0)
|
||||||
|
return ret;
|
||||||
|
if (ret > 0)
|
||||||
|
return 2;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* ext4_insert_delayed_blocks - adds a multiple delayed blocks to the extents
|
||||||
|
* status tree, incrementing the reserved
|
||||||
|
* cluster/block count or making pending
|
||||||
|
* reservations where needed
|
||||||
|
*
|
||||||
|
* @inode - file containing the newly added block
|
||||||
|
* @lblk - start logical block to be added
|
||||||
|
* @len - length of blocks to be added
|
||||||
|
*
|
||||||
|
* Returns 0 on success, negative error code on failure.
|
||||||
|
*/
|
||||||
|
static int ext4_insert_delayed_blocks(struct inode *inode, ext4_lblk_t lblk,
|
||||||
|
ext4_lblk_t len)
|
||||||
|
{
|
||||||
|
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
|
||||||
|
int ret;
|
||||||
|
bool lclu_allocated = false;
|
||||||
|
bool end_allocated = false;
|
||||||
|
ext4_lblk_t resv_clu;
|
||||||
|
ext4_lblk_t end = lblk + len - 1;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If the cluster containing lblk is shared with a delayed,
|
* If the cluster containing lblk or end is shared with a delayed,
|
||||||
* written, or unwritten extent in a bigalloc file system, it's
|
* written, or unwritten extent in a bigalloc file system, it's
|
||||||
* already been accounted for and does not need to be reserved.
|
* already been accounted for and does not need to be reserved.
|
||||||
* A pending reservation must be made for the cluster if it's
|
* A pending reservation must be made for the cluster if it's
|
||||||
|
@ -1649,81 +1712,84 @@ static int ext4_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk)
|
||||||
* extents status tree doesn't get a match.
|
* extents status tree doesn't get a match.
|
||||||
*/
|
*/
|
||||||
if (sbi->s_cluster_ratio == 1) {
|
if (sbi->s_cluster_ratio == 1) {
|
||||||
ret = ext4_da_reserve_space(inode);
|
ret = ext4_da_reserve_space(inode, len);
|
||||||
if (ret != 0) /* ENOSPC */
|
if (ret != 0) /* ENOSPC */
|
||||||
return ret;
|
return ret;
|
||||||
} else { /* bigalloc */
|
} else { /* bigalloc */
|
||||||
if (!ext4_es_scan_clu(inode, &ext4_es_is_delonly, lblk)) {
|
resv_clu = EXT4_B2C(sbi, end) - EXT4_B2C(sbi, lblk) + 1;
|
||||||
if (!ext4_es_scan_clu(inode,
|
|
||||||
&ext4_es_is_mapped, lblk)) {
|
ret = ext4_clu_alloc_state(inode, lblk);
|
||||||
ret = ext4_clu_mapped(inode,
|
if (ret < 0)
|
||||||
EXT4_B2C(sbi, lblk));
|
return ret;
|
||||||
if (ret < 0)
|
if (ret > 0) {
|
||||||
return ret;
|
resv_clu--;
|
||||||
if (ret == 0) {
|
lclu_allocated = (ret == 2);
|
||||||
ret = ext4_da_reserve_space(inode);
|
}
|
||||||
if (ret != 0) /* ENOSPC */
|
|
||||||
return ret;
|
if (EXT4_B2C(sbi, lblk) != EXT4_B2C(sbi, end)) {
|
||||||
} else {
|
ret = ext4_clu_alloc_state(inode, end);
|
||||||
allocated = true;
|
if (ret < 0)
|
||||||
}
|
return ret;
|
||||||
} else {
|
if (ret > 0) {
|
||||||
allocated = true;
|
resv_clu--;
|
||||||
|
end_allocated = (ret == 2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (resv_clu) {
|
||||||
|
ret = ext4_da_reserve_space(inode, resv_clu);
|
||||||
|
if (ret != 0) /* ENOSPC */
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ext4_es_insert_delayed_block(inode, lblk, allocated);
|
ext4_es_insert_delayed_extent(inode, lblk, len, lclu_allocated,
|
||||||
|
end_allocated);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This function is grabs code from the very beginning of
|
* Looks up the requested blocks and sets the delalloc extent map.
|
||||||
* ext4_map_blocks, but assumes that the caller is from delayed write
|
* First try to look up for the extent entry that contains the requested
|
||||||
* time. This function looks up the requested blocks and sets the
|
* blocks in the extent status tree without i_data_sem, then try to look
|
||||||
* buffer delay bit under the protection of i_data_sem.
|
* up for the ondisk extent mapping with i_data_sem in read mode,
|
||||||
|
* finally hold i_data_sem in write mode, looks up again and add a
|
||||||
|
* delalloc extent entry if it still couldn't find any extent. Pass out
|
||||||
|
* the mapped extent through @map and return 0 on success.
|
||||||
*/
|
*/
|
||||||
static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
|
static int ext4_da_map_blocks(struct inode *inode, struct ext4_map_blocks *map)
|
||||||
struct ext4_map_blocks *map,
|
|
||||||
struct buffer_head *bh)
|
|
||||||
{
|
{
|
||||||
struct extent_status es;
|
struct extent_status es;
|
||||||
int retval;
|
int retval;
|
||||||
sector_t invalid_block = ~((sector_t) 0xffff);
|
|
||||||
#ifdef ES_AGGRESSIVE_TEST
|
#ifdef ES_AGGRESSIVE_TEST
|
||||||
struct ext4_map_blocks orig_map;
|
struct ext4_map_blocks orig_map;
|
||||||
|
|
||||||
memcpy(&orig_map, map, sizeof(*map));
|
memcpy(&orig_map, map, sizeof(*map));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
|
|
||||||
invalid_block = ~0;
|
|
||||||
|
|
||||||
map->m_flags = 0;
|
map->m_flags = 0;
|
||||||
ext_debug(inode, "max_blocks %u, logical block %lu\n", map->m_len,
|
ext_debug(inode, "max_blocks %u, logical block %lu\n", map->m_len,
|
||||||
(unsigned long) map->m_lblk);
|
(unsigned long) map->m_lblk);
|
||||||
|
|
||||||
/* Lookup extent status tree firstly */
|
/* Lookup extent status tree firstly */
|
||||||
if (ext4_es_lookup_extent(inode, iblock, NULL, &es)) {
|
if (ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) {
|
||||||
|
map->m_len = min_t(unsigned int, map->m_len,
|
||||||
|
es.es_len - (map->m_lblk - es.es_lblk));
|
||||||
|
|
||||||
if (ext4_es_is_hole(&es))
|
if (ext4_es_is_hole(&es))
|
||||||
goto add_delayed;
|
goto add_delayed;
|
||||||
|
|
||||||
|
found:
|
||||||
/*
|
/*
|
||||||
* Delayed extent could be allocated by fallocate.
|
* Delayed extent could be allocated by fallocate.
|
||||||
* So we need to check it.
|
* So we need to check it.
|
||||||
*/
|
*/
|
||||||
if (ext4_es_is_delayed(&es) && !ext4_es_is_unwritten(&es)) {
|
if (ext4_es_is_delonly(&es)) {
|
||||||
map_bh(bh, inode->i_sb, invalid_block);
|
map->m_flags |= EXT4_MAP_DELAYED;
|
||||||
set_buffer_new(bh);
|
|
||||||
set_buffer_delay(bh);
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
map->m_pblk = ext4_es_pblock(&es) + iblock - es.es_lblk;
|
map->m_pblk = ext4_es_pblock(&es) + map->m_lblk - es.es_lblk;
|
||||||
retval = es.es_len - (iblock - es.es_lblk);
|
|
||||||
if (retval > map->m_len)
|
|
||||||
retval = map->m_len;
|
|
||||||
map->m_len = retval;
|
|
||||||
if (ext4_es_is_written(&es))
|
if (ext4_es_is_written(&es))
|
||||||
map->m_flags |= EXT4_MAP_MAPPED;
|
map->m_flags |= EXT4_MAP_MAPPED;
|
||||||
else if (ext4_es_is_unwritten(&es))
|
else if (ext4_es_is_unwritten(&es))
|
||||||
|
@ -1734,7 +1800,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
|
||||||
#ifdef ES_AGGRESSIVE_TEST
|
#ifdef ES_AGGRESSIVE_TEST
|
||||||
ext4_map_blocks_es_recheck(NULL, inode, map, &orig_map, 0);
|
ext4_map_blocks_es_recheck(NULL, inode, map, &orig_map, 0);
|
||||||
#endif
|
#endif
|
||||||
return retval;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -1744,44 +1810,41 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
|
||||||
down_read(&EXT4_I(inode)->i_data_sem);
|
down_read(&EXT4_I(inode)->i_data_sem);
|
||||||
if (ext4_has_inline_data(inode))
|
if (ext4_has_inline_data(inode))
|
||||||
retval = 0;
|
retval = 0;
|
||||||
else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
|
|
||||||
retval = ext4_ext_map_blocks(NULL, inode, map, 0);
|
|
||||||
else
|
else
|
||||||
retval = ext4_ind_map_blocks(NULL, inode, map, 0);
|
retval = ext4_map_query_blocks(NULL, inode, map);
|
||||||
if (retval < 0) {
|
|
||||||
up_read(&EXT4_I(inode)->i_data_sem);
|
|
||||||
return retval;
|
|
||||||
}
|
|
||||||
if (retval > 0) {
|
|
||||||
unsigned int status;
|
|
||||||
|
|
||||||
if (unlikely(retval != map->m_len)) {
|
|
||||||
ext4_warning(inode->i_sb,
|
|
||||||
"ES len assertion failed for inode "
|
|
||||||
"%lu: retval %d != map->m_len %d",
|
|
||||||
inode->i_ino, retval, map->m_len);
|
|
||||||
WARN_ON(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
status = map->m_flags & EXT4_MAP_UNWRITTEN ?
|
|
||||||
EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
|
|
||||||
ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
|
|
||||||
map->m_pblk, status);
|
|
||||||
up_read(&EXT4_I(inode)->i_data_sem);
|
|
||||||
return retval;
|
|
||||||
}
|
|
||||||
up_read(&EXT4_I(inode)->i_data_sem);
|
up_read(&EXT4_I(inode)->i_data_sem);
|
||||||
|
if (retval)
|
||||||
|
return retval < 0 ? retval : 0;
|
||||||
|
|
||||||
add_delayed:
|
add_delayed:
|
||||||
down_write(&EXT4_I(inode)->i_data_sem);
|
down_write(&EXT4_I(inode)->i_data_sem);
|
||||||
retval = ext4_insert_delayed_block(inode, map->m_lblk);
|
/*
|
||||||
up_write(&EXT4_I(inode)->i_data_sem);
|
* Page fault path (ext4_page_mkwrite does not take i_rwsem)
|
||||||
if (retval)
|
* and fallocate path (no folio lock) can race. Make sure we
|
||||||
return retval;
|
* lookup the extent status tree here again while i_data_sem
|
||||||
|
* is held in write mode, before inserting a new da entry in
|
||||||
|
* the extent status tree.
|
||||||
|
*/
|
||||||
|
if (ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) {
|
||||||
|
map->m_len = min_t(unsigned int, map->m_len,
|
||||||
|
es.es_len - (map->m_lblk - es.es_lblk));
|
||||||
|
|
||||||
|
if (!ext4_es_is_hole(&es)) {
|
||||||
|
up_write(&EXT4_I(inode)->i_data_sem);
|
||||||
|
goto found;
|
||||||
|
}
|
||||||
|
} else if (!ext4_has_inline_data(inode)) {
|
||||||
|
retval = ext4_map_query_blocks(NULL, inode, map);
|
||||||
|
if (retval) {
|
||||||
|
up_write(&EXT4_I(inode)->i_data_sem);
|
||||||
|
return retval < 0 ? retval : 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
map->m_flags |= EXT4_MAP_DELAYED;
|
||||||
|
retval = ext4_insert_delayed_blocks(inode, map->m_lblk, map->m_len);
|
||||||
|
up_write(&EXT4_I(inode)->i_data_sem);
|
||||||
|
|
||||||
map_bh(bh, inode->i_sb, invalid_block);
|
|
||||||
set_buffer_new(bh);
|
|
||||||
set_buffer_delay(bh);
|
|
||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1801,11 +1864,15 @@ int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
|
||||||
struct buffer_head *bh, int create)
|
struct buffer_head *bh, int create)
|
||||||
{
|
{
|
||||||
struct ext4_map_blocks map;
|
struct ext4_map_blocks map;
|
||||||
|
sector_t invalid_block = ~((sector_t) 0xffff);
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
||||||
BUG_ON(create == 0);
|
BUG_ON(create == 0);
|
||||||
BUG_ON(bh->b_size != inode->i_sb->s_blocksize);
|
BUG_ON(bh->b_size != inode->i_sb->s_blocksize);
|
||||||
|
|
||||||
|
if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
|
||||||
|
invalid_block = ~0;
|
||||||
|
|
||||||
map.m_lblk = iblock;
|
map.m_lblk = iblock;
|
||||||
map.m_len = 1;
|
map.m_len = 1;
|
||||||
|
|
||||||
|
@ -1814,10 +1881,17 @@ int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
|
||||||
* preallocated blocks are unmapped but should treated
|
* preallocated blocks are unmapped but should treated
|
||||||
* the same as allocated blocks.
|
* the same as allocated blocks.
|
||||||
*/
|
*/
|
||||||
ret = ext4_da_map_blocks(inode, iblock, &map, bh);
|
ret = ext4_da_map_blocks(inode, &map);
|
||||||
if (ret <= 0)
|
if (ret < 0)
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
|
if (map.m_flags & EXT4_MAP_DELAYED) {
|
||||||
|
map_bh(bh, inode->i_sb, invalid_block);
|
||||||
|
set_buffer_new(bh);
|
||||||
|
set_buffer_delay(bh);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
map_bh(bh, inode->i_sb, map.m_pblk);
|
map_bh(bh, inode->i_sb, map.m_pblk);
|
||||||
ext4_update_bh_state(bh, map.m_flags);
|
ext4_update_bh_state(bh, map.m_flags);
|
||||||
|
|
||||||
|
@ -2945,6 +3019,11 @@ static int ext4_da_do_write_end(struct address_space *mapping,
|
||||||
bool disksize_changed = false;
|
bool disksize_changed = false;
|
||||||
loff_t new_i_size;
|
loff_t new_i_size;
|
||||||
|
|
||||||
|
if (unlikely(!folio_buffers(folio))) {
|
||||||
|
folio_unlock(folio);
|
||||||
|
folio_put(folio);
|
||||||
|
return -EIO;
|
||||||
|
}
|
||||||
/*
|
/*
|
||||||
* block_write_end() will mark the inode as dirty with I_DIRTY_PAGES
|
* block_write_end() will mark the inode as dirty with I_DIRTY_PAGES
|
||||||
* flag, which all that's needed to trigger page writeback.
|
* flag, which all that's needed to trigger page writeback.
|
||||||
|
|
|
@ -1151,7 +1151,7 @@ static int ext4_ioctl_getlabel(struct ext4_sb_info *sbi, char __user *user_label
|
||||||
BUILD_BUG_ON(EXT4_LABEL_MAX >= FSLABEL_MAX);
|
BUILD_BUG_ON(EXT4_LABEL_MAX >= FSLABEL_MAX);
|
||||||
|
|
||||||
lock_buffer(sbi->s_sbh);
|
lock_buffer(sbi->s_sbh);
|
||||||
strscpy_pad(label, sbi->s_es->s_volume_name);
|
memtostr_pad(label, sbi->s_es->s_volume_name);
|
||||||
unlock_buffer(sbi->s_sbh);
|
unlock_buffer(sbi->s_sbh);
|
||||||
|
|
||||||
if (copy_to_user(user_label, label, sizeof(label)))
|
if (copy_to_user(user_label, label, sizeof(label)))
|
||||||
|
|
|
@ -151,10 +151,11 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
|
||||||
|
|
||||||
return bh;
|
return bh;
|
||||||
}
|
}
|
||||||
if (!bh && (type == INDEX || type == DIRENT_HTREE)) {
|
/* The first directory block must not be a hole. */
|
||||||
|
if (!bh && (type == INDEX || type == DIRENT_HTREE || block == 0)) {
|
||||||
ext4_error_inode(inode, func, line, block,
|
ext4_error_inode(inode, func, line, block,
|
||||||
"Directory hole found for htree %s block",
|
"Directory hole found for htree %s block %u",
|
||||||
(type == INDEX) ? "index" : "leaf");
|
(type == INDEX) ? "index" : "leaf", block);
|
||||||
return ERR_PTR(-EFSCORRUPTED);
|
return ERR_PTR(-EFSCORRUPTED);
|
||||||
}
|
}
|
||||||
if (!bh)
|
if (!bh)
|
||||||
|
@ -2172,6 +2173,52 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
|
||||||
return err ? err : err2;
|
return err ? err : err2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool ext4_check_dx_root(struct inode *dir, struct dx_root *root)
|
||||||
|
{
|
||||||
|
struct fake_dirent *fde;
|
||||||
|
const char *error_msg;
|
||||||
|
unsigned int rlen;
|
||||||
|
unsigned int blocksize = dir->i_sb->s_blocksize;
|
||||||
|
char *blockend = (char *)root + dir->i_sb->s_blocksize;
|
||||||
|
|
||||||
|
fde = &root->dot;
|
||||||
|
if (unlikely(fde->name_len != 1)) {
|
||||||
|
error_msg = "invalid name_len for '.'";
|
||||||
|
goto corrupted;
|
||||||
|
}
|
||||||
|
if (unlikely(strncmp(root->dot_name, ".", fde->name_len))) {
|
||||||
|
error_msg = "invalid name for '.'";
|
||||||
|
goto corrupted;
|
||||||
|
}
|
||||||
|
rlen = ext4_rec_len_from_disk(fde->rec_len, blocksize);
|
||||||
|
if (unlikely((char *)fde + rlen >= blockend)) {
|
||||||
|
error_msg = "invalid rec_len for '.'";
|
||||||
|
goto corrupted;
|
||||||
|
}
|
||||||
|
|
||||||
|
fde = &root->dotdot;
|
||||||
|
if (unlikely(fde->name_len != 2)) {
|
||||||
|
error_msg = "invalid name_len for '..'";
|
||||||
|
goto corrupted;
|
||||||
|
}
|
||||||
|
if (unlikely(strncmp(root->dotdot_name, "..", fde->name_len))) {
|
||||||
|
error_msg = "invalid name for '..'";
|
||||||
|
goto corrupted;
|
||||||
|
}
|
||||||
|
rlen = ext4_rec_len_from_disk(fde->rec_len, blocksize);
|
||||||
|
if (unlikely((char *)fde + rlen >= blockend)) {
|
||||||
|
error_msg = "invalid rec_len for '..'";
|
||||||
|
goto corrupted;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
|
||||||
|
corrupted:
|
||||||
|
EXT4_ERROR_INODE(dir, "Corrupt dir, %s, running e2fsck is recommended",
|
||||||
|
error_msg);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This converts a one block unindexed directory to a 3 block indexed
|
* This converts a one block unindexed directory to a 3 block indexed
|
||||||
* directory, and adds the dentry to the indexed directory.
|
* directory, and adds the dentry to the indexed directory.
|
||||||
|
@ -2206,17 +2253,17 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
|
||||||
brelse(bh);
|
brelse(bh);
|
||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
||||||
root = (struct dx_root *) bh->b_data;
|
root = (struct dx_root *) bh->b_data;
|
||||||
|
if (!ext4_check_dx_root(dir, root)) {
|
||||||
|
brelse(bh);
|
||||||
|
return -EFSCORRUPTED;
|
||||||
|
}
|
||||||
|
|
||||||
/* The 0th block becomes the root, move the dirents out */
|
/* The 0th block becomes the root, move the dirents out */
|
||||||
fde = &root->dotdot;
|
fde = &root->dotdot;
|
||||||
de = (struct ext4_dir_entry_2 *)((char *)fde +
|
de = (struct ext4_dir_entry_2 *)((char *)fde +
|
||||||
ext4_rec_len_from_disk(fde->rec_len, blocksize));
|
ext4_rec_len_from_disk(fde->rec_len, blocksize));
|
||||||
if ((char *) de >= (((char *) root) + blocksize)) {
|
|
||||||
EXT4_ERROR_INODE(dir, "invalid rec_len for '..'");
|
|
||||||
brelse(bh);
|
|
||||||
return -EFSCORRUPTED;
|
|
||||||
}
|
|
||||||
len = ((char *) root) + (blocksize - csum_size) - (char *) de;
|
len = ((char *) root) + (blocksize - csum_size) - (char *) de;
|
||||||
|
|
||||||
/* Allocate new block for the 0th block's dirents */
|
/* Allocate new block for the 0th block's dirents */
|
||||||
|
@ -3038,10 +3085,7 @@ bool ext4_empty_dir(struct inode *inode)
|
||||||
EXT4_ERROR_INODE(inode, "invalid size");
|
EXT4_ERROR_INODE(inode, "invalid size");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
/* The first directory block must not be a hole,
|
bh = ext4_read_dirblock(inode, 0, EITHER);
|
||||||
* so treat it as DIRENT_HTREE
|
|
||||||
*/
|
|
||||||
bh = ext4_read_dirblock(inode, 0, DIRENT_HTREE);
|
|
||||||
if (IS_ERR(bh))
|
if (IS_ERR(bh))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
@ -3483,10 +3527,7 @@ static struct buffer_head *ext4_get_first_dir_block(handle_t *handle,
|
||||||
struct ext4_dir_entry_2 *de;
|
struct ext4_dir_entry_2 *de;
|
||||||
unsigned int offset;
|
unsigned int offset;
|
||||||
|
|
||||||
/* The first directory block must not be a hole, so
|
bh = ext4_read_dirblock(inode, 0, EITHER);
|
||||||
* treat it as DIRENT_HTREE
|
|
||||||
*/
|
|
||||||
bh = ext4_read_dirblock(inode, 0, DIRENT_HTREE);
|
|
||||||
if (IS_ERR(bh)) {
|
if (IS_ERR(bh)) {
|
||||||
*retval = PTR_ERR(bh);
|
*retval = PTR_ERR(bh);
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
|
@ -1327,6 +1327,9 @@ static void ext4_put_super(struct super_block *sb)
|
||||||
|
|
||||||
ext4_group_desc_free(sbi);
|
ext4_group_desc_free(sbi);
|
||||||
ext4_flex_groups_free(sbi);
|
ext4_flex_groups_free(sbi);
|
||||||
|
|
||||||
|
WARN_ON_ONCE(!(sbi->s_mount_state & EXT4_ERROR_FS) &&
|
||||||
|
percpu_counter_sum(&sbi->s_dirtyclusters_counter));
|
||||||
ext4_percpu_param_destroy(sbi);
|
ext4_percpu_param_destroy(sbi);
|
||||||
#ifdef CONFIG_QUOTA
|
#ifdef CONFIG_QUOTA
|
||||||
for (int i = 0; i < EXT4_MAXQUOTAS; i++)
|
for (int i = 0; i < EXT4_MAXQUOTAS; i++)
|
||||||
|
@ -1457,7 +1460,8 @@ static void ext4_destroy_inode(struct inode *inode)
|
||||||
dump_stack();
|
dump_stack();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (EXT4_I(inode)->i_reserved_data_blocks)
|
if (!(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ERROR_FS) &&
|
||||||
|
WARN_ON_ONCE(EXT4_I(inode)->i_reserved_data_blocks))
|
||||||
ext4_msg(inode->i_sb, KERN_ERR,
|
ext4_msg(inode->i_sb, KERN_ERR,
|
||||||
"Inode %lu (%p): i_reserved_data_blocks (%u) not cleared!",
|
"Inode %lu (%p): i_reserved_data_blocks (%u) not cleared!",
|
||||||
inode->i_ino, EXT4_I(inode),
|
inode->i_ino, EXT4_I(inode),
|
||||||
|
|
|
@ -1433,6 +1433,12 @@ retry:
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
memcpy(bh->b_data, buf, csize);
|
memcpy(bh->b_data, buf, csize);
|
||||||
|
/*
|
||||||
|
* Zero out block tail to avoid writing uninitialized memory
|
||||||
|
* to disk.
|
||||||
|
*/
|
||||||
|
if (csize < blocksize)
|
||||||
|
memset(bh->b_data + csize, 0, blocksize - csize);
|
||||||
set_buffer_uptodate(bh);
|
set_buffer_uptodate(bh);
|
||||||
ext4_handle_dirty_metadata(handle, ea_inode, bh);
|
ext4_handle_dirty_metadata(handle, ea_inode, bh);
|
||||||
|
|
||||||
|
|
|
@ -353,7 +353,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
|
||||||
struct buffer_head *descriptor;
|
struct buffer_head *descriptor;
|
||||||
struct buffer_head **wbuf = journal->j_wbuf;
|
struct buffer_head **wbuf = journal->j_wbuf;
|
||||||
int bufs;
|
int bufs;
|
||||||
int flags;
|
int escape;
|
||||||
int err;
|
int err;
|
||||||
unsigned long long blocknr;
|
unsigned long long blocknr;
|
||||||
ktime_t start_time;
|
ktime_t start_time;
|
||||||
|
@ -660,10 +660,10 @@ void jbd2_journal_commit_transaction(journal_t *journal)
|
||||||
*/
|
*/
|
||||||
set_bit(BH_JWrite, &jh2bh(jh)->b_state);
|
set_bit(BH_JWrite, &jh2bh(jh)->b_state);
|
||||||
JBUFFER_TRACE(jh, "ph3: write metadata");
|
JBUFFER_TRACE(jh, "ph3: write metadata");
|
||||||
flags = jbd2_journal_write_metadata_buffer(commit_transaction,
|
escape = jbd2_journal_write_metadata_buffer(commit_transaction,
|
||||||
jh, &wbuf[bufs], blocknr);
|
jh, &wbuf[bufs], blocknr);
|
||||||
if (flags < 0) {
|
if (escape < 0) {
|
||||||
jbd2_journal_abort(journal, flags);
|
jbd2_journal_abort(journal, escape);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
jbd2_file_log_bh(&io_bufs, wbuf[bufs]);
|
jbd2_file_log_bh(&io_bufs, wbuf[bufs]);
|
||||||
|
@ -672,7 +672,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
|
||||||
buffer */
|
buffer */
|
||||||
|
|
||||||
tag_flag = 0;
|
tag_flag = 0;
|
||||||
if (flags & 1)
|
if (escape)
|
||||||
tag_flag |= JBD2_FLAG_ESCAPE;
|
tag_flag |= JBD2_FLAG_ESCAPE;
|
||||||
if (!first_tag)
|
if (!first_tag)
|
||||||
tag_flag |= JBD2_FLAG_SAME_UUID;
|
tag_flag |= JBD2_FLAG_SAME_UUID;
|
||||||
|
@ -766,7 +766,7 @@ start_journal_io:
|
||||||
if (first_block < journal->j_tail)
|
if (first_block < journal->j_tail)
|
||||||
freed += journal->j_last - journal->j_first;
|
freed += journal->j_last - journal->j_first;
|
||||||
/* Update tail only if we free significant amount of space */
|
/* Update tail only if we free significant amount of space */
|
||||||
if (freed < jbd2_journal_get_max_txn_bufs(journal))
|
if (freed < journal->j_max_transaction_buffers)
|
||||||
update_tail = 0;
|
update_tail = 0;
|
||||||
}
|
}
|
||||||
J_ASSERT(commit_transaction->t_state == T_COMMIT);
|
J_ASSERT(commit_transaction->t_state == T_COMMIT);
|
||||||
|
@ -1107,7 +1107,7 @@ restart_loop:
|
||||||
|
|
||||||
commit_transaction->t_state = T_COMMIT_CALLBACK;
|
commit_transaction->t_state = T_COMMIT_CALLBACK;
|
||||||
J_ASSERT(commit_transaction == journal->j_committing_transaction);
|
J_ASSERT(commit_transaction == journal->j_committing_transaction);
|
||||||
journal->j_commit_sequence = commit_transaction->t_tid;
|
WRITE_ONCE(journal->j_commit_sequence, commit_transaction->t_tid);
|
||||||
journal->j_committing_transaction = NULL;
|
journal->j_committing_transaction = NULL;
|
||||||
commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
|
commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
|
||||||
|
|
||||||
|
|
|
@ -220,19 +220,12 @@ loop:
|
||||||
* so we don't sleep
|
* so we don't sleep
|
||||||
*/
|
*/
|
||||||
DEFINE_WAIT(wait);
|
DEFINE_WAIT(wait);
|
||||||
int should_sleep = 1;
|
|
||||||
|
|
||||||
prepare_to_wait(&journal->j_wait_commit, &wait,
|
prepare_to_wait(&journal->j_wait_commit, &wait,
|
||||||
TASK_INTERRUPTIBLE);
|
TASK_INTERRUPTIBLE);
|
||||||
if (journal->j_commit_sequence != journal->j_commit_request)
|
|
||||||
should_sleep = 0;
|
|
||||||
transaction = journal->j_running_transaction;
|
transaction = journal->j_running_transaction;
|
||||||
if (transaction && time_after_eq(jiffies,
|
if (transaction == NULL ||
|
||||||
transaction->t_expires))
|
time_before(jiffies, transaction->t_expires)) {
|
||||||
should_sleep = 0;
|
|
||||||
if (journal->j_flags & JBD2_UNMOUNT)
|
|
||||||
should_sleep = 0;
|
|
||||||
if (should_sleep) {
|
|
||||||
write_unlock(&journal->j_state_lock);
|
write_unlock(&journal->j_state_lock);
|
||||||
schedule();
|
schedule();
|
||||||
write_lock(&journal->j_state_lock);
|
write_lock(&journal->j_state_lock);
|
||||||
|
@ -316,11 +309,8 @@ static void journal_kill_thread(journal_t *journal)
|
||||||
*
|
*
|
||||||
* Return value:
|
* Return value:
|
||||||
* <0: Error
|
* <0: Error
|
||||||
* >=0: Finished OK
|
* =0: Finished OK without escape
|
||||||
*
|
* =1: Finished OK with escape
|
||||||
* On success:
|
|
||||||
* Bit 0 set == escape performed on the data
|
|
||||||
* Bit 1 set == buffer copy-out performed (kfree the data after IO)
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
|
int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
|
||||||
|
@ -328,7 +318,6 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
|
||||||
struct buffer_head **bh_out,
|
struct buffer_head **bh_out,
|
||||||
sector_t blocknr)
|
sector_t blocknr)
|
||||||
{
|
{
|
||||||
int need_copy_out = 0;
|
|
||||||
int done_copy_out = 0;
|
int done_copy_out = 0;
|
||||||
int do_escape = 0;
|
int do_escape = 0;
|
||||||
char *mapped_data;
|
char *mapped_data;
|
||||||
|
@ -355,7 +344,6 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
|
||||||
atomic_set(&new_bh->b_count, 1);
|
atomic_set(&new_bh->b_count, 1);
|
||||||
|
|
||||||
spin_lock(&jh_in->b_state_lock);
|
spin_lock(&jh_in->b_state_lock);
|
||||||
repeat:
|
|
||||||
/*
|
/*
|
||||||
* If a new transaction has already done a buffer copy-out, then
|
* If a new transaction has already done a buffer copy-out, then
|
||||||
* we use that version of the data for the commit.
|
* we use that version of the data for the commit.
|
||||||
|
@ -365,8 +353,8 @@ repeat:
|
||||||
new_folio = virt_to_folio(jh_in->b_frozen_data);
|
new_folio = virt_to_folio(jh_in->b_frozen_data);
|
||||||
new_offset = offset_in_folio(new_folio, jh_in->b_frozen_data);
|
new_offset = offset_in_folio(new_folio, jh_in->b_frozen_data);
|
||||||
} else {
|
} else {
|
||||||
new_folio = jh2bh(jh_in)->b_folio;
|
new_folio = bh_in->b_folio;
|
||||||
new_offset = offset_in_folio(new_folio, jh2bh(jh_in)->b_data);
|
new_offset = offset_in_folio(new_folio, bh_in->b_data);
|
||||||
}
|
}
|
||||||
|
|
||||||
mapped_data = kmap_local_folio(new_folio, new_offset);
|
mapped_data = kmap_local_folio(new_folio, new_offset);
|
||||||
|
@ -383,54 +371,52 @@ repeat:
|
||||||
/*
|
/*
|
||||||
* Check for escaping
|
* Check for escaping
|
||||||
*/
|
*/
|
||||||
if (*((__be32 *)mapped_data) == cpu_to_be32(JBD2_MAGIC_NUMBER)) {
|
if (*((__be32 *)mapped_data) == cpu_to_be32(JBD2_MAGIC_NUMBER))
|
||||||
need_copy_out = 1;
|
|
||||||
do_escape = 1;
|
do_escape = 1;
|
||||||
}
|
|
||||||
kunmap_local(mapped_data);
|
kunmap_local(mapped_data);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Do we need to do a data copy?
|
* Do we need to do a data copy?
|
||||||
*/
|
*/
|
||||||
if (need_copy_out && !done_copy_out) {
|
if (do_escape && !done_copy_out) {
|
||||||
char *tmp;
|
char *tmp;
|
||||||
|
|
||||||
spin_unlock(&jh_in->b_state_lock);
|
spin_unlock(&jh_in->b_state_lock);
|
||||||
tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS);
|
tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS);
|
||||||
if (!tmp) {
|
if (!tmp) {
|
||||||
brelse(new_bh);
|
brelse(new_bh);
|
||||||
|
free_buffer_head(new_bh);
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
}
|
}
|
||||||
spin_lock(&jh_in->b_state_lock);
|
spin_lock(&jh_in->b_state_lock);
|
||||||
if (jh_in->b_frozen_data) {
|
if (jh_in->b_frozen_data) {
|
||||||
jbd2_free(tmp, bh_in->b_size);
|
jbd2_free(tmp, bh_in->b_size);
|
||||||
goto repeat;
|
goto copy_done;
|
||||||
}
|
}
|
||||||
|
|
||||||
jh_in->b_frozen_data = tmp;
|
jh_in->b_frozen_data = tmp;
|
||||||
memcpy_from_folio(tmp, new_folio, new_offset, bh_in->b_size);
|
memcpy_from_folio(tmp, new_folio, new_offset, bh_in->b_size);
|
||||||
|
|
||||||
new_folio = virt_to_folio(tmp);
|
|
||||||
new_offset = offset_in_folio(new_folio, tmp);
|
|
||||||
done_copy_out = 1;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This isn't strictly necessary, as we're using frozen
|
* This isn't strictly necessary, as we're using frozen
|
||||||
* data for the escaping, but it keeps consistency with
|
* data for the escaping, but it keeps consistency with
|
||||||
* b_frozen_data usage.
|
* b_frozen_data usage.
|
||||||
*/
|
*/
|
||||||
jh_in->b_frozen_triggers = jh_in->b_triggers;
|
jh_in->b_frozen_triggers = jh_in->b_triggers;
|
||||||
|
|
||||||
|
copy_done:
|
||||||
|
new_folio = virt_to_folio(jh_in->b_frozen_data);
|
||||||
|
new_offset = offset_in_folio(new_folio, jh_in->b_frozen_data);
|
||||||
|
done_copy_out = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Did we need to do an escaping? Now we've done all the
|
* Did we need to do an escaping? Now we've done all the
|
||||||
* copying, we can finally do so.
|
* copying, we can finally do so.
|
||||||
|
* b_frozen_data is from jbd2_alloc() which always provides an
|
||||||
|
* address from the direct kernels mapping.
|
||||||
*/
|
*/
|
||||||
if (do_escape) {
|
if (do_escape)
|
||||||
mapped_data = kmap_local_folio(new_folio, new_offset);
|
*((unsigned int *)jh_in->b_frozen_data) = 0;
|
||||||
*((unsigned int *)mapped_data) = 0;
|
|
||||||
kunmap_local(mapped_data);
|
|
||||||
}
|
|
||||||
|
|
||||||
folio_set_bh(new_bh, new_folio, new_offset);
|
folio_set_bh(new_bh, new_folio, new_offset);
|
||||||
new_bh->b_size = bh_in->b_size;
|
new_bh->b_size = bh_in->b_size;
|
||||||
|
@ -454,7 +440,7 @@ repeat:
|
||||||
set_buffer_shadow(bh_in);
|
set_buffer_shadow(bh_in);
|
||||||
spin_unlock(&jh_in->b_state_lock);
|
spin_unlock(&jh_in->b_state_lock);
|
||||||
|
|
||||||
return do_escape | (done_copy_out << 1);
|
return do_escape;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -789,17 +775,7 @@ EXPORT_SYMBOL(jbd2_fc_end_commit_fallback);
|
||||||
/* Return 1 when transaction with given tid has already committed. */
|
/* Return 1 when transaction with given tid has already committed. */
|
||||||
int jbd2_transaction_committed(journal_t *journal, tid_t tid)
|
int jbd2_transaction_committed(journal_t *journal, tid_t tid)
|
||||||
{
|
{
|
||||||
int ret = 1;
|
return tid_geq(READ_ONCE(journal->j_commit_sequence), tid);
|
||||||
|
|
||||||
read_lock(&journal->j_state_lock);
|
|
||||||
if (journal->j_running_transaction &&
|
|
||||||
journal->j_running_transaction->t_tid == tid)
|
|
||||||
ret = 0;
|
|
||||||
if (journal->j_committing_transaction &&
|
|
||||||
journal->j_committing_transaction->t_tid == tid)
|
|
||||||
ret = 0;
|
|
||||||
read_unlock(&journal->j_state_lock);
|
|
||||||
return ret;
|
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(jbd2_transaction_committed);
|
EXPORT_SYMBOL(jbd2_transaction_committed);
|
||||||
|
|
||||||
|
@ -1451,6 +1427,48 @@ static int journal_revoke_records_per_block(journal_t *journal)
|
||||||
return space / record_size;
|
return space / record_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int jbd2_journal_get_max_txn_bufs(journal_t *journal)
|
||||||
|
{
|
||||||
|
return (journal->j_total_len - journal->j_fc_wbufsize) / 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Base amount of descriptor blocks we reserve for each transaction.
|
||||||
|
*/
|
||||||
|
static int jbd2_descriptor_blocks_per_trans(journal_t *journal)
|
||||||
|
{
|
||||||
|
int tag_space = journal->j_blocksize - sizeof(journal_header_t);
|
||||||
|
int tags_per_block;
|
||||||
|
|
||||||
|
/* Subtract UUID */
|
||||||
|
tag_space -= 16;
|
||||||
|
if (jbd2_journal_has_csum_v2or3(journal))
|
||||||
|
tag_space -= sizeof(struct jbd2_journal_block_tail);
|
||||||
|
/* Commit code leaves a slack space of 16 bytes at the end of block */
|
||||||
|
tags_per_block = (tag_space - 16) / journal_tag_bytes(journal);
|
||||||
|
/*
|
||||||
|
* Revoke descriptors are accounted separately so we need to reserve
|
||||||
|
* space for commit block and normal transaction descriptor blocks.
|
||||||
|
*/
|
||||||
|
return 1 + DIV_ROUND_UP(jbd2_journal_get_max_txn_bufs(journal),
|
||||||
|
tags_per_block);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Initialize number of blocks each transaction reserves for its bookkeeping
|
||||||
|
* and maximum number of blocks a transaction can use. This needs to be called
|
||||||
|
* after the journal size and the fastcommit area size are initialized.
|
||||||
|
*/
|
||||||
|
static void jbd2_journal_init_transaction_limits(journal_t *journal)
|
||||||
|
{
|
||||||
|
journal->j_revoke_records_per_block =
|
||||||
|
journal_revoke_records_per_block(journal);
|
||||||
|
journal->j_transaction_overhead_buffers =
|
||||||
|
jbd2_descriptor_blocks_per_trans(journal);
|
||||||
|
journal->j_max_transaction_buffers =
|
||||||
|
jbd2_journal_get_max_txn_bufs(journal);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Load the on-disk journal superblock and read the key fields into the
|
* Load the on-disk journal superblock and read the key fields into the
|
||||||
* journal_t.
|
* journal_t.
|
||||||
|
@ -1492,8 +1510,8 @@ static int journal_load_superblock(journal_t *journal)
|
||||||
if (jbd2_journal_has_csum_v2or3(journal))
|
if (jbd2_journal_has_csum_v2or3(journal))
|
||||||
journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid,
|
journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid,
|
||||||
sizeof(sb->s_uuid));
|
sizeof(sb->s_uuid));
|
||||||
journal->j_revoke_records_per_block =
|
/* After journal features are set, we can compute transaction limits */
|
||||||
journal_revoke_records_per_block(journal);
|
jbd2_journal_init_transaction_limits(journal);
|
||||||
|
|
||||||
if (jbd2_has_feature_fast_commit(journal)) {
|
if (jbd2_has_feature_fast_commit(journal)) {
|
||||||
journal->j_fc_last = be32_to_cpu(sb->s_maxlen);
|
journal->j_fc_last = be32_to_cpu(sb->s_maxlen);
|
||||||
|
@ -1599,7 +1617,6 @@ static journal_t *journal_init_common(struct block_device *bdev,
|
||||||
|
|
||||||
journal->j_shrinker->scan_objects = jbd2_journal_shrink_scan;
|
journal->j_shrinker->scan_objects = jbd2_journal_shrink_scan;
|
||||||
journal->j_shrinker->count_objects = jbd2_journal_shrink_count;
|
journal->j_shrinker->count_objects = jbd2_journal_shrink_count;
|
||||||
journal->j_shrinker->batch = journal->j_max_transaction_buffers;
|
|
||||||
journal->j_shrinker->private_data = journal;
|
journal->j_shrinker->private_data = journal;
|
||||||
|
|
||||||
shrinker_register(journal->j_shrinker);
|
shrinker_register(journal->j_shrinker);
|
||||||
|
@ -1743,8 +1760,6 @@ static int journal_reset(journal_t *journal)
|
||||||
journal->j_commit_sequence = journal->j_transaction_sequence - 1;
|
journal->j_commit_sequence = journal->j_transaction_sequence - 1;
|
||||||
journal->j_commit_request = journal->j_commit_sequence;
|
journal->j_commit_request = journal->j_commit_sequence;
|
||||||
|
|
||||||
journal->j_max_transaction_buffers = jbd2_journal_get_max_txn_bufs(journal);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Now that journal recovery is done, turn fast commits off here. This
|
* Now that journal recovery is done, turn fast commits off here. This
|
||||||
* way, if fast commit was enabled before the crash but if now FS has
|
* way, if fast commit was enabled before the crash but if now FS has
|
||||||
|
@ -2285,8 +2300,6 @@ jbd2_journal_initialize_fast_commit(journal_t *journal)
|
||||||
journal->j_fc_first = journal->j_last + 1;
|
journal->j_fc_first = journal->j_last + 1;
|
||||||
journal->j_fc_off = 0;
|
journal->j_fc_off = 0;
|
||||||
journal->j_free = journal->j_last - journal->j_first;
|
journal->j_free = journal->j_last - journal->j_first;
|
||||||
journal->j_max_transaction_buffers =
|
|
||||||
jbd2_journal_get_max_txn_bufs(journal);
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -2374,8 +2387,7 @@ int jbd2_journal_set_features(journal_t *journal, unsigned long compat,
|
||||||
sb->s_feature_ro_compat |= cpu_to_be32(ro);
|
sb->s_feature_ro_compat |= cpu_to_be32(ro);
|
||||||
sb->s_feature_incompat |= cpu_to_be32(incompat);
|
sb->s_feature_incompat |= cpu_to_be32(incompat);
|
||||||
unlock_buffer(journal->j_sb_buffer);
|
unlock_buffer(journal->j_sb_buffer);
|
||||||
journal->j_revoke_records_per_block =
|
jbd2_journal_init_transaction_limits(journal);
|
||||||
journal_revoke_records_per_block(journal);
|
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
#undef COMPAT_FEATURE_ON
|
#undef COMPAT_FEATURE_ON
|
||||||
|
@ -2406,8 +2418,7 @@ void jbd2_journal_clear_features(journal_t *journal, unsigned long compat,
|
||||||
sb->s_feature_compat &= ~cpu_to_be32(compat);
|
sb->s_feature_compat &= ~cpu_to_be32(compat);
|
||||||
sb->s_feature_ro_compat &= ~cpu_to_be32(ro);
|
sb->s_feature_ro_compat &= ~cpu_to_be32(ro);
|
||||||
sb->s_feature_incompat &= ~cpu_to_be32(incompat);
|
sb->s_feature_incompat &= ~cpu_to_be32(incompat);
|
||||||
journal->j_revoke_records_per_block =
|
jbd2_journal_init_transaction_limits(journal);
|
||||||
journal_revoke_records_per_block(journal);
|
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(jbd2_journal_clear_features);
|
EXPORT_SYMBOL(jbd2_journal_clear_features);
|
||||||
|
|
||||||
|
|
|
@ -19,6 +19,7 @@
|
||||||
#include <linux/errno.h>
|
#include <linux/errno.h>
|
||||||
#include <linux/crc32.h>
|
#include <linux/crc32.h>
|
||||||
#include <linux/blkdev.h>
|
#include <linux/blkdev.h>
|
||||||
|
#include <linux/string_choices.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -374,7 +375,7 @@ int jbd2_journal_skip_recovery(journal_t *journal)
|
||||||
be32_to_cpu(journal->j_superblock->s_sequence);
|
be32_to_cpu(journal->j_superblock->s_sequence);
|
||||||
jbd2_debug(1,
|
jbd2_debug(1,
|
||||||
"JBD2: ignoring %d transaction%s from the journal.\n",
|
"JBD2: ignoring %d transaction%s from the journal.\n",
|
||||||
dropped, (dropped == 1) ? "" : "s");
|
dropped, str_plural(dropped));
|
||||||
#endif
|
#endif
|
||||||
journal->j_transaction_sequence = ++info.end_transaction;
|
journal->j_transaction_sequence = ++info.end_transaction;
|
||||||
journal->j_head = info.head_block;
|
journal->j_head = info.head_block;
|
||||||
|
@ -443,6 +444,27 @@ static int jbd2_commit_block_csum_verify(journal_t *j, void *buf)
|
||||||
return provided == cpu_to_be32(calculated);
|
return provided == cpu_to_be32(calculated);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool jbd2_commit_block_csum_verify_partial(journal_t *j, void *buf)
|
||||||
|
{
|
||||||
|
struct commit_header *h;
|
||||||
|
__be32 provided;
|
||||||
|
__u32 calculated;
|
||||||
|
void *tmpbuf;
|
||||||
|
|
||||||
|
tmpbuf = kzalloc(j->j_blocksize, GFP_KERNEL);
|
||||||
|
if (!tmpbuf)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
memcpy(tmpbuf, buf, sizeof(struct commit_header));
|
||||||
|
h = tmpbuf;
|
||||||
|
provided = h->h_chksum[0];
|
||||||
|
h->h_chksum[0] = 0;
|
||||||
|
calculated = jbd2_chksum(j, j->j_csum_seed, tmpbuf, j->j_blocksize);
|
||||||
|
kfree(tmpbuf);
|
||||||
|
|
||||||
|
return provided == cpu_to_be32(calculated);
|
||||||
|
}
|
||||||
|
|
||||||
static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag,
|
static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag,
|
||||||
journal_block_tag3_t *tag3,
|
journal_block_tag3_t *tag3,
|
||||||
void *buf, __u32 sequence)
|
void *buf, __u32 sequence)
|
||||||
|
@ -810,6 +832,13 @@ static int do_one_pass(journal_t *journal,
|
||||||
if (pass == PASS_SCAN &&
|
if (pass == PASS_SCAN &&
|
||||||
!jbd2_commit_block_csum_verify(journal,
|
!jbd2_commit_block_csum_verify(journal,
|
||||||
bh->b_data)) {
|
bh->b_data)) {
|
||||||
|
if (jbd2_commit_block_csum_verify_partial(
|
||||||
|
journal,
|
||||||
|
bh->b_data)) {
|
||||||
|
pr_notice("JBD2: Find incomplete commit block in transaction %u block %lu\n",
|
||||||
|
next_commit_ID, next_log_block);
|
||||||
|
goto chksum_ok;
|
||||||
|
}
|
||||||
chksum_error:
|
chksum_error:
|
||||||
if (commit_time < last_trans_commit_time)
|
if (commit_time < last_trans_commit_time)
|
||||||
goto ignore_crc_mismatch;
|
goto ignore_crc_mismatch;
|
||||||
|
@ -824,6 +853,7 @@ static int do_one_pass(journal_t *journal,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (pass == PASS_SCAN) {
|
if (pass == PASS_SCAN) {
|
||||||
|
chksum_ok:
|
||||||
last_trans_commit_time = commit_time;
|
last_trans_commit_time = commit_time;
|
||||||
head_block = next_log_block;
|
head_block = next_log_block;
|
||||||
}
|
}
|
||||||
|
@ -843,6 +873,7 @@ static int do_one_pass(journal_t *journal,
|
||||||
next_log_block);
|
next_log_block);
|
||||||
need_check_commit_time = true;
|
need_check_commit_time = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* If we aren't in the REVOKE pass, then we can
|
/* If we aren't in the REVOKE pass, then we can
|
||||||
* just skip over this block. */
|
* just skip over this block. */
|
||||||
if (pass != PASS_REVOKE) {
|
if (pass != PASS_REVOKE) {
|
||||||
|
|
|
@ -62,28 +62,6 @@ void jbd2_journal_free_transaction(transaction_t *transaction)
|
||||||
kmem_cache_free(transaction_cache, transaction);
|
kmem_cache_free(transaction_cache, transaction);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Base amount of descriptor blocks we reserve for each transaction.
|
|
||||||
*/
|
|
||||||
static int jbd2_descriptor_blocks_per_trans(journal_t *journal)
|
|
||||||
{
|
|
||||||
int tag_space = journal->j_blocksize - sizeof(journal_header_t);
|
|
||||||
int tags_per_block;
|
|
||||||
|
|
||||||
/* Subtract UUID */
|
|
||||||
tag_space -= 16;
|
|
||||||
if (jbd2_journal_has_csum_v2or3(journal))
|
|
||||||
tag_space -= sizeof(struct jbd2_journal_block_tail);
|
|
||||||
/* Commit code leaves a slack space of 16 bytes at the end of block */
|
|
||||||
tags_per_block = (tag_space - 16) / journal_tag_bytes(journal);
|
|
||||||
/*
|
|
||||||
* Revoke descriptors are accounted separately so we need to reserve
|
|
||||||
* space for commit block and normal transaction descriptor blocks.
|
|
||||||
*/
|
|
||||||
return 1 + DIV_ROUND_UP(journal->j_max_transaction_buffers,
|
|
||||||
tags_per_block);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* jbd2_get_transaction: obtain a new transaction_t object.
|
* jbd2_get_transaction: obtain a new transaction_t object.
|
||||||
*
|
*
|
||||||
|
@ -109,7 +87,7 @@ static void jbd2_get_transaction(journal_t *journal,
|
||||||
transaction->t_expires = jiffies + journal->j_commit_interval;
|
transaction->t_expires = jiffies + journal->j_commit_interval;
|
||||||
atomic_set(&transaction->t_updates, 0);
|
atomic_set(&transaction->t_updates, 0);
|
||||||
atomic_set(&transaction->t_outstanding_credits,
|
atomic_set(&transaction->t_outstanding_credits,
|
||||||
jbd2_descriptor_blocks_per_trans(journal) +
|
journal->j_transaction_overhead_buffers +
|
||||||
atomic_read(&journal->j_reserved_credits));
|
atomic_read(&journal->j_reserved_credits));
|
||||||
atomic_set(&transaction->t_outstanding_revokes, 0);
|
atomic_set(&transaction->t_outstanding_revokes, 0);
|
||||||
atomic_set(&transaction->t_handle_count, 0);
|
atomic_set(&transaction->t_handle_count, 0);
|
||||||
|
@ -213,6 +191,13 @@ static void sub_reserved_credits(journal_t *journal, int blocks)
|
||||||
wake_up(&journal->j_wait_reserved);
|
wake_up(&journal->j_wait_reserved);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Maximum number of blocks for user transaction payload */
|
||||||
|
static int jbd2_max_user_trans_buffers(journal_t *journal)
|
||||||
|
{
|
||||||
|
return journal->j_max_transaction_buffers -
|
||||||
|
journal->j_transaction_overhead_buffers;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Wait until we can add credits for handle to the running transaction. Called
|
* Wait until we can add credits for handle to the running transaction. Called
|
||||||
* with j_state_lock held for reading. Returns 0 if handle joined the running
|
* with j_state_lock held for reading. Returns 0 if handle joined the running
|
||||||
|
@ -262,12 +247,12 @@ __must_hold(&journal->j_state_lock)
|
||||||
* big to fit this handle? Wait until reserved credits are freed.
|
* big to fit this handle? Wait until reserved credits are freed.
|
||||||
*/
|
*/
|
||||||
if (atomic_read(&journal->j_reserved_credits) + total >
|
if (atomic_read(&journal->j_reserved_credits) + total >
|
||||||
journal->j_max_transaction_buffers) {
|
jbd2_max_user_trans_buffers(journal)) {
|
||||||
read_unlock(&journal->j_state_lock);
|
read_unlock(&journal->j_state_lock);
|
||||||
jbd2_might_wait_for_commit(journal);
|
jbd2_might_wait_for_commit(journal);
|
||||||
wait_event(journal->j_wait_reserved,
|
wait_event(journal->j_wait_reserved,
|
||||||
atomic_read(&journal->j_reserved_credits) + total <=
|
atomic_read(&journal->j_reserved_credits) + total <=
|
||||||
journal->j_max_transaction_buffers);
|
jbd2_max_user_trans_buffers(journal));
|
||||||
__acquire(&journal->j_state_lock); /* fake out sparse */
|
__acquire(&journal->j_state_lock); /* fake out sparse */
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
@ -307,14 +292,14 @@ __must_hold(&journal->j_state_lock)
|
||||||
|
|
||||||
needed = atomic_add_return(rsv_blocks, &journal->j_reserved_credits);
|
needed = atomic_add_return(rsv_blocks, &journal->j_reserved_credits);
|
||||||
/* We allow at most half of a transaction to be reserved */
|
/* We allow at most half of a transaction to be reserved */
|
||||||
if (needed > journal->j_max_transaction_buffers / 2) {
|
if (needed > jbd2_max_user_trans_buffers(journal) / 2) {
|
||||||
sub_reserved_credits(journal, rsv_blocks);
|
sub_reserved_credits(journal, rsv_blocks);
|
||||||
atomic_sub(total, &t->t_outstanding_credits);
|
atomic_sub(total, &t->t_outstanding_credits);
|
||||||
read_unlock(&journal->j_state_lock);
|
read_unlock(&journal->j_state_lock);
|
||||||
jbd2_might_wait_for_commit(journal);
|
jbd2_might_wait_for_commit(journal);
|
||||||
wait_event(journal->j_wait_reserved,
|
wait_event(journal->j_wait_reserved,
|
||||||
atomic_read(&journal->j_reserved_credits) + rsv_blocks
|
atomic_read(&journal->j_reserved_credits) + rsv_blocks
|
||||||
<= journal->j_max_transaction_buffers / 2);
|
<= jbd2_max_user_trans_buffers(journal) / 2);
|
||||||
__acquire(&journal->j_state_lock); /* fake out sparse */
|
__acquire(&journal->j_state_lock); /* fake out sparse */
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
@ -344,12 +329,12 @@ static int start_this_handle(journal_t *journal, handle_t *handle,
|
||||||
* size and limit the number of total credits to not exceed maximum
|
* size and limit the number of total credits to not exceed maximum
|
||||||
* transaction size per operation.
|
* transaction size per operation.
|
||||||
*/
|
*/
|
||||||
if ((rsv_blocks > journal->j_max_transaction_buffers / 2) ||
|
if (rsv_blocks > jbd2_max_user_trans_buffers(journal) / 2 ||
|
||||||
(rsv_blocks + blocks > journal->j_max_transaction_buffers)) {
|
rsv_blocks + blocks > jbd2_max_user_trans_buffers(journal)) {
|
||||||
printk(KERN_ERR "JBD2: %s wants too many credits "
|
printk(KERN_ERR "JBD2: %s wants too many credits "
|
||||||
"credits:%d rsv_credits:%d max:%d\n",
|
"credits:%d rsv_credits:%d max:%d\n",
|
||||||
current->comm, blocks, rsv_blocks,
|
current->comm, blocks, rsv_blocks,
|
||||||
journal->j_max_transaction_buffers);
|
jbd2_max_user_trans_buffers(journal));
|
||||||
WARN_ON(1);
|
WARN_ON(1);
|
||||||
return -ENOSPC;
|
return -ENOSPC;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1085,6 +1085,13 @@ struct journal_s
|
||||||
*/
|
*/
|
||||||
int j_revoke_records_per_block;
|
int j_revoke_records_per_block;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @j_transaction_overhead_buffers:
|
||||||
|
*
|
||||||
|
* Number of blocks each transaction needs for its own bookkeeping
|
||||||
|
*/
|
||||||
|
int j_transaction_overhead_buffers;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @j_commit_interval:
|
* @j_commit_interval:
|
||||||
*
|
*
|
||||||
|
@ -1660,11 +1667,6 @@ int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode);
|
||||||
int jbd2_fc_wait_bufs(journal_t *journal, int num_blks);
|
int jbd2_fc_wait_bufs(journal_t *journal, int num_blks);
|
||||||
int jbd2_fc_release_bufs(journal_t *journal);
|
int jbd2_fc_release_bufs(journal_t *journal);
|
||||||
|
|
||||||
static inline int jbd2_journal_get_max_txn_bufs(journal_t *journal)
|
|
||||||
{
|
|
||||||
return (journal->j_total_len - journal->j_fc_wbufsize) / 4;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* is_journal_abort
|
* is_journal_abort
|
||||||
*
|
*
|
||||||
|
|
|
@ -1246,14 +1246,15 @@ TRACE_EVENT(ext4_da_update_reserve_space,
|
||||||
);
|
);
|
||||||
|
|
||||||
TRACE_EVENT(ext4_da_reserve_space,
|
TRACE_EVENT(ext4_da_reserve_space,
|
||||||
TP_PROTO(struct inode *inode),
|
TP_PROTO(struct inode *inode, int nr_resv),
|
||||||
|
|
||||||
TP_ARGS(inode),
|
TP_ARGS(inode, nr_resv),
|
||||||
|
|
||||||
TP_STRUCT__entry(
|
TP_STRUCT__entry(
|
||||||
__field( dev_t, dev )
|
__field( dev_t, dev )
|
||||||
__field( ino_t, ino )
|
__field( ino_t, ino )
|
||||||
__field( __u64, i_blocks )
|
__field( __u64, i_blocks )
|
||||||
|
__field( int, reserve_blocks )
|
||||||
__field( int, reserved_data_blocks )
|
__field( int, reserved_data_blocks )
|
||||||
__field( __u16, mode )
|
__field( __u16, mode )
|
||||||
),
|
),
|
||||||
|
@ -1262,16 +1263,17 @@ TRACE_EVENT(ext4_da_reserve_space,
|
||||||
__entry->dev = inode->i_sb->s_dev;
|
__entry->dev = inode->i_sb->s_dev;
|
||||||
__entry->ino = inode->i_ino;
|
__entry->ino = inode->i_ino;
|
||||||
__entry->i_blocks = inode->i_blocks;
|
__entry->i_blocks = inode->i_blocks;
|
||||||
|
__entry->reserve_blocks = nr_resv;
|
||||||
__entry->reserved_data_blocks = EXT4_I(inode)->i_reserved_data_blocks;
|
__entry->reserved_data_blocks = EXT4_I(inode)->i_reserved_data_blocks;
|
||||||
__entry->mode = inode->i_mode;
|
__entry->mode = inode->i_mode;
|
||||||
),
|
),
|
||||||
|
|
||||||
TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu "
|
TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu reserve_blocks %d"
|
||||||
"reserved_data_blocks %d",
|
"reserved_data_blocks %d",
|
||||||
MAJOR(__entry->dev), MINOR(__entry->dev),
|
MAJOR(__entry->dev), MINOR(__entry->dev),
|
||||||
(unsigned long) __entry->ino,
|
(unsigned long) __entry->ino,
|
||||||
__entry->mode, __entry->i_blocks,
|
__entry->mode, __entry->i_blocks,
|
||||||
__entry->reserved_data_blocks)
|
__entry->reserve_blocks, __entry->reserved_data_blocks)
|
||||||
);
|
);
|
||||||
|
|
||||||
TRACE_EVENT(ext4_da_release_space,
|
TRACE_EVENT(ext4_da_release_space,
|
||||||
|
@ -2478,11 +2480,11 @@ TRACE_EVENT(ext4_es_shrink,
|
||||||
__entry->scan_time, __entry->nr_skipped, __entry->retried)
|
__entry->scan_time, __entry->nr_skipped, __entry->retried)
|
||||||
);
|
);
|
||||||
|
|
||||||
TRACE_EVENT(ext4_es_insert_delayed_block,
|
TRACE_EVENT(ext4_es_insert_delayed_extent,
|
||||||
TP_PROTO(struct inode *inode, struct extent_status *es,
|
TP_PROTO(struct inode *inode, struct extent_status *es,
|
||||||
bool allocated),
|
bool lclu_allocated, bool end_allocated),
|
||||||
|
|
||||||
TP_ARGS(inode, es, allocated),
|
TP_ARGS(inode, es, lclu_allocated, end_allocated),
|
||||||
|
|
||||||
TP_STRUCT__entry(
|
TP_STRUCT__entry(
|
||||||
__field( dev_t, dev )
|
__field( dev_t, dev )
|
||||||
|
@ -2491,7 +2493,8 @@ TRACE_EVENT(ext4_es_insert_delayed_block,
|
||||||
__field( ext4_lblk_t, len )
|
__field( ext4_lblk_t, len )
|
||||||
__field( ext4_fsblk_t, pblk )
|
__field( ext4_fsblk_t, pblk )
|
||||||
__field( char, status )
|
__field( char, status )
|
||||||
__field( bool, allocated )
|
__field( bool, lclu_allocated )
|
||||||
|
__field( bool, end_allocated )
|
||||||
),
|
),
|
||||||
|
|
||||||
TP_fast_assign(
|
TP_fast_assign(
|
||||||
|
@ -2501,16 +2504,17 @@ TRACE_EVENT(ext4_es_insert_delayed_block,
|
||||||
__entry->len = es->es_len;
|
__entry->len = es->es_len;
|
||||||
__entry->pblk = ext4_es_show_pblock(es);
|
__entry->pblk = ext4_es_show_pblock(es);
|
||||||
__entry->status = ext4_es_status(es);
|
__entry->status = ext4_es_status(es);
|
||||||
__entry->allocated = allocated;
|
__entry->lclu_allocated = lclu_allocated;
|
||||||
|
__entry->end_allocated = end_allocated;
|
||||||
),
|
),
|
||||||
|
|
||||||
TP_printk("dev %d,%d ino %lu es [%u/%u) mapped %llu status %s "
|
TP_printk("dev %d,%d ino %lu es [%u/%u) mapped %llu status %s "
|
||||||
"allocated %d",
|
"allocated %d %d",
|
||||||
MAJOR(__entry->dev), MINOR(__entry->dev),
|
MAJOR(__entry->dev), MINOR(__entry->dev),
|
||||||
(unsigned long) __entry->ino,
|
(unsigned long) __entry->ino,
|
||||||
__entry->lblk, __entry->len,
|
__entry->lblk, __entry->len,
|
||||||
__entry->pblk, show_extent_status(__entry->status),
|
__entry->pblk, show_extent_status(__entry->status),
|
||||||
__entry->allocated)
|
__entry->lclu_allocated, __entry->end_allocated)
|
||||||
);
|
);
|
||||||
|
|
||||||
/* fsmap traces */
|
/* fsmap traces */
|
||||||
|
|
Loading…
Add table
Reference in a new issue