1
0
Fork 0
mirror of synced 2025-03-06 20:59:54 +01:00

f2fs: introduce fragment allocation mode mount option

Added two options into "mode=" mount option to make it possible for
developers to simulate filesystem fragmentation/after-GC situation
itself. The developers use these modes to understand filesystem
fragmentation/after-GC condition well, and eventually get some
insights to handle them better.

"fragment:segment": f2fs allocates a new segment in ramdom position.
		With this, we can simulate the after-GC condition.
"fragment:block" : We can scatter block allocation with
		"max_fragment_chunk" and "max_fragment_hole" sysfs
		nodes. f2fs will allocate 1..<max_fragment_chunk>
		blocks in a chunk and make a hole in the length of
		1..<max_fragment_hole> by turns	in a newly allocated
		free segment. Plus, this mode implicitly enables
		"fragment:segment" option for more randomness.

Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Daeho Jeong <daehojeong@google.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
This commit is contained in:
Daeho Jeong 2021-09-29 11:12:03 -07:00 committed by Jaegeuk Kim
parent 84eab2a899
commit 6691d940b0
8 changed files with 104 additions and 5 deletions

View file

@ -512,3 +512,19 @@ Date: July 2021
Contact: "Daeho Jeong" <daehojeong@google.com> Contact: "Daeho Jeong" <daehojeong@google.com>
Description: You can control the multiplier value of bdi device readahead window size Description: You can control the multiplier value of bdi device readahead window size
between 2 (default) and 256 for POSIX_FADV_SEQUENTIAL advise option. between 2 (default) and 256 for POSIX_FADV_SEQUENTIAL advise option.
What: /sys/fs/f2fs/<disk>/max_fragment_chunk
Date: August 2021
Contact: "Daeho Jeong" <daehojeong@google.com>
Description: With "mode=fragment:block" mount options, we can scatter block allocation.
f2fs will allocate 1..<max_fragment_chunk> blocks in a chunk and make a hole
in the length of 1..<max_fragment_hole> by turns. This value can be set
between 1..512 and the default value is 4.
What: /sys/fs/f2fs/<disk>/max_fragment_hole
Date: August 2021
Contact: "Daeho Jeong" <daehojeong@google.com>
Description: With "mode=fragment:block" mount options, we can scatter block allocation.
f2fs will allocate 1..<max_fragment_chunk> blocks in a chunk and make a hole
in the length of 1..<max_fragment_hole> by turns. This value can be set
between 1..512 and the default value is 4.

View file

@ -201,6 +201,24 @@ fault_type=%d Support configuring fault injection type, should be
mode=%s Control block allocation mode which supports "adaptive" mode=%s Control block allocation mode which supports "adaptive"
and "lfs". In "lfs" mode, there should be no random and "lfs". In "lfs" mode, there should be no random
writes towards main area. writes towards main area.
"fragment:segment" and "fragment:block" are newly added here.
These are developer options for experiments to simulate filesystem
fragmentation/after-GC situation itself. The developers use these
modes to understand filesystem fragmentation/after-GC condition well,
and eventually get some insights to handle them better.
In "fragment:segment", f2fs allocates a new segment in ramdom
position. With this, we can simulate the after-GC condition.
In "fragment:block", we can scatter block allocation with
"max_fragment_chunk" and "max_fragment_hole" sysfs nodes.
We added some randomness to both chunk and hole size to make
it close to realistic IO pattern. So, in this mode, f2fs will allocate
1..<max_fragment_chunk> blocks in a chunk and make a hole in the
length of 1..<max_fragment_hole> by turns. With this, the newly
allocated blocks will be scattered throughout the whole partition.
Note that "fragment:block" implicitly enables "fragment:segment"
option for more randomness.
Please, use these options for your experiments and we strongly
recommend to re-format the filesystem after using these options.
io_bits=%u Set the bit size of write IO requests. It should be set io_bits=%u Set the bit size of write IO requests. It should be set
with "mode=lfs". with "mode=lfs".
usrquota Enable plain user disk quota accounting. usrquota Enable plain user disk quota accounting.

View file

@ -1287,8 +1287,10 @@ enum {
}; };
enum { enum {
FS_MODE_ADAPTIVE, /* use both lfs/ssr allocation */ FS_MODE_ADAPTIVE, /* use both lfs/ssr allocation */
FS_MODE_LFS, /* use lfs allocation only */ FS_MODE_LFS, /* use lfs allocation only */
FS_MODE_FRAGMENT_SEG, /* segment fragmentation mode */
FS_MODE_FRAGMENT_BLK, /* block fragmentation mode */
}; };
enum { enum {
@ -1759,6 +1761,9 @@ struct f2fs_sb_info {
unsigned long seq_file_ra_mul; /* multiplier for ra_pages of seq. files in fadvise */ unsigned long seq_file_ra_mul; /* multiplier for ra_pages of seq. files in fadvise */
int max_fragment_chunk; /* max chunk size for block fragmentation mode */
int max_fragment_hole; /* max hole size for block fragmentation mode */
#ifdef CONFIG_F2FS_FS_COMPRESSION #ifdef CONFIG_F2FS_FS_COMPRESSION
struct kmem_cache *page_array_slab; /* page array entry */ struct kmem_cache *page_array_slab; /* page array entry */
unsigned int page_array_slab_size; /* default page array slab size */ unsigned int page_array_slab_size; /* default page array slab size */
@ -3519,6 +3524,16 @@ unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi,
unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi, unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi,
unsigned int segno); unsigned int segno);
#define DEF_FRAGMENT_SIZE 4
#define MIN_FRAGMENT_SIZE 1
#define MAX_FRAGMENT_SIZE 512
static inline bool f2fs_need_rand_seg(struct f2fs_sb_info *sbi)
{
return F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_SEG ||
F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK;
}
/* /*
* checkpoint.c * checkpoint.c
*/ */

View file

@ -14,6 +14,7 @@
#include <linux/delay.h> #include <linux/delay.h>
#include <linux/freezer.h> #include <linux/freezer.h>
#include <linux/sched/signal.h> #include <linux/sched/signal.h>
#include <linux/random.h>
#include "f2fs.h" #include "f2fs.h"
#include "node.h" #include "node.h"
@ -257,7 +258,9 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
p->max_search = sbi->max_victim_search; p->max_search = sbi->max_victim_search;
/* let's select beginning hot/small space first in no_heap mode*/ /* let's select beginning hot/small space first in no_heap mode*/
if (test_opt(sbi, NOHEAP) && if (f2fs_need_rand_seg(sbi))
p->offset = prandom_u32() % (MAIN_SECS(sbi) * sbi->segs_per_sec);
else if (test_opt(sbi, NOHEAP) &&
(type == CURSEG_HOT_DATA || IS_NODESEG(type))) (type == CURSEG_HOT_DATA || IS_NODESEG(type)))
p->offset = 0; p->offset = 0;
else else

View file

@ -15,6 +15,7 @@
#include <linux/timer.h> #include <linux/timer.h>
#include <linux/freezer.h> #include <linux/freezer.h>
#include <linux/sched/signal.h> #include <linux/sched/signal.h>
#include <linux/random.h>
#include "f2fs.h" #include "f2fs.h"
#include "segment.h" #include "segment.h"
@ -2649,6 +2650,8 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
unsigned short seg_type = curseg->seg_type; unsigned short seg_type = curseg->seg_type;
sanity_check_seg_type(sbi, seg_type); sanity_check_seg_type(sbi, seg_type);
if (f2fs_need_rand_seg(sbi))
return prandom_u32() % (MAIN_SECS(sbi) * sbi->segs_per_sec);
/* if segs_per_sec is large than 1, we need to keep original policy. */ /* if segs_per_sec is large than 1, we need to keep original policy. */
if (__is_large_section(sbi)) if (__is_large_section(sbi))
@ -2700,6 +2703,9 @@ static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
curseg->next_segno = segno; curseg->next_segno = segno;
reset_curseg(sbi, type, 1); reset_curseg(sbi, type, 1);
curseg->alloc_type = LFS; curseg->alloc_type = LFS;
if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK)
curseg->fragment_remained_chunk =
prandom_u32() % sbi->max_fragment_chunk + 1;
} }
static int __next_free_blkoff(struct f2fs_sb_info *sbi, static int __next_free_blkoff(struct f2fs_sb_info *sbi,
@ -2726,12 +2732,22 @@ static int __next_free_blkoff(struct f2fs_sb_info *sbi,
static void __refresh_next_blkoff(struct f2fs_sb_info *sbi, static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
struct curseg_info *seg) struct curseg_info *seg)
{ {
if (seg->alloc_type == SSR) if (seg->alloc_type == SSR) {
seg->next_blkoff = seg->next_blkoff =
__next_free_blkoff(sbi, seg->segno, __next_free_blkoff(sbi, seg->segno,
seg->next_blkoff + 1); seg->next_blkoff + 1);
else } else {
seg->next_blkoff++; seg->next_blkoff++;
if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK) {
/* To allocate block chunks in different sizes, use random number */
if (--seg->fragment_remained_chunk <= 0) {
seg->fragment_remained_chunk =
prandom_u32() % sbi->max_fragment_chunk + 1;
seg->next_blkoff +=
prandom_u32() % sbi->max_fragment_hole + 1;
}
}
}
} }
bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno) bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno)

View file

@ -314,6 +314,7 @@ struct curseg_info {
unsigned short next_blkoff; /* next block offset to write */ unsigned short next_blkoff; /* next block offset to write */
unsigned int zone; /* current zone number */ unsigned int zone; /* current zone number */
unsigned int next_segno; /* preallocated segment */ unsigned int next_segno; /* preallocated segment */
int fragment_remained_chunk; /* remained block size in a chunk for block fragmentation mode */
bool inited; /* indicate inmem log is inited */ bool inited; /* indicate inmem log is inited */
}; };

View file

@ -817,6 +817,10 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
F2FS_OPTION(sbi).fs_mode = FS_MODE_ADAPTIVE; F2FS_OPTION(sbi).fs_mode = FS_MODE_ADAPTIVE;
} else if (!strcmp(name, "lfs")) { } else if (!strcmp(name, "lfs")) {
F2FS_OPTION(sbi).fs_mode = FS_MODE_LFS; F2FS_OPTION(sbi).fs_mode = FS_MODE_LFS;
} else if (!strcmp(name, "fragment:segment")) {
F2FS_OPTION(sbi).fs_mode = FS_MODE_FRAGMENT_SEG;
} else if (!strcmp(name, "fragment:block")) {
F2FS_OPTION(sbi).fs_mode = FS_MODE_FRAGMENT_BLK;
} else { } else {
kfree(name); kfree(name);
return -EINVAL; return -EINVAL;
@ -1896,6 +1900,10 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
seq_puts(seq, "adaptive"); seq_puts(seq, "adaptive");
else if (F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS) else if (F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS)
seq_puts(seq, "lfs"); seq_puts(seq, "lfs");
else if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_SEG)
seq_puts(seq, "fragment:segment");
else if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK)
seq_puts(seq, "fragment:block");
seq_printf(seq, ",active_logs=%u", F2FS_OPTION(sbi).active_logs); seq_printf(seq, ",active_logs=%u", F2FS_OPTION(sbi).active_logs);
if (test_opt(sbi, RESERVE_ROOT)) if (test_opt(sbi, RESERVE_ROOT))
seq_printf(seq, ",reserve_root=%u,resuid=%u,resgid=%u", seq_printf(seq, ",reserve_root=%u,resuid=%u,resgid=%u",
@ -3523,6 +3531,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH; sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH;
sbi->migration_granularity = sbi->segs_per_sec; sbi->migration_granularity = sbi->segs_per_sec;
sbi->seq_file_ra_mul = MIN_RA_MUL; sbi->seq_file_ra_mul = MIN_RA_MUL;
sbi->max_fragment_chunk = DEF_FRAGMENT_SIZE;
sbi->max_fragment_hole = DEF_FRAGMENT_SIZE;
sbi->dir_level = DEF_DIR_LEVEL; sbi->dir_level = DEF_DIR_LEVEL;
sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL; sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL;

View file

@ -551,6 +551,22 @@ out:
return count; return count;
} }
if (!strcmp(a->attr.name, "max_fragment_chunk")) {
if (t >= MIN_FRAGMENT_SIZE && t <= MAX_FRAGMENT_SIZE)
sbi->max_fragment_chunk = t;
else
return -EINVAL;
return count;
}
if (!strcmp(a->attr.name, "max_fragment_hole")) {
if (t >= MIN_FRAGMENT_SIZE && t <= MAX_FRAGMENT_SIZE)
sbi->max_fragment_hole = t;
else
return -EINVAL;
return count;
}
*ui = (unsigned int)t; *ui = (unsigned int)t;
return count; return count;
@ -781,6 +797,8 @@ F2FS_RW_ATTR(ATGC_INFO, atgc_management, atgc_age_threshold, age_threshold);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, seq_file_ra_mul, seq_file_ra_mul); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, seq_file_ra_mul, seq_file_ra_mul);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_segment_mode, gc_segment_mode); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_segment_mode, gc_segment_mode);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_reclaimed_segments, gc_reclaimed_segs); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_reclaimed_segments, gc_reclaimed_segs);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_fragment_chunk, max_fragment_chunk);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_fragment_hole, max_fragment_hole);
#define ATTR_LIST(name) (&f2fs_attr_##name.attr) #define ATTR_LIST(name) (&f2fs_attr_##name.attr)
static struct attribute *f2fs_attrs[] = { static struct attribute *f2fs_attrs[] = {
@ -859,6 +877,8 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(seq_file_ra_mul), ATTR_LIST(seq_file_ra_mul),
ATTR_LIST(gc_segment_mode), ATTR_LIST(gc_segment_mode),
ATTR_LIST(gc_reclaimed_segments), ATTR_LIST(gc_reclaimed_segments),
ATTR_LIST(max_fragment_chunk),
ATTR_LIST(max_fragment_hole),
NULL, NULL,
}; };
ATTRIBUTE_GROUPS(f2fs); ATTRIBUTE_GROUPS(f2fs);