Skip to content

Commit

Permalink
ext4: Convert mballoc cr (criteria) to enum
Browse files Browse the repository at this point in the history
Convert criteria to be an enum so it easier to maintain and
update the tracefiles to use enum names. This change also makes
it easier to insert new criterias in the future.

There is no functional change in this patch.

Signed-off-by: Ojaswin Mujoo <[email protected]>
Reviewed-by: Ritesh Harjani (IBM) <[email protected]>
Link: https://lore.kernel.org/r/5d82fd467bdf70ea45bdaef810af3b146013946c.1685449706.git.ojaswin@linux.ibm.com
Signed-off-by: Theodore Ts'o <[email protected]>
  • Loading branch information
OjaswinM authored and tytso committed Jun 26, 2023
1 parent 5730cce commit 4eb7a4a
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 53 deletions.
23 changes: 20 additions & 3 deletions fs/ext4/ext4.h
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,23 @@ enum SHIFT_DIRECTION {
SHIFT_RIGHT,
};

/*
* Number of criterias defined. For each criteria, mballoc has slightly
* different way of finding the required blocks nad usually, higher the
* criteria the slower the allocation. We start at lower criterias and keep
* falling back to higher ones if we are not able to find any blocks.
*/
#define EXT4_MB_NUM_CRS 4
/*
* All possible allocation criterias for mballoc
*/
enum criteria {
CR0,
CR1,
CR2,
CR3,
};

/*
* Flags used in mballoc's allocation_context flags field.
*
Expand Down Expand Up @@ -1544,9 +1561,9 @@ struct ext4_sb_info {
atomic_t s_bal_2orders; /* 2^order hits */
atomic_t s_bal_cr0_bad_suggestions;
atomic_t s_bal_cr1_bad_suggestions;
atomic64_t s_bal_cX_groups_considered[4];
atomic64_t s_bal_cX_hits[4];
atomic64_t s_bal_cX_failed[4]; /* cX loop didn't find blocks */
atomic64_t s_bal_cX_groups_considered[EXT4_MB_NUM_CRS];
atomic64_t s_bal_cX_hits[EXT4_MB_NUM_CRS];
atomic64_t s_bal_cX_failed[EXT4_MB_NUM_CRS]; /* cX loop didn't find blocks */
atomic_t s_mb_buddies_generated; /* number of buddies generated */
atomic64_t s_mb_generation_time;
atomic_t s_mb_lost_chunks;
Expand Down
96 changes: 48 additions & 48 deletions fs/ext4/mballoc.c
Original file line number Diff line number Diff line change
Expand Up @@ -154,19 +154,19 @@
* structures to decide the order in which groups are to be traversed for
* fulfilling an allocation request.
*
* At CR = 0, we look for groups which have the largest_free_order >= the order
* At CR0 , we look for groups which have the largest_free_order >= the order
* of the request. We directly look at the largest free order list in the data
* structure (1) above where largest_free_order = order of the request. If that
* list is empty, we look at remaining list in the increasing order of
* largest_free_order. This allows us to perform CR = 0 lookup in O(1) time.
* largest_free_order. This allows us to perform CR0 lookup in O(1) time.
*
* At CR = 1, we only consider groups where average fragment size > request
* At CR1, we only consider groups where average fragment size > request
* size. So, we lookup a group which has average fragment size just above or
* equal to request size using our average fragment size group lists (data
* structure 2) in O(1) time.
*
* If "mb_optimize_scan" mount option is not set, mballoc traverses groups in
* linear order which requires O(N) search time for each CR 0 and CR 1 phase.
* linear order which requires O(N) search time for each CR0 and CR1 phase.
*
* The regular allocator (using the buddy cache) supports a few tunables.
*
Expand Down Expand Up @@ -409,7 +409,7 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
static void ext4_mb_new_preallocation(struct ext4_allocation_context *ac);

static bool ext4_mb_good_group(struct ext4_allocation_context *ac,
ext4_group_t group, int cr);
ext4_group_t group, enum criteria cr);

static int ext4_try_to_trim_range(struct super_block *sb,
struct ext4_buddy *e4b, ext4_grpblk_t start,
Expand Down Expand Up @@ -859,7 +859,7 @@ mb_update_avg_fragment_size(struct super_block *sb, struct ext4_group_info *grp)
* cr level needs an update.
*/
static void ext4_mb_choose_next_group_cr0(struct ext4_allocation_context *ac,
int *new_cr, ext4_group_t *group, ext4_group_t ngroups)
enum criteria *new_cr, ext4_group_t *group, ext4_group_t ngroups)
{
struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
struct ext4_group_info *iter, *grp;
Expand All @@ -884,8 +884,8 @@ static void ext4_mb_choose_next_group_cr0(struct ext4_allocation_context *ac,
list_for_each_entry(iter, &sbi->s_mb_largest_free_orders[i],
bb_largest_free_order_node) {
if (sbi->s_mb_stats)
atomic64_inc(&sbi->s_bal_cX_groups_considered[0]);
if (likely(ext4_mb_good_group(ac, iter->bb_group, 0))) {
atomic64_inc(&sbi->s_bal_cX_groups_considered[CR0]);
if (likely(ext4_mb_good_group(ac, iter->bb_group, CR0))) {
grp = iter;
break;
}
Expand All @@ -897,7 +897,7 @@ static void ext4_mb_choose_next_group_cr0(struct ext4_allocation_context *ac,

if (!grp) {
/* Increment cr and search again */
*new_cr = 1;
*new_cr = CR1;
} else {
*group = grp->bb_group;
ac->ac_flags |= EXT4_MB_CR0_OPTIMIZED;
Expand All @@ -909,7 +909,7 @@ static void ext4_mb_choose_next_group_cr0(struct ext4_allocation_context *ac,
* order. Updates *new_cr if cr level needs an update.
*/
static void ext4_mb_choose_next_group_cr1(struct ext4_allocation_context *ac,
int *new_cr, ext4_group_t *group, ext4_group_t ngroups)
enum criteria *new_cr, ext4_group_t *group, ext4_group_t ngroups)
{
struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
struct ext4_group_info *grp = NULL, *iter;
Expand All @@ -932,8 +932,8 @@ static void ext4_mb_choose_next_group_cr1(struct ext4_allocation_context *ac,
list_for_each_entry(iter, &sbi->s_mb_avg_fragment_size[i],
bb_avg_fragment_size_node) {
if (sbi->s_mb_stats)
atomic64_inc(&sbi->s_bal_cX_groups_considered[1]);
if (likely(ext4_mb_good_group(ac, iter->bb_group, 1))) {
atomic64_inc(&sbi->s_bal_cX_groups_considered[CR1]);
if (likely(ext4_mb_good_group(ac, iter->bb_group, CR1))) {
grp = iter;
break;
}
Expand All @@ -947,15 +947,15 @@ static void ext4_mb_choose_next_group_cr1(struct ext4_allocation_context *ac,
*group = grp->bb_group;
ac->ac_flags |= EXT4_MB_CR1_OPTIMIZED;
} else {
*new_cr = 2;
*new_cr = CR2;
}
}

static inline int should_optimize_scan(struct ext4_allocation_context *ac)
{
if (unlikely(!test_opt2(ac->ac_sb, MB_OPTIMIZE_SCAN)))
return 0;
if (ac->ac_criteria >= 2)
if (ac->ac_criteria >= CR2)
return 0;
if (!ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS))
return 0;
Expand Down Expand Up @@ -1000,7 +1000,7 @@ next_linear_group(struct ext4_allocation_context *ac, int group, int ngroups)
* @ngroups Total number of groups
*/
static void ext4_mb_choose_next_group(struct ext4_allocation_context *ac,
int *new_cr, ext4_group_t *group, ext4_group_t ngroups)
enum criteria *new_cr, ext4_group_t *group, ext4_group_t ngroups)
{
*new_cr = ac->ac_criteria;

Expand All @@ -1009,9 +1009,9 @@ static void ext4_mb_choose_next_group(struct ext4_allocation_context *ac,
return;
}

if (*new_cr == 0) {
if (*new_cr == CR0) {
ext4_mb_choose_next_group_cr0(ac, new_cr, group, ngroups);
} else if (*new_cr == 1) {
} else if (*new_cr == CR1) {
ext4_mb_choose_next_group_cr1(ac, new_cr, group, ngroups);
} else {
/*
Expand Down Expand Up @@ -2408,13 +2408,13 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
* for the allocation or not.
*/
static bool ext4_mb_good_group(struct ext4_allocation_context *ac,
ext4_group_t group, int cr)
ext4_group_t group, enum criteria cr)
{
ext4_grpblk_t free, fragments;
int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);

BUG_ON(cr < 0 || cr >= 4);
BUG_ON(cr < CR0 || cr >= EXT4_MB_NUM_CRS);

if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp) || !grp))
return false;
Expand All @@ -2428,7 +2428,7 @@ static bool ext4_mb_good_group(struct ext4_allocation_context *ac,
return false;

switch (cr) {
case 0:
case CR0:
BUG_ON(ac->ac_2order == 0);

/* Avoid using the first bg of a flexgroup for data files */
Expand All @@ -2447,15 +2447,15 @@ static bool ext4_mb_good_group(struct ext4_allocation_context *ac,
return false;

return true;
case 1:
case CR1:
if ((free / fragments) >= ac->ac_g_ex.fe_len)
return true;
break;
case 2:
case CR2:
if (free >= ac->ac_g_ex.fe_len)
return true;
break;
case 3:
case CR3:
return true;
default:
BUG();
Expand All @@ -2476,7 +2476,7 @@ static bool ext4_mb_good_group(struct ext4_allocation_context *ac,
* out"!
*/
static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac,
ext4_group_t group, int cr)
ext4_group_t group, enum criteria cr)
{
struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
struct super_block *sb = ac->ac_sb;
Expand All @@ -2496,7 +2496,7 @@ static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac,
free = grp->bb_free;
if (free == 0)
goto out;
if (cr <= 2 && free < ac->ac_g_ex.fe_len)
if (cr <= CR2 && free < ac->ac_g_ex.fe_len)
goto out;
if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp)))
goto out;
Expand All @@ -2511,15 +2511,15 @@ static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac,
ext4_get_group_desc(sb, group, NULL);
int ret;

/* cr=0/1 is a very optimistic search to find large
/* cr=CR0/CR1 is a very optimistic search to find large
* good chunks almost for free. If buddy data is not
* ready, then this optimization makes no sense. But
* we never skip the first block group in a flex_bg,
* since this gets used for metadata block allocation,
* and we want to make sure we locate metadata blocks
* in the first block group in the flex_bg if possible.
*/
if (cr < 2 &&
if (cr < CR2 &&
(!sbi->s_log_groups_per_flex ||
((group & ((1 << sbi->s_log_groups_per_flex) - 1)) != 0)) &&
!(ext4_has_group_desc_csum(sb) &&
Expand Down Expand Up @@ -2625,7 +2625,7 @@ static noinline_for_stack int
ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
{
ext4_group_t prefetch_grp = 0, ngroups, group, i;
int cr = -1, new_cr;
enum criteria cr, new_cr;
int err = 0, first_err = 0;
unsigned int nr = 0, prefetch_ios = 0;
struct ext4_sb_info *sbi;
Expand Down Expand Up @@ -2683,13 +2683,13 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
}

/* Let's just scan groups to find more-less suitable blocks */
cr = ac->ac_2order ? 0 : 1;
cr = ac->ac_2order ? CR0 : CR1;
/*
* cr == 0 try to get exact allocation,
* cr == 3 try to get anything
* cr == CR0 try to get exact allocation,
* cr == CR3 try to get anything
*/
repeat:
for (; cr < 4 && ac->ac_status == AC_STATUS_CONTINUE; cr++) {
for (; cr < EXT4_MB_NUM_CRS && ac->ac_status == AC_STATUS_CONTINUE; cr++) {
ac->ac_criteria = cr;
/*
* searching for the right group start
Expand All @@ -2716,7 +2716,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
* spend a lot of time loading imperfect groups
*/
if ((prefetch_grp == group) &&
(cr > 1 ||
(cr > CR1 ||
prefetch_ios < sbi->s_mb_prefetch_limit)) {
unsigned int curr_ios = prefetch_ios;

Expand Down Expand Up @@ -2758,9 +2758,9 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
}

ac->ac_groups_scanned++;
if (cr == 0)
if (cr == CR0)
ext4_mb_simple_scan_group(ac, &e4b);
else if (cr == 1 && sbi->s_stripe &&
else if (cr == CR1 && sbi->s_stripe &&
!(ac->ac_g_ex.fe_len %
EXT4_B2C(sbi, sbi->s_stripe)))
ext4_mb_scan_aligned(ac, &e4b);
Expand Down Expand Up @@ -2801,7 +2801,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
ac->ac_b_ex.fe_len = 0;
ac->ac_status = AC_STATUS_CONTINUE;
ac->ac_flags |= EXT4_MB_HINT_FIRST;
cr = 3;
cr = CR3;
goto repeat;
}
}
Expand Down Expand Up @@ -2926,36 +2926,36 @@ int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset)
seq_printf(seq, "\tgroups_scanned: %u\n", atomic_read(&sbi->s_bal_groups_scanned));

seq_puts(seq, "\tcr0_stats:\n");
seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[0]));
seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[CR0]));
seq_printf(seq, "\t\tgroups_considered: %llu\n",
atomic64_read(&sbi->s_bal_cX_groups_considered[0]));
atomic64_read(&sbi->s_bal_cX_groups_considered[CR0]));
seq_printf(seq, "\t\tuseless_loops: %llu\n",
atomic64_read(&sbi->s_bal_cX_failed[0]));
atomic64_read(&sbi->s_bal_cX_failed[CR0]));
seq_printf(seq, "\t\tbad_suggestions: %u\n",
atomic_read(&sbi->s_bal_cr0_bad_suggestions));

seq_puts(seq, "\tcr1_stats:\n");
seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[1]));
seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[CR1]));
seq_printf(seq, "\t\tgroups_considered: %llu\n",
atomic64_read(&sbi->s_bal_cX_groups_considered[1]));
atomic64_read(&sbi->s_bal_cX_groups_considered[CR1]));
seq_printf(seq, "\t\tuseless_loops: %llu\n",
atomic64_read(&sbi->s_bal_cX_failed[1]));
atomic64_read(&sbi->s_bal_cX_failed[CR1]));
seq_printf(seq, "\t\tbad_suggestions: %u\n",
atomic_read(&sbi->s_bal_cr1_bad_suggestions));

seq_puts(seq, "\tcr2_stats:\n");
seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[2]));
seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[CR2]));
seq_printf(seq, "\t\tgroups_considered: %llu\n",
atomic64_read(&sbi->s_bal_cX_groups_considered[2]));
atomic64_read(&sbi->s_bal_cX_groups_considered[CR2]));
seq_printf(seq, "\t\tuseless_loops: %llu\n",
atomic64_read(&sbi->s_bal_cX_failed[2]));
atomic64_read(&sbi->s_bal_cX_failed[CR2]));

seq_puts(seq, "\tcr3_stats:\n");
seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[3]));
seq_printf(seq, "\t\thits: %llu\n", atomic64_read(&sbi->s_bal_cX_hits[CR3]));
seq_printf(seq, "\t\tgroups_considered: %llu\n",
atomic64_read(&sbi->s_bal_cX_groups_considered[3]));
atomic64_read(&sbi->s_bal_cX_groups_considered[CR3]));
seq_printf(seq, "\t\tuseless_loops: %llu\n",
atomic64_read(&sbi->s_bal_cX_failed[3]));
atomic64_read(&sbi->s_bal_cX_failed[CR3]));
seq_printf(seq, "\textents_scanned: %u\n", atomic_read(&sbi->s_bal_ex_scanned));
seq_printf(seq, "\t\tgoal_hits: %u\n", atomic_read(&sbi->s_bal_goals));
seq_printf(seq, "\t\t2^n_hits: %u\n", atomic_read(&sbi->s_bal_2orders));
Expand Down
16 changes: 14 additions & 2 deletions include/trace/events/ext4.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,18 @@ TRACE_DEFINE_ENUM(EXT4_FC_REASON_MAX);
{ EXT4_FC_REASON_INODE_JOURNAL_DATA, "INODE_JOURNAL_DATA"}, \
{ EXT4_FC_REASON_ENCRYPTED_FILENAME, "ENCRYPTED_FILENAME"})

TRACE_DEFINE_ENUM(CR0);
TRACE_DEFINE_ENUM(CR1);
TRACE_DEFINE_ENUM(CR2);
TRACE_DEFINE_ENUM(CR3);

#define show_criteria(cr) \
__print_symbolic(cr, \
{ CR0, "CR0" }, \
{ CR1, "CR1" }, \
{ CR2, "CR2" }, \
{ CR3, "CR3" })

TRACE_EVENT(ext4_other_inode_update_time,
TP_PROTO(struct inode *inode, ino_t orig_ino),

Expand Down Expand Up @@ -1063,7 +1075,7 @@ TRACE_EVENT(ext4_mballoc_alloc,
),

TP_printk("dev %d,%d inode %lu orig %u/%d/%u@%u goal %u/%d/%u@%u "
"result %u/%d/%u@%u blks %u grps %u cr %u flags %s "
"result %u/%d/%u@%u blks %u grps %u cr %s flags %s "
"tail %u broken %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
Expand All @@ -1073,7 +1085,7 @@ TRACE_EVENT(ext4_mballoc_alloc,
__entry->goal_len, __entry->goal_logical,
__entry->result_group, __entry->result_start,
__entry->result_len, __entry->result_logical,
__entry->found, __entry->groups, __entry->cr,
__entry->found, __entry->groups, show_criteria(__entry->cr),
show_mballoc_flags(__entry->flags), __entry->tail,
__entry->buddy ? 1 << __entry->buddy : 0)
);
Expand Down

0 comments on commit 4eb7a4a

Please sign in to comment.