Skip to content

Commit

Permalink
[GFS2] Fix journal flush problem
Browse files Browse the repository at this point in the history
This fixes a bug which resulted in poor performance due to flushing
the journal too often. The code path in question was via the inode_go_sync()
function in glops.c. The solution is not to flush the journal immediately
when inodes are ejected from memory, but to batch up the work for glockd to
deal with later on. This means that glocks may now live on beyond the end of
the lifetime of their inodes (but not very much longer in the normal case).

Also fixed in this patch is a bug (which was hidden by the bug mentioned above) in
the calculation of the number of free journal blocks.

The gfs2_logd process has been altered to be more responsive to the journal
filling up. We now wake it up when the number of uncommitted journal blocks
has reached the threshold level rather than trying to flush directly at the
end of each transaction. This again means doing fewer, but larger, log
flushes in general.

Signed-off-by: Steven Whitehouse <[email protected]>
  • Loading branch information
swhiteho committed Nov 30, 2006
1 parent ae61932 commit b004157
Show file tree
Hide file tree
Showing 7 changed files with 46 additions and 99 deletions.
7 changes: 5 additions & 2 deletions fs/gfs2/daemon.c
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ int gfs2_logd(void *data)
struct gfs2_sbd *sdp = data;
struct gfs2_holder ji_gh;
unsigned long t;
int need_flush;

while (!kthread_should_stop()) {
/* Advance the log tail */
Expand All @@ -120,8 +121,10 @@ int gfs2_logd(void *data)
gfs2_tune_get(sdp, gt_log_flush_secs) * HZ;

gfs2_ail1_empty(sdp, DIO_ALL);

if (time_after_eq(jiffies, t)) {
gfs2_log_lock(sdp);
need_flush = sdp->sd_log_num_buf > gfs2_tune_get(sdp, gt_incore_log_blocks);
gfs2_log_unlock(sdp);
if (need_flush || time_after_eq(jiffies, t)) {
gfs2_log_flush(sdp, NULL);
sdp->sd_log_flush_time = jiffies;
}
Expand Down
17 changes: 1 addition & 16 deletions fs/gfs2/glock.c
Original file line number Diff line number Diff line change
Expand Up @@ -785,21 +785,6 @@ static void handle_callback(struct gfs2_glock *gl, unsigned int state)
gfs2_holder_put(new_gh);
}

/**
* gfs2_glock_inode_squish - queue an unlock/demote holder on an inode's glock and wait
* @inode: the inode whose glock (GFS2_I(inode)->i_gl) is operated on
*
* Initialises an on-stack holder for LM_ST_UNLOCKED, flags it with
* HIF_DEMOTE, appends it to the glock's gl_waiters2 list, runs the glock
* queue and then blocks until the holder's gh_wait completion is signalled.
* The glock is asserted to have an empty gl_holders list at that point.
*/
void gfs2_glock_inode_squish(struct inode *inode)
{
/* The holder lives on this stack frame, so it must be completed and
   uninitialised before returning. */
struct gfs2_holder gh;
struct gfs2_glock *gl = GFS2_I(inode)->i_gl;
gfs2_holder_init(gl, LM_ST_UNLOCKED, 0, &gh);
set_bit(HIF_DEMOTE, &gh.gh_iflags);
/* The assertion, the list insertion and run_queue() are all done under gl_spin. */
spin_lock(&gl->gl_spin);
gfs2_assert(inode->i_sb->s_fs_info, list_empty(&gl->gl_holders));
list_add_tail(&gh.gh_list, &gl->gl_waiters2);
run_queue(gl);
spin_unlock(&gl->gl_spin);
/* Sleep (outside the spinlock) until gh_wait is completed. */
wait_for_completion(&gh.gh_wait);
gfs2_holder_uninit(&gh);
}

/**
* state_change - record that the glock is now in a different state
* @gl: the glock
Expand Down Expand Up @@ -1920,7 +1905,7 @@ static int examine_bucket(glock_examiner examiner, struct gfs2_sbd *sdp,

static void scan_glock(struct gfs2_glock *gl)
{
if (gl->gl_ops == &gfs2_inode_glops)
if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object)
return;

if (gfs2_glmutex_trylock(gl)) {
Expand Down
1 change: 0 additions & 1 deletion fs/gfs2/glock.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,6 @@ void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs);
void gfs2_glock_prefetch_num(struct gfs2_sbd *sdp, u64 number,
const struct gfs2_glock_operations *glops,
unsigned int state, int flags);
void gfs2_glock_inode_squish(struct inode *inode);

/**
* gfs2_glock_nq_init - initialize a holder and enqueue it on a glock
Expand Down
93 changes: 24 additions & 69 deletions fs/gfs2/glops.c
Original file line number Diff line number Diff line change
Expand Up @@ -106,70 +106,6 @@ static void gfs2_pte_inval(struct gfs2_glock *gl)
clear_bit(GIF_SW_PAGED, &ip->i_flags);
}

/**
* gfs2_page_inval - Invalidate all pages associated with a glock
* @gl: the glock
*
* Does nothing when the glock has no attached object (gl->gl_object is
* NULL) or when the attached inode is not a regular file. Otherwise all
* pages of the inode's mapping are truncated starting at offset 0, the
* mapping is asserted to be empty afterwards, and GIF_PAGED is cleared
* in the inode's flags.
*/

static void gfs2_page_inval(struct gfs2_glock *gl)
{
struct gfs2_inode *ip;
struct inode *inode;

ip = gl->gl_object;
/* Only the address of the embedded inode is computed here; it is not
   dereferenced unless the !ip test below has passed. */
inode = &ip->i_inode;
if (!ip || !S_ISREG(inode->i_mode))
return;

truncate_inode_pages(inode->i_mapping, 0);
/* After truncation no pages should remain in the mapping. */
gfs2_assert_withdraw(GFS2_SB(&ip->i_inode), !inode->i_mapping->nrpages);
clear_bit(GIF_PAGED, &ip->i_flags);
}

/**
* gfs2_page_wait - Wait for writeback of data
* @gl: the glock
*
* Waits (via filemap_fdatawait()) on the data (not metadata) mapping of a
* regular file. No-op for all other types.
*
* Any error returned by the wait is recorded back on the mapping as
* AS_ENOSPC (for -ENOSPC) or AS_EIO (for any other error); nothing is
* returned to the caller.
*
* NOTE(review): gl->gl_object is dereferenced without a NULL check, so the
* caller must guarantee that an inode is attached to the glock.
*/

static void gfs2_page_wait(struct gfs2_glock *gl)
{
struct gfs2_inode *ip = gl->gl_object;
struct inode *inode = &ip->i_inode;
struct address_space *mapping = inode->i_mapping;
int error;

if (!S_ISREG(inode->i_mode))
return;

error = filemap_fdatawait(mapping);

/* Put back any errors cleared by filemap_fdatawait()
so they can be caught by someone who can pass them
up to user space. */

if (error == -ENOSPC)
set_bit(AS_ENOSPC, &mapping->flags);
else if (error)
set_bit(AS_EIO, &mapping->flags);

}

/**
* gfs2_page_writeback - Start writeback of data for a glock's inode
* @gl: the glock
*
* Calls filemap_fdatawrite() on the mapping of a regular file. No-op for
* all other types. The return value of filemap_fdatawrite() is ignored.
*
* NOTE(review): gl->gl_object is dereferenced without a NULL check, so the
* caller must guarantee that an inode is attached to the glock.
*/
static void gfs2_page_writeback(struct gfs2_glock *gl)
{
struct gfs2_inode *ip = gl->gl_object;
struct inode *inode = &ip->i_inode;
struct address_space *mapping = inode->i_mapping;

if (!S_ISREG(inode->i_mode))
return;

filemap_fdatawrite(mapping);
}

/**
* meta_go_sync - sync out the metadata for this glock
* @gl: the glock
Expand Down Expand Up @@ -264,11 +200,24 @@ static void inode_go_drop_th(struct gfs2_glock *gl)

static void inode_go_sync(struct gfs2_glock *gl)
{
struct gfs2_inode *ip = gl->gl_object;

if (ip && !S_ISREG(ip->i_inode.i_mode))
ip = NULL;

if (test_bit(GLF_DIRTY, &gl->gl_flags)) {
gfs2_page_writeback(gl);
gfs2_log_flush(gl->gl_sbd, gl);
if (ip)
filemap_fdatawrite(ip->i_inode.i_mapping);
gfs2_meta_sync(gl);
gfs2_page_wait(gl);
if (ip) {
struct address_space *mapping = ip->i_inode.i_mapping;
int error = filemap_fdatawait(mapping);
if (error == -ENOSPC)
set_bit(AS_ENOSPC, &mapping->flags);
else if (error)
set_bit(AS_EIO, &mapping->flags);
}
clear_bit(GLF_DIRTY, &gl->gl_flags);
gfs2_ail_empty_gl(gl);
}
Expand All @@ -283,14 +232,20 @@ static void inode_go_sync(struct gfs2_glock *gl)

static void inode_go_inval(struct gfs2_glock *gl, int flags)
{
struct gfs2_inode *ip = gl->gl_object;
int meta = (flags & DIO_METADATA);

if (meta) {
struct gfs2_inode *ip = gl->gl_object;
gfs2_meta_inval(gl);
set_bit(GIF_INVALID, &ip->i_flags);
if (ip)
set_bit(GIF_INVALID, &ip->i_flags);
}

if (ip && S_ISREG(ip->i_inode.i_mode)) {
truncate_inode_pages(ip->i_inode.i_mapping, 0);
gfs2_assert_withdraw(GFS2_SB(&ip->i_inode), !ip->i_inode.i_mapping->nrpages);
clear_bit(GIF_PAGED, &ip->i_flags);
}
gfs2_page_inval(gl);
}

/**
Expand Down
17 changes: 10 additions & 7 deletions fs/gfs2/log.c
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,12 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
* @sdp: The GFS2 superblock
* @blks: The number of blocks to reserve
*
* Note that we never give out the last 6 blocks of the journal. That's
* due to the fact that there are a small number of header blocks
* associated with each log flush. The exact number can't be known until
* flush time, so we ensure that we have just enough free blocks at all
* times to avoid running out during a log flush.
*
* Returns: errno
*/

Expand All @@ -274,7 +280,7 @@ int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)

mutex_lock(&sdp->sd_log_reserve_mutex);
gfs2_log_lock(sdp);
while(sdp->sd_log_blks_free <= blks) {
while(sdp->sd_log_blks_free <= (blks + 6)) {
gfs2_log_unlock(sdp);
gfs2_ail1_empty(sdp, 0);
gfs2_log_flush(sdp, NULL);
Expand Down Expand Up @@ -643,12 +649,9 @@ void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
up_read(&sdp->sd_log_flush_lock);

gfs2_log_lock(sdp);
if (sdp->sd_log_num_buf > gfs2_tune_get(sdp, gt_incore_log_blocks)) {
gfs2_log_unlock(sdp);
gfs2_log_flush(sdp, NULL);
} else {
gfs2_log_unlock(sdp);
}
if (sdp->sd_log_num_buf > gfs2_tune_get(sdp, gt_incore_log_blocks))
wake_up_process(sdp->sd_logd_process);
gfs2_log_unlock(sdp);
}

/**
Expand Down
3 changes: 3 additions & 0 deletions fs/gfs2/meta_io.c
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,9 @@ int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num,
struct buffer_head *bh = NULL, **bh_slot = ip->i_cache + height;
int in_cache = 0;

BUG_ON(!gl);
BUG_ON(!sdp);

spin_lock(&ip->i_spin);
if (*bh_slot && (*bh_slot)->b_blocknr == num) {
bh = *bh_slot;
Expand Down
7 changes: 3 additions & 4 deletions fs/gfs2/ops_super.c
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,8 @@ static void gfs2_write_super(struct super_block *sb)
static int gfs2_sync_fs(struct super_block *sb, int wait)
{
sb->s_dirt = 0;
gfs2_log_flush(sb->s_fs_info, NULL);
if (wait)
gfs2_log_flush(sb->s_fs_info, NULL);
return 0;
}

Expand Down Expand Up @@ -293,8 +294,6 @@ static void gfs2_clear_inode(struct inode *inode)
*/
if (inode->i_private) {
struct gfs2_inode *ip = GFS2_I(inode);
gfs2_glock_inode_squish(inode);
gfs2_assert(inode->i_sb->s_fs_info, ip->i_gl->gl_state == LM_ST_UNLOCKED);
ip->i_gl->gl_object = NULL;
gfs2_glock_schedule_for_reclaim(ip->i_gl);
gfs2_glock_put(ip->i_gl);
Expand Down Expand Up @@ -395,7 +394,7 @@ static void gfs2_delete_inode(struct inode *inode)
if (!inode->i_private)
goto out;

error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &gh);
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB, &gh);
if (unlikely(error)) {
gfs2_glock_dq_uninit(&ip->i_iopen_gh);
goto out;
Expand Down

0 comments on commit b004157

Please sign in to comment.