Skip to content

Commit

Permalink
Fix congestion_wait() sync/async vs read/write confusion
Browse files Browse the repository at this point in the history
Commit 1faa16d accidentally broke
the bdi congestion wait queue logic, causing us to wait on congestion
for WRITE (== 1) when we really wanted BLK_RW_ASYNC (== 0) instead.

Signed-off-by: Jens Axboe <[email protected]>
  • Loading branch information
Jens Axboe authored and Jens Axboe committed Jul 10, 2009
1 parent c2cc49a commit 8aa7e84
Show file tree
Hide file tree
Showing 16 changed files with 43 additions and 40 deletions.
2 changes: 1 addition & 1 deletion arch/x86/lib/usercopy_32.c
Original file line number Diff line number Diff line change
Expand Up @@ -751,7 +751,7 @@ unsigned long __copy_to_user_ll(void __user *to, const void *from,

if (retval == -ENOMEM && is_global_init(current)) {
up_read(&current->mm->mmap_sem);
congestion_wait(WRITE, HZ/50);
congestion_wait(BLK_RW_ASYNC, HZ/50);
goto survive;
}

Expand Down
10 changes: 6 additions & 4 deletions drivers/block/pktcdvd.c
Original file line number Diff line number Diff line change
Expand Up @@ -1372,8 +1372,10 @@ static int pkt_handle_queue(struct pktcdvd_device *pd)
wakeup = (pd->write_congestion_on > 0
&& pd->bio_queue_size <= pd->write_congestion_off);
spin_unlock(&pd->lock);
if (wakeup)
clear_bdi_congested(&pd->disk->queue->backing_dev_info, WRITE);
if (wakeup) {
clear_bdi_congested(&pd->disk->queue->backing_dev_info,
BLK_RW_ASYNC);
}

pkt->sleep_time = max(PACKET_WAIT_TIME, 1);
pkt_set_state(pkt, PACKET_WAITING_STATE);
Expand Down Expand Up @@ -2592,10 +2594,10 @@ static int pkt_make_request(struct request_queue *q, struct bio *bio)
spin_lock(&pd->lock);
if (pd->write_congestion_on > 0
&& pd->bio_queue_size >= pd->write_congestion_on) {
set_bdi_congested(&q->backing_dev_info, WRITE);
set_bdi_congested(&q->backing_dev_info, BLK_RW_ASYNC);
do {
spin_unlock(&pd->lock);
congestion_wait(WRITE, HZ);
congestion_wait(BLK_RW_ASYNC, HZ);
spin_lock(&pd->lock);
} while(pd->bio_queue_size > pd->write_congestion_off);
}
Expand Down
2 changes: 1 addition & 1 deletion drivers/md/dm-crypt.c
Original file line number Diff line number Diff line change
Expand Up @@ -776,7 +776,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
* But don't wait if split was due to the io size restriction
*/
if (unlikely(out_of_pages))
congestion_wait(WRITE, HZ/100);
congestion_wait(BLK_RW_ASYNC, HZ/100);

/*
* With async crypto it is unsafe to share the crypto context
Expand Down
2 changes: 1 addition & 1 deletion fs/fat/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ static int fat_file_release(struct inode *inode, struct file *filp)
if ((filp->f_mode & FMODE_WRITE) &&
MSDOS_SB(inode->i_sb)->options.flush) {
fat_flush_inodes(inode->i_sb, inode, NULL);
congestion_wait(WRITE, HZ/10);
congestion_wait(BLK_RW_ASYNC, HZ/10);
}
return 0;
}
Expand Down
8 changes: 4 additions & 4 deletions fs/fuse/dev.c
Original file line number Diff line number Diff line change
Expand Up @@ -286,8 +286,8 @@ __releases(&fc->lock)
}
if (fc->num_background == FUSE_CONGESTION_THRESHOLD &&
fc->connected && fc->bdi_initialized) {
clear_bdi_congested(&fc->bdi, READ);
clear_bdi_congested(&fc->bdi, WRITE);
clear_bdi_congested(&fc->bdi, BLK_RW_SYNC);
clear_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
}
fc->num_background--;
fc->active_background--;
Expand Down Expand Up @@ -414,8 +414,8 @@ static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
fc->blocked = 1;
if (fc->num_background == FUSE_CONGESTION_THRESHOLD &&
fc->bdi_initialized) {
set_bdi_congested(&fc->bdi, READ);
set_bdi_congested(&fc->bdi, WRITE);
set_bdi_congested(&fc->bdi, BLK_RW_SYNC);
set_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
}
list_add_tail(&req->list, &fc->bg_queue);
flush_bg_queue(fc);
Expand Down
8 changes: 5 additions & 3 deletions fs/nfs/write.c
Original file line number Diff line number Diff line change
Expand Up @@ -202,8 +202,10 @@ static int nfs_set_page_writeback(struct page *page)
struct nfs_server *nfss = NFS_SERVER(inode);

if (atomic_long_inc_return(&nfss->writeback) >
NFS_CONGESTION_ON_THRESH)
set_bdi_congested(&nfss->backing_dev_info, WRITE);
NFS_CONGESTION_ON_THRESH) {
set_bdi_congested(&nfss->backing_dev_info,
BLK_RW_ASYNC);
}
}
return ret;
}
Expand All @@ -215,7 +217,7 @@ static void nfs_end_page_writeback(struct page *page)

end_page_writeback(page);
if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
clear_bdi_congested(&nfss->backing_dev_info, WRITE);
clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
}

/*
Expand Down
2 changes: 1 addition & 1 deletion fs/reiserfs/journal.c
Original file line number Diff line number Diff line change
Expand Up @@ -997,7 +997,7 @@ static int reiserfs_async_progress_wait(struct super_block *s)
DEFINE_WAIT(wait);
struct reiserfs_journal *j = SB_JOURNAL(s);
if (atomic_read(&j->j_async_throttle))
congestion_wait(WRITE, HZ / 10);
congestion_wait(BLK_RW_ASYNC, HZ / 10);
return 0;
}

Expand Down
4 changes: 2 additions & 2 deletions fs/xfs/linux-2.6/kmem.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ kmem_alloc(size_t size, unsigned int __nocast flags)
printk(KERN_ERR "XFS: possible memory allocation "
"deadlock in %s (mode:0x%x)\n",
__func__, lflags);
congestion_wait(WRITE, HZ/50);
congestion_wait(BLK_RW_ASYNC, HZ/50);
} while (1);
}

Expand Down Expand Up @@ -130,7 +130,7 @@ kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags)
printk(KERN_ERR "XFS: possible memory allocation "
"deadlock in %s (mode:0x%x)\n",
__func__, lflags);
congestion_wait(WRITE, HZ/50);
congestion_wait(BLK_RW_ASYNC, HZ/50);
} while (1);
}

Expand Down
2 changes: 1 addition & 1 deletion fs/xfs/linux-2.6/xfs_buf.c
Original file line number Diff line number Diff line change
Expand Up @@ -412,7 +412,7 @@ _xfs_buf_lookup_pages(

XFS_STATS_INC(xb_page_retries);
xfsbufd_wakeup(0, gfp_mask);
congestion_wait(WRITE, HZ/50);
congestion_wait(BLK_RW_ASYNC, HZ/50);
goto retry;
}

Expand Down
6 changes: 3 additions & 3 deletions include/linux/backing-dev.h
Original file line number Diff line number Diff line change
Expand Up @@ -229,9 +229,9 @@ static inline int bdi_rw_congested(struct backing_dev_info *bdi)
(1 << BDI_async_congested));
}

void clear_bdi_congested(struct backing_dev_info *bdi, int rw);
void set_bdi_congested(struct backing_dev_info *bdi, int rw);
long congestion_wait(int rw, long timeout);
void clear_bdi_congested(struct backing_dev_info *bdi, int sync);
void set_bdi_congested(struct backing_dev_info *bdi, int sync);
long congestion_wait(int sync, long timeout);


static inline bool bdi_cap_writeback_dirty(struct backing_dev_info *bdi)
Expand Down
8 changes: 4 additions & 4 deletions include/linux/blkdev.h
Original file line number Diff line number Diff line change
Expand Up @@ -779,18 +779,18 @@ extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t,
* congested queues, and wake up anyone who was waiting for requests to be
* put back.
*/
static inline void blk_clear_queue_congested(struct request_queue *q, int rw)
static inline void blk_clear_queue_congested(struct request_queue *q, int sync)
{
clear_bdi_congested(&q->backing_dev_info, rw);
clear_bdi_congested(&q->backing_dev_info, sync);
}

/*
* A queue has just entered congestion. Flag that in the queue's VM-visible
* state flags and increment the global counter of congested queues.
*/
static inline void blk_set_queue_congested(struct request_queue *q, int rw)
static inline void blk_set_queue_congested(struct request_queue *q, int sync)
{
set_bdi_congested(&q->backing_dev_info, rw);
set_bdi_congested(&q->backing_dev_info, sync);
}

extern void blk_start_queue(struct request_queue *q);
Expand Down
7 changes: 3 additions & 4 deletions mm/backing-dev.c
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,6 @@ static wait_queue_head_t congestion_wqh[2] = {
__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])
};


void clear_bdi_congested(struct backing_dev_info *bdi, int sync)
{
enum bdi_state bit;
Expand All @@ -308,18 +307,18 @@ EXPORT_SYMBOL(set_bdi_congested);

/**
* congestion_wait - wait for a backing_dev to become uncongested
* @rw: READ or WRITE
* @sync: SYNC or ASYNC IO
* @timeout: timeout in jiffies
*
* Waits for up to @timeout jiffies for a backing_dev (any backing_dev) to exit
* write congestion. If no backing_devs are congested then just wait for the
* next write to be completed.
*/
long congestion_wait(int rw, long timeout)
long congestion_wait(int sync, long timeout)
{
long ret;
DEFINE_WAIT(wait);
wait_queue_head_t *wqh = &congestion_wqh[rw];
wait_queue_head_t *wqh = &congestion_wqh[sync];

prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
ret = io_schedule_timeout(timeout);
Expand Down
2 changes: 1 addition & 1 deletion mm/memcontrol.c
Original file line number Diff line number Diff line change
Expand Up @@ -1973,7 +1973,7 @@ static int mem_cgroup_force_empty(struct mem_cgroup *mem, bool free_all)
if (!progress) {
nr_retries--;
/* maybe some writeback is necessary */
congestion_wait(WRITE, HZ/10);
congestion_wait(BLK_RW_ASYNC, HZ/10);
}

}
Expand Down
8 changes: 4 additions & 4 deletions mm/page-writeback.c
Original file line number Diff line number Diff line change
Expand Up @@ -575,7 +575,7 @@ static void balance_dirty_pages(struct address_space *mapping)
if (pages_written >= write_chunk)
break; /* We've done our duty */

congestion_wait(WRITE, HZ/10);
congestion_wait(BLK_RW_ASYNC, HZ/10);
}

if (bdi_nr_reclaimable + bdi_nr_writeback < bdi_thresh &&
Expand Down Expand Up @@ -669,7 +669,7 @@ void throttle_vm_writeout(gfp_t gfp_mask)
if (global_page_state(NR_UNSTABLE_NFS) +
global_page_state(NR_WRITEBACK) <= dirty_thresh)
break;
congestion_wait(WRITE, HZ/10);
congestion_wait(BLK_RW_ASYNC, HZ/10);

/*
* The caller might hold locks which can prevent IO completion
Expand Down Expand Up @@ -715,7 +715,7 @@ static void background_writeout(unsigned long _min_pages)
if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) {
/* Wrote less than expected */
if (wbc.encountered_congestion || wbc.more_io)
congestion_wait(WRITE, HZ/10);
congestion_wait(BLK_RW_ASYNC, HZ/10);
else
break;
}
Expand Down Expand Up @@ -787,7 +787,7 @@ static void wb_kupdate(unsigned long arg)
writeback_inodes(&wbc);
if (wbc.nr_to_write > 0) {
if (wbc.encountered_congestion || wbc.more_io)
congestion_wait(WRITE, HZ/10);
congestion_wait(BLK_RW_ASYNC, HZ/10);
else
break; /* All the old data is written */
}
Expand Down
4 changes: 2 additions & 2 deletions mm/page_alloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1666,7 +1666,7 @@ __alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order,
preferred_zone, migratetype);

if (!page && gfp_mask & __GFP_NOFAIL)
congestion_wait(WRITE, HZ/50);
congestion_wait(BLK_RW_ASYNC, HZ/50);
} while (!page && (gfp_mask & __GFP_NOFAIL));

return page;
Expand Down Expand Up @@ -1831,7 +1831,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
pages_reclaimed += did_some_progress;
if (should_alloc_retry(gfp_mask, order, pages_reclaimed)) {
/* Wait for some write requests to complete then retry */
congestion_wait(WRITE, HZ/50);
congestion_wait(BLK_RW_ASYNC, HZ/50);
goto rebalance;
}

Expand Down
8 changes: 4 additions & 4 deletions mm/vmscan.c
Original file line number Diff line number Diff line change
Expand Up @@ -1104,7 +1104,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
*/
if (nr_freed < nr_taken && !current_is_kswapd() &&
lumpy_reclaim) {
congestion_wait(WRITE, HZ/10);
congestion_wait(BLK_RW_ASYNC, HZ/10);

/*
* The attempt at page out may have made some
Expand Down Expand Up @@ -1721,7 +1721,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,

/* Take a nap, wait for some writeback to complete */
if (sc->nr_scanned && priority < DEF_PRIORITY - 2)
congestion_wait(WRITE, HZ/10);
congestion_wait(BLK_RW_ASYNC, HZ/10);
}
/* top priority shrink_zones still had more to do? don't OOM, then */
if (!sc->all_unreclaimable && scanning_global_lru(sc))
Expand Down Expand Up @@ -1960,7 +1960,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
* another pass across the zones.
*/
if (total_scanned && priority < DEF_PRIORITY - 2)
congestion_wait(WRITE, HZ/10);
congestion_wait(BLK_RW_ASYNC, HZ/10);

/*
* We do this so kswapd doesn't build up large priorities for
Expand Down Expand Up @@ -2233,7 +2233,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
goto out;

if (sc.nr_scanned && prio < DEF_PRIORITY - 2)
congestion_wait(WRITE, HZ / 10);
congestion_wait(BLK_RW_ASYNC, HZ / 10);
}
}

Expand Down

0 comments on commit 8aa7e84

Please sign in to comment.