Skip to content

Commit

Permalink
ocfs2: zero tail of sparse files on truncate
Browse files Browse the repository at this point in the history
Since we don't zero on extend anymore, truncate needs to be fixed up to zero
the part of a file between i_size and the end of its cluster. Otherwise a
subsequent extend could expose bad data.

This introduced a new helper, which can be used in ocfs2_write().

Signed-off-by: Mark Fasheh <[email protected]>
  • Loading branch information
Mark Fasheh committed Apr 26, 2007
1 parent 25baf2d commit 60b1139
Show file tree
Hide file tree
Showing 7 changed files with 328 additions and 25 deletions.
224 changes: 224 additions & 0 deletions fs/ocfs2/alloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,15 @@
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/swap.h>

#define MLOG_MASK_PREFIX ML_DISK_ALLOC
#include <cluster/masklog.h>

#include "ocfs2.h"

#include "alloc.h"
#include "aops.h"
#include "dlmglue.h"
#include "extent_map.h"
#include "inode.h"
Expand Down Expand Up @@ -3342,6 +3344,228 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
return status;
}

/*
 * walk_page_buffers() callback for inodes whose data is not ordered
 * (the !ocfs2_should_order_data() case in ocfs2_zero_cluster_pages()).
 *
 * Flags the buffer uptodate and dirty. 'handle' is unused here; it is
 * only present to match the callback signature. Always returns 0.
 */
static int ocfs2_writeback_zero_func(handle_t *handle, struct buffer_head *bh)
{
	set_buffer_uptodate(bh);
	mark_buffer_dirty(bh);

	return 0;
}

/*
 * walk_page_buffers() callback for inodes for which
 * ocfs2_should_order_data() is true.
 *
 * Flags the buffer uptodate and dirty, then hands it to
 * ocfs2_journal_dirty_data() together with 'handle'. The return value
 * is whatever ocfs2_journal_dirty_data() reports.
 */
static int ocfs2_ordered_zero_func(handle_t *handle, struct buffer_head *bh)
{
	int status;

	set_buffer_uptodate(bh);
	mark_buffer_dirty(bh);

	status = ocfs2_journal_dirty_data(handle, bh);

	return status;
}

/*
 * Zero the part of each page in 'pages' that lies past 'isize', mark
 * the buffers covering the zeroed range uptodate and dirty, and then
 * unlock and release every page.
 *
 * inode:    inode whose tail is being zeroed (sparse alloc only, this
 *           is enforced with BUG_ON)
 * isize:    size to zero from; only its offset inside the first page
 *           is used, every later page is zeroed from offset 0
 * pages:    pages covering the tail; each one is unlocked and released
 *           here on every path, so the caller must not touch them
 *           afterwards
 * numpages: number of valid entries in 'pages'
 * phys:     physical block number handed (by reference) to
 *           ocfs2_map_page_blocks() for each page
 * handle:   journal handle forwarded to the per-buffer callbacks
 *
 * Failures from ocfs2_map_page_blocks() and walk_page_buffers() are
 * logged with mlog_errno() and otherwise ignored; nothing is returned.
 */
static void ocfs2_zero_cluster_pages(struct inode *inode, loff_t isize,
				     struct page **pages, int numpages,
				     u64 phys, handle_t *handle)
{
	int i, ret, partial;
	void *kaddr;
	struct page *page;
	unsigned int from, to = PAGE_CACHE_SIZE;
	struct super_block *sb = inode->i_sb;
	int (*zero_func)(handle_t *handle, struct buffer_head *bh);

	BUG_ON(!ocfs2_sparse_alloc(OCFS2_SB(sb)));

	if (numpages == 0)
		goto out;

	from = isize & (PAGE_CACHE_SIZE - 1); /* 1st page offset */
	if (PAGE_CACHE_SHIFT > OCFS2_SB(sb)->s_clustersize_bits) {
		/*
		 * Since 'from' has been capped to a value below page
		 * size, this calculation won't be able to overflow
		 * 'to'
		 */
		to = ocfs2_align_bytes_to_clusters(sb, from);

		/*
		 * The truncate tail in this case should never contain
		 * more than one page at maximum. The loop below also
		 * assumes this.
		 */
		BUG_ON(numpages != 1);
	}

	for (i = 0; i < numpages; i++) {
		page = pages[i];

		BUG_ON(from > PAGE_CACHE_SIZE);
		BUG_ON(to > PAGE_CACHE_SIZE);

		ret = ocfs2_map_page_blocks(page, &phys, inode, from, to, 0);
		if (ret)
			mlog_errno(ret);

		kaddr = kmap_atomic(page, KM_USER0);
		memset(kaddr + from, 0, to - from);
		kunmap_atomic(kaddr, KM_USER0);

		/*
		 * Need to set the buffers we zero'd into uptodate
		 * here if they aren't - ocfs2_map_page_blocks()
		 * might've skipped some
		 */
		if (ocfs2_should_order_data(inode))
			zero_func = ocfs2_ordered_zero_func;
		else
			zero_func = ocfs2_writeback_zero_func;

		/*
		 * 'partial' describes a single page, so it has to start
		 * from zero for each one. Otherwise a partially
		 * uptodate first page would keep the fully zeroed pages
		 * that follow from ever being marked uptodate.
		 */
		partial = 0;
		ret = walk_page_buffers(handle, page_buffers(page),
					from, to, &partial, zero_func);
		if (ret < 0)
			mlog_errno(ret);

		if (!partial)
			SetPageUptodate(page);

		flush_dcache_page(page);

		/*
		 * Every page after the 1st one should be completely zero'd.
		 */
		from = 0;
	}
out:
	/* Bounded by numpages, so this is a no-op when nothing was passed. */
	for (i = 0; i < numpages; i++) {
		page = pages[i];
		unlock_page(page);
		mark_page_accessed(page);
		page_cache_release(page);
	}
}

/*
 * Grab the page cache pages spanning from the page containing 'isize'
 * up to the end of the cluster that 'isize' falls in.
 *
 * inode: inode to operate on (sparse alloc only, enforced with BUG_ON)
 * isize: byte offset at which the tail starts
 * pages: caller-supplied array that receives the grabbed pages
 * num:   set to the number of pages stored in 'pages'; 0 on error, when
 *        'isize' is cluster aligned, or when the tail is a hole
 * phys:  receives the block number looked up for 'isize' via
 *        ocfs2_extent_map_get_blocks(); not written when 'isize' is
 *        cluster aligned
 *
 * Returns 0 on success, the error from ocfs2_extent_map_get_blocks(),
 * or -ENOMEM if a page could not be grabbed. On error every page that
 * was already grabbed is unlocked and released again.
 */
static int ocfs2_grab_eof_pages(struct inode *inode, loff_t isize, struct page **pages,
				int *num, u64 *phys)
{
	struct super_block *sb = inode->i_sb;
	struct address_space *mapping = inode->i_mapping;
	unsigned int csize = OCFS2_SB(inode->i_sb)->s_clustersize;
	unsigned long index;
	u64 next_cluster_bytes, end_index;
	int i, ret = 0, numpages = 0;

	BUG_ON(!ocfs2_sparse_alloc(OCFS2_SB(sb)));

	/* Already on a cluster boundary: there is no tail to grab. */
	if (!(isize & (csize - 1)))
		goto out;

	ret = ocfs2_extent_map_get_blocks(inode, isize >> sb->s_blocksize_bits,
					  phys, NULL);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/* No block behind the tail, i.e. it is a hole. */
	if (*phys == 0)
		goto out;

	next_cluster_bytes = ocfs2_align_bytes_to_clusters(inode->i_sb, isize);
	end_index = next_cluster_bytes >> PAGE_CACHE_SHIFT;
	index = isize >> PAGE_CACHE_SHIFT;

	/* At least one page is always grabbed, hence do/while. */
	do {
		pages[numpages] = grab_cache_page(mapping, index);
		if (!pages[numpages]) {
			ret = -ENOMEM;
			mlog_errno(ret);
			goto out;
		}

		index++;
		numpages++;
	} while (index < end_index);

out:
	if (ret != 0) {
		/* Undo whatever we managed to grab before failing. */
		for (i = 0; pages && i < numpages; i++) {
			if (!pages[i])
				continue;

			unlock_page(pages[i]);
			page_cache_release(pages[i]);
		}
		numpages = 0;
	}

	*num = numpages;

	return ret;
}

/*
 * Zero the area past i_size but still within an allocated
 * cluster. This avoids exposing nonzero data on subsequent file
 * extends.
 *
 * We need to call this before i_size is updated on the inode because
 * otherwise block_write_full_page() will skip writeout of pages past
 * i_size. The new_i_size parameter is passed for this reason.
 *
 * Returns 0 when there is nothing to do or on success, -ENOMEM if the
 * page array cannot be allocated, or the error reported by
 * ocfs2_grab_eof_pages() / filemap_fdatawrite().
 */
int ocfs2_zero_tail_for_truncate(struct inode *inode, handle_t *handle,
				 u64 new_i_size)
{
	int ret, numpages;
	u64 phys;
	struct page **pages;

	/*
	 * File systems which don't support sparse files zero on every
	 * extend.
	 */
	if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
		return 0;

	pages = kcalloc(ocfs2_pages_per_cluster(inode->i_sb),
			sizeof(struct page *), GFP_NOFS);
	if (!pages) {
		ret = -ENOMEM;
		mlog_errno(ret);
		return ret;
	}

	ret = ocfs2_grab_eof_pages(inode, new_i_size, pages, &numpages, &phys);
	if (ret) {
		mlog_errno(ret);
		goto out_free;
	}

	/*
	 * No pages grabbed means the new size sits on a cluster
	 * boundary or the tail is a hole - nothing more to do.
	 */
	if (numpages != 0) {
		ocfs2_zero_cluster_pages(inode, new_i_size, pages, numpages,
					 phys, handle);

		/*
		 * Initiate writeout of the pages we zero'd here. We don't
		 * wait on them - the truncate_inode_pages() call later will
		 * do that for us.
		 */
		ret = filemap_fdatawrite(inode->i_mapping);
		if (ret)
			mlog_errno(ret);
	}

out_free:
	kfree(pages);

	return ret;
}

/*
* It is expected, that by the time you call this function,
* inode->i_size and fe->i_size have been adjusted.
Expand Down
2 changes: 2 additions & 0 deletions fs/ocfs2/alloc.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ struct ocfs2_truncate_context {
struct buffer_head *tc_last_eb_bh;
};

/*
 * Zero the range between new_i_size and the end of its cluster and
 * start writeout of the zeroed pages. Does nothing (returns 0) on file
 * systems without sparse allocation. Returns 0 on success, otherwise
 * the error from allocating or grabbing the pages (e.g. -ENOMEM) or
 * from starting the writeout.
 */
int ocfs2_zero_tail_for_truncate(struct inode *inode, handle_t *handle,
u64 new_i_size);
int ocfs2_prepare_truncate(struct ocfs2_super *osb,
struct inode *inode,
struct buffer_head *fe_bh,
Expand Down
34 changes: 15 additions & 19 deletions fs/ocfs2/aops.c
Original file line number Diff line number Diff line change
Expand Up @@ -308,13 +308,13 @@ int ocfs2_prepare_write_nolock(struct inode *inode, struct page *page,
* functionality yet, but IMHO it's better to cut and paste the whole
* thing so we can avoid introducing our own bugs (and easily pick up
* their fixes when they happen) --Mark */
static int walk_page_buffers( handle_t *handle,
struct buffer_head *head,
unsigned from,
unsigned to,
int *partial,
int (*fn)( handle_t *handle,
struct buffer_head *bh))
int walk_page_buffers( handle_t *handle,
struct buffer_head *head,
unsigned from,
unsigned to,
int *partial,
int (*fn)( handle_t *handle,
struct buffer_head *bh))
{
struct buffer_head *bh;
unsigned block_start, block_end;
Expand Down Expand Up @@ -654,9 +654,9 @@ static void ocfs2_clear_page_regions(struct page *page,
*
* This will also skip zeroing, which is handled externally.
*/
static int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
struct inode *inode, unsigned int from,
unsigned int to, int new)
int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
struct inode *inode, unsigned int from,
unsigned int to, int new)
{
int ret = 0;
struct buffer_head *head, *bh, *wait[2], **wait_bh = wait;
Expand All @@ -675,8 +675,7 @@ static int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
* Ignore blocks outside of our i/o range -
* they may belong to unallocated clusters.
*/
if (block_start >= to ||
(block_start + bsize) <= from) {
if (block_start >= to || block_end <= from) {
if (PageUptodate(page))
set_buffer_uptodate(bh);
continue;
Expand Down Expand Up @@ -971,21 +970,18 @@ static ssize_t ocfs2_write(struct file *file, u32 phys, handle_t *handle,
u64 v_blkno, p_blkno;
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
unsigned int cbits = OCFS2_SB(inode->i_sb)->s_clustersize_bits;
unsigned long index, start;
struct page **cpages;

new = phys == 0 ? 1 : 0;

/*
* Figure out how many pages we'll be manipulating here. For
* non-allocating write, or any writes where cluster size is
* less than page size, we only need one page. Otherwise,
* allocating writes of cluster size larger than page size
* need cluster size pages.
* non allocating write, we just change the one
* page. Otherwise, we'll need a whole clusters worth.
*/
if (new && !wc->w_large_pages)
numpages = (1 << cbits) / PAGE_SIZE;
if (new)
numpages = ocfs2_pages_per_cluster(inode->i_sb);

cpages = kzalloc(sizeof(*cpages) * numpages, GFP_NOFS);
if (!cpages) {
Expand Down
12 changes: 12 additions & 0 deletions fs/ocfs2/aops.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,18 @@ handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
unsigned from,
unsigned to);

/*
 * Exported so that the truncate tail zeroing code in alloc.c can call
 * it; it was previously static to aops.c.
 * 'from' and 'to' are byte offsets within 'page' describing the i/o
 * range; a nonzero return is treated as an error by callers.
 * NOTE(review): the body is not visible from here - confirm in aops.c
 * exactly how *p_blkno is consumed or updated.
 */
int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
struct inode *inode, unsigned int from,
unsigned int to, int new);

/*
 * Exported so that alloc.c can run a per-buffer callback over a page's
 * buffer heads; it was previously static to aops.c.
 * 'head' is the page's buffer list (callers pass page_buffers(page)),
 * 'from'/'to' are byte offsets within the page, and 'fn' is invoked
 * with 'handle' and a buffer head. Callers treat a negative return as
 * an error and read *partial afterwards to decide whether the whole
 * page may be marked uptodate.
 * NOTE(review): the body is not visible from here - presumably fn is
 * only called for buffers overlapping [from, to) and *partial is set
 * when a skipped buffer is not uptodate; confirm in aops.c.
 */
int walk_page_buffers( handle_t *handle,
struct buffer_head *head,
unsigned from,
unsigned to,
int *partial,
int (*fn)( handle_t *handle,
struct buffer_head *bh));

struct ocfs2_write_ctxt;
typedef int (ocfs2_page_writer)(struct inode *, struct ocfs2_write_ctxt *,
u64 *, unsigned int *, unsigned int *);
Expand Down
Loading

0 comments on commit 60b1139

Please sign in to comment.