Skip to content

Commit

Permalink
Merge tag 'for-4.17-rc1-tag' of git://git.kernel.org/pub/scm/linux/ke…
Browse files Browse the repository at this point in the history
…rnel/git/kdave/linux

Pull btrfs fixes from David Sterba:
 "This contains a few fixups to the qgroup patches that were merged this
  dev cycle, unaligned access fix, blockgroup removal corner case fix
  and a small debugging output tweak"

* tag 'for-4.17-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: print-tree: debugging output enhancement
  btrfs: Fix race condition between delayed refs and blockgroup removal
  btrfs: fix unaligned access in readdir
  btrfs: Fix wrong btrfs_delalloc_release_extents parameter
  btrfs: delayed-inode: Remove wrong qgroup meta reservation calls
  btrfs: qgroup: Use independent and accurate per inode qgroup rsv
  btrfs: qgroup: Commit transaction in advance to reduce early EDQUOT
  • Loading branch information
torvalds committed Apr 22, 2018
2 parents 37a535e + c087232 commit d54b5c1
Show file tree
Hide file tree
Showing 13 changed files with 199 additions and 47 deletions.
25 changes: 25 additions & 0 deletions fs/btrfs/ctree.h
Original file line number Diff line number Diff line change
Expand Up @@ -459,6 +459,25 @@ struct btrfs_block_rsv {
unsigned short full;
unsigned short type;
unsigned short failfast;

/*
* Qgroup equivalent for @size @reserved
*
* Unlike normal @size/@reserved for inode rsv, qgroup doesn't care
* about things like csum size or how many tree blocks it will need to
* reserve.
*
* Qgroup cares more about net change of the extent usage.
*
* So for one newly inserted file extent, in the worst case it will cause
* a leaf split and a level increase; one nodesize for each file extent
* is already more than enough.
*
* In short, qgroup_rsv_size/qgroup_rsv_reserved is the upper limit of
* the qgroup metadata reservation that could possibly be needed.
*/
u64 qgroup_rsv_size;
u64 qgroup_rsv_reserved;
};

/*
Expand Down Expand Up @@ -714,6 +733,12 @@ struct btrfs_delayed_root;
*/
#define BTRFS_FS_EXCL_OP 16

/*
* To inform transaction_kthread that we need an immediate commit, so it
* doesn't need to wait for commit_interval
*/
#define BTRFS_FS_NEED_ASYNC_COMMIT 17

struct btrfs_fs_info {
u8 fsid[BTRFS_FSID_SIZE];
u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
Expand Down
20 changes: 16 additions & 4 deletions fs/btrfs/delayed-inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -556,6 +556,12 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
dst_rsv = &fs_info->delayed_block_rsv;

num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);

/*
* Here we migrate space rsv from transaction rsv, since we have already
* reserved space when starting a transaction. So there is no need to
* reserve qgroup space here.
*/
ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
if (!ret) {
trace_btrfs_space_reservation(fs_info, "delayed_item",
Expand All @@ -577,7 +583,10 @@ static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
return;

rsv = &fs_info->delayed_block_rsv;
btrfs_qgroup_convert_reserved_meta(root, item->bytes_reserved);
/*
* Check btrfs_delayed_item_reserve_metadata() to see why we don't need
* to release/reserve qgroup space.
*/
trace_btrfs_space_reservation(fs_info, "delayed_item",
item->key.objectid, item->bytes_reserved,
0);
Expand All @@ -602,9 +611,6 @@ static int btrfs_delayed_inode_reserve_metadata(

num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);

ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true);
if (ret < 0)
return ret;
/*
* btrfs_dirty_inode will update the inode under btrfs_join_transaction
* which doesn't reserve space for speed. This is a problem since we
Expand All @@ -616,6 +622,10 @@ static int btrfs_delayed_inode_reserve_metadata(
*/
if (!src_rsv || (!trans->bytes_reserved &&
src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) {
ret = btrfs_qgroup_reserve_meta_prealloc(root,
fs_info->nodesize, true);
if (ret < 0)
return ret;
ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes,
BTRFS_RESERVE_NO_FLUSH);
/*
Expand All @@ -634,6 +644,8 @@ static int btrfs_delayed_inode_reserve_metadata(
"delayed_inode",
btrfs_ino(inode),
num_bytes, 1);
} else {
btrfs_qgroup_free_meta_prealloc(root, fs_info->nodesize);
}
return ret;
}
Expand Down
19 changes: 14 additions & 5 deletions fs/btrfs/delayed-ref.c
Original file line number Diff line number Diff line change
Expand Up @@ -540,8 +540,10 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_head *head_ref,
struct btrfs_qgroup_extent_record *qrecord,
u64 bytenr, u64 num_bytes, u64 ref_root, u64 reserved,
int action, int is_data, int *qrecord_inserted_ret,
int action, int is_data, int is_system,
int *qrecord_inserted_ret,
int *old_ref_mod, int *new_ref_mod)

{
struct btrfs_delayed_ref_head *existing;
struct btrfs_delayed_ref_root *delayed_refs;
Expand Down Expand Up @@ -585,6 +587,7 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
head_ref->ref_mod = count_mod;
head_ref->must_insert_reserved = must_insert_reserved;
head_ref->is_data = is_data;
head_ref->is_system = is_system;
head_ref->ref_tree = RB_ROOT;
INIT_LIST_HEAD(&head_ref->ref_add_list);
RB_CLEAR_NODE(&head_ref->href_node);
Expand Down Expand Up @@ -772,6 +775,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_qgroup_extent_record *record = NULL;
int qrecord_inserted;
int is_system = (ref_root == BTRFS_CHUNK_TREE_OBJECTID);

BUG_ON(extent_op && extent_op->is_data);
ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
Expand Down Expand Up @@ -800,8 +804,8 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
*/
head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record,
bytenr, num_bytes, 0, 0, action, 0,
&qrecord_inserted, old_ref_mod,
new_ref_mod);
is_system, &qrecord_inserted,
old_ref_mod, new_ref_mod);

add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
num_bytes, parent, ref_root, level, action);
Expand Down Expand Up @@ -868,7 +872,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
*/
head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record,
bytenr, num_bytes, ref_root, reserved,
action, 1, &qrecord_inserted,
action, 1, 0, &qrecord_inserted,
old_ref_mod, new_ref_mod);

add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
Expand Down Expand Up @@ -898,9 +902,14 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
delayed_refs = &trans->transaction->delayed_refs;
spin_lock(&delayed_refs->lock);

/*
* extent_ops just modify the flags of an extent and they don't result
* in ref count changes, hence it's safe to pass false/0 for the
* is_system argument
*/
add_delayed_ref_head(fs_info, trans, head_ref, NULL, bytenr,
num_bytes, 0, 0, BTRFS_UPDATE_DELAYED_HEAD,
extent_op->is_data, NULL, NULL, NULL);
extent_op->is_data, 0, NULL, NULL, NULL);

spin_unlock(&delayed_refs->lock);
return 0;
Expand Down
1 change: 1 addition & 0 deletions fs/btrfs/delayed-ref.h
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ struct btrfs_delayed_ref_head {
*/
unsigned int must_insert_reserved:1;
unsigned int is_data:1;
unsigned int is_system:1;
unsigned int processing:1;
};

Expand Down
1 change: 1 addition & 0 deletions fs/btrfs/disk-io.c
Original file line number Diff line number Diff line change
Expand Up @@ -1824,6 +1824,7 @@ static int transaction_kthread(void *arg)

now = get_seconds();
if (cur->state < TRANS_STATE_BLOCKED &&
!test_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags) &&
(now < cur->start_time ||
now - cur->start_time < fs_info->commit_interval)) {
spin_unlock(&fs_info->trans_lock);
Expand Down
73 changes: 57 additions & 16 deletions fs/btrfs/extent-tree.c
Original file line number Diff line number Diff line change
Expand Up @@ -2601,13 +2601,19 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans,
trace_run_delayed_ref_head(fs_info, head, 0);

if (head->total_ref_mod < 0) {
struct btrfs_block_group_cache *cache;
struct btrfs_space_info *space_info;
u64 flags;

cache = btrfs_lookup_block_group(fs_info, head->bytenr);
ASSERT(cache);
percpu_counter_add(&cache->space_info->total_bytes_pinned,
if (head->is_data)
flags = BTRFS_BLOCK_GROUP_DATA;
else if (head->is_system)
flags = BTRFS_BLOCK_GROUP_SYSTEM;
else
flags = BTRFS_BLOCK_GROUP_METADATA;
space_info = __find_space_info(fs_info, flags);
ASSERT(space_info);
percpu_counter_add(&space_info->total_bytes_pinned,
-head->num_bytes);
btrfs_put_block_group(cache);

if (head->is_data) {
spin_lock(&delayed_refs->lock);
Expand Down Expand Up @@ -5559,14 +5565,18 @@ static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info,

static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *block_rsv,
struct btrfs_block_rsv *dest, u64 num_bytes)
struct btrfs_block_rsv *dest, u64 num_bytes,
u64 *qgroup_to_release_ret)
{
struct btrfs_space_info *space_info = block_rsv->space_info;
u64 qgroup_to_release = 0;
u64 ret;

spin_lock(&block_rsv->lock);
if (num_bytes == (u64)-1)
if (num_bytes == (u64)-1) {
num_bytes = block_rsv->size;
qgroup_to_release = block_rsv->qgroup_rsv_size;
}
block_rsv->size -= num_bytes;
if (block_rsv->reserved >= block_rsv->size) {
num_bytes = block_rsv->reserved - block_rsv->size;
Expand All @@ -5575,6 +5585,13 @@ static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
} else {
num_bytes = 0;
}
if (block_rsv->qgroup_rsv_reserved >= block_rsv->qgroup_rsv_size) {
qgroup_to_release = block_rsv->qgroup_rsv_reserved -
block_rsv->qgroup_rsv_size;
block_rsv->qgroup_rsv_reserved = block_rsv->qgroup_rsv_size;
} else {
qgroup_to_release = 0;
}
spin_unlock(&block_rsv->lock);

ret = num_bytes;
Expand All @@ -5597,6 +5614,8 @@ static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
space_info_add_old_bytes(fs_info, space_info,
num_bytes);
}
if (qgroup_to_release_ret)
*qgroup_to_release_ret = qgroup_to_release;
return ret;
}

Expand Down Expand Up @@ -5738,25 +5757,35 @@ static int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
struct btrfs_root *root = inode->root;
struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
u64 num_bytes = 0;
u64 qgroup_num_bytes = 0;
int ret = -ENOSPC;

spin_lock(&block_rsv->lock);
if (block_rsv->reserved < block_rsv->size)
num_bytes = block_rsv->size - block_rsv->reserved;
if (block_rsv->qgroup_rsv_reserved < block_rsv->qgroup_rsv_size)
qgroup_num_bytes = block_rsv->qgroup_rsv_size -
block_rsv->qgroup_rsv_reserved;
spin_unlock(&block_rsv->lock);

if (num_bytes == 0)
return 0;

ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true);
ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_num_bytes, true);
if (ret)
return ret;
ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
if (!ret) {
block_rsv_add_bytes(block_rsv, num_bytes, 0);
trace_btrfs_space_reservation(root->fs_info, "delalloc",
btrfs_ino(inode), num_bytes, 1);
}

/* Don't forget to increase qgroup_rsv_reserved */
spin_lock(&block_rsv->lock);
block_rsv->qgroup_rsv_reserved += qgroup_num_bytes;
spin_unlock(&block_rsv->lock);
} else
btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes);
return ret;
}

Expand All @@ -5777,20 +5806,23 @@ static void btrfs_inode_rsv_release(struct btrfs_inode *inode, bool qgroup_free)
struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
u64 released = 0;
u64 qgroup_to_release = 0;

/*
* Since we statically set the block_rsv->size we just want to say we
* are releasing 0 bytes, and then we'll just get the reservation over
* the size free'd.
*/
released = block_rsv_release_bytes(fs_info, block_rsv, global_rsv, 0);
released = block_rsv_release_bytes(fs_info, block_rsv, global_rsv, 0,
&qgroup_to_release);
if (released > 0)
trace_btrfs_space_reservation(fs_info, "delalloc",
btrfs_ino(inode), released, 0);
if (qgroup_free)
btrfs_qgroup_free_meta_prealloc(inode->root, released);
btrfs_qgroup_free_meta_prealloc(inode->root, qgroup_to_release);
else
btrfs_qgroup_convert_reserved_meta(inode->root, released);
btrfs_qgroup_convert_reserved_meta(inode->root,
qgroup_to_release);
}

void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
Expand All @@ -5802,7 +5834,7 @@ void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
if (global_rsv == block_rsv ||
block_rsv->space_info != global_rsv->space_info)
global_rsv = NULL;
block_rsv_release_bytes(fs_info, block_rsv, global_rsv, num_bytes);
block_rsv_release_bytes(fs_info, block_rsv, global_rsv, num_bytes, NULL);
}

static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
Expand Down Expand Up @@ -5882,7 +5914,7 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
{
block_rsv_release_bytes(fs_info, &fs_info->global_block_rsv, NULL,
(u64)-1);
(u64)-1, NULL);
WARN_ON(fs_info->trans_block_rsv.size > 0);
WARN_ON(fs_info->trans_block_rsv.reserved > 0);
WARN_ON(fs_info->chunk_block_rsv.size > 0);
Expand All @@ -5906,7 +5938,7 @@ void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans)
WARN_ON_ONCE(!list_empty(&trans->new_bgs));

block_rsv_release_bytes(fs_info, &fs_info->chunk_block_rsv, NULL,
trans->chunk_bytes_reserved);
trans->chunk_bytes_reserved, NULL);
trans->chunk_bytes_reserved = 0;
}

Expand Down Expand Up @@ -6011,6 +6043,7 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
{
struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
u64 reserve_size = 0;
u64 qgroup_rsv_size = 0;
u64 csum_leaves;
unsigned outstanding_extents;

Expand All @@ -6023,9 +6056,17 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
inode->csum_bytes);
reserve_size += btrfs_calc_trans_metadata_size(fs_info,
csum_leaves);
/*
* For qgroup rsv, the calculation is very simple:
* account one nodesize for each outstanding extent
*
* This is overestimating in most cases.
*/
qgroup_rsv_size = outstanding_extents * fs_info->nodesize;

spin_lock(&block_rsv->lock);
block_rsv->size = reserve_size;
block_rsv->qgroup_rsv_size = qgroup_rsv_size;
spin_unlock(&block_rsv->lock);
}

Expand Down Expand Up @@ -8403,7 +8444,7 @@ static void unuse_block_rsv(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *block_rsv, u32 blocksize)
{
block_rsv_add_bytes(block_rsv, blocksize, 0);
block_rsv_release_bytes(fs_info, block_rsv, NULL, 0);
block_rsv_release_bytes(fs_info, block_rsv, NULL, 0, NULL);
}

/*
Expand Down
2 changes: 1 addition & 1 deletion fs/btrfs/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -1748,7 +1748,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
unlock_extent_cached(&BTRFS_I(inode)->io_tree,
lockstart, lockend, &cached_state);
btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes,
(ret != 0));
true);
if (ret) {
btrfs_drop_pages(pages, num_pages);
break;
Expand Down
Loading

0 comments on commit d54b5c1

Please sign in to comment.