Merge tag 'reflink-speedups-5.19_2022-04-28' of git://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into xfs-5.19-for-next

xfs: fix reflink inefficiencies

As Dave Chinner has complained on IRC, there are a couple of
things about reflink that are very inefficient.  First of all, we
limited the size of all bunmapi operations to avoid flooding the log
with defer ops in the worst case, but recent changes to the defer
ops code have solved that problem, so get rid of the bunmapi length
clamp.

Second, the log reservations for reflink operations are far
larger than they need to be.  Shrink them to exactly what we need to
handle each deferred RUI and CUI log item, and no more.  Also reduce
logcount because we don't need 8 rolls per operation.  Introduce a
transaction reservation compatibility layer to avoid changing the
minimum log size calculations.

Signed-off-by: Dave Chinner <[email protected]>
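
For a rough sense of why the logcount reduction matters, here is a minimal
sketch of the reservation arithmetic with invented numbers rather than the
real XFS values: the worst-case log space a permanent reservation can pin is
roughly the per-roll reservation multiplied by the log count, which is the
same quantity the minimum-log-size scan further down compares.

#include <stdio.h>

/* Hypothetical stand-in for struct xfs_trans_res; all values are invented. */
struct trans_res {
	unsigned int	logres;		/* bytes reserved per transaction roll */
	int		logcount;	/* number of rolls the reservation covers */
};

/* Worst-case log footprint of one reservation, as in xfs_log_get_max_trans_res(). */
static unsigned int worst_case_bytes(const struct trans_res *r)
{
	return r->logcount > 1 ? r->logres * (unsigned int)r->logcount : r->logres;
}

int main(void)
{
	struct trans_res old_write = { .logres = 300000, .logcount = 8 };	/* made up */
	struct trans_res new_write = { .logres = 150000, .logcount = 2 };	/* made up */

	printf("old worst case: %u bytes\n", worst_case_bytes(&old_write));
	printf("new worst case: %u bytes\n", worst_case_bytes(&new_write));
	return 0;
}

With numbers like these, halving the per-roll reservation and cutting the roll
count from 8 shrinks the worst case from about 2.4 MB to 300 kB; the
compatibility layer exists so that this shrinkage does not ripple into the
minimum log size that mkfs computes.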
dchinner committed May 4, 2022
2 parents 956f1b8 + 6ed7e50 commit 166afc4
Showing 9 changed files with 345 additions and 139 deletions.
22 changes: 1 addition & 21 deletions fs/xfs/libxfs/xfs_bmap.c
@@ -5280,7 +5280,6 @@ __xfs_bunmapi(
int whichfork; /* data or attribute fork */
xfs_fsblock_t sum;
xfs_filblks_t len = *rlen; /* length to unmap in file */
xfs_fileoff_t max_len;
xfs_fileoff_t end;
struct xfs_iext_cursor icur;
bool done = false;
@@ -5299,16 +5298,6 @@ __xfs_bunmapi(
ASSERT(len > 0);
ASSERT(nexts >= 0);

/*
* Guesstimate how many blocks we can unmap without running the risk of
* blowing out the transaction with a mix of EFIs and reflink
* adjustments.
*/
if (tp && xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK)
max_len = min(len, xfs_refcount_max_unmap(tp->t_log_res));
else
max_len = len;

error = xfs_iread_extents(tp, ip, whichfork);
if (error)
return error;
@@ -5347,7 +5336,7 @@ __xfs_bunmapi(

extno = 0;
while (end != (xfs_fileoff_t)-1 && end >= start &&
(nexts == 0 || extno < nexts) && max_len > 0) {
(nexts == 0 || extno < nexts)) {
/*
* Is the found extent after a hole in which end lives?
* Just back up to the previous extent, if so.
@@ -5381,14 +5370,6 @@ __xfs_bunmapi(
if (del.br_startoff + del.br_blockcount > end + 1)
del.br_blockcount = end + 1 - del.br_startoff;

/* How much can we safely unmap? */
if (max_len < del.br_blockcount) {
del.br_startoff += del.br_blockcount - max_len;
if (!wasdel)
del.br_startblock += del.br_blockcount - max_len;
del.br_blockcount = max_len;
}

if (!isrt)
goto delete;

@@ -5524,7 +5505,6 @@ __xfs_bunmapi(
if (error)
goto error0;

max_len -= del.br_blockcount;
end = del.br_startoff - 1;
nodelete:
/*
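
For reference, the clamp deleted above was driven by xfs_refcount_max_unmap(),
shown in the last hunk of this commit, which capped each call at roughly three
quarters of the transaction's log reservation divided by the 32-byte per-item
overhead estimate. A standalone sketch of that old arithmetic, using a made-up
reservation size:

#include <stdio.h>

#define XFS_REFCOUNT_ITEM_OVERHEAD	32	/* assumed bytes per deferred item */

/* The old helper from xfs_refcount.h; this series drops the call shown above. */
static long refcount_max_unmap(int log_res)
{
	return (log_res * 3 / 4) / XFS_REFCOUNT_ITEM_OVERHEAD;
}

int main(void)
{
	int log_res = 200000;	/* hypothetical tp->t_log_res, in bytes */

	/* Old behaviour: one __xfs_bunmapi() call unmapped at most this many blocks. */
	printf("max blocks per call: %ld\n", refcount_max_unmap(log_res));
	return 0;
}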
75 changes: 71 additions & 4 deletions fs/xfs/libxfs/xfs_log_rlimit.c
@@ -14,6 +14,7 @@
#include "xfs_trans_space.h"
#include "xfs_da_btree.h"
#include "xfs_bmap_btree.h"
#include "xfs_trace.h"

/*
* Calculate the maximum length in bytes that would be required for a local
@@ -36,6 +37,65 @@ xfs_log_calc_max_attrsetm_res(
M_RES(mp)->tr_attrsetrt.tr_logres * nblks;
}

/*
* Compute an alternate set of log reservation sizes for use exclusively with
* minimum log size calculations.
*/
static void
xfs_log_calc_trans_resv_for_minlogblocks(
struct xfs_mount *mp,
struct xfs_trans_resv *resv)
{
unsigned int rmap_maxlevels = mp->m_rmap_maxlevels;

/*
* In the early days of rmap+reflink, we always set the rmap maxlevels
* to 9 even if the AG was small enough that it would never grow to
* that height. Transaction reservation sizes influence the minimum
* log size calculation, which influences the size of the log that mkfs
* creates. Use the old value here to ensure that newly formatted
* small filesystems will mount on older kernels.
*/
if (xfs_has_rmapbt(mp) && xfs_has_reflink(mp))
mp->m_rmap_maxlevels = XFS_OLD_REFLINK_RMAP_MAXLEVELS;

xfs_trans_resv_calc(mp, resv);

if (xfs_has_reflink(mp)) {
/*
* In the early days of reflink, typical log operation counts
* were greatly overestimated.
*/
resv->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT_REFLINK;
resv->tr_itruncate.tr_logcount =
XFS_ITRUNCATE_LOG_COUNT_REFLINK;
resv->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT_REFLINK;
} else if (xfs_has_rmapbt(mp)) {
/*
* In the early days of non-reflink rmap, the impact of rmapbt
* updates on log counts was not taken into account at all.
*/
resv->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT;
resv->tr_itruncate.tr_logcount = XFS_ITRUNCATE_LOG_COUNT;
resv->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT;
}

/*
* In the early days of reflink, we did not use deferred refcount
* update log items, so log reservations must be recomputed using the
* old calculations.
*/
resv->tr_write.tr_logres =
xfs_calc_write_reservation_minlogsize(mp);
resv->tr_itruncate.tr_logres =
xfs_calc_itruncate_reservation_minlogsize(mp);
resv->tr_qm_dqalloc.tr_logres =
xfs_calc_qm_dqalloc_reservation_minlogsize(mp);

/* Put everything back the way it was. This goes at the end. */
mp->m_rmap_maxlevels = rmap_maxlevels;
}

/*
* Iterate over the log space reservation table to figure out and return
* the maximum one in terms of the pre-calculated values which were done
@@ -46,29 +106,36 @@ xfs_log_get_max_trans_res(
struct xfs_mount *mp,
struct xfs_trans_res *max_resp)
{
struct xfs_trans_resv resv = {};
struct xfs_trans_res *resp;
struct xfs_trans_res *end_resp;
unsigned int i;
int log_space = 0;
int attr_space;

attr_space = xfs_log_calc_max_attrsetm_res(mp);

resp = (struct xfs_trans_res *)M_RES(mp);
end_resp = (struct xfs_trans_res *)(M_RES(mp) + 1);
for (; resp < end_resp; resp++) {
xfs_log_calc_trans_resv_for_minlogblocks(mp, &resv);

resp = (struct xfs_trans_res *)&resv;
end_resp = (struct xfs_trans_res *)(&resv + 1);
for (i = 0; resp < end_resp; i++, resp++) {
int tmp = resp->tr_logcount > 1 ?
resp->tr_logres * resp->tr_logcount :
resp->tr_logres;

trace_xfs_trans_resv_calc_minlogsize(mp, i, resp);
if (log_space < tmp) {
log_space = tmp;
*max_resp = *resp; /* struct copy */
}
}

if (attr_space > log_space) {
*max_resp = M_RES(mp)->tr_attrsetm; /* struct copy */
*max_resp = resv.tr_attrsetm; /* struct copy */
max_resp->tr_logres = attr_space;
}
trace_xfs_log_get_max_trans_res(mp, max_resp);
}

/*
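
The new helper above follows a save, override, recompute, restore pattern so
that only the minimum-log-size path sees the old-style reservation numbers
while runtime reservations keep the new ones. A schematic sketch of that
shape, using stand-in types and a stubbed calculation instead of the real
xfs_trans_resv_calc() math:

#include <stdio.h>

/* Stand-ins for the mount and reservation structures; fields are illustrative. */
struct mount_stub {
	unsigned int	rmap_maxlevels;
};

struct resv_stub {
	unsigned int	write_logres;
	int		write_logcount;
};

#define OLD_REFLINK_RMAP_MAXLEVELS	9	/* historical value cited above */

static void calc_resv_for_minlogblocks(struct mount_stub *mp, struct resv_stub *resv)
{
	unsigned int saved_maxlevels = mp->rmap_maxlevels;

	/* Pin the geometry to the historical value before computing reservations. */
	mp->rmap_maxlevels = OLD_REFLINK_RMAP_MAXLEVELS;

	/* Stubbed numbers stand in for recomputing with the old assumptions. */
	resv->write_logres = 123456;
	resv->write_logcount = 8;

	/* Put everything back the way it was so runtime reservations are unaffected. */
	mp->rmap_maxlevels = saved_maxlevels;
}

int main(void)
{
	struct mount_stub mp = { .rmap_maxlevels = 5 };
	struct resv_stub resv;

	calc_resv_for_minlogblocks(&mp, &resv);
	printf("minlogsize write res: %u bytes x %d rolls, maxlevels restored to %u\n",
	       resv.write_logres, resv.write_logcount, mp.rmap_maxlevels);
	return 0;
}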
14 changes: 9 additions & 5 deletions fs/xfs/libxfs/xfs_refcount.c
@@ -886,8 +886,13 @@ xfs_refcount_still_have_space(
{
unsigned long overhead;

overhead = cur->bc_ag.refc.shape_changes *
xfs_allocfree_log_count(cur->bc_mp, 1);
/*
* Worst case estimate: full splits of the free space and rmap btrees
* to handle each of the shape changes to the refcount btree.
*/
overhead = xfs_allocfree_block_count(cur->bc_mp,
cur->bc_ag.refc.shape_changes);
overhead += cur->bc_mp->m_refc_maxlevels;
overhead *= cur->bc_mp->m_sb.sb_blocksize;

/*
@@ -960,6 +965,7 @@ xfs_refcount_adjust_extents(
* Either cover the hole (increment) or
* delete the range (decrement).
*/
cur->bc_ag.refc.nr_ops++;
if (tmp.rc_refcount) {
error = xfs_refcount_insert(cur, &tmp,
&found_tmp);
@@ -970,7 +976,6 @@
error = -EFSCORRUPTED;
goto out_error;
}
cur->bc_ag.refc.nr_ops++;
} else {
fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
cur->bc_ag.pag->pag_agno,
@@ -1001,11 +1006,11 @@
ext.rc_refcount += adj;
trace_xfs_refcount_modify_extent(cur->bc_mp,
cur->bc_ag.pag->pag_agno, &ext);
cur->bc_ag.refc.nr_ops++;
if (ext.rc_refcount > 1) {
error = xfs_refcount_update(cur, &ext);
if (error)
goto out_error;
cur->bc_ag.refc.nr_ops++;
} else if (ext.rc_refcount == 1) {
error = xfs_refcount_delete(cur, &found_rec);
if (error)
@@ -1014,7 +1019,6 @@
error = -EFSCORRUPTED;
goto out_error;
}
cur->bc_ag.refc.nr_ops++;
goto advloop;
} else {
fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
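
Two changes land in this file. First, the worst-case overhead estimate in
xfs_refcount_still_have_space() now charges free-space and rmap btree splits
for every shape change plus one block for each level of the refcount btree,
converted to bytes. Second, nr_ops is bumped before each record operation
rather than inside the success paths, so the branches that queue an extent
free (and therefore attach an EFI) are counted against the reservation as
well. A small sketch of the new overhead arithmetic, with invented geometry
and a hypothetical per-operation block count:

#include <stdio.h>

/* Invented geometry; the real values come from the superblock and btree setup. */
#define BLOCKSIZE		4096u	/* sb_blocksize stand-in */
#define REFC_MAXLEVELS		5u	/* m_refc_maxlevels stand-in */

/*
 * Stand-in for xfs_allocfree_block_count(mp, num_ops): assume each shape
 * change may split the free-space btrees and the rmap btree, at a made-up
 * cost of 8 blocks per operation.
 */
static unsigned int allocfree_block_count(unsigned int num_ops)
{
	return num_ops * 8;
}

int main(void)
{
	unsigned int shape_changes = 3;	/* refcount btree shape changes so far */
	unsigned int overhead;

	/* Mirrors the new estimate: btree blocks for the shape changes plus a
	 * full refcount btree split, expressed in bytes. */
	overhead = allocfree_block_count(shape_changes) + REFC_MAXLEVELS;
	overhead *= BLOCKSIZE;

	printf("worst-case overhead: %u bytes\n", overhead);	/* 118784 here */
	return 0;
}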
13 changes: 8 additions & 5 deletions fs/xfs/libxfs/xfs_refcount.h
@@ -67,14 +67,17 @@ extern int xfs_refcount_recover_cow_leftovers(struct xfs_mount *mp,
* log (plus any key updates) so we'll conservatively assume 32 bytes
* per record. We must also leave space for btree splits on both ends
* of the range and space for the CUD and a new CUI.
*
* Each EFI that we attach to the transaction is assumed to consume ~32 bytes.
* This is a low estimate for an EFI tracking a single extent (16 bytes for the
* EFI header, 16 for the extent, and 12 for the xlog op header), but the
* estimate is acceptable if there's more than one extent being freed.
* In the worst case of freeing every other block during a refcount decrease
* operation, we amortize the space used for one EFI log item across 16
* extents.
*/
#define XFS_REFCOUNT_ITEM_OVERHEAD 32

static inline xfs_fileoff_t xfs_refcount_max_unmap(int log_res)
{
return (log_res * 3 / 4) / XFS_REFCOUNT_ITEM_OVERHEAD;
}

extern int xfs_refcount_has_record(struct xfs_btree_cur *cur,
xfs_agblock_t bno, xfs_extlen_t len, bool *exists);
union xfs_btree_rec;
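
Reading the numbers in the new comment one way: an EFI tracking a single
extent costs about 16 + 12 + 16 = 44 bytes, more than the 32-byte estimate,
but if one EFI carries 16 extents (the every-other-block worst case mentioned
above) the shared headers are spread out and the per-extent cost falls well
below 32 bytes. A quick arithmetic check of that reading, assuming one EFI
really does carry all 16 extents:

#include <stdio.h>

/* Byte counts quoted in the comment above. */
#define EFI_HEADER_BYTES	16
#define EFI_EXTENT_BYTES	16
#define XLOG_OP_HEADER_BYTES	12

int main(void)
{
	/* One EFI tracking a single extent: larger than the 32-byte estimate. */
	int single = EFI_HEADER_BYTES + XLOG_OP_HEADER_BYTES + EFI_EXTENT_BYTES;

	/* One EFI amortized across 16 extents (assumed to share one EFI header
	 * and one xlog op header). */
	int nr_extents = 16;
	int amortized = (EFI_HEADER_BYTES + XLOG_OP_HEADER_BYTES +
			 nr_extents * EFI_EXTENT_BYTES) / nr_extents;

	printf("single-extent EFI: %d bytes\n", single);			/* 44 */
	printf("per-extent cost over 16 extents: %d bytes\n", amortized);	/* 17 */
	return 0;
}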