Merge: xfs: XFS update #1 for RHEL9.6
MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-9/-/merge_requests/5188
JIRA: https://issues.redhat.com/browse/RHEL-57114
Omitted-fix: 19ebc8f84ea1 ("xfs: fix file_path handling in tracepoints")
Omitted-fix: 2b3f004d3d51 ("xfs: drop xfarray sortinfo folio on error")

XFS update for RHEL9.6: update through upstream 6.6, along with any
corresponding "fixes" patches. Two fixes patches, 19ebc8f84ea1 ("xfs: fix
file_path handling in tracepoints") and 2b3f004d3d51 ("xfs: drop xfarray
sortinfo folio on error"), are omitted/waived/deferred because each has
multiple upstream dependencies that arrived after 6.5.

Signed-off-by: Bill O'Donnell <bodonnel@redhat.com>
Approved-by: Brian Foster <bfoster@redhat.com>
Approved-by: Eric Sandeen <esandeen@redhat.com>
Approved-by: CKI KWF Bot <cki-ci-bot+kwf-gitlab-com@redhat.com>
Merged-by: Rado Vrbovsky <rvrbovsk@redhat.com>
commit 999ee17e2d
@@ -128,6 +128,7 @@ config XFS_ONLINE_SCRUB
 	bool "XFS online metadata check support"
 	default n
 	depends on XFS_FS
+	depends on TMPFS && SHMEM
 	select XFS_DRAIN_INTENTS
 	help
 	  If you say Y here you will be able to check metadata on a
@@ -142,6 +143,23 @@ config XFS_ONLINE_SCRUB

 	  If unsure, say N.

+config XFS_ONLINE_SCRUB_STATS
+	bool "XFS online metadata check usage data collection"
+	default n
+	depends on XFS_ONLINE_SCRUB
+	select DEBUG_FS
+	help
+	  If you say Y here, the kernel will gather usage data about
+	  the online metadata check subsystem. This includes the number
+	  of invocations, the outcomes, and the results of repairs, if any.
+	  This may slow down scrub slightly due to the use of high precision
+	  timers and the need to merge per-invocation information into the
+	  filesystem counters.
+
+	  Usage data are collected in /sys/kernel/debug/xfs/scrub.
+
+	  If unsure, say N.
+
 config XFS_ONLINE_REPAIR
 	bool "XFS online metadata repair support"
 	default n
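For reference, a build that enables the new stats collection would carry a .config fragment like the following (a minimal sketch derived from the dependency lines above; DEBUG_FS is selected automatically and is shown only for clarity):

	CONFIG_XFS_FS=y
	CONFIG_XFS_ONLINE_SCRUB=y
	CONFIG_XFS_ONLINE_SCRUB_STATS=y
	CONFIG_DEBUG_FS=y

Per the help text, the collected counters then appear under /sys/kernel/debug/xfs/scrub at runtime.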
@@ -164,15 +164,24 @@ xfs-y += $(addprefix scrub/, \
				   rmap.o \
				   scrub.o \
				   symlink.o \
+				   xfarray.o \
+				   xfile.o \
				   )

+xfs-$(CONFIG_XFS_ONLINE_SCRUB_STATS)	+= scrub/stats.o
+
-xfs-$(CONFIG_XFS_RT)			+= scrub/rtbitmap.o
+xfs-$(CONFIG_XFS_RT)			+= $(addprefix scrub/, \
+				   rtbitmap.o \
+				   rtsummary.o \
+				   )
+
 xfs-$(CONFIG_XFS_QUOTA)		+= scrub/quota.o

 # online repair
 ifeq ($(CONFIG_XFS_ONLINE_REPAIR),y)
 xfs-y += $(addprefix scrub/, \
				   agheader_repair.o \
+				   reap.o \
				   repair.o \
				   )
 endif

@@ -743,7 +743,11 @@ struct xfs_scrub_metadata {
  */
 #define XFS_SCRUB_OFLAG_NO_REPAIR_NEEDED (1u << 7)

-#define XFS_SCRUB_FLAGS_IN	(XFS_SCRUB_IFLAG_REPAIR)
+/* i: Rebuild the data structure. */
+#define XFS_SCRUB_IFLAG_FORCE_REBUILD	(1u << 8)
+
+#define XFS_SCRUB_FLAGS_IN	(XFS_SCRUB_IFLAG_REPAIR | \
+				 XFS_SCRUB_IFLAG_FORCE_REBUILD)
 #define XFS_SCRUB_FLAGS_OUT	(XFS_SCRUB_OFLAG_CORRUPT | \
				 XFS_SCRUB_OFLAG_PREEN | \
				 XFS_SCRUB_OFLAG_XFAIL | \
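For context, userspace requests a forced rebuild through the scrub ioctl by setting the new input flag alongside the repair flag. A minimal sketch (the file descriptor fd and the scrub type are placeholder assumptions, not taken from this diff):

	struct xfs_scrub_metadata	sm = {
		.sm_type	= XFS_SCRUB_TYPE_BMBTD,	/* placeholder type */
		.sm_flags	= XFS_SCRUB_IFLAG_REPAIR |
				  XFS_SCRUB_IFLAG_FORCE_REBUILD,
	};
	int				error;

	error = ioctl(fd, XFS_IOC_SCRUB_METADATA, &sm);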
@@ -131,4 +131,26 @@ void xlog_check_buf_cancel_table(struct xlog *log);
 #define xlog_check_buf_cancel_table(log) do { } while (0)
 #endif

+/*
+ * Transform a regular reservation into one suitable for recovery of a log
+ * intent item.
+ *
+ * Intent recovery only runs a single step of the transaction chain and defers
+ * the rest to a separate transaction.  Therefore, we reduce logcount to 1 here
+ * to avoid livelocks if the log grant space is nearly exhausted due to the
+ * recovered intent pinning the tail.  Keep the same logflags to avoid tripping
+ * asserts elsewhere.  Struct copies abound below.
+ */
+static inline struct xfs_trans_res
+xlog_recover_resv(const struct xfs_trans_res *r)
+{
+	struct xfs_trans_res ret = {
+		.tr_logres	= r->tr_logres,
+		.tr_logcount	= 1,
+		.tr_logflags	= r->tr_logflags,
+	};
+
+	return ret;
+}
+
 #endif /* __XFS_LOG_RECOVER_H__ */
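For context, the log intent recovery paths use this helper to build a one-step reservation before allocating the recovery transaction; roughly (a caller-side sketch assuming an xfs_mount *mp in scope and the itruncate reservation that the upstream recovery code typically passes):

	struct xfs_trans_res	resv;
	struct xfs_trans	*tp;
	int			error;

	resv = xlog_recover_resv(&M_RES(mp)->tr_itruncate);
	error = xfs_trans_alloc(mp, &resv, 0, 0, XFS_TRANS_RESERVE, &tp);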
@@ -26,6 +26,7 @@
 #include "scrub/trace.h"
 #include "scrub/repair.h"
 #include "scrub/bitmap.h"
+#include "scrub/reap.h"

 /* Superblock */
@@ -48,6 +49,10 @@ xrep_superblock(
 	if (error)
 		return error;

+	/* Last chance to abort before we start committing fixes. */
+	if (xchk_should_terminate(sc, &error))
+		return error;
+
 	/* Copy AG 0's superblock to this one. */
 	xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
 	xfs_sb_to_disk(bp->b_addr, &mp->m_sb);
@@ -423,6 +428,10 @@ xrep_agf(
 	if (error)
 		return error;

+	/* Last chance to abort before we start committing fixes. */
+	if (xchk_should_terminate(sc, &error))
+		return error;
+
 	/* Start rewriting the header and implant the btrees we found. */
 	xrep_agf_init_header(sc, agf_bp, &old_agf);
 	xrep_agf_set_roots(sc, agf, fab);
@@ -444,13 +453,13 @@ out_revert:

 struct xrep_agfl {
 	/* Bitmap of alleged AGFL blocks that we're not going to add. */
-	struct xbitmap		crossed;
+	struct xagb_bitmap	crossed;

 	/* Bitmap of other OWN_AG metadata blocks. */
-	struct xbitmap		agmetablocks;
+	struct xagb_bitmap	agmetablocks;

 	/* Bitmap of free space. */
-	struct xbitmap		*freesp;
+	struct xagb_bitmap	*freesp;

 	/* rmapbt cursor for finding crosslinked blocks */
 	struct xfs_btree_cur	*rmap_cur;
@@ -466,7 +475,6 @@ xrep_agfl_walk_rmap(
 	void			*priv)
 {
 	struct xrep_agfl	*ra = priv;
-	xfs_fsblock_t		fsb;
 	int			error = 0;

 	if (xchk_should_terminate(ra->sc, &error))
@@ -474,14 +482,13 @@ xrep_agfl_walk_rmap(

 	/* Record all the OWN_AG blocks. */
 	if (rec->rm_owner == XFS_RMAP_OWN_AG) {
-		fsb = XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_ag.pag->pag_agno,
-				rec->rm_startblock);
-		error = xbitmap_set(ra->freesp, fsb, rec->rm_blockcount);
+		error = xagb_bitmap_set(ra->freesp, rec->rm_startblock,
+				rec->rm_blockcount);
 		if (error)
 			return error;
 	}

-	return xbitmap_set_btcur_path(&ra->agmetablocks, cur);
+	return xagb_bitmap_set_btcur_path(&ra->agmetablocks, cur);
 }

 /* Strike out the blocks that are cross-linked according to the rmapbt. */
@@ -492,12 +499,10 @@ xrep_agfl_check_extent(
 	void			*priv)
 {
 	struct xrep_agfl	*ra = priv;
-	xfs_agblock_t		agbno = XFS_FSB_TO_AGBNO(ra->sc->mp, start);
+	xfs_agblock_t		agbno = start;
 	xfs_agblock_t		last_agbno = agbno + len - 1;
 	int			error;

-	ASSERT(XFS_FSB_TO_AGNO(ra->sc->mp, start) == ra->sc->sa.pag->pag_agno);
-
 	while (agbno <= last_agbno) {
 		bool		other_owners;

@@ -507,7 +512,7 @@ xrep_agfl_check_extent(
 			return error;

 		if (other_owners) {
-			error = xbitmap_set(&ra->crossed, agbno, 1);
+			error = xagb_bitmap_set(&ra->crossed, agbno, 1);
 			if (error)
 				return error;
 		}
@@ -533,7 +538,7 @@ STATIC int
 xrep_agfl_collect_blocks(
 	struct xfs_scrub	*sc,
 	struct xfs_buf		*agf_bp,
-	struct xbitmap		*agfl_extents,
+	struct xagb_bitmap	*agfl_extents,
 	xfs_agblock_t		*flcount)
 {
 	struct xrep_agfl	ra;
@@ -543,8 +548,8 @@ xrep_agfl_collect_blocks(

 	ra.sc = sc;
 	ra.freesp = agfl_extents;
-	xbitmap_init(&ra.agmetablocks);
-	xbitmap_init(&ra.crossed);
+	xagb_bitmap_init(&ra.agmetablocks);
+	xagb_bitmap_init(&ra.crossed);

 	/* Find all space used by the free space btrees & rmapbt. */
 	cur = xfs_rmapbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.pag);
@@ -556,7 +561,7 @@ xrep_agfl_collect_blocks(
 	/* Find all blocks currently being used by the bnobt. */
 	cur = xfs_allocbt_init_cursor(mp, sc->tp, agf_bp,
 			sc->sa.pag, XFS_BTNUM_BNO);
-	error = xbitmap_set_btblocks(&ra.agmetablocks, cur);
+	error = xagb_bitmap_set_btblocks(&ra.agmetablocks, cur);
 	xfs_btree_del_cursor(cur, error);
 	if (error)
 		goto out_bmp;
@@ -564,7 +569,7 @@ xrep_agfl_collect_blocks(
 	/* Find all blocks currently being used by the cntbt. */
 	cur = xfs_allocbt_init_cursor(mp, sc->tp, agf_bp,
 			sc->sa.pag, XFS_BTNUM_CNT);
-	error = xbitmap_set_btblocks(&ra.agmetablocks, cur);
+	error = xagb_bitmap_set_btblocks(&ra.agmetablocks, cur);
 	xfs_btree_del_cursor(cur, error);
 	if (error)
 		goto out_bmp;
@@ -573,17 +578,17 @@ xrep_agfl_collect_blocks(
 	 * Drop the freesp meta blocks that are in use by btrees.
 	 * The remaining blocks /should/ be AGFL blocks.
 	 */
-	error = xbitmap_disunion(agfl_extents, &ra.agmetablocks);
+	error = xagb_bitmap_disunion(agfl_extents, &ra.agmetablocks);
 	if (error)
 		goto out_bmp;

 	/* Strike out the blocks that are cross-linked. */
 	ra.rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.pag);
-	error = xbitmap_walk(agfl_extents, xrep_agfl_check_extent, &ra);
+	error = xagb_bitmap_walk(agfl_extents, xrep_agfl_check_extent, &ra);
 	xfs_btree_del_cursor(ra.rmap_cur, error);
 	if (error)
 		goto out_bmp;
-	error = xbitmap_disunion(agfl_extents, &ra.crossed);
+	error = xagb_bitmap_disunion(agfl_extents, &ra.crossed);
 	if (error)
 		goto out_bmp;

@@ -591,12 +596,12 @@ xrep_agfl_collect_blocks(
 	 * Calculate the new AGFL size.  If we found more blocks than fit in
 	 * the AGFL we'll free them later.
 	 */
-	*flcount = min_t(uint64_t, xbitmap_hweight(agfl_extents),
+	*flcount = min_t(uint64_t, xagb_bitmap_hweight(agfl_extents),
			 xfs_agfl_size(mp));

 out_bmp:
-	xbitmap_destroy(&ra.crossed);
-	xbitmap_destroy(&ra.agmetablocks);
+	xagb_bitmap_destroy(&ra.crossed);
+	xagb_bitmap_destroy(&ra.agmetablocks);
 	return error;
 }

@@ -615,8 +620,11 @@ xrep_agfl_update_agf(
 	xfs_force_summary_recalc(sc->mp);

 	/* Update the AGF counters. */
-	if (xfs_perag_initialised_agf(sc->sa.pag))
+	if (xfs_perag_initialised_agf(sc->sa.pag)) {
 		sc->sa.pag->pagf_flcount = flcount;
+		clear_bit(XFS_AGSTATE_AGFL_NEEDS_RESET,
+				&sc->sa.pag->pag_opstate);
+	}
 	agf->agf_flfirst = cpu_to_be32(0);
 	agf->agf_flcount = cpu_to_be32(flcount);
 	if (flcount)
@@ -629,7 +637,7 @@ xrep_agfl_update_agf(
 }

 struct xrep_agfl_fill {
-	struct xbitmap		used_extents;
+	struct xagb_bitmap	used_extents;
 	struct xfs_scrub	*sc;
 	__be32			*agfl_bno;
 	xfs_agblock_t		flcount;
@@ -645,17 +653,15 @@ xrep_agfl_fill(
 {
 	struct xrep_agfl_fill	*af = priv;
 	struct xfs_scrub	*sc = af->sc;
-	xfs_fsblock_t		fsbno = start;
+	xfs_agblock_t		agbno = start;
 	int			error;

-	while (fsbno < start + len && af->fl_off < af->flcount)
-		af->agfl_bno[af->fl_off++] =
-				cpu_to_be32(XFS_FSB_TO_AGBNO(sc->mp, fsbno++));
+	trace_xrep_agfl_insert(sc->sa.pag, agbno, len);

-	trace_xrep_agfl_insert(sc->mp, sc->sa.pag->pag_agno,
-			XFS_FSB_TO_AGBNO(sc->mp, start), len);
+	while (agbno < start + len && af->fl_off < af->flcount)
+		af->agfl_bno[af->fl_off++] = cpu_to_be32(agbno++);

-	error = xbitmap_set(&af->used_extents, start, fsbno - 1);
+	error = xagb_bitmap_set(&af->used_extents, start, agbno - 1);
 	if (error)
 		return error;

@@ -670,7 +676,7 @@ STATIC int
 xrep_agfl_init_header(
 	struct xfs_scrub	*sc,
 	struct xfs_buf		*agfl_bp,
-	struct xbitmap		*agfl_extents,
+	struct xagb_bitmap	*agfl_extents,
 	xfs_agblock_t		flcount)
 {
 	struct xrep_agfl_fill	af = {
@@ -698,17 +704,17 @@ xrep_agfl_init_header(
 	 * blocks than fit in the AGFL, they will be freed in a subsequent
 	 * step.
 	 */
-	xbitmap_init(&af.used_extents);
+	xagb_bitmap_init(&af.used_extents);
 	af.agfl_bno = xfs_buf_to_agfl_bno(agfl_bp),
-	xbitmap_walk(agfl_extents, xrep_agfl_fill, &af);
-	error = xbitmap_disunion(agfl_extents, &af.used_extents);
+	xagb_bitmap_walk(agfl_extents, xrep_agfl_fill, &af);
+	error = xagb_bitmap_disunion(agfl_extents, &af.used_extents);
 	if (error)
 		return error;

 	/* Write new AGFL to disk. */
 	xfs_trans_buf_set_type(sc->tp, agfl_bp, XFS_BLFT_AGFL_BUF);
 	xfs_trans_log_buf(sc->tp, agfl_bp, 0, BBTOB(agfl_bp->b_length) - 1);
-	xbitmap_destroy(&af.used_extents);
+	xagb_bitmap_destroy(&af.used_extents);
 	return 0;
 }

@@ -717,7 +723,7 @@ int
 xrep_agfl(
 	struct xfs_scrub	*sc)
 {
-	struct xbitmap		agfl_extents;
+	struct xagb_bitmap	agfl_extents;
 	struct xfs_mount	*mp = sc->mp;
 	struct xfs_buf		*agf_bp;
 	struct xfs_buf		*agfl_bp;
@@ -728,7 +734,7 @@ xrep_agfl(
 	if (!xfs_has_rmapbt(mp))
 		return -EOPNOTSUPP;

-	xbitmap_init(&agfl_extents);
+	xagb_bitmap_init(&agfl_extents);

 	/*
 	 * Read the AGF so that we can query the rmapbt.  We hope that there's
@@ -756,6 +762,10 @@ xrep_agfl(
 	if (error)
 		goto err;

+	/* Last chance to abort before we start committing fixes. */
+	if (xchk_should_terminate(sc, &error))
+		goto err;
+
 	/*
 	 * Update AGF and AGFL.  We reset the global free block counter when
 	 * we adjust the AGF flcount (which can fail) so avoid updating any
@@ -777,10 +787,10 @@ xrep_agfl(
 		goto err;

 	/* Dump any AGFL overflow. */
-	error = xrep_reap_extents(sc, &agfl_extents, &XFS_RMAP_OINFO_AG,
+	error = xrep_reap_agblocks(sc, &agfl_extents, &XFS_RMAP_OINFO_AG,
			XFS_AG_RESV_AGFL);
 err:
-	xbitmap_destroy(&agfl_extents);
+	xagb_bitmap_destroy(&agfl_extents);
 	return error;
 }

@@ -1003,6 +1013,10 @@ xrep_agi(
 	if (error)
 		return error;

+	/* Last chance to abort before we start committing fixes. */
+	if (xchk_should_terminate(sc, &error))
+		return error;
+
 	/* Start rewriting the header and implant the btrees we found. */
 	xrep_agi_init_header(sc, agi_bp, &old_agi);
 	xrep_agi_set_roots(sc, agi, fab);

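The xbitmap-to-xagb_bitmap conversions above all follow one pattern: the old code widened AG block numbers into the global fsblock space before recording them, while the new code stays in AG-relative block space. A condensed before/after sketch using the identifiers from the hunks above:

	/* before: widen to fsblock space, then record */
	fsb = XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_ag.pag->pag_agno,
			rec->rm_startblock);
	error = xbitmap_set(ra->freesp, fsb, rec->rm_blockcount);

	/* after: record directly in AG block space */
	error = xagb_bitmap_set(ra->freesp, rec->rm_startblock,
			rec->rm_blockcount);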
@@ -301,21 +301,15 @@ xagb_bitmap_set_btblocks(
  * blocks going from the leaf towards the root.
  */
 int
-xbitmap_set_btcur_path(
-	struct xbitmap		*bitmap,
+xagb_bitmap_set_btcur_path(
+	struct xagb_bitmap	*bitmap,
 	struct xfs_btree_cur	*cur)
 {
-	struct xfs_buf		*bp;
-	xfs_fsblock_t		fsb;
 	int			i;
 	int			error;

 	for (i = 0; i < cur->bc_nlevels && cur->bc_levels[i].ptr == 1; i++) {
-		xfs_btree_get_block(cur, i, &bp);
-		if (!bp)
-			continue;
-		fsb = XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp));
-		error = xbitmap_set(bitmap, fsb, 1);
+		error = xagb_bitmap_visit_btblock(cur, i, bitmap);
 		if (error)
 			return error;
 	}
@@ -323,35 +317,6 @@ xbitmap_set_btcur_path(
 	return 0;
 }

-/* Collect a btree's block in the bitmap. */
-STATIC int
-xbitmap_collect_btblock(
-	struct xfs_btree_cur	*cur,
-	int			level,
-	void			*priv)
-{
-	struct xbitmap		*bitmap = priv;
-	struct xfs_buf		*bp;
-	xfs_fsblock_t		fsbno;
-
-	xfs_btree_get_block(cur, level, &bp);
-	if (!bp)
-		return 0;
-
-	fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp));
-	return xbitmap_set(bitmap, fsbno, 1);
-}
-
-/* Walk the btree and mark the bitmap wherever a btree block is found. */
-int
-xbitmap_set_btblocks(
-	struct xbitmap		*bitmap,
-	struct xfs_btree_cur	*cur)
-{
-	return xfs_btree_visit_blocks(cur, xbitmap_collect_btblock,
-			XFS_BTREE_VISIT_ALL, bitmap);
-}
-
 /* How many bits are set in this bitmap? */
 uint64_t
 xbitmap_hweight(
@@ -385,43 +350,6 @@ xbitmap_walk(
 	return error;
 }

-struct xbitmap_walk_bits {
-	xbitmap_walk_bits_fn	fn;
-	void			*priv;
-};
-
-/* Walk all the bits in a run. */
-static int
-xbitmap_walk_bits_in_run(
-	uint64_t		start,
-	uint64_t		len,
-	void			*priv)
-{
-	struct xbitmap_walk_bits *wb = priv;
-	uint64_t		i;
-	int			error = 0;
-
-	for (i = start; i < start + len; i++) {
-		error = wb->fn(i, wb->priv);
-		if (error)
-			break;
-	}
-
-	return error;
-}
-
-/* Call a function for every set bit in this bitmap. */
-int
-xbitmap_walk_bits(
-	struct xbitmap		*bitmap,
-	xbitmap_walk_bits_fn	fn,
-	void			*priv)
-{
-	struct xbitmap_walk_bits wb = {.fn = fn, .priv = priv};
-
-	return xbitmap_walk(bitmap, xbitmap_walk_bits_in_run, &wb);
-}
-
 /* Does this bitmap have no bits set at all? */
 bool
 xbitmap_empty(

@@ -16,10 +16,6 @@ void xbitmap_destroy(struct xbitmap *bitmap);
 int xbitmap_clear(struct xbitmap *bitmap, uint64_t start, uint64_t len);
 int xbitmap_set(struct xbitmap *bitmap, uint64_t start, uint64_t len);
 int xbitmap_disunion(struct xbitmap *bitmap, struct xbitmap *sub);
-int xbitmap_set_btcur_path(struct xbitmap *bitmap,
-		struct xfs_btree_cur *cur);
-int xbitmap_set_btblocks(struct xbitmap *bitmap,
-		struct xfs_btree_cur *cur);
 uint64_t xbitmap_hweight(struct xbitmap *bitmap);

 /*
@@ -33,10 +29,6 @@ typedef int (*xbitmap_walk_fn)(uint64_t start, uint64_t len, void *priv);
 int xbitmap_walk(struct xbitmap *bitmap, xbitmap_walk_fn fn,
		void *priv);

-typedef int (*xbitmap_walk_bits_fn)(uint64_t bit, void *priv);
-int xbitmap_walk_bits(struct xbitmap *bitmap, xbitmap_walk_bits_fn fn,
-		void *priv);
-
 bool xbitmap_empty(struct xbitmap *bitmap);
 bool xbitmap_test(struct xbitmap *bitmap, uint64_t start, uint64_t *len);

@@ -110,5 +102,7 @@ static inline int xagb_bitmap_walk(struct xagb_bitmap *bitmap,

 int xagb_bitmap_set_btblocks(struct xagb_bitmap *bitmap,
		struct xfs_btree_cur *cur);
+int xagb_bitmap_set_btcur_path(struct xagb_bitmap *bitmap,
+		struct xfs_btree_cur *cur);

 #endif /* __XFS_SCRUB_BITMAP_H__ */

@@ -38,8 +38,7 @@ xchk_setup_inode_bmap(
 	if (error)
 		goto out;

-	sc->ilock_flags = XFS_IOLOCK_EXCL;
-	xfs_ilock(sc->ip, XFS_IOLOCK_EXCL);
+	xchk_ilock(sc, XFS_IOLOCK_EXCL);

 	/*
 	 * We don't want any ephemeral data/cow fork updates sitting around
@@ -50,8 +49,7 @@ xchk_setup_inode_bmap(
 	    sc->sm->sm_type != XFS_SCRUB_TYPE_BMBTA) {
 		struct address_space	*mapping = VFS_I(sc->ip)->i_mapping;

-		sc->ilock_flags |= XFS_MMAPLOCK_EXCL;
-		xfs_ilock(sc->ip, XFS_MMAPLOCK_EXCL);
+		xchk_ilock(sc, XFS_MMAPLOCK_EXCL);

 		inode_dio_wait(VFS_I(sc->ip));

@@ -79,9 +77,8 @@ xchk_setup_inode_bmap(
 	error = xchk_trans_alloc(sc, 0);
 	if (error)
 		goto out;
-	sc->ilock_flags |= XFS_ILOCK_EXCL;
-	xfs_ilock(sc->ip, XFS_ILOCK_EXCL);

+	xchk_ilock(sc, XFS_ILOCK_EXCL);
 out:
 	/* scrub teardown will unlock and release the inode */
 	return error;
@@ -844,7 +841,7 @@ xchk_bmap(

 	/* Non-existent forks can be ignored. */
 	if (!ifp)
-		goto out;
+		return -ENOENT;

 	info.is_rt = whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip);
 	info.whichfork = whichfork;
@@ -853,10 +850,10 @@ xchk_bmap(

 	switch (whichfork) {
 	case XFS_COW_FORK:
-		/* No CoW forks on non-reflink inodes/filesystems. */
-		if (!xfs_is_reflink_inode(ip)) {
+		/* No CoW forks on non-reflink filesystems. */
+		if (!xfs_has_reflink(mp)) {
 			xchk_ino_set_corrupt(sc, sc->ip->i_ino);
-			goto out;
+			return 0;
 		}
 		break;
 	case XFS_ATTR_FORK:
@@ -876,31 +873,31 @@ xchk_bmap(
 		/* No mappings to check. */
 		if (whichfork == XFS_COW_FORK)
 			xchk_fblock_set_corrupt(sc, whichfork, 0);
-		goto out;
+		return 0;
 	case XFS_DINODE_FMT_EXTENTS:
 		break;
 	case XFS_DINODE_FMT_BTREE:
 		if (whichfork == XFS_COW_FORK) {
 			xchk_fblock_set_corrupt(sc, whichfork, 0);
-			goto out;
+			return 0;
 		}

 		error = xchk_bmap_btree(sc, whichfork, &info);
 		if (error)
-			goto out;
+			return error;
 		break;
 	default:
 		xchk_fblock_set_corrupt(sc, whichfork, 0);
-		goto out;
+		return 0;
 	}

 	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
-		goto out;
+		return 0;

 	/* Find the offset of the last extent in the mapping. */
 	error = xfs_bmap_last_offset(ip, &endoff, whichfork);
 	if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
-		goto out;
+		return error;

 	/*
 	 * Scrub extent records.  We use a special iterator function here that
@@ -913,12 +910,12 @@ xchk_bmap(
 	while (xchk_bmap_iext_iter(&info, &irec)) {
 		if (xchk_should_terminate(sc, &error) ||
 		    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
-			goto out;
+			return 0;

 		if (irec.br_startoff >= endoff) {
 			xchk_fblock_set_corrupt(sc, whichfork,
					irec.br_startoff);
-			goto out;
+			return 0;
 		}

 		if (isnullstartblock(irec.br_startblock))
@@ -931,10 +928,10 @@ xchk_bmap(
 	if (xchk_bmap_want_check_rmaps(&info)) {
 		error = xchk_bmap_check_rmaps(sc, whichfork);
 		if (!xchk_fblock_xref_process_error(sc, whichfork, 0, &error))
-			goto out;
+			return error;
 	}
-out:
-	return error;
+
+	return 0;
 }

 /* Scrub an inode's data fork. */
@@ -958,8 +955,5 @@ int
 xchk_bmap_cow(
 	struct xfs_scrub	*sc)
 {
-	if (!xfs_is_reflink_inode(sc->ip))
-		return -ENOENT;
-
 	return xchk_bmap(sc, XFS_COW_FORK);
 }

@@ -831,6 +831,25 @@ xchk_install_handle_inode(
 	return 0;
 }

+/*
+ * Install an already-referenced inode for scrubbing.  Get our own reference to
+ * the inode to make disposal simpler.  The inode must not be in I_FREEING or
+ * I_WILL_FREE state!
+ */
+int
+xchk_install_live_inode(
+	struct xfs_scrub	*sc,
+	struct xfs_inode	*ip)
+{
+	if (!igrab(VFS_I(ip))) {
+		xchk_ino_set_corrupt(sc, ip->i_ino);
+		return -EFSCORRUPTED;
+	}
+
+	sc->ip = ip;
+	return 0;
+}
+
 /*
  * In preparation to scrub metadata structures that hang off of an inode,
  * grab either the inode referenced in the scrub control structure or the
@@ -854,10 +873,8 @@ xchk_iget_for_scrubbing(
 	ASSERT(sc->tp == NULL);

 	/* We want to scan the inode we already had opened. */
-	if (sc->sm->sm_ino == 0 || sc->sm->sm_ino == ip_in->i_ino) {
-		sc->ip = ip_in;
-		return 0;
-	}
+	if (sc->sm->sm_ino == 0 || sc->sm->sm_ino == ip_in->i_ino)
+		return xchk_install_live_inode(sc, ip_in);

 	/* Reject internal metadata files and obviously bad inode numbers. */
 	if (xfs_internal_inum(mp, sc->sm->sm_ino))
@@ -999,20 +1016,48 @@ xchk_setup_inode_contents(
 		return error;

 	/* Lock the inode so the VFS cannot touch this file. */
-	sc->ilock_flags = XFS_IOLOCK_EXCL;
-	xfs_ilock(sc->ip, sc->ilock_flags);
+	xchk_ilock(sc, XFS_IOLOCK_EXCL);

 	error = xchk_trans_alloc(sc, resblks);
 	if (error)
 		goto out;
-	sc->ilock_flags |= XFS_ILOCK_EXCL;
-	xfs_ilock(sc->ip, XFS_ILOCK_EXCL);

+	xchk_ilock(sc, XFS_ILOCK_EXCL);
 out:
 	/* scrub teardown will unlock and release the inode for us */
 	return error;
 }

+void
+xchk_ilock(
+	struct xfs_scrub	*sc,
+	unsigned int		ilock_flags)
+{
+	xfs_ilock(sc->ip, ilock_flags);
+	sc->ilock_flags |= ilock_flags;
+}
+
+bool
+xchk_ilock_nowait(
+	struct xfs_scrub	*sc,
+	unsigned int		ilock_flags)
+{
+	if (xfs_ilock_nowait(sc->ip, ilock_flags)) {
+		sc->ilock_flags |= ilock_flags;
+		return true;
+	}
+
+	return false;
+}
+
+void
+xchk_iunlock(
+	struct xfs_scrub	*sc,
+	unsigned int		ilock_flags)
+{
+	sc->ilock_flags &= ~ilock_flags;
+	xfs_iunlock(sc->ip, ilock_flags);
+}
+
 /*
  * Predicate that decides if we need to evaluate the cross-reference check.
  * If there was an error accessing the cross-reference btree, just delete
@@ -1179,3 +1224,155 @@ xchk_fsgates_enable(

 	sc->flags |= scrub_fsgates;
 }
+
+/*
+ * Decide if this is a cached inode that's also allocated.  The caller must
+ * hold a reference to an AG and the AGI buffer lock to prevent inodes from
+ * being allocated or freed.
+ *
+ * Look up an inode by number in the given file system.  If the inode number
+ * is invalid, return -EINVAL.  If the inode is not in cache, return -ENODATA.
+ * If the inode is being reclaimed, return -ENODATA because we know the inode
+ * cache cannot be updating the ondisk metadata.
+ *
+ * Otherwise, the incore inode is the one we want, and it is either live,
+ * somewhere in the inactivation machinery, or reclaimable.  The inode is
+ * allocated if i_mode is nonzero.  In all three cases, the cached inode will
+ * be more up to date than the ondisk inode buffer, so we must use the incore
+ * i_mode.
+ */
+int
+xchk_inode_is_allocated(
+	struct xfs_scrub	*sc,
+	xfs_agino_t		agino,
+	bool			*inuse)
+{
+	struct xfs_mount	*mp = sc->mp;
+	struct xfs_perag	*pag = sc->sa.pag;
+	xfs_ino_t		ino;
+	struct xfs_inode	*ip;
+	int			error;
+
+	/* caller must hold perag reference */
+	if (pag == NULL) {
+		ASSERT(pag != NULL);
+		return -EINVAL;
+	}
+
+	/* caller must have AGI buffer */
+	if (sc->sa.agi_bp == NULL) {
+		ASSERT(sc->sa.agi_bp != NULL);
+		return -EINVAL;
+	}
+
+	/* reject inode numbers outside existing AGs */
+	ino = XFS_AGINO_TO_INO(sc->mp, pag->pag_agno, agino);
+	if (!xfs_verify_ino(mp, ino))
+		return -EINVAL;
+
+	error = -ENODATA;
+	rcu_read_lock();
+	ip = radix_tree_lookup(&pag->pag_ici_root, agino);
+	if (!ip) {
+		/* cache miss */
+		goto out_rcu;
+	}
+
+	/*
+	 * If the inode number doesn't match, the incore inode got reused
+	 * during an RCU grace period and the radix tree hasn't been updated.
+	 * This isn't the inode we want.
+	 */
+	spin_lock(&ip->i_flags_lock);
+	if (ip->i_ino != ino)
+		goto out_skip;
+
+	trace_xchk_inode_is_allocated(ip);
+
+	/*
+	 * We have an incore inode that matches the inode we want, and the
+	 * caller holds the perag structure and the AGI buffer.  Let's check
+	 * our assumptions below:
+	 */
+
+#ifdef DEBUG
+	/*
+	 * (1) If the incore inode is live (i.e. referenced from the dcache),
+	 * it will not be INEW, nor will it be in the inactivation or reclaim
+	 * machinery.  The ondisk inode had better be allocated.  This is the
+	 * most trivial case.
+	 */
+	if (!(ip->i_flags & (XFS_NEED_INACTIVE | XFS_INEW | XFS_IRECLAIMABLE |
+			     XFS_INACTIVATING))) {
+		/* live inode */
+		ASSERT(VFS_I(ip)->i_mode != 0);
+	}
+
+	/*
+	 * If the incore inode is INEW, there are several possibilities:
+	 *
+	 * (2) For a file that is being created, note that we allocate the
+	 * ondisk inode before allocating, initializing, and adding the incore
+	 * inode to the radix tree.
+	 *
+	 * (3) If the incore inode is being recycled, the inode has to be
+	 * allocated because we don't allow freed inodes to be recycled.
+	 * Recycling doesn't touch i_mode.
+	 */
+	if (ip->i_flags & XFS_INEW) {
+		/* created on disk already or recycling */
+		ASSERT(VFS_I(ip)->i_mode != 0);
+	}
+
+	/*
+	 * (4) If the inode is queued for inactivation (NEED_INACTIVE) but
+	 * inactivation has not started (!INACTIVATING), it is still allocated.
+	 */
+	if ((ip->i_flags & XFS_NEED_INACTIVE) &&
+	    !(ip->i_flags & XFS_INACTIVATING)) {
+		/* definitely before difree */
+		ASSERT(VFS_I(ip)->i_mode != 0);
+	}
+#endif
+
+	/*
+	 * If the incore inode is undergoing inactivation (INACTIVATING), there
+	 * are two possibilities:
+	 *
+	 * (5) It is before the point where it would get freed ondisk, in which
+	 * case i_mode is still nonzero.
+	 *
+	 * (6) It has already been freed, in which case i_mode is zero.
+	 *
+	 * We don't take the ILOCK here, but difree and dialloc update the AGI,
+	 * and we've taken the AGI buffer lock, which prevents that from
+	 * happening.
+	 */
+
+	/*
+	 * (7) Inodes undergoing inactivation (INACTIVATING) or queued for
+	 * reclaim (IRECLAIMABLE) could be allocated or free.  i_mode still
+	 * reflects the ondisk state.
+	 */
+
+	/*
+	 * (8) If the inode is in IFLUSHING, it's safe to query i_mode because
+	 * the flush code uses i_mode to format the ondisk inode.
+	 */
+
+	/*
+	 * (9) If the inode is in IRECLAIM and was reachable via the radix
+	 * tree, it still has the same i_mode as it did before it entered
+	 * reclaim.  The inode object is still alive because we hold the RCU
+	 * read lock.
+	 */
+
+	*inuse = VFS_I(ip)->i_mode != 0;
+	error = 0;
+
+out_skip:
+	spin_unlock(&ip->i_flags_lock);
+out_rcu:
+	rcu_read_unlock();
+	return error;
+}

@@ -88,10 +88,16 @@ int xchk_setup_xattr(struct xfs_scrub *sc);
 int xchk_setup_symlink(struct xfs_scrub *sc);
 int xchk_setup_parent(struct xfs_scrub *sc);
 #ifdef CONFIG_XFS_RT
-int xchk_setup_rt(struct xfs_scrub *sc);
+int xchk_setup_rtbitmap(struct xfs_scrub *sc);
+int xchk_setup_rtsummary(struct xfs_scrub *sc);
 #else
 static inline int
-xchk_setup_rt(struct xfs_scrub *sc)
+xchk_setup_rtbitmap(struct xfs_scrub *sc)
 {
 	return -ENOENT;
 }
+static inline int
+xchk_setup_rtsummary(struct xfs_scrub *sc)
+{
+	return -ENOENT;
+}
@@ -137,6 +143,12 @@ int xchk_count_rmap_ownedby_ag(struct xfs_scrub *sc, struct xfs_btree_cur *cur,
 int xchk_setup_ag_btree(struct xfs_scrub *sc, bool force_log);
 int xchk_iget_for_scrubbing(struct xfs_scrub *sc);
 int xchk_setup_inode_contents(struct xfs_scrub *sc, unsigned int resblks);
+int xchk_install_live_inode(struct xfs_scrub *sc, struct xfs_inode *ip);
+
+void xchk_ilock(struct xfs_scrub *sc, unsigned int ilock_flags);
+bool xchk_ilock_nowait(struct xfs_scrub *sc, unsigned int ilock_flags);
+void xchk_iunlock(struct xfs_scrub *sc, unsigned int ilock_flags);
+
 void xchk_buffer_recheck(struct xfs_scrub *sc, struct xfs_buf *bp);

 int xchk_iget(struct xfs_scrub *sc, xfs_ino_t inum, struct xfs_inode **ipp);
@@ -155,8 +167,28 @@ static inline bool xchk_skip_xref(struct xfs_scrub_metadata *sm)
			       XFS_SCRUB_OFLAG_XCORRUPT);
 }

+#ifdef CONFIG_XFS_ONLINE_REPAIR
+/* Decide if a repair is required. */
+static inline bool xchk_needs_repair(const struct xfs_scrub_metadata *sm)
+{
+	return sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT |
+			       XFS_SCRUB_OFLAG_XCORRUPT |
+			       XFS_SCRUB_OFLAG_PREEN);
+}
+#else
+# define xchk_needs_repair(sc)		(false)
+#endif /* CONFIG_XFS_ONLINE_REPAIR */
+
 int xchk_metadata_inode_forks(struct xfs_scrub *sc);

+/*
+ * Helper macros to allocate and format xfile description strings.
+ * Callers must kfree the pointer returned.
+ */
+#define xchk_xfile_descr(sc, fmt, ...) \
+	kasprintf(XCHK_GFP_FLAGS, "XFS (%s): " fmt, \
+			(sc)->mp->m_super->s_id, ##__VA_ARGS__)
+
 /*
  * Setting up a hook to wait for intents to drain is costly -- we have to take
  * the CPU hotplug lock and force an i-cache flush on all CPUs once to set it
@@ -171,4 +203,7 @@ static inline bool xchk_need_intent_drain(struct xfs_scrub *sc)

 void xchk_fsgates_enable(struct xfs_scrub *sc, unsigned int scrub_fshooks);

+int xchk_inode_is_allocated(struct xfs_scrub *sc, xfs_agino_t agino,
+		bool *inuse);
+
 #endif /* __XFS_SCRUB_COMMON_H__ */

@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0+
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (C) 2019-2023 Oracle.  All Rights Reserved.
  * Author: Darrick J. Wong <djwong@kernel.org>
@@ -8,6 +8,8 @@
 #include "xfs_shared.h"
 #include "xfs_format.h"
 #include "xfs_trans_resv.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
 #include "xfs_mount.h"
 #include "xfs_alloc.h"
 #include "xfs_ialloc.h"
@@ -16,6 +18,7 @@
 #include "xfs_ag.h"
 #include "xfs_rtalloc.h"
 #include "xfs_inode.h"
+#include "xfs_icache.h"
 #include "scrub/scrub.h"
 #include "scrub/common.h"
 #include "scrub/trace.h"
@@ -53,6 +56,7 @@ struct xchk_fscounters {
 	uint64_t		frextents;
 	unsigned long long	icount_min;
 	unsigned long long	icount_max;
+	bool			frozen;
 };

 /*
@@ -123,6 +127,82 @@ xchk_fscount_warmup(
 	return error;
 }

+static inline int
+xchk_fsfreeze(
+	struct xfs_scrub	*sc)
+{
+	int			error;
+
+	error = freeze_super(sc->mp->m_super, FREEZE_HOLDER_KERNEL);
+	trace_xchk_fsfreeze(sc, error);
+	return error;
+}
+
+static inline int
+xchk_fsthaw(
+	struct xfs_scrub	*sc)
+{
+	int			error;
+
+	/* This should always succeed, we have a kernel freeze */
+	error = thaw_super(sc->mp->m_super, FREEZE_HOLDER_KERNEL);
+	trace_xchk_fsthaw(sc, error);
+	return error;
+}
+
+/*
+ * We couldn't stabilize the filesystem long enough to sample all the variables
+ * that comprise the summary counters and compare them to the percpu counters.
+ * We need to disable all writer threads, which means taking the first two
+ * freeze levels to put userspace to sleep, and the third freeze level to
+ * prevent background threads from starting new transactions.  Take one level
+ * more to prevent other callers from unfreezing the filesystem while we run.
+ */
+STATIC int
+xchk_fscounters_freeze(
+	struct xfs_scrub	*sc)
+{
+	struct xchk_fscounters	*fsc = sc->buf;
+	int			error = 0;
+
+	if (sc->flags & XCHK_HAVE_FREEZE_PROT) {
+		sc->flags &= ~XCHK_HAVE_FREEZE_PROT;
+		mnt_drop_write_file(sc->file);
+	}
+
+	/* Try to grab a kernel freeze. */
+	while ((error = xchk_fsfreeze(sc)) == -EBUSY) {
+		if (xchk_should_terminate(sc, &error))
+			return error;
+
+		delay(HZ / 10);
+	}
+	if (error)
+		return error;
+
+	fsc->frozen = true;
+	return 0;
+}
+
+/* Thaw the filesystem after checking or repairing fscounters. */
+STATIC void
+xchk_fscounters_cleanup(
+	void			*buf)
+{
+	struct xchk_fscounters	*fsc = buf;
+	struct xfs_scrub	*sc = fsc->sc;
+	int			error;
+
+	if (!fsc->frozen)
+		return;
+
+	error = xchk_fsthaw(sc);
+	if (error)
+		xfs_emerg(sc->mp, "still frozen after scrub, err=%d", error);
+	else
+		fsc->frozen = false;
+}
+
 int
 xchk_setup_fscounters(
 	struct xfs_scrub	*sc)
@@ -140,6 +220,7 @@ xchk_setup_fscounters(
 	sc->buf = kzalloc(sizeof(struct xchk_fscounters), XCHK_GFP_FLAGS);
 	if (!sc->buf)
 		return -ENOMEM;
+	sc->buf_cleanup = xchk_fscounters_cleanup;
 	fsc = sc->buf;
 	fsc->sc = sc;

@@ -150,7 +231,18 @@ xchk_setup_fscounters(
 	if (error)
 		return error;

-	return xchk_trans_alloc(sc, 0);
+	/*
+	 * Pause all writer activity in the filesystem while we're scrubbing to
+	 * reduce the likelihood of background perturbations to the counters
+	 * throwing off our calculations.
+	 */
+	if (sc->flags & XCHK_TRY_HARDER) {
+		error = xchk_fscounters_freeze(sc);
+		if (error)
+			return error;
+	}
+
+	return xfs_trans_alloc_empty(sc->mp, &sc->tp);
 }

 /*
@@ -290,8 +382,7 @@ retry:
 	if (fsc->ifree > fsc->icount) {
 		if (tries--)
 			goto retry;
-		xchk_set_incomplete(sc);
-		return 0;
+		return -EDEADLOCK;
 	}

 	return 0;
@@ -367,6 +458,8 @@ xchk_fscount_count_frextents(
 * Otherwise, we /might/ have a problem.  If the change in the summations is
 * more than we want to tolerate, the filesystem is probably busy and we should
 * just send back INCOMPLETE and see if userspace will try again.
+ *
+ * If we're repairing then we require an exact match.
 */
 static inline bool
 xchk_fscount_within_range(
@@ -396,21 +489,7 @@ xchk_fscount_within_range(
 	if (expected >= min_value && expected <= max_value)
 		return true;

-	/*
-	 * If the difference between the two summations is too large, the fs
-	 * might just be busy and so we'll mark the scrub incomplete.  Return
-	 * true here so that we don't mark the counter corrupt.
-	 *
-	 * XXX: In the future when userspace can grant scrub permission to
-	 * quiesce the filesystem to solve the outsized variance problem, this
-	 * check should be moved up and the return code changed to signal to
-	 * userspace that we need quiesce permission.
-	 */
-	if (max_value - min_value >= XCHK_FSCOUNT_MIN_VARIANCE) {
-		xchk_set_incomplete(sc);
-		return true;
-	}
-
 	/* Everything else is bad. */
 	return false;
 }

@@ -422,6 +501,7 @@ xchk_fscounters(
 	struct xfs_mount	*mp = sc->mp;
 	struct xchk_fscounters	*fsc = sc->buf;
 	int64_t			icount, ifree, fdblocks, frextents;
+	bool			try_again = false;
 	int			error;

 	/* Snapshot the percpu counters. */
@@ -431,9 +511,26 @@ xchk_fscounters(
 	frextents = percpu_counter_sum(&mp->m_frextents);

 	/* No negative values, please! */
-	if (icount < 0 || ifree < 0 || fdblocks < 0 || frextents < 0)
+	if (icount < 0 || ifree < 0)
 		xchk_set_corrupt(sc);

+	/*
+	 * If the filesystem is not frozen, the counter summation calls above
+	 * can race with xfs_mod_freecounter, which subtracts a requested space
+	 * reservation from the counter and undoes the subtraction if that made
+	 * the counter go negative.  Therefore, it's possible to see negative
+	 * values here, and we should only flag that as a corruption if we
+	 * froze the fs.  This is much more likely to happen with frextents
+	 * since there are no reserved pools.
+	 */
+	if (fdblocks < 0 || frextents < 0) {
+		if (!fsc->frozen)
+			return -EDEADLOCK;
+
+		xchk_set_corrupt(sc);
+		return 0;
+	}
+
 	/* See if icount is obviously wrong. */
 	if (icount < fsc->icount_min || icount > fsc->icount_max)
 		xchk_set_corrupt(sc);
@@ -446,12 +543,6 @@ xchk_fscounters(
 	if (frextents > mp->m_sb.sb_rextents)
 		xchk_set_corrupt(sc);

-	/*
-	 * XXX: We can't quiesce percpu counter updates, so exit early.
-	 * This can be re-enabled when we gain exclusive freeze functionality.
-	 */
-	return 0;
-
 	/*
 	 * If ifree exceeds icount by more than the minimum variance then
 	 * something's probably wrong with the counters.
@@ -463,8 +554,6 @@ xchk_fscounters(
 	error = xchk_fscount_aggregate_agcounts(sc, fsc);
 	if (!xchk_process_error(sc, 0, XFS_SB_BLOCK(mp), &error))
 		return error;
-	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE)
-		return 0;

 	/* Count the free extents counter for rt volumes. */
 	error = xchk_fscount_count_frextents(sc, fsc);
@@ -473,20 +562,45 @@ xchk_fscounters(
 	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE)
 		return 0;

-	/* Compare the in-core counters with whatever we counted. */
-	if (!xchk_fscount_within_range(sc, icount, &mp->m_icount, fsc->icount))
-		xchk_set_corrupt(sc);
+	/*
+	 * Compare the in-core counters with whatever we counted.  If the fs is
+	 * frozen, we treat the discrepancy as a corruption because the freeze
+	 * should have stabilized the counter values.  Otherwise, we need
+	 * userspace to call us back having granted us freeze permission.
+	 */
+	if (!xchk_fscount_within_range(sc, icount, &mp->m_icount,
+			fsc->icount)) {
+		if (fsc->frozen)
+			xchk_set_corrupt(sc);
+		else
+			try_again = true;
+	}

-	if (!xchk_fscount_within_range(sc, ifree, &mp->m_ifree, fsc->ifree))
-		xchk_set_corrupt(sc);
+	if (!xchk_fscount_within_range(sc, ifree, &mp->m_ifree, fsc->ifree)) {
+		if (fsc->frozen)
+			xchk_set_corrupt(sc);
+		else
+			try_again = true;
+	}

 	if (!xchk_fscount_within_range(sc, fdblocks, &mp->m_fdblocks,
-			fsc->fdblocks))
-		xchk_set_corrupt(sc);
+			fsc->fdblocks)) {
+		if (fsc->frozen)
+			xchk_set_corrupt(sc);
+		else
+			try_again = true;
+	}

 	if (!xchk_fscount_within_range(sc, frextents, &mp->m_frextents,
-			fsc->frextents))
-		xchk_set_corrupt(sc);
+			fsc->frextents)) {
+		if (fsc->frozen)
+			xchk_set_corrupt(sc);
+		else
+			try_again = true;
+	}
+
+	if (try_again)
+		return -EDEADLOCK;

 	return 0;
 }

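A note on the -EDEADLOCK returns above: in the scrub core, -EDEADLOCK is the conventional signal to rerun the operation with XCHK_TRY_HARDER set, which is what routes xchk_setup_fscounters() into the freeze path on the second pass. Roughly, the dispatcher's retry logic looks like this (paraphrased from the upstream xfs_scrub_metadata() loop; not part of this diff):

	if (error == -EDEADLOCK && !(sc->flags & XCHK_TRY_HARDER)) {
		error = 0;
		sc->flags |= XCHK_TRY_HARDER;
		goto retry_op;
	}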
@@ -226,6 +226,16 @@ xchk_ag_btree_healthy_enough(
 		return true;
 	}

+	/*
+	 * If we just repaired some AG metadata, sc->sick_mask will reflect all
+	 * the per-AG metadata types that were repaired.  Exclude these from
+	 * the filesystem health query because we have not yet updated the
+	 * health status and we want everything to be scanned.
+	 */
+	if ((sc->flags & XREP_ALREADY_FIXED) &&
+	    type_to_health_flag[sc->sm->sm_type].group == XHG_AG)
+		mask &= ~sc->sick_mask;
+
 	if (xfs_ag_has_sickness(pag, mask)) {
 		sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XFAIL;
 		return false;

@@ -328,8 +328,7 @@ xchk_iallocbt_check_cluster_ifree(
 		goto out;
 	}

-	error = xfs_icache_inode_is_allocated(mp, bs->cur->bc_tp, fsino,
-			&ino_inuse);
+	error = xchk_inode_is_allocated(bs->sc, agino, &ino_inuse);
 	if (error == -ENODATA) {
 		/* Not cached, just read the disk buffer */
 		freemask_ok = irec_free ^ !!(dip->di_mode);

@@ -32,15 +32,13 @@ xchk_prepare_iscrub(
 {
 	int			error;

-	sc->ilock_flags = XFS_IOLOCK_EXCL;
-	xfs_ilock(sc->ip, sc->ilock_flags);
+	xchk_ilock(sc, XFS_IOLOCK_EXCL);

 	error = xchk_trans_alloc(sc, 0);
 	if (error)
 		return error;

-	sc->ilock_flags |= XFS_ILOCK_EXCL;
-	xfs_ilock(sc->ip, XFS_ILOCK_EXCL);
+	xchk_ilock(sc, XFS_ILOCK_EXCL);
 	return 0;
 }

@@ -83,7 +81,10 @@ xchk_setup_inode(

 	/* We want to scan the opened inode, so lock it and exit. */
 	if (sc->sm->sm_ino == 0 || sc->sm->sm_ino == ip_in->i_ino) {
-		sc->ip = ip_in;
+		error = xchk_install_live_inode(sc, ip_in);
+		if (error)
+			return error;
+
 		return xchk_prepare_iscrub(sc);
 	}

@@ -150,8 +150,8 @@ xchk_parent_validate(

 	lock_mode = xchk_parent_ilock_dir(dp);
 	if (!lock_mode) {
-		xfs_iunlock(sc->ip, XFS_ILOCK_EXCL);
-		xfs_ilock(sc->ip, XFS_ILOCK_EXCL);
+		xchk_iunlock(sc, XFS_ILOCK_EXCL);
+		xchk_ilock(sc, XFS_ILOCK_EXCL);
 		error = -EAGAIN;
 		goto out_rele;
 	}

@@ -59,9 +59,12 @@ xchk_setup_quota(
 	error = xchk_setup_fs(sc);
 	if (error)
 		return error;
-	sc->ip = xfs_quota_inode(sc->mp, dqtype);
-	xfs_ilock(sc->ip, XFS_ILOCK_EXCL);
-	sc->ilock_flags = XFS_ILOCK_EXCL;
+
+	error = xchk_install_live_inode(sc, xfs_quota_inode(sc->mp, dqtype));
+	if (error)
+		return error;
+
+	xchk_ilock(sc, XFS_ILOCK_EXCL);
 	return 0;
 }

@@ -235,13 +238,11 @@ xchk_quota(
 	 * data fork we have to drop ILOCK_EXCL to use the regular dquot
 	 * functions.
 	 */
-	xfs_iunlock(sc->ip, sc->ilock_flags);
-	sc->ilock_flags = 0;
+	xchk_iunlock(sc, sc->ilock_flags);
 	sqi.sc = sc;
 	sqi.last_id = 0;
 	error = xfs_qm_dqiterate(mp, dqtype, xchk_quota_item, &sqi);
-	sc->ilock_flags = XFS_ILOCK_EXCL;
-	xfs_ilock(sc->ip, sc->ilock_flags);
+	xchk_ilock(sc, XFS_ILOCK_EXCL);
 	if (error == -ECANCELED)
 		error = 0;
 	if (!xchk_fblock_process_error(sc, XFS_DATA_FORK,

@ -0,0 +1,498 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
/*
|
||||
* Copyright (C) 2022-2023 Oracle. All Rights Reserved.
|
||||
* Author: Darrick J. Wong <djwong@kernel.org>
|
||||
*/
|
||||
#include "xfs.h"
|
||||
#include "xfs_fs.h"
|
||||
#include "xfs_shared.h"
|
||||
#include "xfs_format.h"
|
||||
#include "xfs_trans_resv.h"
|
||||
#include "xfs_mount.h"
|
||||
#include "xfs_btree.h"
|
||||
#include "xfs_log_format.h"
|
||||
#include "xfs_trans.h"
|
||||
#include "xfs_sb.h"
|
||||
#include "xfs_inode.h"
|
||||
#include "xfs_alloc.h"
|
||||
#include "xfs_alloc_btree.h"
|
||||
#include "xfs_ialloc.h"
|
||||
#include "xfs_ialloc_btree.h"
|
||||
#include "xfs_rmap.h"
|
||||
#include "xfs_rmap_btree.h"
|
||||
#include "xfs_refcount_btree.h"
|
||||
#include "xfs_extent_busy.h"
|
||||
#include "xfs_ag.h"
|
||||
#include "xfs_ag_resv.h"
|
||||
#include "xfs_quota.h"
|
||||
#include "xfs_qm.h"
|
||||
#include "xfs_bmap.h"
|
||||
#include "xfs_da_format.h"
|
||||
#include "xfs_da_btree.h"
|
||||
#include "xfs_attr.h"
|
||||
#include "xfs_attr_remote.h"
|
||||
#include "scrub/scrub.h"
|
||||
#include "scrub/common.h"
|
||||
#include "scrub/trace.h"
|
||||
#include "scrub/repair.h"
|
||||
#include "scrub/bitmap.h"
|
||||
#include "scrub/reap.h"
|
||||
|
||||
/*
|
||||
* Disposal of Blocks from Old Metadata
|
||||
*
|
||||
* Now that we've constructed a new btree to replace the damaged one, we want
|
||||
* to dispose of the blocks that (we think) the old btree was using.
|
||||
* Previously, we used the rmapbt to collect the extents (bitmap) with the
|
||||
* rmap owner corresponding to the tree we rebuilt, collected extents for any
|
||||
* blocks with the same rmap owner that are owned by another data structure
|
||||
* (sublist), and subtracted sublist from bitmap. In theory the extents
|
||||
* remaining in bitmap are the old btree's blocks.
|
||||
*
|
||||
* Unfortunately, it's possible that the btree was crosslinked with other
|
||||
* blocks on disk. The rmap data can tell us if there are multiple owners, so
|
||||
* if the rmapbt says there is an owner of this block other than @oinfo, then
|
||||
* the block is crosslinked. Remove the reverse mapping and continue.
|
||||
*
|
||||
* If there is one rmap record, we can free the block, which removes the
|
||||
* reverse mapping but doesn't add the block to the free space. Our repair
|
||||
* strategy is to hope the other metadata objects crosslinked on this block
|
||||
* will be rebuilt (atop different blocks), thereby removing all the cross
|
||||
* links.
|
||||
*
|
||||
* If there are no rmap records at all, we also free the block. If the btree
|
||||
* being rebuilt lives in the free space (bnobt/cntbt/rmapbt) then there isn't
|
||||
* supposed to be a rmap record and everything is ok. For other btrees there
|
||||
* had to have been an rmap entry for the block to have ended up on @bitmap,
|
||||
* so if it's gone now there's something wrong and the fs will shut down.
|
||||
*
|
||||
* Note: If there are multiple rmap records with only the same rmap owner as
|
||||
* the btree we're trying to rebuild and the block is indeed owned by another
|
||||
* data structure with the same rmap owner, then the block will be in sublist
|
||||
* and therefore doesn't need disposal. If there are multiple rmap records
|
||||
* with only the same rmap owner but the block is not owned by something with
|
||||
* the same rmap owner, the block will be freed.
|
||||
*
|
||||
* The caller is responsible for locking the AG headers for the entire rebuild
|
||||
* operation so that nothing else can sneak in and change the AG state while
|
||||
* we're not looking. We must also invalidate any buffers associated with
|
||||
* @bitmap.
|
||||
*/
|
||||
|
||||
/* Information about reaping extents after a repair. */
|
||||
struct xreap_state {
|
||||
struct xfs_scrub *sc;
|
||||
|
||||
/* Reverse mapping owner and metadata reservation type. */
|
||||
const struct xfs_owner_info *oinfo;
|
||||
enum xfs_ag_resv_type resv;
|
||||
|
||||
/* If true, roll the transaction before reaping the next extent. */
|
||||
bool force_roll;
|
||||
|
||||
/* Number of deferred reaps attached to the current transaction. */
|
||||
unsigned int deferred;
|
||||
|
||||
/* Number of invalidated buffers logged to the current transaction. */
|
||||
unsigned int invalidated;
|
||||
|
||||
/* Number of deferred reaps queued during the whole reap sequence. */
|
||||
unsigned long long total_deferred;
|
||||
};
|
||||
|
||||
/* Put a block back on the AGFL. */
|
||||
STATIC int
|
||||
xreap_put_freelist(
|
||||
struct xfs_scrub *sc,
|
||||
xfs_agblock_t agbno)
|
||||
{
|
||||
struct xfs_buf *agfl_bp;
|
||||
int error;
|
||||
|
||||
/* Make sure there's space on the freelist. */
|
||||
error = xrep_fix_freelist(sc, true);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
/*
|
||||
* Since we're "freeing" a lost block onto the AGFL, we have to
|
||||
* create an rmap for the block prior to merging it or else other
|
||||
* parts will break.
|
||||
*/
|
||||
error = xfs_rmap_alloc(sc->tp, sc->sa.agf_bp, sc->sa.pag, agbno, 1,
|
||||
&XFS_RMAP_OINFO_AG);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
/* Put the block on the AGFL. */
|
||||
error = xfs_alloc_read_agfl(sc->sa.pag, sc->tp, &agfl_bp);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
error = xfs_alloc_put_freelist(sc->sa.pag, sc->tp, sc->sa.agf_bp,
|
||||
agfl_bp, agbno, 0);
|
||||
if (error)
|
||||
return error;
|
||||
xfs_extent_busy_insert(sc->tp, sc->sa.pag, agbno, 1,
|
||||
XFS_EXTENT_BUSY_SKIP_DISCARD);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Are there any uncommitted reap operations? */
|
||||
static inline bool xreap_dirty(const struct xreap_state *rs)
|
||||
{
|
||||
if (rs->force_roll)
|
||||
return true;
|
||||
if (rs->deferred)
|
||||
return true;
|
||||
if (rs->invalidated)
|
||||
return true;
|
||||
if (rs->total_deferred)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
#define XREAP_MAX_BINVAL (2048)
|
||||
|
||||
/*
|
||||
* Decide if we want to roll the transaction after reaping an extent. We don't
|
||||
* want to overrun the transaction reservation, so we prohibit more than
|
||||
* 128 EFIs per transaction. For the same reason, we limit the number
|
||||
* of buffer invalidations to 2048.
|
||||
*/
|
||||
static inline bool xreap_want_roll(const struct xreap_state *rs)
|
||||
{
|
||||
if (rs->force_roll)
|
||||
return true;
|
||||
if (rs->deferred > XREP_MAX_ITRUNCATE_EFIS)
|
||||
	return true;
	if (rs->invalidated > XREAP_MAX_BINVAL)
		return true;
	return false;
}

static inline void xreap_reset(struct xreap_state *rs)
{
	rs->total_deferred += rs->deferred;
	rs->deferred = 0;
	rs->invalidated = 0;
	rs->force_roll = false;
}

#define XREAP_MAX_DEFER_CHAIN		(2048)

/*
 * Decide if we want to finish the deferred ops that are attached to the scrub
 * transaction.  We don't want to queue huge chains of deferred ops because
 * that can consume a lot of log space and kernel memory.  Hence we trigger an
 * xfs_defer_finish if there are more than 2048 deferred reap operations or the
 * caller did some real work.
 */
static inline bool
xreap_want_defer_finish(const struct xreap_state *rs)
{
	if (rs->force_roll)
		return true;
	if (rs->total_deferred > XREAP_MAX_DEFER_CHAIN)
		return true;
	return false;
}

static inline void xreap_defer_finish_reset(struct xreap_state *rs)
{
	rs->total_deferred = 0;
	rs->deferred = 0;
	rs->invalidated = 0;
	rs->force_roll = false;
}

/* Try to invalidate the incore buffers for an extent that we're freeing. */
STATIC void
xreap_agextent_binval(
	struct xreap_state	*rs,
	xfs_agblock_t		agbno,
	xfs_extlen_t		*aglenp)
{
	struct xfs_scrub	*sc = rs->sc;
	struct xfs_perag	*pag = sc->sa.pag;
	struct xfs_mount	*mp = sc->mp;
	xfs_agnumber_t		agno = sc->sa.pag->pag_agno;
	xfs_agblock_t		agbno_next = agbno + *aglenp;
	xfs_agblock_t		bno = agbno;

	/*
	 * Avoid invalidating AG headers and post-EOFS blocks because we never
	 * own those.
	 */
	if (!xfs_verify_agbno(pag, agbno) ||
	    !xfs_verify_agbno(pag, agbno_next - 1))
		return;

	/*
	 * If there are incore buffers for these blocks, invalidate them.  We
	 * assume that the lack of any other known owners means that the buffer
	 * can be locked without risk of deadlocking.  The buffer cache cannot
	 * detect aliasing, so employ nested loops to scan for incore buffers
	 * of any plausible size.
	 */
	while (bno < agbno_next) {
		xfs_agblock_t	fsbcount;
		xfs_agblock_t	max_fsbs;

		/*
		 * Max buffer size is the max remote xattr buffer size, which
		 * is one fs block larger than 64k.
		 */
		max_fsbs = min_t(xfs_agblock_t, agbno_next - bno,
				xfs_attr3_rmt_blocks(mp, XFS_XATTR_SIZE_MAX));

		for (fsbcount = 1; fsbcount <= max_fsbs; fsbcount++) {
			struct xfs_buf	*bp = NULL;
			xfs_daddr_t	daddr;
			int		error;

			daddr = XFS_AGB_TO_DADDR(mp, agno, bno);
			error = xfs_buf_incore(mp->m_ddev_targp, daddr,
					XFS_FSB_TO_BB(mp, fsbcount),
					XBF_LIVESCAN, &bp);
			if (error)
				continue;

			xfs_trans_bjoin(sc->tp, bp);
			xfs_trans_binval(sc->tp, bp);
			rs->invalidated++;

			/*
			 * Stop invalidating if we've hit the limit; we should
			 * still have enough reservation left to free however
			 * far we've gotten.
			 */
			if (rs->invalidated > XREAP_MAX_BINVAL) {
				*aglenp -= agbno_next - bno;
				goto out;
			}
		}

		bno++;
	}

out:
	trace_xreap_agextent_binval(sc->sa.pag, agbno, *aglenp);
}

/*
 * Figure out the longest run of blocks that we can dispose of with a single
 * call.  Cross-linked blocks should have their reverse mappings removed, but
 * single-owner extents can be freed.  AGFL blocks can only be put back one at
 * a time.
 */
STATIC int
xreap_agextent_select(
	struct xreap_state	*rs,
	xfs_agblock_t		agbno,
	xfs_agblock_t		agbno_next,
	bool			*crosslinked,
	xfs_extlen_t		*aglenp)
{
	struct xfs_scrub	*sc = rs->sc;
	struct xfs_btree_cur	*cur;
	xfs_agblock_t		bno = agbno + 1;
	xfs_extlen_t		len = 1;
	int			error;

	/*
	 * Determine if there are any other rmap records covering the first
	 * block of this extent.  If so, the block is crosslinked.
	 */
	cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp,
			sc->sa.pag);
	error = xfs_rmap_has_other_keys(cur, agbno, 1, rs->oinfo,
			crosslinked);
	if (error)
		goto out_cur;

	/* AGFL blocks can only be dealt with one at a time. */
	if (rs->resv == XFS_AG_RESV_AGFL)
		goto out_found;

	/*
	 * Figure out how many of the subsequent blocks have the same crosslink
	 * status.
	 */
	while (bno < agbno_next) {
		bool		also_crosslinked;

		error = xfs_rmap_has_other_keys(cur, bno, 1, rs->oinfo,
				&also_crosslinked);
		if (error)
			goto out_cur;

		if (*crosslinked != also_crosslinked)
			break;

		len++;
		bno++;
	}

out_found:
	*aglenp = len;
	trace_xreap_agextent_select(sc->sa.pag, agbno, len, *crosslinked);
out_cur:
	xfs_btree_del_cursor(cur, error);
	return error;
}

/*
 * Dispose of as much of the beginning of this AG extent as possible.  The
 * number of blocks disposed of will be returned in @aglenp.
 */
STATIC int
xreap_agextent_iter(
	struct xreap_state	*rs,
	xfs_agblock_t		agbno,
	xfs_extlen_t		*aglenp,
	bool			crosslinked)
{
	struct xfs_scrub	*sc = rs->sc;
	xfs_fsblock_t		fsbno;
	int			error = 0;

	fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno, agbno);

	/*
	 * If there are other rmappings, this block is cross linked and must
	 * not be freed.  Remove the reverse mapping and move on.  Otherwise,
	 * we were the only owner of the block, so free the extent, which will
	 * also remove the rmap.
	 *
	 * XXX: XFS doesn't support detecting the case where a single block
	 * metadata structure is crosslinked with a multi-block structure
	 * because the buffer cache doesn't detect aliasing problems, so we
	 * can't fix 100% of crosslinking problems (yet).  The verifiers will
	 * blow on writeout, the filesystem will shut down, and the admin gets
	 * to run xfs_repair.
	 */
	if (crosslinked) {
		trace_xreap_dispose_unmap_extent(sc->sa.pag, agbno, *aglenp);

		rs->force_roll = true;
		return xfs_rmap_free(sc->tp, sc->sa.agf_bp, sc->sa.pag, agbno,
				*aglenp, rs->oinfo);
	}

	trace_xreap_dispose_free_extent(sc->sa.pag, agbno, *aglenp);

	/*
	 * Invalidate as many buffers as we can, starting at agbno.  If this
	 * function sets *aglenp to zero, the transaction is full of logged
	 * buffer invalidations, so we need to return early so that we can
	 * roll and retry.
	 */
	xreap_agextent_binval(rs, agbno, aglenp);
	if (*aglenp == 0) {
		ASSERT(xreap_want_roll(rs));
		return 0;
	}

	/* Put blocks back on the AGFL one at a time. */
	if (rs->resv == XFS_AG_RESV_AGFL) {
		ASSERT(*aglenp == 1);
		error = xreap_put_freelist(sc, agbno);
		if (error)
			return error;

		rs->force_roll = true;
		return 0;
	}

	/*
	 * Use deferred frees to get rid of the old btree blocks to try to
	 * minimize the window in which we could crash and lose the old blocks.
	 */
	error = __xfs_free_extent_later(sc->tp, fsbno, *aglenp, rs->oinfo,
			rs->resv, true);
	if (error)
		return error;

	rs->deferred++;
	return 0;
}

/*
 * Break an AG metadata extent into sub-extents by fate (crosslinked, not
 * crosslinked), and dispose of each sub-extent separately.
 */
STATIC int
xreap_agmeta_extent(
	uint64_t		fsbno,
	uint64_t		len,
	void			*priv)
{
	struct xreap_state	*rs = priv;
	struct xfs_scrub	*sc = rs->sc;
	xfs_agblock_t		agbno = fsbno;
	xfs_agblock_t		agbno_next = agbno + len;
	int			error = 0;

	ASSERT(len <= XFS_MAX_BMBT_EXTLEN);
	ASSERT(sc->ip == NULL);

	while (agbno < agbno_next) {
		xfs_extlen_t	aglen;
		bool		crosslinked;

		error = xreap_agextent_select(rs, agbno, agbno_next,
				&crosslinked, &aglen);
		if (error)
			return error;

		error = xreap_agextent_iter(rs, agbno, &aglen, crosslinked);
		if (error)
			return error;

		if (xreap_want_defer_finish(rs)) {
			error = xrep_defer_finish(sc);
			if (error)
				return error;
			xreap_defer_finish_reset(rs);
		} else if (xreap_want_roll(rs)) {
			error = xrep_roll_ag_trans(sc);
			if (error)
				return error;
			xreap_reset(rs);
		}

		agbno += aglen;
	}

	return 0;
}

/* Dispose of every block of every AG metadata extent in the bitmap. */
int
xrep_reap_agblocks(
	struct xfs_scrub		*sc,
	struct xagb_bitmap		*bitmap,
	const struct xfs_owner_info	*oinfo,
	enum xfs_ag_resv_type		type)
{
	struct xreap_state	rs = {
		.sc			= sc,
		.oinfo			= oinfo,
		.resv			= type,
	};
	int			error;

	ASSERT(xfs_has_rmapbt(sc->mp));
	ASSERT(sc->ip == NULL);

	error = xagb_bitmap_walk(bitmap, xreap_agmeta_extent, &rs);
	if (error)
		return error;

	if (xreap_dirty(&rs))
		return xrep_defer_finish(sc);

	return 0;
}
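
The function above is the only entry point this new file exports. A caller that has just rebuilt a per-AG structure is expected to collect the old structure's blocks in an xagb_bitmap and hand the whole set to xrep_reap_agblocks(), which batches the deferred frees and rolls the scrub transaction whenever the thresholds above fire. A minimal sketch of that call pattern (illustrative only; the helper name and the owner/reservation arguments here are assumptions, not part of this patch):

/* Illustrative sketch: reap the blocks left over after a rebuild. */
STATIC int
xrep_example_reap_old_blocks(
	struct xfs_scrub	*sc,
	struct xagb_bitmap	*old_blocks)
{
	/* Free every block in @old_blocks back to the AG. */
	return xrep_reap_agblocks(sc, old_blocks, &XFS_RMAP_OINFO_AG,
			XFS_AG_RESV_NONE);
}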

@@ -0,0 +1,12 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2022-2023 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#ifndef __XFS_SCRUB_REAP_H__
#define __XFS_SCRUB_REAP_H__

int xrep_reap_agblocks(struct xfs_scrub *sc, struct xagb_bitmap *bitmap,
		const struct xfs_owner_info *oinfo, enum xfs_ag_resv_type type);

#endif /* __XFS_SCRUB_REAP_H__ */

@@ -26,11 +26,13 @@
#include "xfs_ag_resv.h"
#include "xfs_quota.h"
#include "xfs_qm.h"
#include "xfs_defer.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/bitmap.h"
#include "scrub/stats.h"

/*
 * Attempt to repair some metadata, if the metadata is corrupt and userspace
@@ -39,8 +41,10 @@
 */
int
xrep_attempt(
	struct xfs_scrub	*sc)
	struct xfs_scrub	*sc,
	struct xchk_stats_run	*run)
{
	u64			repair_start;
	int			error = 0;

	trace_xrep_attempt(XFS_I(file_inode(sc->file)), sc->sm, error);

@@ -49,8 +53,11 @@ xrep_attempt(

	/* Repair whatever's broken. */
	ASSERT(sc->ops->repair);
	run->repair_attempted = true;
	repair_start = xchk_stats_now();
	error = sc->ops->repair(sc);
	trace_xrep_done(XFS_I(file_inode(sc->file)), sc->sm, error);
	run->repair_ns += xchk_stats_elapsed_ns(repair_start);
	switch (error) {
	case 0:
		/*
@@ -59,14 +66,17 @@ xrep_attempt(
		 */
		sc->sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
		sc->flags |= XREP_ALREADY_FIXED;
		run->repair_succeeded = true;
		return -EAGAIN;
	case -ECHRNG:
		sc->flags |= XCHK_NEED_DRAIN;
		run->retries++;
		return -EAGAIN;
	case -EDEADLOCK:
		/* Tell the caller to try again having grabbed all the locks. */
		if (!(sc->flags & XCHK_TRY_HARDER)) {
			sc->flags |= XCHK_TRY_HARDER;
			run->retries++;
			return -EAGAIN;
		}
		/*
@@ -166,6 +176,56 @@ xrep_roll_ag_trans(
	return 0;
}

/* Finish all deferred work attached to the repair transaction. */
int
xrep_defer_finish(
	struct xfs_scrub	*sc)
{
	int			error;

	/*
	 * Keep the AG header buffers locked while we complete deferred work
	 * items.  Ensure that both AG buffers are dirty and held when we roll
	 * the transaction so that they move forward in the log without losing
	 * the bli (and hence the bli type) when the transaction commits.
	 *
	 * Normal code would never hold clean buffers across a roll, but repair
	 * needs both buffers to maintain a total lock on the AG.
	 */
	if (sc->sa.agi_bp) {
		xfs_ialloc_log_agi(sc->tp, sc->sa.agi_bp, XFS_AGI_MAGICNUM);
		xfs_trans_bhold(sc->tp, sc->sa.agi_bp);
	}

	if (sc->sa.agf_bp) {
		xfs_alloc_log_agf(sc->tp, sc->sa.agf_bp, XFS_AGF_MAGICNUM);
		xfs_trans_bhold(sc->tp, sc->sa.agf_bp);
	}

	/*
	 * Finish all deferred work items.  We still hold the AG header buffers
	 * locked regardless of whether or not that succeeds.  On failure, the
	 * buffers will be released during teardown on our way out of the
	 * kernel.  If successful, join the buffers to the new transaction
	 * and move on.
	 */
	error = xfs_defer_finish(&sc->tp);
	if (error)
		return error;

	/*
	 * Release the hold that we set above because defer_finish won't do
	 * that for us.  The defer roll code redirties held buffers after each
	 * roll, so the AG header buffers should be ready for logging.
	 */
	if (sc->sa.agi_bp)
		xfs_trans_bhold_release(sc->tp, sc->sa.agi_bp);
	if (sc->sa.agf_bp)
		xfs_trans_bhold_release(sc->tp, sc->sa.agf_bp);

	return 0;
}
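
The bhold/bhold_release pairing in xrep_defer_finish() is the standard XFS idiom for keeping a buffer locked while its transaction commits. As a hedged sketch of the general pattern (generic placeholder names, not code from this patch):

/* Sketch: keep @bp locked and attached while @tp rolls. */
xfs_trans_bhold(tp, bp);	/* bp stays locked across the commit */
error = xfs_trans_roll(&tp);	/* commit old tp, allocate a new one */
if (error)
	return error;
xfs_trans_bjoin(tp, bp);	/* rejoin bp to the new transaction */

Here the repair code relies on xfs_defer_finish() to redirty the held buffers after each internal roll, so only the release half of the pattern is needed afterwards.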

/*
 * Does the given AG have enough space to rebuild a btree?  Neither AG
 * reservation can be critical, and we must have enough space (factoring
@@ -297,89 +357,6 @@ xrep_calc_ag_resblks(
	return max(max(bnobt_sz, inobt_sz), max(rmapbt_sz, refcbt_sz));
}

/* Allocate a block in an AG. */
int
xrep_alloc_ag_block(
	struct xfs_scrub		*sc,
	const struct xfs_owner_info	*oinfo,
	xfs_fsblock_t			*fsbno,
	enum xfs_ag_resv_type		resv)
{
	struct xfs_alloc_arg	args = {0};
	xfs_agblock_t		bno;
	int			error;

	switch (resv) {
	case XFS_AG_RESV_AGFL:
	case XFS_AG_RESV_RMAPBT:
		error = xfs_alloc_get_freelist(sc->sa.pag, sc->tp,
				sc->sa.agf_bp, &bno, 1);
		if (error)
			return error;
		if (bno == NULLAGBLOCK)
			return -ENOSPC;
		xfs_extent_busy_reuse(sc->mp, sc->sa.pag, bno, 1, false);
		*fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno, bno);
		if (resv == XFS_AG_RESV_RMAPBT)
			xfs_ag_resv_rmapbt_alloc(sc->mp, sc->sa.pag->pag_agno);
		return 0;
	default:
		break;
	}

	args.tp = sc->tp;
	args.mp = sc->mp;
	args.pag = sc->sa.pag;
	args.oinfo = *oinfo;
	args.minlen = 1;
	args.maxlen = 1;
	args.prod = 1;
	args.resv = resv;

	error = xfs_alloc_vextent_this_ag(&args, sc->sa.pag->pag_agno);
	if (error)
		return error;
	if (args.fsbno == NULLFSBLOCK)
		return -ENOSPC;
	ASSERT(args.len == 1);
	*fsbno = args.fsbno;

	return 0;
}

/* Initialize a new AG btree root block with zero entries. */
int
xrep_init_btblock(
	struct xfs_scrub		*sc,
	xfs_fsblock_t			fsb,
	struct xfs_buf			**bpp,
	xfs_btnum_t			btnum,
	const struct xfs_buf_ops	*ops)
{
	struct xfs_trans	*tp = sc->tp;
	struct xfs_mount	*mp = sc->mp;
	struct xfs_buf		*bp;
	int			error;

	trace_xrep_init_btblock(mp, XFS_FSB_TO_AGNO(mp, fsb),
			XFS_FSB_TO_AGBNO(mp, fsb), btnum);

	ASSERT(XFS_FSB_TO_AGNO(mp, fsb) == sc->sa.pag->pag_agno);
	error = xfs_trans_get_buf(tp, mp->m_ddev_targp,
			XFS_FSB_TO_DADDR(mp, fsb), XFS_FSB_TO_BB(mp, 1), 0,
			&bp);
	if (error)
		return error;
	xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
	xfs_btree_init_block(mp, bp, btnum, 0, 0, sc->sa.pag->pag_agno);
	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_BTREE_BUF);
	xfs_trans_log_buf(tp, bp, 0, BBTOB(bp->b_length) - 1);
	bp->b_ops = ops;
	*bpp = bp;

	return 0;
}

/*
 * Reconstructing per-AG Btrees
 *
@@ -404,91 +381,8 @@ xrep_init_btblock(
 * sublist.  As with the other btrees we subtract sublist from bitmap, and the
 * result (since the rmapbt lives in the free space) are the blocks from the
 * old rmapbt.
 *
 * Disposal of Blocks from Old per-AG Btrees
 *
 * Now that we've constructed a new btree to replace the damaged one, we want
 * to dispose of the blocks that (we think) the old btree was using.
 * Previously, we used the rmapbt to collect the extents (bitmap) with the
 * rmap owner corresponding to the tree we rebuilt, collected extents for any
 * blocks with the same rmap owner that are owned by another data structure
 * (sublist), and subtracted sublist from bitmap.  In theory the extents
 * remaining in bitmap are the old btree's blocks.
 *
 * Unfortunately, it's possible that the btree was crosslinked with other
 * blocks on disk.  The rmap data can tell us if there are multiple owners, so
 * if the rmapbt says there is an owner of this block other than @oinfo, then
 * the block is crosslinked.  Remove the reverse mapping and continue.
 *
 * If there is one rmap record, we can free the block, which removes the
 * reverse mapping but doesn't add the block to the free space.  Our repair
 * strategy is to hope the other metadata objects crosslinked on this block
 * will be rebuilt (atop different blocks), thereby removing all the cross
 * links.
 *
 * If there are no rmap records at all, we also free the block.  If the btree
 * being rebuilt lives in the free space (bnobt/cntbt/rmapbt) then there isn't
 * supposed to be a rmap record and everything is ok.  For other btrees there
 * had to have been an rmap entry for the block to have ended up on @bitmap,
 * so if it's gone now there's something wrong and the fs will shut down.
 *
 * Note: If there are multiple rmap records with only the same rmap owner as
 * the btree we're trying to rebuild and the block is indeed owned by another
 * data structure with the same rmap owner, then the block will be in sublist
 * and therefore doesn't need disposal.  If there are multiple rmap records
 * with only the same rmap owner but the block is not owned by something with
 * the same rmap owner, the block will be freed.
 *
 * The caller is responsible for locking the AG headers for the entire rebuild
 * operation so that nothing else can sneak in and change the AG state while
 * we're not looking.  We also assume that the caller already invalidated any
 * buffers associated with @bitmap.
 */

static int
xrep_invalidate_block(
	uint64_t		fsbno,
	void			*priv)
{
	struct xfs_scrub	*sc = priv;
	struct xfs_buf		*bp;
	int			error;

	/* Skip AG headers and post-EOFS blocks */
	if (!xfs_verify_fsbno(sc->mp, fsbno))
		return 0;

	error = xfs_buf_incore(sc->mp->m_ddev_targp,
			XFS_FSB_TO_DADDR(sc->mp, fsbno),
			XFS_FSB_TO_BB(sc->mp, 1), XBF_TRYLOCK, &bp);
	if (error)
		return 0;

	xfs_trans_bjoin(sc->tp, bp);
	xfs_trans_binval(sc->tp, bp);
	return 0;
}

/*
 * Invalidate buffers for per-AG btree blocks we're dumping.  This function
 * is not intended for use with file data repairs; we have bunmapi for that.
 */
int
xrep_invalidate_blocks(
	struct xfs_scrub	*sc,
	struct xbitmap		*bitmap)
{
	/*
	 * For each block in each extent, see if there's an incore buffer for
	 * exactly that block; if so, invalidate it.  The buffer cache only
	 * lets us look for one buffer at a time, so we have to look one block
	 * at a time.  Avoid invalidating AG headers and post-EOFS blocks
	 * because we never own those; and if we can't TRYLOCK the buffer we
	 * assume it's owned by someone else.
	 */
	return xbitmap_walk_bits(bitmap, xrep_invalidate_block, sc);
}

/* Ensure the freelist is the correct size. */
int
xrep_fix_freelist(
@@ -507,155 +401,6 @@ xrep_fix_freelist(
			can_shrink ? 0 : XFS_ALLOC_FLAG_NOSHRINK);
}

/* Information about reaping extents after a repair. */
struct xrep_reap_state {
	struct xfs_scrub		*sc;

	/* Reverse mapping owner and metadata reservation type. */
	const struct xfs_owner_info	*oinfo;
	enum xfs_ag_resv_type		resv;
};

/*
 * Put a block back on the AGFL.
 */
STATIC int
xrep_put_freelist(
	struct xfs_scrub	*sc,
	xfs_agblock_t		agbno)
{
	struct xfs_buf		*agfl_bp;
	int			error;

	/* Make sure there's space on the freelist. */
	error = xrep_fix_freelist(sc, true);
	if (error)
		return error;

	/*
	 * Since we're "freeing" a lost block onto the AGFL, we have to
	 * create an rmap for the block prior to merging it or else other
	 * parts will break.
	 */
	error = xfs_rmap_alloc(sc->tp, sc->sa.agf_bp, sc->sa.pag, agbno, 1,
			&XFS_RMAP_OINFO_AG);
	if (error)
		return error;

	/* Put the block on the AGFL. */
	error = xfs_alloc_read_agfl(sc->sa.pag, sc->tp, &agfl_bp);
	if (error)
		return error;

	error = xfs_alloc_put_freelist(sc->sa.pag, sc->tp, sc->sa.agf_bp,
			agfl_bp, agbno, 0);
	if (error)
		return error;
	xfs_extent_busy_insert(sc->tp, sc->sa.pag, agbno, 1,
			XFS_EXTENT_BUSY_SKIP_DISCARD);

	return 0;
}

/* Dispose of a single block. */
STATIC int
xrep_reap_block(
	uint64_t		fsbno,
	void			*priv)
{
	struct xrep_reap_state	*rs = priv;
	struct xfs_scrub	*sc = rs->sc;
	struct xfs_btree_cur	*cur;
	struct xfs_buf		*agf_bp = NULL;
	xfs_agblock_t		agbno;
	bool			has_other_rmap;
	int			error;

	ASSERT(sc->ip != NULL ||
	       XFS_FSB_TO_AGNO(sc->mp, fsbno) == sc->sa.pag->pag_agno);
	trace_xrep_dispose_btree_extent(sc->mp,
			XFS_FSB_TO_AGNO(sc->mp, fsbno),
			XFS_FSB_TO_AGBNO(sc->mp, fsbno), 1);

	agbno = XFS_FSB_TO_AGBNO(sc->mp, fsbno);
	ASSERT(XFS_FSB_TO_AGNO(sc->mp, fsbno) == sc->sa.pag->pag_agno);

	/*
	 * If we are repairing per-inode metadata, we need to read in the AGF
	 * buffer.  Otherwise, we're repairing a per-AG structure, so reuse
	 * the AGF buffer that the setup functions already grabbed.
	 */
	if (sc->ip) {
		error = xfs_alloc_read_agf(sc->sa.pag, sc->tp, 0, &agf_bp);
		if (error)
			return error;
	} else {
		agf_bp = sc->sa.agf_bp;
	}
	cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, agf_bp, sc->sa.pag);

	/* Can we find any other rmappings? */
	error = xfs_rmap_has_other_keys(cur, agbno, 1, rs->oinfo,
			&has_other_rmap);
	xfs_btree_del_cursor(cur, error);
	if (error)
		goto out_free;

	/*
	 * If there are other rmappings, this block is cross linked and must
	 * not be freed.  Remove the reverse mapping and move on.  Otherwise,
	 * we were the only owner of the block, so free the extent, which will
	 * also remove the rmap.
	 *
	 * XXX: XFS doesn't support detecting the case where a single block
	 * metadata structure is crosslinked with a multi-block structure
	 * because the buffer cache doesn't detect aliasing problems, so we
	 * can't fix 100% of crosslinking problems (yet).  The verifiers will
	 * blow on writeout, the filesystem will shut down, and the admin gets
	 * to run xfs_repair.
	 */
	if (has_other_rmap)
		error = xfs_rmap_free(sc->tp, agf_bp, sc->sa.pag, agbno,
					1, rs->oinfo);
	else if (rs->resv == XFS_AG_RESV_AGFL)
		error = xrep_put_freelist(sc, agbno);
	else
		error = xfs_free_extent(sc->tp, sc->sa.pag, agbno, 1, rs->oinfo,
				rs->resv);
	if (agf_bp != sc->sa.agf_bp)
		xfs_trans_brelse(sc->tp, agf_bp);
	if (error)
		return error;

	if (sc->ip)
		return xfs_trans_roll_inode(&sc->tp, sc->ip);
	return xrep_roll_ag_trans(sc);

out_free:
	if (agf_bp != sc->sa.agf_bp)
		xfs_trans_brelse(sc->tp, agf_bp);
	return error;
}

/* Dispose of every block of every extent in the bitmap. */
int
xrep_reap_extents(
	struct xfs_scrub		*sc,
	struct xbitmap			*bitmap,
	const struct xfs_owner_info	*oinfo,
	enum xfs_ag_resv_type		type)
{
	struct xrep_reap_state	rs = {
		.sc			= sc,
		.oinfo			= oinfo,
		.resv			= type,
	};

	ASSERT(xfs_has_rmapbt(sc->mp));

	return xbitmap_walk_bits(bitmap, xrep_reap_block, &rs);
}

/*
 * Finding per-AG Btree Roots for AGF/AGI Reconstruction
 *

@@ -8,6 +8,8 @@

#include "xfs_quota_defs.h"

struct xchk_stats_run;

static inline int xrep_notsupported(struct xfs_scrub *sc)
{
	return -EOPNOTSUPP;
@@ -15,28 +17,28 @@ static inline int xrep_notsupported(struct xfs_scrub *sc)

#ifdef CONFIG_XFS_ONLINE_REPAIR

/*
 * This is the maximum number of deferred extent freeing item extents (EFIs)
 * that we'll attach to a transaction without rolling the transaction to avoid
 * overrunning a tr_itruncate reservation.
 */
#define XREP_MAX_ITRUNCATE_EFIS	(128)


/* Repair helpers */

int xrep_attempt(struct xfs_scrub *sc);
int xrep_attempt(struct xfs_scrub *sc, struct xchk_stats_run *run);
void xrep_failure(struct xfs_mount *mp);
int xrep_roll_ag_trans(struct xfs_scrub *sc);
int xrep_defer_finish(struct xfs_scrub *sc);
bool xrep_ag_has_space(struct xfs_perag *pag, xfs_extlen_t nr_blocks,
		enum xfs_ag_resv_type type);
xfs_extlen_t xrep_calc_ag_resblks(struct xfs_scrub *sc);
int xrep_alloc_ag_block(struct xfs_scrub *sc,
		const struct xfs_owner_info *oinfo, xfs_fsblock_t *fsbno,
		enum xfs_ag_resv_type resv);
int xrep_init_btblock(struct xfs_scrub *sc, xfs_fsblock_t fsb,
		struct xfs_buf **bpp, xfs_btnum_t btnum,
		const struct xfs_buf_ops *ops);

struct xbitmap;
struct xagb_bitmap;

int xrep_fix_freelist(struct xfs_scrub *sc, bool can_shrink);
int xrep_invalidate_blocks(struct xfs_scrub *sc, struct xbitmap *btlist);
int xrep_reap_extents(struct xfs_scrub *sc, struct xbitmap *exlist,
		const struct xfs_owner_info *oinfo, enum xfs_ag_resv_type type);

struct xrep_find_ag_btree {
	/* in: rmap owner of the btree we're looking for */
@@ -70,7 +72,8 @@ int xrep_agi(struct xfs_scrub *sc);

static inline int
xrep_attempt(
	struct xfs_scrub	*sc)
	struct xfs_scrub	*sc,
	struct xchk_stats_run	*run)
{
	return -EOPNOTSUPP;
}

@@ -19,19 +19,20 @@

/* Set us up with the realtime metadata locked. */
int
xchk_setup_rt(
xchk_setup_rtbitmap(
	struct xfs_scrub	*sc)
{
	int			error;

	error = xchk_setup_fs(sc);
	error = xchk_trans_alloc(sc, 0);
	if (error)
		return error;

	sc->ilock_flags = XFS_ILOCK_EXCL | XFS_ILOCK_RTBITMAP;
	sc->ip = sc->mp->m_rbmip;
	xfs_ilock(sc->ip, sc->ilock_flags);
	error = xchk_install_live_inode(sc, sc->mp->m_rbmip);
	if (error)
		return error;

	xchk_ilock(sc, XFS_ILOCK_EXCL | XFS_ILOCK_RTBITMAP);
	return 0;
}

@@ -123,43 +124,6 @@ out:
	return error;
}

/* Scrub the realtime summary. */
int
xchk_rtsummary(
	struct xfs_scrub	*sc)
{
	struct xfs_inode	*rsumip = sc->mp->m_rsumip;
	struct xfs_inode	*old_ip = sc->ip;
	uint			old_ilock_flags = sc->ilock_flags;
	int			error = 0;

	/*
	 * We ILOCK'd the rt bitmap ip in the setup routine, now lock the
	 * rt summary ip in compliance with the rt inode locking rules.
	 *
	 * Since we switch sc->ip to rsumip we have to save the old ilock
	 * flags so that we don't mix up the inode state that @sc tracks.
	 */
	sc->ip = rsumip;
	sc->ilock_flags = XFS_ILOCK_EXCL | XFS_ILOCK_RTSUM;
	xfs_ilock(sc->ip, sc->ilock_flags);

	/* Invoke the fork scrubber. */
	error = xchk_metadata_inode_forks(sc);
	if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
		goto out;

	/* XXX: implement this some day */
	xchk_set_incomplete(sc);
out:
	/* Switch back to the rtbitmap inode and lock flags. */
	xfs_iunlock(sc->ip, sc->ilock_flags);
	sc->ilock_flags = old_ilock_flags;
	sc->ip = old_ip;
	return error;
}


/* xref check that the extent is not free in the rtbitmap */
void
xchk_xref_is_used_rt_space(

@@ -0,0 +1,264 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2017-2023 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_btree.h"
#include "xfs_inode.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_rtalloc.h"
#include "xfs_bit.h"
#include "xfs_bmap.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
#include "scrub/xfile.h"

/*
 * Realtime Summary
 * ================
 *
 * We check the realtime summary by scanning the realtime bitmap file to create
 * a new summary file incore, and then we compare the computed version against
 * the ondisk version.  We use the 'xfile' functionality to store this
 * (potentially large) amount of data in pageable memory.
 */

/* Set us up to check the rtsummary file. */
int
xchk_setup_rtsummary(
	struct xfs_scrub	*sc)
{
	struct xfs_mount	*mp = sc->mp;
	char			*descr;
	int			error;

	/*
	 * Create an xfile to construct a new rtsummary file.  The xfile allows
	 * us to avoid pinning kernel memory for this purpose.
	 */
	descr = xchk_xfile_descr(sc, "realtime summary file");
	error = xfile_create(descr, mp->m_rsumsize, &sc->xfile);
	kfree(descr);
	if (error)
		return error;

	error = xchk_trans_alloc(sc, 0);
	if (error)
		return error;

	/* Allocate a memory buffer for the summary comparison. */
	sc->buf = kvmalloc(mp->m_sb.sb_blocksize, XCHK_GFP_FLAGS);
	if (!sc->buf)
		return -ENOMEM;

	error = xchk_install_live_inode(sc, mp->m_rsumip);
	if (error)
		return error;

	/*
	 * Locking order requires us to take the rtbitmap first.  We must be
	 * careful to unlock it ourselves when we are done with the rtbitmap
	 * file since the scrub infrastructure won't do that for us.  Only
	 * then can we lock the rtsummary inode.
	 */
	xfs_ilock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
	xchk_ilock(sc, XFS_ILOCK_EXCL | XFS_ILOCK_RTSUM);
	return 0;
}

/* Helper functions to record suminfo words in an xfile. */

typedef unsigned int xchk_rtsumoff_t;

static inline int
xfsum_load(
	struct xfs_scrub	*sc,
	xchk_rtsumoff_t		sumoff,
	xfs_suminfo_t		*info)
{
	return xfile_obj_load(sc->xfile, info, sizeof(xfs_suminfo_t),
			sumoff << XFS_WORDLOG);
}

static inline int
xfsum_store(
	struct xfs_scrub	*sc,
	xchk_rtsumoff_t		sumoff,
	const xfs_suminfo_t	info)
{
	return xfile_obj_store(sc->xfile, &info, sizeof(xfs_suminfo_t),
			sumoff << XFS_WORDLOG);
}

static inline int
xfsum_copyout(
	struct xfs_scrub	*sc,
	xchk_rtsumoff_t		sumoff,
	xfs_suminfo_t		*info,
	unsigned int		nr_words)
{
	return xfile_obj_load(sc->xfile, info, nr_words << XFS_WORDLOG,
			sumoff << XFS_WORDLOG);
}
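
A note on the offset arithmetic in these helpers: xfs_suminfo_t is a four-byte word, so shifting a word index left by XFS_WORDLOG (2) yields the byte offset that xfile_obj_load() and xfile_obj_store() expect. For example, summary word 10 lives at byte offset 10 << 2 = 40, and copying out mp->m_blockwsize words moves exactly one filesystem block's worth of summary data.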

/* Update the summary file to reflect the free extent that we've accumulated. */
STATIC int
xchk_rtsum_record_free(
	struct xfs_mount		*mp,
	struct xfs_trans		*tp,
	const struct xfs_rtalloc_rec	*rec,
	void				*priv)
{
	struct xfs_scrub	*sc = priv;
	xfs_fileoff_t		rbmoff;
	xfs_rtblock_t		rtbno;
	xfs_filblks_t		rtlen;
	xchk_rtsumoff_t		offs;
	unsigned int		lenlog;
	xfs_suminfo_t		v = 0;
	int			error = 0;

	if (xchk_should_terminate(sc, &error))
		return error;

	/* Compute the relevant location in the rtsum file. */
	rbmoff = XFS_BITTOBLOCK(mp, rec->ar_startext);
	lenlog = XFS_RTBLOCKLOG(rec->ar_extcount);
	offs = XFS_SUMOFFS(mp, lenlog, rbmoff);

	rtbno = rec->ar_startext * mp->m_sb.sb_rextsize;
	rtlen = rec->ar_extcount * mp->m_sb.sb_rextsize;

	if (!xfs_verify_rtext(mp, rtbno, rtlen)) {
		xchk_ino_xref_set_corrupt(sc, mp->m_rbmip->i_ino);
		return -EFSCORRUPTED;
	}

	/* Bump the summary count. */
	error = xfsum_load(sc, offs, &v);
	if (error)
		return error;

	v++;
	trace_xchk_rtsum_record_free(mp, rec->ar_startext, rec->ar_extcount,
			lenlog, offs, v);

	return xfsum_store(sc, offs, v);
}

/* Compute the realtime summary from the realtime bitmap. */
STATIC int
xchk_rtsum_compute(
	struct xfs_scrub	*sc)
{
	struct xfs_mount	*mp = sc->mp;
	unsigned long long	rtbmp_bytes;

	/* If the bitmap size doesn't match the computed size, bail. */
	rtbmp_bytes = howmany_64(mp->m_sb.sb_rextents, NBBY);
	if (roundup_64(rtbmp_bytes, mp->m_sb.sb_blocksize) !=
	    mp->m_rbmip->i_disk_size)
		return -EFSCORRUPTED;

	return xfs_rtalloc_query_all(sc->mp, sc->tp, xchk_rtsum_record_free,
			sc);
}

/* Compare the rtsummary file against the one we computed. */
STATIC int
xchk_rtsum_compare(
	struct xfs_scrub	*sc)
{
	struct xfs_mount	*mp = sc->mp;
	struct xfs_buf		*bp;
	struct xfs_bmbt_irec	map;
	xfs_fileoff_t		off;
	xchk_rtsumoff_t		sumoff = 0;
	int			nmap;

	for (off = 0; off < XFS_B_TO_FSB(mp, mp->m_rsumsize); off++) {
		int		error = 0;

		if (xchk_should_terminate(sc, &error))
			return error;
		if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
			return 0;

		/* Make sure we have a written extent. */
		nmap = 1;
		error = xfs_bmapi_read(mp->m_rsumip, off, 1, &map, &nmap,
				XFS_DATA_FORK);
		if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, off, &error))
			return error;

		if (nmap != 1 || !xfs_bmap_is_written_extent(&map)) {
			xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, off);
			return 0;
		}

		/* Read a block's worth of ondisk rtsummary file. */
		error = xfs_rtbuf_get(mp, sc->tp, off, 1, &bp);
		if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, off, &error))
			return error;

		/* Read a block's worth of computed rtsummary file. */
		error = xfsum_copyout(sc, sumoff, sc->buf, mp->m_blockwsize);
		if (error) {
			xfs_trans_brelse(sc->tp, bp);
			return error;
		}

		if (memcmp(bp->b_addr, sc->buf,
					mp->m_blockwsize << XFS_WORDLOG) != 0)
			xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, off);

		xfs_trans_brelse(sc->tp, bp);
		sumoff += mp->m_blockwsize;
	}

	return 0;
}

/* Scrub the realtime summary. */
int
xchk_rtsummary(
	struct xfs_scrub	*sc)
{
	struct xfs_mount	*mp = sc->mp;
	int			error = 0;

	/* Invoke the fork scrubber. */
	error = xchk_metadata_inode_forks(sc);
	if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
		goto out_rbm;

	/* Construct the new summary file from the rtbitmap. */
	error = xchk_rtsum_compute(sc);
	if (error == -EFSCORRUPTED) {
		/*
		 * EFSCORRUPTED means the rtbitmap is corrupt, which is an xref
		 * error since we're checking the summary file.
		 */
		xchk_ino_xref_set_corrupt(sc, mp->m_rbmip->i_ino);
		error = 0;
		goto out_rbm;
	}
	if (error)
		goto out_rbm;

	/* Does the computed summary file match the actual rtsummary file? */
	error = xchk_rtsum_compare(sc);

out_rbm:
	/* Unlock the rtbitmap since we're done with it. */
	xfs_iunlock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
	return error;
}

@@ -22,6 +22,8 @@
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/health.h"
#include "scrub/stats.h"
#include "scrub/xfile.h"

/*
 * Online Scrub and Repair
@@ -166,8 +168,6 @@ xchk_teardown(
	struct xfs_scrub	*sc,
	int			error)
{
	struct xfs_inode	*ip_in = XFS_I(file_inode(sc->file));

	xchk_ag_free(sc, &sc->sa);
	if (sc->tp) {
		if (error == 0 && (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR))
@@ -178,14 +178,18 @@
	}
	if (sc->ip) {
		if (sc->ilock_flags)
			xfs_iunlock(sc->ip, sc->ilock_flags);
		if (sc->ip != ip_in &&
		    !xfs_internal_inum(sc->mp, sc->ip->i_ino))
			xchk_irele(sc, sc->ip);
		xchk_iunlock(sc, sc->ilock_flags);
		xchk_irele(sc, sc->ip);
		sc->ip = NULL;
	}
	if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
	if (sc->flags & XCHK_HAVE_FREEZE_PROT) {
		sc->flags &= ~XCHK_HAVE_FREEZE_PROT;
		mnt_drop_write_file(sc->file);
	}
	if (sc->xfile) {
		xfile_destroy(sc->xfile);
		sc->xfile = NULL;
	}
	if (sc->buf) {
		if (sc->buf_cleanup)
			sc->buf_cleanup(sc->buf);
@@ -320,14 +324,14 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
	},
	[XFS_SCRUB_TYPE_RTBITMAP] = {	/* realtime bitmap */
		.type	= ST_FS,
		.setup	= xchk_setup_rt,
		.setup	= xchk_setup_rtbitmap,
		.scrub	= xchk_rtbitmap,
		.has	= xfs_has_realtime,
		.repair	= xrep_notsupported,
	},
	[XFS_SCRUB_TYPE_RTSUM] = {	/* realtime summary */
		.type	= ST_FS,
		.setup	= xchk_setup_rt,
		.setup	= xchk_setup_rtsummary,
		.scrub	= xchk_rtsummary,
		.has	= xfs_has_realtime,
		.repair	= xrep_notsupported,
@@ -407,6 +411,11 @@ xchk_validate_inputs(
		goto out;
	}

	/* No rebuild without repair. */
	if ((sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD) &&
	    !(sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR))
		return -EINVAL;

	/*
	 * We only want to repair read-write v5+ filesystems.  Defer the check
	 * for ops->repair until after our scrub confirms that we need to
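
For context, the new input flag is driven from userspace through the scrub ioctl. A hypothetical caller that forces a rebuild of AG 0's AGF might look roughly like this (illustrative sketch only; error handling elided):

/* Hypothetical userspace sketch: request a forced rebuild of AG 0's AGF. */
struct xfs_scrub_metadata	sm = {
	.sm_type	= XFS_SCRUB_TYPE_AGF,
	.sm_agno	= 0,
	.sm_flags	= XFS_SCRUB_IFLAG_REPAIR |
			  XFS_SCRUB_IFLAG_FORCE_REBUILD,
};

error = ioctl(fd, XFS_IOC_SCRUB_METADATA, &sm);

Requesting FORCE_REBUILD without REPAIR trips the -EINVAL check added above.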
@@ -461,8 +470,10 @@ xfs_scrub_metadata(
	struct file			*file,
	struct xfs_scrub_metadata	*sm)
{
	struct xchk_stats_run		run = { };
	struct xfs_scrub		*sc;
	struct xfs_mount		*mp = XFS_I(file_inode(file))->i_mount;
	u64				check_start;
	int				error = 0;

	BUILD_BUG_ON(sizeof(meta_scrub_ops) !=
@@ -505,6 +516,8 @@ retry_op:
		error = mnt_want_write_file(sc->file);
		if (error)
			goto out_sc;

		sc->flags |= XCHK_HAVE_FREEZE_PROT;
	}

	/* Set up for the operation. */
@@ -517,7 +530,9 @@ retry_op:
		goto out_teardown;

	/* Scrub for errors. */
	check_start = xchk_stats_now();
	error = sc->ops->scrub(sc);
	run.scrub_ns += xchk_stats_elapsed_ns(check_start);
	if (error == -EDEADLOCK && !(sc->flags & XCHK_TRY_HARDER))
		goto try_harder;
	if (error == -ECHRNG && !(sc->flags & XCHK_NEED_DRAIN))
@@ -529,15 +544,16 @@ retry_op:

	if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) &&
	    !(sc->flags & XREP_ALREADY_FIXED)) {
		bool needs_fix;
		bool needs_fix = xchk_needs_repair(sc->sm);

		/* Userspace asked us to rebuild the structure regardless. */
		if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD)
			needs_fix = true;

		/* Let debug users force us into the repair routines. */
		if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR))
			sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
		if (XFS_TEST_ERROR(needs_fix, mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR))
			needs_fix = true;

		needs_fix = (sc->sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT |
						 XFS_SCRUB_OFLAG_XCORRUPT |
						 XFS_SCRUB_OFLAG_PREEN));
		/*
		 * If userspace asked for a repair but it wasn't necessary,
		 * report that back to userspace.
@@ -551,7 +567,7 @@ retry_op:
		 * If it's broken, userspace wants us to fix it, and we haven't
		 * already tried to fix it, then attempt a repair.
		 */
		error = xrep_attempt(sc);
		error = xrep_attempt(sc, &run);
		if (error == -EAGAIN) {
			/*
			 * Either the repair function succeeded or it couldn't
@@ -572,6 +588,8 @@ out_nofix:
out_teardown:
	error = xchk_teardown(sc, error);
out_sc:
	if (error != -ENOENT)
		xchk_stats_merge(mp, sm, &run);
	kfree(sc);
out:
	trace_xchk_done(XFS_I(file_inode(file)), sm, error);
@@ -585,6 +603,7 @@ need_drain:
	if (error)
		goto out_sc;
	sc->flags |= XCHK_NEED_DRAIN;
	run.retries++;
	goto retry_op;
try_harder:
	/*
@@ -596,5 +615,6 @@ try_harder:
	if (error)
		goto out_sc;
	sc->flags |= XCHK_TRY_HARDER;
	run.retries++;
	goto retry_op;
}

@@ -95,6 +95,10 @@ struct xfs_scrub {
	 */
	void			(*buf_cleanup)(void *buf);

	/* xfile used by the scrubbers; freed at teardown. */
	struct xfile		*xfile;

	/* Lock flags for @ip. */
	uint			ilock_flags;

	/* See the XCHK/XREP state flags below. */
@@ -113,6 +117,7 @@ struct xfs_scrub {

/* XCHK state flags grow up from zero, XREP state flags grow down from 2^31 */
#define XCHK_TRY_HARDER		(1U << 0)  /* can't get resources, try again */
#define XCHK_HAVE_FREEZE_PROT	(1U << 1)  /* do we have freeze protection? */
#define XCHK_FSGATES_DRAIN	(1U << 2)  /* defer ops draining enabled */
#define XCHK_NEED_DRAIN		(1U << 3)  /* scrub needs to drain defer ops */
#define XREP_ALREADY_FIXED	(1U << 31) /* checking our repair work */

@@ -0,0 +1,408 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2023 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_sysfs.h"
#include "xfs_btree.h"
#include "xfs_super.h"
#include "scrub/scrub.h"
#include "scrub/stats.h"
#include "scrub/trace.h"

struct xchk_scrub_stats {
	/* all 32-bit counters here */

	/* checking stats */
	uint32_t		invocations;
	uint32_t		clean;
	uint32_t		corrupt;
	uint32_t		preen;
	uint32_t		xfail;
	uint32_t		xcorrupt;
	uint32_t		incomplete;
	uint32_t		warning;
	uint32_t		retries;

	/* repair stats */
	uint32_t		repair_invocations;
	uint32_t		repair_success;

	/* all 64-bit items here */

	/* runtimes */
	uint64_t		checktime_us;
	uint64_t		repairtime_us;

	/* non-counter state must go at the end for clearall */
	spinlock_t		css_lock;
};

struct xchk_stats {
	struct dentry		*cs_debugfs;
	struct xchk_scrub_stats	cs_stats[XFS_SCRUB_TYPE_NR];
};


static struct xchk_stats	global_stats;

static const char *name_map[XFS_SCRUB_TYPE_NR] = {
	[XFS_SCRUB_TYPE_SB]		= "sb",
	[XFS_SCRUB_TYPE_AGF]		= "agf",
	[XFS_SCRUB_TYPE_AGFL]		= "agfl",
	[XFS_SCRUB_TYPE_AGI]		= "agi",
	[XFS_SCRUB_TYPE_BNOBT]		= "bnobt",
	[XFS_SCRUB_TYPE_CNTBT]		= "cntbt",
	[XFS_SCRUB_TYPE_INOBT]		= "inobt",
	[XFS_SCRUB_TYPE_FINOBT]		= "finobt",
	[XFS_SCRUB_TYPE_RMAPBT]		= "rmapbt",
	[XFS_SCRUB_TYPE_REFCNTBT]	= "refcountbt",
	[XFS_SCRUB_TYPE_INODE]		= "inode",
	[XFS_SCRUB_TYPE_BMBTD]		= "bmapbtd",
	[XFS_SCRUB_TYPE_BMBTA]		= "bmapbta",
	[XFS_SCRUB_TYPE_BMBTC]		= "bmapbtc",
	[XFS_SCRUB_TYPE_DIR]		= "directory",
	[XFS_SCRUB_TYPE_XATTR]		= "xattr",
	[XFS_SCRUB_TYPE_SYMLINK]	= "symlink",
	[XFS_SCRUB_TYPE_PARENT]		= "parent",
	[XFS_SCRUB_TYPE_RTBITMAP]	= "rtbitmap",
	[XFS_SCRUB_TYPE_RTSUM]		= "rtsummary",
	[XFS_SCRUB_TYPE_UQUOTA]		= "usrquota",
	[XFS_SCRUB_TYPE_GQUOTA]		= "grpquota",
	[XFS_SCRUB_TYPE_PQUOTA]		= "prjquota",
	[XFS_SCRUB_TYPE_FSCOUNTERS]	= "fscounters",
};

/* Format the scrub stats into a text buffer, similar to pcp style. */
STATIC ssize_t
xchk_stats_format(
	struct xchk_stats	*cs,
	char			*buf,
	size_t			remaining)
{
	struct xchk_scrub_stats	*css = &cs->cs_stats[0];
	unsigned int		i;
	ssize_t			copied = 0;
	int			ret = 0;

	for (i = 0; i < XFS_SCRUB_TYPE_NR; i++, css++) {
		if (!name_map[i])
			continue;

		ret = scnprintf(buf, remaining,
				"%s %u %u %u %u %u %u %u %u %u %llu %u %u %llu\n",
				name_map[i],
				(unsigned int)css->invocations,
				(unsigned int)css->clean,
				(unsigned int)css->corrupt,
				(unsigned int)css->preen,
				(unsigned int)css->xfail,
				(unsigned int)css->xcorrupt,
				(unsigned int)css->incomplete,
				(unsigned int)css->warning,
				(unsigned int)css->retries,
				(unsigned long long)css->checktime_us,
				(unsigned int)css->repair_invocations,
				(unsigned int)css->repair_success,
				(unsigned long long)css->repairtime_us);
		if (ret <= 0)
			break;

		remaining -= ret;
		copied += ret;
		buf += ret;
	}

	return copied > 0 ? copied : ret;
}
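
Each emitted row is therefore the scrub type name followed by nine 32-bit check counters, the accumulated check time in microseconds, two repair counters, and the accumulated repair time. A hypothetical line (made-up numbers, shown only to illustrate the layout):

bnobt 15 14 1 0 0 0 0 0 2 4211 1 1 90233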

/* Estimate the worst case buffer size required to hold the whole report. */
STATIC size_t
xchk_stats_estimate_bufsize(
	struct xchk_stats	*cs)
{
	struct xchk_scrub_stats	*css = &cs->cs_stats[0];
	unsigned int		i;
	size_t			field_width;
	size_t			ret = 0;

	/* 4294967296 plus one space for each u32 field */
	field_width = 11 * (offsetof(struct xchk_scrub_stats, checktime_us) /
			    sizeof(uint32_t));

	/* 18446744073709551615 plus one space for each u64 field */
	field_width += 21 * ((offsetof(struct xchk_scrub_stats, css_lock) -
			      offsetof(struct xchk_scrub_stats, checktime_us)) /
			     sizeof(uint64_t));

	for (i = 0; i < XFS_SCRUB_TYPE_NR; i++, css++) {
		if (!name_map[i])
			continue;

		/* name plus one space */
		ret += 1 + strlen(name_map[i]);

		/* all fields, plus newline */
		ret += field_width + 1;
	}

	return ret;
}
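
To make the estimate concrete: struct xchk_scrub_stats carries eleven uint32_t counters and two uint64_t timers, so field_width works out to 11 * 11 + 21 * 2 = 163 bytes, and each row is budgeted at strlen(name) + 1 + field_width + 1 bytes. Since scnprintf() can only emit rows shorter than this worst case, the formatted report always fits in the buffer.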

/* Clear all counters. */
STATIC void
xchk_stats_clearall(
	struct xchk_stats	*cs)
{
	struct xchk_scrub_stats	*css = &cs->cs_stats[0];
	unsigned int		i;

	for (i = 0; i < XFS_SCRUB_TYPE_NR; i++, css++) {
		spin_lock(&css->css_lock);
		memset(css, 0, offsetof(struct xchk_scrub_stats, css_lock));
		spin_unlock(&css->css_lock);
	}
}

#define XFS_SCRUB_OFLAG_UNCLEAN	(XFS_SCRUB_OFLAG_CORRUPT | \
				 XFS_SCRUB_OFLAG_PREEN | \
				 XFS_SCRUB_OFLAG_XFAIL | \
				 XFS_SCRUB_OFLAG_XCORRUPT | \
				 XFS_SCRUB_OFLAG_INCOMPLETE | \
				 XFS_SCRUB_OFLAG_WARNING)

STATIC void
xchk_stats_merge_one(
	struct xchk_stats		*cs,
	const struct xfs_scrub_metadata	*sm,
	const struct xchk_stats_run	*run)
{
	struct xchk_scrub_stats		*css;

	if (sm->sm_type >= XFS_SCRUB_TYPE_NR) {
		ASSERT(sm->sm_type < XFS_SCRUB_TYPE_NR);
		return;
	}

	css = &cs->cs_stats[sm->sm_type];
	spin_lock(&css->css_lock);
	css->invocations++;
	if (!(sm->sm_flags & XFS_SCRUB_OFLAG_UNCLEAN))
		css->clean++;
	if (sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		css->corrupt++;
	if (sm->sm_flags & XFS_SCRUB_OFLAG_PREEN)
		css->preen++;
	if (sm->sm_flags & XFS_SCRUB_OFLAG_XFAIL)
		css->xfail++;
	if (sm->sm_flags & XFS_SCRUB_OFLAG_XCORRUPT)
		css->xcorrupt++;
	if (sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE)
		css->incomplete++;
	if (sm->sm_flags & XFS_SCRUB_OFLAG_WARNING)
		css->warning++;
	css->retries += run->retries;
	css->checktime_us += howmany_64(run->scrub_ns, NSEC_PER_USEC);

	if (run->repair_attempted)
		css->repair_invocations++;
	if (run->repair_succeeded)
		css->repair_success++;
	css->repairtime_us += howmany_64(run->repair_ns, NSEC_PER_USEC);
	spin_unlock(&css->css_lock);
}

/* Merge these scrub-run stats into the global and mount stat data. */
void
xchk_stats_merge(
	struct xfs_mount		*mp,
	const struct xfs_scrub_metadata	*sm,
	const struct xchk_stats_run	*run)
{
	xchk_stats_merge_one(&global_stats, sm, run);
	xchk_stats_merge_one(mp->m_scrub_stats, sm, run);
}

/* debugfs boilerplate */

static ssize_t
xchk_scrub_stats_read(
	struct file		*file,
	char __user		*ubuf,
	size_t			count,
	loff_t			*ppos)
{
	struct xchk_stats	*cs = file->private_data;
	char			*buf;
	size_t			bufsize;
	ssize_t			avail, ret;

	/*
	 * This generates a stringly snapshot of all the scrub counters, so we
	 * do not want userspace to receive garbled text from multiple calls.
	 * If the file position is greater than 0, return a short read.
	 */
	if (*ppos > 0)
		return 0;

	bufsize = xchk_stats_estimate_bufsize(cs);

	buf = kvmalloc(bufsize, XCHK_GFP_FLAGS);
	if (!buf)
		return -ENOMEM;

	avail = xchk_stats_format(cs, buf, bufsize);
	if (avail < 0) {
		ret = avail;
		goto out;
	}

	ret = simple_read_from_buffer(ubuf, count, ppos, buf, avail);
out:
	kvfree(buf);
	return ret;
}

static const struct file_operations scrub_stats_fops = {
	.open			= simple_open,
	.read			= xchk_scrub_stats_read,
};

static ssize_t
xchk_clear_scrub_stats_write(
	struct file		*file,
	const char __user	*ubuf,
	size_t			count,
	loff_t			*ppos)
{
	struct xchk_stats	*cs = file->private_data;
	unsigned int		val;
	int			ret;

	ret = kstrtouint_from_user(ubuf, count, 0, &val);
	if (ret)
		return ret;

	if (val != 1)
		return -EINVAL;

	xchk_stats_clearall(cs);
	return count;
}

static const struct file_operations clear_scrub_stats_fops = {
	.open			= simple_open,
	.write			= xchk_clear_scrub_stats_write,
};

/* Initialize the stats object. */
STATIC int
xchk_stats_init(
	struct xchk_stats	*cs,
	struct xfs_mount	*mp)
{
	struct xchk_scrub_stats	*css = &cs->cs_stats[0];
	unsigned int		i;

	for (i = 0; i < XFS_SCRUB_TYPE_NR; i++, css++)
		spin_lock_init(&css->css_lock);

	return 0;
}

/* Connect the stats object to debugfs. */
void
xchk_stats_register(
	struct xchk_stats	*cs,
	struct dentry		*parent)
{
	if (!parent)
		return;

	cs->cs_debugfs = xfs_debugfs_mkdir("scrub", parent);
	if (!cs->cs_debugfs)
		return;

	debugfs_create_file("stats", 0444, cs->cs_debugfs, cs,
			&scrub_stats_fops);
	debugfs_create_file("clear_stats", 0200, cs->cs_debugfs, cs,
			&clear_scrub_stats_fops);
}
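
Once registered, each stats object exposes two files inside its "scrub" debugfs directory: a read-only "stats" file that renders the report built by xchk_stats_format(), and a write-only "clear_stats" file that zeroes every counter when the value 1 is written to it; any other input is rejected with -EINVAL by the write handler above.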

/* Free all resources related to the stats object. */
STATIC int
xchk_stats_teardown(
	struct xchk_stats	*cs)
{
	return 0;
}

/* Disconnect the stats object from debugfs. */
void
xchk_stats_unregister(
	struct xchk_stats	*cs)
{
	debugfs_remove(cs->cs_debugfs);
}

/* Initialize global stats and register them */
int __init
xchk_global_stats_setup(
	struct dentry		*parent)
{
	int			error;

	error = xchk_stats_init(&global_stats, NULL);
	if (error)
		return error;

	xchk_stats_register(&global_stats, parent);
	return 0;
}

/* Unregister global stats and tear them down */
void
xchk_global_stats_teardown(void)
{
	xchk_stats_unregister(&global_stats);
	xchk_stats_teardown(&global_stats);
}

/* Allocate per-mount stats */
int
xchk_mount_stats_alloc(
	struct xfs_mount	*mp)
{
	struct xchk_stats	*cs;
	int			error;

	cs = kvzalloc(sizeof(struct xchk_stats), GFP_KERNEL);
	if (!cs)
		return -ENOMEM;

	error = xchk_stats_init(cs, mp);
	if (error)
		goto out_free;

	mp->m_scrub_stats = cs;
	return 0;
out_free:
	kvfree(cs);
	return error;
}

/* Free per-mount stats */
void
xchk_mount_stats_free(
	struct xfs_mount	*mp)
{
	xchk_stats_teardown(mp->m_scrub_stats);
	kvfree(mp->m_scrub_stats);
	mp->m_scrub_stats = NULL;
}

@@ -0,0 +1,59 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2023 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#ifndef __XFS_SCRUB_STATS_H__
#define __XFS_SCRUB_STATS_H__

struct xchk_stats_run {
	u64			scrub_ns;
	u64			repair_ns;
	unsigned int		retries;
	bool			repair_attempted;
	bool			repair_succeeded;
};

#ifdef CONFIG_XFS_ONLINE_SCRUB_STATS
struct xchk_stats;

int __init xchk_global_stats_setup(struct dentry *parent);
void xchk_global_stats_teardown(void);

int xchk_mount_stats_alloc(struct xfs_mount *mp);
void xchk_mount_stats_free(struct xfs_mount *mp);

void xchk_stats_register(struct xchk_stats *cs, struct dentry *parent);
void xchk_stats_unregister(struct xchk_stats *cs);

void xchk_stats_merge(struct xfs_mount *mp, const struct xfs_scrub_metadata *sm,
		const struct xchk_stats_run *run);

static inline u64 xchk_stats_now(void) { return ktime_get_ns(); }
static inline u64 xchk_stats_elapsed_ns(u64 since)
{
	u64 now = xchk_stats_now();

	/*
	 * If the system doesn't have a high enough resolution clock, charge at
	 * least one nanosecond so that our stats don't report instantaneous
	 * runtimes.
	 */
	if (now == since)
		return 1;

	return now - since;
}
#else
# define xchk_global_stats_setup(parent)	(0)
# define xchk_global_stats_teardown()		((void)0)
# define xchk_mount_stats_alloc(mp)		(0)
# define xchk_mount_stats_free(mp)		((void)0)
# define xchk_stats_register(cs, parent)	((void)0)
# define xchk_stats_unregister(cs)		((void)0)
# define xchk_stats_now()			(0)
# define xchk_stats_elapsed_ns(x)		(0 * (x))
# define xchk_stats_merge(mp, sm, run)		((void)0)
#endif /* CONFIG_XFS_ONLINE_SCRUB_STATS */

#endif /* __XFS_SCRUB_STATS_H__ */

@@ -12,8 +12,10 @@
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "scrub/scrub.h"
#include "xfs_ag.h"
#include "scrub/scrub.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"

/* Figure out which block the btree cursor was pointing to. */
static inline xfs_fsblock_t

@@ -16,6 +16,10 @@
#include <linux/tracepoint.h>
#include "xfs_bit.h"

struct xfile;
struct xfarray;
struct xfarray_sortinfo;

/*
 * ftrace's __print_symbolic requires that all enum values be wrapped in the
 * TRACE_DEFINE_ENUM macro so that the enum value can be encoded in the ftrace
@@ -94,10 +98,12 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_FSCOUNTERS);
	{ XFS_SCRUB_OFLAG_XCORRUPT,		"xcorrupt" }, \
	{ XFS_SCRUB_OFLAG_INCOMPLETE,		"incomplete" }, \
	{ XFS_SCRUB_OFLAG_WARNING,		"warning" }, \
	{ XFS_SCRUB_OFLAG_NO_REPAIR_NEEDED,	"norepair" }
	{ XFS_SCRUB_OFLAG_NO_REPAIR_NEEDED,	"norepair" }, \
	{ XFS_SCRUB_IFLAG_FORCE_REBUILD,	"rebuild" }

#define XFS_SCRUB_STATE_STRINGS \
	{ XCHK_TRY_HARDER,			"try_harder" }, \
	{ XCHK_HAVE_FREEZE_PROT,		"nofreeze" }, \
	{ XCHK_FSGATES_DRAIN,			"fsgates_drain" }, \
	{ XCHK_NEED_DRAIN,			"need_drain" }, \
	{ XREP_ALREADY_FIXED,			"already_fixed" }
@@ -635,6 +641,28 @@ TRACE_EVENT(xchk_iallocbt_check_cluster,
		  __entry->cluster_ino)
)

TRACE_EVENT(xchk_inode_is_allocated,
	TP_PROTO(struct xfs_inode *ip),
	TP_ARGS(ip),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, ino)
		__field(unsigned long, iflags)
		__field(umode_t, mode)
	),
	TP_fast_assign(
		__entry->dev = VFS_I(ip)->i_sb->s_dev;
		__entry->ino = ip->i_ino;
		__entry->iflags = ip->i_flags;
		__entry->mode = VFS_I(ip)->i_mode;
	),
	TP_printk("dev %d:%d ino 0x%llx iflags 0x%lx mode 0x%x",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->ino,
		  __entry->iflags,
		  __entry->mode)
);

TRACE_EVENT(xchk_fscounters_calc,
	TP_PROTO(struct xfs_mount *mp, uint64_t icount, uint64_t ifree,
		 uint64_t fdblocks, uint64_t delalloc),
@@ -693,6 +721,31 @@ TRACE_EVENT(xchk_fscounters_within_range,
		  __entry->old_value)
)

DECLARE_EVENT_CLASS(xchk_fsfreeze_class,
	TP_PROTO(struct xfs_scrub *sc, int error),
	TP_ARGS(sc, error),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(unsigned int, type)
		__field(int, error)
	),
	TP_fast_assign(
		__entry->dev = sc->mp->m_super->s_dev;
		__entry->type = sc->sm->sm_type;
		__entry->error = error;
	),
	TP_printk("dev %d:%d type %s error %d",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS),
		  __entry->error)
);
#define DEFINE_XCHK_FSFREEZE_EVENT(name) \
DEFINE_EVENT(xchk_fsfreeze_class, name, \
	TP_PROTO(struct xfs_scrub *sc, int error), \
	TP_ARGS(sc, error))
DEFINE_XCHK_FSFREEZE_EVENT(xchk_fsfreeze);
DEFINE_XCHK_FSFREEZE_EVENT(xchk_fsthaw);

TRACE_EVENT(xchk_refcount_incorrect,
	TP_PROTO(struct xfs_perag *pag, const struct xfs_refcount_irec *irec,
		 xfs_nlink_t seen),
@ -725,13 +778,302 @@ TRACE_EVENT(xchk_refcount_incorrect,
		  __entry->seen)
)

TRACE_EVENT(xfile_create,
	TP_PROTO(struct xfile *xf),
	TP_ARGS(xf),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(unsigned long, ino)
		__array(char, pathname, 256)
	),
	TP_fast_assign(
		char		pathname[257];
		char		*path;

		__entry->ino = file_inode(xf->file)->i_ino;
		memset(pathname, 0, sizeof(pathname));
		path = file_path(xf->file, pathname, sizeof(pathname) - 1);
		if (IS_ERR(path))
			path = "(unknown)";
		strncpy(__entry->pathname, path, sizeof(__entry->pathname));
	),
	TP_printk("xfino 0x%lx path '%s'",
		  __entry->ino,
		  __entry->pathname)
);

TRACE_EVENT(xfile_destroy,
	TP_PROTO(struct xfile *xf),
	TP_ARGS(xf),
	TP_STRUCT__entry(
		__field(unsigned long, ino)
		__field(unsigned long long, bytes)
		__field(loff_t, size)
	),
	TP_fast_assign(
		struct xfile_stat	statbuf;
		int			ret;

		ret = xfile_stat(xf, &statbuf);
		if (!ret) {
			__entry->bytes = statbuf.bytes;
			__entry->size = statbuf.size;
		} else {
			__entry->bytes = -1;
			__entry->size = -1;
		}
		__entry->ino = file_inode(xf->file)->i_ino;
	),
	TP_printk("xfino 0x%lx mem_bytes 0x%llx isize 0x%llx",
		  __entry->ino,
		  __entry->bytes,
		  __entry->size)
);

DECLARE_EVENT_CLASS(xfile_class,
	TP_PROTO(struct xfile *xf, loff_t pos, unsigned long long bytecount),
	TP_ARGS(xf, pos, bytecount),
	TP_STRUCT__entry(
		__field(unsigned long, ino)
		__field(unsigned long long, bytes_used)
		__field(loff_t, pos)
		__field(loff_t, size)
		__field(unsigned long long, bytecount)
	),
	TP_fast_assign(
		struct xfile_stat	statbuf;
		int			ret;

		ret = xfile_stat(xf, &statbuf);
		if (!ret) {
			__entry->bytes_used = statbuf.bytes;
			__entry->size = statbuf.size;
		} else {
			__entry->bytes_used = -1;
			__entry->size = -1;
		}
		__entry->ino = file_inode(xf->file)->i_ino;
		__entry->pos = pos;
		__entry->bytecount = bytecount;
	),
	TP_printk("xfino 0x%lx mem_bytes 0x%llx pos 0x%llx bytecount 0x%llx isize 0x%llx",
		  __entry->ino,
		  __entry->bytes_used,
		  __entry->pos,
		  __entry->bytecount,
		  __entry->size)
);
#define DEFINE_XFILE_EVENT(name) \
DEFINE_EVENT(xfile_class, name, \
	TP_PROTO(struct xfile *xf, loff_t pos, unsigned long long bytecount), \
	TP_ARGS(xf, pos, bytecount))
DEFINE_XFILE_EVENT(xfile_pread);
DEFINE_XFILE_EVENT(xfile_pwrite);
DEFINE_XFILE_EVENT(xfile_seek_data);
DEFINE_XFILE_EVENT(xfile_get_page);
DEFINE_XFILE_EVENT(xfile_put_page);

TRACE_EVENT(xfarray_create,
	TP_PROTO(struct xfarray *xfa, unsigned long long required_capacity),
	TP_ARGS(xfa, required_capacity),
	TP_STRUCT__entry(
		__field(unsigned long, ino)
		__field(uint64_t, max_nr)
		__field(size_t, obj_size)
		__field(int, obj_size_log)
		__field(unsigned long long, required_capacity)
	),
	TP_fast_assign(
		__entry->max_nr = xfa->max_nr;
		__entry->obj_size = xfa->obj_size;
		__entry->obj_size_log = xfa->obj_size_log;
		__entry->ino = file_inode(xfa->xfile->file)->i_ino;
		__entry->required_capacity = required_capacity;
	),
	TP_printk("xfino 0x%lx max_nr %llu reqd_nr %llu objsz %zu objszlog %d",
		  __entry->ino,
		  __entry->max_nr,
		  __entry->required_capacity,
		  __entry->obj_size,
		  __entry->obj_size_log)
);

TRACE_EVENT(xfarray_isort,
	TP_PROTO(struct xfarray_sortinfo *si, uint64_t lo, uint64_t hi),
	TP_ARGS(si, lo, hi),
	TP_STRUCT__entry(
		__field(unsigned long, ino)
		__field(unsigned long long, lo)
		__field(unsigned long long, hi)
	),
	TP_fast_assign(
		__entry->ino = file_inode(si->array->xfile->file)->i_ino;
		__entry->lo = lo;
		__entry->hi = hi;
	),
	TP_printk("xfino 0x%lx lo %llu hi %llu elts %llu",
		  __entry->ino,
		  __entry->lo,
		  __entry->hi,
		  __entry->hi - __entry->lo)
);

TRACE_EVENT(xfarray_pagesort,
	TP_PROTO(struct xfarray_sortinfo *si, uint64_t lo, uint64_t hi),
	TP_ARGS(si, lo, hi),
	TP_STRUCT__entry(
		__field(unsigned long, ino)
		__field(unsigned long long, lo)
		__field(unsigned long long, hi)
	),
	TP_fast_assign(
		__entry->ino = file_inode(si->array->xfile->file)->i_ino;
		__entry->lo = lo;
		__entry->hi = hi;
	),
	TP_printk("xfino 0x%lx lo %llu hi %llu elts %llu",
		  __entry->ino,
		  __entry->lo,
		  __entry->hi,
		  __entry->hi - __entry->lo)
);

TRACE_EVENT(xfarray_qsort,
	TP_PROTO(struct xfarray_sortinfo *si, uint64_t lo, uint64_t hi),
	TP_ARGS(si, lo, hi),
	TP_STRUCT__entry(
		__field(unsigned long, ino)
		__field(unsigned long long, lo)
		__field(unsigned long long, hi)
		__field(int, stack_depth)
		__field(int, max_stack_depth)
	),
	TP_fast_assign(
		__entry->ino = file_inode(si->array->xfile->file)->i_ino;
		__entry->lo = lo;
		__entry->hi = hi;
		__entry->stack_depth = si->stack_depth;
		__entry->max_stack_depth = si->max_stack_depth;
	),
	TP_printk("xfino 0x%lx lo %llu hi %llu elts %llu stack %d/%d",
		  __entry->ino,
		  __entry->lo,
		  __entry->hi,
		  __entry->hi - __entry->lo,
		  __entry->stack_depth,
		  __entry->max_stack_depth)
);

TRACE_EVENT(xfarray_sort,
	TP_PROTO(struct xfarray_sortinfo *si, size_t bytes),
	TP_ARGS(si, bytes),
	TP_STRUCT__entry(
		__field(unsigned long, ino)
		__field(unsigned long long, nr)
		__field(size_t, obj_size)
		__field(size_t, bytes)
		__field(unsigned int, max_stack_depth)
	),
	TP_fast_assign(
		__entry->nr = si->array->nr;
		__entry->obj_size = si->array->obj_size;
		__entry->ino = file_inode(si->array->xfile->file)->i_ino;
		__entry->bytes = bytes;
		__entry->max_stack_depth = si->max_stack_depth;
	),
	TP_printk("xfino 0x%lx nr %llu objsz %zu stack %u bytes %zu",
		  __entry->ino,
		  __entry->nr,
		  __entry->obj_size,
		  __entry->max_stack_depth,
		  __entry->bytes)
);

TRACE_EVENT(xfarray_sort_stats,
	TP_PROTO(struct xfarray_sortinfo *si, int error),
	TP_ARGS(si, error),
	TP_STRUCT__entry(
		__field(unsigned long, ino)
#ifdef DEBUG
		__field(unsigned long long, loads)
		__field(unsigned long long, stores)
		__field(unsigned long long, compares)
		__field(unsigned long long, heapsorts)
#endif
		__field(unsigned int, max_stack_depth)
		__field(unsigned int, max_stack_used)
		__field(int, error)
	),
	TP_fast_assign(
		__entry->ino = file_inode(si->array->xfile->file)->i_ino;
#ifdef DEBUG
		__entry->loads = si->loads;
		__entry->stores = si->stores;
		__entry->compares = si->compares;
		__entry->heapsorts = si->heapsorts;
#endif
		__entry->max_stack_depth = si->max_stack_depth;
		__entry->max_stack_used = si->max_stack_used;
		__entry->error = error;
	),
	TP_printk(
#ifdef DEBUG
		  "xfino 0x%lx loads %llu stores %llu compares %llu heapsorts %llu stack_depth %u/%u error %d",
#else
		  "xfino 0x%lx stack_depth %u/%u error %d",
#endif
		  __entry->ino,
#ifdef DEBUG
		  __entry->loads,
		  __entry->stores,
		  __entry->compares,
		  __entry->heapsorts,
#endif
		  __entry->max_stack_used,
		  __entry->max_stack_depth,
		  __entry->error)
);

#ifdef CONFIG_XFS_RT
TRACE_EVENT(xchk_rtsum_record_free,
	TP_PROTO(struct xfs_mount *mp, xfs_rtblock_t start,
		 uint64_t len, unsigned int log, loff_t pos, xfs_suminfo_t v),
	TP_ARGS(mp, start, len, log, pos, v),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(dev_t, rtdev)
		__field(xfs_rtblock_t, start)
		__field(unsigned long long, len)
		__field(unsigned int, log)
		__field(loff_t, pos)
		__field(xfs_suminfo_t, v)
	),
	TP_fast_assign(
		__entry->dev = mp->m_super->s_dev;
		__entry->rtdev = mp->m_rtdev_targp->bt_dev;
		__entry->start = start;
		__entry->len = len;
		__entry->log = log;
		__entry->pos = pos;
		__entry->v = v;
	),
	TP_printk("dev %d:%d rtdev %d:%d rtx 0x%llx rtxcount 0x%llx log %u rsumpos 0x%llx sumcount %u",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  MAJOR(__entry->rtdev), MINOR(__entry->rtdev),
		  __entry->start,
		  __entry->len,
		  __entry->log,
		  __entry->pos,
		  __entry->v)
);
#endif /* CONFIG_XFS_RT */

/* repair tracepoints */
#if IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR)

DECLARE_EVENT_CLASS(xrep_extent_class,
	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
		 xfs_agblock_t agbno, xfs_extlen_t len),
	TP_ARGS(mp, agno, agbno, len),
	TP_PROTO(struct xfs_perag *pag, xfs_agblock_t agbno, xfs_extlen_t len),
	TP_ARGS(pag, agbno, len),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_agnumber_t, agno)
@ -739,8 +1081,8 @@ DECLARE_EVENT_CLASS(xrep_extent_class,
		__field(xfs_extlen_t, len)
	),
	TP_fast_assign(
		__entry->dev = mp->m_super->s_dev;
		__entry->agno = agno;
		__entry->dev = pag->pag_mount->m_super->s_dev;
		__entry->agno = pag->pag_agno;
		__entry->agbno = agbno;
		__entry->len = len;
	),
@ -752,12 +1094,45 @@ DECLARE_EVENT_CLASS(xrep_extent_class,
);
#define DEFINE_REPAIR_EXTENT_EVENT(name) \
DEFINE_EVENT(xrep_extent_class, name, \
	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
		 xfs_agblock_t agbno, xfs_extlen_t len), \
	TP_ARGS(mp, agno, agbno, len))
DEFINE_REPAIR_EXTENT_EVENT(xrep_dispose_btree_extent);
	TP_PROTO(struct xfs_perag *pag, xfs_agblock_t agbno, xfs_extlen_t len), \
	TP_ARGS(pag, agbno, len))
DEFINE_REPAIR_EXTENT_EVENT(xreap_dispose_unmap_extent);
DEFINE_REPAIR_EXTENT_EVENT(xreap_dispose_free_extent);
DEFINE_REPAIR_EXTENT_EVENT(xreap_agextent_binval);
DEFINE_REPAIR_EXTENT_EVENT(xrep_agfl_insert);

DECLARE_EVENT_CLASS(xrep_reap_find_class,
	TP_PROTO(struct xfs_perag *pag, xfs_agblock_t agbno, xfs_extlen_t len,
		 bool crosslinked),
	TP_ARGS(pag, agbno, len, crosslinked),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_agnumber_t, agno)
		__field(xfs_agblock_t, agbno)
		__field(xfs_extlen_t, len)
		__field(bool, crosslinked)
	),
	TP_fast_assign(
		__entry->dev = pag->pag_mount->m_super->s_dev;
		__entry->agno = pag->pag_agno;
		__entry->agbno = agbno;
		__entry->len = len;
		__entry->crosslinked = crosslinked;
	),
	TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x crosslinked %d",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->agno,
		  __entry->agbno,
		  __entry->len,
		  __entry->crosslinked ? 1 : 0)
);
#define DEFINE_REPAIR_REAP_FIND_EVENT(name) \
DEFINE_EVENT(xrep_reap_find_class, name, \
	TP_PROTO(struct xfs_perag *pag, xfs_agblock_t agbno, xfs_extlen_t len, \
		 bool crosslinked), \
	TP_ARGS(pag, agbno, len, crosslinked))
DEFINE_REPAIR_REAP_FIND_EVENT(xreap_agextent_select);

DECLARE_EVENT_CLASS(xrep_rmap_class,
	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
		 xfs_agblock_t agbno, xfs_extlen_t len,
@ -827,28 +1202,6 @@ TRACE_EVENT(xrep_refcount_extent_fn,
		  __entry->refcount)
)

TRACE_EVENT(xrep_init_btblock,
	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
		 xfs_btnum_t btnum),
	TP_ARGS(mp, agno, agbno, btnum),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_agnumber_t, agno)
		__field(xfs_agblock_t, agbno)
		__field(uint32_t, btnum)
	),
	TP_fast_assign(
		__entry->dev = mp->m_super->s_dev;
		__entry->agno = agno;
		__entry->agbno = agbno;
		__entry->btnum = btnum;
	),
	TP_printk("dev %d:%d agno 0x%x agbno 0x%x btree %s",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->agno,
		  __entry->agbno,
		  __print_symbolic(__entry->btnum, XFS_BTNUM_STRINGS))
)
TRACE_EVENT(xrep_findroot_block,
	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
		 uint32_t magic, uint16_t level),
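Each DEFINE_XFILE_EVENT and DEFINE_REPAIR_EXTENT_EVENT line above stamps out a callable tracepoint from its event class; callers elsewhere in this series invoke them like ordinary functions. For example, the xfile code later in this merge emits:

	/* Calls generated by DEFINE_XFILE_EVENT(xfile_pread) and friends. */
	trace_xfile_pread(xf, pos, count);
	trace_xfile_pwrite(xf, pos, count);
	trace_xfile_seek_data(xf, pos, ret);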
@ -0,0 +1,141 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Copyright (C) 2021-2023 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#ifndef __XFS_SCRUB_XFARRAY_H__
#define __XFS_SCRUB_XFARRAY_H__

/* xfile array index type, along with cursor initialization */
typedef uint64_t		xfarray_idx_t;
#define XFARRAY_CURSOR_INIT	((__force xfarray_idx_t)0)

/* Iterate each index of an xfile array. */
#define foreach_xfarray_idx(array, idx) \
	for ((idx) = XFARRAY_CURSOR_INIT; \
	     (idx) < xfarray_length(array); \
	     (idx)++)

struct xfarray {
	/* Underlying file that backs the array. */
	struct xfile	*xfile;

	/* Number of array elements. */
	xfarray_idx_t	nr;

	/* Maximum possible array size. */
	xfarray_idx_t	max_nr;

	/* Number of unset slots in the array below @nr. */
	uint64_t	unset_slots;

	/* Size of an array element. */
	size_t		obj_size;

	/* log2 of array element size, if possible. */
	int		obj_size_log;
};

int xfarray_create(const char *descr, unsigned long long required_capacity,
		size_t obj_size, struct xfarray **arrayp);
void xfarray_destroy(struct xfarray *array);
int xfarray_load(struct xfarray *array, xfarray_idx_t idx, void *ptr);
int xfarray_unset(struct xfarray *array, xfarray_idx_t idx);
int xfarray_store(struct xfarray *array, xfarray_idx_t idx, const void *ptr);
int xfarray_store_anywhere(struct xfarray *array, const void *ptr);
bool xfarray_element_is_null(struct xfarray *array, const void *ptr);

/* Append an element to the array. */
static inline int xfarray_append(struct xfarray *array, const void *ptr)
{
	return xfarray_store(array, array->nr, ptr);
}

uint64_t xfarray_length(struct xfarray *array);
int xfarray_load_next(struct xfarray *array, xfarray_idx_t *idx, void *rec);

/* Declarations for xfile array sort functionality. */

typedef cmp_func_t xfarray_cmp_fn;

/* Perform an in-memory heapsort for small subsets. */
#define XFARRAY_ISORT_SHIFT		(4)
#define XFARRAY_ISORT_NR		(1U << XFARRAY_ISORT_SHIFT)

/* Evaluate this many points to find the qsort pivot. */
#define XFARRAY_QSORT_PIVOT_NR		(9)

struct xfarray_sortinfo {
	struct xfarray		*array;

	/* Comparison function for the sort. */
	xfarray_cmp_fn		cmp_fn;

	/* Maximum height of the partition stack. */
	uint8_t			max_stack_depth;

	/* Current height of the partition stack. */
	int8_t			stack_depth;

	/* Maximum stack depth ever used. */
	uint8_t			max_stack_used;

	/* XFARRAY_SORT_* flags; see below. */
	unsigned int		flags;

	/* Cache a page here for faster access. */
	struct xfile_page	xfpage;
	void			*page_kaddr;

#ifdef DEBUG
	/* Performance statistics. */
	uint64_t		loads;
	uint64_t		stores;
	uint64_t		compares;
	uint64_t		heapsorts;
#endif
	/*
	 * Extra bytes are allocated beyond the end of the structure to store
	 * quicksort information.  C does not permit multiple VLAs per struct,
	 * so we document all of this in a comment.
	 *
	 * Pretend that we have a typedef for array records:
	 *
	 * typedef char[array->obj_size]	xfarray_rec_t;
	 *
	 * First comes the quicksort partition stack:
	 *
	 * xfarray_idx_t	lo[max_stack_depth];
	 * xfarray_idx_t	hi[max_stack_depth];
	 *
	 * union {
	 *
	 * If for a given subset we decide to use an in-memory sort, we use a
	 * block of scratchpad records here to compare items:
	 *
	 * 	xfarray_rec_t	scratch[ISORT_NR];
	 *
	 * Otherwise, we want to partition the records to partition the array.
	 * We store the chosen pivot record at the start of the scratchpad area
	 * and use the rest to sample some records to estimate the median.
	 * The format of the qsort_pivot array enables us to use the kernel
	 * heapsort function to place the median value in the middle.
	 *
	 * 	struct {
	 * 		xfarray_rec_t	pivot;
	 * 		struct {
	 * 			xfarray_rec_t	rec;  (rounded up to 8 bytes)
	 * 			xfarray_idx_t	idx;
	 * 		} qsort_pivot[QSORT_PIVOT_NR];
	 * 	};
	 * }
	 */
};

/* Sort can be interrupted by a fatal signal. */
#define XFARRAY_SORT_KILLABLE	(1U << 0)

int xfarray_sort(struct xfarray *array, xfarray_cmp_fn cmp_fn,
		unsigned int flags);

#endif /* __XFS_SCRUB_XFARRAY_H__ */
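The header above is the whole xfarray API surface. A rough usage sketch, assuming a fixed-size record type and a hypothetical demo_cmp comparison function (neither is part of the real scrub code):

	/* Sketch: stage fixed-size records in an xfarray, sort, then iterate. */
	struct demo_rec { uint64_t key; };	/* hypothetical record type */

	static int demo_cmp(const void *a, const void *b)
	{
		const struct demo_rec *l = a, *r = b;

		return l->key < r->key ? -1 : l->key > r->key;
	}

	static int demo(void)
	{
		struct demo_rec rec = { .key = 42 };
		struct xfarray *array;
		xfarray_idx_t cur;
		int error;

		error = xfarray_create("demo", 0, sizeof(rec), &array);
		if (error)
			return error;

		error = xfarray_append(array, &rec);	/* stores at array->nr */
		if (!error)
			error = xfarray_sort(array, demo_cmp, XFARRAY_SORT_KILLABLE);

		foreach_xfarray_idx(array, cur)
			if (xfarray_load(array, cur, &rec))
				break;

		xfarray_destroy(array);
		return error;
	}

Because the array is backed by an xfile, none of this pins kernel memory; cold records can be paged out between accesses.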
@ -0,0 +1,419 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2018-2023 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
#include "scrub/scrub.h"
#include "scrub/trace.h"
#include <linux/shmem_fs.h>

/*
 * Swappable Temporary Memory
 * ==========================
 *
 * Online checking sometimes needs to be able to stage a large amount of data
 * in memory.  This information might not fit in the available memory and it
 * doesn't all need to be accessible at all times.  In other words, we want an
 * indexed data buffer to store data that can be paged out.
 *
 * When CONFIG_TMPFS=y, shmemfs is enough of a filesystem to meet those
 * requirements.  Therefore, the xfile mechanism uses an unlinked shmem file
 * to store our staging data.  This file is not installed in the file
 * descriptor table so that user programs cannot access the data, which means
 * that the xfile must be freed with xfile_destroy.
 *
 * xfiles assume that the caller will handle all required concurrency
 * management; standard vfs locks (freezer and inode) are not taken.  Reads
 * and writes are satisfied directly from the page cache.
 *
 * NOTE: The current shmemfs implementation has a quirk that in-kernel reads
 * of a hole cause a page to be mapped into the file.  If you are going to
 * create a sparse xfile, please be careful about reading from uninitialized
 * parts of the file.  These pages are !Uptodate and will eventually be
 * reclaimed if not written, but in the short term this boosts memory
 * consumption.
 */

/*
 * xfiles must not be exposed to userspace and require upper layers to
 * coordinate access to the one handle returned by the constructor, so
 * establish a separate lock class for xfiles to avoid confusing lockdep.
 */
static struct lock_class_key xfile_i_mutex_key;

/*
 * Create an xfile of the given size.  The description will be used in the
 * trace output.
 */
int
xfile_create(
	const char		*description,
	loff_t			isize,
	struct xfile		**xfilep)
{
	struct inode		*inode;
	struct xfile		*xf;
	int			error = -ENOMEM;

	xf = kmalloc(sizeof(struct xfile), XCHK_GFP_FLAGS);
	if (!xf)
		return -ENOMEM;

	xf->file = shmem_file_setup(description, isize, 0);
	if (!xf->file)
		goto out_xfile;
	if (IS_ERR(xf->file)) {
		error = PTR_ERR(xf->file);
		goto out_xfile;
	}

	/*
	 * We want a large sparse file that we can pread, pwrite, and seek.
	 * xfile users are responsible for keeping the xfile hidden away from
	 * all other callers, so we skip timestamp updates and security checks.
	 * Make the inode only accessible by root, just in case the xfile
	 * ever escapes.
	 */
	xf->file->f_mode |= FMODE_PREAD | FMODE_PWRITE | FMODE_NOCMTIME |
			    FMODE_LSEEK;
	xf->file->f_flags |= O_RDWR | O_LARGEFILE | O_NOATIME;
	inode = file_inode(xf->file);
	inode->i_flags |= S_PRIVATE | S_NOCMTIME | S_NOATIME;
	inode->i_mode &= ~0177;
	inode->i_uid = GLOBAL_ROOT_UID;
	inode->i_gid = GLOBAL_ROOT_GID;

	lockdep_set_class(&inode->i_rwsem, &xfile_i_mutex_key);

	trace_xfile_create(xf);

	*xfilep = xf;
	return 0;
out_xfile:
	kfree(xf);
	return error;
}

/* Close the file and release all resources. */
void
xfile_destroy(
	struct xfile		*xf)
{
	struct inode		*inode = file_inode(xf->file);

	trace_xfile_destroy(xf);

	lockdep_set_class(&inode->i_rwsem, &inode->i_sb->s_type->i_mutex_key);
	fput(xf->file);
	kfree(xf);
}

/*
 * Read a memory object directly from the xfile's page cache.  Unlike regular
 * pread, we return -E2BIG and -EFBIG for reads that are too large or at too
 * high an offset, instead of truncating the read.  Otherwise, we return
 * bytes read or an error code, like regular pread.
 */
ssize_t
xfile_pread(
	struct xfile		*xf,
	void			*buf,
	size_t			count,
	loff_t			pos)
{
	struct inode		*inode = file_inode(xf->file);
	struct address_space	*mapping = inode->i_mapping;
	struct page		*page = NULL;
	ssize_t			read = 0;
	unsigned int		pflags;
	int			error = 0;

	if (count > MAX_RW_COUNT)
		return -E2BIG;
	if (inode->i_sb->s_maxbytes - pos < count)
		return -EFBIG;

	trace_xfile_pread(xf, pos, count);

	pflags = memalloc_nofs_save();
	while (count > 0) {
		void		*p, *kaddr;
		unsigned int	len;

		len = min_t(ssize_t, count, PAGE_SIZE - offset_in_page(pos));

		/*
		 * In-kernel reads of a shmem file cause it to allocate a page
		 * if the mapping shows a hole.  Therefore, if we hit ENOMEM
		 * we can continue by zeroing the caller's buffer.
		 */
		page = shmem_read_mapping_page_gfp(mapping, pos >> PAGE_SHIFT,
				__GFP_NOWARN);
		if (IS_ERR(page)) {
			error = PTR_ERR(page);
			if (error != -ENOMEM)
				break;

			memset(buf, 0, len);
			goto advance;
		}

		if (PageUptodate(page)) {
			/*
			 * xfile pages must never be mapped into userspace, so
			 * we skip the dcache flush.
			 */
			kaddr = kmap_local_page(page);
			p = kaddr + offset_in_page(pos);
			memcpy(buf, p, len);
			kunmap_local(kaddr);
		} else {
			memset(buf, 0, len);
		}
		put_page(page);

advance:
		count -= len;
		pos += len;
		buf += len;
		read += len;
	}
	memalloc_nofs_restore(pflags);

	if (read > 0)
		return read;
	return error;
}

/*
 * Write a memory object directly to the xfile's page cache.  Unlike regular
 * pwrite, we return -E2BIG and -EFBIG for writes that are too large or at
 * too high an offset, instead of truncating the write.  Otherwise, we return
 * bytes written or an error code, like regular pwrite.
 */
ssize_t
xfile_pwrite(
	struct xfile		*xf,
	const void		*buf,
	size_t			count,
	loff_t			pos)
{
	struct inode		*inode = file_inode(xf->file);
	struct address_space	*mapping = inode->i_mapping;
	const struct address_space_operations *aops = mapping->a_ops;
	struct page		*page = NULL;
	ssize_t			written = 0;
	unsigned int		pflags;
	int			error = 0;

	if (count > MAX_RW_COUNT)
		return -E2BIG;
	if (inode->i_sb->s_maxbytes - pos < count)
		return -EFBIG;

	trace_xfile_pwrite(xf, pos, count);

	pflags = memalloc_nofs_save();
	while (count > 0) {
		void		*fsdata = NULL;
		void		*p, *kaddr;
		unsigned int	len;
		int		ret;

		len = min_t(ssize_t, count, PAGE_SIZE - offset_in_page(pos));

		/*
		 * We call write_begin directly here to avoid all the freezer
		 * protection lock-taking that happens in the normal path.
		 * shmem doesn't support fs freeze, but lockdep doesn't know
		 * that and will trip over that.
		 */
		error = aops->write_begin(NULL, mapping, pos, len, &page,
				&fsdata);
		if (error)
			break;

		/*
		 * xfile pages must never be mapped into userspace, so we skip
		 * the dcache flush.  If the page is not uptodate, zero it
		 * before writing data.
		 */
		kaddr = kmap_local_page(page);
		if (!PageUptodate(page)) {
			memset(kaddr, 0, PAGE_SIZE);
			SetPageUptodate(page);
		}
		p = kaddr + offset_in_page(pos);
		memcpy(p, buf, len);
		kunmap_local(kaddr);

		ret = aops->write_end(NULL, mapping, pos, len, len, page,
				fsdata);
		if (ret < 0) {
			error = ret;
			break;
		}

		written += ret;
		if (ret != len)
			break;

		count -= ret;
		pos += ret;
		buf += ret;
	}
	memalloc_nofs_restore(pflags);

	if (written > 0)
		return written;
	return error;
}

/* Find the next written area in the xfile data for a given offset. */
loff_t
xfile_seek_data(
	struct xfile		*xf,
	loff_t			pos)
{
	loff_t			ret;

	ret = vfs_llseek(xf->file, pos, SEEK_DATA);
	trace_xfile_seek_data(xf, pos, ret);
	return ret;
}

/* Query stat information for an xfile. */
int
xfile_stat(
	struct xfile		*xf,
	struct xfile_stat	*statbuf)
{
	struct kstat		ks;
	int			error;

	error = vfs_getattr_nosec(&xf->file->f_path, &ks,
			STATX_SIZE | STATX_BLOCKS, AT_STATX_DONT_SYNC);
	if (error)
		return error;

	statbuf->size = ks.size;
	statbuf->bytes = ks.blocks << SECTOR_SHIFT;
	return 0;
}

/*
 * Grab the (locked) page for a memory object.  The object cannot span a page
 * boundary.  Returns 0 (and a locked page) if successful, -ENOTBLK if we
 * cannot grab the page, or the usual negative errno.
 */
int
xfile_get_page(
	struct xfile		*xf,
	loff_t			pos,
	unsigned int		len,
	struct xfile_page	*xfpage)
{
	struct inode		*inode = file_inode(xf->file);
	struct address_space	*mapping = inode->i_mapping;
	const struct address_space_operations *aops = mapping->a_ops;
	struct page		*page = NULL;
	void			*fsdata = NULL;
	loff_t			key = round_down(pos, PAGE_SIZE);
	unsigned int		pflags;
	int			error;

	if (inode->i_sb->s_maxbytes - pos < len)
		return -ENOMEM;
	if (len > PAGE_SIZE - offset_in_page(pos))
		return -ENOTBLK;

	trace_xfile_get_page(xf, pos, len);

	pflags = memalloc_nofs_save();

	/*
	 * We call write_begin directly here to avoid all the freezer
	 * protection lock-taking that happens in the normal path.  shmem
	 * doesn't support fs freeze, but lockdep doesn't know that and will
	 * trip over that.
	 */
	error = aops->write_begin(NULL, mapping, key, PAGE_SIZE, &page,
			&fsdata);
	if (error)
		goto out_pflags;

	/* We got the page, so make sure we push out EOF. */
	if (i_size_read(inode) < pos + len)
		i_size_write(inode, pos + len);

	/*
	 * If the page isn't up to date, fill it with zeroes before we hand it
	 * to the caller and make sure the backing store will hold on to them.
	 */
	if (!PageUptodate(page)) {
		void	*kaddr;

		kaddr = kmap_local_page(page);
		memset(kaddr, 0, PAGE_SIZE);
		kunmap_local(kaddr);
		SetPageUptodate(page);
	}

	/*
	 * Mark each page dirty so that the contents are written to some
	 * backing store when we drop this buffer, and take an extra reference
	 * to prevent the xfile page from being swapped or removed from the
	 * page cache by reclaim if the caller unlocks the page.
	 */
	set_page_dirty(page);
	get_page(page);

	xfpage->page = page;
	xfpage->fsdata = fsdata;
	xfpage->pos = key;
out_pflags:
	memalloc_nofs_restore(pflags);
	return error;
}

/*
 * Release the (locked) page for a memory object.  Returns 0 or a negative
 * errno.
 */
int
xfile_put_page(
	struct xfile		*xf,
	struct xfile_page	*xfpage)
{
	struct inode		*inode = file_inode(xf->file);
	struct address_space	*mapping = inode->i_mapping;
	const struct address_space_operations *aops = mapping->a_ops;
	unsigned int		pflags;
	int			ret;

	trace_xfile_put_page(xf, xfpage->pos, PAGE_SIZE);

	/* Give back the reference that we took in xfile_get_page. */
	put_page(xfpage->page);

	pflags = memalloc_nofs_save();
	ret = aops->write_end(NULL, mapping, xfpage->pos, PAGE_SIZE, PAGE_SIZE,
			xfpage->page, xfpage->fsdata);
	memalloc_nofs_restore(pflags);
	memset(xfpage, 0, sizeof(struct xfile_page));

	if (ret < 0)
		return ret;
	if (ret != PAGE_SIZE)
		return -EIO;
	return 0;
}
@ -0,0 +1,77 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Copyright (C) 2018-2023 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#ifndef __XFS_SCRUB_XFILE_H__
#define __XFS_SCRUB_XFILE_H__

struct xfile_page {
	struct page		*page;
	void			*fsdata;
	loff_t			pos;
};

static inline bool xfile_page_cached(const struct xfile_page *xfpage)
{
	return xfpage->page != NULL;
}

static inline pgoff_t xfile_page_index(const struct xfile_page *xfpage)
{
	return xfpage->page->index;
}

struct xfile {
	struct file		*file;
};

int xfile_create(const char *description, loff_t isize, struct xfile **xfilep);
void xfile_destroy(struct xfile *xf);

ssize_t xfile_pread(struct xfile *xf, void *buf, size_t count, loff_t pos);
ssize_t xfile_pwrite(struct xfile *xf, const void *buf, size_t count,
		loff_t pos);

/*
 * Load an object.  Since we're treating this file as "memory", any error or
 * short IO is treated as a failure to allocate memory.
 */
static inline int
xfile_obj_load(struct xfile *xf, void *buf, size_t count, loff_t pos)
{
	ssize_t	ret = xfile_pread(xf, buf, count, pos);

	if (ret < 0 || ret != count)
		return -ENOMEM;
	return 0;
}

/*
 * Store an object.  Since we're treating this file as "memory", any error or
 * short IO is treated as a failure to allocate memory.
 */
static inline int
xfile_obj_store(struct xfile *xf, const void *buf, size_t count, loff_t pos)
{
	ssize_t	ret = xfile_pwrite(xf, buf, count, pos);

	if (ret < 0 || ret != count)
		return -ENOMEM;
	return 0;
}

loff_t xfile_seek_data(struct xfile *xf, loff_t pos);

struct xfile_stat {
	loff_t			size;
	unsigned long long	bytes;
};

int xfile_stat(struct xfile *xf, struct xfile_stat *statbuf);

int xfile_get_page(struct xfile *xf, loff_t offset, unsigned int len,
		struct xfile_page *xbuf);
int xfile_put_page(struct xfile *xf, struct xfile_page *xbuf);

#endif /* __XFS_SCRUB_XFILE_H__ */
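Everything a consumer needs is in the header above. A self-contained round-trip sketch (the demo function is illustrative, not part of the series):

	/* Sketch: an xfile behaves like pageable memory for fixed-size objects. */
	static int demo_xfile(void)
	{
		struct xfile *xf;
		uint64_t value = 42, readback = 0;
		int error;

		error = xfile_create("demo", 0, &xf);
		if (error)
			return error;

		error = xfile_obj_store(xf, &value, sizeof(value), 0);
		if (!error)
			error = xfile_obj_load(xf, &readback, sizeof(readback), 0);

		xfile_destroy(xf);
		return error;	/* -ENOMEM on any short or failed I/O */
	}

Collapsing every error and short I/O to -ENOMEM is deliberate: callers treat the xfile as memory, so an unreadable object is indistinguishable from an allocation failure.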
@ -478,7 +478,7 @@ xfs_discard_folio(
			folio, ip->i_ino, pos);

	/*
	 * The end of the punch range is always the offset of the the first
	 * The end of the punch range is always the offset of the first
	 * byte of the next folio. Hence the end offset is only dependent on the
	 * folio itself and not the start offset that is passed in.
	 */
|
|||
struct xfs_inode *ip;
|
||||
struct xfs_da_args *args;
|
||||
struct xfs_trans *tp;
|
||||
struct xfs_trans_res tres;
|
||||
struct xfs_trans_res resv;
|
||||
struct xfs_attri_log_format *attrp;
|
||||
struct xfs_attri_log_nameval *nv = attrip->attri_nameval;
|
||||
int error;
|
||||
|
@ -618,8 +618,9 @@ xfs_attri_item_recover(
|
|||
goto out;
|
||||
}
|
||||
|
||||
xfs_init_attr_trans(args, &tres, &total);
|
||||
error = xfs_trans_alloc(mp, &tres, total, 0, XFS_TRANS_RESERVE, &tp);
|
||||
xfs_init_attr_trans(args, &resv, &total);
|
||||
resv = xlog_recover_resv(&resv);
|
||||
error = xfs_trans_alloc(mp, &resv, total, 0, XFS_TRANS_RESERVE, &tp);
|
||||
if (error)
|
||||
goto out;
|
||||
|
||||
|
|
|
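This attr hunk and the bmap, extent-free, refcount, and rmap recovery hunks below all converge on the same shape: copy the regular reservation into a local struct, let xlog_recover_resv() transform the copy for log recovery, then allocate the transaction from that copy. In outline:

	/* Common shape of the recovery-reservation change in these hunks. */
	struct xfs_trans_res	resv;
	struct xfs_trans	*tp;
	int			error;

	resv = xlog_recover_resv(&M_RES(mp)->tr_itruncate);
	error = xfs_trans_alloc(mp, &resv, 0, 0, XFS_TRANS_RESERVE, &tp);

Using a local copy leaves the mount's canonical reservation table untouched, so only recovery-time transactions see the adjusted values.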
@ -490,6 +490,7 @@ xfs_bui_item_recover(
	struct list_head		*capture_list)
{
	struct xfs_bmap_intent		fake = { };
	struct xfs_trans_res		resv;
	struct xfs_bui_log_item		*buip = BUI_ITEM(lip);
	struct xfs_trans		*tp;
	struct xfs_inode		*ip = NULL;

@ -515,7 +516,8 @@ xfs_bui_item_recover(
		return error;

	/* Allocate transaction and do the work. */
	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
	resv = xlog_recover_resv(&M_RES(mp)->tr_itruncate);
	error = xfs_trans_alloc(mp, &resv,
			XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), 0, 0, &tp);
	if (error)
		goto err_rele;
@ -481,7 +481,8 @@ _xfs_buf_obj_cmp(
		 * reallocating a busy extent. Skip this buffer and
		 * continue searching for an exact match.
		 */
		ASSERT(bp->b_flags & XBF_STALE);
		if (!(map->bm_flags & XBM_LIVESCAN))
			ASSERT(bp->b_flags & XBF_STALE);
		return 1;
	}
	return 0;

@ -559,6 +560,10 @@ xfs_buf_find_lock(
	 * intact here.
	 */
	if (bp->b_flags & XBF_STALE) {
		if (flags & XBF_LIVESCAN) {
			xfs_buf_unlock(bp);
			return -ENOENT;
		}
		ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);
		bp->b_flags &= _XBF_KMEM | _XBF_PAGES;
		bp->b_ops = NULL;

@ -682,6 +687,8 @@ xfs_buf_get_map(
	int			error;
	int			i;

	if (flags & XBF_LIVESCAN)
		cmap.bm_flags |= XBM_LIVESCAN;
	for (i = 0; i < nmaps; i++)
		cmap.bm_len += map[i].bm_len;
|
|||
#define _XBF_DELWRI_Q (1u << 22)/* buffer on a delwri queue */
|
||||
|
||||
/* flags used only as arguments to access routines */
|
||||
/*
|
||||
* Online fsck is scanning the buffer cache for live buffers. Do not warn
|
||||
* about length mismatches during lookups and do not return stale buffers.
|
||||
*/
|
||||
#define XBF_LIVESCAN (1u << 28)
|
||||
#define XBF_INCORE (1u << 29)/* lookup only, return if found in cache */
|
||||
#define XBF_TRYLOCK (1u << 30)/* lock requested, but do not wait */
|
||||
#define XBF_UNMAPPED (1u << 31)/* do not map the buffer */
|
||||
|
@ -67,6 +72,7 @@ typedef unsigned int xfs_buf_flags_t;
|
|||
{ _XBF_KMEM, "KMEM" }, \
|
||||
{ _XBF_DELWRI_Q, "DELWRI_Q" }, \
|
||||
/* The following interface flags should never be set */ \
|
||||
{ XBF_LIVESCAN, "LIVESCAN" }, \
|
||||
{ XBF_INCORE, "INCORE" }, \
|
||||
{ XBF_TRYLOCK, "TRYLOCK" }, \
|
||||
{ XBF_UNMAPPED, "UNMAPPED" }
|
||||
|
@ -115,8 +121,15 @@ typedef struct xfs_buftarg {
|
|||
struct xfs_buf_map {
|
||||
xfs_daddr_t bm_bn; /* block number for I/O */
|
||||
int bm_len; /* size of I/O */
|
||||
unsigned int bm_flags;
|
||||
};
|
||||
|
||||
/*
|
||||
* Online fsck is scanning the buffer cache for live buffers. Do not warn
|
||||
* about length mismatches during lookups and do not return stale buffers.
|
||||
*/
|
||||
#define XBM_LIVESCAN (1U << 0)
|
||||
|
||||
#define DEFINE_SINGLE_BUF_MAP(map, blkno, numblk) \
|
||||
struct xfs_buf_map (map) = { .bm_bn = (blkno), .bm_len = (numblk) };
|
||||
|
||||
|
|
|
@ -660,6 +660,7 @@ xfs_efi_item_recover(
|
|||
struct xfs_log_item *lip,
|
||||
struct list_head *capture_list)
|
||||
{
|
||||
struct xfs_trans_res resv;
|
||||
struct xfs_efi_log_item *efip = EFI_ITEM(lip);
|
||||
struct xfs_mount *mp = lip->li_log->l_mp;
|
||||
struct xfs_efd_log_item *efdp;
|
||||
|
@ -683,7 +684,8 @@ xfs_efi_item_recover(
|
|||
}
|
||||
}
|
||||
|
||||
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
|
||||
resv = xlog_recover_resv(&M_RES(mp)->tr_itruncate);
|
||||
error = xfs_trans_alloc(mp, &resv, 0, 0, 0, &tp);
|
||||
if (error)
|
||||
return error;
|
||||
efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents);
|
||||
|
|
|
@ -800,44 +800,6 @@ out_error_or_again:
	return error;
}

/*
 * "Is this a cached inode that's also allocated?"
 *
 * Look up an inode by number in the given file system.  If the inode is
 * in cache and isn't in purgatory, return 1 if the inode is allocated
 * and 0 if it is not.  For all other cases (not in cache, being torn
 * down, etc.), return a negative error code.
 *
 * The caller has to prevent inode allocation and freeing activity,
 * presumably by locking the AGI buffer.  This is to ensure that an
 * inode cannot transition from allocated to freed until the caller is
 * ready to allow that.  If the inode is in an intermediate state (new,
 * reclaimable, or being reclaimed), -EAGAIN will be returned; if the
 * inode is not in the cache, -ENOENT will be returned.  The caller must
 * deal with these scenarios appropriately.
 *
 * This is a specialized use case for the online scrubber; if you're
 * reading this, you probably want xfs_iget.
 */
int
xfs_icache_inode_is_allocated(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	xfs_ino_t		ino,
	bool			*inuse)
{
	struct xfs_inode	*ip;
	int			error;

	error = xfs_iget(mp, tp, ino, XFS_IGET_INCORE, 0, &ip);
	if (error)
		return error;

	*inuse = !!(VFS_I(ip)->i_mode);
	xfs_irele(ip);
	return 0;
}

/*
 * Grab the inode for reclaim exclusively.
 *
|
|||
void xfs_inode_clear_cowblocks_tag(struct xfs_inode *ip);
|
||||
|
||||
void xfs_blockgc_worker(struct work_struct *work);
|
||||
|
||||
int xfs_icache_inode_is_allocated(struct xfs_mount *mp, struct xfs_trans *tp,
|
||||
xfs_ino_t ino, bool *inuse);
|
||||
|
||||
void xfs_blockgc_stop(struct xfs_mount *mp);
|
||||
void xfs_blockgc_start(struct xfs_mount *mp);
|
||||
|
||||
|
|
|
@ -62,6 +62,7 @@ typedef __u32 xfs_nlink_t;
#include <linux/rhashtable.h>
#include <linux/xattr.h>
#include <linux/mnt_idmapping.h>
#include <linux/debugfs.h>

#include <asm/page.h>
#include <asm/div64.h>
|
|||
* try a smaller size. We need to be able to read at least
|
||||
* a log sector, or we're out of luck.
|
||||
*/
|
||||
bufblks = 1 << ffs(nbblks);
|
||||
bufblks = roundup_pow_of_two(nbblks);
|
||||
while (bufblks > log->l_logBBsize)
|
||||
bufblks >>= 1;
|
||||
while (!(buffer = xlog_alloc_buffer(log, bufblks))) {
|
||||
|
@ -1528,7 +1528,7 @@ xlog_write_log_records(
|
|||
* a smaller size. We need to be able to write at least a
|
||||
* log sector, or we're out of luck.
|
||||
*/
|
||||
bufblks = 1 << ffs(blocks);
|
||||
bufblks = roundup_pow_of_two(blocks);
|
||||
while (bufblks > log->l_logBBsize)
|
||||
bufblks >>= 1;
|
||||
while (!(buffer = xlog_alloc_buffer(log, bufblks))) {
|
||||
|
|
|
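The two hunks above replace `1 << ffs(nbblks)` with `roundup_pow_of_two(nbblks)`. ffs() reports the position of the lowest set bit, not anything about the magnitude of the value, so the old expression could undershoot the requested size. A concrete case:

	/* Why the old rounding was wrong: ffs() keys off the lowest set bit. */
	unsigned int nbblks = 24;	/* 0b11000: lowest set bit is bit 4 */

	/* old: 1 << ffs(24) == 1 << 4 == 16, smaller than the 24 requested */
	/* new: roundup_pow_of_two(24) == 32, always >= the request */

With the fix, the subsequent shrink loop still caps the buffer at the log size, but the starting point is guaranteed to cover the requested block count.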
@ -34,6 +34,7 @@
#include "xfs_health.h"
#include "xfs_trace.h"
#include "xfs_ag.h"
#include "scrub/stats.h"

static DEFINE_MUTEX(xfs_uuid_table_mutex);
static int xfs_uuid_table_size;

@ -716,9 +717,11 @@ xfs_mountfs(
	if (error)
		goto out_remove_sysfs;

	xchk_stats_register(mp->m_scrub_stats, mp->m_debugfs);

	error = xfs_error_sysfs_init(mp);
	if (error)
		goto out_del_stats;
		goto out_remove_scrub_stats;

	error = xfs_errortag_init(mp);
	if (error)

@ -1033,7 +1036,8 @@ xfs_mountfs(
	xfs_errortag_del(mp);
 out_remove_error_sysfs:
	xfs_error_sysfs_del(mp);
 out_del_stats:
 out_remove_scrub_stats:
	xchk_stats_unregister(mp->m_scrub_stats);
	xfs_sysfs_del(&mp->m_stats.xs_kobj);
 out_remove_sysfs:
	xfs_sysfs_del(&mp->m_kobj);

@ -1105,6 +1109,7 @@ xfs_unmountfs(

	xfs_errortag_del(mp);
	xfs_error_sysfs_del(mp);
	xchk_stats_unregister(mp->m_scrub_stats);
	xfs_sysfs_del(&mp->m_stats.xs_kobj);
	xfs_sysfs_del(&mp->m_kobj);
}
@ -206,11 +206,15 @@ typedef struct xfs_mount {
	uint64_t		m_resblks_avail;/* available reserved blocks */
	uint64_t		m_resblks_save;	/* reserved blks @ remount,ro */
	struct delayed_work	m_reclaim_work;	/* background inode reclaim */
	struct dentry		*m_debugfs;	/* debugfs parent */
	struct xfs_kobj		m_kobj;
	struct xfs_kobj		m_error_kobj;
	struct xfs_kobj		m_error_meta_kobj;
	struct xfs_error_cfg	m_error_cfg[XFS_ERR_CLASS_MAX][XFS_ERR_ERRNO_MAX];
	struct xstats		m_stats;	/* per-fs stats */
#ifdef CONFIG_XFS_ONLINE_SCRUB_STATS
	struct xchk_stats	*m_scrub_stats;
#endif
	xfs_agnumber_t		m_agfrotor;	/* last ag where space found */
	atomic_t		m_agirotor;	/* last ag dir inode alloced */
@ -477,6 +477,7 @@ xfs_cui_item_recover(
	struct xfs_log_item		*lip,
	struct list_head		*capture_list)
{
	struct xfs_trans_res		resv;
	struct xfs_cui_log_item		*cuip = CUI_ITEM(lip);
	struct xfs_cud_log_item		*cudp;
	struct xfs_trans		*tp;

@ -514,8 +515,9 @@ xfs_cui_item_recover(
	 * doesn't fit.  We need to reserve enough blocks to handle a
	 * full btree split on either end of the refcount range.
	 */
	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
			mp->m_refc_maxlevels * 2, 0, XFS_TRANS_RESERVE, &tp);
	resv = xlog_recover_resv(&M_RES(mp)->tr_itruncate);
	error = xfs_trans_alloc(mp, &resv, mp->m_refc_maxlevels * 2, 0,
			XFS_TRANS_RESERVE, &tp);
	if (error)
		return error;
@ -507,6 +507,7 @@ xfs_rui_item_recover(
	struct xfs_log_item		*lip,
	struct list_head		*capture_list)
{
	struct xfs_trans_res		resv;
	struct xfs_rui_log_item		*ruip = RUI_ITEM(lip);
	struct xfs_rud_log_item		*rudp;
	struct xfs_trans		*tp;

@ -530,8 +531,9 @@ xfs_rui_item_recover(
		}
	}

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
			mp->m_rmap_maxlevels, 0, XFS_TRANS_RESERVE, &tp);
	resv = xlog_recover_resv(&M_RES(mp)->tr_itruncate);
	error = xfs_trans_alloc(mp, &resv, mp->m_rmap_maxlevels, 0,
			XFS_TRANS_RESERVE, &tp);
	if (error)
		return error;
	rudp = xfs_trans_get_rud(tp, ruip);
@ -42,6 +42,7 @@
#include "xfs_xattr.h"
#include "xfs_iunlink_item.h"
#include "xfs_dahash_test.h"
#include "scrub/stats.h"

#include <linux/magic.h>
#include <linux/fs_context.h>

@ -49,6 +50,7 @@

static const struct super_operations xfs_super_operations;

static struct dentry *xfs_debugfs;	/* top-level xfs debugfs dir */
static struct kset *xfs_kset;		/* top-level xfs sysfs dir */
#ifdef DEBUG
static struct xfs_kobj xfs_dbg_kobj;	/* global debug sysfs attrs */
@ -756,6 +758,8 @@ static void
xfs_mount_free(
	struct xfs_mount	*mp)
{
	debugfs_remove(mp->m_debugfs);

	/*
	 * Free the buftargs here because blkdev_put needs to be called outside
	 * of sb->s_umount, which is held around the call to ->put_super.
|
|||
xfs_unmountfs(mp);
|
||||
|
||||
xfs_freesb(mp);
|
||||
xchk_mount_stats_free(mp);
|
||||
free_percpu(mp->m_stats.xs_stats);
|
||||
xfs_inodegc_free_percpu(mp);
|
||||
xfs_destroy_percpu_counters(mp);
|
||||
|
@ -1479,6 +1484,21 @@ xfs_fs_validate_params(
	return 0;
}

struct dentry *
xfs_debugfs_mkdir(
	const char	*name,
	struct dentry	*parent)
{
	struct dentry	*child;

	/* Apparently we're expected to ignore error returns?? */
	child = debugfs_create_dir(name, parent);
	if (IS_ERR(child))
		return NULL;

	return child;
}

static int
xfs_fs_fill_super(
	struct super_block	*sb,
@ -1521,6 +1541,13 @@ xfs_fs_fill_super(
	if (error)
		return error;

	if (xfs_debugfs) {
		mp->m_debugfs = xfs_debugfs_mkdir(mp->m_super->s_id,
						  xfs_debugfs);
	} else {
		mp->m_debugfs = NULL;
	}

	error = xfs_init_mount_workqueues(mp);
	if (error)
		goto out_shutdown_devices;
@ -1540,10 +1567,14 @@ xfs_fs_fill_super(
		goto out_destroy_inodegc;
	}

	error = xfs_readsb(mp, flags);
	error = xchk_mount_stats_alloc(mp);
	if (error)
		goto out_free_stats;

	error = xfs_readsb(mp, flags);
	if (error)
		goto out_free_scrub_stats;

	error = xfs_finish_flags(mp);
	if (error)
		goto out_free_sb;
@ -1721,6 +1752,8 @@ xfs_fs_fill_super(
	xfs_filestream_unmount(mp);
 out_free_sb:
	xfs_freesb(mp);
 out_free_scrub_stats:
	xchk_mount_stats_free(mp);
 out_free_stats:
	free_percpu(mp->m_stats.xs_stats);
 out_destroy_inodegc:
@ -2304,10 +2337,12 @@ init_xfs_fs(void)
	if (error)
		goto out_cleanup_procfs;

	xfs_debugfs = xfs_debugfs_mkdir("xfs", NULL);

	xfs_kset = kset_create_and_add("xfs", NULL, fs_kobj);
	if (!xfs_kset) {
		error = -ENOMEM;
		goto out_sysctl_unregister;
		goto out_debugfs_unregister;
	}

	xfsstats.xs_kobj.kobject.kset = xfs_kset;
@ -2323,11 +2358,15 @@ init_xfs_fs(void)
	if (error)
		goto out_free_stats;

	error = xchk_global_stats_setup(xfs_debugfs);
	if (error)
		goto out_remove_stats_kobj;

#ifdef DEBUG
	xfs_dbg_kobj.kobject.kset = xfs_kset;
	error = xfs_sysfs_init(&xfs_dbg_kobj, &xfs_dbg_ktype, NULL, "debug");
	if (error)
		goto out_remove_stats_kobj;
		goto out_remove_scrub_stats;
#endif

	error = xfs_qm_init();
@ -2344,14 +2383,17 @@ init_xfs_fs(void)
 out_remove_dbg_kobj:
#ifdef DEBUG
	xfs_sysfs_del(&xfs_dbg_kobj);
 out_remove_stats_kobj:
 out_remove_scrub_stats:
#endif
	xchk_global_stats_teardown();
 out_remove_stats_kobj:
	xfs_sysfs_del(&xfsstats.xs_kobj);
 out_free_stats:
	free_percpu(xfsstats.xs_stats);
 out_kset_unregister:
	kset_unregister(xfs_kset);
 out_sysctl_unregister:
 out_debugfs_unregister:
	debugfs_remove(xfs_debugfs);
	xfs_sysctl_unregister();
 out_cleanup_procfs:
	xfs_cleanup_procfs();
@ -2373,9 +2415,11 @@ exit_xfs_fs(void)
#ifdef DEBUG
	xfs_sysfs_del(&xfs_dbg_kobj);
#endif
	xchk_global_stats_teardown();
	xfs_sysfs_del(&xfsstats.xs_kobj);
	free_percpu(xfsstats.xs_stats);
	kset_unregister(xfs_kset);
	debugfs_remove(xfs_debugfs);
	xfs_sysctl_unregister();
	xfs_cleanup_procfs();
	xfs_mru_cache_uninit();
@ -100,4 +100,6 @@ extern struct workqueue_struct *xfs_discard_wq;

#define XFS_M(sb)		((struct xfs_mount *)((sb)->s_fs_info))

struct dentry *xfs_debugfs_mkdir(const char *name, struct dentry *parent);

#endif	/* __XFS_SUPER_H__ */
@ -22,6 +22,9 @@
 * daddr: physical block number in 512b blocks
 * bbcount: number of blocks in a physical extent, in 512b blocks
 *
 * rtx: physical rt extent number for extent mappings
 * rtxcount: number of rt extents in an extent mapping
 *
 * owner: reverse-mapping owner, usually inodes
 *
 * fileoff: file offset, in fs blocks
@ -46,6 +46,17 @@ xfs_attr_grab_log_assist(
	if (xfs_sb_version_haslogxattrs(&mp->m_sb))
		return 0;

	/*
	 * Check if the filesystem featureset is new enough to set this log
	 * incompat feature bit.  Strictly speaking, the minimum requirement is
	 * a V5 filesystem for the superblock field, but we'll require rmap
	 * or reflink to avoid having to deal with really old kernels.
	 */
	if (!xfs_has_reflink(mp) && !xfs_has_rmapbt(mp)) {
		error = -EOPNOTSUPP;
		goto drop_incompat;
	}

	/* Enable log-assisted xattrs. */
	error = xfs_add_incompat_log_feature(mp,
			XFS_SB_FEAT_INCOMPAT_LOG_XATTRS);
@ -0,0 +1 @@
# CONFIG_XFS_ONLINE_SCRUB_STATS is not set