xfs: flush inodegc before swapon
Fix the brand new xfstest that tries to swapon on a recently unshared
file and use the chance to document the other bit of magic in this
function.
The big comment is taken from a mailing list post by Dave Chinner.
Fixes: 5e672cd69f ("xfs: introduce xfs_inodegc_push()")
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
This commit is contained in:
parent
3cd6a8056f
commit
2d873efd17
fs/xfs
|
@ -19,6 +19,7 @@
|
|||
#include "xfs_reflink.h"
|
||||
#include "xfs_errortag.h"
|
||||
#include "xfs_error.h"
|
||||
#include "xfs_icache.h"
|
||||
|
||||
struct xfs_writepage_ctx {
|
||||
struct iomap_writepage_ctx ctx;
|
||||
|
@ -533,7 +534,39 @@ xfs_vm_swap_activate(
|
|||
struct file *swap_file,
|
||||
sector_t *span)
|
||||
{
|
||||
sis->bdev = xfs_inode_buftarg(XFS_I(file_inode(swap_file)))->bt_bdev;
|
||||
struct xfs_inode *ip = XFS_I(file_inode(swap_file));
|
||||
|
||||
/*
|
||||
* Swap file activation can race against concurrent shared extent
|
||||
* removal in files that have been cloned. If this happens,
|
||||
* iomap_swapfile_iter() can fail because it encountered a shared
|
||||
* extent even though an operation is in progress to remove those
|
||||
* shared extents.
|
||||
*
|
||||
* This race becomes problematic when we defer extent removal
|
||||
* operations beyond the end of a syscall (i.e. use async background
|
||||
* processing algorithms). Users think the extents are no longer
|
||||
* shared, but iomap_swapfile_iter() still sees them as shared
|
||||
* because the refcountbt entries for the extents being removed have
|
||||
* not yet been updated. Hence the swapon call fails unexpectedly.
|
||||
*
|
||||
* The race condition is currently most obvious from the unlink()
|
||||
* operation as extent removal is deferred until after the last
|
||||
* reference to the inode goes away. We then process the extent
|
||||
* removal asynchronously, which triggers the "syscall completed but
|
||||
* work not done" condition mentioned above. To close this race
|
||||
* window, we need to flush any pending inodegc operations to ensure
|
||||
* they have updated the refcountbt records before we try to map the
|
||||
* swapfile.
|
||||
*/
|
||||
xfs_inodegc_flush(ip->i_mount);
|
||||
|
||||
/*
|
||||
* Direct the swap code to the correct block device when this file
|
||||
* sits on the RT device.
|
||||
*/
|
||||
sis->bdev = xfs_inode_buftarg(ip)->bt_bdev;
|
||||
|
||||
return iomap_swapfile_activate(sis, swap_file, span,
|
||||
&xfs_read_iomap_ops);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue