vfs-6.14-rc1.libfs

-----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQRAhzRXHqcMeLMyaSiRxhvAZXjcogUCZ4pSLQAKCRCRxhvAZXjc
 oq92AP4qTO8+FFRok2nhHlK4YNPhiqni1KabYXuHakL1ESw8OQD+O1wLgw8FUkgv
 jxi+KmxMz9Asg2wdnLrSGEZJ709eOgc=
 =6dn7
 -----END PGP SIGNATURE-----

Merge tag 'vfs-6.14-rc1.libfs' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull vfs libfs updates from Christian Brauner:
 "This improves the stable directory offset behavior in various ways.

  Stable offsets are needed so that NFS can reliably read directories on
  filesystems such as tmpfs:

   - Improve the end-of-directory detection

     According to getdents(2), the d_off field in each returned
     directory entry points to the next entry in the directory. The
     d_off field in the last returned entry in the readdir buffer must
     contain a valid offset value, but if it points to an actual
     directory entry, then readdir/getdents can loop.

     Introduce a specific fixed offset value that is placed in the d_off
     field of the last entry in a directory. Some user space
     applications assume that the EOD offset value is larger than the
     offsets of real directory entries, so the largest valid offset
     value is reserved for this purpose. This new value is never
     allocated by simple_offset_add().

     When ->iterate_dir() returns, getdents{64} inserts the ctx->pos
     value into the d_off field of the last valid entry in the readdir
     buffer. When it hits EOD, offset_readdir() sets ctx->pos to the EOD
     offset value so the last entry is updated to point to the EOD
     marker.

     When trying to read the entry at the EOD offset, offset_readdir()
     terminates immediately.

   - Rely on d_children to iterate stable offset directories

     Instead of using the mtree to emit entries in the order of their
     offset values, use it only to map incoming ctx->pos to a starting
     entry. Then use the directory's d_children list, which is already
     maintained properly by the dcache, to find the next child to emit.

   - Narrow the range of directory offset values returned by
     simple_offset_add() to 3 .. (S32_MAX - 1) on all platforms. This
     means the allocation behavior is identical on 32-bit systems,
     64-bit systems, and 32-bit user space on 64-bit kernels. The new
     range still permits over 2 billion concurrent entries per
     directory.

   - Return ENOSPC when the directory offset range is exhausted. Hitting
     this error is almost impossible though.

   - Remove the simple_offset_empty() helper"

* tag 'vfs-6.14-rc1.libfs' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  libfs: Use d_children list to iterate simple_offset directories
  libfs: Replace simple_offset end-of-directory detection
  Revert "libfs: fix infinite directory reads for offset dir"
  Revert "libfs: Add simple_offset_empty()"
  libfs: Return ENOSPC when the directory offset range is exhausted
This commit is contained in:
Linus Torvalds 2025-01-20 11:00:53 -08:00
commit 7e587c20ad
3 changed files with 79 additions and 88 deletions

View File

@ -245,9 +245,16 @@ const struct inode_operations simple_dir_inode_operations = {
};
EXPORT_SYMBOL(simple_dir_inode_operations);
/* 0 is '.', 1 is '..', so always start with offset 2 or more */
/* simple_offset_add() never assigns these to a dentry */
enum {
DIR_OFFSET_MIN = 2,
DIR_OFFSET_FIRST = 2, /* Find first real entry */
DIR_OFFSET_EOD = S32_MAX,
};
/* simple_offset_add() allocation range */
enum {
DIR_OFFSET_MIN = DIR_OFFSET_FIRST + 1,
DIR_OFFSET_MAX = DIR_OFFSET_EOD - 1,
};
static void offset_set(struct dentry *dentry, long offset)
@ -291,9 +298,10 @@ int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry)
return -EBUSY;
ret = mtree_alloc_cyclic(&octx->mt, &offset, dentry, DIR_OFFSET_MIN,
LONG_MAX, &octx->next_offset, GFP_KERNEL);
if (ret < 0)
return ret;
DIR_OFFSET_MAX, &octx->next_offset,
GFP_KERNEL);
if (unlikely(ret < 0))
return ret == -EBUSY ? -ENOSPC : ret;
offset_set(dentry, offset);
return 0;
@ -329,38 +337,6 @@ void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry)
offset_set(dentry, 0);
}
/**
* simple_offset_empty - Check if a dentry can be unlinked
* @dentry: dentry to be tested
*
* Returns 0 if @dentry is a non-empty directory; otherwise returns 1.
*/
int simple_offset_empty(struct dentry *dentry)
{
struct inode *inode = d_inode(dentry);
struct offset_ctx *octx;
struct dentry *child;
unsigned long index;
int ret = 1;
if (!inode || !S_ISDIR(inode->i_mode))
return ret;
index = DIR_OFFSET_MIN;
octx = inode->i_op->get_offset_ctx(inode);
mt_for_each(&octx->mt, child, index, LONG_MAX) {
spin_lock(&child->d_lock);
if (simple_positive(child)) {
spin_unlock(&child->d_lock);
ret = 0;
break;
}
spin_unlock(&child->d_lock);
}
return ret;
}
/**
* simple_offset_rename - handle directory offsets for rename
* @old_dir: parent directory of source entry
@ -454,14 +430,6 @@ void simple_offset_destroy(struct offset_ctx *octx)
mtree_destroy(&octx->mt);
}
static int offset_dir_open(struct inode *inode, struct file *file)
{
struct offset_ctx *ctx = inode->i_op->get_offset_ctx(inode);
file->private_data = (void *)ctx->next_offset;
return 0;
}
/**
* offset_dir_llseek - Advance the read position of a directory descriptor
* @file: an open directory whose position is to be updated
@ -475,9 +443,6 @@ static int offset_dir_open(struct inode *inode, struct file *file)
*/
static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence)
{
struct inode *inode = file->f_inode;
struct offset_ctx *ctx = inode->i_op->get_offset_ctx(inode);
switch (whence) {
case SEEK_CUR:
offset += file->f_pos;
@ -490,62 +455,89 @@ static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence)
return -EINVAL;
}
/* In this case, ->private_data is protected by f_pos_lock */
if (!offset)
file->private_data = (void *)ctx->next_offset;
return vfs_setpos(file, offset, LONG_MAX);
}
static struct dentry *offset_find_next(struct offset_ctx *octx, loff_t offset)
static struct dentry *find_positive_dentry(struct dentry *parent,
struct dentry *dentry,
bool next)
{
MA_STATE(mas, &octx->mt, offset, offset);
struct dentry *found = NULL;
spin_lock(&parent->d_lock);
if (next)
dentry = d_next_sibling(dentry);
else if (!dentry)
dentry = d_first_child(parent);
hlist_for_each_entry_from(dentry, d_sib) {
if (!simple_positive(dentry))
continue;
spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
if (simple_positive(dentry))
found = dget_dlock(dentry);
spin_unlock(&dentry->d_lock);
if (likely(found))
break;
}
spin_unlock(&parent->d_lock);
return found;
}
static noinline_for_stack struct dentry *
offset_dir_lookup(struct dentry *parent, loff_t offset)
{
struct inode *inode = d_inode(parent);
struct offset_ctx *octx = inode->i_op->get_offset_ctx(inode);
struct dentry *child, *found = NULL;
rcu_read_lock();
child = mas_find(&mas, LONG_MAX);
if (!child)
goto out;
spin_lock(&child->d_lock);
if (simple_positive(child))
found = dget_dlock(child);
spin_unlock(&child->d_lock);
out:
rcu_read_unlock();
MA_STATE(mas, &octx->mt, offset, offset);
if (offset == DIR_OFFSET_FIRST)
found = find_positive_dentry(parent, NULL, false);
else {
rcu_read_lock();
child = mas_find(&mas, DIR_OFFSET_MAX);
found = find_positive_dentry(parent, child, false);
rcu_read_unlock();
}
return found;
}
static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry)
{
struct inode *inode = d_inode(dentry);
long offset = dentry2offset(dentry);
return ctx->actor(ctx, dentry->d_name.name, dentry->d_name.len, offset,
inode->i_ino, fs_umode_to_dtype(inode->i_mode));
return dir_emit(ctx, dentry->d_name.name, dentry->d_name.len,
inode->i_ino, fs_umode_to_dtype(inode->i_mode));
}
static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx, long last_index)
static void offset_iterate_dir(struct file *file, struct dir_context *ctx)
{
struct offset_ctx *octx = inode->i_op->get_offset_ctx(inode);
struct dentry *dir = file->f_path.dentry;
struct dentry *dentry;
dentry = offset_dir_lookup(dir, ctx->pos);
if (!dentry)
goto out_eod;
while (true) {
dentry = offset_find_next(octx, ctx->pos);
if (!dentry)
return;
struct dentry *next;
if (dentry2offset(dentry) >= last_index) {
dput(dentry);
return;
}
ctx->pos = dentry2offset(dentry);
if (!offset_dir_emit(ctx, dentry))
break;
if (!offset_dir_emit(ctx, dentry)) {
dput(dentry);
return;
}
ctx->pos = dentry2offset(dentry) + 1;
next = find_positive_dentry(dir, dentry, true);
dput(dentry);
if (!next)
goto out_eod;
dentry = next;
}
dput(dentry);
return;
out_eod:
ctx->pos = DIR_OFFSET_EOD;
}
/**
@ -565,6 +557,8 @@ static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx, lon
*
* On return, @ctx->pos contains an offset that will read the next entry
* in this directory when offset_readdir() is called again with @ctx.
* Caller places this value in the d_off field of the last entry in the
* user's buffer.
*
* Return values:
* %0 - Complete
@ -572,19 +566,17 @@ static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx, lon
static int offset_readdir(struct file *file, struct dir_context *ctx)
{
struct dentry *dir = file->f_path.dentry;
long last_index = (long)file->private_data;
lockdep_assert_held(&d_inode(dir)->i_rwsem);
if (!dir_emit_dots(file, ctx))
return 0;
offset_iterate_dir(d_inode(dir), ctx, last_index);
if (ctx->pos != DIR_OFFSET_EOD)
offset_iterate_dir(file, ctx);
return 0;
}
const struct file_operations simple_offset_dir_operations = {
.open = offset_dir_open,
.llseek = offset_dir_llseek,
.iterate_shared = offset_readdir,
.read = generic_read_dir,

View File

@ -3483,7 +3483,6 @@ struct offset_ctx {
void simple_offset_init(struct offset_ctx *octx);
int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry);
void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry);
int simple_offset_empty(struct dentry *dentry);
int simple_offset_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry);
int simple_offset_rename_exchange(struct inode *old_dir,

View File

@ -3821,7 +3821,7 @@ static int shmem_unlink(struct inode *dir, struct dentry *dentry)
static int shmem_rmdir(struct inode *dir, struct dentry *dentry)
{
if (!simple_offset_empty(dentry))
if (!simple_empty(dentry))
return -ENOTEMPTY;
drop_nlink(d_inode(dentry));
@ -3878,7 +3878,7 @@ static int shmem_rename2(struct mnt_idmap *idmap,
return simple_offset_rename_exchange(old_dir, old_dentry,
new_dir, new_dentry);
if (!simple_offset_empty(new_dentry))
if (!simple_empty(new_dentry))
return -ENOTEMPTY;
if (flags & RENAME_WHITEOUT) {