fuse update for 6.14
-----BEGIN PGP SIGNATURE----- iHUEABYKAB0WIQSQHSd0lITzzeNWNm3h3BK/laaZPAUCZ5nzXQAKCRDh3BK/laaZ PCaJAP4gw6CnxrdzuPvm7yEsINuHdavQ8aeCiimWwOC4eBzkOgD/SlMry5vwCkW9 WOzoONVUcNIPEqYXThw77OFlkFpKGwQ= =JQE1 -----END PGP SIGNATURE----- Merge tag 'fuse-update-6.14' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse Pull fuse updates from Miklos Szeredi: "Add support for io-uring communication between kernel and userspace using IORING_OP_URING_CMD (Bernd Schubert). Following features enable gains in performance compared to the regular interface: - Allow processing multiple requests with less syscall overhead - Combine commit of old and fetch of new fuse request - CPU/NUMA affinity of queues Patches were reviewed by several people, including Pavel Begunkov, io-uring co-maintainer" * tag 'fuse-update-6.14' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse: fuse: prevent disabling io-uring on active connections fuse: enable fuse-over-io-uring fuse: block request allocation until io-uring init is complete fuse: {io-uring} Prevent mount point hang on fuse-server termination fuse: Allow to queue bg requests through io-uring fuse: Allow to queue fg requests through io-uring fuse: {io-uring} Make fuse_dev_queue_{interrupt,forget} non-static fuse: {io-uring} Handle teardown of ring entries fuse: Add io-uring sqe commit and fetch support fuse: {io-uring} Make hash-list req unique finding functions non-static fuse: Add fuse-io-uring handling into fuse_copy fuse: Make fuse_copy non static fuse: {io-uring} Handle SQEs - register commands fuse: make args->in_args[0] to be always the header fuse: Add fuse-io-uring design documentation fuse: Move request bits fuse: Move fuse_get_dev to header file fuse: rename to fuse_dev_end_requests and make non-static
This commit is contained in:
commit
92cc9acff7
|
@ -0,0 +1,99 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
=======================================
|
||||
FUSE-over-io-uring design documentation
|
||||
=======================================
|
||||
|
||||
This documentation covers basic details of how the fuse
|
||||
kernel/userspace communication through io-uring is configured
|
||||
and works. For generic details about FUSE see fuse.rst.
|
||||
|
||||
This document also covers the current interface, which is
|
||||
still in development and might change.
|
||||
|
||||
Limitations
|
||||
===========
|
||||
As of now not all request types are supported through io-uring; userspace
|
||||
is required to also handle requests through /dev/fuse after io-uring setup
|
||||
is complete. Specifically, notifications (initiated from the daemon side)
|
||||
and interrupts are not yet supported through io-uring.
|
||||
|
||||
Fuse io-uring configuration
|
||||
===========================
|
||||
|
||||
Fuse kernel requests are queued through the classical /dev/fuse
|
||||
read/write interface - until io-uring setup is complete.
|
||||
|
||||
In order to set up fuse-over-io-uring fuse-server (user-space)
|
||||
needs to submit SQEs (opcode = IORING_OP_URING_CMD) to the /dev/fuse
|
||||
connection file descriptor. Initial submit is with the sub command
|
||||
FUSE_IO_URING_CMD_REGISTER, which will just register entries to be
|
||||
available in the kernel.
|
||||
|
||||
Once at least one entry per queue is submitted, the kernel starts
|
||||
to enqueue to ring queues.
|
||||
Note, every CPU core has its own fuse-io-uring queue.
|
||||
Userspace handles the CQE/fuse-request and submits the result as
|
||||
subcommand FUSE_IO_URING_CMD_COMMIT_AND_FETCH - kernel completes
|
||||
the requests and also marks the entry available again. If there are
|
||||
pending requests waiting, the next request will be immediately submitted
|
||||
to the daemon again.
|
||||
|
||||
Initial SQE
|
||||
-----------::
|
||||
|
||||
| | FUSE filesystem daemon
|
||||
| |
|
||||
| | >io_uring_submit()
|
||||
| | IORING_OP_URING_CMD /
|
||||
| | FUSE_IO_URING_CMD_REGISTER
|
||||
| | [wait cqe]
|
||||
| | >io_uring_wait_cqe() or
|
||||
| | >io_uring_submit_and_wait()
|
||||
| |
|
||||
| >fuse_uring_cmd() |
|
||||
| >fuse_uring_register() |
|
||||
|
||||
|
||||
Sending requests with CQEs
|
||||
--------------------------::
|
||||
|
||||
| | FUSE filesystem daemon
|
||||
| | [waiting for CQEs]
|
||||
| "rm /mnt/fuse/file" |
|
||||
| |
|
||||
| >sys_unlink() |
|
||||
| >fuse_unlink() |
|
||||
| [allocate request] |
|
||||
| >fuse_send_one() |
|
||||
| ... |
|
||||
| >fuse_uring_queue_fuse_req |
|
||||
| [queue request on fg queue] |
|
||||
| >fuse_uring_add_req_to_ring_ent() |
|
||||
| ... |
|
||||
| >fuse_uring_copy_to_ring() |
|
||||
| >io_uring_cmd_done() |
|
||||
| >request_wait_answer() |
|
||||
| [sleep on req->waitq] |
|
||||
| | [receives and handles CQE]
|
||||
| | [submit result and fetch next]
|
||||
| | >io_uring_submit()
|
||||
| | IORING_OP_URING_CMD/
|
||||
| | FUSE_IO_URING_CMD_COMMIT_AND_FETCH
|
||||
| >fuse_uring_cmd() |
|
||||
| >fuse_uring_commit_fetch() |
|
||||
| >fuse_uring_commit() |
|
||||
| >fuse_uring_copy_from_ring() |
|
||||
| [ copy the result to the fuse req] |
|
||||
| >fuse_uring_req_end() |
|
||||
| >fuse_request_end() |
|
||||
| [wake up req->waitq] |
|
||||
| >fuse_uring_next_fuse_req |
|
||||
| [wait or handle next req] |
|
||||
| |
|
||||
| [req->waitq woken up] |
|
||||
| <fuse_unlink() |
|
||||
| <sys_unlink() |
|
||||
|
||||
|
||||
|
|
@ -98,6 +98,7 @@ Documentation for filesystem implementations.
|
|||
hpfs
|
||||
fuse
|
||||
fuse-io
|
||||
fuse-io-uring
|
||||
inotify
|
||||
isofs
|
||||
nilfs2
|
||||
|
|
|
@ -63,3 +63,15 @@ config FUSE_PASSTHROUGH
|
|||
to be performed directly on a backing file.
|
||||
|
||||
If you want to allow passthrough operations, answer Y.
|
||||
|
||||
config FUSE_IO_URING
|
||||
bool "FUSE communication over io-uring"
|
||||
default y
|
||||
depends on FUSE_FS
|
||||
depends on IO_URING
|
||||
help
|
||||
This allows sending FUSE requests over the io-uring interface and
|
||||
also adds request core affinity.
|
||||
|
||||
If you want to allow fuse server/client communication through io-uring,
|
||||
answer Y.
|
||||
|
|
|
@ -15,5 +15,6 @@ fuse-y += iomode.o
|
|||
fuse-$(CONFIG_FUSE_DAX) += dax.o
|
||||
fuse-$(CONFIG_FUSE_PASSTHROUGH) += passthrough.o
|
||||
fuse-$(CONFIG_SYSCTL) += sysctl.o
|
||||
fuse-$(CONFIG_FUSE_IO_URING) += dev_uring.o
|
||||
|
||||
virtiofs-y := virtio_fs.o
|
||||
|
|
|
@ -240,11 +240,12 @@ static int fuse_send_removemapping(struct inode *inode,
|
|||
|
||||
args.opcode = FUSE_REMOVEMAPPING;
|
||||
args.nodeid = fi->nodeid;
|
||||
args.in_numargs = 2;
|
||||
args.in_args[0].size = sizeof(*inargp);
|
||||
args.in_args[0].value = inargp;
|
||||
args.in_args[1].size = inargp->count * sizeof(*remove_one);
|
||||
args.in_args[1].value = remove_one;
|
||||
args.in_numargs = 3;
|
||||
fuse_set_zero_arg0(&args);
|
||||
args.in_args[1].size = sizeof(*inargp);
|
||||
args.in_args[1].value = inargp;
|
||||
args.in_args[2].size = inargp->count * sizeof(*remove_one);
|
||||
args.in_args[2].value = remove_one;
|
||||
return fuse_simple_request(fm, &args);
|
||||
}
|
||||
|
||||
|
|
127
fs/fuse/dev.c
127
fs/fuse/dev.c
|
@ -6,7 +6,9 @@
|
|||
See the file COPYING.
|
||||
*/
|
||||
|
||||
#include "dev_uring_i.h"
|
||||
#include "fuse_i.h"
|
||||
#include "fuse_dev_i.h"
|
||||
|
||||
#include <linux/init.h>
|
||||
#include <linux/module.h>
|
||||
|
@ -28,23 +30,8 @@
|
|||
MODULE_ALIAS_MISCDEV(FUSE_MINOR);
|
||||
MODULE_ALIAS("devname:fuse");
|
||||
|
||||
/* Ordinary requests have even IDs, while interrupts IDs are odd */
|
||||
#define FUSE_INT_REQ_BIT (1ULL << 0)
|
||||
#define FUSE_REQ_ID_STEP (1ULL << 1)
|
||||
|
||||
static struct kmem_cache *fuse_req_cachep;
|
||||
|
||||
static void end_requests(struct list_head *head);
|
||||
|
||||
static struct fuse_dev *fuse_get_dev(struct file *file)
|
||||
{
|
||||
/*
|
||||
* Lockless access is OK, because file->private data is set
|
||||
* once during mount and is valid until the file is released.
|
||||
*/
|
||||
return READ_ONCE(file->private_data);
|
||||
}
|
||||
|
||||
static void fuse_request_init(struct fuse_mount *fm, struct fuse_req *req)
|
||||
{
|
||||
INIT_LIST_HEAD(&req->list);
|
||||
|
@ -89,7 +76,8 @@ void fuse_set_initialized(struct fuse_conn *fc)
|
|||
|
||||
static bool fuse_block_alloc(struct fuse_conn *fc, bool for_background)
|
||||
{
|
||||
return !fc->initialized || (for_background && fc->blocked);
|
||||
return !fc->initialized || (for_background && fc->blocked) ||
|
||||
(fc->io_uring && !fuse_uring_ready(fc));
|
||||
}
|
||||
|
||||
static void fuse_drop_waiting(struct fuse_conn *fc)
|
||||
|
@ -234,7 +222,7 @@ u64 fuse_get_unique(struct fuse_iqueue *fiq)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(fuse_get_unique);
|
||||
|
||||
static unsigned int fuse_req_hash(u64 unique)
|
||||
unsigned int fuse_req_hash(u64 unique)
|
||||
{
|
||||
return hash_long(unique & ~FUSE_INT_REQ_BIT, FUSE_PQ_HASH_BITS);
|
||||
}
|
||||
|
@ -250,7 +238,8 @@ __releases(fiq->lock)
|
|||
spin_unlock(&fiq->lock);
|
||||
}
|
||||
|
||||
static void fuse_dev_queue_forget(struct fuse_iqueue *fiq, struct fuse_forget_link *forget)
|
||||
void fuse_dev_queue_forget(struct fuse_iqueue *fiq,
|
||||
struct fuse_forget_link *forget)
|
||||
{
|
||||
spin_lock(&fiq->lock);
|
||||
if (fiq->connected) {
|
||||
|
@ -263,7 +252,7 @@ static void fuse_dev_queue_forget(struct fuse_iqueue *fiq, struct fuse_forget_li
|
|||
}
|
||||
}
|
||||
|
||||
static void fuse_dev_queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
|
||||
void fuse_dev_queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
|
||||
{
|
||||
spin_lock(&fiq->lock);
|
||||
if (list_empty(&req->intr_entry)) {
|
||||
|
@ -580,7 +569,25 @@ ssize_t __fuse_simple_request(struct mnt_idmap *idmap,
|
|||
return ret;
|
||||
}
|
||||
|
||||
static bool fuse_request_queue_background(struct fuse_req *req)
|
||||
#ifdef CONFIG_FUSE_IO_URING
|
||||
static bool fuse_request_queue_background_uring(struct fuse_conn *fc,
|
||||
struct fuse_req *req)
|
||||
{
|
||||
struct fuse_iqueue *fiq = &fc->iq;
|
||||
|
||||
req->in.h.unique = fuse_get_unique(fiq);
|
||||
req->in.h.len = sizeof(struct fuse_in_header) +
|
||||
fuse_len_args(req->args->in_numargs,
|
||||
(struct fuse_arg *) req->args->in_args);
|
||||
|
||||
return fuse_uring_queue_bq_req(req);
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* @return true if queued
|
||||
*/
|
||||
static int fuse_request_queue_background(struct fuse_req *req)
|
||||
{
|
||||
struct fuse_mount *fm = req->fm;
|
||||
struct fuse_conn *fc = fm->fc;
|
||||
|
@ -592,6 +599,12 @@ static bool fuse_request_queue_background(struct fuse_req *req)
|
|||
atomic_inc(&fc->num_waiting);
|
||||
}
|
||||
__set_bit(FR_ISREPLY, &req->flags);
|
||||
|
||||
#ifdef CONFIG_FUSE_IO_URING
|
||||
if (fuse_uring_ready(fc))
|
||||
return fuse_request_queue_background_uring(fc, req);
|
||||
#endif
|
||||
|
||||
spin_lock(&fc->bg_lock);
|
||||
if (likely(fc->connected)) {
|
||||
fc->num_background++;
|
||||
|
@ -692,22 +705,8 @@ static int unlock_request(struct fuse_req *req)
|
|||
return err;
|
||||
}
|
||||
|
||||
struct fuse_copy_state {
|
||||
int write;
|
||||
struct fuse_req *req;
|
||||
struct iov_iter *iter;
|
||||
struct pipe_buffer *pipebufs;
|
||||
struct pipe_buffer *currbuf;
|
||||
struct pipe_inode_info *pipe;
|
||||
unsigned long nr_segs;
|
||||
struct page *pg;
|
||||
unsigned len;
|
||||
unsigned offset;
|
||||
unsigned move_pages:1;
|
||||
};
|
||||
|
||||
static void fuse_copy_init(struct fuse_copy_state *cs, int write,
|
||||
struct iov_iter *iter)
|
||||
void fuse_copy_init(struct fuse_copy_state *cs, int write,
|
||||
struct iov_iter *iter)
|
||||
{
|
||||
memset(cs, 0, sizeof(*cs));
|
||||
cs->write = write;
|
||||
|
@ -814,6 +813,9 @@ static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
|
|||
*size -= ncpy;
|
||||
cs->len -= ncpy;
|
||||
cs->offset += ncpy;
|
||||
if (cs->is_uring)
|
||||
cs->ring.copied_sz += ncpy;
|
||||
|
||||
return ncpy;
|
||||
}
|
||||
|
||||
|
@ -1068,9 +1070,9 @@ static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
|
|||
}
|
||||
|
||||
/* Copy request arguments to/from userspace buffer */
|
||||
static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
|
||||
unsigned argpages, struct fuse_arg *args,
|
||||
int zeroing)
|
||||
int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
|
||||
unsigned argpages, struct fuse_arg *args,
|
||||
int zeroing)
|
||||
{
|
||||
int err = 0;
|
||||
unsigned i;
|
||||
|
@ -1760,7 +1762,7 @@ static int fuse_retrieve(struct fuse_mount *fm, struct inode *inode,
|
|||
args = &ap->args;
|
||||
args->nodeid = outarg->nodeid;
|
||||
args->opcode = FUSE_NOTIFY_REPLY;
|
||||
args->in_numargs = 2;
|
||||
args->in_numargs = 3;
|
||||
args->in_pages = true;
|
||||
args->end = fuse_retrieve_end;
|
||||
|
||||
|
@ -1788,9 +1790,10 @@ static int fuse_retrieve(struct fuse_mount *fm, struct inode *inode,
|
|||
}
|
||||
ra->inarg.offset = outarg->offset;
|
||||
ra->inarg.size = total_len;
|
||||
args->in_args[0].size = sizeof(ra->inarg);
|
||||
args->in_args[0].value = &ra->inarg;
|
||||
args->in_args[1].size = total_len;
|
||||
fuse_set_zero_arg0(args);
|
||||
args->in_args[1].size = sizeof(ra->inarg);
|
||||
args->in_args[1].value = &ra->inarg;
|
||||
args->in_args[2].size = total_len;
|
||||
|
||||
err = fuse_simple_notify_reply(fm, args, outarg->notify_unique);
|
||||
if (err)
|
||||
|
@ -1885,7 +1888,7 @@ static void fuse_resend(struct fuse_conn *fc)
|
|||
spin_unlock(&fiq->lock);
|
||||
list_for_each_entry(req, &to_queue, list)
|
||||
clear_bit(FR_PENDING, &req->flags);
|
||||
end_requests(&to_queue);
|
||||
fuse_dev_end_requests(&to_queue);
|
||||
return;
|
||||
}
|
||||
/* iq and pq requests are both oldest to newest */
|
||||
|
@ -1934,7 +1937,7 @@ static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
|
|||
}
|
||||
|
||||
/* Look up request on processing list by unique ID */
|
||||
static struct fuse_req *request_find(struct fuse_pqueue *fpq, u64 unique)
|
||||
struct fuse_req *fuse_request_find(struct fuse_pqueue *fpq, u64 unique)
|
||||
{
|
||||
unsigned int hash = fuse_req_hash(unique);
|
||||
struct fuse_req *req;
|
||||
|
@ -1946,10 +1949,17 @@ static struct fuse_req *request_find(struct fuse_pqueue *fpq, u64 unique)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
static int copy_out_args(struct fuse_copy_state *cs, struct fuse_args *args,
|
||||
unsigned nbytes)
|
||||
int fuse_copy_out_args(struct fuse_copy_state *cs, struct fuse_args *args,
|
||||
unsigned nbytes)
|
||||
{
|
||||
unsigned reqsize = sizeof(struct fuse_out_header);
|
||||
|
||||
unsigned int reqsize = 0;
|
||||
|
||||
/*
|
||||
* Uring has all headers separated from args - args is payload only
|
||||
*/
|
||||
if (!cs->is_uring)
|
||||
reqsize = sizeof(struct fuse_out_header);
|
||||
|
||||
reqsize += fuse_len_args(args->out_numargs, args->out_args);
|
||||
|
||||
|
@ -2011,7 +2021,7 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
|
|||
spin_lock(&fpq->lock);
|
||||
req = NULL;
|
||||
if (fpq->connected)
|
||||
req = request_find(fpq, oh.unique & ~FUSE_INT_REQ_BIT);
|
||||
req = fuse_request_find(fpq, oh.unique & ~FUSE_INT_REQ_BIT);
|
||||
|
||||
err = -ENOENT;
|
||||
if (!req) {
|
||||
|
@ -2049,7 +2059,7 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
|
|||
if (oh.error)
|
||||
err = nbytes != sizeof(oh) ? -EINVAL : 0;
|
||||
else
|
||||
err = copy_out_args(cs, req->args, nbytes);
|
||||
err = fuse_copy_out_args(cs, req->args, nbytes);
|
||||
fuse_copy_finish(cs);
|
||||
|
||||
spin_lock(&fpq->lock);
|
||||
|
@ -2204,7 +2214,7 @@ static __poll_t fuse_dev_poll(struct file *file, poll_table *wait)
|
|||
}
|
||||
|
||||
/* Abort all requests on the given list (pending or processing) */
|
||||
static void end_requests(struct list_head *head)
|
||||
void fuse_dev_end_requests(struct list_head *head)
|
||||
{
|
||||
while (!list_empty(head)) {
|
||||
struct fuse_req *req;
|
||||
|
@ -2307,7 +2317,13 @@ void fuse_abort_conn(struct fuse_conn *fc)
|
|||
wake_up_all(&fc->blocked_waitq);
|
||||
spin_unlock(&fc->lock);
|
||||
|
||||
end_requests(&to_end);
|
||||
fuse_dev_end_requests(&to_end);
|
||||
|
||||
/*
|
||||
* fc->lock must not be taken to avoid conflicts with io-uring
|
||||
* locks
|
||||
*/
|
||||
fuse_uring_abort(fc);
|
||||
} else {
|
||||
spin_unlock(&fc->lock);
|
||||
}
|
||||
|
@ -2319,6 +2335,8 @@ void fuse_wait_aborted(struct fuse_conn *fc)
|
|||
/* matches implicit memory barrier in fuse_drop_waiting() */
|
||||
smp_mb();
|
||||
wait_event(fc->blocked_waitq, atomic_read(&fc->num_waiting) == 0);
|
||||
|
||||
fuse_uring_wait_stopped_queues(fc);
|
||||
}
|
||||
|
||||
int fuse_dev_release(struct inode *inode, struct file *file)
|
||||
|
@ -2337,7 +2355,7 @@ int fuse_dev_release(struct inode *inode, struct file *file)
|
|||
list_splice_init(&fpq->processing[i], &to_end);
|
||||
spin_unlock(&fpq->lock);
|
||||
|
||||
end_requests(&to_end);
|
||||
fuse_dev_end_requests(&to_end);
|
||||
|
||||
/* Are we the last open device? */
|
||||
if (atomic_dec_and_test(&fc->dev_count)) {
|
||||
|
@ -2475,6 +2493,9 @@ const struct file_operations fuse_dev_operations = {
|
|||
.fasync = fuse_dev_fasync,
|
||||
.unlocked_ioctl = fuse_dev_ioctl,
|
||||
.compat_ioctl = compat_ptr_ioctl,
|
||||
#ifdef CONFIG_FUSE_IO_URING
|
||||
.uring_cmd = fuse_uring_cmd,
|
||||
#endif
|
||||
};
|
||||
EXPORT_SYMBOL_GPL(fuse_dev_operations);
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,205 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0
|
||||
*
|
||||
* FUSE: Filesystem in Userspace
|
||||
* Copyright (c) 2023-2024 DataDirect Networks.
|
||||
*/
|
||||
|
||||
#ifndef _FS_FUSE_DEV_URING_I_H
|
||||
#define _FS_FUSE_DEV_URING_I_H
|
||||
|
||||
#include "fuse_i.h"
|
||||
|
||||
#ifdef CONFIG_FUSE_IO_URING
|
||||
|
||||
#define FUSE_URING_TEARDOWN_TIMEOUT (5 * HZ)
|
||||
#define FUSE_URING_TEARDOWN_INTERVAL (HZ/20)
|
||||
|
||||
enum fuse_ring_req_state {
|
||||
FRRS_INVALID = 0,
|
||||
|
||||
/* The ring entry was received from userspace and is being processed */
|
||||
FRRS_COMMIT,
|
||||
|
||||
/* The ring entry is waiting for new fuse requests */
|
||||
FRRS_AVAILABLE,
|
||||
|
||||
/* The ring entry got assigned a fuse req */
|
||||
FRRS_FUSE_REQ,
|
||||
|
||||
/* The ring entry is in or on the way to user space */
|
||||
FRRS_USERSPACE,
|
||||
|
||||
/* The ring entry is in teardown */
|
||||
FRRS_TEARDOWN,
|
||||
|
||||
/* The ring entry is released, but not freed yet */
|
||||
FRRS_RELEASED,
|
||||
};
|
||||
|
||||
/** A fuse ring entry, part of the ring queue */
|
||||
struct fuse_ring_ent {
|
||||
/* userspace buffer */
|
||||
struct fuse_uring_req_header __user *headers;
|
||||
void __user *payload;
|
||||
|
||||
/* the ring queue that owns the request */
|
||||
struct fuse_ring_queue *queue;
|
||||
|
||||
/* fields below are protected by queue->lock */
|
||||
|
||||
struct io_uring_cmd *cmd;
|
||||
|
||||
struct list_head list;
|
||||
|
||||
enum fuse_ring_req_state state;
|
||||
|
||||
struct fuse_req *fuse_req;
|
||||
};
|
||||
|
||||
struct fuse_ring_queue {
|
||||
/*
|
||||
* back pointer to the main fuse uring structure that holds this
|
||||
* queue
|
||||
*/
|
||||
struct fuse_ring *ring;
|
||||
|
||||
/* queue id, corresponds to the cpu core */
|
||||
unsigned int qid;
|
||||
|
||||
/*
|
||||
* queue lock, taken when any value in the queue changes _and_ also
|
||||
* a ring entry state changes.
|
||||
*/
|
||||
spinlock_t lock;
|
||||
|
||||
/* available ring entries (struct fuse_ring_ent) */
|
||||
struct list_head ent_avail_queue;
|
||||
|
||||
/*
|
||||
* entries in the process of being committed or in the process
|
||||
* to be sent to userspace
|
||||
*/
|
||||
struct list_head ent_w_req_queue;
|
||||
struct list_head ent_commit_queue;
|
||||
|
||||
/* entries in userspace */
|
||||
struct list_head ent_in_userspace;
|
||||
|
||||
/* entries that are released */
|
||||
struct list_head ent_released;
|
||||
|
||||
/* fuse requests waiting for an entry slot */
|
||||
struct list_head fuse_req_queue;
|
||||
|
||||
/* background fuse requests */
|
||||
struct list_head fuse_req_bg_queue;
|
||||
|
||||
struct fuse_pqueue fpq;
|
||||
|
||||
unsigned int active_background;
|
||||
|
||||
bool stopped;
|
||||
};
|
||||
|
||||
/**
|
||||
* Describes whether uring is used for communication and holds all the data needed
|
||||
* for uring communication
|
||||
*/
|
||||
struct fuse_ring {
|
||||
/* back pointer */
|
||||
struct fuse_conn *fc;
|
||||
|
||||
/* number of ring queues */
|
||||
size_t nr_queues;
|
||||
|
||||
/* maximum payload/arg size */
|
||||
size_t max_payload_sz;
|
||||
|
||||
struct fuse_ring_queue **queues;
|
||||
|
||||
/*
|
||||
* Log ring entry states on stop when entries cannot be released
|
||||
*/
|
||||
unsigned int stop_debug_log : 1;
|
||||
|
||||
wait_queue_head_t stop_waitq;
|
||||
|
||||
/* async tear down */
|
||||
struct delayed_work async_teardown_work;
|
||||
|
||||
/* log */
|
||||
unsigned long teardown_time;
|
||||
|
||||
atomic_t queue_refs;
|
||||
|
||||
bool ready;
|
||||
};
|
||||
|
||||
bool fuse_uring_enabled(void);
|
||||
void fuse_uring_destruct(struct fuse_conn *fc);
|
||||
void fuse_uring_stop_queues(struct fuse_ring *ring);
|
||||
void fuse_uring_abort_end_requests(struct fuse_ring *ring);
|
||||
int fuse_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags);
|
||||
void fuse_uring_queue_fuse_req(struct fuse_iqueue *fiq, struct fuse_req *req);
|
||||
bool fuse_uring_queue_bq_req(struct fuse_req *req);
|
||||
|
||||
static inline void fuse_uring_abort(struct fuse_conn *fc)
|
||||
{
|
||||
struct fuse_ring *ring = fc->ring;
|
||||
|
||||
if (ring == NULL)
|
||||
return;
|
||||
|
||||
if (atomic_read(&ring->queue_refs) > 0) {
|
||||
fuse_uring_abort_end_requests(ring);
|
||||
fuse_uring_stop_queues(ring);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void fuse_uring_wait_stopped_queues(struct fuse_conn *fc)
|
||||
{
|
||||
struct fuse_ring *ring = fc->ring;
|
||||
|
||||
if (ring)
|
||||
wait_event(ring->stop_waitq,
|
||||
atomic_read(&ring->queue_refs) == 0);
|
||||
}
|
||||
|
||||
static inline bool fuse_uring_ready(struct fuse_conn *fc)
|
||||
{
|
||||
return fc->ring && fc->ring->ready;
|
||||
}
|
||||
|
||||
#else /* CONFIG_FUSE_IO_URING */
|
||||
|
||||
struct fuse_ring;
|
||||
|
||||
static inline void fuse_uring_create(struct fuse_conn *fc)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void fuse_uring_destruct(struct fuse_conn *fc)
|
||||
{
|
||||
}
|
||||
|
||||
static inline bool fuse_uring_enabled(void)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline void fuse_uring_abort(struct fuse_conn *fc)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void fuse_uring_wait_stopped_queues(struct fuse_conn *fc)
|
||||
{
|
||||
}
|
||||
|
||||
static inline bool fuse_uring_ready(struct fuse_conn *fc)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
#endif /* CONFIG_FUSE_IO_URING */
|
||||
|
||||
#endif /* _FS_FUSE_DEV_URING_I_H */
|
|
@ -175,9 +175,10 @@ static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args,
|
|||
memset(outarg, 0, sizeof(struct fuse_entry_out));
|
||||
args->opcode = FUSE_LOOKUP;
|
||||
args->nodeid = nodeid;
|
||||
args->in_numargs = 1;
|
||||
args->in_args[0].size = name->len + 1;
|
||||
args->in_args[0].value = name->name;
|
||||
args->in_numargs = 2;
|
||||
fuse_set_zero_arg0(args);
|
||||
args->in_args[1].size = name->len + 1;
|
||||
args->in_args[1].value = name->name;
|
||||
args->out_numargs = 1;
|
||||
args->out_args[0].size = sizeof(struct fuse_entry_out);
|
||||
args->out_args[0].value = outarg;
|
||||
|
@ -929,11 +930,12 @@ static int fuse_symlink(struct mnt_idmap *idmap, struct inode *dir,
|
|||
FUSE_ARGS(args);
|
||||
|
||||
args.opcode = FUSE_SYMLINK;
|
||||
args.in_numargs = 2;
|
||||
args.in_args[0].size = entry->d_name.len + 1;
|
||||
args.in_args[0].value = entry->d_name.name;
|
||||
args.in_args[1].size = len;
|
||||
args.in_args[1].value = link;
|
||||
args.in_numargs = 3;
|
||||
fuse_set_zero_arg0(&args);
|
||||
args.in_args[1].size = entry->d_name.len + 1;
|
||||
args.in_args[1].value = entry->d_name.name;
|
||||
args.in_args[2].size = len;
|
||||
args.in_args[2].value = link;
|
||||
return create_new_entry(idmap, fm, &args, dir, entry, S_IFLNK);
|
||||
}
|
||||
|
||||
|
@ -993,9 +995,10 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
|
|||
|
||||
args.opcode = FUSE_UNLINK;
|
||||
args.nodeid = get_node_id(dir);
|
||||
args.in_numargs = 1;
|
||||
args.in_args[0].size = entry->d_name.len + 1;
|
||||
args.in_args[0].value = entry->d_name.name;
|
||||
args.in_numargs = 2;
|
||||
fuse_set_zero_arg0(&args);
|
||||
args.in_args[1].size = entry->d_name.len + 1;
|
||||
args.in_args[1].value = entry->d_name.name;
|
||||
err = fuse_simple_request(fm, &args);
|
||||
if (!err) {
|
||||
fuse_dir_changed(dir);
|
||||
|
@ -1016,9 +1019,10 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry)
|
|||
|
||||
args.opcode = FUSE_RMDIR;
|
||||
args.nodeid = get_node_id(dir);
|
||||
args.in_numargs = 1;
|
||||
args.in_args[0].size = entry->d_name.len + 1;
|
||||
args.in_args[0].value = entry->d_name.name;
|
||||
args.in_numargs = 2;
|
||||
fuse_set_zero_arg0(&args);
|
||||
args.in_args[1].size = entry->d_name.len + 1;
|
||||
args.in_args[1].value = entry->d_name.name;
|
||||
err = fuse_simple_request(fm, &args);
|
||||
if (!err) {
|
||||
fuse_dir_changed(dir);
|
||||
|
|
|
@ -0,0 +1,66 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0
|
||||
*
|
||||
* FUSE: Filesystem in Userspace
|
||||
* Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu>
|
||||
*/
|
||||
#ifndef _FS_FUSE_DEV_I_H
|
||||
#define _FS_FUSE_DEV_I_H
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
/* Ordinary requests have even IDs, while interrupt IDs are odd */
|
||||
#define FUSE_INT_REQ_BIT (1ULL << 0)
|
||||
#define FUSE_REQ_ID_STEP (1ULL << 1)
|
||||
|
||||
struct fuse_arg;
|
||||
struct fuse_args;
|
||||
struct fuse_pqueue;
|
||||
struct fuse_req;
|
||||
struct fuse_iqueue;
|
||||
struct fuse_forget_link;
|
||||
|
||||
struct fuse_copy_state {
|
||||
int write;
|
||||
struct fuse_req *req;
|
||||
struct iov_iter *iter;
|
||||
struct pipe_buffer *pipebufs;
|
||||
struct pipe_buffer *currbuf;
|
||||
struct pipe_inode_info *pipe;
|
||||
unsigned long nr_segs;
|
||||
struct page *pg;
|
||||
unsigned int len;
|
||||
unsigned int offset;
|
||||
unsigned int move_pages:1;
|
||||
unsigned int is_uring:1;
|
||||
struct {
|
||||
unsigned int copied_sz; /* copied size into the user buffer */
|
||||
} ring;
|
||||
};
|
||||
|
||||
static inline struct fuse_dev *fuse_get_dev(struct file *file)
|
||||
{
|
||||
/*
|
||||
* Lockless access is OK, because file->private_data is set
|
||||
* once during mount and is valid until the file is released.
|
||||
*/
|
||||
return READ_ONCE(file->private_data);
|
||||
}
|
||||
|
||||
unsigned int fuse_req_hash(u64 unique);
|
||||
struct fuse_req *fuse_request_find(struct fuse_pqueue *fpq, u64 unique);
|
||||
|
||||
void fuse_dev_end_requests(struct list_head *head);
|
||||
|
||||
void fuse_copy_init(struct fuse_copy_state *cs, int write,
|
||||
struct iov_iter *iter);
|
||||
int fuse_copy_args(struct fuse_copy_state *cs, unsigned int numargs,
|
||||
unsigned int argpages, struct fuse_arg *args,
|
||||
int zeroing);
|
||||
int fuse_copy_out_args(struct fuse_copy_state *cs, struct fuse_args *args,
|
||||
unsigned int nbytes);
|
||||
void fuse_dev_queue_forget(struct fuse_iqueue *fiq,
|
||||
struct fuse_forget_link *forget);
|
||||
void fuse_dev_queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req);
|
||||
|
||||
#endif
|
||||
|
|
@ -310,7 +310,7 @@ struct fuse_args {
|
|||
bool is_ext:1;
|
||||
bool is_pinned:1;
|
||||
bool invalidate_vmap:1;
|
||||
struct fuse_in_arg in_args[3];
|
||||
struct fuse_in_arg in_args[4];
|
||||
struct fuse_arg out_args[2];
|
||||
void (*end)(struct fuse_mount *fm, struct fuse_args *args, int error);
|
||||
/* Used for kvec iter backed by vmalloc address */
|
||||
|
@ -438,6 +438,10 @@ struct fuse_req {
|
|||
|
||||
/** fuse_mount this request belongs to */
|
||||
struct fuse_mount *fm;
|
||||
|
||||
#ifdef CONFIG_FUSE_IO_URING
|
||||
void *ring_entry;
|
||||
#endif
|
||||
};
|
||||
|
||||
struct fuse_iqueue;
|
||||
|
@ -863,6 +867,9 @@ struct fuse_conn {
|
|||
/* Use pages instead of pointer for kernel I/O */
|
||||
unsigned int use_pages_for_kvec_io:1;
|
||||
|
||||
/* Use io_uring for communication */
|
||||
unsigned int io_uring;
|
||||
|
||||
/** Maximum stack depth for passthrough backing files */
|
||||
int max_stack_depth;
|
||||
|
||||
|
@ -923,6 +930,11 @@ struct fuse_conn {
|
|||
/** IDR for backing files ids */
|
||||
struct idr backing_files_map;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_FUSE_IO_URING
|
||||
/** uring connection information */
|
||||
struct fuse_ring *ring;
|
||||
#endif
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -947,6 +959,19 @@ struct fuse_mount {
|
|||
struct rcu_head rcu;
|
||||
};
|
||||
|
||||
/*
|
||||
* Empty header for FUSE opcodes without specific header needs.
|
||||
* Used as a placeholder in args->in_args[0] for consistency
|
||||
* across all FUSE operations, simplifying request handling.
|
||||
*/
|
||||
struct fuse_zero_header {};
|
||||
|
||||
static inline void fuse_set_zero_arg0(struct fuse_args *args)
|
||||
{
|
||||
args->in_args[0].size = sizeof(struct fuse_zero_header);
|
||||
args->in_args[0].value = NULL;
|
||||
}
|
||||
|
||||
static inline struct fuse_mount *get_fuse_mount_super(struct super_block *sb)
|
||||
{
|
||||
return sb->s_fs_info;
|
||||
|
@ -1219,6 +1244,11 @@ void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o);
|
|||
*/
|
||||
struct fuse_conn *fuse_conn_get(struct fuse_conn *fc);
|
||||
|
||||
/**
|
||||
* Initialize the fuse processing queue
|
||||
*/
|
||||
void fuse_pqueue_init(struct fuse_pqueue *fpq);
|
||||
|
||||
/**
|
||||
* Initialize fuse_conn
|
||||
*/
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
*/
|
||||
|
||||
#include "fuse_i.h"
|
||||
#include "dev_uring_i.h"
|
||||
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/slab.h>
|
||||
|
@ -937,7 +938,7 @@ static void fuse_iqueue_init(struct fuse_iqueue *fiq,
|
|||
fiq->priv = priv;
|
||||
}
|
||||
|
||||
static void fuse_pqueue_init(struct fuse_pqueue *fpq)
|
||||
void fuse_pqueue_init(struct fuse_pqueue *fpq)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
|
@ -992,6 +993,8 @@ static void delayed_release(struct rcu_head *p)
|
|||
{
|
||||
struct fuse_conn *fc = container_of(p, struct fuse_conn, rcu);
|
||||
|
||||
fuse_uring_destruct(fc);
|
||||
|
||||
put_user_ns(fc->user_ns);
|
||||
fc->release(fc);
|
||||
}
|
||||
|
@ -1387,6 +1390,8 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args,
|
|||
else
|
||||
ok = false;
|
||||
}
|
||||
if (flags & FUSE_OVER_IO_URING && fuse_uring_enabled())
|
||||
fc->io_uring = 1;
|
||||
} else {
|
||||
ra_pages = fc->max_read / PAGE_SIZE;
|
||||
fc->no_lock = 1;
|
||||
|
@ -1446,6 +1451,13 @@ void fuse_send_init(struct fuse_mount *fm)
|
|||
if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
|
||||
flags |= FUSE_PASSTHROUGH;
|
||||
|
||||
/*
|
||||
* This is just an information flag for fuse server. No need to check
|
||||
* the reply - server is either sending IORING_OP_URING_CMD or not.
|
||||
*/
|
||||
if (fuse_uring_enabled())
|
||||
flags |= FUSE_OVER_IO_URING;
|
||||
|
||||
ia->in.flags = flags;
|
||||
ia->in.flags2 = flags >> 32;
|
||||
|
||||
|
|
|
@ -164,9 +164,10 @@ int fuse_removexattr(struct inode *inode, const char *name)
|
|||
|
||||
args.opcode = FUSE_REMOVEXATTR;
|
||||
args.nodeid = get_node_id(inode);
|
||||
args.in_numargs = 1;
|
||||
args.in_args[0].size = strlen(name) + 1;
|
||||
args.in_args[0].value = name;
|
||||
args.in_numargs = 2;
|
||||
fuse_set_zero_arg0(&args);
|
||||
args.in_args[1].size = strlen(name) + 1;
|
||||
args.in_args[1].value = name;
|
||||
err = fuse_simple_request(fm, &args);
|
||||
if (err == -ENOSYS) {
|
||||
fm->fc->no_removexattr = 1;
|
||||
|
|
|
@ -220,6 +220,15 @@
|
|||
*
|
||||
* 7.41
|
||||
* - add FUSE_ALLOW_IDMAP
|
||||
* 7.42
|
||||
* - Add FUSE_OVER_IO_URING and all other io-uring related flags and data
|
||||
* structures:
|
||||
* - struct fuse_uring_ent_in_out
|
||||
* - struct fuse_uring_req_header
|
||||
* - struct fuse_uring_cmd_req
|
||||
* - FUSE_URING_IN_OUT_HEADER_SZ
|
||||
* - FUSE_URING_OP_IN_OUT_SZ
|
||||
* - enum fuse_uring_cmd
|
||||
*/
|
||||
|
||||
#ifndef _LINUX_FUSE_H
|
||||
|
@ -255,7 +264,7 @@
|
|||
#define FUSE_KERNEL_VERSION 7
|
||||
|
||||
/** Minor version number of this interface */
|
||||
#define FUSE_KERNEL_MINOR_VERSION 41
|
||||
#define FUSE_KERNEL_MINOR_VERSION 42
|
||||
|
||||
/** The node ID of the root inode */
|
||||
#define FUSE_ROOT_ID 1
|
||||
|
@ -425,6 +434,7 @@ struct fuse_file_lock {
|
|||
* FUSE_HAS_RESEND: kernel supports resending pending requests, and the high bit
|
||||
* of the request ID indicates resend requests
|
||||
* FUSE_ALLOW_IDMAP: allow creation of idmapped mounts
|
||||
* FUSE_OVER_IO_URING: Indicate that client supports io-uring
|
||||
*/
|
||||
#define FUSE_ASYNC_READ (1 << 0)
|
||||
#define FUSE_POSIX_LOCKS (1 << 1)
|
||||
|
@ -471,6 +481,7 @@ struct fuse_file_lock {
|
|||
/* Obsolete alias for FUSE_DIRECT_IO_ALLOW_MMAP */
|
||||
#define FUSE_DIRECT_IO_RELAX FUSE_DIRECT_IO_ALLOW_MMAP
|
||||
#define FUSE_ALLOW_IDMAP (1ULL << 40)
|
||||
#define FUSE_OVER_IO_URING (1ULL << 41)
|
||||
|
||||
/**
|
||||
* CUSE INIT request/reply flags
|
||||
|
@ -1206,4 +1217,67 @@ struct fuse_supp_groups {
|
|||
uint32_t groups[];
|
||||
};
|
||||
|
||||
/**
|
||||
* Size of the ring buffer header
|
||||
*/
|
||||
#define FUSE_URING_IN_OUT_HEADER_SZ 128
|
||||
#define FUSE_URING_OP_IN_OUT_SZ 128
|
||||
|
||||
/* Used as part of the fuse_uring_req_header */
|
||||
struct fuse_uring_ent_in_out {
|
||||
uint64_t flags;
|
||||
|
||||
/*
|
||||
* commit ID to be used in a reply to a ring request (see also
|
||||
* struct fuse_uring_cmd_req)
|
||||
*/
|
||||
uint64_t commit_id;
|
||||
|
||||
/* size of user payload buffer */
|
||||
uint32_t payload_sz;
|
||||
uint32_t padding;
|
||||
|
||||
uint64_t reserved;
|
||||
};
|
||||
|
||||
/**
|
||||
* Header for all fuse-io-uring requests
|
||||
*/
|
||||
struct fuse_uring_req_header {
|
||||
/* struct fuse_in_header / struct fuse_out_header */
|
||||
char in_out[FUSE_URING_IN_OUT_HEADER_SZ];
|
||||
|
||||
/* per op code header */
|
||||
char op_in[FUSE_URING_OP_IN_OUT_SZ];
|
||||
|
||||
struct fuse_uring_ent_in_out ring_ent_in_out;
|
||||
};
|
||||
|
||||
/**
|
||||
* sqe commands to the kernel
|
||||
*/
|
||||
enum fuse_uring_cmd {
|
||||
FUSE_IO_URING_CMD_INVALID = 0,
|
||||
|
||||
/* register the request buffer and fetch a fuse request */
|
||||
FUSE_IO_URING_CMD_REGISTER = 1,
|
||||
|
||||
/* commit fuse request result and fetch next request */
|
||||
FUSE_IO_URING_CMD_COMMIT_AND_FETCH = 2,
|
||||
};
|
||||
|
||||
/**
|
||||
* In the 80B command area of the SQE.
|
||||
*/
|
||||
struct fuse_uring_cmd_req {
|
||||
uint64_t flags;
|
||||
|
||||
/* entry identifier for commits */
|
||||
uint64_t commit_id;
|
||||
|
||||
/* queue the command is for (queue index) */
|
||||
uint16_t qid;
|
||||
uint8_t padding[6];
|
||||
};
|
||||
|
||||
#endif /* _LINUX_FUSE_H */
|
||||
|
|
Loading…
Reference in New Issue