Centos-kernel-stream-9/io_uring/uring_cmd.c

357 lines
9.4 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/io_uring/cmd.h>
#include <linux/io_uring/net.h>
#include <linux/security.h>
#include <linux/nospec.h>
#include <net/sock.h>
#include <uapi/linux/io_uring.h>
io_uring/cmd: fix breakage in SOCKET_URING_OP_SIOC* implementation JIRA: https://issues.redhat.com/browse/RHEL-21391 commit 1ba0e9d69b2000e95267c888cbfa91d823388d47 Author: Al Viro <viro@zeniv.linux.org.uk> Date: Thu Dec 14 21:34:08 2023 +0000 io_uring/cmd: fix breakage in SOCKET_URING_OP_SIOC* implementation In 8e9fad0e70b7 "io_uring: Add io_uring command support for sockets" you've got an include of asm-generic/ioctls.h done in io_uring/uring_cmd.c. That had been done for the sake of this chunk - + ret = prot->ioctl(sk, SIOCINQ, &arg); + if (ret) + return ret; + return arg; + case SOCKET_URING_OP_SIOCOUTQ: + ret = prot->ioctl(sk, SIOCOUTQ, &arg); SIOC{IN,OUT}Q are defined to symbols (FIONREAD and TIOCOUTQ) that come from ioctls.h, all right, but the values vary by the architecture. FIONREAD is 0x467F on mips 0x4004667F on alpha, powerpc and sparc 0x8004667F on sh and xtensa 0x541B everywhere else TIOCOUTQ is 0x7472 on mips 0x40047473 on alpha, powerpc and sparc 0x80047473 on sh and xtensa 0x5411 everywhere else ->ioctl() expects the same values it would've gotten from userland; all places where we compare with SIOC{IN,OUT}Q are using asm/ioctls.h, so they pick the correct values. io_uring_cmd_sock(), OTOH, ends up passing the default ones. Fixes: 8e9fad0e70b7 ("io_uring: Add io_uring command support for sockets") Cc: <stable@vger.kernel.org> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> Link: https://lore.kernel.org/r/20231214213408.GT1674809@ZenIV Signed-off-by: Jens Axboe <axboe@kernel.dk> Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
2023-12-14 21:34:08 +00:00
#include <asm/ioctls.h>
#include "io_uring.h"
#include "alloc_cache.h"
#include "rsrc.h"
#include "uring_cmd.h"
static struct uring_cache *io_uring_async_get(struct io_kiocb *req)
{
struct io_ring_ctx *ctx = req->ctx;
struct uring_cache *cache;
cache = io_alloc_cache_get(&ctx->uring_cache);
if (cache) {
req->flags |= REQ_F_ASYNC_DATA;
req->async_data = cache;
return cache;
}
if (!io_alloc_async_data(req))
return req->async_data;
return NULL;
}
static void io_req_uring_cleanup(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
struct uring_cache *cache = req->async_data;
if (issue_flags & IO_URING_F_UNLOCKED)
return;
if (io_alloc_cache_put(&req->ctx->uring_cache, cache)) {
ioucmd->sqe = NULL;
req->async_data = NULL;
req->flags &= ~REQ_F_ASYNC_DATA;
}
}
bool io_uring_try_cancel_uring_cmd(struct io_ring_ctx *ctx,
struct task_struct *task, bool cancel_all)
{
struct hlist_node *tmp;
struct io_kiocb *req;
bool ret = false;
lockdep_assert_held(&ctx->uring_lock);
hlist_for_each_entry_safe(req, tmp, &ctx->cancelable_uring_cmd,
hash_node) {
struct io_uring_cmd *cmd = io_kiocb_to_cmd(req,
struct io_uring_cmd);
struct file *file = req->file;
if (!cancel_all && req->task != task)
continue;
if (cmd->flags & IORING_URING_CMD_CANCELABLE) {
/* ->sqe isn't available if no async data */
if (!req_has_async_data(req))
cmd->sqe = NULL;
file->f_op->uring_cmd(cmd, IO_URING_F_CANCEL |
IO_URING_F_COMPLETE_DEFER);
ret = true;
}
}
io_submit_flush_completions(ctx);
return ret;
}
static void io_uring_cmd_del_cancelable(struct io_uring_cmd *cmd,
unsigned int issue_flags)
{
struct io_kiocb *req = cmd_to_io_kiocb(cmd);
struct io_ring_ctx *ctx = req->ctx;
if (!(cmd->flags & IORING_URING_CMD_CANCELABLE))
return;
cmd->flags &= ~IORING_URING_CMD_CANCELABLE;
io_ring_submit_lock(ctx, issue_flags);
hlist_del(&req->hash_node);
io_ring_submit_unlock(ctx, issue_flags);
}
/*
* Mark this command as concelable, then io_uring_try_cancel_uring_cmd()
* will try to cancel this issued command by sending ->uring_cmd() with
* issue_flags of IO_URING_F_CANCEL.
*
* The command is guaranteed to not be done when calling ->uring_cmd()
* with IO_URING_F_CANCEL, but it is driver's responsibility to deal
* with race between io_uring canceling and normal completion.
*/
void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
unsigned int issue_flags)
{
struct io_kiocb *req = cmd_to_io_kiocb(cmd);
struct io_ring_ctx *ctx = req->ctx;
if (!(cmd->flags & IORING_URING_CMD_CANCELABLE)) {
cmd->flags |= IORING_URING_CMD_CANCELABLE;
io_ring_submit_lock(ctx, issue_flags);
hlist_add_head(&req->hash_node, &ctx->cancelable_uring_cmd);
io_ring_submit_unlock(ctx, issue_flags);
}
}
EXPORT_SYMBOL_GPL(io_uring_cmd_mark_cancelable);
static void io_uring_cmd_work(struct io_kiocb *req, struct io_tw_state *ts)
{
struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
/* task_work executor checks the deffered list completion */
ioucmd->task_work_cb(ioucmd, IO_URING_F_COMPLETE_DEFER);
}
void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd,
void (*task_work_cb)(struct io_uring_cmd *, unsigned),
unsigned flags)
{
struct io_kiocb *req = cmd_to_io_kiocb(ioucmd);
ioucmd->task_work_cb = task_work_cb;
req->io_task_work.func = io_uring_cmd_work;
__io_req_task_work_add(req, flags);
}
EXPORT_SYMBOL_GPL(__io_uring_cmd_do_in_task);
static inline void io_req_set_cqe32_extra(struct io_kiocb *req,
u64 extra1, u64 extra2)
{
req->big_cqe.extra1 = extra1;
req->big_cqe.extra2 = extra2;
}
/*
* Called by consumers of io_uring_cmd, if they originally returned
* -EIOCBQUEUED upon receiving the command.
*/
void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, ssize_t res2,
unsigned issue_flags)
{
struct io_kiocb *req = cmd_to_io_kiocb(ioucmd);
io_uring_cmd_del_cancelable(ioucmd, issue_flags);
if (ret < 0)
req_set_fail(req);
io_req_set_res(req, ret, 0);
if (req->ctx->flags & IORING_SETUP_CQE32)
io_req_set_cqe32_extra(req, res2, 0);
io_req_uring_cleanup(req, issue_flags);
if (req->ctx->flags & IORING_SETUP_IOPOLL) {
/* order with io_iopoll_req_issued() checking ->iopoll_complete */
smp_store_release(&req->iopoll_completed, 1);
} else if (issue_flags & IO_URING_F_COMPLETE_DEFER) {
if (WARN_ON_ONCE(issue_flags & IO_URING_F_UNLOCKED))
return;
io_req_complete_defer(req);
} else {
req->io_task_work.func = io_req_task_complete;
io_req_task_work_add(req);
}
}
EXPORT_SYMBOL_GPL(io_uring_cmd_done);
static int io_uring_cmd_prep_setup(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
struct uring_cache *cache;
cache = io_uring_async_get(req);
if (unlikely(!cache))
return -ENOMEM;
if (!(req->flags & REQ_F_FORCE_ASYNC)) {
/* defer memcpy until we need it */
ioucmd->sqe = sqe;
return 0;
}
memcpy(req->async_data, sqe, uring_sqe_size(req->ctx));
ioucmd->sqe = req->async_data;
return 0;
}
int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
if (sqe->__pad1)
return -EINVAL;
ioucmd->flags = READ_ONCE(sqe->uring_cmd_flags);
if (ioucmd->flags & ~IORING_URING_CMD_MASK)
return -EINVAL;
if (ioucmd->flags & IORING_URING_CMD_FIXED) {
struct io_ring_ctx *ctx = req->ctx;
u16 index;
req->buf_index = READ_ONCE(sqe->buf_index);
if (unlikely(req->buf_index >= ctx->nr_user_bufs))
return -EFAULT;
index = array_index_nospec(req->buf_index, ctx->nr_user_bufs);
req->imu = ctx->user_bufs[index];
io_req_set_rsrc_node(req, ctx, 0);
}
ioucmd->cmd_op = READ_ONCE(sqe->cmd_op);
return io_uring_cmd_prep_setup(req, sqe);
}
int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
struct io_ring_ctx *ctx = req->ctx;
struct file *file = req->file;
int ret;
if (!file->f_op->uring_cmd)
return -EOPNOTSUPP;
ret = security_uring_cmd(ioucmd);
if (ret)
return ret;
if (ctx->flags & IORING_SETUP_SQE128)
issue_flags |= IO_URING_F_SQE128;
if (ctx->flags & IORING_SETUP_CQE32)
issue_flags |= IO_URING_F_CQE32;
if (ctx->compat)
issue_flags |= IO_URING_F_COMPAT;
if (ctx->flags & IORING_SETUP_IOPOLL) {
if (!file->f_op->uring_cmd_iopoll)
return -EOPNOTSUPP;
issue_flags |= IO_URING_F_IOPOLL;
req->iopoll_completed = 0;
}
ret = file->f_op->uring_cmd(ioucmd, issue_flags);
if (ret == -EAGAIN) {
struct uring_cache *cache = req->async_data;
if (ioucmd->sqe != (void *) cache)
memcpy(cache, ioucmd->sqe, uring_sqe_size(req->ctx));
return -EAGAIN;
} else if (ret == -EIOCBQUEUED) {
return -EIOCBQUEUED;
}
if (ret < 0)
req_set_fail(req);
io_req_uring_cleanup(req, issue_flags);
io_req_set_res(req, ret, 0);
io_uring: fix lost getsockopt completions JIRA: https://issues.redhat.com/browse/RHEL-64867 commit 24dce1c538a7ceac43f2f97aae8dfd4bb93ea9b9 Author: Pavel Begunkov <asml.silence@gmail.com> Date: Tue Jul 16 19:05:46 2024 +0100 io_uring: fix lost getsockopt completions There is a report that iowq executed getsockopt never completes. The reason being that io_uring_cmd_sock() can return a positive result, and io_uring_cmd() propagates it back to core io_uring, instead of IOU_OK. In case of io_wq_submit_work(), the request will be dropped without completing it. The offending code was introduced by a hack in a9c3eda7eada9 ("io_uring: fix submission-failure handling for uring-cmd"), however it was fine until getsockopt was introduced and started returning positive results. The right solution is to always return IOU_OK, since e0b23d9953b0c ("io_uring: optimise ltimeout for inline execution"), we should be able to do it without problems, however for the sake of backporting and minimising side effects, let's keep returning negative return codes and otherwise do IOU_OK. Link: https://github.com/axboe/liburing/issues/1181 Cc: stable@vger.kernel.org Fixes: 8e9fad0e70b7b ("io_uring: Add io_uring command support for sockets") Signed-off-by: Pavel Begunkov <asml.silence@gmail.com> Reviewed-by: Breno Leitao <leitao@debian.org> Link: https://lore.kernel.org/r/ff349cf0654018189b6077e85feed935f0f8839e.1721149870.git.asml.silence@gmail.com Signed-off-by: Jens Axboe <axboe@kernel.dk> Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
2024-07-16 18:05:46 +00:00
return ret < 0 ? ret : IOU_OK;
}
int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
struct iov_iter *iter, void *ioucmd)
{
struct io_kiocb *req = cmd_to_io_kiocb(ioucmd);
return io_import_fixed(rw, iter, req->imu, ubuf, len);
}
EXPORT_SYMBOL_GPL(io_uring_cmd_import_fixed);
static inline int io_uring_cmd_getsockopt(struct socket *sock,
struct io_uring_cmd *cmd,
unsigned int issue_flags)
{
bool compat = !!(issue_flags & IO_URING_F_COMPAT);
int optlen, optname, level, err;
void __user *optval;
level = READ_ONCE(cmd->sqe->level);
if (level != SOL_SOCKET)
return -EOPNOTSUPP;
optval = u64_to_user_ptr(READ_ONCE(cmd->sqe->optval));
optname = READ_ONCE(cmd->sqe->optname);
optlen = READ_ONCE(cmd->sqe->optlen);
err = do_sock_getsockopt(sock, compat, level, optname,
USER_SOCKPTR(optval),
KERNEL_SOCKPTR(&optlen));
if (err)
return err;
/* On success, return optlen */
return optlen;
}
static inline int io_uring_cmd_setsockopt(struct socket *sock,
struct io_uring_cmd *cmd,
unsigned int issue_flags)
{
bool compat = !!(issue_flags & IO_URING_F_COMPAT);
int optname, optlen, level;
void __user *optval;
sockptr_t optval_s;
optval = u64_to_user_ptr(READ_ONCE(cmd->sqe->optval));
optname = READ_ONCE(cmd->sqe->optname);
optlen = READ_ONCE(cmd->sqe->optlen);
level = READ_ONCE(cmd->sqe->level);
optval_s = USER_SOCKPTR(optval);
return do_sock_setsockopt(sock, compat, level, optname, optval_s,
optlen);
}
#if defined(CONFIG_NET)
int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags)
{
struct socket *sock = cmd->file->private_data;
struct sock *sk = sock->sk;
struct proto *prot = READ_ONCE(sk->sk_prot);
int ret, arg = 0;
if (!prot || !prot->ioctl)
return -EOPNOTSUPP;
switch (cmd->sqe->cmd_op) {
case SOCKET_URING_OP_SIOCINQ:
ret = prot->ioctl(sk, SIOCINQ, &arg);
if (ret)
return ret;
return arg;
case SOCKET_URING_OP_SIOCOUTQ:
ret = prot->ioctl(sk, SIOCOUTQ, &arg);
if (ret)
return ret;
return arg;
case SOCKET_URING_OP_GETSOCKOPT:
return io_uring_cmd_getsockopt(sock, cmd, issue_flags);
case SOCKET_URING_OP_SETSOCKOPT:
return io_uring_cmd_setsockopt(sock, cmd, issue_flags);
default:
return -EOPNOTSUPP;
}
}
EXPORT_SYMBOL_GPL(io_uring_cmd_sock);
#endif