Centos-kernel-stream-9/io_uring/cancel.c

352 lines
8.0 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/namei.h>
io_uring: add sync cancelation API through io_uring_register() Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2068237 commit 78a861b9495920f8609dee5b670dacbff09d359f Author: Jens Axboe <axboe@kernel.dk> Date: Sat Jun 18 10:00:50 2022 -0600 io_uring: add sync cancelation API through io_uring_register() The io_uring cancelation API is async, like any other API that we expose there. For the case of finding a request to cancel, or not finding one, it is fully sync in that when submission returns, the CQE for both the cancelation request and the targeted request have been posted to the CQ ring. However, if the targeted work is being executed by io-wq, the API can only start the act of canceling it. This makes it difficult to use in some circumstances, as the caller then has to wait for the CQEs to come in and match on the same cancelation data there. Provide a IORING_REGISTER_SYNC_CANCEL command for io_uring_register() that does sync cancelations, always. For the io-wq case, it'll wait for the cancelation to come in before returning. The only expected returns from this API is: 0 Request found and canceled fine. > 0 Requests found and canceled. Only happens if asked to cancel multiple requests, and if the work wasn't in progress. -ENOENT Request not found. -ETIME A timeout on the operation was requested, but the timeout expired before we could cancel. and we won't get -EALREADY via this API. If the timeout value passed in is -1 (tv_sec and tv_nsec), then that means that no timeout is requested. Otherwise, the timespec passed in is the amount of time the sync cancel will wait for a successful cancelation. Link: https://github.com/axboe/liburing/discussions/608 Signed-off-by: Jens Axboe <axboe@kernel.dk> Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
2022-06-18 16:00:50 +00:00
#include <linux/nospec.h>
#include <linux/io_uring.h>
#include <uapi/linux/io_uring.h>
#include "io_uring.h"
#include "tctx.h"
#include "poll.h"
#include "timeout.h"
#include "waitid.h"
io_uring: add support for futex wake and wait JIRA: https://issues.redhat.com/browse/RHEL-27755 commit 194bb58c6090e39bd7d9b9c888a079213628e1f6 Author: Jens Axboe <axboe@kernel.dk> Date: Thu Jun 8 11:57:40 2023 -0600 io_uring: add support for futex wake and wait Add support for FUTEX_WAKE/WAIT primitives. IORING_OP_FUTEX_WAKE is mix of FUTEX_WAKE and FUTEX_WAKE_BITSET, as it does support passing in a bitset. Similary, IORING_OP_FUTEX_WAIT is a mix of FUTEX_WAIT and FUTEX_WAIT_BITSET. For both of them, they are using the futex2 interface. FUTEX_WAKE is straight forward, as those can always be done directly from the io_uring submission without needing async handling. For FUTEX_WAIT, things are a bit more complicated. If the futex isn't ready, then we rely on a callback via futex_queue->wake() when someone wakes up the futex. From that calback, we queue up task_work with the original task, which will post a CQE and wake it, if necessary. Cancelations are supported, both from the application point-of-view, but also to be able to cancel pending waits if the ring exits before all events have occurred. The return value of futex_unqueue() is used to gate who wins the potential race between cancelation and futex wakeups. Whomever gets a 'ret == 1' return from that claims ownership of the io_uring futex request. This is just the barebones wait/wake support. PI or REQUEUE support is not added at this point, unclear if we might look into that later. Likewise, explicit timeouts are not supported either. It is expected that users that need timeouts would do so via the usual io_uring mechanism to do that using linked timeouts. The SQE format is as follows: `addr` Address of futex `fd` futex2(2) FUTEX2_* flags `futex_flags` io_uring specific command flags. None valid now. `addr2` Value of futex `addr3` Mask to wake/wait Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> Signed-off-by: Jens Axboe <axboe@kernel.dk> Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
2023-06-08 17:57:40 +00:00
#include "futex.h"
#include "cancel.h"
struct io_cancel {
struct file *file;
u64 addr;
u32 flags;
s32 fd;
u8 opcode;
};
#define CANCEL_FLAGS (IORING_ASYNC_CANCEL_ALL | IORING_ASYNC_CANCEL_FD | \
IORING_ASYNC_CANCEL_ANY | IORING_ASYNC_CANCEL_FD_FIXED | \
IORING_ASYNC_CANCEL_USERDATA | IORING_ASYNC_CANCEL_OP)
/*
* Returns true if the request matches the criteria outlined by 'cd'.
*/
bool io_cancel_req_match(struct io_kiocb *req, struct io_cancel_data *cd)
{
bool match_user_data = cd->flags & IORING_ASYNC_CANCEL_USERDATA;
if (req->ctx != cd->ctx)
return false;
if (!(cd->flags & (IORING_ASYNC_CANCEL_FD | IORING_ASYNC_CANCEL_OP)))
match_user_data = true;
if (cd->flags & IORING_ASYNC_CANCEL_ANY)
goto check_seq;
if (cd->flags & IORING_ASYNC_CANCEL_FD) {
if (req->file != cd->file)
return false;
}
if (cd->flags & IORING_ASYNC_CANCEL_OP) {
if (req->opcode != cd->opcode)
return false;
}
if (match_user_data && req->cqe.user_data != cd->data)
return false;
if (cd->flags & IORING_ASYNC_CANCEL_ALL) {
check_seq:
if (io_cancel_match_sequence(req, cd->seq))
return false;
}
return true;
}
static bool io_cancel_cb(struct io_wq_work *work, void *data)
{
struct io_kiocb *req = container_of(work, struct io_kiocb, work);
struct io_cancel_data *cd = data;
return io_cancel_req_match(req, cd);
}
static int io_async_cancel_one(struct io_uring_task *tctx,
struct io_cancel_data *cd)
{
enum io_wq_cancel cancel_ret;
int ret = 0;
bool all;
if (!tctx || !tctx->io_wq)
return -ENOENT;
all = cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY);
cancel_ret = io_wq_cancel_cb(tctx->io_wq, io_cancel_cb, cd, all);
switch (cancel_ret) {
case IO_WQ_CANCEL_OK:
ret = 0;
break;
case IO_WQ_CANCEL_RUNNING:
ret = -EALREADY;
break;
case IO_WQ_CANCEL_NOTFOUND:
ret = -ENOENT;
break;
}
return ret;
}
int io_try_cancel(struct io_uring_task *tctx, struct io_cancel_data *cd,
unsigned issue_flags)
{
struct io_ring_ctx *ctx = cd->ctx;
int ret;
WARN_ON_ONCE(!io_wq_current_is_worker() && tctx != current->io_uring);
ret = io_async_cancel_one(tctx, cd);
/*
* Fall-through even for -EALREADY, as we may have poll armed
* that need unarming.
*/
if (!ret)
return 0;
ret = io_poll_cancel(ctx, cd, issue_flags);
if (ret != -ENOENT)
return ret;
ret = io_waitid_cancel(ctx, cd, issue_flags);
if (ret != -ENOENT)
return ret;
io_uring: add support for futex wake and wait JIRA: https://issues.redhat.com/browse/RHEL-27755 commit 194bb58c6090e39bd7d9b9c888a079213628e1f6 Author: Jens Axboe <axboe@kernel.dk> Date: Thu Jun 8 11:57:40 2023 -0600 io_uring: add support for futex wake and wait Add support for FUTEX_WAKE/WAIT primitives. IORING_OP_FUTEX_WAKE is mix of FUTEX_WAKE and FUTEX_WAKE_BITSET, as it does support passing in a bitset. Similary, IORING_OP_FUTEX_WAIT is a mix of FUTEX_WAIT and FUTEX_WAIT_BITSET. For both of them, they are using the futex2 interface. FUTEX_WAKE is straight forward, as those can always be done directly from the io_uring submission without needing async handling. For FUTEX_WAIT, things are a bit more complicated. If the futex isn't ready, then we rely on a callback via futex_queue->wake() when someone wakes up the futex. From that calback, we queue up task_work with the original task, which will post a CQE and wake it, if necessary. Cancelations are supported, both from the application point-of-view, but also to be able to cancel pending waits if the ring exits before all events have occurred. The return value of futex_unqueue() is used to gate who wins the potential race between cancelation and futex wakeups. Whomever gets a 'ret == 1' return from that claims ownership of the io_uring futex request. This is just the barebones wait/wake support. PI or REQUEUE support is not added at this point, unclear if we might look into that later. Likewise, explicit timeouts are not supported either. It is expected that users that need timeouts would do so via the usual io_uring mechanism to do that using linked timeouts. The SQE format is as follows: `addr` Address of futex `fd` futex2(2) FUTEX2_* flags `futex_flags` io_uring specific command flags. None valid now. `addr2` Value of futex `addr3` Mask to wake/wait Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> Signed-off-by: Jens Axboe <axboe@kernel.dk> Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
2023-06-08 17:57:40 +00:00
ret = io_futex_cancel(ctx, cd, issue_flags);
if (ret != -ENOENT)
return ret;
spin_lock(&ctx->completion_lock);
if (!(cd->flags & IORING_ASYNC_CANCEL_FD))
ret = io_timeout_cancel(ctx, cd);
spin_unlock(&ctx->completion_lock);
return ret;
}
int io_async_cancel_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_cancel *cancel = io_kiocb_to_cmd(req, struct io_cancel);
if (unlikely(req->flags & REQ_F_BUFFER_SELECT))
return -EINVAL;
if (sqe->off || sqe->splice_fd_in)
return -EINVAL;
cancel->addr = READ_ONCE(sqe->addr);
cancel->flags = READ_ONCE(sqe->cancel_flags);
if (cancel->flags & ~CANCEL_FLAGS)
return -EINVAL;
if (cancel->flags & IORING_ASYNC_CANCEL_FD) {
if (cancel->flags & IORING_ASYNC_CANCEL_ANY)
return -EINVAL;
cancel->fd = READ_ONCE(sqe->fd);
}
if (cancel->flags & IORING_ASYNC_CANCEL_OP) {
if (cancel->flags & IORING_ASYNC_CANCEL_ANY)
return -EINVAL;
cancel->opcode = READ_ONCE(sqe->len);
}
return 0;
}
static int __io_async_cancel(struct io_cancel_data *cd,
struct io_uring_task *tctx,
unsigned int issue_flags)
{
bool all = cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY);
struct io_ring_ctx *ctx = cd->ctx;
struct io_tctx_node *node;
int ret, nr = 0;
do {
ret = io_try_cancel(tctx, cd, issue_flags);
if (ret == -ENOENT)
break;
if (!all)
return ret;
nr++;
} while (1);
/* slow path, try all io-wq's */
io_ring_submit_lock(ctx, issue_flags);
ret = -ENOENT;
list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
ret = io_async_cancel_one(node->task->io_uring, cd);
if (ret != -ENOENT) {
if (!all)
break;
nr++;
}
}
io_ring_submit_unlock(ctx, issue_flags);
return all ? nr : ret;
}
int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_cancel *cancel = io_kiocb_to_cmd(req, struct io_cancel);
struct io_cancel_data cd = {
.ctx = req->ctx,
.data = cancel->addr,
.flags = cancel->flags,
.opcode = cancel->opcode,
.seq = atomic_inc_return(&req->ctx->cancel_seq),
};
struct io_uring_task *tctx = req->task->io_uring;
int ret;
if (cd.flags & IORING_ASYNC_CANCEL_FD) {
if (req->flags & REQ_F_FIXED_FILE ||
cd.flags & IORING_ASYNC_CANCEL_FD_FIXED) {
req->flags |= REQ_F_FIXED_FILE;
req->file = io_file_get_fixed(req, cancel->fd,
issue_flags);
} else {
req->file = io_file_get_normal(req, cancel->fd);
}
if (!req->file) {
ret = -EBADF;
goto done;
}
cd.file = req->file;
}
ret = __io_async_cancel(&cd, tctx, issue_flags);
done:
if (ret < 0)
req_set_fail(req);
io_req_set_res(req, ret, 0);
return IOU_OK;
}
void init_hash_table(struct io_hash_table *table, unsigned size)
{
unsigned int i;
for (i = 0; i < size; i++) {
spin_lock_init(&table->hbs[i].lock);
INIT_HLIST_HEAD(&table->hbs[i].list);
}
}
io_uring: add sync cancelation API through io_uring_register() Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2068237 commit 78a861b9495920f8609dee5b670dacbff09d359f Author: Jens Axboe <axboe@kernel.dk> Date: Sat Jun 18 10:00:50 2022 -0600 io_uring: add sync cancelation API through io_uring_register() The io_uring cancelation API is async, like any other API that we expose there. For the case of finding a request to cancel, or not finding one, it is fully sync in that when submission returns, the CQE for both the cancelation request and the targeted request have been posted to the CQ ring. However, if the targeted work is being executed by io-wq, the API can only start the act of canceling it. This makes it difficult to use in some circumstances, as the caller then has to wait for the CQEs to come in and match on the same cancelation data there. Provide a IORING_REGISTER_SYNC_CANCEL command for io_uring_register() that does sync cancelations, always. For the io-wq case, it'll wait for the cancelation to come in before returning. The only expected returns from this API is: 0 Request found and canceled fine. > 0 Requests found and canceled. Only happens if asked to cancel multiple requests, and if the work wasn't in progress. -ENOENT Request not found. -ETIME A timeout on the operation was requested, but the timeout expired before we could cancel. and we won't get -EALREADY via this API. If the timeout value passed in is -1 (tv_sec and tv_nsec), then that means that no timeout is requested. Otherwise, the timespec passed in is the amount of time the sync cancel will wait for a successful cancelation. Link: https://github.com/axboe/liburing/discussions/608 Signed-off-by: Jens Axboe <axboe@kernel.dk> Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
2022-06-18 16:00:50 +00:00
static int __io_sync_cancel(struct io_uring_task *tctx,
struct io_cancel_data *cd, int fd)
{
struct io_ring_ctx *ctx = cd->ctx;
/* fixed must be grabbed every time since we drop the uring_lock */
if ((cd->flags & IORING_ASYNC_CANCEL_FD) &&
(cd->flags & IORING_ASYNC_CANCEL_FD_FIXED)) {
if (unlikely(fd >= ctx->nr_user_files))
io_uring: add sync cancelation API through io_uring_register() Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2068237 commit 78a861b9495920f8609dee5b670dacbff09d359f Author: Jens Axboe <axboe@kernel.dk> Date: Sat Jun 18 10:00:50 2022 -0600 io_uring: add sync cancelation API through io_uring_register() The io_uring cancelation API is async, like any other API that we expose there. For the case of finding a request to cancel, or not finding one, it is fully sync in that when submission returns, the CQE for both the cancelation request and the targeted request have been posted to the CQ ring. However, if the targeted work is being executed by io-wq, the API can only start the act of canceling it. This makes it difficult to use in some circumstances, as the caller then has to wait for the CQEs to come in and match on the same cancelation data there. Provide a IORING_REGISTER_SYNC_CANCEL command for io_uring_register() that does sync cancelations, always. For the io-wq case, it'll wait for the cancelation to come in before returning. The only expected returns from this API is: 0 Request found and canceled fine. > 0 Requests found and canceled. Only happens if asked to cancel multiple requests, and if the work wasn't in progress. -ENOENT Request not found. -ETIME A timeout on the operation was requested, but the timeout expired before we could cancel. and we won't get -EALREADY via this API. If the timeout value passed in is -1 (tv_sec and tv_nsec), then that means that no timeout is requested. Otherwise, the timespec passed in is the amount of time the sync cancel will wait for a successful cancelation. Link: https://github.com/axboe/liburing/discussions/608 Signed-off-by: Jens Axboe <axboe@kernel.dk> Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
2022-06-18 16:00:50 +00:00
return -EBADF;
fd = array_index_nospec(fd, ctx->nr_user_files);
cd->file = io_file_from_index(&ctx->file_table, fd);
io_uring: add sync cancelation API through io_uring_register() Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2068237 commit 78a861b9495920f8609dee5b670dacbff09d359f Author: Jens Axboe <axboe@kernel.dk> Date: Sat Jun 18 10:00:50 2022 -0600 io_uring: add sync cancelation API through io_uring_register() The io_uring cancelation API is async, like any other API that we expose there. For the case of finding a request to cancel, or not finding one, it is fully sync in that when submission returns, the CQE for both the cancelation request and the targeted request have been posted to the CQ ring. However, if the targeted work is being executed by io-wq, the API can only start the act of canceling it. This makes it difficult to use in some circumstances, as the caller then has to wait for the CQEs to come in and match on the same cancelation data there. Provide a IORING_REGISTER_SYNC_CANCEL command for io_uring_register() that does sync cancelations, always. For the io-wq case, it'll wait for the cancelation to come in before returning. The only expected returns from this API is: 0 Request found and canceled fine. > 0 Requests found and canceled. Only happens if asked to cancel multiple requests, and if the work wasn't in progress. -ENOENT Request not found. -ETIME A timeout on the operation was requested, but the timeout expired before we could cancel. and we won't get -EALREADY via this API. If the timeout value passed in is -1 (tv_sec and tv_nsec), then that means that no timeout is requested. Otherwise, the timespec passed in is the amount of time the sync cancel will wait for a successful cancelation. Link: https://github.com/axboe/liburing/discussions/608 Signed-off-by: Jens Axboe <axboe@kernel.dk> Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
2022-06-18 16:00:50 +00:00
if (!cd->file)
return -EBADF;
}
return __io_async_cancel(cd, tctx, 0);
}
int io_sync_cancel(struct io_ring_ctx *ctx, void __user *arg)
__must_hold(&ctx->uring_lock)
{
struct io_cancel_data cd = {
.ctx = ctx,
.seq = atomic_inc_return(&ctx->cancel_seq),
};
ktime_t timeout = KTIME_MAX;
struct io_uring_sync_cancel_reg sc;
struct file *file = NULL;
io_uring: add sync cancelation API through io_uring_register() Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2068237 commit 78a861b9495920f8609dee5b670dacbff09d359f Author: Jens Axboe <axboe@kernel.dk> Date: Sat Jun 18 10:00:50 2022 -0600 io_uring: add sync cancelation API through io_uring_register() The io_uring cancelation API is async, like any other API that we expose there. For the case of finding a request to cancel, or not finding one, it is fully sync in that when submission returns, the CQE for both the cancelation request and the targeted request have been posted to the CQ ring. However, if the targeted work is being executed by io-wq, the API can only start the act of canceling it. This makes it difficult to use in some circumstances, as the caller then has to wait for the CQEs to come in and match on the same cancelation data there. Provide a IORING_REGISTER_SYNC_CANCEL command for io_uring_register() that does sync cancelations, always. For the io-wq case, it'll wait for the cancelation to come in before returning. The only expected returns from this API is: 0 Request found and canceled fine. > 0 Requests found and canceled. Only happens if asked to cancel multiple requests, and if the work wasn't in progress. -ENOENT Request not found. -ETIME A timeout on the operation was requested, but the timeout expired before we could cancel. and we won't get -EALREADY via this API. If the timeout value passed in is -1 (tv_sec and tv_nsec), then that means that no timeout is requested. Otherwise, the timespec passed in is the amount of time the sync cancel will wait for a successful cancelation. Link: https://github.com/axboe/liburing/discussions/608 Signed-off-by: Jens Axboe <axboe@kernel.dk> Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
2022-06-18 16:00:50 +00:00
DEFINE_WAIT(wait);
int ret, i;
io_uring: add sync cancelation API through io_uring_register() Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2068237 commit 78a861b9495920f8609dee5b670dacbff09d359f Author: Jens Axboe <axboe@kernel.dk> Date: Sat Jun 18 10:00:50 2022 -0600 io_uring: add sync cancelation API through io_uring_register() The io_uring cancelation API is async, like any other API that we expose there. For the case of finding a request to cancel, or not finding one, it is fully sync in that when submission returns, the CQE for both the cancelation request and the targeted request have been posted to the CQ ring. However, if the targeted work is being executed by io-wq, the API can only start the act of canceling it. This makes it difficult to use in some circumstances, as the caller then has to wait for the CQEs to come in and match on the same cancelation data there. Provide a IORING_REGISTER_SYNC_CANCEL command for io_uring_register() that does sync cancelations, always. For the io-wq case, it'll wait for the cancelation to come in before returning. The only expected returns from this API is: 0 Request found and canceled fine. > 0 Requests found and canceled. Only happens if asked to cancel multiple requests, and if the work wasn't in progress. -ENOENT Request not found. -ETIME A timeout on the operation was requested, but the timeout expired before we could cancel. and we won't get -EALREADY via this API. If the timeout value passed in is -1 (tv_sec and tv_nsec), then that means that no timeout is requested. Otherwise, the timespec passed in is the amount of time the sync cancel will wait for a successful cancelation. Link: https://github.com/axboe/liburing/discussions/608 Signed-off-by: Jens Axboe <axboe@kernel.dk> Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
2022-06-18 16:00:50 +00:00
if (copy_from_user(&sc, arg, sizeof(sc)))
return -EFAULT;
if (sc.flags & ~CANCEL_FLAGS)
return -EINVAL;
for (i = 0; i < ARRAY_SIZE(sc.pad); i++)
if (sc.pad[i])
return -EINVAL;
for (i = 0; i < ARRAY_SIZE(sc.pad2); i++)
if (sc.pad2[i])
return -EINVAL;
io_uring: add sync cancelation API through io_uring_register() Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2068237 commit 78a861b9495920f8609dee5b670dacbff09d359f Author: Jens Axboe <axboe@kernel.dk> Date: Sat Jun 18 10:00:50 2022 -0600 io_uring: add sync cancelation API through io_uring_register() The io_uring cancelation API is async, like any other API that we expose there. For the case of finding a request to cancel, or not finding one, it is fully sync in that when submission returns, the CQE for both the cancelation request and the targeted request have been posted to the CQ ring. However, if the targeted work is being executed by io-wq, the API can only start the act of canceling it. This makes it difficult to use in some circumstances, as the caller then has to wait for the CQEs to come in and match on the same cancelation data there. Provide a IORING_REGISTER_SYNC_CANCEL command for io_uring_register() that does sync cancelations, always. For the io-wq case, it'll wait for the cancelation to come in before returning. The only expected returns from this API is: 0 Request found and canceled fine. > 0 Requests found and canceled. Only happens if asked to cancel multiple requests, and if the work wasn't in progress. -ENOENT Request not found. -ETIME A timeout on the operation was requested, but the timeout expired before we could cancel. and we won't get -EALREADY via this API. If the timeout value passed in is -1 (tv_sec and tv_nsec), then that means that no timeout is requested. Otherwise, the timespec passed in is the amount of time the sync cancel will wait for a successful cancelation. Link: https://github.com/axboe/liburing/discussions/608 Signed-off-by: Jens Axboe <axboe@kernel.dk> Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
2022-06-18 16:00:50 +00:00
cd.data = sc.addr;
cd.flags = sc.flags;
cd.opcode = sc.opcode;
io_uring: add sync cancelation API through io_uring_register() Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2068237 commit 78a861b9495920f8609dee5b670dacbff09d359f Author: Jens Axboe <axboe@kernel.dk> Date: Sat Jun 18 10:00:50 2022 -0600 io_uring: add sync cancelation API through io_uring_register() The io_uring cancelation API is async, like any other API that we expose there. For the case of finding a request to cancel, or not finding one, it is fully sync in that when submission returns, the CQE for both the cancelation request and the targeted request have been posted to the CQ ring. However, if the targeted work is being executed by io-wq, the API can only start the act of canceling it. This makes it difficult to use in some circumstances, as the caller then has to wait for the CQEs to come in and match on the same cancelation data there. Provide a IORING_REGISTER_SYNC_CANCEL command for io_uring_register() that does sync cancelations, always. For the io-wq case, it'll wait for the cancelation to come in before returning. The only expected returns from this API is: 0 Request found and canceled fine. > 0 Requests found and canceled. Only happens if asked to cancel multiple requests, and if the work wasn't in progress. -ENOENT Request not found. -ETIME A timeout on the operation was requested, but the timeout expired before we could cancel. and we won't get -EALREADY via this API. If the timeout value passed in is -1 (tv_sec and tv_nsec), then that means that no timeout is requested. Otherwise, the timespec passed in is the amount of time the sync cancel will wait for a successful cancelation. Link: https://github.com/axboe/liburing/discussions/608 Signed-off-by: Jens Axboe <axboe@kernel.dk> Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
2022-06-18 16:00:50 +00:00
/* we can grab a normal file descriptor upfront */
if ((cd.flags & IORING_ASYNC_CANCEL_FD) &&
!(cd.flags & IORING_ASYNC_CANCEL_FD_FIXED)) {
file = fget(sc.fd);
if (!file)
io_uring: add sync cancelation API through io_uring_register() Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2068237 commit 78a861b9495920f8609dee5b670dacbff09d359f Author: Jens Axboe <axboe@kernel.dk> Date: Sat Jun 18 10:00:50 2022 -0600 io_uring: add sync cancelation API through io_uring_register() The io_uring cancelation API is async, like any other API that we expose there. For the case of finding a request to cancel, or not finding one, it is fully sync in that when submission returns, the CQE for both the cancelation request and the targeted request have been posted to the CQ ring. However, if the targeted work is being executed by io-wq, the API can only start the act of canceling it. This makes it difficult to use in some circumstances, as the caller then has to wait for the CQEs to come in and match on the same cancelation data there. Provide a IORING_REGISTER_SYNC_CANCEL command for io_uring_register() that does sync cancelations, always. For the io-wq case, it'll wait for the cancelation to come in before returning. The only expected returns from this API is: 0 Request found and canceled fine. > 0 Requests found and canceled. Only happens if asked to cancel multiple requests, and if the work wasn't in progress. -ENOENT Request not found. -ETIME A timeout on the operation was requested, but the timeout expired before we could cancel. and we won't get -EALREADY via this API. If the timeout value passed in is -1 (tv_sec and tv_nsec), then that means that no timeout is requested. Otherwise, the timespec passed in is the amount of time the sync cancel will wait for a successful cancelation. Link: https://github.com/axboe/liburing/discussions/608 Signed-off-by: Jens Axboe <axboe@kernel.dk> Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
2022-06-18 16:00:50 +00:00
return -EBADF;
cd.file = file;
io_uring: add sync cancelation API through io_uring_register() Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2068237 commit 78a861b9495920f8609dee5b670dacbff09d359f Author: Jens Axboe <axboe@kernel.dk> Date: Sat Jun 18 10:00:50 2022 -0600 io_uring: add sync cancelation API through io_uring_register() The io_uring cancelation API is async, like any other API that we expose there. For the case of finding a request to cancel, or not finding one, it is fully sync in that when submission returns, the CQE for both the cancelation request and the targeted request have been posted to the CQ ring. However, if the targeted work is being executed by io-wq, the API can only start the act of canceling it. This makes it difficult to use in some circumstances, as the caller then has to wait for the CQEs to come in and match on the same cancelation data there. Provide a IORING_REGISTER_SYNC_CANCEL command for io_uring_register() that does sync cancelations, always. For the io-wq case, it'll wait for the cancelation to come in before returning. The only expected returns from this API is: 0 Request found and canceled fine. > 0 Requests found and canceled. Only happens if asked to cancel multiple requests, and if the work wasn't in progress. -ENOENT Request not found. -ETIME A timeout on the operation was requested, but the timeout expired before we could cancel. and we won't get -EALREADY via this API. If the timeout value passed in is -1 (tv_sec and tv_nsec), then that means that no timeout is requested. Otherwise, the timespec passed in is the amount of time the sync cancel will wait for a successful cancelation. Link: https://github.com/axboe/liburing/discussions/608 Signed-off-by: Jens Axboe <axboe@kernel.dk> Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
2022-06-18 16:00:50 +00:00
}
ret = __io_sync_cancel(current->io_uring, &cd, sc.fd);
/* found something, done! */
if (ret != -EALREADY)
goto out;
if (sc.timeout.tv_sec != -1UL || sc.timeout.tv_nsec != -1UL) {
struct timespec64 ts = {
.tv_sec = sc.timeout.tv_sec,
.tv_nsec = sc.timeout.tv_nsec
};
timeout = ktime_add_ns(timespec64_to_ktime(ts), ktime_get_ns());
}
/*
* Keep looking until we get -ENOENT. we'll get woken everytime
* every time a request completes and will retry the cancelation.
*/
do {
cd.seq = atomic_inc_return(&ctx->cancel_seq);
prepare_to_wait(&ctx->cq_wait, &wait, TASK_INTERRUPTIBLE);
ret = __io_sync_cancel(current->io_uring, &cd, sc.fd);
mutex_unlock(&ctx->uring_lock);
io_uring: add sync cancelation API through io_uring_register() Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2068237 commit 78a861b9495920f8609dee5b670dacbff09d359f Author: Jens Axboe <axboe@kernel.dk> Date: Sat Jun 18 10:00:50 2022 -0600 io_uring: add sync cancelation API through io_uring_register() The io_uring cancelation API is async, like any other API that we expose there. For the case of finding a request to cancel, or not finding one, it is fully sync in that when submission returns, the CQE for both the cancelation request and the targeted request have been posted to the CQ ring. However, if the targeted work is being executed by io-wq, the API can only start the act of canceling it. This makes it difficult to use in some circumstances, as the caller then has to wait for the CQEs to come in and match on the same cancelation data there. Provide a IORING_REGISTER_SYNC_CANCEL command for io_uring_register() that does sync cancelations, always. For the io-wq case, it'll wait for the cancelation to come in before returning. The only expected returns from this API is: 0 Request found and canceled fine. > 0 Requests found and canceled. Only happens if asked to cancel multiple requests, and if the work wasn't in progress. -ENOENT Request not found. -ETIME A timeout on the operation was requested, but the timeout expired before we could cancel. and we won't get -EALREADY via this API. If the timeout value passed in is -1 (tv_sec and tv_nsec), then that means that no timeout is requested. Otherwise, the timespec passed in is the amount of time the sync cancel will wait for a successful cancelation. Link: https://github.com/axboe/liburing/discussions/608 Signed-off-by: Jens Axboe <axboe@kernel.dk> Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
2022-06-18 16:00:50 +00:00
if (ret != -EALREADY)
break;
io_uring: add IORING_SETUP_DEFER_TASKRUN Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2068237 commit c0e0d6ba25f180ab76d3c18f8b360a119dffa634 Author: Dylan Yudaken <dylany@fb.com> Date: Tue Aug 30 05:50:10 2022 -0700 io_uring: add IORING_SETUP_DEFER_TASKRUN Allow deferring async tasks until the user calls io_uring_enter(2) with the IORING_ENTER_GETEVENTS flag. Enable this mode with a flag at io_uring_setup time. This functionality requires that the later io_uring_enter will be called from the same submission task, and therefore restrict this flag to work only when IORING_SETUP_SINGLE_ISSUER is also set. Being able to hand pick when tasks are run prevents the problem where there is current work to be done, however task work runs anyway. For example, a common workload would obtain a batch of CQEs, and process each one. Interrupting this to additional taskwork would add latency but not gain anything. If instead task work is deferred to just before more CQEs are obtained then no additional latency is added. The way this is implemented is by trying to keep task work local to a io_ring_ctx, rather than to the submission task. This is required, as the application will want to wake up only a single io_ring_ctx at a time to process work, and so the lists of work have to be kept separate. This has some other benefits like not having to check the task continually in handle_tw_list (and potentially unlocking/locking those), and reducing locks in the submit & process completions path. There are networking cases where using this option can reduce request latency by 50%. For example a contrived example using [1] where the client sends 2k data and receives the same data back while doing some system calls (to trigger task work) shows this reduction. The reason ends up being that if sending responses is delayed by processing task work, then the client side sits idle. Whereas reordering the sends first means that the client runs it's workload in parallel with the local task work. [1]: Using https://github.com/DylanZA/netbench/tree/defer_run Client: ./netbench --client_only 1 --control_port 10000 --host <host> --tx "epoll --threads 16 --per_thread 1 --size 2048 --resp 2048 --workload 1000" Server: ./netbench --server_only 1 --control_port 10000 --rx "io_uring --defer_taskrun 0 --workload 100" --rx "io_uring --defer_taskrun 1 --workload 100" Signed-off-by: Dylan Yudaken <dylany@fb.com> Link: https://lore.kernel.org/r/20220830125013.570060-5-dylany@fb.com Signed-off-by: Jens Axboe <axboe@kernel.dk> Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
2022-08-30 12:50:10 +00:00
ret = io_run_task_work_sig(ctx);
if (ret < 0)
io_uring: add sync cancelation API through io_uring_register() Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2068237 commit 78a861b9495920f8609dee5b670dacbff09d359f Author: Jens Axboe <axboe@kernel.dk> Date: Sat Jun 18 10:00:50 2022 -0600 io_uring: add sync cancelation API through io_uring_register() The io_uring cancelation API is async, like any other API that we expose there. For the case of finding a request to cancel, or not finding one, it is fully sync in that when submission returns, the CQE for both the cancelation request and the targeted request have been posted to the CQ ring. However, if the targeted work is being executed by io-wq, the API can only start the act of canceling it. This makes it difficult to use in some circumstances, as the caller then has to wait for the CQEs to come in and match on the same cancelation data there. Provide a IORING_REGISTER_SYNC_CANCEL command for io_uring_register() that does sync cancelations, always. For the io-wq case, it'll wait for the cancelation to come in before returning. The only expected returns from this API is: 0 Request found and canceled fine. > 0 Requests found and canceled. Only happens if asked to cancel multiple requests, and if the work wasn't in progress. -ENOENT Request not found. -ETIME A timeout on the operation was requested, but the timeout expired before we could cancel. and we won't get -EALREADY via this API. If the timeout value passed in is -1 (tv_sec and tv_nsec), then that means that no timeout is requested. Otherwise, the timespec passed in is the amount of time the sync cancel will wait for a successful cancelation. Link: https://github.com/axboe/liburing/discussions/608 Signed-off-by: Jens Axboe <axboe@kernel.dk> Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
2022-06-18 16:00:50 +00:00
break;
ret = schedule_hrtimeout(&timeout, HRTIMER_MODE_ABS);
if (!ret) {
ret = -ETIME;
break;
}
mutex_lock(&ctx->uring_lock);
io_uring: add sync cancelation API through io_uring_register() Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2068237 commit 78a861b9495920f8609dee5b670dacbff09d359f Author: Jens Axboe <axboe@kernel.dk> Date: Sat Jun 18 10:00:50 2022 -0600 io_uring: add sync cancelation API through io_uring_register() The io_uring cancelation API is async, like any other API that we expose there. For the case of finding a request to cancel, or not finding one, it is fully sync in that when submission returns, the CQE for both the cancelation request and the targeted request have been posted to the CQ ring. However, if the targeted work is being executed by io-wq, the API can only start the act of canceling it. This makes it difficult to use in some circumstances, as the caller then has to wait for the CQEs to come in and match on the same cancelation data there. Provide a IORING_REGISTER_SYNC_CANCEL command for io_uring_register() that does sync cancelations, always. For the io-wq case, it'll wait for the cancelation to come in before returning. The only expected returns from this API is: 0 Request found and canceled fine. > 0 Requests found and canceled. Only happens if asked to cancel multiple requests, and if the work wasn't in progress. -ENOENT Request not found. -ETIME A timeout on the operation was requested, but the timeout expired before we could cancel. and we won't get -EALREADY via this API. If the timeout value passed in is -1 (tv_sec and tv_nsec), then that means that no timeout is requested. Otherwise, the timespec passed in is the amount of time the sync cancel will wait for a successful cancelation. Link: https://github.com/axboe/liburing/discussions/608 Signed-off-by: Jens Axboe <axboe@kernel.dk> Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
2022-06-18 16:00:50 +00:00
} while (1);
finish_wait(&ctx->cq_wait, &wait);
mutex_lock(&ctx->uring_lock);
io_uring: add sync cancelation API through io_uring_register() Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2068237 commit 78a861b9495920f8609dee5b670dacbff09d359f Author: Jens Axboe <axboe@kernel.dk> Date: Sat Jun 18 10:00:50 2022 -0600 io_uring: add sync cancelation API through io_uring_register() The io_uring cancelation API is async, like any other API that we expose there. For the case of finding a request to cancel, or not finding one, it is fully sync in that when submission returns, the CQE for both the cancelation request and the targeted request have been posted to the CQ ring. However, if the targeted work is being executed by io-wq, the API can only start the act of canceling it. This makes it difficult to use in some circumstances, as the caller then has to wait for the CQEs to come in and match on the same cancelation data there. Provide a IORING_REGISTER_SYNC_CANCEL command for io_uring_register() that does sync cancelations, always. For the io-wq case, it'll wait for the cancelation to come in before returning. The only expected returns from this API is: 0 Request found and canceled fine. > 0 Requests found and canceled. Only happens if asked to cancel multiple requests, and if the work wasn't in progress. -ENOENT Request not found. -ETIME A timeout on the operation was requested, but the timeout expired before we could cancel. and we won't get -EALREADY via this API. If the timeout value passed in is -1 (tv_sec and tv_nsec), then that means that no timeout is requested. Otherwise, the timespec passed in is the amount of time the sync cancel will wait for a successful cancelation. Link: https://github.com/axboe/liburing/discussions/608 Signed-off-by: Jens Axboe <axboe@kernel.dk> Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
2022-06-18 16:00:50 +00:00
if (ret == -ENOENT || ret > 0)
ret = 0;
out:
if (file)
fput(file);
io_uring: add sync cancelation API through io_uring_register() Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2068237 commit 78a861b9495920f8609dee5b670dacbff09d359f Author: Jens Axboe <axboe@kernel.dk> Date: Sat Jun 18 10:00:50 2022 -0600 io_uring: add sync cancelation API through io_uring_register() The io_uring cancelation API is async, like any other API that we expose there. For the case of finding a request to cancel, or not finding one, it is fully sync in that when submission returns, the CQE for both the cancelation request and the targeted request have been posted to the CQ ring. However, if the targeted work is being executed by io-wq, the API can only start the act of canceling it. This makes it difficult to use in some circumstances, as the caller then has to wait for the CQEs to come in and match on the same cancelation data there. Provide a IORING_REGISTER_SYNC_CANCEL command for io_uring_register() that does sync cancelations, always. For the io-wq case, it'll wait for the cancelation to come in before returning. The only expected returns from this API is: 0 Request found and canceled fine. > 0 Requests found and canceled. Only happens if asked to cancel multiple requests, and if the work wasn't in progress. -ENOENT Request not found. -ETIME A timeout on the operation was requested, but the timeout expired before we could cancel. and we won't get -EALREADY via this API. If the timeout value passed in is -1 (tv_sec and tv_nsec), then that means that no timeout is requested. Otherwise, the timespec passed in is the amount of time the sync cancel will wait for a successful cancelation. Link: https://github.com/axboe/liburing/discussions/608 Signed-off-by: Jens Axboe <axboe@kernel.dk> Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
2022-06-18 16:00:50 +00:00
return ret;
}