io_uring/net: support bundles for send
If IORING_OP_SEND is used with provided buffers, the caller may also set
IORING_RECVSEND_BUNDLE to turn it into a multi-buffer send. The idea is that
an application can fill outgoing buffers in a provided buffer group, and then
arm a single send that will service them all. Once there are no more buffers
to send, or if the requested length has been sent, the request posts a single
completion for all the buffers.

This only enables it for IORING_OP_SEND; IORING_OP_SENDMSG is coming in a
separate patch. However, this patch does do a lot of the prep work that makes
wiring up the sendmsg variant pretty trivial, as the two share the prep side.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 35c8711c8f
commit a05d1f625c
2 changed files with 138 additions and 18 deletions
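For orientation, here is a minimal userspace sketch of the flow the commit
message describes: fill a few buffers in a provided buffer ring, then arm one
bundled send that services them all. This is a hypothetical example, not part
of the patch; it assumes liburing's buffer-ring helpers
(io_uring_setup_buf_ring() and friends), a kernel carrying this patch, and a
connected socket fd. BGID, NR_BUFS, and BUF_SZ are illustrative. Note that,
per the prep code below, the bundle flag travels in sqe->ioprio.

#include <liburing.h>
#include <string.h>

#define BGID	0	/* illustrative buffer group ID */
#define NR_BUFS	8
#define BUF_SZ	4096

static char bufs[NR_BUFS][BUF_SZ];

/* Arm one bundled send covering every buffer queued in group BGID. */
static int arm_bundle_send(struct io_uring *ring, int sockfd)
{
	struct io_uring_buf_ring *br;
	struct io_uring_sqe *sqe;
	int i, err;

	br = io_uring_setup_buf_ring(ring, NR_BUFS, BGID, 0, &err);
	if (!br)
		return err;

	/* queue the outgoing data as provided buffers */
	for (i = 0; i < NR_BUFS; i++) {
		memset(bufs[i], 'a' + i, BUF_SZ);
		io_uring_buf_ring_add(br, bufs[i], BUF_SZ, i,
				      io_uring_buf_ring_mask(NR_BUFS), i);
	}
	io_uring_buf_ring_advance(br, NR_BUFS);

	sqe = io_uring_get_sqe(ring);
	/* addr/len unused with buffer select; len 0 means "no length cap" */
	io_uring_prep_send(sqe, sockfd, NULL, 0, 0);
	sqe->flags |= IOSQE_BUFFER_SELECT;
	sqe->buf_group = BGID;
	sqe->ioprio = IORING_RECVSEND_BUNDLE;	/* sr->flags is read from sqe->ioprio */
	return io_uring_submit(ring);
}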
include/uapi/linux/io_uring.h

@@ -351,11 +351,20 @@ enum io_uring_op {
  *					0 is reported if zerocopy was actually possible.
  *					IORING_NOTIF_USAGE_ZC_COPIED if data was copied
  *					(at least partially).
+ *
+ * IORING_RECVSEND_BUNDLE		Used with IOSQE_BUFFER_SELECT. If set, send will
+ *					grab as many buffers from the buffer group ID
+ *					given and send them all. The completion result
+ *					will be the number of buffers sent, with the
+ *					starting buffer ID in cqe->flags as per usual
+ *					for provided buffer usage. The buffers will be
+ *					contiguous from the starting buffer ID.
  */
 #define IORING_RECVSEND_POLL_FIRST	(1U << 0)
 #define IORING_RECV_MULTISHOT		(1U << 1)
 #define IORING_RECVSEND_FIXED_BUF	(1U << 2)
 #define IORING_SEND_ZC_REPORT_USAGE	(1U << 3)
+#define IORING_RECVSEND_BUNDLE		(1U << 4)
 
 /*
  * cqe.res for IORING_CQE_F_NOTIF if
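On the completion side, the new header comment implies the usual
provided-buffer CQE convention. A small hedged sketch of decoding one bundle
completion, assuming liburing's CQE definitions; handle_send_cqe() is an
illustrative helper name, not part of the patch:

#include <liburing.h>
#include <stdio.h>

/* Decode one bundle-send completion: the consumed buffers are
 * contiguous, starting at the buffer ID in the upper cqe->flags bits. */
static void handle_send_cqe(struct io_uring_cqe *cqe)
{
	if (cqe->res < 0) {
		fprintf(stderr, "send failed: %d\n", cqe->res);
		return;
	}
	if (cqe->flags & IORING_CQE_F_BUFFER) {
		unsigned start_bid = cqe->flags >> IORING_CQE_BUFFER_SHIFT;

		printf("result %d, buffers start at bid %u\n",
		       cqe->res, start_bid);
		/* buffers from start_bid onward may now be re-added */
	}
}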
io_uring/net.c (147 changed lines)
@@ -57,7 +57,7 @@ struct io_sr_msg {
 		struct user_msghdr __user	*umsg;
 		void __user			*buf;
 	};
-	unsigned			len;
+	int				len;
 	unsigned			done_io;
 	unsigned			msg_flags;
 	unsigned			nr_multishot_loops;
@@ -389,6 +389,8 @@ static int io_sendmsg_prep_setup(struct io_kiocb *req, int is_msg)
 	return ret;
 }
 
+#define SENDMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_BUNDLE)
+
 int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
@@ -407,11 +409,20 @@ int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
 	sr->len = READ_ONCE(sqe->len);
 	sr->flags = READ_ONCE(sqe->ioprio);
-	if (sr->flags & ~IORING_RECVSEND_POLL_FIRST)
+	if (sr->flags & ~SENDMSG_FLAGS)
 		return -EINVAL;
 	sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
 	if (sr->msg_flags & MSG_DONTWAIT)
 		req->flags |= REQ_F_NOWAIT;
+	if (sr->flags & IORING_RECVSEND_BUNDLE) {
+		if (req->opcode == IORING_OP_SENDMSG)
+			return -EINVAL;
+		if (!(req->flags & REQ_F_BUFFER_SELECT))
+			return -EINVAL;
+		sr->msg_flags |= MSG_WAITALL;
+		sr->buf_group = req->buf_index;
+		req->buf_list = NULL;
+	}
 
 #ifdef CONFIG_COMPAT
 	if (req->ctx->compat)
@@ -427,6 +438,79 @@ static void io_req_msg_cleanup(struct io_kiocb *req,
 	io_netmsg_recycle(req, issue_flags);
 }
 
+/*
+ * For bundle completions, we need to figure out how many segments we consumed.
+ * A bundle could be using a single ITER_UBUF if that's all we mapped, or it
+ * could be using an ITER_IOVEC. If the latter, then if we consumed all of
+ * the segments, then it's a trivial question to answer. If we have residual
+ * data in the iter, then loop the segments to figure out how much we
+ * transferred.
+ */
+static int io_bundle_nbufs(struct io_async_msghdr *kmsg, int ret)
+{
+	struct iovec *iov;
+	int nbufs;
+
+	/* no data is always zero segments, and a ubuf is always 1 segment */
+	if (ret <= 0)
+		return 0;
+	if (iter_is_ubuf(&kmsg->msg.msg_iter))
+		return 1;
+
+	iov = kmsg->free_iov;
+	if (!iov)
+		iov = &kmsg->fast_iov;
+
+	/* if all data was transferred, it's basic pointer math */
+	if (!iov_iter_count(&kmsg->msg.msg_iter))
+		return iter_iov(&kmsg->msg.msg_iter) - iov;
+
+	/* short transfer, count segments */
+	nbufs = 0;
+	do {
+		int this_len = min_t(int, iov[nbufs].iov_len, ret);
+
+		nbufs++;
+		ret -= this_len;
+	} while (ret);
+
+	return nbufs;
+}
+
+static inline bool io_send_finish(struct io_kiocb *req, int *ret,
+				  struct io_async_msghdr *kmsg,
+				  unsigned issue_flags)
+{
+	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+	bool bundle_finished = *ret <= 0;
+	unsigned int cflags;
+
+	if (!(sr->flags & IORING_RECVSEND_BUNDLE)) {
+		cflags = io_put_kbuf(req, issue_flags);
+		goto finish;
+	}
+
+	cflags = io_put_kbufs(req, io_bundle_nbufs(kmsg, *ret), issue_flags);
+
+	if (bundle_finished || req->flags & REQ_F_BL_EMPTY)
+		goto finish;
+
+	/*
+	 * Fill CQE for this send and see if we should keep trying to
+	 * send from this socket.
+	 */
+	if (io_req_post_cqe(req, *ret, cflags | IORING_CQE_F_MORE)) {
+		io_mshot_prep_retry(req, kmsg);
+		return false;
+	}
+
+	/* Otherwise stop bundle and use the current result. */
+finish:
+	io_req_set_res(req, *ret, cflags);
+	*ret = IOU_OK;
+	return true;
+}
+
 int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
 {
 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
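The segment accounting in io_bundle_nbufs() above is easy to model outside
the kernel. Here is a hypothetical userspace rendition of its short-transfer
loop, with illustrative names and sizes, not kernel API: given the iovec
array a bundle mapped and the number of bytes the socket accepted, count the
buffers that were fully or partially consumed.

#include <stdio.h>
#include <sys/uio.h>

/* hypothetical model of io_bundle_nbufs()'s short-transfer loop */
static int bundle_nbufs(const struct iovec *iov, int sent)
{
	int nbufs = 0;

	if (sent <= 0)
		return 0;
	do {
		int this_len = iov[nbufs].iov_len < (size_t)sent ?
			       (int)iov[nbufs].iov_len : sent;

		nbufs++;
		sent -= this_len;
	} while (sent);

	return nbufs;
}

int main(void)
{
	struct iovec iov[3] = {
		{ .iov_len = 4096 }, { .iov_len = 4096 }, { .iov_len = 4096 },
	};

	/* 6000 bytes sent: buffer 0 fully, buffer 1 partially -> prints 2 */
	printf("%d\n", bundle_nbufs(iov, 6000));
	return 0;
}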
@@ -482,7 +566,6 @@ int io_send(struct io_kiocb *req, unsigned int issue_flags)
 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 	struct io_async_msghdr *kmsg = req->async_data;
 	struct socket *sock;
-	unsigned int cflags;
 	unsigned flags;
 	int min_ret = 0;
 	int ret;
@@ -495,21 +578,47 @@ int io_send(struct io_kiocb *req, unsigned int issue_flags)
 	    (sr->flags & IORING_RECVSEND_POLL_FIRST))
 		return -EAGAIN;
 
-	if (io_do_buffer_select(req)) {
-		size_t len = sr->len;
-		void __user *buf;
-
-		buf = io_buffer_select(req, &len, issue_flags);
-		if (unlikely(!buf))
-			return -ENOBUFS;
-		sr->buf = buf;
-		sr->len = len;
-	}
-
 	flags = sr->msg_flags;
 	if (issue_flags & IO_URING_F_NONBLOCK)
 		flags |= MSG_DONTWAIT;
-	if (flags & MSG_WAITALL)
+
+retry_bundle:
+	if (io_do_buffer_select(req)) {
+		struct buf_sel_arg arg = {
+			.iovs = &kmsg->fast_iov,
+			.max_len = min_not_zero(sr->len, INT_MAX),
+			.nr_iovs = 1,
+			.mode = KBUF_MODE_EXPAND,
+		};
+
+		if (kmsg->free_iov) {
+			arg.nr_iovs = kmsg->free_iov_nr;
+			arg.iovs = kmsg->free_iov;
+			arg.mode |= KBUF_MODE_FREE;
+		}
+
+		if (!(sr->flags & IORING_RECVSEND_BUNDLE))
+			arg.nr_iovs = 1;
+
+		ret = io_buffers_select(req, &arg, issue_flags);
+		if (unlikely(ret < 0))
+			return ret;
+
+		sr->len = arg.out_len;
+		iov_iter_init(&kmsg->msg.msg_iter, ITER_SOURCE, arg.iovs, ret,
+				arg.out_len);
+		if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) {
+			kmsg->free_iov_nr = ret;
+			kmsg->free_iov = arg.iovs;
+		}
+	}
+
+	/*
+	 * If MSG_WAITALL is set, or this is a bundle send, then we need
+	 * the full amount. If just the bundle flag is set and we do a
+	 * short send, we complete the bundle sequence rather than continue on.
+	 */
+	if (flags & MSG_WAITALL || sr->flags & IORING_RECVSEND_BUNDLE)
 		min_ret = iov_iter_count(&kmsg->msg.msg_iter);
 
 	flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
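The retry_bundle loop above, together with io_send_finish() in the earlier
hunk, suggests a single bundle SQE may post intermediate completions flagged
IORING_CQE_F_MORE before the terminal one. A speculative consumer sketch,
assuming liburing's wait/seen helpers and reusing the illustrative
handle_send_cqe() from the header section above:

#include <liburing.h>
#include <stdbool.h>

/* Drain completions for one bundle send; intermediate CQEs carry
 * IORING_CQE_F_MORE, the final one does not. */
static int reap_bundle(struct io_uring *ring)
{
	struct io_uring_cqe *cqe;
	int total = 0;

	do {
		bool more;

		if (io_uring_wait_cqe(ring, &cqe))
			return -1;
		if (cqe->res > 0)
			total += cqe->res;
		/* handle_send_cqe(cqe); -- recycle consumed buffers here */
		more = cqe->flags & IORING_CQE_F_MORE;
		io_uring_cqe_seen(ring, cqe);
		if (!more)
			break;
	} while (1);

	return total;
}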
@@ -534,10 +643,12 @@ int io_send(struct io_kiocb *req, unsigned int issue_flags)
 		ret += sr->done_io;
 	else if (sr->done_io)
 		ret = sr->done_io;
+
+	if (!io_send_finish(req, &ret, kmsg, issue_flags))
+		goto retry_bundle;
+
 	io_req_msg_cleanup(req, issue_flags);
-	cflags = io_put_kbuf(req, issue_flags);
-	io_req_set_res(req, ret, cflags);
-	return IOU_OK;
+	return ret;
 }
 
 static int io_recvmsg_mshot_prep(struct io_kiocb *req,