io_uring-6.14-20250131

-----BEGIN PGP SIGNATURE-----
 
 iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmec70wQHGF4Ym9lQGtl
 cm5lbC5kawAKCRD301j7KXHgpp61D/4pFyr6hgqq22bkUHonGRqSpXnFXLfWmjWJ
 p/M9i8+3YS7Q5BUmBjmE0rncOrjqs+oFACXBXPTKqboPqgjGDLrrhZuOWn6OH6Pv
 nPxHS1eP813B/SY/qpSrPXz9b8tlgLZqY35dB9/2USB7k1Lbly204HoonHWnNvu7
 tk43YkSa8q5IWoJaUn2a8q8yi0isxCkt2UtlChkAaQEhXNoUIpr1lHnUx1VTHoB4
 +VfwMNvyXNMy3ENGvGjMEKLqKF2QyFJbwCsPYZDgvAxw8gCUHqCqMgCfTzWHAXgH
 VRvspost+6DKAbR0nIHpH421NZ1n4nnN1MUxxJizGSPpfxBR/R8i8Vtfswxzl6MN
 YNQlASGIbzlJhdweDKRwZH2LHgo+EkF2ULQG0b0Di7KFLwjfPtDN7KraPHRHnMJr
 yiKUY4Tf9PuEjgdIDAzqfU8Lgr5GKFE9pYA6NlB+3mkPt2JGbecWjeBV76a4DqjA
 RyaRKNwAQzlZkJxftq0OJLiFsBUTewZumRdxlrouV+RZZ5HlzZjINKBqEYlMzned
 zTdr4xzc96O5xV7OcLDuSk2aMU0RKcFyMmLMfOHET11Hu/PFmmiI+KaBPxheKZLb
 nWPQFtUuEJmYkSntsNZZ8rx6ef4CoUPnhmJrN1JR0zfhJeykxl/1eCmWZjwKc8s1
 7iXe48s4Dg==
 =hygF
 -----END PGP SIGNATURE-----

Merge tag 'io_uring-6.14-20250131' of git://git.kernel.dk/linux

Pull more io_uring updates from Jens Axboe:

 - Series cleaning up the alloc cache changes from this merge window,
   and then another series on top making it better yet.

   This also solves an issue with KASAN_EXTRA_INFO, by making io_uring
   resilient to KASAN using parts of the freed struct for storage (a
   sketch of the resulting cache pattern follows this list)

 - Cleanups and simplifications to buffer cloning and io resource node
   management (a lock-ordering sketch follows this list)

 - Fix an issue introduced in this merge window where READ/WRITE_ONCE
   was used on an atomic_t, which made some archs complain

 - Fix for an errant connect retry when the socket has been shut down

 - Fix for multishot and provided buffers
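
A minimal userspace sketch of the cache pattern the first series moves to
(illustrative names only -- demo_cache and demo_async_hdr are made up, and
this is not the kernel code; it only mirrors the shape of the new
io_alloc_cache_init()/io_cache_alloc_new() in the diff below): instead of an
init_once() callback, the cache records how many leading bytes of an element
must be zeroed on a fresh allocation, which is also what lets the KASAN path
simply re-clear those bytes when an entry is pulled back out of the cache.

	/* build: cc -std=c99 alloc_cache_sketch.c */
	#include <stdbool.h>
	#include <stddef.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	struct demo_async_hdr {
		/* must be NULL/0 on a fresh allocation */
		void	*free_iov;
		int	free_iov_nr;
		/* per-request scratch, rewritten before every use */
		char	scratch[64];
	};

	struct demo_cache {
		void		**entries;
		unsigned int	nr_cached, max_cached;
		unsigned int	elem_size;
		unsigned int	init_clear;	/* leading bytes to zero on a new alloc */
	};

	/* returns true on failure, mirroring io_alloc_cache_init() */
	static bool demo_cache_init(struct demo_cache *c, unsigned int max_nr,
				    unsigned int size, unsigned int init_bytes)
	{
		c->entries = calloc(max_nr, sizeof(void *));
		if (!c->entries)
			return true;
		c->nr_cached = 0;
		c->max_cached = max_nr;
		c->elem_size = size;
		c->init_clear = init_bytes;
		return false;
	}

	static void *demo_cache_alloc(struct demo_cache *c)
	{
		void *obj;

		if (c->nr_cached)		/* recycled entry: keep its contents */
			return c->entries[--c->nr_cached];
		obj = malloc(c->elem_size);	/* fresh entry: clear only the header */
		if (obj && c->init_clear)
			memset(obj, 0, c->init_clear);
		return obj;
	}

	int main(void)
	{
		struct demo_cache cache;
		struct demo_async_hdr *hdr;

		if (demo_cache_init(&cache, 128, sizeof(struct demo_async_hdr),
				    offsetof(struct demo_async_hdr, scratch)))
			return 1;
		hdr = demo_cache_alloc(&cache);
		if (!hdr)
			return 1;
		printf("fresh alloc: free_iov=%p nr=%d\n", hdr->free_iov,
		       hdr->free_iov_nr);
		free(hdr);
		free(cache.entries);
		return 0;
	}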

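Similarly, the buffer-cloning simplification boils down to holding both
rings' uring_lock for the whole copy instead of dropping and re-taking them
mid-operation, with the two mutexes taken in a fixed order (lowest address
first) so concurrent cloners cannot deadlock. A rough userspace analogue
with pthread mutexes and hypothetical names (demo_ring), shown only to
illustrate the ordering trick used by lock_two_rings() in the diff:

	/* build: cc -pthread two_rings_sketch.c */
	#include <pthread.h>
	#include <stdio.h>

	struct demo_ring {
		pthread_mutex_t	lock;
		int		nr_bufs;
	};

	/* order by address so A->B and B->A callers cannot deadlock */
	static void lock_two_rings(struct demo_ring *a, struct demo_ring *b)
	{
		if (a > b) {
			struct demo_ring *tmp = a;
			a = b;
			b = tmp;
		}
		pthread_mutex_lock(&a->lock);
		pthread_mutex_lock(&b->lock);
	}

	static void unlock_two_rings(struct demo_ring *a, struct demo_ring *b)
	{
		pthread_mutex_unlock(&a->lock);
		pthread_mutex_unlock(&b->lock);
	}

	int main(void)
	{
		struct demo_ring src = { PTHREAD_MUTEX_INITIALIZER, 4 };
		struct demo_ring dst = { PTHREAD_MUTEX_INITIALIZER, 0 };

		lock_two_rings(&dst, &src);
		dst.nr_bufs = src.nr_bufs;	/* "clone" with both rings stable */
		unlock_two_rings(&dst, &src);
		printf("cloned %d buffers\n", dst.nr_bufs);
		return 0;
	}
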
* tag 'io_uring-6.14-20250131' of git://git.kernel.dk/linux:
  io_uring/net: don't retry connect operation on EPOLLERR
  io_uring/rw: simplify io_rw_recycle()
  io_uring: remove !KASAN guards from cache free
  io_uring/net: extract io_send_select_buffer()
  io_uring/net: clean io_msg_copy_hdr()
  io_uring/net: make io_net_vec_assign() return void
  io_uring: add alloc_cache.c
  io_uring: dont ifdef io_alloc_cache_kasan()
  io_uring: include all deps for alloc_cache.h
  io_uring: fix multishots with selected buffers
  io_uring/register: use atomic_read/write for sq_flags migration
  io_uring/alloc_cache: get rid of _nocache() helper
  io_uring: get rid of alloc cache init_once handling
  io_uring/uring_cmd: cleanup struct io_uring_cmd_data layout
  io_uring/uring_cmd: use cached cmd_op in io_uring_cmd_sock()
  io_uring/msg_ring: don't leave potentially dangling ->tctx pointer
  io_uring/rsrc: Move lockdep assert from io_free_rsrc_node() to caller
  io_uring/rsrc: remove unused parameter ctx for io_rsrc_node_alloc()
  io_uring: clean up io_uring_register_get_file()
  io_uring/rsrc: Simplify buffer cloning by locking both rings
Linus Torvalds 2025-01-31 11:29:23 -08:00
commit c82da38b28
21 changed files with 272 additions and 243 deletions

include/linux/io_uring/cmd.h

@ -19,8 +19,8 @@ struct io_uring_cmd {
};
struct io_uring_cmd_data {
struct io_uring_sqe sqes[2];
void *op_data;
struct io_uring_sqe sqes[2];
};
static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe)

include/linux/io_uring_types.h

@ -222,7 +222,8 @@ struct io_alloc_cache {
void **entries;
unsigned int nr_cached;
unsigned int max_cached;
size_t elem_size;
unsigned int elem_size;
unsigned int init_clear;
};
struct io_ring_ctx {

io_uring/Makefile

@ -13,7 +13,7 @@ obj-$(CONFIG_IO_URING) += io_uring.o opdef.o kbuf.o rsrc.o notif.o \
sync.o msg_ring.o advise.o openclose.o \
epoll.o statx.o timeout.o fdinfo.o \
cancel.o waitid.o register.o \
truncate.o memmap.o
truncate.o memmap.o alloc_cache.o
obj-$(CONFIG_IO_WQ) += io-wq.o
obj-$(CONFIG_FUTEX) += futex.o
obj-$(CONFIG_NET_RX_BUSY_POLL) += napi.o

io_uring/alloc_cache.c (new file, 44 lines)

@ -0,0 +1,44 @@
// SPDX-License-Identifier: GPL-2.0
#include "alloc_cache.h"
void io_alloc_cache_free(struct io_alloc_cache *cache,
void (*free)(const void *))
{
void *entry;
if (!cache->entries)
return;
while ((entry = io_alloc_cache_get(cache)) != NULL)
free(entry);
kvfree(cache->entries);
cache->entries = NULL;
}
/* returns false if the cache was initialized properly */
bool io_alloc_cache_init(struct io_alloc_cache *cache,
unsigned max_nr, unsigned int size,
unsigned int init_bytes)
{
cache->entries = kvmalloc_array(max_nr, sizeof(void *), GFP_KERNEL);
if (!cache->entries)
return true;
cache->nr_cached = 0;
cache->max_cached = max_nr;
cache->elem_size = size;
cache->init_clear = init_bytes;
return false;
}
void *io_cache_alloc_new(struct io_alloc_cache *cache, gfp_t gfp)
{
void *obj;
obj = kmalloc(cache->elem_size, gfp);
if (obj && cache->init_clear)
memset(obj, 0, cache->init_clear);
return obj;
}

io_uring/alloc_cache.h

@ -1,11 +1,30 @@
#ifndef IOU_ALLOC_CACHE_H
#define IOU_ALLOC_CACHE_H
#include <linux/io_uring_types.h>
/*
* Don't allow the cache to grow beyond this size.
*/
#define IO_ALLOC_CACHE_MAX 128
void io_alloc_cache_free(struct io_alloc_cache *cache,
void (*free)(const void *));
bool io_alloc_cache_init(struct io_alloc_cache *cache,
unsigned max_nr, unsigned int size,
unsigned int init_bytes);
void *io_cache_alloc_new(struct io_alloc_cache *cache, gfp_t gfp);
static inline void io_alloc_cache_kasan(struct iovec **iov, int *nr)
{
if (IS_ENABLED(CONFIG_KASAN)) {
kfree(*iov);
*iov = NULL;
*nr = 0;
}
}
static inline bool io_alloc_cache_put(struct io_alloc_cache *cache,
void *entry)
{
@ -23,52 +42,30 @@ static inline void *io_alloc_cache_get(struct io_alloc_cache *cache)
if (cache->nr_cached) {
void *entry = cache->entries[--cache->nr_cached];
/*
* If KASAN is enabled, always clear the initial bytes that
* must be zeroed post alloc, in case any of them overlap
* with KASAN storage.
*/
#if defined(CONFIG_KASAN)
kasan_mempool_unpoison_object(entry, cache->elem_size);
if (cache->init_clear)
memset(entry, 0, cache->init_clear);
#endif
return entry;
}
return NULL;
}
static inline void *io_cache_alloc(struct io_alloc_cache *cache, gfp_t gfp,
void (*init_once)(void *obj))
static inline void *io_cache_alloc(struct io_alloc_cache *cache, gfp_t gfp)
{
if (unlikely(!cache->nr_cached)) {
void *obj = kmalloc(cache->elem_size, gfp);
void *obj;
if (obj && init_once)
init_once(obj);
obj = io_alloc_cache_get(cache);
if (obj)
return obj;
}
return io_alloc_cache_get(cache);
return io_cache_alloc_new(cache, gfp);
}
/* returns false if the cache was initialized properly */
static inline bool io_alloc_cache_init(struct io_alloc_cache *cache,
unsigned max_nr, size_t size)
{
cache->entries = kvmalloc_array(max_nr, sizeof(void *), GFP_KERNEL);
if (cache->entries) {
cache->nr_cached = 0;
cache->max_cached = max_nr;
cache->elem_size = size;
return false;
}
return true;
}
static inline void io_alloc_cache_free(struct io_alloc_cache *cache,
void (*free)(const void *))
{
void *entry;
if (!cache->entries)
return;
while ((entry = io_alloc_cache_get(cache)) != NULL)
free(entry);
kvfree(cache->entries);
cache->entries = NULL;
}
#endif

io_uring/filetable.c

@ -68,7 +68,7 @@ static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file,
if (slot_index >= ctx->file_table.data.nr)
return -EINVAL;
node = io_rsrc_node_alloc(ctx, IORING_RSRC_FILE);
node = io_rsrc_node_alloc(IORING_RSRC_FILE);
if (!node)
return -ENOMEM;

io_uring/futex.c

@ -36,7 +36,7 @@ struct io_futex_data {
bool io_futex_cache_init(struct io_ring_ctx *ctx)
{
return io_alloc_cache_init(&ctx->futex_cache, IO_FUTEX_ALLOC_CACHE_MAX,
sizeof(struct io_futex_data));
sizeof(struct io_futex_data), 0);
}
void io_futex_cache_free(struct io_ring_ctx *ctx)
@ -320,7 +320,7 @@ int io_futex_wait(struct io_kiocb *req, unsigned int issue_flags)
}
io_ring_submit_lock(ctx, issue_flags);
ifd = io_cache_alloc(&ctx->futex_cache, GFP_NOWAIT, NULL);
ifd = io_cache_alloc(&ctx->futex_cache, GFP_NOWAIT);
if (!ifd) {
ret = -ENOMEM;
goto done_unlock;

io_uring/io_uring.c

@ -315,16 +315,18 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
INIT_LIST_HEAD(&ctx->cq_overflow_list);
INIT_LIST_HEAD(&ctx->io_buffers_cache);
ret = io_alloc_cache_init(&ctx->apoll_cache, IO_POLL_ALLOC_CACHE_MAX,
sizeof(struct async_poll));
sizeof(struct async_poll), 0);
ret |= io_alloc_cache_init(&ctx->netmsg_cache, IO_ALLOC_CACHE_MAX,
sizeof(struct io_async_msghdr));
sizeof(struct io_async_msghdr),
offsetof(struct io_async_msghdr, clear));
ret |= io_alloc_cache_init(&ctx->rw_cache, IO_ALLOC_CACHE_MAX,
sizeof(struct io_async_rw));
sizeof(struct io_async_rw),
offsetof(struct io_async_rw, clear));
ret |= io_alloc_cache_init(&ctx->uring_cache, IO_ALLOC_CACHE_MAX,
sizeof(struct io_uring_cmd_data));
sizeof(struct io_uring_cmd_data), 0);
spin_lock_init(&ctx->msg_lock);
ret |= io_alloc_cache_init(&ctx->msg_cache, IO_ALLOC_CACHE_MAX,
sizeof(struct io_kiocb));
sizeof(struct io_kiocb), 0);
ret |= io_futex_cache_init(ctx);
if (ret)
goto free_ref;

io_uring/io_uring.h

@ -226,21 +226,16 @@ static inline void io_req_set_res(struct io_kiocb *req, s32 res, u32 cflags)
}
static inline void *io_uring_alloc_async_data(struct io_alloc_cache *cache,
struct io_kiocb *req,
void (*init_once)(void *obj))
struct io_kiocb *req)
{
req->async_data = io_cache_alloc(cache, GFP_KERNEL, init_once);
if (req->async_data)
req->flags |= REQ_F_ASYNC_DATA;
return req->async_data;
}
if (cache) {
req->async_data = io_cache_alloc(cache, GFP_KERNEL);
} else {
const struct io_issue_def *def = &io_issue_defs[req->opcode];
static inline void *io_uring_alloc_async_data_nocache(struct io_kiocb *req)
{
const struct io_issue_def *def = &io_issue_defs[req->opcode];
WARN_ON_ONCE(!def->async_size);
req->async_data = kmalloc(def->async_size, GFP_KERNEL);
WARN_ON_ONCE(!def->async_size);
req->async_data = kmalloc(def->async_size, GFP_KERNEL);
}
if (req->async_data)
req->flags |= REQ_F_ASYNC_DATA;
return req->async_data;

io_uring/msg_ring.c

@ -89,8 +89,7 @@ static void io_msg_tw_complete(struct io_kiocb *req, struct io_tw_state *ts)
static int io_msg_remote_post(struct io_ring_ctx *ctx, struct io_kiocb *req,
int res, u32 cflags, u64 user_data)
{
req->tctx = READ_ONCE(ctx->submitter_task->io_uring);
if (!req->tctx) {
if (!READ_ONCE(ctx->submitter_task)) {
kmem_cache_free(req_cachep, req);
return -EOWNERDEAD;
}
@ -98,6 +97,7 @@ static int io_msg_remote_post(struct io_ring_ctx *ctx, struct io_kiocb *req,
io_req_set_res(req, res, cflags);
percpu_ref_get(&ctx->refs);
req->ctx = ctx;
req->tctx = NULL;
req->io_task_work.func = io_msg_tw_complete;
io_req_task_work_add_remote(req, ctx, IOU_F_TWQ_LAZY_WAKE);
return 0;

io_uring/net.c

@ -137,7 +137,6 @@ static void io_netmsg_iovec_free(struct io_async_msghdr *kmsg)
static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_async_msghdr *hdr = req->async_data;
struct iovec *iov;
/* can't recycle, ensure we free the iovec if we have one */
if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) {
@ -146,44 +145,30 @@ static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
}
/* Let normal cleanup path reap it if we fail adding to the cache */
iov = hdr->free_iov;
io_alloc_cache_kasan(&hdr->free_iov, &hdr->free_iov_nr);
if (io_alloc_cache_put(&req->ctx->netmsg_cache, hdr)) {
if (iov)
kasan_mempool_poison_object(iov);
req->async_data = NULL;
req->flags &= ~REQ_F_ASYNC_DATA;
}
}
static void io_msg_async_data_init(void *obj)
{
struct io_async_msghdr *hdr = (struct io_async_msghdr *)obj;
hdr->free_iov = NULL;
hdr->free_iov_nr = 0;
}
static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req)
{
struct io_ring_ctx *ctx = req->ctx;
struct io_async_msghdr *hdr;
hdr = io_uring_alloc_async_data(&ctx->netmsg_cache, req,
io_msg_async_data_init);
hdr = io_uring_alloc_async_data(&ctx->netmsg_cache, req);
if (!hdr)
return NULL;
/* If the async data was cached, we might have an iov cached inside. */
if (hdr->free_iov) {
kasan_mempool_unpoison_object(hdr->free_iov,
hdr->free_iov_nr * sizeof(struct iovec));
if (hdr->free_iov)
req->flags |= REQ_F_NEED_CLEANUP;
}
return hdr;
}
/* assign new iovec to kmsg, if we need to */
static int io_net_vec_assign(struct io_kiocb *req, struct io_async_msghdr *kmsg,
static void io_net_vec_assign(struct io_kiocb *req, struct io_async_msghdr *kmsg,
struct iovec *iov)
{
if (iov) {
@ -193,7 +178,6 @@ static int io_net_vec_assign(struct io_kiocb *req, struct io_async_msghdr *kmsg,
kfree(kmsg->free_iov);
kmsg->free_iov = iov;
}
return 0;
}
static inline void io_mshot_prep_retry(struct io_kiocb *req,
@ -255,7 +239,8 @@ static int io_compat_msg_copy_hdr(struct io_kiocb *req,
if (unlikely(ret < 0))
return ret;
return io_net_vec_assign(req, iomsg, iov);
io_net_vec_assign(req, iomsg, iov);
return 0;
}
#endif
@ -295,11 +280,12 @@ static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg,
ret = -EINVAL;
goto ua_end;
} else {
struct iovec __user *uiov = msg->msg_iov;
/* we only need the length for provided buffers */
if (!access_ok(&msg->msg_iov[0].iov_len, sizeof(__kernel_size_t)))
if (!access_ok(&uiov->iov_len, sizeof(uiov->iov_len)))
goto ua_end;
unsafe_get_user(iov->iov_len, &msg->msg_iov[0].iov_len,
ua_end);
unsafe_get_user(iov->iov_len, &uiov->iov_len, ua_end);
sr->len = iov->iov_len;
}
ret = 0;
@ -314,7 +300,8 @@ ua_end:
if (unlikely(ret < 0))
return ret;
return io_net_vec_assign(req, iomsg, iov);
io_net_vec_assign(req, iomsg, iov);
return 0;
}
static int io_sendmsg_copy_hdr(struct io_kiocb *req,
@ -579,6 +566,54 @@ int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
return IOU_OK;
}
static int io_send_select_buffer(struct io_kiocb *req, unsigned int issue_flags,
struct io_async_msghdr *kmsg)
{
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
int ret;
struct buf_sel_arg arg = {
.iovs = &kmsg->fast_iov,
.max_len = min_not_zero(sr->len, INT_MAX),
.nr_iovs = 1,
};
if (kmsg->free_iov) {
arg.nr_iovs = kmsg->free_iov_nr;
arg.iovs = kmsg->free_iov;
arg.mode = KBUF_MODE_FREE;
}
if (!(sr->flags & IORING_RECVSEND_BUNDLE))
arg.nr_iovs = 1;
else
arg.mode |= KBUF_MODE_EXPAND;
ret = io_buffers_select(req, &arg, issue_flags);
if (unlikely(ret < 0))
return ret;
if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) {
kmsg->free_iov_nr = ret;
kmsg->free_iov = arg.iovs;
req->flags |= REQ_F_NEED_CLEANUP;
}
sr->len = arg.out_len;
if (ret == 1) {
sr->buf = arg.iovs[0].iov_base;
ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len,
&kmsg->msg.msg_iter);
if (unlikely(ret))
return ret;
} else {
iov_iter_init(&kmsg->msg.msg_iter, ITER_SOURCE,
arg.iovs, ret, arg.out_len);
}
return 0;
}
int io_send(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
@ -602,44 +637,9 @@ int io_send(struct io_kiocb *req, unsigned int issue_flags)
retry_bundle:
if (io_do_buffer_select(req)) {
struct buf_sel_arg arg = {
.iovs = &kmsg->fast_iov,
.max_len = min_not_zero(sr->len, INT_MAX),
.nr_iovs = 1,
};
if (kmsg->free_iov) {
arg.nr_iovs = kmsg->free_iov_nr;
arg.iovs = kmsg->free_iov;
arg.mode = KBUF_MODE_FREE;
}
if (!(sr->flags & IORING_RECVSEND_BUNDLE))
arg.nr_iovs = 1;
else
arg.mode |= KBUF_MODE_EXPAND;
ret = io_buffers_select(req, &arg, issue_flags);
if (unlikely(ret < 0))
ret = io_send_select_buffer(req, issue_flags, kmsg);
if (ret)
return ret;
if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) {
kmsg->free_iov_nr = ret;
kmsg->free_iov = arg.iovs;
req->flags |= REQ_F_NEED_CLEANUP;
}
sr->len = arg.out_len;
if (ret == 1) {
sr->buf = arg.iovs[0].iov_base;
ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len,
&kmsg->msg.msg_iter);
if (unlikely(ret))
return ret;
} else {
iov_iter_init(&kmsg->msg.msg_iter, ITER_SOURCE,
arg.iovs, ret, arg.out_len);
}
}
/*
@ -1710,6 +1710,11 @@ int io_connect(struct io_kiocb *req, unsigned int issue_flags)
int ret;
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
if (unlikely(req->flags & REQ_F_FAIL)) {
ret = -ECONNRESET;
goto out;
}
file_flags = force_nonblock ? O_NONBLOCK : 0;
ret = __sys_connect_file(req->file, &io->addr, connect->addr_len,
@ -1813,11 +1818,8 @@ void io_netmsg_cache_free(const void *entry)
{
struct io_async_msghdr *kmsg = (struct io_async_msghdr *) entry;
if (kmsg->free_iov) {
kasan_mempool_unpoison_object(kmsg->free_iov,
kmsg->free_iov_nr * sizeof(struct iovec));
if (kmsg->free_iov)
io_netmsg_iovec_free(kmsg);
}
kfree(kmsg);
}
#endif

io_uring/net.h

@ -5,16 +5,20 @@
struct io_async_msghdr {
#if defined(CONFIG_NET)
struct iovec fast_iov;
/* points to an allocated iov, if NULL we use fast_iov instead */
struct iovec *free_iov;
/* points to an allocated iov, if NULL we use fast_iov instead */
int free_iov_nr;
int namelen;
__kernel_size_t controllen;
__kernel_size_t payloadlen;
struct sockaddr __user *uaddr;
struct msghdr msg;
struct sockaddr_storage addr;
struct_group(clear,
int namelen;
struct iovec fast_iov;
__kernel_size_t controllen;
__kernel_size_t payloadlen;
struct sockaddr __user *uaddr;
struct msghdr msg;
struct sockaddr_storage addr;
);
#else
struct_group(clear);
#endif
};

io_uring/poll.c

@ -273,6 +273,8 @@ static int io_poll_check_events(struct io_kiocb *req, struct io_tw_state *ts)
return IOU_POLL_REISSUE;
}
}
if (unlikely(req->cqe.res & EPOLLERR))
req_set_fail(req);
if (req->apoll_events & EPOLLONESHOT)
return IOU_POLL_DONE;
@ -315,8 +317,10 @@ void io_poll_task_func(struct io_kiocb *req, struct io_tw_state *ts)
ret = io_poll_check_events(req, ts);
if (ret == IOU_POLL_NO_ACTION) {
io_kbuf_recycle(req, 0);
return;
} else if (ret == IOU_POLL_REQUEUE) {
io_kbuf_recycle(req, 0);
__io_poll_execute(req, 0);
return;
}
@ -650,7 +654,7 @@ static struct async_poll *io_req_alloc_apoll(struct io_kiocb *req,
kfree(apoll->double_poll);
} else {
if (!(issue_flags & IO_URING_F_UNLOCKED))
apoll = io_cache_alloc(&ctx->apoll_cache, GFP_ATOMIC, NULL);
apoll = io_cache_alloc(&ctx->apoll_cache, GFP_ATOMIC);
else
apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
if (!apoll)

io_uring/register.c

@ -552,7 +552,7 @@ overflow:
ctx->cqe_cached = ctx->cqe_sentinel = NULL;
WRITE_ONCE(n.rings->sq_dropped, READ_ONCE(o.rings->sq_dropped));
WRITE_ONCE(n.rings->sq_flags, READ_ONCE(o.rings->sq_flags));
atomic_set(&n.rings->sq_flags, atomic_read(&o.rings->sq_flags));
WRITE_ONCE(n.rings->cq_flags, READ_ONCE(o.rings->cq_flags));
WRITE_ONCE(n.rings->cq_overflow, READ_ONCE(o.rings->cq_overflow));
@ -853,6 +853,8 @@ struct file *io_uring_register_get_file(unsigned int fd, bool registered)
return ERR_PTR(-EINVAL);
fd = array_index_nospec(fd, IO_RINGFD_REG_MAX);
file = tctx->registered_rings[fd];
if (file)
get_file(file);
} else {
file = fget(fd);
}
@ -919,7 +921,7 @@ SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode,
trace_io_uring_register(ctx, opcode, ctx->file_table.data.nr,
ctx->buf_table.nr, ret);
mutex_unlock(&ctx->uring_lock);
if (!use_registered_ring)
fput(file);
fput(file);
return ret;
}

io_uring/rsrc.c

@ -118,7 +118,7 @@ static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_rsrc_node *node)
}
}
struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx, int type)
struct io_rsrc_node *io_rsrc_node_alloc(int type)
{
struct io_rsrc_node *node;
@ -203,7 +203,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
err = -EBADF;
break;
}
node = io_rsrc_node_alloc(ctx, IORING_RSRC_FILE);
node = io_rsrc_node_alloc(IORING_RSRC_FILE);
if (!node) {
err = -ENOMEM;
fput(file);
@ -444,8 +444,6 @@ int io_files_update(struct io_kiocb *req, unsigned int issue_flags)
void io_free_rsrc_node(struct io_ring_ctx *ctx, struct io_rsrc_node *node)
{
lockdep_assert_held(&ctx->uring_lock);
if (node->tag)
io_post_aux_cqe(ctx, node->tag, 0, 0);
@ -525,7 +523,7 @@ int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
goto fail;
}
ret = -ENOMEM;
node = io_rsrc_node_alloc(ctx, IORING_RSRC_FILE);
node = io_rsrc_node_alloc(IORING_RSRC_FILE);
if (!node) {
fput(file);
goto fail;
@ -730,7 +728,7 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx,
if (!iov->iov_base)
return NULL;
node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER);
node = io_rsrc_node_alloc(IORING_RSRC_BUFFER);
if (!node)
return ERR_PTR(-ENOMEM);
node->buf = NULL;
@ -921,6 +919,16 @@ int io_import_fixed(int ddir, struct iov_iter *iter,
return 0;
}
/* Lock two rings at once. The rings must be different! */
static void lock_two_rings(struct io_ring_ctx *ctx1, struct io_ring_ctx *ctx2)
{
if (ctx1 > ctx2)
swap(ctx1, ctx2);
mutex_lock(&ctx1->uring_lock);
mutex_lock_nested(&ctx2->uring_lock, SINGLE_DEPTH_NESTING);
}
/* Both rings are locked by the caller. */
static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx,
struct io_uring_clone_buffers *arg)
{
@ -928,6 +936,9 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
int i, ret, off, nr;
unsigned int nbufs;
lockdep_assert_held(&ctx->uring_lock);
lockdep_assert_held(&src_ctx->uring_lock);
/*
* Accounting state is shared between the two rings; that only works if
* both rings are accounted towards the same counters.
@ -942,7 +953,7 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
if (ctx->buf_table.nr && !(arg->flags & IORING_REGISTER_DST_REPLACE))
return -EBUSY;
nbufs = READ_ONCE(src_ctx->buf_table.nr);
nbufs = src_ctx->buf_table.nr;
if (!arg->nr)
arg->nr = nbufs;
else if (arg->nr > nbufs)
@ -966,27 +977,20 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
}
}
/*
* Drop our own lock here. We'll setup the data we need and reference
* the source buffers, then re-grab, check, and assign at the end.
*/
mutex_unlock(&ctx->uring_lock);
mutex_lock(&src_ctx->uring_lock);
ret = -ENXIO;
nbufs = src_ctx->buf_table.nr;
if (!nbufs)
goto out_unlock;
goto out_free;
ret = -EINVAL;
if (!arg->nr)
arg->nr = nbufs;
else if (arg->nr > nbufs)
goto out_unlock;
goto out_free;
ret = -EOVERFLOW;
if (check_add_overflow(arg->nr, arg->src_off, &off))
goto out_unlock;
goto out_free;
if (off > nbufs)
goto out_unlock;
goto out_free;
off = arg->dst_off;
i = arg->src_off;
@ -998,10 +1002,10 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
if (!src_node) {
dst_node = NULL;
} else {
dst_node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER);
dst_node = io_rsrc_node_alloc(IORING_RSRC_BUFFER);
if (!dst_node) {
ret = -ENOMEM;
goto out_unlock;
goto out_free;
}
refcount_inc(&src_node->buf->refs);
@ -1011,10 +1015,6 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
i++;
}
/* Have a ref on the bufs now, drop src lock and re-grab our own lock */
mutex_unlock(&src_ctx->uring_lock);
mutex_lock(&ctx->uring_lock);
/*
* If asked for replace, put the old table. data->nodes[] holds both
* old and new nodes at this point.
@ -1023,24 +1023,17 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
io_rsrc_data_free(ctx, &ctx->buf_table);
/*
* ctx->buf_table should be empty now - either the contents are being
* replaced and we just freed the table, or someone raced setting up
* a buffer table while the clone was happening. If not empty, fall
* through to failure handling.
* ctx->buf_table must be empty now - either the contents are being
* replaced and we just freed the table, or the contents are being
* copied to a ring that does not have buffers yet (checked at function
* entry).
*/
if (!ctx->buf_table.nr) {
ctx->buf_table = data;
return 0;
}
WARN_ON_ONCE(ctx->buf_table.nr);
ctx->buf_table = data;
return 0;
mutex_unlock(&ctx->uring_lock);
mutex_lock(&src_ctx->uring_lock);
/* someone raced setting up buffers, dump ours */
ret = -EBUSY;
out_unlock:
out_free:
io_rsrc_data_free(ctx, &data);
mutex_unlock(&src_ctx->uring_lock);
mutex_lock(&ctx->uring_lock);
return ret;
}
@ -1054,6 +1047,7 @@ out_unlock:
int io_register_clone_buffers(struct io_ring_ctx *ctx, void __user *arg)
{
struct io_uring_clone_buffers buf;
struct io_ring_ctx *src_ctx;
bool registered_src;
struct file *file;
int ret;
@ -1071,8 +1065,18 @@ int io_register_clone_buffers(struct io_ring_ctx *ctx, void __user *arg)
file = io_uring_register_get_file(buf.src_fd, registered_src);
if (IS_ERR(file))
return PTR_ERR(file);
ret = io_clone_buffers(ctx, file->private_data, &buf);
if (!registered_src)
fput(file);
src_ctx = file->private_data;
if (src_ctx != ctx) {
mutex_unlock(&ctx->uring_lock);
lock_two_rings(ctx, src_ctx);
}
ret = io_clone_buffers(ctx, src_ctx, &buf);
if (src_ctx != ctx)
mutex_unlock(&src_ctx->uring_lock);
fput(file);
return ret;
}

io_uring/rsrc.h

@ -2,6 +2,8 @@
#ifndef IOU_RSRC_H
#define IOU_RSRC_H
#include <linux/lockdep.h>
#define IO_NODE_ALLOC_CACHE_MAX 32
#define IO_RSRC_TAG_TABLE_SHIFT (PAGE_SHIFT - 3)
@ -43,7 +45,7 @@ struct io_imu_folio_data {
unsigned int nr_folios;
};
struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx, int type);
struct io_rsrc_node *io_rsrc_node_alloc(int type);
void io_free_rsrc_node(struct io_ring_ctx *ctx, struct io_rsrc_node *node);
void io_rsrc_data_free(struct io_ring_ctx *ctx, struct io_rsrc_data *data);
int io_rsrc_data_alloc(struct io_rsrc_data *data, unsigned nr);
@ -80,6 +82,7 @@ static inline struct io_rsrc_node *io_rsrc_node_lookup(struct io_rsrc_data *data
static inline void io_put_rsrc_node(struct io_ring_ctx *ctx, struct io_rsrc_node *node)
{
lockdep_assert_held(&ctx->uring_lock);
if (node && !--node->refs)
io_free_rsrc_node(ctx, node);
}

io_uring/rw.c

@ -146,28 +146,15 @@ static inline int io_import_iovec(int rw, struct io_kiocb *req,
return 0;
}
static void io_rw_iovec_free(struct io_async_rw *rw)
{
if (rw->free_iovec) {
kfree(rw->free_iovec);
rw->free_iov_nr = 0;
rw->free_iovec = NULL;
}
}
static void io_rw_recycle(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_async_rw *rw = req->async_data;
struct iovec *iov;
if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) {
io_rw_iovec_free(rw);
if (unlikely(issue_flags & IO_URING_F_UNLOCKED))
return;
}
iov = rw->free_iovec;
io_alloc_cache_kasan(&rw->free_iovec, &rw->free_iov_nr);
if (io_alloc_cache_put(&req->ctx->rw_cache, rw)) {
if (iov)
kasan_mempool_poison_object(iov);
req->async_data = NULL;
req->flags &= ~REQ_F_ASYNC_DATA;
}
@ -208,27 +195,16 @@ static void io_req_rw_cleanup(struct io_kiocb *req, unsigned int issue_flags)
}
}
static void io_rw_async_data_init(void *obj)
{
struct io_async_rw *rw = (struct io_async_rw *)obj;
rw->free_iovec = NULL;
rw->bytes_done = 0;
}
static int io_rw_alloc_async(struct io_kiocb *req)
{
struct io_ring_ctx *ctx = req->ctx;
struct io_async_rw *rw;
rw = io_uring_alloc_async_data(&ctx->rw_cache, req, io_rw_async_data_init);
rw = io_uring_alloc_async_data(&ctx->rw_cache, req);
if (!rw)
return -ENOMEM;
if (rw->free_iovec) {
kasan_mempool_unpoison_object(rw->free_iovec,
rw->free_iov_nr * sizeof(struct iovec));
if (rw->free_iovec)
req->flags |= REQ_F_NEED_CLEANUP;
}
rw->bytes_done = 0;
return 0;
}
@ -1323,10 +1299,7 @@ void io_rw_cache_free(const void *entry)
{
struct io_async_rw *rw = (struct io_async_rw *) entry;
if (rw->free_iovec) {
kasan_mempool_unpoison_object(rw->free_iovec,
rw->free_iov_nr * sizeof(struct iovec));
io_rw_iovec_free(rw);
}
if (rw->free_iovec)
kfree(rw->free_iovec);
kfree(rw);
}

io_uring/rw.h

@ -9,19 +9,24 @@ struct io_meta_state {
struct io_async_rw {
size_t bytes_done;
struct iov_iter iter;
struct iov_iter_state iter_state;
struct iovec fast_iov;
struct iovec *free_iovec;
int free_iov_nr;
/* wpq is for buffered io, while meta fields are used with direct io */
union {
struct wait_page_queue wpq;
struct {
struct uio_meta meta;
struct io_meta_state meta_state;
struct_group(clear,
struct iov_iter iter;
struct iov_iter_state iter_state;
struct iovec fast_iov;
int free_iov_nr;
/*
* wpq is for buffered io, while meta fields are used with
* direct io
*/
union {
struct wait_page_queue wpq;
struct {
struct uio_meta meta;
struct io_meta_state meta_state;
};
};
};
);
};
int io_prep_read_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe);

io_uring/timeout.c

@ -544,7 +544,7 @@ static int __io_timeout_prep(struct io_kiocb *req,
if (WARN_ON_ONCE(req_has_async_data(req)))
return -EFAULT;
data = io_uring_alloc_async_data_nocache(req);
data = io_uring_alloc_async_data(NULL, req);
if (!data)
return -ENOMEM;
data->req = req;

io_uring/uring_cmd.c

@ -168,23 +168,16 @@ void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, u64 res2,
}
EXPORT_SYMBOL_GPL(io_uring_cmd_done);
static void io_uring_cmd_init_once(void *obj)
{
struct io_uring_cmd_data *data = obj;
data->op_data = NULL;
}
static int io_uring_cmd_prep_setup(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
struct io_uring_cmd_data *cache;
cache = io_uring_alloc_async_data(&req->ctx->uring_cache, req,
io_uring_cmd_init_once);
cache = io_uring_alloc_async_data(&req->ctx->uring_cache, req);
if (!cache)
return -ENOMEM;
cache->op_data = NULL;
if (!(req->flags & REQ_F_FORCE_ASYNC)) {
/* defer memcpy until we need it */
@ -192,8 +185,8 @@ static int io_uring_cmd_prep_setup(struct io_kiocb *req,
return 0;
}
memcpy(req->async_data, sqe, uring_sqe_size(req->ctx));
ioucmd->sqe = req->async_data;
memcpy(cache->sqes, sqe, uring_sqe_size(req->ctx));
ioucmd->sqe = cache->sqes;
return 0;
}
@ -260,7 +253,7 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
struct io_uring_cmd_data *cache = req->async_data;
if (ioucmd->sqe != (void *) cache)
memcpy(cache, ioucmd->sqe, uring_sqe_size(req->ctx));
memcpy(cache->sqes, ioucmd->sqe, uring_sqe_size(req->ctx));
return -EAGAIN;
} else if (ret == -EIOCBQUEUED) {
return -EIOCBQUEUED;
@ -350,7 +343,7 @@ int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags)
if (!prot || !prot->ioctl)
return -EOPNOTSUPP;
switch (cmd->sqe->cmd_op) {
switch (cmd->cmd_op) {
case SOCKET_URING_OP_SIOCINQ:
ret = prot->ioctl(sk, SIOCINQ, &arg);
if (ret)

io_uring/waitid.c

@ -303,7 +303,7 @@ int io_waitid(struct io_kiocb *req, unsigned int issue_flags)
struct io_waitid_async *iwa;
int ret;
iwa = io_uring_alloc_async_data_nocache(req);
iwa = io_uring_alloc_async_data(NULL, req);
if (!iwa)
return -ENOMEM;